v2.1.3: fix Crof mimo-v2.5-pro reasoning_content token exhaustion

- Strip reasoning_content from proxy output (Codex doesn't use it)
- Enforce a minimum max_tokens of 64000 for openai-compat providers
- Prevents models that emit large reasoning from running out of tokens
This commit is contained in:
admin
2026-05-19 21:59:38 +04:00
Unverified
parent 662d8e961e
commit 77423c5c35
5 changed files with 18 additions and 10 deletions

View File

@@ -1,5 +1,13 @@
# Changelog # Changelog
## v2.1.3 (2026-05-19)
- **Fixed Crof mimo-v2.5-pro stopping mid-response (finish_reason=length)**
- Root cause: model emits 600+ `reasoning_content` SSE chunks that exhaust `max_tokens` before any actual content is generated
- Strip `reasoning_content` from proxy output — Codex doesn't use reasoning, so this avoids wasting output tokens on invisible text
- Enforce a minimum `max_tokens` of 64000 for openai-compat providers — this gives models room for both reasoning and content
- Works for all openai-compat providers (Crof, Z.AI, DeepSeek, OpenRouter, etc.)
## v2.1.2 (2026-05-19) ## v2.1.2 (2026-05-19)
- **Fixed Crof.ai and providers stopping after first tool call (root cause: None tool IDs)** - **Fixed Crof.ai and providers stopping after first tool call (root cause: None tool IDs)**

Binary file not shown.

Binary file not shown.

View File

@@ -24,6 +24,11 @@ model_catalog_json = ""
""" """
CHANGELOG = [ CHANGELOG = [
("2.1.3", "2026-05-19", [
"Fixed Crof mimo-v2.5-pro stopping: reasoning_content exhausted all output tokens",
"Strip reasoning_content from proxy output — Codex doesn't use it, avoids token waste",
"Force max_tokens=64000 minimum for openai-compat providers — gives models room for both reasoning and content",
]),
("2.1.2", "2026-05-19", [ ("2.1.2", "2026-05-19", [
"Fixed Crof.ai and providers stopping after first tool call (root cause: None tool IDs)", "Fixed Crof.ai and providers stopping after first tool call (root cause: None tool IDs)",
"Codex sends function_call items with id=None — proxy now matches tool results to calls by position", "Codex sends function_call items with id=None — proxy now matches tool results to calls by position",
@@ -527,7 +532,7 @@ class LauncherWin(Gtk.Window):
# header row # header row
hdr = Gtk.Box(spacing=8) hdr = Gtk.Box(spacing=8)
vbox.pack_start(hdr, False, False, 0) vbox.pack_start(hdr, False, False, 0)
lbl = Gtk.Label(label="<b>Codex Launcher v2.1.2</b>") lbl = Gtk.Label(label="<b>Codex Launcher v2.1.3</b>")
lbl.set_use_markup(True) lbl.set_use_markup(True)
hdr.pack_start(lbl, False, False, 0) hdr.pack_start(lbl, False, False, 0)
changelog_btn = Gtk.Button(label="Changelog") changelog_btn = Gtk.Button(label="Changelog")

View File

@@ -370,10 +370,6 @@ def oa_resp_to_responses(chat_resp, model, resp_id=None):
fm = {"stop": "completed", "length": "incomplete", "tool_calls": "completed", "content_filter": "incomplete"} fm = {"stop": "completed", "length": "incomplete", "tool_calls": "completed", "content_filter": "incomplete"}
status = fm.get(finish, "incomplete") status = fm.get(finish, "incomplete")
outputs = [] outputs = []
rc = msg.get("reasoning_content")
if rc:
outputs.append({"type": "reasoning", "id": uid("rsn"), "status": "completed",
"content": [{"type": "text", "text": rc}]})
if content: if content:
outputs.append({"type": "message", "id": uid("msg"), "role": "assistant", "status": "completed", outputs.append({"type": "message", "id": uid("msg"), "role": "assistant", "status": "completed",
"content": [{"type": "output_text", "text": content, "annotations": []}]}) "content": [{"type": "output_text", "text": content, "annotations": []}]})
@@ -447,9 +443,7 @@ def oa_stream_to_sse(chat_stream, model, req_id):
yield emit("response.output_text.delta", {"type": "response.function_call_arguments.delta", yield emit("response.output_text.delta", {"type": "response.function_call_arguments.delta",
"delta": fn["arguments"], "item_id": tc_buf[idx]["id"]}) "delta": fn["arguments"], "item_id": tc_buf[idx]["id"]})
rc = delta.get("reasoning_content")
if rc:
yield emit("response.reasoning.delta", {"type": "response.reasoning.delta", "delta": rc})
if msg_opened: if msg_opened:
yield emit("response.output_text.done", {"type": "response.output_text.done", yield emit("response.output_text.done", {"type": "response.output_text.done",
@@ -885,9 +879,10 @@ class Handler(http.server.BaseHTTPRequestHandler):
if instructions: if instructions:
messages.insert(0, {"role": "system", "content": instructions}) messages.insert(0, {"role": "system", "content": instructions})
chat_body = {"model": model, "messages": messages} chat_body = {"model": model, "messages": messages}
for k in ("temperature", "top_p", "max_output_tokens"): for k in ("temperature", "top_p"):
if k in body: if k in body:
chat_body["max_tokens" if k == "max_output_tokens" else k] = body[k] chat_body[k] = body[k]
chat_body["max_tokens"] = max(body.get("max_output_tokens", 0), 64000)
tools = oa_convert_tools(body.get("tools")) tools = oa_convert_tools(body.get("tools"))
if tools: if tools:
chat_body["tools"] = tools chat_body["tools"] = tools