v2.1.3: fix Crof mimo-v2.5-pro reasoning_content token exhaustion
- Strip reasoning_content from proxy output (Codex doesn't use it)
- Force a minimum of max_tokens=64000 for OpenAI-compatible providers
- Prevent models that emit large reasoning output from running out of tokens
This commit is contained in:
@@ -370,10 +370,6 @@ def oa_resp_to_responses(chat_resp, model, resp_id=None):
|
||||
fm = {"stop": "completed", "length": "incomplete", "tool_calls": "completed", "content_filter": "incomplete"}
|
||||
status = fm.get(finish, "incomplete")
|
||||
outputs = []
|
||||
rc = msg.get("reasoning_content")
|
||||
if rc:
|
||||
outputs.append({"type": "reasoning", "id": uid("rsn"), "status": "completed",
|
||||
"content": [{"type": "text", "text": rc}]})
|
||||
if content:
|
||||
outputs.append({"type": "message", "id": uid("msg"), "role": "assistant", "status": "completed",
|
||||
"content": [{"type": "output_text", "text": content, "annotations": []}]})
|
||||
@@ -447,9 +443,7 @@ def oa_stream_to_sse(chat_stream, model, req_id):
|
||||
yield emit("response.output_text.delta", {"type": "response.function_call_arguments.delta",
|
||||
"delta": fn["arguments"], "item_id": tc_buf[idx]["id"]})
|
||||
|
||||
rc = delta.get("reasoning_content")
|
||||
if rc:
|
||||
yield emit("response.reasoning.delta", {"type": "response.reasoning.delta", "delta": rc})
|
||||
|
||||
|
||||
if msg_opened:
|
||||
yield emit("response.output_text.done", {"type": "response.output_text.done",
|
||||
@@ -885,9 +879,10 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
if instructions:
|
||||
messages.insert(0, {"role": "system", "content": instructions})
|
||||
chat_body = {"model": model, "messages": messages}
|
||||
for k in ("temperature", "top_p", "max_output_tokens"):
|
||||
for k in ("temperature", "top_p"):
|
||||
if k in body:
|
||||
chat_body["max_tokens" if k == "max_output_tokens" else k] = body[k]
|
||||
chat_body[k] = body[k]
|
||||
chat_body["max_tokens"] = max(body.get("max_output_tokens", 0), 64000)
|
||||
tools = oa_convert_tools(body.get("tools"))
|
||||
if tools:
|
||||
chat_body["tools"] = tools
|
||||
|
||||
Reference in New Issue
Block a user