v3.8.5: sync src/translate-proxy.py with x-codebuff headers

This commit is contained in:
2026-05-24 19:06:59 +00:00
Unverified
parent 1c679f9c65
commit 517c35a424

View File

@@ -266,6 +266,13 @@ _response_store_lock = threading.Lock()
_MAX_STORED = 50
_RESPONSE_TTL = 600
_fb_reasoning_store = collections.OrderedDict()
_fb_reasoning_store_lock = threading.Lock()
_deepseek_reasoning_store = {}
_deepseek_reasoning_lock = threading.Lock()
_MAX_DS_STORED = 100
_crof_lock = threading.Lock()
_provider_caps_lock = threading.Lock()
_provider_caps = None
@@ -328,11 +335,12 @@ def _codebuff_get_session(token, model):
req = urllib.request.Request(url, data=body, headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {token}",
"User-Agent": "codex-launcher/3.8.3",
"User-Agent": "codex-launcher/3.8.4",
"x-codebuff-model": model,
})
resp = urllib.request.urlopen(req, timeout=15)
data = json.loads(resp.read())
instance_id = data.get("instanceId", "")
instance_id = data.get("instanceId", data.get("data", {}).get("instance_id", ""))
expires_at = data.get("remainingMs", 0)
if instance_id:
with _codebuff_token_lock:
@@ -352,7 +360,7 @@ def _codebuff_start_run(token, agent_id):
req = urllib.request.Request(url, data=body, headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {token}",
"User-Agent": "codex-launcher/3.8.3",
"User-Agent": "codex-launcher/3.8.4",
})
try:
resp = urllib.request.urlopen(req, timeout=15)
@@ -375,7 +383,7 @@ def _codebuff_finish_run(token, run_id, status="completed"):
req = urllib.request.Request(url, data=body, headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {token}",
"User-Agent": "codex-launcher/3.8.3",
"User-Agent": "codex-launcher/3.8.4",
})
try:
urllib.request.urlopen(req, timeout=10)
@@ -737,6 +745,162 @@ def resolve_previous_response(body):
combined = [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": str(prev_input)}]}] + list(prev_output) + new_input
return combined
def _fb_store_reasoning(resp_id, reasoning_text):
if not resp_id or not reasoning_text:
return
with _fb_reasoning_store_lock:
_fb_reasoning_store[resp_id] = {"reasoning": reasoning_text, "ts": time.time()}
while len(_fb_reasoning_store) > _MAX_STORED:
_fb_reasoning_store.popitem(last=False)
expired = [k for k, v in _fb_reasoning_store.items() if time.time() - v["ts"] > _RESPONSE_TTL]
for k in expired:
del _fb_reasoning_store[k]
def _fb_get_reasoning(resp_id):
if not resp_id:
return ""
with _fb_reasoning_store_lock:
entry = _fb_reasoning_store.get(resp_id)
return entry["reasoning"] if entry else ""
def _fb_get_any_reasoning():
with _fb_reasoning_store_lock:
for k in _fb_reasoning_store:
return _fb_reasoning_store[k]["reasoning"]
return ""
def _codebuff_hard_disable_reasoning(messages):
"""Strip all reasoning/thinking fields from every message.
Codebuff rejects mixed reasoning_content histories.
The final chat body must be clean before POST."""
for msg in messages:
if not isinstance(msg, dict):
continue
for key in ("reasoning_content", "reasoning", "thinking",
"thinking_content", "thoughts"):
msg.pop(key, None)
def _is_reasoning_content_error(error_text):
if not error_text:
return False
e = error_text.lower()
return ("reasoning_content" in e or "thinking mode" in e
or "must be passed back" in e)
def _ds_store_assistant(resp_id, assistant_msg):
if not resp_id or not isinstance(assistant_msg, dict):
return
tool_calls = assistant_msg.get("tool_calls") or []
reasoning = assistant_msg.get("reasoning_content")
if not tool_calls or not reasoning:
return
with _deepseek_reasoning_lock:
for tc in tool_calls:
tc_id = tc.get("id") or tc.get("call_id", "")
if tc_id:
_deepseek_reasoning_store[tc_id] = {
"resp_id": resp_id,
"assistant": dict(assistant_msg),
"reasoning_content": reasoning,
"ts": time.time(),
}
keys = list(_deepseek_reasoning_store.keys())
if len(keys) > _MAX_DS_STORED:
for k in keys[:len(keys) - _MAX_DS_STORED]:
del _deepseek_reasoning_store[k]
def _ds_rebuild_tool_history(messages):
with _deepseek_reasoning_lock:
snapshot = dict(_deepseek_reasoning_store)
expired = [k for k, v in snapshot.items() if time.time() - v["ts"] > 900]
for k in expired:
_deepseek_reasoning_store.pop(k, None)
snapshot.pop(k, None)
if not snapshot:
return messages
rebuilt = []
inserted_ids = set()
for msg in messages:
if msg.get("role") == "tool":
tc_id = msg.get("tool_call_id", "")
stored = snapshot.get(tc_id)
if stored and tc_id not in inserted_ids:
am = dict(stored["assistant"])
if am.get("reasoning_content"):
rebuilt.append(am)
inserted_ids.add(tc_id)
rebuilt.append(msg)
return rebuilt
def _fb_input_to_messages(input_data, instructions=""):
msgs = []
tool_name_by_id = {}
pending_tool_calls = []
last_flushed_ids = []
if isinstance(input_data, str):
msgs.append({"role": "user", "content": input_data})
elif isinstance(input_data, list):
for item in input_data:
t = item.get("type")
if t == "reasoning":
continue
if t == "function_call":
tcid = item.get("call_id") or item.get("id") or uid("tc")
pending_tool_calls.append(
{"id": tcid, "type": "function",
"function": {"name": item.get("name", ""),
"arguments": item.get("arguments", "{}")}})
tool_name_by_id[tcid] = item.get("name", "")
continue
if pending_tool_calls:
last_flushed_ids = [tc["id"] for tc in pending_tool_calls]
msg = {"role": "assistant", "content": None, "tool_calls": pending_tool_calls}
msgs.append(msg)
pending_tool_calls = []
if t == "message":
role = item.get("role", "user")
if role == "developer":
role = "system"
text = ""
content = item.get("content", [])
if isinstance(content, str):
text = content
else:
for part in content:
if isinstance(part, str):
text += part
continue
pt = part.get("type", "")
if pt in ("input_text", "output_text"):
text += part.get("text", "")
if text is not None:
am = {"role": role, "content": text}
if role == "assistant":
am["_fb_orig_id"] = item.get("id", "")
msgs.append(am)
elif t == "function_call_output":
tcid = item.get("call_id") or item.get("id") or ""
if not tcid and last_flushed_ids:
idx = len([m for m in msgs if m.get("role") == "tool"])
if idx < len(last_flushed_ids):
tcid = last_flushed_ids[idx]
msgs.append({"role": "tool", "tool_call_id": tcid,
"tool_name": tool_name_by_id.get(tcid, ""),
"content": item.get("output", "")})
if pending_tool_calls:
msg = {"role": "assistant", "content": None, "tool_calls": pending_tool_calls}
msgs.append(msg)
if instructions:
msgs.insert(0, {"role": "system", "content": instructions})
return msgs
def _fb_strip_reasoning_from_messages(messages):
out = []
for m in messages:
nm = {k: v for k, v in m.items() if k != "reasoning_content"}
out.append(nm)
return out
_HOP_BY_HOP_HEADERS = {
"connection",
"keep-alive",
@@ -1399,6 +1563,7 @@ def oa_input_to_messages(input_data):
if role == "developer":
role = "system"
text = ""
reasoning_text = ""
content = item.get("content", [])
if isinstance(content, str):
text = content
@@ -1410,6 +1575,9 @@ def oa_input_to_messages(input_data):
pt = part.get("type", "")
if pt in ("input_text", "output_text"):
text += part.get("text", "")
elif pt in ("reasoning",):
for rp in part.get("content", []):
reasoning_text += rp.get("text", "")
elif pt == "input_image":
img = part.get("image_url", part)
msgs.append({"role": role, "content": [{"type": "text", "text": text},
@@ -1417,7 +1585,10 @@ def oa_input_to_messages(input_data):
text = None
break
if text is not None:
msgs.append({"role": role, "content": text})
msg = {"role": role, "content": text}
if reasoning_text and role == "assistant":
msg["reasoning_content"] = reasoning_text
msgs.append(msg)
elif t == "function_call_output":
tcid = item.get("call_id") or item.get("id") or ""
if not tcid and last_flushed_ids:
@@ -1568,10 +1739,12 @@ def oa_resp_to_responses(chat_resp, model, resp_id=None):
"total_tokens": usage.get("total_tokens", 0),
"input_tokens_details": {"cached_tokens": usage.get("prompt_tokens_details", {}).get("cached_tokens", 0)}}}
def oa_stream_to_sse(chat_stream, model, req_id):
def oa_stream_to_sse(chat_stream, model, req_id, _reasoning_out=None):
resp_id = req_id or uid("resp")
msg_id = uid("msg")
text_buf = ""
reasoning_buf = ""
reasoning_opened = False
tc_buf = {}
fr = None
msg_opened = False
@@ -1597,6 +1770,13 @@ def oa_stream_to_sse(chat_stream, model, req_id):
delta = choices[0].get("delta", {})
fr = choices[0].get("finish_reason")
rc = delta.get("reasoning_content") or delta.get("reasoning")
if rc:
if not reasoning_opened:
reasoning_opened = True
reasoning_buf += rc
yield emit("response.reasoning.delta", {"type": "response.reasoning.delta", "delta": rc})
content = delta.get("content")
if content:
if not msg_opened:
@@ -1626,7 +1806,10 @@ def oa_stream_to_sse(chat_stream, model, req_id):
yield emit("response.output_text.delta", {"type": "response.function_call_arguments.delta",
"delta": fn["arguments"], "item_id": tc_buf[idx]["id"]})
reasoning_rsn_id = uid("rsn") if reasoning_buf else None
if reasoning_opened:
yield emit("response.reasoning.done", {"type": "response.reasoning.done",
"item_id": reasoning_rsn_id, "text": reasoning_buf})
if msg_opened:
yield emit("response.output_text.done", {"type": "response.output_text.done",
@@ -1648,9 +1831,17 @@ def oa_stream_to_sse(chat_stream, model, req_id):
fm = {"stop": "completed", "length": "incomplete", "tool_calls": "completed", "content_filter": "incomplete"}
status = fm.get(fr, "incomplete")
final_out = []
if reasoning_buf:
final_out.append({"type": "reasoning", "id": reasoning_rsn_id, "status": "completed",
"content": [{"type": "text", "text": reasoning_buf}]})
if msg_opened:
msg_content = []
if reasoning_buf:
msg_content.append({"type": "output_text", "text": text_buf, "annotations": []})
else:
msg_content.append({"type": "output_text", "text": text_buf, "annotations": []})
final_out.append({"type": "message", "id": msg_id, "role": "assistant", "status": "completed",
"content": [{"type": "output_text", "text": text_buf, "annotations": []}]})
"content": msg_content})
for idx in sorted(tc_buf):
t = tc_buf[idx]
final_out.append({"type": "function_call", "id": t["id"], "call_id": t["call_id"],
@@ -1658,6 +1849,9 @@ def oa_stream_to_sse(chat_stream, model, req_id):
yield emit("response.completed", {"type": "response.completed",
"response": {"id": resp_id, "object": "response", "model": model,
"status": status, "created": int(time.time()), "output": final_out}})
if _reasoning_out is not None:
_reasoning_out["text"] = reasoning_buf
_reasoning_out["tool_calls"] = [tc_buf[i] for i in sorted(tc_buf)] if tc_buf else []
# ═══════════════════════════════════════════════════════════════════
# Anthropic backend
@@ -1675,12 +1869,24 @@ def an_input_to_messages(input_data):
if role == "developer":
role = "user"
text = ""
thinking_blocks = []
for part in item.get("content", []):
pt = part.get("type", "")
if pt in ("input_text", "output_text"):
text += part.get("text", "")
elif pt in ("reasoning", "thinking"):
thinking_text = ""
for rp in part.get("content", []):
thinking_text += rp.get("text", "")
if thinking_text:
thinking_blocks.append({"type": "thinking", "thinking": thinking_text, "signature": part.get("signature", "")})
if role == "assistant":
msgs.append({"role": "assistant", "content": text})
content_parts = []
if thinking_blocks:
content_parts.extend(thinking_blocks)
if text:
content_parts.append({"type": "text", "text": text})
msgs.append({"role": "assistant", "content": content_parts if content_parts else text})
else:
msgs.append({"role": "user", "content": text})
elif t == "function_call":
@@ -4544,54 +4750,175 @@ class Handler(http.server.BaseHTTPRequestHandler):
store_response(rid, body.get("input", ""), result.get("output", []))
def _handle_codebuff(self, body, model, stream, tracker=None):
token = _get_codebuff_token()
if not token:
return self.send_json(401, {"error": {"type": "auth_error",
"message": "No codebuff credentials found. Install codebuff (npm i -g codebuff) and login first."}})
token = _get_codebuff_token()
if not token:
return self.send_json(401, {"error": {"type": "auth_error",
"message": "No codebuff credentials found. Install codebuff (npm i -g codebuff) and login first."}})
agent_id = _FREEBUFF_AGENT_MAP.get(model)
if not agent_id:
matched = None
for m in _FREEBUFF_AGENT_MAP:
if model.lower().replace("/", "").replace("-", "") in m.lower().replace("/", "").replace("-", ""):
matched = m
break
if matched:
agent_id = _FREEBUFF_AGENT_MAP[matched]
model = matched
else:
fallback_model = "deepseek/deepseek-v4-flash"
agent_id = _FREEBUFF_AGENT_MAP.get(fallback_model, "base2-free-deepseek-flash")
print(f"[codebuff] unknown model '{model}', falling back to {fallback_model}", file=sys.stderr)
model = fallback_model
agent_id = _FREEBUFF_AGENT_MAP.get(model)
if not agent_id:
matched = None
for m in _FREEBUFF_AGENT_MAP:
if model.lower().replace("/", "").replace("-", "") in m.lower().replace("/", "").replace("-", ""):
matched = m
break
if matched:
agent_id = _FREEBUFF_AGENT_MAP[matched]
model = matched
else:
fallback_model = "deepseek/deepseek-v4-flash"
agent_id = _FREEBUFF_AGENT_MAP.get(fallback_model, "base2-free-deepseek-flash")
print(f"[codebuff] unknown model '{model}', falling back to {fallback_model}", file=sys.stderr)
model = fallback_model
run_id = _codebuff_start_run(token, agent_id)
if not run_id:
return self.send_json(502, {"error": {"type": "upstream_error",
"message": "Failed to start codebuff agent run. Check credentials and network."}})
instance_id = _codebuff_get_session(token, model)
input_data = body.get("input", "")
instructions = body.get("instructions", "").strip()
messages = _fb_input_to_messages(input_data, instructions)
messages = _ds_rebuild_tool_history(messages)
metadata = {
"run_id": run_id,
"cost_mode": "free",
}
if instance_id:
metadata["codebuff_instance_id"] = instance_id
chat_body = {
"model": model,
"messages": messages,
"stream": stream,
"max_tokens": max(body.get("max_output_tokens", 0), 64000),
"codebuff_metadata": metadata,
}
for k in ("temperature", "top_p"):
if k in body:
chat_body[k] = body[k]
tools = oa_convert_tools(body.get("tools"))
if tools:
chat_body["tools"] = tools
if body.get("tool_choice"):
chat_body["tool_choice"] = body["tool_choice"]
target = f"{_FREEBUFF_API_URL}/api/v1/chat/completions"
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {token}",
"User-Agent": "codex-launcher/3.8.4",
"x-codebuff-model": model,
}
if instance_id:
headers["x-codebuff-instance-id"] = instance_id
print(f"[{self._session_id}] [codebuff] POST {target} model={model} stream={stream} run={run_id}", file=sys.stderr)
chat_body_b = json.dumps(chat_body).encode()
try:
req = urllib.request.Request(target, data=chat_body_b, headers=headers)
upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream))
except urllib.error.HTTPError as e:
err_body = e.read().decode()[:1000]
_codebuff_finish_run(token, run_id, "failed")
if _is_reasoning_content_error(err_body):
print(f"[codebuff] reasoning_content error, retrying with thinking disabled (DeepSeek native format): {err_body[:200]}", file=sys.stderr)
result = self._fb_retry_thinking_disabled(body, model, token, agent_id, stream, tracker, input_data, instructions, err_body)
return result
print(f"[codebuff] HTTP {e.code}: {err_body[:300]}", file=sys.stderr)
return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}})
except Exception as e:
_codebuff_finish_run(token, run_id, "failed")
return self.send_json(502, {"error": {"type": "proxy_error", "message": str(e)}})
t0 = time.time()
try:
if stream:
self.send_response(200)
self.send_header("Content-Type", "text/event-stream")
self.send_header("Cache-Control", "no-cache")
self.send_header("Connection", "keep-alive")
self.end_headers()
if hasattr(self, 'connection') and self.connection:
try:
self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
except Exception:
pass
last_resp_id = [None]
last_output = [None]
last_status = [None]
finish_reason = [None]
reasoning_out = {}
def _on_fb_event(event):
if tracker and tracker.cancelled.is_set():
return False
for line in event.strip().split("\n"):
if line.startswith("data: "):
try:
d = json.loads(line[6:])
if d.get("type") == "response.completed":
last_resp_id[0] = d.get("response", {}).get("id")
last_output[0] = d.get("response", {}).get("output", [])
last_status[0] = d.get("response", {}).get("status")
finish_reason[0] = "length" if last_status[0] == "incomplete" else "stop"
except Exception:
pass
return None
try:
self.stream_buffered_events(
oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id"),
_reasoning_out=reasoning_out),
on_event=_on_fb_event)
except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
print(f"[{self._session_id}] [codebuff] client disconnected", file=sys.stderr)
return
success = finish_reason[0] != "length"
_record_usage("codebuff", model, success, time.time() - t0)
if last_resp_id[0] and input_data is not None:
store_response(last_resp_id[0], input_data, last_output[0])
if last_resp_id[0] and reasoning_out.get("text") or reasoning_out.get("tool_calls"):
asm = {"role": "assistant", "content": reasoning_out.get("text", "") or ""}
if reasoning_out.get("tool_calls"):
asm["tool_calls"] = reasoning_out["tool_calls"]
if reasoning_out.get("text"):
asm["reasoning_content"] = reasoning_out["text"]
_ds_store_assistant(last_resp_id[0], asm)
print(f"[{self._session_id}] [codebuff] stream done status={last_status[0]} in {time.time()-t0:.1f}s", file=sys.stderr)
else:
raw = upstream.read().decode()
chat_resp = json.loads(raw)
result = oa_resp_to_responses(chat_resp, model)
self.send_json(200, result)
rid = result.get("id")
if rid:
store_response(rid, input_data, result.get("output", []))
print(f"[{self._session_id}] [codebuff] non-stream done in {time.time()-t0:.1f}s", file=sys.stderr)
finally:
_codebuff_finish_run(token, run_id, "completed")
def _fb_retry_thinking_disabled(self, body, model, token, agent_id, stream, tracker, input_data, instructions, original_error):
run_id = _codebuff_start_run(token, agent_id)
if not run_id:
return self.send_json(502, {"error": {"type": "upstream_error",
"message": "Failed to start codebuff agent run. Check credentials and network."}})
"message": "Failed to start codebuff agent run for retry."}})
instance_id = _codebuff_get_session(token, model)
input_data = body.get("input", "")
messages = oa_input_to_messages(input_data)
instructions = body.get("instructions", "").strip()
if instructions:
messages.insert(0, {"role": "system", "content": instructions})
metadata = {
"run_id": run_id,
"cost_mode": "free",
}
messages = _fb_input_to_messages(input_data, instructions)
_codebuff_hard_disable_reasoning(messages)
metadata = {"run_id": run_id, "cost_mode": "free"}
if instance_id:
metadata["codebuff_instance_id"] = instance_id
chat_body = {
"model": model,
"messages": messages,
"stream": stream,
"model": model, "messages": messages, "stream": stream,
"max_tokens": max(body.get("max_output_tokens", 0), 64000),
"enable_thinking": REASONING_ENABLED and REASONING_EFFORT != "none",
"reasoning_effort": REASONING_EFFORT if REASONING_ENABLED else "none",
"thinking": {"type": "disabled"},
"codebuff_metadata": metadata,
}
for k in ("temperature", "top_p"):
@@ -4602,29 +4929,23 @@ class Handler(http.server.BaseHTTPRequestHandler):
chat_body["tools"] = tools
if body.get("tool_choice"):
chat_body["tool_choice"] = body["tool_choice"]
target = f"{_FREEBUFF_API_URL}/api/v1/chat/completions"
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {token}",
"User-Agent": "codex-launcher/3.8.3",
}
print(f"[{self._session_id}] [codebuff] POST {target} model={model} stream={stream} run={run_id}", file=sys.stderr)
chat_body_b = json.dumps(chat_body).encode()
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}", "User-Agent": "codex-launcher/3.8.4", "x-codebuff-model": model}
if instance_id:
headers["x-codebuff-instance-id"] = instance_id
print(f"[codebuff] retry POST {target} model={model} stream={stream} run={run_id} (thinking disabled via DeepSeek native)", file=sys.stderr)
try:
req = urllib.request.Request(target, data=chat_body_b, headers=headers)
req = urllib.request.Request(target, data=json.dumps(chat_body).encode(), headers=headers)
upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream))
except urllib.error.HTTPError as e:
err_body = e.read().decode()[:500]
_codebuff_finish_run(token, run_id, "failed")
print(f"[codebuff] HTTP {e.code}: {err_body}", file=sys.stderr)
return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}})
print(f"[codebuff] thinking-disabled retry failed: HTTP {e.code}: {err_body[:300]}", file=sys.stderr)
return self.send_json(e.code, {"error": {"type": "codebuff_deepseek_thinking_error",
"message": "Codebuff/DeepSeek V4 requires reasoning_content round-trip for tool-call sessions. Use Command Code provider for this model instead.", "upstream_error": _sanitize_err_body(err_body)}})
except Exception as e:
_codebuff_finish_run(token, run_id, "failed")
return self.send_json(502, {"error": {"type": "proxy_error", "message": str(e)}})
t0 = time.time()
try:
if stream:
@@ -4638,13 +4959,12 @@ class Handler(http.server.BaseHTTPRequestHandler):
self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
except Exception:
pass
last_resp_id = [None]
last_output = [None]
last_status = [None]
finish_reason = [None]
def _on_fb_event(event):
reasoning_out = {}
def _on_fb_retry_event(event):
if tracker and tracker.cancelled.is_set():
return False
for line in event.strip().split("\n"):
@@ -4659,20 +4979,25 @@ class Handler(http.server.BaseHTTPRequestHandler):
except Exception:
pass
return None
try:
self.stream_buffered_events(
oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id")),
on_event=_on_fb_event)
oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id"),
_reasoning_out=reasoning_out),
on_event=_on_fb_retry_event)
except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
print(f"[{self._session_id}] [codebuff] client disconnected", file=sys.stderr)
return
success = finish_reason[0] != "length"
_record_usage("codebuff", model, success, time.time() - t0)
if last_resp_id[0] and input_data is not None:
store_response(last_resp_id[0], input_data, last_output[0])
print(f"[{self._session_id}] [codebuff] stream done status={last_status[0]} in {time.time()-t0:.1f}s", file=sys.stderr)
if last_resp_id[0] and reasoning_out.get("text") or reasoning_out.get("tool_calls"):
asm = {"role": "assistant", "content": reasoning_out.get("text", "") or ""}
if reasoning_out.get("tool_calls"):
asm["tool_calls"] = reasoning_out["tool_calls"]
if reasoning_out.get("text"):
asm["reasoning_content"] = reasoning_out["text"]
_ds_store_assistant(last_resp_id[0], asm)
print(f"[{self._session_id}] [codebuff] retry stream done status={last_status[0]} in {time.time()-t0:.1f}s", file=sys.stderr)
else:
raw = upstream.read().decode()
chat_resp = json.loads(raw)
@@ -4681,7 +5006,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
rid = result.get("id")
if rid:
store_response(rid, input_data, result.get("output", []))
print(f"[{self._session_id}] [codebuff] non-stream done in {time.time()-t0:.1f}s", file=sys.stderr)
print(f"[{self._session_id}] [codebuff] retry non-stream done in {time.time()-t0:.1f}s", file=sys.stderr)
finally:
_codebuff_finish_run(token, run_id, "completed")