v3.8.5: sync src/translate-proxy.py with x-codebuff headers
This commit is contained in:
@@ -266,6 +266,13 @@ _response_store_lock = threading.Lock()
|
|||||||
_MAX_STORED = 50
|
_MAX_STORED = 50
|
||||||
_RESPONSE_TTL = 600
|
_RESPONSE_TTL = 600
|
||||||
|
|
||||||
|
_fb_reasoning_store = collections.OrderedDict()
|
||||||
|
_fb_reasoning_store_lock = threading.Lock()
|
||||||
|
|
||||||
|
_deepseek_reasoning_store = {}
|
||||||
|
_deepseek_reasoning_lock = threading.Lock()
|
||||||
|
_MAX_DS_STORED = 100
|
||||||
|
|
||||||
_crof_lock = threading.Lock()
|
_crof_lock = threading.Lock()
|
||||||
_provider_caps_lock = threading.Lock()
|
_provider_caps_lock = threading.Lock()
|
||||||
_provider_caps = None
|
_provider_caps = None
|
||||||
@@ -328,11 +335,12 @@ def _codebuff_get_session(token, model):
|
|||||||
req = urllib.request.Request(url, data=body, headers={
|
req = urllib.request.Request(url, data=body, headers={
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
"Authorization": f"Bearer {token}",
|
"Authorization": f"Bearer {token}",
|
||||||
"User-Agent": "codex-launcher/3.8.3",
|
"User-Agent": "codex-launcher/3.8.4",
|
||||||
|
"x-codebuff-model": model,
|
||||||
})
|
})
|
||||||
resp = urllib.request.urlopen(req, timeout=15)
|
resp = urllib.request.urlopen(req, timeout=15)
|
||||||
data = json.loads(resp.read())
|
data = json.loads(resp.read())
|
||||||
instance_id = data.get("instanceId", "")
|
instance_id = data.get("instanceId", data.get("data", {}).get("instance_id", ""))
|
||||||
expires_at = data.get("remainingMs", 0)
|
expires_at = data.get("remainingMs", 0)
|
||||||
if instance_id:
|
if instance_id:
|
||||||
with _codebuff_token_lock:
|
with _codebuff_token_lock:
|
||||||
@@ -352,7 +360,7 @@ def _codebuff_start_run(token, agent_id):
|
|||||||
req = urllib.request.Request(url, data=body, headers={
|
req = urllib.request.Request(url, data=body, headers={
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
"Authorization": f"Bearer {token}",
|
"Authorization": f"Bearer {token}",
|
||||||
"User-Agent": "codex-launcher/3.8.3",
|
"User-Agent": "codex-launcher/3.8.4",
|
||||||
})
|
})
|
||||||
try:
|
try:
|
||||||
resp = urllib.request.urlopen(req, timeout=15)
|
resp = urllib.request.urlopen(req, timeout=15)
|
||||||
@@ -375,7 +383,7 @@ def _codebuff_finish_run(token, run_id, status="completed"):
|
|||||||
req = urllib.request.Request(url, data=body, headers={
|
req = urllib.request.Request(url, data=body, headers={
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
"Authorization": f"Bearer {token}",
|
"Authorization": f"Bearer {token}",
|
||||||
"User-Agent": "codex-launcher/3.8.3",
|
"User-Agent": "codex-launcher/3.8.4",
|
||||||
})
|
})
|
||||||
try:
|
try:
|
||||||
urllib.request.urlopen(req, timeout=10)
|
urllib.request.urlopen(req, timeout=10)
|
||||||
@@ -737,6 +745,162 @@ def resolve_previous_response(body):
|
|||||||
combined = [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": str(prev_input)}]}] + list(prev_output) + new_input
|
combined = [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": str(prev_input)}]}] + list(prev_output) + new_input
|
||||||
return combined
|
return combined
|
||||||
|
|
||||||
|
def _fb_store_reasoning(resp_id, reasoning_text):
|
||||||
|
if not resp_id or not reasoning_text:
|
||||||
|
return
|
||||||
|
with _fb_reasoning_store_lock:
|
||||||
|
_fb_reasoning_store[resp_id] = {"reasoning": reasoning_text, "ts": time.time()}
|
||||||
|
while len(_fb_reasoning_store) > _MAX_STORED:
|
||||||
|
_fb_reasoning_store.popitem(last=False)
|
||||||
|
expired = [k for k, v in _fb_reasoning_store.items() if time.time() - v["ts"] > _RESPONSE_TTL]
|
||||||
|
for k in expired:
|
||||||
|
del _fb_reasoning_store[k]
|
||||||
|
|
||||||
|
def _fb_get_reasoning(resp_id):
|
||||||
|
if not resp_id:
|
||||||
|
return ""
|
||||||
|
with _fb_reasoning_store_lock:
|
||||||
|
entry = _fb_reasoning_store.get(resp_id)
|
||||||
|
return entry["reasoning"] if entry else ""
|
||||||
|
|
||||||
|
def _fb_get_any_reasoning():
|
||||||
|
with _fb_reasoning_store_lock:
|
||||||
|
for k in _fb_reasoning_store:
|
||||||
|
return _fb_reasoning_store[k]["reasoning"]
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def _codebuff_hard_disable_reasoning(messages):
|
||||||
|
"""Strip all reasoning/thinking fields from every message.
|
||||||
|
Codebuff rejects mixed reasoning_content histories.
|
||||||
|
The final chat body must be clean before POST."""
|
||||||
|
for msg in messages:
|
||||||
|
if not isinstance(msg, dict):
|
||||||
|
continue
|
||||||
|
for key in ("reasoning_content", "reasoning", "thinking",
|
||||||
|
"thinking_content", "thoughts"):
|
||||||
|
msg.pop(key, None)
|
||||||
|
|
||||||
|
def _is_reasoning_content_error(error_text):
|
||||||
|
if not error_text:
|
||||||
|
return False
|
||||||
|
e = error_text.lower()
|
||||||
|
return ("reasoning_content" in e or "thinking mode" in e
|
||||||
|
or "must be passed back" in e)
|
||||||
|
|
||||||
|
def _ds_store_assistant(resp_id, assistant_msg):
|
||||||
|
if not resp_id or not isinstance(assistant_msg, dict):
|
||||||
|
return
|
||||||
|
tool_calls = assistant_msg.get("tool_calls") or []
|
||||||
|
reasoning = assistant_msg.get("reasoning_content")
|
||||||
|
if not tool_calls or not reasoning:
|
||||||
|
return
|
||||||
|
with _deepseek_reasoning_lock:
|
||||||
|
for tc in tool_calls:
|
||||||
|
tc_id = tc.get("id") or tc.get("call_id", "")
|
||||||
|
if tc_id:
|
||||||
|
_deepseek_reasoning_store[tc_id] = {
|
||||||
|
"resp_id": resp_id,
|
||||||
|
"assistant": dict(assistant_msg),
|
||||||
|
"reasoning_content": reasoning,
|
||||||
|
"ts": time.time(),
|
||||||
|
}
|
||||||
|
keys = list(_deepseek_reasoning_store.keys())
|
||||||
|
if len(keys) > _MAX_DS_STORED:
|
||||||
|
for k in keys[:len(keys) - _MAX_DS_STORED]:
|
||||||
|
del _deepseek_reasoning_store[k]
|
||||||
|
|
||||||
|
def _ds_rebuild_tool_history(messages):
|
||||||
|
with _deepseek_reasoning_lock:
|
||||||
|
snapshot = dict(_deepseek_reasoning_store)
|
||||||
|
expired = [k for k, v in snapshot.items() if time.time() - v["ts"] > 900]
|
||||||
|
for k in expired:
|
||||||
|
_deepseek_reasoning_store.pop(k, None)
|
||||||
|
snapshot.pop(k, None)
|
||||||
|
if not snapshot:
|
||||||
|
return messages
|
||||||
|
rebuilt = []
|
||||||
|
inserted_ids = set()
|
||||||
|
for msg in messages:
|
||||||
|
if msg.get("role") == "tool":
|
||||||
|
tc_id = msg.get("tool_call_id", "")
|
||||||
|
stored = snapshot.get(tc_id)
|
||||||
|
if stored and tc_id not in inserted_ids:
|
||||||
|
am = dict(stored["assistant"])
|
||||||
|
if am.get("reasoning_content"):
|
||||||
|
rebuilt.append(am)
|
||||||
|
inserted_ids.add(tc_id)
|
||||||
|
rebuilt.append(msg)
|
||||||
|
return rebuilt
|
||||||
|
|
||||||
|
def _fb_input_to_messages(input_data, instructions=""):
|
||||||
|
msgs = []
|
||||||
|
tool_name_by_id = {}
|
||||||
|
pending_tool_calls = []
|
||||||
|
last_flushed_ids = []
|
||||||
|
if isinstance(input_data, str):
|
||||||
|
msgs.append({"role": "user", "content": input_data})
|
||||||
|
elif isinstance(input_data, list):
|
||||||
|
for item in input_data:
|
||||||
|
t = item.get("type")
|
||||||
|
if t == "reasoning":
|
||||||
|
continue
|
||||||
|
if t == "function_call":
|
||||||
|
tcid = item.get("call_id") or item.get("id") or uid("tc")
|
||||||
|
pending_tool_calls.append(
|
||||||
|
{"id": tcid, "type": "function",
|
||||||
|
"function": {"name": item.get("name", ""),
|
||||||
|
"arguments": item.get("arguments", "{}")}})
|
||||||
|
tool_name_by_id[tcid] = item.get("name", "")
|
||||||
|
continue
|
||||||
|
if pending_tool_calls:
|
||||||
|
last_flushed_ids = [tc["id"] for tc in pending_tool_calls]
|
||||||
|
msg = {"role": "assistant", "content": None, "tool_calls": pending_tool_calls}
|
||||||
|
msgs.append(msg)
|
||||||
|
pending_tool_calls = []
|
||||||
|
if t == "message":
|
||||||
|
role = item.get("role", "user")
|
||||||
|
if role == "developer":
|
||||||
|
role = "system"
|
||||||
|
text = ""
|
||||||
|
content = item.get("content", [])
|
||||||
|
if isinstance(content, str):
|
||||||
|
text = content
|
||||||
|
else:
|
||||||
|
for part in content:
|
||||||
|
if isinstance(part, str):
|
||||||
|
text += part
|
||||||
|
continue
|
||||||
|
pt = part.get("type", "")
|
||||||
|
if pt in ("input_text", "output_text"):
|
||||||
|
text += part.get("text", "")
|
||||||
|
if text is not None:
|
||||||
|
am = {"role": role, "content": text}
|
||||||
|
if role == "assistant":
|
||||||
|
am["_fb_orig_id"] = item.get("id", "")
|
||||||
|
msgs.append(am)
|
||||||
|
elif t == "function_call_output":
|
||||||
|
tcid = item.get("call_id") or item.get("id") or ""
|
||||||
|
if not tcid and last_flushed_ids:
|
||||||
|
idx = len([m for m in msgs if m.get("role") == "tool"])
|
||||||
|
if idx < len(last_flushed_ids):
|
||||||
|
tcid = last_flushed_ids[idx]
|
||||||
|
msgs.append({"role": "tool", "tool_call_id": tcid,
|
||||||
|
"tool_name": tool_name_by_id.get(tcid, ""),
|
||||||
|
"content": item.get("output", "")})
|
||||||
|
if pending_tool_calls:
|
||||||
|
msg = {"role": "assistant", "content": None, "tool_calls": pending_tool_calls}
|
||||||
|
msgs.append(msg)
|
||||||
|
if instructions:
|
||||||
|
msgs.insert(0, {"role": "system", "content": instructions})
|
||||||
|
return msgs
|
||||||
|
|
||||||
|
def _fb_strip_reasoning_from_messages(messages):
|
||||||
|
out = []
|
||||||
|
for m in messages:
|
||||||
|
nm = {k: v for k, v in m.items() if k != "reasoning_content"}
|
||||||
|
out.append(nm)
|
||||||
|
return out
|
||||||
|
|
||||||
_HOP_BY_HOP_HEADERS = {
|
_HOP_BY_HOP_HEADERS = {
|
||||||
"connection",
|
"connection",
|
||||||
"keep-alive",
|
"keep-alive",
|
||||||
@@ -1399,6 +1563,7 @@ def oa_input_to_messages(input_data):
|
|||||||
if role == "developer":
|
if role == "developer":
|
||||||
role = "system"
|
role = "system"
|
||||||
text = ""
|
text = ""
|
||||||
|
reasoning_text = ""
|
||||||
content = item.get("content", [])
|
content = item.get("content", [])
|
||||||
if isinstance(content, str):
|
if isinstance(content, str):
|
||||||
text = content
|
text = content
|
||||||
@@ -1410,6 +1575,9 @@ def oa_input_to_messages(input_data):
|
|||||||
pt = part.get("type", "")
|
pt = part.get("type", "")
|
||||||
if pt in ("input_text", "output_text"):
|
if pt in ("input_text", "output_text"):
|
||||||
text += part.get("text", "")
|
text += part.get("text", "")
|
||||||
|
elif pt in ("reasoning",):
|
||||||
|
for rp in part.get("content", []):
|
||||||
|
reasoning_text += rp.get("text", "")
|
||||||
elif pt == "input_image":
|
elif pt == "input_image":
|
||||||
img = part.get("image_url", part)
|
img = part.get("image_url", part)
|
||||||
msgs.append({"role": role, "content": [{"type": "text", "text": text},
|
msgs.append({"role": role, "content": [{"type": "text", "text": text},
|
||||||
@@ -1417,7 +1585,10 @@ def oa_input_to_messages(input_data):
|
|||||||
text = None
|
text = None
|
||||||
break
|
break
|
||||||
if text is not None:
|
if text is not None:
|
||||||
msgs.append({"role": role, "content": text})
|
msg = {"role": role, "content": text}
|
||||||
|
if reasoning_text and role == "assistant":
|
||||||
|
msg["reasoning_content"] = reasoning_text
|
||||||
|
msgs.append(msg)
|
||||||
elif t == "function_call_output":
|
elif t == "function_call_output":
|
||||||
tcid = item.get("call_id") or item.get("id") or ""
|
tcid = item.get("call_id") or item.get("id") or ""
|
||||||
if not tcid and last_flushed_ids:
|
if not tcid and last_flushed_ids:
|
||||||
@@ -1568,10 +1739,12 @@ def oa_resp_to_responses(chat_resp, model, resp_id=None):
|
|||||||
"total_tokens": usage.get("total_tokens", 0),
|
"total_tokens": usage.get("total_tokens", 0),
|
||||||
"input_tokens_details": {"cached_tokens": usage.get("prompt_tokens_details", {}).get("cached_tokens", 0)}}}
|
"input_tokens_details": {"cached_tokens": usage.get("prompt_tokens_details", {}).get("cached_tokens", 0)}}}
|
||||||
|
|
||||||
def oa_stream_to_sse(chat_stream, model, req_id):
|
def oa_stream_to_sse(chat_stream, model, req_id, _reasoning_out=None):
|
||||||
resp_id = req_id or uid("resp")
|
resp_id = req_id or uid("resp")
|
||||||
msg_id = uid("msg")
|
msg_id = uid("msg")
|
||||||
text_buf = ""
|
text_buf = ""
|
||||||
|
reasoning_buf = ""
|
||||||
|
reasoning_opened = False
|
||||||
tc_buf = {}
|
tc_buf = {}
|
||||||
fr = None
|
fr = None
|
||||||
msg_opened = False
|
msg_opened = False
|
||||||
@@ -1597,6 +1770,13 @@ def oa_stream_to_sse(chat_stream, model, req_id):
|
|||||||
delta = choices[0].get("delta", {})
|
delta = choices[0].get("delta", {})
|
||||||
fr = choices[0].get("finish_reason")
|
fr = choices[0].get("finish_reason")
|
||||||
|
|
||||||
|
rc = delta.get("reasoning_content") or delta.get("reasoning")
|
||||||
|
if rc:
|
||||||
|
if not reasoning_opened:
|
||||||
|
reasoning_opened = True
|
||||||
|
reasoning_buf += rc
|
||||||
|
yield emit("response.reasoning.delta", {"type": "response.reasoning.delta", "delta": rc})
|
||||||
|
|
||||||
content = delta.get("content")
|
content = delta.get("content")
|
||||||
if content:
|
if content:
|
||||||
if not msg_opened:
|
if not msg_opened:
|
||||||
@@ -1626,7 +1806,10 @@ def oa_stream_to_sse(chat_stream, model, req_id):
|
|||||||
yield emit("response.output_text.delta", {"type": "response.function_call_arguments.delta",
|
yield emit("response.output_text.delta", {"type": "response.function_call_arguments.delta",
|
||||||
"delta": fn["arguments"], "item_id": tc_buf[idx]["id"]})
|
"delta": fn["arguments"], "item_id": tc_buf[idx]["id"]})
|
||||||
|
|
||||||
|
reasoning_rsn_id = uid("rsn") if reasoning_buf else None
|
||||||
|
if reasoning_opened:
|
||||||
|
yield emit("response.reasoning.done", {"type": "response.reasoning.done",
|
||||||
|
"item_id": reasoning_rsn_id, "text": reasoning_buf})
|
||||||
|
|
||||||
if msg_opened:
|
if msg_opened:
|
||||||
yield emit("response.output_text.done", {"type": "response.output_text.done",
|
yield emit("response.output_text.done", {"type": "response.output_text.done",
|
||||||
@@ -1648,9 +1831,17 @@ def oa_stream_to_sse(chat_stream, model, req_id):
|
|||||||
fm = {"stop": "completed", "length": "incomplete", "tool_calls": "completed", "content_filter": "incomplete"}
|
fm = {"stop": "completed", "length": "incomplete", "tool_calls": "completed", "content_filter": "incomplete"}
|
||||||
status = fm.get(fr, "incomplete")
|
status = fm.get(fr, "incomplete")
|
||||||
final_out = []
|
final_out = []
|
||||||
|
if reasoning_buf:
|
||||||
|
final_out.append({"type": "reasoning", "id": reasoning_rsn_id, "status": "completed",
|
||||||
|
"content": [{"type": "text", "text": reasoning_buf}]})
|
||||||
if msg_opened:
|
if msg_opened:
|
||||||
|
msg_content = []
|
||||||
|
if reasoning_buf:
|
||||||
|
msg_content.append({"type": "output_text", "text": text_buf, "annotations": []})
|
||||||
|
else:
|
||||||
|
msg_content.append({"type": "output_text", "text": text_buf, "annotations": []})
|
||||||
final_out.append({"type": "message", "id": msg_id, "role": "assistant", "status": "completed",
|
final_out.append({"type": "message", "id": msg_id, "role": "assistant", "status": "completed",
|
||||||
"content": [{"type": "output_text", "text": text_buf, "annotations": []}]})
|
"content": msg_content})
|
||||||
for idx in sorted(tc_buf):
|
for idx in sorted(tc_buf):
|
||||||
t = tc_buf[idx]
|
t = tc_buf[idx]
|
||||||
final_out.append({"type": "function_call", "id": t["id"], "call_id": t["call_id"],
|
final_out.append({"type": "function_call", "id": t["id"], "call_id": t["call_id"],
|
||||||
@@ -1658,6 +1849,9 @@ def oa_stream_to_sse(chat_stream, model, req_id):
|
|||||||
yield emit("response.completed", {"type": "response.completed",
|
yield emit("response.completed", {"type": "response.completed",
|
||||||
"response": {"id": resp_id, "object": "response", "model": model,
|
"response": {"id": resp_id, "object": "response", "model": model,
|
||||||
"status": status, "created": int(time.time()), "output": final_out}})
|
"status": status, "created": int(time.time()), "output": final_out}})
|
||||||
|
if _reasoning_out is not None:
|
||||||
|
_reasoning_out["text"] = reasoning_buf
|
||||||
|
_reasoning_out["tool_calls"] = [tc_buf[i] for i in sorted(tc_buf)] if tc_buf else []
|
||||||
|
|
||||||
# ═══════════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════════
|
||||||
# Anthropic backend
|
# Anthropic backend
|
||||||
@@ -1675,12 +1869,24 @@ def an_input_to_messages(input_data):
|
|||||||
if role == "developer":
|
if role == "developer":
|
||||||
role = "user"
|
role = "user"
|
||||||
text = ""
|
text = ""
|
||||||
|
thinking_blocks = []
|
||||||
for part in item.get("content", []):
|
for part in item.get("content", []):
|
||||||
pt = part.get("type", "")
|
pt = part.get("type", "")
|
||||||
if pt in ("input_text", "output_text"):
|
if pt in ("input_text", "output_text"):
|
||||||
text += part.get("text", "")
|
text += part.get("text", "")
|
||||||
|
elif pt in ("reasoning", "thinking"):
|
||||||
|
thinking_text = ""
|
||||||
|
for rp in part.get("content", []):
|
||||||
|
thinking_text += rp.get("text", "")
|
||||||
|
if thinking_text:
|
||||||
|
thinking_blocks.append({"type": "thinking", "thinking": thinking_text, "signature": part.get("signature", "")})
|
||||||
if role == "assistant":
|
if role == "assistant":
|
||||||
msgs.append({"role": "assistant", "content": text})
|
content_parts = []
|
||||||
|
if thinking_blocks:
|
||||||
|
content_parts.extend(thinking_blocks)
|
||||||
|
if text:
|
||||||
|
content_parts.append({"type": "text", "text": text})
|
||||||
|
msgs.append({"role": "assistant", "content": content_parts if content_parts else text})
|
||||||
else:
|
else:
|
||||||
msgs.append({"role": "user", "content": text})
|
msgs.append({"role": "user", "content": text})
|
||||||
elif t == "function_call":
|
elif t == "function_call":
|
||||||
@@ -4573,10 +4779,9 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
instance_id = _codebuff_get_session(token, model)
|
instance_id = _codebuff_get_session(token, model)
|
||||||
|
|
||||||
input_data = body.get("input", "")
|
input_data = body.get("input", "")
|
||||||
messages = oa_input_to_messages(input_data)
|
|
||||||
instructions = body.get("instructions", "").strip()
|
instructions = body.get("instructions", "").strip()
|
||||||
if instructions:
|
messages = _fb_input_to_messages(input_data, instructions)
|
||||||
messages.insert(0, {"role": "system", "content": instructions})
|
messages = _ds_rebuild_tool_history(messages)
|
||||||
|
|
||||||
metadata = {
|
metadata = {
|
||||||
"run_id": run_id,
|
"run_id": run_id,
|
||||||
@@ -4590,8 +4795,6 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
"messages": messages,
|
"messages": messages,
|
||||||
"stream": stream,
|
"stream": stream,
|
||||||
"max_tokens": max(body.get("max_output_tokens", 0), 64000),
|
"max_tokens": max(body.get("max_output_tokens", 0), 64000),
|
||||||
"enable_thinking": REASONING_ENABLED and REASONING_EFFORT != "none",
|
|
||||||
"reasoning_effort": REASONING_EFFORT if REASONING_ENABLED else "none",
|
|
||||||
"codebuff_metadata": metadata,
|
"codebuff_metadata": metadata,
|
||||||
}
|
}
|
||||||
for k in ("temperature", "top_p"):
|
for k in ("temperature", "top_p"):
|
||||||
@@ -4607,8 +4810,11 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
headers = {
|
headers = {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
"Authorization": f"Bearer {token}",
|
"Authorization": f"Bearer {token}",
|
||||||
"User-Agent": "codex-launcher/3.8.3",
|
"User-Agent": "codex-launcher/3.8.4",
|
||||||
|
"x-codebuff-model": model,
|
||||||
}
|
}
|
||||||
|
if instance_id:
|
||||||
|
headers["x-codebuff-instance-id"] = instance_id
|
||||||
|
|
||||||
print(f"[{self._session_id}] [codebuff] POST {target} model={model} stream={stream} run={run_id}", file=sys.stderr)
|
print(f"[{self._session_id}] [codebuff] POST {target} model={model} stream={stream} run={run_id}", file=sys.stderr)
|
||||||
chat_body_b = json.dumps(chat_body).encode()
|
chat_body_b = json.dumps(chat_body).encode()
|
||||||
@@ -4617,9 +4823,13 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
req = urllib.request.Request(target, data=chat_body_b, headers=headers)
|
req = urllib.request.Request(target, data=chat_body_b, headers=headers)
|
||||||
upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream))
|
upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream))
|
||||||
except urllib.error.HTTPError as e:
|
except urllib.error.HTTPError as e:
|
||||||
err_body = e.read().decode()[:500]
|
err_body = e.read().decode()[:1000]
|
||||||
_codebuff_finish_run(token, run_id, "failed")
|
_codebuff_finish_run(token, run_id, "failed")
|
||||||
print(f"[codebuff] HTTP {e.code}: {err_body}", file=sys.stderr)
|
if _is_reasoning_content_error(err_body):
|
||||||
|
print(f"[codebuff] reasoning_content error, retrying with thinking disabled (DeepSeek native format): {err_body[:200]}", file=sys.stderr)
|
||||||
|
result = self._fb_retry_thinking_disabled(body, model, token, agent_id, stream, tracker, input_data, instructions, err_body)
|
||||||
|
return result
|
||||||
|
print(f"[codebuff] HTTP {e.code}: {err_body[:300]}", file=sys.stderr)
|
||||||
return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}})
|
return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}})
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
_codebuff_finish_run(token, run_id, "failed")
|
_codebuff_finish_run(token, run_id, "failed")
|
||||||
@@ -4643,6 +4853,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
last_output = [None]
|
last_output = [None]
|
||||||
last_status = [None]
|
last_status = [None]
|
||||||
finish_reason = [None]
|
finish_reason = [None]
|
||||||
|
reasoning_out = {}
|
||||||
|
|
||||||
def _on_fb_event(event):
|
def _on_fb_event(event):
|
||||||
if tracker and tracker.cancelled.is_set():
|
if tracker and tracker.cancelled.is_set():
|
||||||
@@ -4662,7 +4873,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
self.stream_buffered_events(
|
self.stream_buffered_events(
|
||||||
oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id")),
|
oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id"),
|
||||||
|
_reasoning_out=reasoning_out),
|
||||||
on_event=_on_fb_event)
|
on_event=_on_fb_event)
|
||||||
except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
|
except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
|
||||||
print(f"[{self._session_id}] [codebuff] client disconnected", file=sys.stderr)
|
print(f"[{self._session_id}] [codebuff] client disconnected", file=sys.stderr)
|
||||||
@@ -4672,6 +4884,13 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
_record_usage("codebuff", model, success, time.time() - t0)
|
_record_usage("codebuff", model, success, time.time() - t0)
|
||||||
if last_resp_id[0] and input_data is not None:
|
if last_resp_id[0] and input_data is not None:
|
||||||
store_response(last_resp_id[0], input_data, last_output[0])
|
store_response(last_resp_id[0], input_data, last_output[0])
|
||||||
|
if last_resp_id[0] and reasoning_out.get("text") or reasoning_out.get("tool_calls"):
|
||||||
|
asm = {"role": "assistant", "content": reasoning_out.get("text", "") or ""}
|
||||||
|
if reasoning_out.get("tool_calls"):
|
||||||
|
asm["tool_calls"] = reasoning_out["tool_calls"]
|
||||||
|
if reasoning_out.get("text"):
|
||||||
|
asm["reasoning_content"] = reasoning_out["text"]
|
||||||
|
_ds_store_assistant(last_resp_id[0], asm)
|
||||||
print(f"[{self._session_id}] [codebuff] stream done status={last_status[0]} in {time.time()-t0:.1f}s", file=sys.stderr)
|
print(f"[{self._session_id}] [codebuff] stream done status={last_status[0]} in {time.time()-t0:.1f}s", file=sys.stderr)
|
||||||
else:
|
else:
|
||||||
raw = upstream.read().decode()
|
raw = upstream.read().decode()
|
||||||
@@ -4685,6 +4904,112 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
finally:
|
finally:
|
||||||
_codebuff_finish_run(token, run_id, "completed")
|
_codebuff_finish_run(token, run_id, "completed")
|
||||||
|
|
||||||
|
def _fb_retry_thinking_disabled(self, body, model, token, agent_id, stream, tracker, input_data, instructions, original_error):
|
||||||
|
run_id = _codebuff_start_run(token, agent_id)
|
||||||
|
if not run_id:
|
||||||
|
return self.send_json(502, {"error": {"type": "upstream_error",
|
||||||
|
"message": "Failed to start codebuff agent run for retry."}})
|
||||||
|
instance_id = _codebuff_get_session(token, model)
|
||||||
|
messages = _fb_input_to_messages(input_data, instructions)
|
||||||
|
_codebuff_hard_disable_reasoning(messages)
|
||||||
|
metadata = {"run_id": run_id, "cost_mode": "free"}
|
||||||
|
if instance_id:
|
||||||
|
metadata["codebuff_instance_id"] = instance_id
|
||||||
|
chat_body = {
|
||||||
|
"model": model, "messages": messages, "stream": stream,
|
||||||
|
"max_tokens": max(body.get("max_output_tokens", 0), 64000),
|
||||||
|
"thinking": {"type": "disabled"},
|
||||||
|
"codebuff_metadata": metadata,
|
||||||
|
}
|
||||||
|
for k in ("temperature", "top_p"):
|
||||||
|
if k in body:
|
||||||
|
chat_body[k] = body[k]
|
||||||
|
tools = oa_convert_tools(body.get("tools"))
|
||||||
|
if tools:
|
||||||
|
chat_body["tools"] = tools
|
||||||
|
if body.get("tool_choice"):
|
||||||
|
chat_body["tool_choice"] = body["tool_choice"]
|
||||||
|
target = f"{_FREEBUFF_API_URL}/api/v1/chat/completions"
|
||||||
|
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}", "User-Agent": "codex-launcher/3.8.4", "x-codebuff-model": model}
|
||||||
|
if instance_id:
|
||||||
|
headers["x-codebuff-instance-id"] = instance_id
|
||||||
|
print(f"[codebuff] retry POST {target} model={model} stream={stream} run={run_id} (thinking disabled via DeepSeek native)", file=sys.stderr)
|
||||||
|
try:
|
||||||
|
req = urllib.request.Request(target, data=json.dumps(chat_body).encode(), headers=headers)
|
||||||
|
upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream))
|
||||||
|
except urllib.error.HTTPError as e:
|
||||||
|
err_body = e.read().decode()[:500]
|
||||||
|
_codebuff_finish_run(token, run_id, "failed")
|
||||||
|
print(f"[codebuff] thinking-disabled retry failed: HTTP {e.code}: {err_body[:300]}", file=sys.stderr)
|
||||||
|
return self.send_json(e.code, {"error": {"type": "codebuff_deepseek_thinking_error",
|
||||||
|
"message": "Codebuff/DeepSeek V4 requires reasoning_content round-trip for tool-call sessions. Use Command Code provider for this model instead.", "upstream_error": _sanitize_err_body(err_body)}})
|
||||||
|
except Exception as e:
|
||||||
|
_codebuff_finish_run(token, run_id, "failed")
|
||||||
|
return self.send_json(502, {"error": {"type": "proxy_error", "message": str(e)}})
|
||||||
|
t0 = time.time()
|
||||||
|
try:
|
||||||
|
if stream:
|
||||||
|
self.send_response(200)
|
||||||
|
self.send_header("Content-Type", "text/event-stream")
|
||||||
|
self.send_header("Cache-Control", "no-cache")
|
||||||
|
self.send_header("Connection", "keep-alive")
|
||||||
|
self.end_headers()
|
||||||
|
if hasattr(self, 'connection') and self.connection:
|
||||||
|
try:
|
||||||
|
self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
last_resp_id = [None]
|
||||||
|
last_output = [None]
|
||||||
|
last_status = [None]
|
||||||
|
finish_reason = [None]
|
||||||
|
reasoning_out = {}
|
||||||
|
def _on_fb_retry_event(event):
|
||||||
|
if tracker and tracker.cancelled.is_set():
|
||||||
|
return False
|
||||||
|
for line in event.strip().split("\n"):
|
||||||
|
if line.startswith("data: "):
|
||||||
|
try:
|
||||||
|
d = json.loads(line[6:])
|
||||||
|
if d.get("type") == "response.completed":
|
||||||
|
last_resp_id[0] = d.get("response", {}).get("id")
|
||||||
|
last_output[0] = d.get("response", {}).get("output", [])
|
||||||
|
last_status[0] = d.get("response", {}).get("status")
|
||||||
|
finish_reason[0] = "length" if last_status[0] == "incomplete" else "stop"
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
self.stream_buffered_events(
|
||||||
|
oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id"),
|
||||||
|
_reasoning_out=reasoning_out),
|
||||||
|
on_event=_on_fb_retry_event)
|
||||||
|
except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
|
||||||
|
return
|
||||||
|
success = finish_reason[0] != "length"
|
||||||
|
_record_usage("codebuff", model, success, time.time() - t0)
|
||||||
|
if last_resp_id[0] and input_data is not None:
|
||||||
|
store_response(last_resp_id[0], input_data, last_output[0])
|
||||||
|
if last_resp_id[0] and reasoning_out.get("text") or reasoning_out.get("tool_calls"):
|
||||||
|
asm = {"role": "assistant", "content": reasoning_out.get("text", "") or ""}
|
||||||
|
if reasoning_out.get("tool_calls"):
|
||||||
|
asm["tool_calls"] = reasoning_out["tool_calls"]
|
||||||
|
if reasoning_out.get("text"):
|
||||||
|
asm["reasoning_content"] = reasoning_out["text"]
|
||||||
|
_ds_store_assistant(last_resp_id[0], asm)
|
||||||
|
print(f"[{self._session_id}] [codebuff] retry stream done status={last_status[0]} in {time.time()-t0:.1f}s", file=sys.stderr)
|
||||||
|
else:
|
||||||
|
raw = upstream.read().decode()
|
||||||
|
chat_resp = json.loads(raw)
|
||||||
|
result = oa_resp_to_responses(chat_resp, model)
|
||||||
|
self.send_json(200, result)
|
||||||
|
rid = result.get("id")
|
||||||
|
if rid:
|
||||||
|
store_response(rid, input_data, result.get("output", []))
|
||||||
|
print(f"[{self._session_id}] [codebuff] retry non-stream done in {time.time()-t0:.1f}s", file=sys.stderr)
|
||||||
|
finally:
|
||||||
|
_codebuff_finish_run(token, run_id, "completed")
|
||||||
|
|
||||||
def _handle_auto(self, body, model, stream, tracker=None):
|
def _handle_auto(self, body, model, stream, tracker=None):
|
||||||
"""Auto-sensing backend: probe schema, adapt, retry on errors.
|
"""Auto-sensing backend: probe schema, adapt, retry on errors.
|
||||||
Uses hostname heuristics as initial guess, then learns from errors
|
Uses hostname heuristics as initial guess, then learns from errors
|
||||||
|
|||||||
Reference in New Issue
Block a user