From 863f438cc32a0ea09a74d61a5a35722d567bcc29 Mon Sep 17 00:00:00 2001 From: admin Date: Sun, 24 May 2026 19:06:59 +0000 Subject: [PATCH] v3.8.5: sync src/translate-proxy.py with x-freebuff headers --- src/translate-proxy.py | 467 ++++++++++++++++++++++++++++++++++------- 1 file changed, 396 insertions(+), 71 deletions(-) diff --git a/src/translate-proxy.py b/src/translate-proxy.py index 1e0ac88..d183e0e 100755 --- a/src/translate-proxy.py +++ b/src/translate-proxy.py @@ -266,6 +266,13 @@ _response_store_lock = threading.Lock() _MAX_STORED = 50 _RESPONSE_TTL = 600 +_fb_reasoning_store = collections.OrderedDict() +_fb_reasoning_store_lock = threading.Lock() + +_deepseek_reasoning_store = {} +_deepseek_reasoning_lock = threading.Lock() +_MAX_DS_STORED = 100 + _crof_lock = threading.Lock() _provider_caps_lock = threading.Lock() _provider_caps = None @@ -328,11 +335,12 @@ def _freebuff_get_session(token, model): req = urllib.request.Request(url, data=body, headers={ "Content-Type": "application/json", "Authorization": f"Bearer {token}", - "User-Agent": "codex-launcher/3.8.3", + "User-Agent": "codex-launcher/3.8.4", + "x-freebuff-model": model, }) resp = urllib.request.urlopen(req, timeout=15) data = json.loads(resp.read()) - instance_id = data.get("instanceId", "") + instance_id = data.get("instanceId", data.get("data", {}).get("instance_id", "")) expires_at = data.get("remainingMs", 0) if instance_id: with _freebuff_token_lock: @@ -352,7 +360,7 @@ def _freebuff_start_run(token, agent_id): req = urllib.request.Request(url, data=body, headers={ "Content-Type": "application/json", "Authorization": f"Bearer {token}", - "User-Agent": "codex-launcher/3.8.3", + "User-Agent": "codex-launcher/3.8.4", }) try: resp = urllib.request.urlopen(req, timeout=15) @@ -375,7 +383,7 @@ def _freebuff_finish_run(token, run_id, status="completed"): req = urllib.request.Request(url, data=body, headers={ "Content-Type": "application/json", "Authorization": f"Bearer {token}", - "User-Agent": "codex-launcher/3.8.3", + "User-Agent": "codex-launcher/3.8.4", }) try: urllib.request.urlopen(req, timeout=10) @@ -737,6 +745,162 @@ def resolve_previous_response(body): combined = [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": str(prev_input)}]}] + list(prev_output) + new_input return combined +def _fb_store_reasoning(resp_id, reasoning_text): + if not resp_id or not reasoning_text: + return + with _fb_reasoning_store_lock: + _fb_reasoning_store[resp_id] = {"reasoning": reasoning_text, "ts": time.time()} + while len(_fb_reasoning_store) > _MAX_STORED: + _fb_reasoning_store.popitem(last=False) + expired = [k for k, v in _fb_reasoning_store.items() if time.time() - v["ts"] > _RESPONSE_TTL] + for k in expired: + del _fb_reasoning_store[k] + +def _fb_get_reasoning(resp_id): + if not resp_id: + return "" + with _fb_reasoning_store_lock: + entry = _fb_reasoning_store.get(resp_id) + return entry["reasoning"] if entry else "" + +def _fb_get_any_reasoning(): + with _fb_reasoning_store_lock: + for k in _fb_reasoning_store: + return _fb_reasoning_store[k]["reasoning"] + return "" + +def _freebuff_hard_disable_reasoning(messages): + """Strip all reasoning/thinking fields from every message. + FreeBuff rejects mixed reasoning_content histories. + The final chat body must be clean before POST.""" + for msg in messages: + if not isinstance(msg, dict): + continue + for key in ("reasoning_content", "reasoning", "thinking", + "thinking_content", "thoughts"): + msg.pop(key, None) + +def _is_reasoning_content_error(error_text): + if not error_text: + return False + e = error_text.lower() + return ("reasoning_content" in e or "thinking mode" in e + or "must be passed back" in e) + +def _ds_store_assistant(resp_id, assistant_msg): + if not resp_id or not isinstance(assistant_msg, dict): + return + tool_calls = assistant_msg.get("tool_calls") or [] + reasoning = assistant_msg.get("reasoning_content") + if not tool_calls or not reasoning: + return + with _deepseek_reasoning_lock: + for tc in tool_calls: + tc_id = tc.get("id") or tc.get("call_id", "") + if tc_id: + _deepseek_reasoning_store[tc_id] = { + "resp_id": resp_id, + "assistant": dict(assistant_msg), + "reasoning_content": reasoning, + "ts": time.time(), + } + keys = list(_deepseek_reasoning_store.keys()) + if len(keys) > _MAX_DS_STORED: + for k in keys[:len(keys) - _MAX_DS_STORED]: + del _deepseek_reasoning_store[k] + +def _ds_rebuild_tool_history(messages): + with _deepseek_reasoning_lock: + snapshot = dict(_deepseek_reasoning_store) + expired = [k for k, v in snapshot.items() if time.time() - v["ts"] > 900] + for k in expired: + _deepseek_reasoning_store.pop(k, None) + snapshot.pop(k, None) + if not snapshot: + return messages + rebuilt = [] + inserted_ids = set() + for msg in messages: + if msg.get("role") == "tool": + tc_id = msg.get("tool_call_id", "") + stored = snapshot.get(tc_id) + if stored and tc_id not in inserted_ids: + am = dict(stored["assistant"]) + if am.get("reasoning_content"): + rebuilt.append(am) + inserted_ids.add(tc_id) + rebuilt.append(msg) + return rebuilt + +def _fb_input_to_messages(input_data, instructions=""): + msgs = [] + tool_name_by_id = {} + pending_tool_calls = [] + last_flushed_ids = [] + if isinstance(input_data, str): + msgs.append({"role": "user", "content": input_data}) + elif isinstance(input_data, list): + for item in input_data: + t = item.get("type") + if t == "reasoning": + continue + if t == "function_call": + tcid = item.get("call_id") or item.get("id") or uid("tc") + pending_tool_calls.append( + {"id": tcid, "type": "function", + "function": {"name": item.get("name", ""), + "arguments": item.get("arguments", "{}")}}) + tool_name_by_id[tcid] = item.get("name", "") + continue + if pending_tool_calls: + last_flushed_ids = [tc["id"] for tc in pending_tool_calls] + msg = {"role": "assistant", "content": None, "tool_calls": pending_tool_calls} + msgs.append(msg) + pending_tool_calls = [] + if t == "message": + role = item.get("role", "user") + if role == "developer": + role = "system" + text = "" + content = item.get("content", []) + if isinstance(content, str): + text = content + else: + for part in content: + if isinstance(part, str): + text += part + continue + pt = part.get("type", "") + if pt in ("input_text", "output_text"): + text += part.get("text", "") + if text is not None: + am = {"role": role, "content": text} + if role == "assistant": + am["_fb_orig_id"] = item.get("id", "") + msgs.append(am) + elif t == "function_call_output": + tcid = item.get("call_id") or item.get("id") or "" + if not tcid and last_flushed_ids: + idx = len([m for m in msgs if m.get("role") == "tool"]) + if idx < len(last_flushed_ids): + tcid = last_flushed_ids[idx] + msgs.append({"role": "tool", "tool_call_id": tcid, + "tool_name": tool_name_by_id.get(tcid, ""), + "content": item.get("output", "")}) + if pending_tool_calls: + msg = {"role": "assistant", "content": None, "tool_calls": pending_tool_calls} + msgs.append(msg) + if instructions: + msgs.insert(0, {"role": "system", "content": instructions}) + return msgs + +def _fb_strip_reasoning_from_messages(messages): + out = [] + for m in messages: + nm = {k: v for k, v in m.items() if k != "reasoning_content"} + out.append(nm) + return out + _HOP_BY_HOP_HEADERS = { "connection", "keep-alive", @@ -1399,6 +1563,7 @@ def oa_input_to_messages(input_data): if role == "developer": role = "system" text = "" + reasoning_text = "" content = item.get("content", []) if isinstance(content, str): text = content @@ -1410,6 +1575,9 @@ def oa_input_to_messages(input_data): pt = part.get("type", "") if pt in ("input_text", "output_text"): text += part.get("text", "") + elif pt in ("reasoning",): + for rp in part.get("content", []): + reasoning_text += rp.get("text", "") elif pt == "input_image": img = part.get("image_url", part) msgs.append({"role": role, "content": [{"type": "text", "text": text}, @@ -1417,7 +1585,10 @@ def oa_input_to_messages(input_data): text = None break if text is not None: - msgs.append({"role": role, "content": text}) + msg = {"role": role, "content": text} + if reasoning_text and role == "assistant": + msg["reasoning_content"] = reasoning_text + msgs.append(msg) elif t == "function_call_output": tcid = item.get("call_id") or item.get("id") or "" if not tcid and last_flushed_ids: @@ -1568,10 +1739,12 @@ def oa_resp_to_responses(chat_resp, model, resp_id=None): "total_tokens": usage.get("total_tokens", 0), "input_tokens_details": {"cached_tokens": usage.get("prompt_tokens_details", {}).get("cached_tokens", 0)}}} -def oa_stream_to_sse(chat_stream, model, req_id): +def oa_stream_to_sse(chat_stream, model, req_id, _reasoning_out=None): resp_id = req_id or uid("resp") msg_id = uid("msg") text_buf = "" + reasoning_buf = "" + reasoning_opened = False tc_buf = {} fr = None msg_opened = False @@ -1597,6 +1770,13 @@ def oa_stream_to_sse(chat_stream, model, req_id): delta = choices[0].get("delta", {}) fr = choices[0].get("finish_reason") + rc = delta.get("reasoning_content") or delta.get("reasoning") + if rc: + if not reasoning_opened: + reasoning_opened = True + reasoning_buf += rc + yield emit("response.reasoning.delta", {"type": "response.reasoning.delta", "delta": rc}) + content = delta.get("content") if content: if not msg_opened: @@ -1626,7 +1806,10 @@ def oa_stream_to_sse(chat_stream, model, req_id): yield emit("response.output_text.delta", {"type": "response.function_call_arguments.delta", "delta": fn["arguments"], "item_id": tc_buf[idx]["id"]}) - + reasoning_rsn_id = uid("rsn") if reasoning_buf else None + if reasoning_opened: + yield emit("response.reasoning.done", {"type": "response.reasoning.done", + "item_id": reasoning_rsn_id, "text": reasoning_buf}) if msg_opened: yield emit("response.output_text.done", {"type": "response.output_text.done", @@ -1648,9 +1831,17 @@ def oa_stream_to_sse(chat_stream, model, req_id): fm = {"stop": "completed", "length": "incomplete", "tool_calls": "completed", "content_filter": "incomplete"} status = fm.get(fr, "incomplete") final_out = [] + if reasoning_buf: + final_out.append({"type": "reasoning", "id": reasoning_rsn_id, "status": "completed", + "content": [{"type": "text", "text": reasoning_buf}]}) if msg_opened: + msg_content = [] + if reasoning_buf: + msg_content.append({"type": "output_text", "text": text_buf, "annotations": []}) + else: + msg_content.append({"type": "output_text", "text": text_buf, "annotations": []}) final_out.append({"type": "message", "id": msg_id, "role": "assistant", "status": "completed", - "content": [{"type": "output_text", "text": text_buf, "annotations": []}]}) + "content": msg_content}) for idx in sorted(tc_buf): t = tc_buf[idx] final_out.append({"type": "function_call", "id": t["id"], "call_id": t["call_id"], @@ -1658,6 +1849,9 @@ def oa_stream_to_sse(chat_stream, model, req_id): yield emit("response.completed", {"type": "response.completed", "response": {"id": resp_id, "object": "response", "model": model, "status": status, "created": int(time.time()), "output": final_out}}) + if _reasoning_out is not None: + _reasoning_out["text"] = reasoning_buf + _reasoning_out["tool_calls"] = [tc_buf[i] for i in sorted(tc_buf)] if tc_buf else [] # ═══════════════════════════════════════════════════════════════════ # Anthropic backend @@ -1675,12 +1869,24 @@ def an_input_to_messages(input_data): if role == "developer": role = "user" text = "" + thinking_blocks = [] for part in item.get("content", []): pt = part.get("type", "") if pt in ("input_text", "output_text"): text += part.get("text", "") + elif pt in ("reasoning", "thinking"): + thinking_text = "" + for rp in part.get("content", []): + thinking_text += rp.get("text", "") + if thinking_text: + thinking_blocks.append({"type": "thinking", "thinking": thinking_text, "signature": part.get("signature", "")}) if role == "assistant": - msgs.append({"role": "assistant", "content": text}) + content_parts = [] + if thinking_blocks: + content_parts.extend(thinking_blocks) + if text: + content_parts.append({"type": "text", "text": text}) + msgs.append({"role": "assistant", "content": content_parts if content_parts else text}) else: msgs.append({"role": "user", "content": text}) elif t == "function_call": @@ -4544,54 +4750,175 @@ class Handler(http.server.BaseHTTPRequestHandler): store_response(rid, body.get("input", ""), result.get("output", [])) def _handle_freebuff(self, body, model, stream, tracker=None): - token = _get_freebuff_token() - if not token: - return self.send_json(401, {"error": {"type": "auth_error", - "message": "No freebuff credentials found. Install freebuff (npm i -g freebuff) and login first."}}) + token = _get_freebuff_token() + if not token: + return self.send_json(401, {"error": {"type": "auth_error", + "message": "No freebuff credentials found. Install freebuff (npm i -g freebuff) and login first."}}) - agent_id = _FREEBUFF_AGENT_MAP.get(model) - if not agent_id: - matched = None - for m in _FREEBUFF_AGENT_MAP: - if model.lower().replace("/", "").replace("-", "") in m.lower().replace("/", "").replace("-", ""): - matched = m - break - if matched: - agent_id = _FREEBUFF_AGENT_MAP[matched] - model = matched - else: - fallback_model = "deepseek/deepseek-v4-flash" - agent_id = _FREEBUFF_AGENT_MAP.get(fallback_model, "base2-free-deepseek-flash") - print(f"[freebuff] unknown model '{model}', falling back to {fallback_model}", file=sys.stderr) - model = fallback_model + agent_id = _FREEBUFF_AGENT_MAP.get(model) + if not agent_id: + matched = None + for m in _FREEBUFF_AGENT_MAP: + if model.lower().replace("/", "").replace("-", "") in m.lower().replace("/", "").replace("-", ""): + matched = m + break + if matched: + agent_id = _FREEBUFF_AGENT_MAP[matched] + model = matched + else: + fallback_model = "deepseek/deepseek-v4-flash" + agent_id = _FREEBUFF_AGENT_MAP.get(fallback_model, "base2-free-deepseek-flash") + print(f"[freebuff] unknown model '{model}', falling back to {fallback_model}", file=sys.stderr) + model = fallback_model + run_id = _freebuff_start_run(token, agent_id) + if not run_id: + return self.send_json(502, {"error": {"type": "upstream_error", + "message": "Failed to start freebuff agent run. Check credentials and network."}}) + + instance_id = _freebuff_get_session(token, model) + + input_data = body.get("input", "") + instructions = body.get("instructions", "").strip() + messages = _fb_input_to_messages(input_data, instructions) + messages = _ds_rebuild_tool_history(messages) + + metadata = { + "run_id": run_id, + "cost_mode": "free", + } + if instance_id: + metadata["freebuff_instance_id"] = instance_id + + chat_body = { + "model": model, + "messages": messages, + "stream": stream, + "max_tokens": max(body.get("max_output_tokens", 0), 64000), + "codebuff_metadata": metadata, + } + for k in ("temperature", "top_p"): + if k in body: + chat_body[k] = body[k] + tools = oa_convert_tools(body.get("tools")) + if tools: + chat_body["tools"] = tools + if body.get("tool_choice"): + chat_body["tool_choice"] = body["tool_choice"] + + target = f"{_FREEBUFF_API_URL}/api/v1/chat/completions" + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {token}", + "User-Agent": "codex-launcher/3.8.4", + "x-freebuff-model": model, + } + if instance_id: + headers["x-freebuff-instance-id"] = instance_id + + print(f"[{self._session_id}] [freebuff] POST {target} model={model} stream={stream} run={run_id}", file=sys.stderr) + chat_body_b = json.dumps(chat_body).encode() + + try: + req = urllib.request.Request(target, data=chat_body_b, headers=headers) + upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream)) + except urllib.error.HTTPError as e: + err_body = e.read().decode()[:1000] + _freebuff_finish_run(token, run_id, "failed") + if _is_reasoning_content_error(err_body): + print(f"[freebuff] reasoning_content error, retrying with thinking disabled (DeepSeek native format): {err_body[:200]}", file=sys.stderr) + result = self._fb_retry_thinking_disabled(body, model, token, agent_id, stream, tracker, input_data, instructions, err_body) + return result + print(f"[freebuff] HTTP {e.code}: {err_body[:300]}", file=sys.stderr) + return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}}) + except Exception as e: + _freebuff_finish_run(token, run_id, "failed") + return self.send_json(502, {"error": {"type": "proxy_error", "message": str(e)}}) + + t0 = time.time() + try: + if stream: + self.send_response(200) + self.send_header("Content-Type", "text/event-stream") + self.send_header("Cache-Control", "no-cache") + self.send_header("Connection", "keep-alive") + self.end_headers() + if hasattr(self, 'connection') and self.connection: + try: + self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + except Exception: + pass + + last_resp_id = [None] + last_output = [None] + last_status = [None] + finish_reason = [None] + reasoning_out = {} + + def _on_fb_event(event): + if tracker and tracker.cancelled.is_set(): + return False + for line in event.strip().split("\n"): + if line.startswith("data: "): + try: + d = json.loads(line[6:]) + if d.get("type") == "response.completed": + last_resp_id[0] = d.get("response", {}).get("id") + last_output[0] = d.get("response", {}).get("output", []) + last_status[0] = d.get("response", {}).get("status") + finish_reason[0] = "length" if last_status[0] == "incomplete" else "stop" + except Exception: + pass + return None + + try: + self.stream_buffered_events( + oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id"), + _reasoning_out=reasoning_out), + on_event=_on_fb_event) + except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError): + print(f"[{self._session_id}] [freebuff] client disconnected", file=sys.stderr) + return + + success = finish_reason[0] != "length" + _record_usage("freebuff", model, success, time.time() - t0) + if last_resp_id[0] and input_data is not None: + store_response(last_resp_id[0], input_data, last_output[0]) + if last_resp_id[0] and reasoning_out.get("text") or reasoning_out.get("tool_calls"): + asm = {"role": "assistant", "content": reasoning_out.get("text", "") or ""} + if reasoning_out.get("tool_calls"): + asm["tool_calls"] = reasoning_out["tool_calls"] + if reasoning_out.get("text"): + asm["reasoning_content"] = reasoning_out["text"] + _ds_store_assistant(last_resp_id[0], asm) + print(f"[{self._session_id}] [freebuff] stream done status={last_status[0]} in {time.time()-t0:.1f}s", file=sys.stderr) + else: + raw = upstream.read().decode() + chat_resp = json.loads(raw) + result = oa_resp_to_responses(chat_resp, model) + self.send_json(200, result) + rid = result.get("id") + if rid: + store_response(rid, input_data, result.get("output", [])) + print(f"[{self._session_id}] [freebuff] non-stream done in {time.time()-t0:.1f}s", file=sys.stderr) + finally: + _freebuff_finish_run(token, run_id, "completed") + + def _fb_retry_thinking_disabled(self, body, model, token, agent_id, stream, tracker, input_data, instructions, original_error): run_id = _freebuff_start_run(token, agent_id) if not run_id: return self.send_json(502, {"error": {"type": "upstream_error", - "message": "Failed to start freebuff agent run. Check credentials and network."}}) - + "message": "Failed to start freebuff agent run for retry."}}) instance_id = _freebuff_get_session(token, model) - - input_data = body.get("input", "") - messages = oa_input_to_messages(input_data) - instructions = body.get("instructions", "").strip() - if instructions: - messages.insert(0, {"role": "system", "content": instructions}) - - metadata = { - "run_id": run_id, - "cost_mode": "free", - } + messages = _fb_input_to_messages(input_data, instructions) + _freebuff_hard_disable_reasoning(messages) + metadata = {"run_id": run_id, "cost_mode": "free"} if instance_id: metadata["freebuff_instance_id"] = instance_id - chat_body = { - "model": model, - "messages": messages, - "stream": stream, + "model": model, "messages": messages, "stream": stream, "max_tokens": max(body.get("max_output_tokens", 0), 64000), - "enable_thinking": REASONING_ENABLED and REASONING_EFFORT != "none", - "reasoning_effort": REASONING_EFFORT if REASONING_ENABLED else "none", + "thinking": {"type": "disabled"}, "codebuff_metadata": metadata, } for k in ("temperature", "top_p"): @@ -4602,29 +4929,23 @@ class Handler(http.server.BaseHTTPRequestHandler): chat_body["tools"] = tools if body.get("tool_choice"): chat_body["tool_choice"] = body["tool_choice"] - target = f"{_FREEBUFF_API_URL}/api/v1/chat/completions" - headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {token}", - "User-Agent": "codex-launcher/3.8.3", - } - - print(f"[{self._session_id}] [freebuff] POST {target} model={model} stream={stream} run={run_id}", file=sys.stderr) - chat_body_b = json.dumps(chat_body).encode() - + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}", "User-Agent": "codex-launcher/3.8.4", "x-freebuff-model": model} + if instance_id: + headers["x-freebuff-instance-id"] = instance_id + print(f"[freebuff] retry POST {target} model={model} stream={stream} run={run_id} (thinking disabled via DeepSeek native)", file=sys.stderr) try: - req = urllib.request.Request(target, data=chat_body_b, headers=headers) + req = urllib.request.Request(target, data=json.dumps(chat_body).encode(), headers=headers) upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream)) except urllib.error.HTTPError as e: err_body = e.read().decode()[:500] _freebuff_finish_run(token, run_id, "failed") - print(f"[freebuff] HTTP {e.code}: {err_body}", file=sys.stderr) - return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}}) + print(f"[freebuff] thinking-disabled retry failed: HTTP {e.code}: {err_body[:300]}", file=sys.stderr) + return self.send_json(e.code, {"error": {"type": "freebuff_deepseek_thinking_error", + "message": "FreeBuff/DeepSeek V4 requires reasoning_content round-trip for tool-call sessions. Use Command Code provider for this model instead.", "upstream_error": _sanitize_err_body(err_body)}}) except Exception as e: _freebuff_finish_run(token, run_id, "failed") return self.send_json(502, {"error": {"type": "proxy_error", "message": str(e)}}) - t0 = time.time() try: if stream: @@ -4638,13 +4959,12 @@ class Handler(http.server.BaseHTTPRequestHandler): self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) except Exception: pass - last_resp_id = [None] last_output = [None] last_status = [None] finish_reason = [None] - - def _on_fb_event(event): + reasoning_out = {} + def _on_fb_retry_event(event): if tracker and tracker.cancelled.is_set(): return False for line in event.strip().split("\n"): @@ -4659,20 +4979,25 @@ class Handler(http.server.BaseHTTPRequestHandler): except Exception: pass return None - try: self.stream_buffered_events( - oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id")), - on_event=_on_fb_event) + oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id"), + _reasoning_out=reasoning_out), + on_event=_on_fb_retry_event) except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError): - print(f"[{self._session_id}] [freebuff] client disconnected", file=sys.stderr) return - success = finish_reason[0] != "length" _record_usage("freebuff", model, success, time.time() - t0) if last_resp_id[0] and input_data is not None: store_response(last_resp_id[0], input_data, last_output[0]) - print(f"[{self._session_id}] [freebuff] stream done status={last_status[0]} in {time.time()-t0:.1f}s", file=sys.stderr) + if last_resp_id[0] and reasoning_out.get("text") or reasoning_out.get("tool_calls"): + asm = {"role": "assistant", "content": reasoning_out.get("text", "") or ""} + if reasoning_out.get("tool_calls"): + asm["tool_calls"] = reasoning_out["tool_calls"] + if reasoning_out.get("text"): + asm["reasoning_content"] = reasoning_out["text"] + _ds_store_assistant(last_resp_id[0], asm) + print(f"[{self._session_id}] [freebuff] retry stream done status={last_status[0]} in {time.time()-t0:.1f}s", file=sys.stderr) else: raw = upstream.read().decode() chat_resp = json.loads(raw) @@ -4681,7 +5006,7 @@ class Handler(http.server.BaseHTTPRequestHandler): rid = result.get("id") if rid: store_response(rid, input_data, result.get("output", [])) - print(f"[{self._session_id}] [freebuff] non-stream done in {time.time()-t0:.1f}s", file=sys.stderr) + print(f"[{self._session_id}] [freebuff] retry non-stream done in {time.time()-t0:.1f}s", file=sys.stderr) finally: _freebuff_finish_run(token, run_id, "completed")