From 863f438cc32a0ea09a74d61a5a35722d567bcc29 Mon Sep 17 00:00:00 2001
From: admin <admin@nomadarch.local>
Date: Sun, 24 May 2026 19:06:59 +0000
Subject: [PATCH] v3.8.5: sync src/translate-proxy.py with x-freebuff headers

---
 src/translate-proxy.py | 467 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 396 insertions(+), 71 deletions(-)

diff --git a/src/translate-proxy.py b/src/translate-proxy.py
index 1e0ac88..d183e0e 100755
--- a/src/translate-proxy.py
+++ b/src/translate-proxy.py
@@ -266,6 +266,13 @@ _response_store_lock = threading.Lock()
 _MAX_STORED = 50
 _RESPONSE_TTL = 600
 
+_fb_reasoning_store = collections.OrderedDict()  # resp_id -> {"reasoning", "ts"}; evicted from the front
+_fb_reasoning_store_lock = threading.Lock()  # held around every access to _fb_reasoning_store
+
+_deepseek_reasoning_store = {}  # tool-call id -> stored assistant turn (written by _ds_store_assistant)
+_deepseek_reasoning_lock = threading.Lock()  # held around every access to _deepseek_reasoning_store
+_MAX_DS_STORED = 100  # key cap enforced by _ds_store_assistant
+
 _crof_lock = threading.Lock()
 _provider_caps_lock = threading.Lock()
 _provider_caps = None
@@ -328,11 +335,12 @@ def _freebuff_get_session(token, model):
         req = urllib.request.Request(url, data=body, headers={
             "Content-Type": "application/json",
             "Authorization": f"Bearer {token}",
-            "User-Agent": "codex-launcher/3.8.3",
+            "User-Agent": "codex-launcher/3.8.4",
+            "x-freebuff-model": model,
         })
         resp = urllib.request.urlopen(req, timeout=15)
         data = json.loads(resp.read())
-        instance_id = data.get("instanceId", "")
+        instance_id = data.get("instanceId", data.get("data", {}).get("instance_id", ""))
         expires_at = data.get("remainingMs", 0)
         if instance_id:
             with _freebuff_token_lock:
@@ -352,7 +360,7 @@ def _freebuff_start_run(token, agent_id):
     req = urllib.request.Request(url, data=body, headers={
         "Content-Type": "application/json",
         "Authorization": f"Bearer {token}",
-        "User-Agent": "codex-launcher/3.8.3",
+        "User-Agent": "codex-launcher/3.8.4",
     })
     try:
         resp = urllib.request.urlopen(req, timeout=15)
@@ -375,7 +383,7 @@ def _freebuff_finish_run(token, run_id, status="completed"):
     req = urllib.request.Request(url, data=body, headers={
         "Content-Type": "application/json",
         "Authorization": f"Bearer {token}",
-        "User-Agent": "codex-launcher/3.8.3",
+        "User-Agent": "codex-launcher/3.8.4",
     })
     try:
         urllib.request.urlopen(req, timeout=10)
@@ -737,6 +745,162 @@ def resolve_previous_response(body):
         combined = [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": str(prev_input)}]}] + list(prev_output) + new_input
     return combined
 
+def _fb_store_reasoning(resp_id, reasoning_text):
+    """Cache reasoning under resp_id, then trim over-cap and expired entries."""
+    if not (resp_id and reasoning_text):
+        return
+    with _fb_reasoning_store_lock:
+        _fb_reasoning_store[resp_id] = {"reasoning": reasoning_text, "ts": time.time()}
+        while len(_fb_reasoning_store) > _MAX_STORED:
+            _fb_reasoning_store.popitem(last=False)  # drop from the front of the OrderedDict
+        for stale in [k for k, v in _fb_reasoning_store.items() if time.time() - v["ts"] > _RESPONSE_TTL]:
+            del _fb_reasoning_store[stale]
+
+def _fb_get_reasoning(resp_id):
+    """Return the reasoning cached for resp_id, or "" when none is stored."""
+    if not resp_id:
+        return ""
+    with _fb_reasoning_store_lock:
+        return (_fb_reasoning_store.get(resp_id) or {"reasoning": ""})["reasoning"]
+
+def _fb_get_any_reasoning():
+    """Return the reasoning of the first entry in the store, or "" if it is empty."""
+    with _fb_reasoning_store_lock:
+        first = next(iter(_fb_reasoning_store.values()), None)
+        return "" if first is None else first["reasoning"]
+
+def _freebuff_hard_disable_reasoning(messages):
+    """Remove reasoning/thinking keys from every dict message, in place.
+
+    Non-dict entries are skipped. Used so the chat body carries no
+    reasoning fields before it is POSTed to FreeBuff.
+    """
+    doomed = ("reasoning_content", "reasoning", "thinking", "thinking_content", "thoughts")
+    for entry in (m for m in messages if isinstance(m, dict)):
+        for field in doomed:
+            entry.pop(field, None)
+
+def _is_reasoning_content_error(error_text):
+    """Return True if error_text contains a reasoning/thinking round-trip marker (case-insensitive)."""
+    if not error_text:
+        return False
+    lowered = error_text.lower()
+    return any(marker in lowered for marker in ("reasoning_content", "thinking mode", "must be passed back"))
+
+def _ds_store_assistant(resp_id, assistant_msg):
+    """Index an assistant turn under each of its tool-call ids.
+
+    Stores only when resp_id is truthy and assistant_msg is a dict with both
+    tool_calls and reasoning_content; then trims keys beyond _MAX_DS_STORED.
+    """
+    if not (resp_id and isinstance(assistant_msg, dict)):
+        return
+    calls = assistant_msg.get("tool_calls") or []
+    thought = assistant_msg.get("reasoning_content")
+    if not (calls and thought):
+        return
+    with _deepseek_reasoning_lock:
+        for call in calls:
+            call_key = call.get("id") or call.get("call_id", "")
+            if call_key:
+                _deepseek_reasoning_store[call_key] = {"resp_id": resp_id, "assistant": dict(assistant_msg),
+                                                       "reasoning_content": thought, "ts": time.time()}
+        overflow = len(_deepseek_reasoning_store) - _MAX_DS_STORED
+        for old_key in list(_deepseek_reasoning_store)[:max(overflow, 0)]:
+            del _deepseek_reasoning_store[old_key]
+
+def _ds_rebuild_tool_history(messages):
+    """Return messages with stored reasoning_content restored on assistant tool-call turns."""
+    with _deepseek_reasoning_lock:
+        for k in [k for k, v in _deepseek_reasoning_store.items() if time.time() - v["ts"] > 900]:
+            del _deepseek_reasoning_store[k]  # entries expire after 900 s
+        snapshot = dict(_deepseek_reasoning_store)
+    if not snapshot:
+        return messages
+    rebuilt, owner = [], {}  # owner: tool-call id / stored resp id -> assistant turn in rebuilt
+    for msg in messages:
+        if msg.get("role") == "assistant":
+            msg = dict(msg)  # copy so the caller's message is not mutated when patched
+            owner.update((tc.get("id"), msg) for tc in msg.get("tool_calls") or [])
+        stored = snapshot.get(msg.get("tool_call_id", "")) if msg.get("role") == "tool" else None
+        if stored and stored["reasoning_content"]:
+            turn = owner.get(msg["tool_call_id"]) or owner.get(("resp", stored["resp_id"]))
+            if turn is None:  # history lacks the assistant turn: restore the stored one, once
+                turn = owner[("resp", stored["resp_id"])] = dict(stored["assistant"])
+                rebuilt.append(turn)
+            turn.setdefault("reasoning_content", stored["reasoning_content"])
+        rebuilt.append(msg)
+    return rebuilt
+
+def _fb_input_to_messages(input_data, instructions=""):
+    """Convert Responses-style input (a string or a list of items) into chat messages.
+
+    "message" items map to role/content ("developer" becomes "system"); runs of
+    "function_call" items become one assistant turn with tool_calls; outputs become
+    "tool" messages; "reasoning" and non-dict items are skipped; a non-empty
+    instructions string is prepended as a system message.
+    """
+    msgs = []
+    tool_name_by_id = {}
+    pending_tool_calls = []
+    last_flushed_ids = []
+    outputs_since_flush = 0  # tool results emitted since the latest tool-call turn
+    if isinstance(input_data, str):
+        msgs.append({"role": "user", "content": input_data})
+    for item in input_data if isinstance(input_data, list) else ():
+        if not isinstance(item, dict):
+            continue
+        t = item.get("type")
+        if t == "reasoning":
+            continue
+        if t == "function_call":
+            tcid = item.get("call_id") or item.get("id") or uid("tc")
+            pending_tool_calls.append({"id": tcid, "type": "function", "function": {
+                "name": item.get("name", ""), "arguments": item.get("arguments", "{}")}})
+            tool_name_by_id[tcid] = item.get("name", "")
+            continue
+        if pending_tool_calls:  # any other item closes the run of calls
+            last_flushed_ids = [tc["id"] for tc in pending_tool_calls]
+            outputs_since_flush = 0
+            msgs.append({"role": "assistant", "content": None, "tool_calls": pending_tool_calls})
+            pending_tool_calls = []
+        if t == "message":
+            role = item.get("role", "user")
+            role = "system" if role == "developer" else role
+            content = item.get("content") or []  # tolerate an explicit null
+            if isinstance(content, str):
+                text = content
+            else:
+                text = "".join(
+                    part if isinstance(part, str) else (part.get("text") or "")
+                    for part in content
+                    if isinstance(part, str)
+                    or (isinstance(part, dict) and part.get("type", "") in ("input_text", "output_text")))
+            am = {"role": role, "content": text}
+            if role == "assistant":
+                am["_fb_orig_id"] = item.get("id", "")
+            msgs.append(am)
+        elif t == "function_call_output":
+            tcid = item.get("call_id") or item.get("id") or ""
+            if not tcid and outputs_since_flush < len(last_flushed_ids):
+                tcid = last_flushed_ids[outputs_since_flush]  # pair positionally with the latest calls
+            outputs_since_flush += 1
+            msgs.append({"role": "tool", "tool_call_id": tcid,
+                         "tool_name": tool_name_by_id.get(tcid, ""),
+                         "content": item.get("output", "")})
+    if pending_tool_calls:
+        msgs.append({"role": "assistant", "content": None, "tool_calls": pending_tool_calls})
+    if instructions:
+        msgs.insert(0, {"role": "system", "content": instructions})
+    return msgs
+
+def _fb_strip_reasoning_from_messages(messages):
+    """Return shallow copies of messages with the "reasoning_content" key removed.
+
+    The input list and its dicts are left untouched.
+    """
+    return [{key: val for key, val in msg.items() if key != "reasoning_content"} for msg in messages]
+
 _HOP_BY_HOP_HEADERS = {
     "connection",
     "keep-alive",
@@ -1399,6 +1563,7 @@ def oa_input_to_messages(input_data):
                 if role == "developer":
                     role = "system"
                 text = ""
+                reasoning_text = ""
                 content = item.get("content", [])
                 if isinstance(content, str):
                     text = content
@@ -1410,6 +1575,9 @@ def oa_input_to_messages(input_data):
                         pt = part.get("type", "")
                         if pt in ("input_text", "output_text"):
                             text += part.get("text", "")
+                        elif pt in ("reasoning",):
+                            for rp in part.get("content", []):
+                                reasoning_text += rp.get("text", "")
                         elif pt == "input_image":
                             img = part.get("image_url", part)
                             msgs.append({"role": role, "content": [{"type": "text", "text": text},
@@ -1417,7 +1585,10 @@ def oa_input_to_messages(input_data):
                             text = None
                             break
                 if text is not None:
-                    msgs.append({"role": role, "content": text})
+                    msg = {"role": role, "content": text}
+                    if reasoning_text and role == "assistant":
+                        msg["reasoning_content"] = reasoning_text
+                    msgs.append(msg)
             elif t == "function_call_output":
                 tcid = item.get("call_id") or item.get("id") or ""
                 if not tcid and last_flushed_ids:
@@ -1568,10 +1739,12 @@ def oa_resp_to_responses(chat_resp, model, resp_id=None):
                       "total_tokens": usage.get("total_tokens", 0),
                       "input_tokens_details": {"cached_tokens": usage.get("prompt_tokens_details", {}).get("cached_tokens", 0)}}}
 
-def oa_stream_to_sse(chat_stream, model, req_id):
+def oa_stream_to_sse(chat_stream, model, req_id, _reasoning_out=None):
     resp_id = req_id or uid("resp")
     msg_id = uid("msg")
     text_buf = ""
+    reasoning_buf = ""
+    reasoning_opened = False
     tc_buf = {}
     fr = None
     msg_opened = False
@@ -1597,6 +1770,13 @@ def oa_stream_to_sse(chat_stream, model, req_id):
         delta = choices[0].get("delta", {})
         fr = choices[0].get("finish_reason")
 
+        rc = delta.get("reasoning_content") or delta.get("reasoning")
+        if rc:
+            if not reasoning_opened:
+                reasoning_opened = True
+            reasoning_buf += rc
+            yield emit("response.reasoning.delta", {"type": "response.reasoning.delta", "delta": rc})
+
         content = delta.get("content")
         if content:
             if not msg_opened:
@@ -1626,7 +1806,10 @@ def oa_stream_to_sse(chat_stream, model, req_id):
                 yield emit("response.output_text.delta", {"type": "response.function_call_arguments.delta",
                             "delta": fn["arguments"], "item_id": tc_buf[idx]["id"]})
 
-
+    reasoning_rsn_id = uid("rsn") if reasoning_buf else None
+    if reasoning_opened:
+        yield emit("response.reasoning.done", {"type": "response.reasoning.done",
+                    "item_id": reasoning_rsn_id, "text": reasoning_buf})
 
     if msg_opened:
         yield emit("response.output_text.done", {"type": "response.output_text.done",
@@ -1648,9 +1831,17 @@ def oa_stream_to_sse(chat_stream, model, req_id):
     fm = {"stop": "completed", "length": "incomplete", "tool_calls": "completed", "content_filter": "incomplete"}
     status = fm.get(fr, "incomplete")
     final_out = []
+    if reasoning_buf:
+        final_out.append({"type": "reasoning", "id": reasoning_rsn_id, "status": "completed",
+                          "content": [{"type": "text", "text": reasoning_buf}]})
     if msg_opened:
+        msg_content = []
+        if reasoning_buf:
+            msg_content.append({"type": "output_text", "text": text_buf, "annotations": []})
+        else:
+            msg_content.append({"type": "output_text", "text": text_buf, "annotations": []})
         final_out.append({"type": "message", "id": msg_id, "role": "assistant", "status": "completed",
-                          "content": [{"type": "output_text", "text": text_buf, "annotations": []}]})
+                          "content": msg_content})
     for idx in sorted(tc_buf):
         t = tc_buf[idx]
         final_out.append({"type": "function_call", "id": t["id"], "call_id": t["call_id"],
@@ -1658,6 +1849,9 @@ def oa_stream_to_sse(chat_stream, model, req_id):
     yield emit("response.completed", {"type": "response.completed",
         "response": {"id": resp_id, "object": "response", "model": model,
                      "status": status, "created": int(time.time()), "output": final_out}})
+    if _reasoning_out is not None:
+        _reasoning_out["text"] = reasoning_buf
+        _reasoning_out["tool_calls"] = [tc_buf[i] for i in sorted(tc_buf)] if tc_buf else []
 
 # ═══════════════════════════════════════════════════════════════════
 # Anthropic backend
@@ -1675,12 +1869,24 @@ def an_input_to_messages(input_data):
                 if role == "developer":
                     role = "user"
                 text = ""
+                thinking_blocks = []
                 for part in item.get("content", []):
                     pt = part.get("type", "")
                     if pt in ("input_text", "output_text"):
                         text += part.get("text", "")
+                    elif pt in ("reasoning", "thinking"):
+                        thinking_text = ""
+                        for rp in part.get("content", []):
+                            thinking_text += rp.get("text", "")
+                        if thinking_text:
+                            thinking_blocks.append({"type": "thinking", "thinking": thinking_text, "signature": part.get("signature", "")})
                 if role == "assistant":
-                    msgs.append({"role": "assistant", "content": text})
+                    content_parts = []
+                    if thinking_blocks:
+                        content_parts.extend(thinking_blocks)
+                    if text:
+                        content_parts.append({"type": "text", "text": text})
+                    msgs.append({"role": "assistant", "content": content_parts if content_parts else text})
                 else:
                     msgs.append({"role": "user", "content": text})
             elif t == "function_call":
@@ -4544,54 +4750,175 @@ class Handler(http.server.BaseHTTPRequestHandler):
                 store_response(rid, body.get("input", ""), result.get("output", []))
 
     def _handle_freebuff(self, body, model, stream, tracker=None):
-        token = _get_freebuff_token()
-        if not token:
-            return self.send_json(401, {"error": {"type": "auth_error",
-                "message": "No freebuff credentials found. Install freebuff (npm i -g freebuff) and login first."}})
+        """Proxy a Responses-style request to FreeBuff's chat-completions endpoint.
+        Starts an agent run, POSTs the converted messages, relays the reply (SSE when
+        stream is true) and retries with thinking disabled on reasoning_content errors."""
+        token = _get_freebuff_token()
+        if not token:
+            return self.send_json(401, {"error": {"type": "auth_error",
+                "message": "No freebuff credentials found. Install freebuff (npm i -g freebuff) and login first."}})
 
-        agent_id = _FREEBUFF_AGENT_MAP.get(model)
-        if not agent_id:
-            matched = None
-            for m in _FREEBUFF_AGENT_MAP:
-                if model.lower().replace("/", "").replace("-", "") in m.lower().replace("/", "").replace("-", ""):
-                    matched = m
-                    break
-            if matched:
-                agent_id = _FREEBUFF_AGENT_MAP[matched]
-                model = matched
-            else:
-                fallback_model = "deepseek/deepseek-v4-flash"
-                agent_id = _FREEBUFF_AGENT_MAP.get(fallback_model, "base2-free-deepseek-flash")
-                print(f"[freebuff] unknown model '{model}', falling back to {fallback_model}", file=sys.stderr)
-                model = fallback_model
+        agent_id = _FREEBUFF_AGENT_MAP.get(model)
+        if not agent_id:
+            matched = None
+            for m in _FREEBUFF_AGENT_MAP:
+                if model.lower().replace("/", "").replace("-", "") in m.lower().replace("/", "").replace("-", ""):
+                    matched = m
+                    break
+            if matched:
+                agent_id = _FREEBUFF_AGENT_MAP[matched]
+                model = matched
+            else:
+                fallback_model = "deepseek/deepseek-v4-flash"
+                agent_id = _FREEBUFF_AGENT_MAP.get(fallback_model, "base2-free-deepseek-flash")
+                print(f"[freebuff] unknown model '{model}', falling back to {fallback_model}", file=sys.stderr)
+                model = fallback_model
 
+        run_id = _freebuff_start_run(token, agent_id)
+        if not run_id:
+            return self.send_json(502, {"error": {"type": "upstream_error",
+                "message": "Failed to start freebuff agent run. Check credentials and network."}})
+
+        instance_id = _freebuff_get_session(token, model)
+
+        input_data = body.get("input", "")
+        instructions = body.get("instructions", "").strip()
+        messages = _fb_input_to_messages(input_data, instructions)
+        messages = _ds_rebuild_tool_history(messages)
+
+        metadata = {"run_id": run_id, "cost_mode": "free"}
+        if instance_id:
+            metadata["freebuff_instance_id"] = instance_id
+
+        chat_body = {
+            "model": model,
+            "messages": messages,
+            "stream": stream,
+            "max_tokens": max(body.get("max_output_tokens", 0), 64000),
+            "codebuff_metadata": metadata,
+        }
+        for k in ("temperature", "top_p"):
+            if k in body:
+                chat_body[k] = body[k]
+        tools = oa_convert_tools(body.get("tools"))
+        if tools:
+            chat_body["tools"] = tools
+        if body.get("tool_choice"):
+            chat_body["tool_choice"] = body["tool_choice"]
+
+        target = f"{_FREEBUFF_API_URL}/api/v1/chat/completions"
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {token}",
+            "User-Agent": "codex-launcher/3.8.4",
+            "x-freebuff-model": model,
+        }
+        if instance_id:
+            headers["x-freebuff-instance-id"] = instance_id
+
+        print(f"[{self._session_id}] [freebuff] POST {target} model={model} stream={stream} run={run_id}", file=sys.stderr)
+        chat_body_b = json.dumps(chat_body).encode()
+
+        try:
+            req = urllib.request.Request(target, data=chat_body_b, headers=headers)
+            upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream))
+        except urllib.error.HTTPError as e:
+            err_body = e.read().decode()[:1000]
+            _freebuff_finish_run(token, run_id, "failed")
+            if _is_reasoning_content_error(err_body):
+                print(f"[freebuff] reasoning_content error, retrying with thinking disabled (DeepSeek native format): {err_body[:200]}", file=sys.stderr)
+                result = self._fb_retry_thinking_disabled(body, model, token, agent_id, stream, tracker, input_data, instructions, err_body)
+                return result
+            print(f"[freebuff] HTTP {e.code}: {err_body[:300]}", file=sys.stderr)
+            return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}})
+        except Exception as e:
+            _freebuff_finish_run(token, run_id, "failed")
+            return self.send_json(502, {"error": {"type": "proxy_error", "message": str(e)}})
+
+        t0 = time.time()
+        try:
+            if stream:
+                self.send_response(200)
+                self.send_header("Content-Type", "text/event-stream")
+                self.send_header("Cache-Control", "no-cache")
+                self.send_header("Connection", "keep-alive")
+                self.end_headers()
+                if hasattr(self, 'connection') and self.connection:
+                    try:
+                        self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+                    except Exception:
+                        pass
+
+                last_resp_id = [None]
+                last_output = [None]
+                last_status = [None]
+                finish_reason = [None]
+                reasoning_out = {}
+
+                def _on_fb_event(event):
+                    if tracker and tracker.cancelled.is_set():
+                        return False
+                    for line in event.strip().split("\n"):
+                        if line.startswith("data: "):
+                            try:
+                                d = json.loads(line[6:])
+                                if d.get("type") == "response.completed":
+                                    last_resp_id[0] = d.get("response", {}).get("id")
+                                    last_output[0] = d.get("response", {}).get("output", [])
+                                    last_status[0] = d.get("response", {}).get("status")
+                                    finish_reason[0] = "length" if last_status[0] == "incomplete" else "stop"
+                            except Exception:
+                                pass
+                    return None
+
+                try:
+                    self.stream_buffered_events(
+                        oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id"),
+                                         _reasoning_out=reasoning_out),
+                        on_event=_on_fb_event)
+                except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
+                    print(f"[{self._session_id}] [freebuff] client disconnected", file=sys.stderr)
+                    return
+
+                success = finish_reason[0] != "length"
+                _record_usage("freebuff", model, success, time.time() - t0)
+                if last_resp_id[0] and input_data is not None:
+                    store_response(last_resp_id[0], input_data, last_output[0])
+                if last_resp_id[0] and (reasoning_out.get("text") or reasoning_out.get("tool_calls")):
+                    asm = {"role": "assistant", "content": reasoning_out.get("text", "") or ""}
+                    if reasoning_out.get("tool_calls"):
+                        asm["tool_calls"] = reasoning_out["tool_calls"]
+                    if reasoning_out.get("text"):
+                        asm["reasoning_content"] = reasoning_out["text"]
+                    _ds_store_assistant(last_resp_id[0], asm)
+                print(f"[{self._session_id}] [freebuff] stream done status={last_status[0]} in {time.time()-t0:.1f}s", file=sys.stderr)
+            else:
+                raw = upstream.read().decode()
+                chat_resp = json.loads(raw)
+                result = oa_resp_to_responses(chat_resp, model)
+                self.send_json(200, result)
+                rid = result.get("id")
+                if rid:
+                    store_response(rid, input_data, result.get("output", []))
+                print(f"[{self._session_id}] [freebuff] non-stream done in {time.time()-t0:.1f}s", file=sys.stderr)
+        finally:
+            _freebuff_finish_run(token, run_id, "completed")
+
+    def _fb_retry_thinking_disabled(self, body, model, token, agent_id, stream, tracker, input_data, instructions, original_error):
         run_id = _freebuff_start_run(token, agent_id)
         if not run_id:
             return self.send_json(502, {"error": {"type": "upstream_error",
-                "message": "Failed to start freebuff agent run. Check credentials and network."}})
-
+                "message": "Failed to start freebuff agent run for retry."}})
         instance_id = _freebuff_get_session(token, model)
-
-        input_data = body.get("input", "")
-        messages = oa_input_to_messages(input_data)
-        instructions = body.get("instructions", "").strip()
-        if instructions:
-            messages.insert(0, {"role": "system", "content": instructions})
-
-        metadata = {
-            "run_id": run_id,
-            "cost_mode": "free",
-        }
+        messages = _fb_input_to_messages(input_data, instructions)
+        _freebuff_hard_disable_reasoning(messages)
+        metadata = {"run_id": run_id, "cost_mode": "free"}
         if instance_id:
             metadata["freebuff_instance_id"] = instance_id
-
         chat_body = {
-            "model": model,
-            "messages": messages,
-            "stream": stream,
+            "model": model, "messages": messages, "stream": stream,
             "max_tokens": max(body.get("max_output_tokens", 0), 64000),
-            "enable_thinking": REASONING_ENABLED and REASONING_EFFORT != "none",
-            "reasoning_effort": REASONING_EFFORT if REASONING_ENABLED else "none",
+            "thinking": {"type": "disabled"},
             "codebuff_metadata": metadata,
         }
         for k in ("temperature", "top_p"):
@@ -4602,29 +4929,23 @@ class Handler(http.server.BaseHTTPRequestHandler):
             chat_body["tools"] = tools
         if body.get("tool_choice"):
             chat_body["tool_choice"] = body["tool_choice"]
-
         target = f"{_FREEBUFF_API_URL}/api/v1/chat/completions"
-        headers = {
-            "Content-Type": "application/json",
-            "Authorization": f"Bearer {token}",
-            "User-Agent": "codex-launcher/3.8.3",
-        }
-
-        print(f"[{self._session_id}] [freebuff] POST {target} model={model} stream={stream} run={run_id}", file=sys.stderr)
-        chat_body_b = json.dumps(chat_body).encode()
-
+        headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}", "User-Agent": "codex-launcher/3.8.4", "x-freebuff-model": model}
+        if instance_id:
+            headers["x-freebuff-instance-id"] = instance_id
+        print(f"[freebuff] retry POST {target} model={model} stream={stream} run={run_id} (thinking disabled via DeepSeek native)", file=sys.stderr)
         try:
-            req = urllib.request.Request(target, data=chat_body_b, headers=headers)
+            req = urllib.request.Request(target, data=json.dumps(chat_body).encode(), headers=headers)
             upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream))
         except urllib.error.HTTPError as e:
             err_body = e.read().decode()[:500]
             _freebuff_finish_run(token, run_id, "failed")
-            print(f"[freebuff] HTTP {e.code}: {err_body}", file=sys.stderr)
-            return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}})
+            print(f"[freebuff] thinking-disabled retry failed: HTTP {e.code}: {err_body[:300]}", file=sys.stderr)
+            return self.send_json(e.code, {"error": {"type": "freebuff_deepseek_thinking_error",
+                "message": "FreeBuff/DeepSeek V4 requires reasoning_content round-trip for tool-call sessions. Use Command Code provider for this model instead.", "upstream_error": _sanitize_err_body(err_body)}})
         except Exception as e:
             _freebuff_finish_run(token, run_id, "failed")
             return self.send_json(502, {"error": {"type": "proxy_error", "message": str(e)}})
-
         t0 = time.time()
         try:
             if stream:
@@ -4638,13 +4959,12 @@ class Handler(http.server.BaseHTTPRequestHandler):
                         self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                     except Exception:
                         pass
-
                 last_resp_id = [None]
                 last_output = [None]
                 last_status = [None]
                 finish_reason = [None]
-
-                def _on_fb_event(event):
+                reasoning_out = {}
+                def _on_fb_retry_event(event):
                     if tracker and tracker.cancelled.is_set():
                         return False
                     for line in event.strip().split("\n"):
@@ -4659,20 +4979,25 @@ class Handler(http.server.BaseHTTPRequestHandler):
                             except Exception:
                                 pass
                     return None
-
                 try:
                     self.stream_buffered_events(
-                        oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id")),
-                        on_event=_on_fb_event)
+                        oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id"),
+                                         _reasoning_out=reasoning_out),
+                        on_event=_on_fb_retry_event)
                 except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
-                    print(f"[{self._session_id}] [freebuff] client disconnected", file=sys.stderr)
                     return
-
                 success = finish_reason[0] != "length"
                 _record_usage("freebuff", model, success, time.time() - t0)
                 if last_resp_id[0] and input_data is not None:
                     store_response(last_resp_id[0], input_data, last_output[0])
-                print(f"[{self._session_id}] [freebuff] stream done status={last_status[0]} in {time.time()-t0:.1f}s", file=sys.stderr)
+                if last_resp_id[0] and reasoning_out.get("text") or reasoning_out.get("tool_calls"):
+                    asm = {"role": "assistant", "content": reasoning_out.get("text", "") or ""}
+                    if reasoning_out.get("tool_calls"):
+                        asm["tool_calls"] = reasoning_out["tool_calls"]
+                    if reasoning_out.get("text"):
+                        asm["reasoning_content"] = reasoning_out["text"]
+                    _ds_store_assistant(last_resp_id[0], asm)
+                print(f"[{self._session_id}] [freebuff] retry stream done status={last_status[0]} in {time.time()-t0:.1f}s", file=sys.stderr)
             else:
                 raw = upstream.read().decode()
                 chat_resp = json.loads(raw)
@@ -4681,7 +5006,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
                 rid = result.get("id")
                 if rid:
                     store_response(rid, input_data, result.get("output", []))
-                print(f"[{self._session_id}] [freebuff] non-stream done in {time.time()-t0:.1f}s", file=sys.stderr)
+                print(f"[{self._session_id}] [freebuff] retry non-stream done in {time.time()-t0:.1f}s", file=sys.stderr)
         finally:
             _freebuff_finish_run(token, run_id, "completed")