fix: add previous_response_id support for multi-turn tool calls (Crof fix)

Codex Desktop uses previous_response_id to chain conversation turns. Without storing and resolving these, the proxy sent only the new function_call_output to upstream providers, missing the original user message and assistant tool call. This caused Crof.ai (and any provider using tool calls) to stop after the first response. - Add in-memory response store (50 entry LRU) keyed by response ID - resolve_previous_response() reconstructs full input chain on multi-turn - Fix orphan message output item when response has only tool calls - Applies to all backends: openai-compat, anthropic, command-code - v2.1.2
2026-05-19 20:38:39 +04:00
parent 389866a2c6
commit cb6381afe4
6 changed files with 93 additions and 9 deletions
--- a/src/translate-proxy.py
+++ b/src/translate-proxy.py
@@ -88,6 +88,32 @@ CC_VERSION = CONFIG.get("cc_version", "")

 _pool = uuid.uuid4().hex[:8]

+_response_store = {}
+_MAX_STORED = 50
+
+def store_response(resp_id, input_data, output_items):
+    if not resp_id:
+        return
+    _response_store[resp_id] = {"input": input_data, "output": output_items}
+    if len(_response_store) > _MAX_STORED:
+        oldest = list(_response_store.keys())[0]
+        del _response_store[oldest]
+
+def resolve_previous_response(body):
+    prev_id = body.get("previous_response_id")
+    input_data = body.get("input", "")
+    if not prev_id or prev_id not in _response_store:
+        return input_data
+    stored = _response_store[prev_id]
+    prev_input = stored["input"]
+    prev_output = stored["output"]
+    new_input = input_data if isinstance(input_data, list) else []
+    if isinstance(prev_input, list):
+        combined = list(prev_input) + list(prev_output) + new_input
+    else:
+        combined = [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": str(prev_input)}]}] + list(prev_output) + new_input
+    return combined
+
 _HOP_BY_HOP_HEADERS = {
    "connection",
    "keep-alive",
@@ -236,15 +262,12 @@ def oa_stream_to_sse(chat_stream, model, req_id):
    text_buf = ""
    tc_buf = {}
    fr = None
+    msg_opened = False

    yield emit("response.created", {"type": "response.created",
        "response": {"id": resp_id, "object": "response", "model": model,
                     "status": "in_progress", "created": int(time.time()), "output": []}})
    yield emit("response.in_progress", {"type": "response.in_progress", "response": {"id": resp_id}})
-    yield emit("response.output_item.added", {"type": "response.output_item.added",
-        "item": {"type": "message", "id": msg_id, "role": "assistant", "status": "in_progress", "content": []}})
-    yield emit("response.content_part.added", {"type": "response.content_part.added",
-        "part": {"type": "output_text", "text": "", "annotations": []}, "item_id": msg_id})

    for line in chat_stream:
        line = line.decode("utf-8", errors="replace").strip()
@@ -264,6 +287,13 @@ def oa_stream_to_sse(chat_stream, model, req_id):

        content = delta.get("content")
        if content:
+            if not msg_opened:
+                msg_id = uid("msg")
+                yield emit("response.output_item.added", {"type": "response.output_item.added",
+                    "item": {"type": "message", "id": msg_id, "role": "assistant", "status": "in_progress", "content": []}})
+                yield emit("response.content_part.added", {"type": "response.content_part.added",
+                    "part": {"type": "output_text", "text": "", "annotations": []}, "item_id": msg_id})
+                msg_opened = True
            text_buf += content
            yield emit("response.output_text.delta", {"type": "response.output_text.delta",
                        "delta": content, "item_id": msg_id, "content_index": 0})
@@ -288,7 +318,7 @@ def oa_stream_to_sse(chat_stream, model, req_id):
        if rc:
            yield emit("response.reasoning.delta", {"type": "response.reasoning.delta", "delta": rc})

-    if text_buf:
+    if msg_opened:
        yield emit("response.output_text.done", {"type": "response.output_text.done",
                    "text": text_buf, "item_id": msg_id, "content_index": 0})
        yield emit("response.content_part.done", {"type": "response.content_part.done",
@@ -308,7 +338,7 @@ def oa_stream_to_sse(chat_stream, model, req_id):
    fm = {"stop": "completed", "length": "incomplete", "tool_calls": "completed", "content_filter": "incomplete"}
    status = fm.get(fr, "incomplete")
    final_out = []
-    if text_buf:
+    if msg_opened:
        final_out.append({"type": "message", "id": msg_id, "role": "assistant", "status": "completed",
                          "content": [{"type": "output_text", "text": text_buf, "annotations": []}]})
    for idx in sorted(tc_buf):
@@ -646,6 +676,12 @@ class Handler(http.server.BaseHTTPRequestHandler):
        except Exception as e:
            return self.send_json(400, {"error": {"message": f"Bad request: {e}"}})

+        input_data = resolve_previous_response(body)
+        body["input"] = input_data
+        prev_id = body.get("previous_response_id")
+        input_types = [i.get("type") for i in input_data] if isinstance(input_data, list) else str(type(input_data))
+        print(f"[REQUEST] prev_id={prev_id} resolved_input_types={input_types}", file=sys.stderr)
+
        model = body.get("model", MODELS[0]["id"] if MODELS else "unknown")
        stream = body.get("stream", False)

@@ -686,7 +722,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
        )
        self._forward(req, stream, model,
            lambda r: oa_resp_to_responses(json.loads(r.read()), model),
-            lambda s: oa_stream_to_sse(s, model, body.get("request_id") or body.get("id")))
+            lambda s: oa_stream_to_sse(s, model, body.get("request_id") or body.get("id")),
+            input_data=body.get("input", ""))

    def _handle_anthropic(self, body, model, stream):
        input_data = body.get("input", "")
@@ -721,7 +758,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
        )
        self._forward(req, stream, model,
            lambda r: an_resp_to_responses(json.loads(r.read()), model),
-            lambda s: an_stream_to_sse(s, model, body.get("request_id") or body.get("id")))
+            lambda s: an_stream_to_sse(s, model, body.get("request_id") or body.get("id")),
+            input_data=body.get("input", ""))

    def _handle_command_code(self, body, model, stream):
        input_data = body.get("input", "")
@@ -800,9 +838,21 @@ class Handler(http.server.BaseHTTPRequestHandler):
            self.send_header("Cache-Control", "no-cache")
            self.send_header("Connection", "keep-alive")
            self.end_headers()
+            last_resp_id = None
+            last_output = None
            for event in cc_stream_to_sse(upstream, model, body.get("request_id") or body.get("id")):
                self.wfile.write(event.encode("utf-8"))
                self.wfile.flush()
+                for line in event.strip().split("\n"):
+                    if line.startswith("data: "):
+                        try:
+                            d = json.loads(line[6:])
+                            if d.get("type") == "response.completed":
+                                last_resp_id = d.get("response", {}).get("id")
+                                last_output = d.get("response", {}).get("output", [])
+                        except: pass
+            if last_resp_id:
+                store_response(last_resp_id, body.get("input", ""), last_output)
        else:
            try:
                upstream = urllib.request.urlopen(req)
@@ -816,8 +866,11 @@ class Handler(http.server.BaseHTTPRequestHandler):
            lines = raw.strip().split("\n")
            result = cc_resp_to_responses(lines, model)
            self.send_json(200, result)
+            rid = result.get("id")
+            if rid:
+                store_response(rid, body.get("input", ""), result.get("output", []))

-    def _forward(self, req, stream, model, nonstream_fn, stream_fn):
+    def _forward(self, req, stream, model, nonstream_fn, stream_fn, input_data=None):
        try:
            upstream = urllib.request.urlopen(req)
        except urllib.error.HTTPError as e:
@@ -832,12 +885,27 @@ class Handler(http.server.BaseHTTPRequestHandler):
            self.send_header("Cache-Control", "no-cache")
            self.send_header("Connection", "keep-alive")
            self.end_headers()
+            last_resp_id = None
+            last_output = None
            for event in stream_fn(upstream):
                self.wfile.write(event.encode("utf-8"))
                self.wfile.flush()
+                for line in event.strip().split("\n"):
+                    if line.startswith("data: "):
+                        try:
+                            d = json.loads(line[6:])
+                            if d.get("type") == "response.completed":
+                                last_resp_id = d.get("response", {}).get("id")
+                                last_output = d.get("response", {}).get("output", [])
+                        except: pass
+            if last_resp_id and input_data is not None:
+                store_response(last_resp_id, input_data, last_output)
        else:
            result = nonstream_fn(upstream)
            self.send_json(200, result)
+            rid = result.get("id")
+            if rid and input_data is not None:
+                store_response(rid, input_data, result.get("output", []))

    def send_json(self, status, data):
        body = json.dumps(data).encode()