fix: add previous_response_id support for multi-turn tool calls (Crof fix)

Codex Desktop uses previous_response_id to chain conversation turns.
Without storing and resolving these, the proxy sent only the new
function_call_output to upstream providers, missing the original user
message and assistant tool call. This caused Crof.ai (and any provider
using tool calls) to stop after the first response.

- Add in-memory response store (50 entry LRU) keyed by response ID
- resolve_previous_response() reconstructs full input chain on multi-turn
- Fix orphan message output item when response has only tool calls
- Applies to all backends: openai-compat, anthropic, command-code
- v2.1.2
This commit is contained in:
admin
2026-05-19 20:38:39 +04:00
Unverified
parent 389866a2c6
commit cb6381afe4
6 changed files with 93 additions and 9 deletions

View File

@@ -88,6 +88,32 @@ CC_VERSION = CONFIG.get("cc_version", "")
_pool = uuid.uuid4().hex[:8]
_response_store = {}
_MAX_STORED = 50
def store_response(resp_id, input_data, output_items):
if not resp_id:
return
_response_store[resp_id] = {"input": input_data, "output": output_items}
if len(_response_store) > _MAX_STORED:
oldest = list(_response_store.keys())[0]
del _response_store[oldest]
def resolve_previous_response(body):
prev_id = body.get("previous_response_id")
input_data = body.get("input", "")
if not prev_id or prev_id not in _response_store:
return input_data
stored = _response_store[prev_id]
prev_input = stored["input"]
prev_output = stored["output"]
new_input = input_data if isinstance(input_data, list) else []
if isinstance(prev_input, list):
combined = list(prev_input) + list(prev_output) + new_input
else:
combined = [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": str(prev_input)}]}] + list(prev_output) + new_input
return combined
_HOP_BY_HOP_HEADERS = {
"connection",
"keep-alive",
@@ -236,15 +262,12 @@ def oa_stream_to_sse(chat_stream, model, req_id):
text_buf = ""
tc_buf = {}
fr = None
msg_opened = False
yield emit("response.created", {"type": "response.created",
"response": {"id": resp_id, "object": "response", "model": model,
"status": "in_progress", "created": int(time.time()), "output": []}})
yield emit("response.in_progress", {"type": "response.in_progress", "response": {"id": resp_id}})
yield emit("response.output_item.added", {"type": "response.output_item.added",
"item": {"type": "message", "id": msg_id, "role": "assistant", "status": "in_progress", "content": []}})
yield emit("response.content_part.added", {"type": "response.content_part.added",
"part": {"type": "output_text", "text": "", "annotations": []}, "item_id": msg_id})
for line in chat_stream:
line = line.decode("utf-8", errors="replace").strip()
@@ -264,6 +287,13 @@ def oa_stream_to_sse(chat_stream, model, req_id):
content = delta.get("content")
if content:
if not msg_opened:
msg_id = uid("msg")
yield emit("response.output_item.added", {"type": "response.output_item.added",
"item": {"type": "message", "id": msg_id, "role": "assistant", "status": "in_progress", "content": []}})
yield emit("response.content_part.added", {"type": "response.content_part.added",
"part": {"type": "output_text", "text": "", "annotations": []}, "item_id": msg_id})
msg_opened = True
text_buf += content
yield emit("response.output_text.delta", {"type": "response.output_text.delta",
"delta": content, "item_id": msg_id, "content_index": 0})
@@ -288,7 +318,7 @@ def oa_stream_to_sse(chat_stream, model, req_id):
if rc:
yield emit("response.reasoning.delta", {"type": "response.reasoning.delta", "delta": rc})
if text_buf:
if msg_opened:
yield emit("response.output_text.done", {"type": "response.output_text.done",
"text": text_buf, "item_id": msg_id, "content_index": 0})
yield emit("response.content_part.done", {"type": "response.content_part.done",
@@ -308,7 +338,7 @@ def oa_stream_to_sse(chat_stream, model, req_id):
fm = {"stop": "completed", "length": "incomplete", "tool_calls": "completed", "content_filter": "incomplete"}
status = fm.get(fr, "incomplete")
final_out = []
if text_buf:
if msg_opened:
final_out.append({"type": "message", "id": msg_id, "role": "assistant", "status": "completed",
"content": [{"type": "output_text", "text": text_buf, "annotations": []}]})
for idx in sorted(tc_buf):
@@ -646,6 +676,12 @@ class Handler(http.server.BaseHTTPRequestHandler):
except Exception as e:
return self.send_json(400, {"error": {"message": f"Bad request: {e}"}})
input_data = resolve_previous_response(body)
body["input"] = input_data
prev_id = body.get("previous_response_id")
input_types = [i.get("type") for i in input_data] if isinstance(input_data, list) else str(type(input_data))
print(f"[REQUEST] prev_id={prev_id} resolved_input_types={input_types}", file=sys.stderr)
model = body.get("model", MODELS[0]["id"] if MODELS else "unknown")
stream = body.get("stream", False)
@@ -686,7 +722,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
)
self._forward(req, stream, model,
lambda r: oa_resp_to_responses(json.loads(r.read()), model),
lambda s: oa_stream_to_sse(s, model, body.get("request_id") or body.get("id")))
lambda s: oa_stream_to_sse(s, model, body.get("request_id") or body.get("id")),
input_data=body.get("input", ""))
def _handle_anthropic(self, body, model, stream):
input_data = body.get("input", "")
@@ -721,7 +758,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
)
self._forward(req, stream, model,
lambda r: an_resp_to_responses(json.loads(r.read()), model),
lambda s: an_stream_to_sse(s, model, body.get("request_id") or body.get("id")))
lambda s: an_stream_to_sse(s, model, body.get("request_id") or body.get("id")),
input_data=body.get("input", ""))
def _handle_command_code(self, body, model, stream):
input_data = body.get("input", "")
@@ -800,9 +838,21 @@ class Handler(http.server.BaseHTTPRequestHandler):
self.send_header("Cache-Control", "no-cache")
self.send_header("Connection", "keep-alive")
self.end_headers()
last_resp_id = None
last_output = None
for event in cc_stream_to_sse(upstream, model, body.get("request_id") or body.get("id")):
self.wfile.write(event.encode("utf-8"))
self.wfile.flush()
for line in event.strip().split("\n"):
if line.startswith("data: "):
try:
d = json.loads(line[6:])
if d.get("type") == "response.completed":
last_resp_id = d.get("response", {}).get("id")
last_output = d.get("response", {}).get("output", [])
except: pass
if last_resp_id:
store_response(last_resp_id, body.get("input", ""), last_output)
else:
try:
upstream = urllib.request.urlopen(req)
@@ -816,8 +866,11 @@ class Handler(http.server.BaseHTTPRequestHandler):
lines = raw.strip().split("\n")
result = cc_resp_to_responses(lines, model)
self.send_json(200, result)
rid = result.get("id")
if rid:
store_response(rid, body.get("input", ""), result.get("output", []))
def _forward(self, req, stream, model, nonstream_fn, stream_fn):
def _forward(self, req, stream, model, nonstream_fn, stream_fn, input_data=None):
try:
upstream = urllib.request.urlopen(req)
except urllib.error.HTTPError as e:
@@ -832,12 +885,27 @@ class Handler(http.server.BaseHTTPRequestHandler):
self.send_header("Cache-Control", "no-cache")
self.send_header("Connection", "keep-alive")
self.end_headers()
last_resp_id = None
last_output = None
for event in stream_fn(upstream):
self.wfile.write(event.encode("utf-8"))
self.wfile.flush()
for line in event.strip().split("\n"):
if line.startswith("data: "):
try:
d = json.loads(line[6:])
if d.get("type") == "response.completed":
last_resp_id = d.get("response", {}).get("id")
last_output = d.get("response", {}).get("output", [])
except: pass
if last_resp_id and input_data is not None:
store_response(last_resp_id, input_data, last_output)
else:
result = nonstream_fn(upstream)
self.send_json(200, result)
rid = result.get("id")
if rid and input_data is not None:
store_response(rid, input_data, result.get("output", []))
def send_json(self, status, data):
body = json.dumps(data).encode()