fix: add previous_response_id support for multi-turn tool calls (Crof fix)

Codex Desktop uses previous_response_id to chain conversation turns.
Without storing and resolving these, the proxy sent only the new
function_call_output to upstream providers, missing the original user
message and assistant tool call. This caused Crof.ai (and any provider
using tool calls) to stop after the first response.

- Add in-memory response store keyed by response ID (capped at 50 entries; the oldest-inserted entry is evicted first)
- resolve_previous_response() reconstructs full input chain on multi-turn
- Fix orphan message output item when response has only tool calls
- Applies to all backends: openai-compat, anthropic, command-code
- v2.1.2
This commit is contained in:
admin
2026-05-19 20:38:39 +04:00
Unverified
parent 389866a2c6
commit cb6381afe4
6 changed files with 93 additions and 9 deletions

View File

@@ -1,5 +1,14 @@
# Changelog # Changelog
## v2.1.2 (2026-05-19)
- **Fixed Crof.ai and other providers stopping after first tool call (root cause)**
- Proxy now stores responses and resolves `previous_response_id` for multi-turn conversations
- Codex Desktop uses `previous_response_id` to chain turns — proxy reconstructs full conversation context
- Without this fix, the proxy sent only the new `function_call_output` to upstream without the original user message or assistant tool call, causing the upstream model to return incomplete responses
- Fixed orphan message output item when response is only tool calls (no text content)
- Response store capped at 50 entries (the oldest-inserted entry is evicted first)
## v2.1.1 (2026-05-19) ## v2.1.1 (2026-05-19)
- Added Command Code backend to translation proxy (proprietary `/alpha/generate` API) - Added Command Code backend to translation proxy (proprietary `/alpha/generate` API)

View File

@@ -390,6 +390,7 @@ README.md # This file
| Proxy stops when terminal closes | SIGHUP to subprocess | Launcher uses `os.setsid` process groups | | Proxy stops when terminal closes | SIGHUP to subprocess | Launcher uses `os.setsid` process groups |
| Models not showing in picker | Wrong model catalog format | Must have both `slug` + `model` fields | | Models not showing in picker | Wrong model catalog format | Must have both `slug` + `model` fields |
| Codex hangs in "thinking" | Missing `response.completed` | Proxy emits full SSE event sequence | | Codex hangs in "thinking" | Missing `response.completed` | Proxy emits full SSE event sequence |
| Stops after first tool call (Crof) | `previous_response_id` not resolved | v2.1.2 stores and chains responses for multi-turn conversations |
--- ---

Binary file not shown.

Binary file not shown.

View File

@@ -24,6 +24,12 @@ model_catalog_json = ""
""" """
CHANGELOG = [ CHANGELOG = [
("2.1.2", "2026-05-19", [
"Fixed Crof.ai and other providers stopping after first tool call",
"Proxy now stores and resolves previous_response_id for multi-turn conversations",
"Codex Desktop uses previous_response_id to chain turns — proxy reconstructs full context",
"Fixed orphan message output item when response is only tool calls (no text)",
]),
("2.1.1", "2026-05-19", [ ("2.1.1", "2026-05-19", [
"Fixed proxy: map 'developer' role to 'system' for Chat Completions providers", "Fixed proxy: map 'developer' role to 'system' for Chat Completions providers",
"Fixed proxy: map 'developer' role to 'user' for Anthropic providers", "Fixed proxy: map 'developer' role to 'user' for Anthropic providers",

View File

@@ -88,6 +88,32 @@ CC_VERSION = CONFIG.get("cc_version", "")
_pool = uuid.uuid4().hex[:8] _pool = uuid.uuid4().hex[:8]
# In-memory cache of completed responses, keyed by response ID, so that a
# later request carrying ``previous_response_id`` can be expanded back into
# the full conversation. Eviction relies on dict insertion order (guaranteed
# since Python 3.7): the first key is always the oldest entry.
_response_store = {}
# Maximum number of responses kept; the oldest entries are evicted beyond it.
_MAX_STORED = 50


def store_response(resp_id, input_data, output_items):
    """Remember a completed response so a follow-up turn can reference it.

    Args:
        resp_id: ID of the response. Falsy IDs (``None``, ``""``) are ignored.
        input_data: The input that produced the response (string or list of
            input items).
        output_items: The output items the response contained.
    """
    if not resp_id:
        return
    # Drop any existing entry first: plain assignment to an existing key keeps
    # its original position, which would leave a just-refreshed entry at the
    # front of the eviction queue.
    _response_store.pop(resp_id, None)
    _response_store[resp_id] = {"input": input_data, "output": output_items}
    while len(_response_store) > _MAX_STORED:
        # next(iter(...)) yields the oldest key without copying every key.
        del _response_store[next(iter(_response_store))]
def _text_as_user_message(text):
    """Wrap plain text as a single user ``message`` input item."""
    return {
        "type": "message",
        "role": "user",
        "content": [{"type": "input_text", "text": str(text)}],
    }


def resolve_previous_response(body):
    """Expand a request's input with the conversation turn it continues.

    If ``body["previous_response_id"]`` names a stored response, the result
    is that response's input, followed by its output items, followed by the
    new input. Otherwise the request's own input is returned untouched.

    Args:
        body: Parsed request body (a dict); ``previous_response_id`` and
            ``input`` are the only keys read.

    Returns:
        The original ``input`` value when there is nothing to resolve, or a
        new list of input items containing the reconstructed chain. Stored
        lists are copied, never mutated.
    """
    prev_id = body.get("previous_response_id")
    input_data = body.get("input", "")
    # Single lookup instead of "in" followed by indexing, so an entry evicted
    # between the two steps cannot raise KeyError.
    stored = _response_store.get(prev_id) if prev_id else None
    if stored is None:
        return input_data

    prev_input = stored["input"]
    # Tolerate a stored output of None (e.g. a completed event without items).
    prev_output = stored["output"] or []

    if isinstance(input_data, list):
        new_input = list(input_data)
    elif isinstance(input_data, str) and input_data:
        # A plain-string follow-up is a user message; keep it rather than
        # silently dropping the new turn's text.
        new_input = [_text_as_user_message(input_data)]
    else:
        new_input = []

    if isinstance(prev_input, list):
        history = list(prev_input)
    else:
        # The earlier turn was sent as plain text; normalise it to an item.
        history = [_text_as_user_message(prev_input)]

    return history + list(prev_output) + new_input
_HOP_BY_HOP_HEADERS = { _HOP_BY_HOP_HEADERS = {
"connection", "connection",
"keep-alive", "keep-alive",
@@ -236,15 +262,12 @@ def oa_stream_to_sse(chat_stream, model, req_id):
text_buf = "" text_buf = ""
tc_buf = {} tc_buf = {}
fr = None fr = None
msg_opened = False
yield emit("response.created", {"type": "response.created", yield emit("response.created", {"type": "response.created",
"response": {"id": resp_id, "object": "response", "model": model, "response": {"id": resp_id, "object": "response", "model": model,
"status": "in_progress", "created": int(time.time()), "output": []}}) "status": "in_progress", "created": int(time.time()), "output": []}})
yield emit("response.in_progress", {"type": "response.in_progress", "response": {"id": resp_id}}) yield emit("response.in_progress", {"type": "response.in_progress", "response": {"id": resp_id}})
yield emit("response.output_item.added", {"type": "response.output_item.added",
"item": {"type": "message", "id": msg_id, "role": "assistant", "status": "in_progress", "content": []}})
yield emit("response.content_part.added", {"type": "response.content_part.added",
"part": {"type": "output_text", "text": "", "annotations": []}, "item_id": msg_id})
for line in chat_stream: for line in chat_stream:
line = line.decode("utf-8", errors="replace").strip() line = line.decode("utf-8", errors="replace").strip()
@@ -264,6 +287,13 @@ def oa_stream_to_sse(chat_stream, model, req_id):
content = delta.get("content") content = delta.get("content")
if content: if content:
if not msg_opened:
msg_id = uid("msg")
yield emit("response.output_item.added", {"type": "response.output_item.added",
"item": {"type": "message", "id": msg_id, "role": "assistant", "status": "in_progress", "content": []}})
yield emit("response.content_part.added", {"type": "response.content_part.added",
"part": {"type": "output_text", "text": "", "annotations": []}, "item_id": msg_id})
msg_opened = True
text_buf += content text_buf += content
yield emit("response.output_text.delta", {"type": "response.output_text.delta", yield emit("response.output_text.delta", {"type": "response.output_text.delta",
"delta": content, "item_id": msg_id, "content_index": 0}) "delta": content, "item_id": msg_id, "content_index": 0})
@@ -288,7 +318,7 @@ def oa_stream_to_sse(chat_stream, model, req_id):
if rc: if rc:
yield emit("response.reasoning.delta", {"type": "response.reasoning.delta", "delta": rc}) yield emit("response.reasoning.delta", {"type": "response.reasoning.delta", "delta": rc})
if text_buf: if msg_opened:
yield emit("response.output_text.done", {"type": "response.output_text.done", yield emit("response.output_text.done", {"type": "response.output_text.done",
"text": text_buf, "item_id": msg_id, "content_index": 0}) "text": text_buf, "item_id": msg_id, "content_index": 0})
yield emit("response.content_part.done", {"type": "response.content_part.done", yield emit("response.content_part.done", {"type": "response.content_part.done",
@@ -308,7 +338,7 @@ def oa_stream_to_sse(chat_stream, model, req_id):
fm = {"stop": "completed", "length": "incomplete", "tool_calls": "completed", "content_filter": "incomplete"} fm = {"stop": "completed", "length": "incomplete", "tool_calls": "completed", "content_filter": "incomplete"}
status = fm.get(fr, "incomplete") status = fm.get(fr, "incomplete")
final_out = [] final_out = []
if text_buf: if msg_opened:
final_out.append({"type": "message", "id": msg_id, "role": "assistant", "status": "completed", final_out.append({"type": "message", "id": msg_id, "role": "assistant", "status": "completed",
"content": [{"type": "output_text", "text": text_buf, "annotations": []}]}) "content": [{"type": "output_text", "text": text_buf, "annotations": []}]})
for idx in sorted(tc_buf): for idx in sorted(tc_buf):
@@ -646,6 +676,12 @@ class Handler(http.server.BaseHTTPRequestHandler):
except Exception as e: except Exception as e:
return self.send_json(400, {"error": {"message": f"Bad request: {e}"}}) return self.send_json(400, {"error": {"message": f"Bad request: {e}"}})
input_data = resolve_previous_response(body)
body["input"] = input_data
prev_id = body.get("previous_response_id")
input_types = [i.get("type") for i in input_data] if isinstance(input_data, list) else str(type(input_data))
print(f"[REQUEST] prev_id={prev_id} resolved_input_types={input_types}", file=sys.stderr)
model = body.get("model", MODELS[0]["id"] if MODELS else "unknown") model = body.get("model", MODELS[0]["id"] if MODELS else "unknown")
stream = body.get("stream", False) stream = body.get("stream", False)
@@ -686,7 +722,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
) )
self._forward(req, stream, model, self._forward(req, stream, model,
lambda r: oa_resp_to_responses(json.loads(r.read()), model), lambda r: oa_resp_to_responses(json.loads(r.read()), model),
lambda s: oa_stream_to_sse(s, model, body.get("request_id") or body.get("id"))) lambda s: oa_stream_to_sse(s, model, body.get("request_id") or body.get("id")),
input_data=body.get("input", ""))
def _handle_anthropic(self, body, model, stream): def _handle_anthropic(self, body, model, stream):
input_data = body.get("input", "") input_data = body.get("input", "")
@@ -721,7 +758,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
) )
self._forward(req, stream, model, self._forward(req, stream, model,
lambda r: an_resp_to_responses(json.loads(r.read()), model), lambda r: an_resp_to_responses(json.loads(r.read()), model),
lambda s: an_stream_to_sse(s, model, body.get("request_id") or body.get("id"))) lambda s: an_stream_to_sse(s, model, body.get("request_id") or body.get("id")),
input_data=body.get("input", ""))
def _handle_command_code(self, body, model, stream): def _handle_command_code(self, body, model, stream):
input_data = body.get("input", "") input_data = body.get("input", "")
@@ -800,9 +838,21 @@ class Handler(http.server.BaseHTTPRequestHandler):
self.send_header("Cache-Control", "no-cache") self.send_header("Cache-Control", "no-cache")
self.send_header("Connection", "keep-alive") self.send_header("Connection", "keep-alive")
self.end_headers() self.end_headers()
last_resp_id = None
last_output = None
for event in cc_stream_to_sse(upstream, model, body.get("request_id") or body.get("id")): for event in cc_stream_to_sse(upstream, model, body.get("request_id") or body.get("id")):
self.wfile.write(event.encode("utf-8")) self.wfile.write(event.encode("utf-8"))
self.wfile.flush() self.wfile.flush()
for line in event.strip().split("\n"):
if line.startswith("data: "):
try:
d = json.loads(line[6:])
if d.get("type") == "response.completed":
last_resp_id = d.get("response", {}).get("id")
last_output = d.get("response", {}).get("output", [])
except: pass
if last_resp_id:
store_response(last_resp_id, body.get("input", ""), last_output)
else: else:
try: try:
upstream = urllib.request.urlopen(req) upstream = urllib.request.urlopen(req)
@@ -816,8 +866,11 @@ class Handler(http.server.BaseHTTPRequestHandler):
lines = raw.strip().split("\n") lines = raw.strip().split("\n")
result = cc_resp_to_responses(lines, model) result = cc_resp_to_responses(lines, model)
self.send_json(200, result) self.send_json(200, result)
rid = result.get("id")
if rid:
store_response(rid, body.get("input", ""), result.get("output", []))
def _forward(self, req, stream, model, nonstream_fn, stream_fn): def _forward(self, req, stream, model, nonstream_fn, stream_fn, input_data=None):
try: try:
upstream = urllib.request.urlopen(req) upstream = urllib.request.urlopen(req)
except urllib.error.HTTPError as e: except urllib.error.HTTPError as e:
@@ -832,12 +885,27 @@ class Handler(http.server.BaseHTTPRequestHandler):
self.send_header("Cache-Control", "no-cache") self.send_header("Cache-Control", "no-cache")
self.send_header("Connection", "keep-alive") self.send_header("Connection", "keep-alive")
self.end_headers() self.end_headers()
last_resp_id = None
last_output = None
for event in stream_fn(upstream): for event in stream_fn(upstream):
self.wfile.write(event.encode("utf-8")) self.wfile.write(event.encode("utf-8"))
self.wfile.flush() self.wfile.flush()
for line in event.strip().split("\n"):
if line.startswith("data: "):
try:
d = json.loads(line[6:])
if d.get("type") == "response.completed":
last_resp_id = d.get("response", {}).get("id")
last_output = d.get("response", {}).get("output", [])
except: pass
if last_resp_id and input_data is not None:
store_response(last_resp_id, input_data, last_output)
else: else:
result = nonstream_fn(upstream) result = nonstream_fn(upstream)
self.send_json(200, result) self.send_json(200, result)
rid = result.get("id")
if rid and input_data is not None:
store_response(rid, input_data, result.get("output", []))
def send_json(self, status, data): def send_json(self, status, data):
body = json.dumps(data).encode() body = json.dumps(data).encode()