fix: add previous_response_id support for multi-turn tool calls (Crof fix)
Codex Desktop uses previous_response_id to chain conversation turns. Without storing and resolving these, the proxy sent only the new function_call_output to upstream providers, missing the original user message and assistant tool call. This caused Crof.ai (and any provider using tool calls) to stop after the first response. - Add in-memory response store (50 entry LRU) keyed by response ID - resolve_previous_response() reconstructs full input chain on multi-turn - Fix orphan message output item when response has only tool calls - Applies to all backends: openai-compat, anthropic, command-code - v2.1.2
This commit is contained in:
@@ -1,5 +1,14 @@
|
||||
# Changelog
|
||||
|
||||
## v2.1.2 (2026-05-19)
|
||||
|
||||
- **Fixed Crof.ai and other providers stopping after first tool call (root cause)**
|
||||
- Proxy now stores responses and resolves `previous_response_id` for multi-turn conversations
|
||||
- Codex Desktop uses `previous_response_id` to chain turns — proxy reconstructs full conversation context
|
||||
- Without this fix, the proxy sent only the new `function_call_output` to upstream without the original user message or assistant tool call, causing the upstream model to return incomplete responses
|
||||
- Fixed orphan message output item when response is only tool calls (no text content)
|
||||
- Response store capped at 50 entries (LRU eviction)
|
||||
|
||||
## v2.1.1 (2026-05-19)
|
||||
|
||||
- Added Command Code backend to translation proxy (proprietary `/alpha/generate` API)
|
||||
|
||||
@@ -390,6 +390,7 @@ README.md # This file
|
||||
| Proxy stops when terminal closes | SIGHUP to subprocess | Launcher uses `os.setsid` process groups |
|
||||
| Models not showing in picker | Wrong model catalog format | Must have both `slug` + `model` fields |
|
||||
| Codex hangs in "thinking" | Missing `response.completed` | Proxy emits full SSE event sequence |
|
||||
| Stops after first tool call (Crof) | `previous_response_id` not resolved | V2.1.2 stores and chains responses for multi-turn |
|
||||
|
||||
---
|
||||
|
||||
|
||||
Binary file not shown.
BIN
codex-launcher_2.1.2_all.deb
Normal file
BIN
codex-launcher_2.1.2_all.deb
Normal file
Binary file not shown.
@@ -24,6 +24,12 @@ model_catalog_json = ""
|
||||
"""
|
||||
|
||||
CHANGELOG = [
|
||||
("2.1.2", "2026-05-19", [
|
||||
"Fixed Crof.ai and other providers stopping after first tool call",
|
||||
"Proxy now stores and resolves previous_response_id for multi-turn conversations",
|
||||
"Codex Desktop uses previous_response_id to chain turns — proxy reconstructs full context",
|
||||
"Fixed orphan message output item when response is only tool calls (no text)",
|
||||
]),
|
||||
("2.1.1", "2026-05-19", [
|
||||
"Fixed proxy: map 'developer' role to 'system' for Chat Completions providers",
|
||||
"Fixed proxy: map 'developer' role to 'user' for Anthropic providers",
|
||||
|
||||
@@ -88,6 +88,32 @@ CC_VERSION = CONFIG.get("cc_version", "")
|
||||
|
||||
_pool = uuid.uuid4().hex[:8]
|
||||
|
||||
_response_store = {}
|
||||
_MAX_STORED = 50
|
||||
|
||||
def store_response(resp_id, input_data, output_items):
|
||||
if not resp_id:
|
||||
return
|
||||
_response_store[resp_id] = {"input": input_data, "output": output_items}
|
||||
if len(_response_store) > _MAX_STORED:
|
||||
oldest = list(_response_store.keys())[0]
|
||||
del _response_store[oldest]
|
||||
|
||||
def resolve_previous_response(body):
|
||||
prev_id = body.get("previous_response_id")
|
||||
input_data = body.get("input", "")
|
||||
if not prev_id or prev_id not in _response_store:
|
||||
return input_data
|
||||
stored = _response_store[prev_id]
|
||||
prev_input = stored["input"]
|
||||
prev_output = stored["output"]
|
||||
new_input = input_data if isinstance(input_data, list) else []
|
||||
if isinstance(prev_input, list):
|
||||
combined = list(prev_input) + list(prev_output) + new_input
|
||||
else:
|
||||
combined = [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": str(prev_input)}]}] + list(prev_output) + new_input
|
||||
return combined
|
||||
|
||||
_HOP_BY_HOP_HEADERS = {
|
||||
"connection",
|
||||
"keep-alive",
|
||||
@@ -236,15 +262,12 @@ def oa_stream_to_sse(chat_stream, model, req_id):
|
||||
text_buf = ""
|
||||
tc_buf = {}
|
||||
fr = None
|
||||
msg_opened = False
|
||||
|
||||
yield emit("response.created", {"type": "response.created",
|
||||
"response": {"id": resp_id, "object": "response", "model": model,
|
||||
"status": "in_progress", "created": int(time.time()), "output": []}})
|
||||
yield emit("response.in_progress", {"type": "response.in_progress", "response": {"id": resp_id}})
|
||||
yield emit("response.output_item.added", {"type": "response.output_item.added",
|
||||
"item": {"type": "message", "id": msg_id, "role": "assistant", "status": "in_progress", "content": []}})
|
||||
yield emit("response.content_part.added", {"type": "response.content_part.added",
|
||||
"part": {"type": "output_text", "text": "", "annotations": []}, "item_id": msg_id})
|
||||
|
||||
for line in chat_stream:
|
||||
line = line.decode("utf-8", errors="replace").strip()
|
||||
@@ -264,6 +287,13 @@ def oa_stream_to_sse(chat_stream, model, req_id):
|
||||
|
||||
content = delta.get("content")
|
||||
if content:
|
||||
if not msg_opened:
|
||||
msg_id = uid("msg")
|
||||
yield emit("response.output_item.added", {"type": "response.output_item.added",
|
||||
"item": {"type": "message", "id": msg_id, "role": "assistant", "status": "in_progress", "content": []}})
|
||||
yield emit("response.content_part.added", {"type": "response.content_part.added",
|
||||
"part": {"type": "output_text", "text": "", "annotations": []}, "item_id": msg_id})
|
||||
msg_opened = True
|
||||
text_buf += content
|
||||
yield emit("response.output_text.delta", {"type": "response.output_text.delta",
|
||||
"delta": content, "item_id": msg_id, "content_index": 0})
|
||||
@@ -288,7 +318,7 @@ def oa_stream_to_sse(chat_stream, model, req_id):
|
||||
if rc:
|
||||
yield emit("response.reasoning.delta", {"type": "response.reasoning.delta", "delta": rc})
|
||||
|
||||
if text_buf:
|
||||
if msg_opened:
|
||||
yield emit("response.output_text.done", {"type": "response.output_text.done",
|
||||
"text": text_buf, "item_id": msg_id, "content_index": 0})
|
||||
yield emit("response.content_part.done", {"type": "response.content_part.done",
|
||||
@@ -308,7 +338,7 @@ def oa_stream_to_sse(chat_stream, model, req_id):
|
||||
fm = {"stop": "completed", "length": "incomplete", "tool_calls": "completed", "content_filter": "incomplete"}
|
||||
status = fm.get(fr, "incomplete")
|
||||
final_out = []
|
||||
if text_buf:
|
||||
if msg_opened:
|
||||
final_out.append({"type": "message", "id": msg_id, "role": "assistant", "status": "completed",
|
||||
"content": [{"type": "output_text", "text": text_buf, "annotations": []}]})
|
||||
for idx in sorted(tc_buf):
|
||||
@@ -646,6 +676,12 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
except Exception as e:
|
||||
return self.send_json(400, {"error": {"message": f"Bad request: {e}"}})
|
||||
|
||||
input_data = resolve_previous_response(body)
|
||||
body["input"] = input_data
|
||||
prev_id = body.get("previous_response_id")
|
||||
input_types = [i.get("type") for i in input_data] if isinstance(input_data, list) else str(type(input_data))
|
||||
print(f"[REQUEST] prev_id={prev_id} resolved_input_types={input_types}", file=sys.stderr)
|
||||
|
||||
model = body.get("model", MODELS[0]["id"] if MODELS else "unknown")
|
||||
stream = body.get("stream", False)
|
||||
|
||||
@@ -686,7 +722,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
)
|
||||
self._forward(req, stream, model,
|
||||
lambda r: oa_resp_to_responses(json.loads(r.read()), model),
|
||||
lambda s: oa_stream_to_sse(s, model, body.get("request_id") or body.get("id")))
|
||||
lambda s: oa_stream_to_sse(s, model, body.get("request_id") or body.get("id")),
|
||||
input_data=body.get("input", ""))
|
||||
|
||||
def _handle_anthropic(self, body, model, stream):
|
||||
input_data = body.get("input", "")
|
||||
@@ -721,7 +758,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
)
|
||||
self._forward(req, stream, model,
|
||||
lambda r: an_resp_to_responses(json.loads(r.read()), model),
|
||||
lambda s: an_stream_to_sse(s, model, body.get("request_id") or body.get("id")))
|
||||
lambda s: an_stream_to_sse(s, model, body.get("request_id") or body.get("id")),
|
||||
input_data=body.get("input", ""))
|
||||
|
||||
def _handle_command_code(self, body, model, stream):
|
||||
input_data = body.get("input", "")
|
||||
@@ -800,9 +838,21 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
self.send_header("Cache-Control", "no-cache")
|
||||
self.send_header("Connection", "keep-alive")
|
||||
self.end_headers()
|
||||
last_resp_id = None
|
||||
last_output = None
|
||||
for event in cc_stream_to_sse(upstream, model, body.get("request_id") or body.get("id")):
|
||||
self.wfile.write(event.encode("utf-8"))
|
||||
self.wfile.flush()
|
||||
for line in event.strip().split("\n"):
|
||||
if line.startswith("data: "):
|
||||
try:
|
||||
d = json.loads(line[6:])
|
||||
if d.get("type") == "response.completed":
|
||||
last_resp_id = d.get("response", {}).get("id")
|
||||
last_output = d.get("response", {}).get("output", [])
|
||||
except: pass
|
||||
if last_resp_id:
|
||||
store_response(last_resp_id, body.get("input", ""), last_output)
|
||||
else:
|
||||
try:
|
||||
upstream = urllib.request.urlopen(req)
|
||||
@@ -816,8 +866,11 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
lines = raw.strip().split("\n")
|
||||
result = cc_resp_to_responses(lines, model)
|
||||
self.send_json(200, result)
|
||||
rid = result.get("id")
|
||||
if rid:
|
||||
store_response(rid, body.get("input", ""), result.get("output", []))
|
||||
|
||||
def _forward(self, req, stream, model, nonstream_fn, stream_fn):
|
||||
def _forward(self, req, stream, model, nonstream_fn, stream_fn, input_data=None):
|
||||
try:
|
||||
upstream = urllib.request.urlopen(req)
|
||||
except urllib.error.HTTPError as e:
|
||||
@@ -832,12 +885,27 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
self.send_header("Cache-Control", "no-cache")
|
||||
self.send_header("Connection", "keep-alive")
|
||||
self.end_headers()
|
||||
last_resp_id = None
|
||||
last_output = None
|
||||
for event in stream_fn(upstream):
|
||||
self.wfile.write(event.encode("utf-8"))
|
||||
self.wfile.flush()
|
||||
for line in event.strip().split("\n"):
|
||||
if line.startswith("data: "):
|
||||
try:
|
||||
d = json.loads(line[6:])
|
||||
if d.get("type") == "response.completed":
|
||||
last_resp_id = d.get("response", {}).get("id")
|
||||
last_output = d.get("response", {}).get("output", [])
|
||||
except: pass
|
||||
if last_resp_id and input_data is not None:
|
||||
store_response(last_resp_id, input_data, last_output)
|
||||
else:
|
||||
result = nonstream_fn(upstream)
|
||||
self.send_json(200, result)
|
||||
rid = result.get("id")
|
||||
if rid and input_data is not None:
|
||||
store_response(rid, input_data, result.get("output", []))
|
||||
|
||||
def send_json(self, status, data):
|
||||
body = json.dumps(data).encode()
|
||||
|
||||
Reference in New Issue
Block a user