fix: add previous_response_id support for multi-turn tool calls (Crof fix)
Codex Desktop uses previous_response_id to chain conversation turns. Without storing and resolving these, the proxy sent only the new function_call_output to upstream providers, missing the original user message and assistant tool call. This caused Crof.ai (and any provider using tool calls) to stop after the first response. - Add in-memory response store (50 entry LRU) keyed by response ID - resolve_previous_response() reconstructs full input chain on multi-turn - Fix orphan message output item when response has only tool calls - Applies to all backends: openai-compat, anthropic, command-code - v2.1.2
This commit is contained in:
@@ -1,5 +1,14 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## v2.1.2 (2026-05-19)
|
||||||
|
|
||||||
|
- **Fixed Crof.ai and other providers stopping after first tool call (root cause)**
|
||||||
|
- Proxy now stores responses and resolves `previous_response_id` for multi-turn conversations
|
||||||
|
- Codex Desktop uses `previous_response_id` to chain turns — proxy reconstructs full conversation context
|
||||||
|
- Without this fix, the proxy sent only the new `function_call_output` to upstream without the original user message or assistant tool call, causing the upstream model to return incomplete responses
|
||||||
|
- Fixed orphan message output item when response is only tool calls (no text content)
|
||||||
|
- Response store capped at 50 entries (LRU eviction)
|
||||||
|
|
||||||
## v2.1.1 (2026-05-19)
|
## v2.1.1 (2026-05-19)
|
||||||
|
|
||||||
- Added Command Code backend to translation proxy (proprietary `/alpha/generate` API)
|
- Added Command Code backend to translation proxy (proprietary `/alpha/generate` API)
|
||||||
|
|||||||
@@ -390,6 +390,7 @@ README.md # This file
|
|||||||
| Proxy stops when terminal closes | SIGHUP to subprocess | Launcher uses `os.setsid` process groups |
|
| Proxy stops when terminal closes | SIGHUP to subprocess | Launcher uses `os.setsid` process groups |
|
||||||
| Models not showing in picker | Wrong model catalog format | Must have both `slug` + `model` fields |
|
| Models not showing in picker | Wrong model catalog format | Must have both `slug` + `model` fields |
|
||||||
| Codex hangs in "thinking" | Missing `response.completed` | Proxy emits full SSE event sequence |
|
| Codex hangs in "thinking" | Missing `response.completed` | Proxy emits full SSE event sequence |
|
||||||
|
| Stops after first tool call (Crof) | `previous_response_id` not resolved | V2.1.2 stores and chains responses for multi-turn |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
BIN
codex-launcher_2.1.2_all.deb
Normal file
BIN
codex-launcher_2.1.2_all.deb
Normal file
Binary file not shown.
@@ -24,6 +24,12 @@ model_catalog_json = ""
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
CHANGELOG = [
|
CHANGELOG = [
|
||||||
|
("2.1.2", "2026-05-19", [
|
||||||
|
"Fixed Crof.ai and other providers stopping after first tool call",
|
||||||
|
"Proxy now stores and resolves previous_response_id for multi-turn conversations",
|
||||||
|
"Codex Desktop uses previous_response_id to chain turns — proxy reconstructs full context",
|
||||||
|
"Fixed orphan message output item when response is only tool calls (no text)",
|
||||||
|
]),
|
||||||
("2.1.1", "2026-05-19", [
|
("2.1.1", "2026-05-19", [
|
||||||
"Fixed proxy: map 'developer' role to 'system' for Chat Completions providers",
|
"Fixed proxy: map 'developer' role to 'system' for Chat Completions providers",
|
||||||
"Fixed proxy: map 'developer' role to 'user' for Anthropic providers",
|
"Fixed proxy: map 'developer' role to 'user' for Anthropic providers",
|
||||||
|
|||||||
@@ -88,6 +88,32 @@ CC_VERSION = CONFIG.get("cc_version", "")
|
|||||||
|
|
||||||
_pool = uuid.uuid4().hex[:8]
|
_pool = uuid.uuid4().hex[:8]
|
||||||
|
|
||||||
|
_response_store = {}
|
||||||
|
_MAX_STORED = 50
|
||||||
|
|
||||||
|
def store_response(resp_id, input_data, output_items):
|
||||||
|
if not resp_id:
|
||||||
|
return
|
||||||
|
_response_store[resp_id] = {"input": input_data, "output": output_items}
|
||||||
|
if len(_response_store) > _MAX_STORED:
|
||||||
|
oldest = list(_response_store.keys())[0]
|
||||||
|
del _response_store[oldest]
|
||||||
|
|
||||||
|
def resolve_previous_response(body):
|
||||||
|
prev_id = body.get("previous_response_id")
|
||||||
|
input_data = body.get("input", "")
|
||||||
|
if not prev_id or prev_id not in _response_store:
|
||||||
|
return input_data
|
||||||
|
stored = _response_store[prev_id]
|
||||||
|
prev_input = stored["input"]
|
||||||
|
prev_output = stored["output"]
|
||||||
|
new_input = input_data if isinstance(input_data, list) else []
|
||||||
|
if isinstance(prev_input, list):
|
||||||
|
combined = list(prev_input) + list(prev_output) + new_input
|
||||||
|
else:
|
||||||
|
combined = [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": str(prev_input)}]}] + list(prev_output) + new_input
|
||||||
|
return combined
|
||||||
|
|
||||||
_HOP_BY_HOP_HEADERS = {
|
_HOP_BY_HOP_HEADERS = {
|
||||||
"connection",
|
"connection",
|
||||||
"keep-alive",
|
"keep-alive",
|
||||||
@@ -236,15 +262,12 @@ def oa_stream_to_sse(chat_stream, model, req_id):
|
|||||||
text_buf = ""
|
text_buf = ""
|
||||||
tc_buf = {}
|
tc_buf = {}
|
||||||
fr = None
|
fr = None
|
||||||
|
msg_opened = False
|
||||||
|
|
||||||
yield emit("response.created", {"type": "response.created",
|
yield emit("response.created", {"type": "response.created",
|
||||||
"response": {"id": resp_id, "object": "response", "model": model,
|
"response": {"id": resp_id, "object": "response", "model": model,
|
||||||
"status": "in_progress", "created": int(time.time()), "output": []}})
|
"status": "in_progress", "created": int(time.time()), "output": []}})
|
||||||
yield emit("response.in_progress", {"type": "response.in_progress", "response": {"id": resp_id}})
|
yield emit("response.in_progress", {"type": "response.in_progress", "response": {"id": resp_id}})
|
||||||
yield emit("response.output_item.added", {"type": "response.output_item.added",
|
|
||||||
"item": {"type": "message", "id": msg_id, "role": "assistant", "status": "in_progress", "content": []}})
|
|
||||||
yield emit("response.content_part.added", {"type": "response.content_part.added",
|
|
||||||
"part": {"type": "output_text", "text": "", "annotations": []}, "item_id": msg_id})
|
|
||||||
|
|
||||||
for line in chat_stream:
|
for line in chat_stream:
|
||||||
line = line.decode("utf-8", errors="replace").strip()
|
line = line.decode("utf-8", errors="replace").strip()
|
||||||
@@ -264,6 +287,13 @@ def oa_stream_to_sse(chat_stream, model, req_id):
|
|||||||
|
|
||||||
content = delta.get("content")
|
content = delta.get("content")
|
||||||
if content:
|
if content:
|
||||||
|
if not msg_opened:
|
||||||
|
msg_id = uid("msg")
|
||||||
|
yield emit("response.output_item.added", {"type": "response.output_item.added",
|
||||||
|
"item": {"type": "message", "id": msg_id, "role": "assistant", "status": "in_progress", "content": []}})
|
||||||
|
yield emit("response.content_part.added", {"type": "response.content_part.added",
|
||||||
|
"part": {"type": "output_text", "text": "", "annotations": []}, "item_id": msg_id})
|
||||||
|
msg_opened = True
|
||||||
text_buf += content
|
text_buf += content
|
||||||
yield emit("response.output_text.delta", {"type": "response.output_text.delta",
|
yield emit("response.output_text.delta", {"type": "response.output_text.delta",
|
||||||
"delta": content, "item_id": msg_id, "content_index": 0})
|
"delta": content, "item_id": msg_id, "content_index": 0})
|
||||||
@@ -288,7 +318,7 @@ def oa_stream_to_sse(chat_stream, model, req_id):
|
|||||||
if rc:
|
if rc:
|
||||||
yield emit("response.reasoning.delta", {"type": "response.reasoning.delta", "delta": rc})
|
yield emit("response.reasoning.delta", {"type": "response.reasoning.delta", "delta": rc})
|
||||||
|
|
||||||
if text_buf:
|
if msg_opened:
|
||||||
yield emit("response.output_text.done", {"type": "response.output_text.done",
|
yield emit("response.output_text.done", {"type": "response.output_text.done",
|
||||||
"text": text_buf, "item_id": msg_id, "content_index": 0})
|
"text": text_buf, "item_id": msg_id, "content_index": 0})
|
||||||
yield emit("response.content_part.done", {"type": "response.content_part.done",
|
yield emit("response.content_part.done", {"type": "response.content_part.done",
|
||||||
@@ -308,7 +338,7 @@ def oa_stream_to_sse(chat_stream, model, req_id):
|
|||||||
fm = {"stop": "completed", "length": "incomplete", "tool_calls": "completed", "content_filter": "incomplete"}
|
fm = {"stop": "completed", "length": "incomplete", "tool_calls": "completed", "content_filter": "incomplete"}
|
||||||
status = fm.get(fr, "incomplete")
|
status = fm.get(fr, "incomplete")
|
||||||
final_out = []
|
final_out = []
|
||||||
if text_buf:
|
if msg_opened:
|
||||||
final_out.append({"type": "message", "id": msg_id, "role": "assistant", "status": "completed",
|
final_out.append({"type": "message", "id": msg_id, "role": "assistant", "status": "completed",
|
||||||
"content": [{"type": "output_text", "text": text_buf, "annotations": []}]})
|
"content": [{"type": "output_text", "text": text_buf, "annotations": []}]})
|
||||||
for idx in sorted(tc_buf):
|
for idx in sorted(tc_buf):
|
||||||
@@ -646,6 +676,12 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
return self.send_json(400, {"error": {"message": f"Bad request: {e}"}})
|
return self.send_json(400, {"error": {"message": f"Bad request: {e}"}})
|
||||||
|
|
||||||
|
input_data = resolve_previous_response(body)
|
||||||
|
body["input"] = input_data
|
||||||
|
prev_id = body.get("previous_response_id")
|
||||||
|
input_types = [i.get("type") for i in input_data] if isinstance(input_data, list) else str(type(input_data))
|
||||||
|
print(f"[REQUEST] prev_id={prev_id} resolved_input_types={input_types}", file=sys.stderr)
|
||||||
|
|
||||||
model = body.get("model", MODELS[0]["id"] if MODELS else "unknown")
|
model = body.get("model", MODELS[0]["id"] if MODELS else "unknown")
|
||||||
stream = body.get("stream", False)
|
stream = body.get("stream", False)
|
||||||
|
|
||||||
@@ -686,7 +722,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
)
|
)
|
||||||
self._forward(req, stream, model,
|
self._forward(req, stream, model,
|
||||||
lambda r: oa_resp_to_responses(json.loads(r.read()), model),
|
lambda r: oa_resp_to_responses(json.loads(r.read()), model),
|
||||||
lambda s: oa_stream_to_sse(s, model, body.get("request_id") or body.get("id")))
|
lambda s: oa_stream_to_sse(s, model, body.get("request_id") or body.get("id")),
|
||||||
|
input_data=body.get("input", ""))
|
||||||
|
|
||||||
def _handle_anthropic(self, body, model, stream):
|
def _handle_anthropic(self, body, model, stream):
|
||||||
input_data = body.get("input", "")
|
input_data = body.get("input", "")
|
||||||
@@ -721,7 +758,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
)
|
)
|
||||||
self._forward(req, stream, model,
|
self._forward(req, stream, model,
|
||||||
lambda r: an_resp_to_responses(json.loads(r.read()), model),
|
lambda r: an_resp_to_responses(json.loads(r.read()), model),
|
||||||
lambda s: an_stream_to_sse(s, model, body.get("request_id") or body.get("id")))
|
lambda s: an_stream_to_sse(s, model, body.get("request_id") or body.get("id")),
|
||||||
|
input_data=body.get("input", ""))
|
||||||
|
|
||||||
def _handle_command_code(self, body, model, stream):
|
def _handle_command_code(self, body, model, stream):
|
||||||
input_data = body.get("input", "")
|
input_data = body.get("input", "")
|
||||||
@@ -800,9 +838,21 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
self.send_header("Cache-Control", "no-cache")
|
self.send_header("Cache-Control", "no-cache")
|
||||||
self.send_header("Connection", "keep-alive")
|
self.send_header("Connection", "keep-alive")
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
|
last_resp_id = None
|
||||||
|
last_output = None
|
||||||
for event in cc_stream_to_sse(upstream, model, body.get("request_id") or body.get("id")):
|
for event in cc_stream_to_sse(upstream, model, body.get("request_id") or body.get("id")):
|
||||||
self.wfile.write(event.encode("utf-8"))
|
self.wfile.write(event.encode("utf-8"))
|
||||||
self.wfile.flush()
|
self.wfile.flush()
|
||||||
|
for line in event.strip().split("\n"):
|
||||||
|
if line.startswith("data: "):
|
||||||
|
try:
|
||||||
|
d = json.loads(line[6:])
|
||||||
|
if d.get("type") == "response.completed":
|
||||||
|
last_resp_id = d.get("response", {}).get("id")
|
||||||
|
last_output = d.get("response", {}).get("output", [])
|
||||||
|
except: pass
|
||||||
|
if last_resp_id:
|
||||||
|
store_response(last_resp_id, body.get("input", ""), last_output)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
upstream = urllib.request.urlopen(req)
|
upstream = urllib.request.urlopen(req)
|
||||||
@@ -816,8 +866,11 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
lines = raw.strip().split("\n")
|
lines = raw.strip().split("\n")
|
||||||
result = cc_resp_to_responses(lines, model)
|
result = cc_resp_to_responses(lines, model)
|
||||||
self.send_json(200, result)
|
self.send_json(200, result)
|
||||||
|
rid = result.get("id")
|
||||||
|
if rid:
|
||||||
|
store_response(rid, body.get("input", ""), result.get("output", []))
|
||||||
|
|
||||||
def _forward(self, req, stream, model, nonstream_fn, stream_fn):
|
def _forward(self, req, stream, model, nonstream_fn, stream_fn, input_data=None):
|
||||||
try:
|
try:
|
||||||
upstream = urllib.request.urlopen(req)
|
upstream = urllib.request.urlopen(req)
|
||||||
except urllib.error.HTTPError as e:
|
except urllib.error.HTTPError as e:
|
||||||
@@ -832,12 +885,27 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
self.send_header("Cache-Control", "no-cache")
|
self.send_header("Cache-Control", "no-cache")
|
||||||
self.send_header("Connection", "keep-alive")
|
self.send_header("Connection", "keep-alive")
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
|
last_resp_id = None
|
||||||
|
last_output = None
|
||||||
for event in stream_fn(upstream):
|
for event in stream_fn(upstream):
|
||||||
self.wfile.write(event.encode("utf-8"))
|
self.wfile.write(event.encode("utf-8"))
|
||||||
self.wfile.flush()
|
self.wfile.flush()
|
||||||
|
for line in event.strip().split("\n"):
|
||||||
|
if line.startswith("data: "):
|
||||||
|
try:
|
||||||
|
d = json.loads(line[6:])
|
||||||
|
if d.get("type") == "response.completed":
|
||||||
|
last_resp_id = d.get("response", {}).get("id")
|
||||||
|
last_output = d.get("response", {}).get("output", [])
|
||||||
|
except: pass
|
||||||
|
if last_resp_id and input_data is not None:
|
||||||
|
store_response(last_resp_id, input_data, last_output)
|
||||||
else:
|
else:
|
||||||
result = nonstream_fn(upstream)
|
result = nonstream_fn(upstream)
|
||||||
self.send_json(200, result)
|
self.send_json(200, result)
|
||||||
|
rid = result.get("id")
|
||||||
|
if rid and input_data is not None:
|
||||||
|
store_response(rid, input_data, result.get("output", []))
|
||||||
|
|
||||||
def send_json(self, status, data):
|
def send_json(self, status, data):
|
||||||
body = json.dumps(data).encode()
|
body = json.dumps(data).encode()
|
||||||
|
|||||||
Reference in New Issue
Block a user