fix: Crof multi-turn tool calls + auto-trim long conversations

Root cause: Codex sends function_call items with id=None, which causes a
tool_call_id mismatch between tool calls and their tool results. The proxy
now resolves IDs by call_id first, with a positional fallback.

Auto-trim: conversations exceeding 30 items are trimmed automatically,
keeping the leading system/developer messages, the original user query,
and the most recent items. This prevents context overflow on providers
with smaller context windows (Crof mimo-v2.5-pro stops responding at
~40 items).

- Fix None tool IDs in oa_input_to_messages with positional matching
- Auto-trim input to 30 items max (keeps head + tail)
- Add request/response logging to ~/.cache/codex-proxy/requests.log
- Proxy stderr visible in launcher terminal for debugging
- v2.1.2
This commit is contained in:
admin
2026-05-19 21:25:35 +04:00
Unverified
parent cb6381afe4
commit aa377024d9
4 changed files with 108 additions and 17 deletions

View File

@@ -2,12 +2,15 @@
## v2.1.2 (2026-05-19)
- **Fixed Crof.ai and other providers stopping after first tool call (root cause)**
- Proxy now stores responses and resolves `previous_response_id` for multi-turn conversations
- Codex Desktop uses `previous_response_id` to chain turns — proxy reconstructs full conversation context
- Without this fix, the proxy sent only the new `function_call_output` to upstream without the original user message or assistant tool call, causing the upstream model to return incomplete responses
- **Fixed Crof.ai and providers stopping after first tool call (root cause: None tool IDs)**
- Codex sends `function_call` items with `id=None` — proxy now matches tool results to calls by call_id + positional fallback
- Fixed orphan message output item when response is only tool calls (no text content)
- Response store capped at 50 entries (LRU eviction)
- **Auto-trims long conversations (>30 items)** to prevent context overflow on providers like Crof
- Keeps system/developer messages, original user query, and most recent items
- Drops the oldest tool calls/outputs from the middle when the conversation grows too long
- Prevents `status=incomplete` errors on providers with smaller context windows
- Added request/response logging to `~/.cache/codex-proxy/requests.log` for debugging
- Proxy stderr no longer discarded by launcher (visible in terminal for debugging)
## v2.1.1 (2026-05-19)

Binary file not shown.

View File

@@ -25,10 +25,11 @@ model_catalog_json = ""
CHANGELOG = [
("2.1.2", "2026-05-19", [
"Fixed Crof.ai and other providers stopping after first tool call",
"Proxy now stores and resolves previous_response_id for multi-turn conversations",
"Codex Desktop uses previous_response_id to chain turns — proxy reconstructs full context",
"Fixed orphan message output item when response is only tool calls (no text)",
"Fixed Crof.ai and providers stopping after first tool call (root cause: None tool IDs)",
"Codex sends function_call items with id=None — proxy now matches tool results to calls by position",
"Fixed orphan message output item when response has only tool calls (no text)",
"Auto-trims long conversations (>30 items) to prevent context overflow on providers like Crof",
"Added request/response logging to ~/.cache/codex-proxy/requests.log",
]),
("2.1.1", "2026-05-19", [
"Fixed proxy: map 'developer' role to 'system' for Chat Completions providers",
@@ -437,7 +438,7 @@ def _start_proxy_for(endpoint, logfn):
_proxy_proc = subprocess.Popen(
["python3", str(PROXY), "--config", str(pcfg_path)],
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
stdout=subprocess.DEVNULL,
preexec_fn=os.setsid,
)
@@ -526,7 +527,7 @@ class LauncherWin(Gtk.Window):
# header row
hdr = Gtk.Box(spacing=8)
vbox.pack_start(hdr, False, False, 0)
lbl = Gtk.Label(label="<b>Codex Launcher v2.2.0</b>")
lbl = Gtk.Label(label="<b>Codex Launcher v2.1.2</b>")
lbl.set_use_markup(True)
hdr.pack_start(lbl, False, False, 0)
changelog_btn = Gtk.Button(label="Changelog")

View File

@@ -165,6 +165,28 @@ def forwarded_headers(request_headers, extra=None, browser_ua=False):
headers.update(extra)
return headers
_MAX_INPUT_ITEMS = 30
def _trim_input(input_data):
if not isinstance(input_data, list) or len(input_data) <= _MAX_INPUT_ITEMS:
return input_data
head_end = 0
for i, item in enumerate(input_data):
t = item.get("type")
if t == "message" and item.get("role") in ("developer", "system"):
head_end = i + 1
elif t == "message" and item.get("role") == "user" and head_end == i:
head_end = i + 1
else:
break
head = input_data[:head_end]
tail_keep = _MAX_INPUT_ITEMS - len(head)
tail = input_data[-tail_keep:]
trimmed = len(input_data) - len(head) - len(tail)
if trimmed > 0:
print(f"[trim] {len(input_data)} items -> {len(head) + len(tail)} (dropped {trimmed} old items)", file=sys.stderr)
return head + tail
# ═══════════════════════════════════════════════════════════════════
# OpenAI-compat backend
# ═══════════════════════════════════════════════════════════════════
@@ -175,16 +197,19 @@ def oa_input_to_messages(input_data):
msgs.append({"role": "user", "content": input_data})
elif isinstance(input_data, list):
pending_tool_calls = []
last_flushed_ids = []
for item in input_data:
t = item.get("type")
if t == "function_call":
tcid = item.get("call_id") or item.get("id") or uid("tc")
pending_tool_calls.append(
{"id": item.get("call_id", item.get("id", uid("tc"))),
{"id": tcid,
"type": "function",
"function": {"name": item.get("name", ""),
"arguments": item.get("arguments", "{}")}})
continue
if pending_tool_calls:
last_flushed_ids = [tc["id"] for tc in pending_tool_calls]
msgs.append({"role": "assistant", "content": None, "tool_calls": pending_tool_calls})
pending_tool_calls = []
if t == "message":
@@ -205,7 +230,12 @@ def oa_input_to_messages(input_data):
if text is not None:
msgs.append({"role": role, "content": text})
elif t == "function_call_output":
msgs.append({"role": "tool", "tool_call_id": item.get("id", ""),
tcid = item.get("call_id") or item.get("id") or ""
if not tcid and last_flushed_ids:
idx = len([m for m in msgs if m.get("role") == "tool"])
if idx < len(last_flushed_ids):
tcid = last_flushed_ids[idx]
msgs.append({"role": "tool", "tool_call_id": tcid,
"content": item.get("output", "")})
if pending_tool_calls:
msgs.append({"role": "assistant", "content": None, "tool_calls": pending_tool_calls})
@@ -654,6 +684,29 @@ def cc_stream_to_sse(cc_stream, model, req_id):
# HTTP Server
# ═══════════════════════════════════════════════════════════════════
_LOG_DIR = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy")
os.makedirs(_LOG_DIR, exist_ok=True)
def _resp_item_summary(item):
    """Return the one-line log summary (no newline) for one output item."""
    kind = item.get("type") if isinstance(item, dict) else None
    if kind == "message":
        # Content may be missing or an empty list; log an empty text
        # instead of failing on content[0].
        content = item.get("content")
        first = content[0] if isinstance(content, list) and content else {}
        text = first.get("text") if isinstance(first, dict) else ""
        return f" -> message: {str(text or '')[:200]}"
    if kind == "function_call":
        # Arguments may be None; coerce before slicing.
        args = str(item.get("arguments") or "")[:120]
        return f" -> function_call: {item.get('name')}({args})"
    return f" -> {kind}"


def _log_resp(resp_id, status, output):
    """Append a summary of one upstream response to ``requests.log``.

    Writes a ``RESPONSE`` header, one line per output item (message text
    truncated to 200 characters, function-call arguments to 120), and a
    separator line.  Logging is best-effort: any failure is reported on
    stderr and never propagates to the caller.

    Args:
        resp_id: Response id to record (may be None).
        status: Response status to record (may be None).
        output: Iterable of response output items, or None/empty.
    """
    try:
        lines = [f" RESPONSE id={resp_id} status={status}\n"]
        if output:
            lines.extend(f"{_resp_item_summary(o)}\n" for o in output)
        lines.append(f"{'='*60}\n")
        # Compose the whole entry first so it is written in a single call.
        with open(os.path.join(_LOG_DIR, "requests.log"), "a") as log_file:
            log_file.writelines(lines)
    except Exception as exc:
        # Logging must never break request handling, but it should not
        # fail silently either.
        print(f"[log] could not write response log: {exc}", file=sys.stderr)
class Handler(http.server.BaseHTTPRequestHandler):
protocol_version = "HTTP/1.1"
@@ -669,6 +722,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
else:
self.send_error(404)
_logf = None
def _handle(self):
try:
clen = int(self.headers.get("Content-Length", 0))
@@ -676,11 +731,39 @@ class Handler(http.server.BaseHTTPRequestHandler):
except Exception as e:
return self.send_json(400, {"error": {"message": f"Bad request: {e}"}})
input_data = resolve_previous_response(body)
body["input"] = input_data
import datetime as _dt
_log_path = os.path.join(_LOG_DIR, "requests.log")
_ts = _dt.datetime.now().isoformat()
prev_id = body.get("previous_response_id")
input_types = [i.get("type") for i in input_data] if isinstance(input_data, list) else str(type(input_data))
print(f"[REQUEST] prev_id={prev_id} resolved_input_types={input_types}", file=sys.stderr)
raw_input = body.get("input", "")
input_data = resolve_previous_response(body)
input_data = _trim_input(input_data)
body["input"] = input_data
raw_types = [i.get("type") for i in raw_input] if isinstance(raw_input, list) else "str"
resolved_types = [i.get("type") for i in input_data] if isinstance(input_data, list) else "str"
print(f"[REQUEST] prev_id={prev_id} raw={raw_types} resolved={resolved_types}", file=sys.stderr)
with open(_log_path, "a") as _lf:
_lf.write(f"\n{'='*60}\n{_ts} REQUEST {self.path}\n")
_lf.write(f" prev_id={prev_id}\n")
_lf.write(f" raw_input_types={raw_types}\n")
_lf.write(f" resolved_input_types={resolved_types}\n")
_lf.write(f" stream={body.get('stream')} model={body.get('model')}\n")
_lf.write(f" store_keys={list(_response_store.keys())}\n")
if isinstance(input_data, list):
for i, item in enumerate(input_data):
t = item.get("type")
if t == "message":
_lf.write(f" [{i}] message role={item.get('role')} text={str(item.get('content',''))[:120]}\n")
elif t == "function_call":
_lf.write(f" [{i}] function_call call_id={item.get('call_id')} id={item.get('id')} name={item.get('name')} args={item.get('arguments','')[:120]}\n")
elif t == "function_call_output":
_lf.write(f" [{i}] function_call_output id={item.get('id')} output={str(item.get('output',''))[:120]}\n")
else:
_lf.write(f" [{i}] {t}\n")
_lf.flush()
model = body.get("model", MODELS[0]["id"] if MODELS else "unknown")
stream = body.get("stream", False)
@@ -887,6 +970,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
self.end_headers()
last_resp_id = None
last_output = None
last_status = None
for event in stream_fn(upstream):
self.wfile.write(event.encode("utf-8"))
self.wfile.flush()
@@ -897,13 +981,16 @@ class Handler(http.server.BaseHTTPRequestHandler):
if d.get("type") == "response.completed":
last_resp_id = d.get("response", {}).get("id")
last_output = d.get("response", {}).get("output", [])
last_status = d.get("response", {}).get("status")
except: pass
_log_resp(last_resp_id, last_status, last_output)
if last_resp_id and input_data is not None:
store_response(last_resp_id, input_data, last_output)
else:
result = nonstream_fn(upstream)
self.send_json(200, result)
rid = result.get("id")
_log_resp(rid, result.get("status"), result.get("output", []))
if rid and input_data is not None:
store_response(rid, input_data, result.get("output", []))