# Conversation-compaction helpers recovered from the post-image of a
# whitespace-flattened patch to src/translate-proxy.py.  The patch replaces
# `_trim_input` with `_compact_input`; the request handler assigns
# `_compact_input(resolve_previous_response(body))` to `body["input"]`.

_MAX_INPUT_ITEMS = 30          # compaction kicks in above this many input items
_MAX_TOOL_OUTPUT_CHARS = 8000  # longer function_call_output payloads are clipped
_COMPACT_KEEP_RECENT = 10      # trailing items kept verbatim when compacting


def _item_type(item):
    """Return the ``type`` field of a conversation item, or None for non-dicts."""
    return item.get("type") if isinstance(item, dict) else None


def _text_parts(item, part_types):
    """Return the text of each content part of *item* whose type is in *part_types*.

    ``content`` may be a list of part dicts or a bare string; a bare string is
    returned as a single part so callers never iterate it character by
    character.
    """
    content = item.get("content") or []
    if isinstance(content, str):
        return [content]
    return [
        str(part.get("text") or "")
        for part in content
        if isinstance(part, dict) and part.get("type") in part_types
    ]


def _call_command(item):
    """Return the ``cmd``/``command`` argument of a function_call item as a string.

    Returns "" when the arguments are not a JSON object or carry no command.
    """
    raw = item.get("arguments") or "{}"
    if isinstance(raw, dict):
        args = raw
    else:
        try:
            args = json.loads(raw)
        except (TypeError, ValueError):
            # Malformed arguments simply mean "no command to report".
            return ""
    if not isinstance(args, dict):
        return ""
    cmd = args.get("cmd", args.get("command", ""))
    if isinstance(cmd, (list, tuple)):
        # NOTE(review): presumably some tool schemas pass argv as a list;
        # join it so it can be summarized and scanned like a shell string.
        cmd = " ".join(str(word) for word in cmd)
    return cmd if isinstance(cmd, str) else ""


def _item_summary(item, max_len=200):
    """Return a one-line, length-limited description of a conversation item.

    Args:
        item: a message, function_call or function_call_output dict.
        max_len: maximum number of characters of text/command/output to keep.

    Returns:
        ``"[role] text"``, ``"[tool call] name: cmd"``,
        ``"[tool call] name(args)"``, ``"[tool result] output"`` or
        ``"[<type>]"`` for any other item type.
    """
    kind = _item_type(item)
    if kind == "message":
        text = "".join(_text_parts(item, ("input_text", "output_text")))
        return f"[{item.get('role', '?')}] {text[:max_len]}"
    if kind == "function_call":
        name = item.get("name", "?")
        cmd = _call_command(item)
        if cmd:
            return f"[tool call] {name}: {cmd[:max_len]}"
        raw_args = str(item.get("arguments", "{}"))
        return f"[tool call] {name}({raw_args[:max_len]})"
    if kind == "function_call_output":
        output = item.get("output", "")
        output = "" if output is None else str(output)
        if len(output) > max_len:
            return f"[tool result] {output[:max_len]}..."
        return f"[tool result] {output}"
    return f"[{kind}]"


def _extract_files(items):
    """Return the set of redirect targets (``>`` / ``>>``) found in tool calls.

    Only targets that contain a ``/`` and do not start with ``-`` are kept,
    which filters out things like ``&1`` in ``2>&1``.
    """
    files = set()
    for item in items:
        if _item_type(item) != "function_call":
            continue
        # Splitting on ">" covers ">" and ">>" alike; ">>" (or a trailing
        # ">") produces an empty segment, which must be skipped rather than
        # indexed, otherwise every redirect in the command is lost.
        for segment in _call_command(item).split(">")[1:]:
            words = segment.split()
            if not words:
                continue
            target = words[0].strip("'\"")
            if target and not target.startswith("-") and "/" in target:
                files.add(target)
    return files


def _truncate_tool_output(item):
    """Return *item*, or a clipped copy if it is an oversized tool output.

    The input dict is never modified, so the caller's conversation state
    keeps the full output.
    """
    if _item_type(item) != "function_call_output":
        return item
    output = item.get("output", "")
    if not isinstance(output, str) or len(output) <= _MAX_TOOL_OUTPUT_CHARS:
        return item
    print(f"[compact] tool output truncated {len(output)} -> {_MAX_TOOL_OUTPUT_CHARS}", file=sys.stderr)
    clipped = dict(item)
    clipped["output"] = output[:_MAX_TOOL_OUTPUT_CHARS] + f"\n... [truncated {len(output) - _MAX_TOOL_OUTPUT_CHARS} chars]"
    return clipped


def _collect_texts(items, role, part_type, max_len=300):
    """Return the (clipped) text parts of every message in *items* sent by *role*."""
    texts = []
    for item in items:
        if _item_type(item) == "message" and item.get("role") == role:
            texts.extend(text[:max_len] for text in _text_parts(item, (part_type,)))
    return texts


def _compact_input(input_data):
    """Clip oversized tool outputs and compact long conversations.

    Args:
        input_data: the request ``input``; anything that is not a list
            (e.g. a plain string prompt) is returned unchanged.

    Returns:
        A new list.  Tool outputs longer than ``_MAX_TOOL_OUTPUT_CHARS`` are
        replaced by clipped copies.  If the list has more than
        ``_MAX_INPUT_ITEMS`` items, the items between the leading messages
        and the last ``_COMPACT_KEEP_RECENT`` items are replaced by a single
        user message summarizing them.
    """
    if not isinstance(input_data, list):
        return input_data

    # Clip once, up front, so head, body summaries and tail are all bounded.
    items = [_truncate_tool_output(item) for item in input_data]
    if len(items) <= _MAX_INPUT_ITEMS:
        return items

    # Head = leading run of system/developer/user messages.
    # NOTE(review): the patch hunk elides one line between the two
    # `head_end = i + 1` assignments; it is assumed to be an `elif` matching
    # user messages (changelog: "original user query") — confirm in the file.
    head_end = 0
    for i, item in enumerate(items):
        if _item_type(item) == "message" and item.get("role") in ("developer", "system", "user"):
            head_end = i + 1
        else:
            break

    # Never let the tail reach back into the head, or items get duplicated.
    tail_start = max(head_end, len(items) - _COMPACT_KEEP_RECENT)
    # NOTE(review): the tail may begin with a function_call_output whose call
    # was compacted away — confirm the backend tolerates that.
    head = items[:head_end]
    body = items[head_end:tail_start]
    tail = items[tail_start:]
    if not body:
        return head + tail

    user_queries = _collect_texts(body, "user", "input_text")
    assistant_msgs = _collect_texts(body, "assistant", "output_text")
    tool_summaries = [
        _item_summary(item, max_len=150)
        for item in body
        if _item_type(item) in ("function_call", "function_call_output")
    ]
    files = _extract_files(body)

    summary_lines = [f"[Auto-compacted: {len(body)} earlier turns summarized to preserve context]"]
    if user_queries:
        summary_lines.append(f"User requests: {'; '.join(user_queries[-3:])}")
    if assistant_msgs:
        summary_lines.append(f"Assistant responses: {'; '.join(assistant_msgs[-3:])}")
    if tool_summaries:
        summary_lines.append(f"Actions taken ({len(tool_summaries)} steps):")
        summary_lines.extend(f"  {line}" for line in tool_summaries[-15:])
    if files:
        summary_lines.append(f"Files touched: {', '.join(sorted(files)[-10:])}")

    summary_msg = {
        "type": "message",
        "role": "user",
        "content": [{"type": "input_text", "text": "\n".join(summary_lines)}],
    }

    print(f"[compact] {len(input_data)} items -> {len(head) + 1 + len(tail)} (compacted {len(body)} old items into summary)", file=sys.stderr)
    return head + [summary_msg] + tail