feat: auto-compaction for long conversations (like Claude Code/Codex /compact)

Instead of just truncating old items, the proxy now auto-compacts them into a structured summary that preserves key context:
- Summarizes user requests, assistant responses, tool calls made, and files touched
- Keeps the original query, system messages, and the 10 most recent items
- In testing, 38 items were reduced to 14, including a summary of the dropped turns
- Similar to Claude Code's auto-compact and Codex CLI's /compact
- No extra API calls needed: instant and zero cost
2026-05-19 21:49:55 +04:00
parent c90912ed07
commit 662d8e961e
3 changed files with 117 additions and 25 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,9 +6,10 @@
 - Codex sends `function_call` items with `id=None` — proxy now matches tool results to calls by call_id + positional fallback
 - Fixed orphan message output item when response is only tool calls (no text content)
 - **Auto-trims long conversations (>30 items)** to prevent context overflow on providers like Crof
-  - Keeps system/developer messages, original user query, and most recent items
-  - Drops oldest tool call/outputs from the middle when conversation grows too long
-  - Prevents `status=incomplete` errors on providers with smaller context windows
+  - Keeps system/developer messages, original user query, and most recent 10 items
+  - **Auto-compacts old items into a summary** instead of just dropping them
+  - Summary includes: user requests, assistant responses, tool calls made, files touched
+  - Preserves enough context for the model to continue long tasks intelligently
 - **Truncates large tool outputs (>8000 chars)** to prevent model output token exhaustion
  - Crof's models return `incomplete` when tool results contain too much text (e.g., full HTML pages)
  - Truncated outputs include `[truncated N chars]` suffix so the model knows data was cut
--- a/codex-launcher_2.1.2_all.deb
+++ b/codex-launcher_2.1.2_all.deb
--- a/src/translate-proxy.py
+++ b/src/translate-proxy.py
@@ -167,23 +167,66 @@ def forwarded_headers(request_headers, extra=None, browser_ua=False):

 _MAX_INPUT_ITEMS = 30
 _MAX_TOOL_OUTPUT_CHARS = 8000
+_COMPACT_KEEP_RECENT = 10

-def _trim_input(input_data):
-    if not isinstance(input_data, list):
-        return input_data
-    out = []
-    for item in input_data:
-        if item.get("type") == "function_call_output":
-            o = item.get("output", "")
-            if len(o) > _MAX_TOOL_OUTPUT_CHARS:
-                item = dict(item)
-                item["output"] = o[:_MAX_TOOL_OUTPUT_CHARS] + f"\n... [truncated {len(o) - _MAX_TOOL_OUTPUT_CHARS} chars]"
-                print(f"[trim] tool output truncated {len(o)} -> {_MAX_TOOL_OUTPUT_CHARS}", file=sys.stderr)
-        out.append(item)
-    if len(out) <= _MAX_INPUT_ITEMS:
+def _item_summary(item, max_len=200):
+    t = item.get("type")
+    if t == "message":
+        role = item.get("role", "?")
+        text = ""
+        for p in item.get("content", []):
+            if p.get("type") in ("input_text", "output_text"):
+                text += p.get("text", "")
+        return f"[{role}] {text[:max_len]}"
+    elif t == "function_call":
+        name = item.get("name", "?")
+        args = item.get("arguments", "{}")
+        try:
+            a = json.loads(args)
+            cmd = a.get("cmd", a.get("command", ""))
+            if cmd:
+                return f"[tool call] {name}: {cmd[:max_len]}"
+        except Exception:
+            pass
+        return f"[tool call] {name}({args[:max_len]})"
+    elif t == "function_call_output":
+        output = item.get("output", "")
+        if len(output) > max_len:
+            return f"[tool result] {output[:max_len]}..."
+        return f"[tool result] {output}"
+    return f"[{t}]"
+
+def _extract_files(items):
+    files = set()
+    for item in items:
+        if item.get("type") == "function_call":
+            try:
+                a = json.loads(item.get("arguments", "{}"))
+                cmd = a.get("cmd", a.get("command", ""))
+                for prefix in (">", ">>", " > ", " >> "):
+                    for part in cmd.split(prefix)[1:]:
+                        f = part.strip().split()[0].strip("'\"")
+                        if f and not f.startswith("-") and "/" in f:
+                            files.add(f)
+            except Exception:
+                pass
+    return files
+
+def _compact_input(input_data):
+    if not isinstance(input_data, list) or len(input_data) <= _MAX_INPUT_ITEMS:
+        out = []
+        for item in input_data:
+            if isinstance(item, dict) and item.get("type") == "function_call_output":
+                o = item.get("output", "")
+                if len(o) > _MAX_TOOL_OUTPUT_CHARS:
+                    item = dict(item)
+                    item["output"] = o[:_MAX_TOOL_OUTPUT_CHARS] + f"\n... [truncated {len(o) - _MAX_TOOL_OUTPUT_CHARS} chars]"
+                    print(f"[compact] tool output truncated {len(o)} -> {_MAX_TOOL_OUTPUT_CHARS}", file=sys.stderr)
+            out.append(item)
        return out
+
    head_end = 0
-    for i, item in enumerate(out):
+    for i, item in enumerate(input_data):
        t = item.get("type")
        if t == "message" and item.get("role") in ("developer", "system"):
            head_end = i + 1
@@ -191,13 +234,61 @@ def _trim_input(input_data):
            head_end = i + 1
        else:
            break
-    head = out[:head_end]
-    tail_keep = _MAX_INPUT_ITEMS - len(head)
-    tail = out[-tail_keep:]
-    trimmed = len(out) - len(head) - len(tail)
-    if trimmed > 0:
-        print(f"[trim] {len(out)} items -> {len(head) + len(tail)} (dropped {trimmed} old items)", file=sys.stderr)
-    return head + tail
+
+    head = input_data[:head_end]
+    tail = input_data[-_COMPACT_KEEP_RECENT:]
+    body = input_data[head_end:-_COMPACT_KEEP_RECENT]
+
+    if not body:
+        return head + tail
+
+    for item in tail:
+        if isinstance(item, dict) and item.get("type") == "function_call_output":
+            o = item.get("output", "")
+            if len(o) > _MAX_TOOL_OUTPUT_CHARS:
+                item["output"] = o[:_MAX_TOOL_OUTPUT_CHARS] + f"\n... [truncated {len(o) - _MAX_TOOL_OUTPUT_CHARS} chars]"
+
+    user_queries = []
+    for item in body:
+        if item.get("type") == "message" and item.get("role") == "user":
+            for p in item.get("content", []):
+                if p.get("type") == "input_text":
+                    user_queries.append(p.get("text", "")[:300])
+    assistant_msgs = []
+    for item in body:
+        if item.get("type") == "message" and item.get("role") == "assistant":
+            for p in item.get("content", []):
+                if p.get("type") == "output_text":
+                    assistant_msgs.append(p.get("text", "")[:300])
+
+    tool_summaries = []
+    for item in body:
+        if item.get("type") in ("function_call", "function_call_output"):
+            tool_summaries.append(_item_summary(item, max_len=150))
+
+    files = _extract_files(body)
+
+    summary_lines = [f"[Auto-compacted: {len(body)} earlier turns summarized to preserve context]"]
+    if user_queries:
+        summary_lines.append(f"User requests: {'; '.join(user_queries[-3:])}")
+    if assistant_msgs:
+        summary_lines.append(f"Assistant responses: {'; '.join(assistant_msgs[-3:])}")
+    if tool_summaries:
+        summary_lines.append(f"Actions taken ({len(tool_summaries)} steps):")
+        for ts in tool_summaries[-15:]:
+            summary_lines.append(f"  {ts}")
+    if files:
+        summary_lines.append(f"Files touched: {', '.join(sorted(files)[-10:])}")
+
+    summary_text = "\n".join(summary_lines)
+    summary_msg = {
+        "type": "message",
+        "role": "user",
+        "content": [{"type": "input_text", "text": summary_text}]
+    }
+
+    print(f"[compact] {len(input_data)} items -> {len(head) + 1 + len(tail)} (compacted {len(body)} old items into summary)", file=sys.stderr)
+    return head + [summary_msg] + tail

 # ═══════════════════════════════════════════════════════════════════
 # OpenAI-compat backend
@@ -750,7 +841,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
        prev_id = body.get("previous_response_id")
        raw_input = body.get("input", "")
        input_data = resolve_previous_response(body)
-        input_data = _trim_input(input_data)
+        input_data = _compact_input(input_data)
        body["input"] = input_data

        raw_types = [i.get("type") for i in raw_input] if isinstance(raw_input, list) else "str"