feat: auto-compaction for long conversations (like Claude Code/Codex /compact)
Instead of simply truncating old items, the proxy now auto-compacts them into a structured summary that preserves key context:
- Summarizes user requests, assistant responses, tool calls made, and files touched
- Keeps the original query, the system messages, and the 10 most recent items
- Reduced 38 items to 14 in testing, with a summary of the dropped turns
- Similar to Claude Code's auto-compact and Codex CLI's /compact
- Requires no extra API calls, so it is instant and costs nothing
This commit is contained in:
@@ -6,9 +6,10 @@
|
|||||||
- Codex sends `function_call` items with `id=None` — proxy now matches tool results to calls by call_id + positional fallback
|
- Codex sends `function_call` items with `id=None` — proxy now matches tool results to calls by call_id + positional fallback
|
||||||
- Fixed orphan message output item when response is only tool calls (no text content)
|
- Fixed orphan message output item when response is only tool calls (no text content)
|
||||||
- **Auto-trims long conversations (>30 items)** to prevent context overflow on providers like Crof
|
- **Auto-trims long conversations (>30 items)** to prevent context overflow on providers like Crof
|
||||||
- Keeps system/developer messages, original user query, and most recent items
|
- Keeps system/developer messages, original user query, and most recent 10 items
|
||||||
- Drops oldest tool call/outputs from the middle when conversation grows too long
|
- **Auto-compacts old items into a summary** instead of just dropping them
|
||||||
- Prevents `status=incomplete` errors on providers with smaller context windows
|
- Summary includes: user requests, assistant responses, tool calls made, files touched
|
||||||
|
- Preserves enough context for the model to continue long tasks intelligently
|
||||||
- **Truncates large tool outputs (>8000 chars)** to prevent model output token exhaustion
|
- **Truncates large tool outputs (>8000 chars)** to prevent model output token exhaustion
|
||||||
- Crof's models return `incomplete` when tool results contain too much text (e.g., full HTML pages)
|
- Crof's models return `incomplete` when tool results contain too much text (e.g., full HTML pages)
|
||||||
- Truncated outputs include `[truncated N chars]` suffix so the model knows data was cut
|
- Truncated outputs include `[truncated N chars]` suffix so the model knows data was cut
|
||||||
|
|||||||
Binary file not shown.
@@ -167,23 +167,66 @@ def forwarded_headers(request_headers, extra=None, browser_ua=False):
|
|||||||
|
|
||||||
_MAX_INPUT_ITEMS = 30
|
_MAX_INPUT_ITEMS = 30
|
||||||
_MAX_TOOL_OUTPUT_CHARS = 8000
|
_MAX_TOOL_OUTPUT_CHARS = 8000
|
||||||
|
_COMPACT_KEEP_RECENT = 10
|
||||||
|
|
||||||
def _trim_input(input_data):
|
def _item_summary(item, max_len=200):
    """Return a short, single-string description of one conversation item.

    Args:
        item: A dict describing one input item. The ``type`` key selects the
            format: ``message``, ``function_call`` or ``function_call_output``.
        max_len: Maximum number of characters of free text (message text,
            command, raw arguments or tool output) to include.

    Returns:
        ``"[<role>] <text>"`` for messages, ``"[tool call] <name>: <cmd>"``
        (or ``"[tool call] <name>(<args>)"`` when no command is found) for
        function calls, ``"[tool result] <output>"`` for tool outputs (with a
        trailing ``...`` when cut), and ``"[<type>]"`` for anything else.
    """
    t = item.get("type")

    if t == "message":
        role = item.get("role", "?")
        content = item.get("content", [])
        if isinstance(content, str):
            # Content given as a bare string: use it directly instead of
            # iterating over its characters.
            text = content
        else:
            text = "".join(
                part.get("text") or ""
                for part in content or []
                if isinstance(part, dict)
                and part.get("type") in ("input_text", "output_text")
            )
        return f"[{role}] {text[:max_len]}"

    if t == "function_call":
        name = item.get("name", "?")
        raw_args = item.get("arguments", "{}")
        if isinstance(raw_args, str):
            try:
                parsed = json.loads(raw_args)
            except ValueError:
                # Not valid JSON: fall through to the raw-arguments form.
                parsed = None
        else:
            # Arguments already decoded (or missing): keep the value for the
            # command lookup and a JSON rendering for display.
            parsed = raw_args
            raw_args = json.dumps(raw_args, default=str)

        if isinstance(parsed, dict):
            cmd = parsed.get("cmd", parsed.get("command", ""))
            if isinstance(cmd, (list, tuple)):
                # argv-style command: join it so max_len caps the text
                # rather than the number of list elements.
                cmd = " ".join(str(piece) for piece in cmd)
            if cmd:
                return f"[tool call] {name}: {str(cmd)[:max_len]}"
        return f"[tool call] {name}({raw_args[:max_len]})"

    if t == "function_call_output":
        output = item.get("output", "")
        if output is None:
            output = ""
        elif not isinstance(output, str):
            output = str(output)
        if len(output) > max_len:
            return f"[tool result] {output[:max_len]}..."
        return f"[tool result] {output}"

    return f"[{t}]"
|
||||||
|
|
||||||
|
def _extract_files(items):
    """Collect paths that shell commands in ``items`` redirect output to.

    Only ``function_call`` items are inspected. Their JSON ``arguments`` are
    decoded and the ``cmd`` (or ``command``) value is scanned for ``>`` and
    ``>>`` redirections. This is a best-effort textual scan, not a shell
    parser.

    Args:
        items: Iterable of conversation items (dicts); anything else is
            ignored.

    Returns:
        A set of redirect targets that contain a ``/``, do not start with
        ``-``, and are not ``/dev/null``.
    """
    files = set()
    for item in items:
        if not isinstance(item, dict) or item.get("type") != "function_call":
            continue

        args = item.get("arguments", "{}")
        if isinstance(args, str):
            try:
                args = json.loads(args)
            except ValueError:
                # Unparseable arguments: nothing to extract from this call.
                continue
        if not isinstance(args, dict):
            continue

        cmd = args.get("cmd", args.get("command", ""))
        if isinstance(cmd, (list, tuple)):
            # argv-style command (e.g. ["bash", "-lc", "..."]): scan it as
            # one command line.
            cmd = " ".join(str(piece) for piece in cmd)
        if not isinstance(cmd, str):
            continue

        # Collapse ">>" to ">" so a single split handles both redirect forms
        # and an append redirect does not produce an empty segment.
        for segment in cmd.replace(">>", ">").split(">")[1:]:
            tokens = segment.split()
            if not tokens:
                continue
            target = tokens[0].strip("'\"")
            if (
                target
                and not target.startswith("-")
                and "/" in target
                and target != "/dev/null"  # discarding output touches no file
            ):
                files.add(target)
    return files
|
||||||
|
|
||||||
|
def _compact_input(input_data):
|
||||||
|
if not isinstance(input_data, list) or len(input_data) <= _MAX_INPUT_ITEMS:
|
||||||
out = []
|
out = []
|
||||||
for item in input_data:
|
for item in input_data:
|
||||||
if item.get("type") == "function_call_output":
|
if isinstance(item, dict) and item.get("type") == "function_call_output":
|
||||||
o = item.get("output", "")
|
o = item.get("output", "")
|
||||||
if len(o) > _MAX_TOOL_OUTPUT_CHARS:
|
if len(o) > _MAX_TOOL_OUTPUT_CHARS:
|
||||||
item = dict(item)
|
item = dict(item)
|
||||||
item["output"] = o[:_MAX_TOOL_OUTPUT_CHARS] + f"\n... [truncated {len(o) - _MAX_TOOL_OUTPUT_CHARS} chars]"
|
item["output"] = o[:_MAX_TOOL_OUTPUT_CHARS] + f"\n... [truncated {len(o) - _MAX_TOOL_OUTPUT_CHARS} chars]"
|
||||||
print(f"[trim] tool output truncated {len(o)} -> {_MAX_TOOL_OUTPUT_CHARS}", file=sys.stderr)
|
print(f"[compact] tool output truncated {len(o)} -> {_MAX_TOOL_OUTPUT_CHARS}", file=sys.stderr)
|
||||||
out.append(item)
|
out.append(item)
|
||||||
if len(out) <= _MAX_INPUT_ITEMS:
|
|
||||||
return out
|
return out
|
||||||
|
|
||||||
head_end = 0
|
head_end = 0
|
||||||
for i, item in enumerate(out):
|
for i, item in enumerate(input_data):
|
||||||
t = item.get("type")
|
t = item.get("type")
|
||||||
if t == "message" and item.get("role") in ("developer", "system"):
|
if t == "message" and item.get("role") in ("developer", "system"):
|
||||||
head_end = i + 1
|
head_end = i + 1
|
||||||
@@ -191,14 +234,62 @@ def _trim_input(input_data):
|
|||||||
head_end = i + 1
|
head_end = i + 1
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
head = out[:head_end]
|
|
||||||
tail_keep = _MAX_INPUT_ITEMS - len(head)
|
head = input_data[:head_end]
|
||||||
tail = out[-tail_keep:]
|
tail = input_data[-_COMPACT_KEEP_RECENT:]
|
||||||
trimmed = len(out) - len(head) - len(tail)
|
body = input_data[head_end:-_COMPACT_KEEP_RECENT]
|
||||||
if trimmed > 0:
|
|
||||||
print(f"[trim] {len(out)} items -> {len(head) + len(tail)} (dropped {trimmed} old items)", file=sys.stderr)
|
if not body:
|
||||||
return head + tail
|
return head + tail
|
||||||
|
|
||||||
|
for item in tail:
|
||||||
|
if isinstance(item, dict) and item.get("type") == "function_call_output":
|
||||||
|
o = item.get("output", "")
|
||||||
|
if len(o) > _MAX_TOOL_OUTPUT_CHARS:
|
||||||
|
item["output"] = o[:_MAX_TOOL_OUTPUT_CHARS] + f"\n... [truncated {len(o) - _MAX_TOOL_OUTPUT_CHARS} chars]"
|
||||||
|
|
||||||
|
user_queries = []
|
||||||
|
for item in body:
|
||||||
|
if item.get("type") == "message" and item.get("role") == "user":
|
||||||
|
for p in item.get("content", []):
|
||||||
|
if p.get("type") == "input_text":
|
||||||
|
user_queries.append(p.get("text", "")[:300])
|
||||||
|
assistant_msgs = []
|
||||||
|
for item in body:
|
||||||
|
if item.get("type") == "message" and item.get("role") == "assistant":
|
||||||
|
for p in item.get("content", []):
|
||||||
|
if p.get("type") == "output_text":
|
||||||
|
assistant_msgs.append(p.get("text", "")[:300])
|
||||||
|
|
||||||
|
tool_summaries = []
|
||||||
|
for item in body:
|
||||||
|
if item.get("type") in ("function_call", "function_call_output"):
|
||||||
|
tool_summaries.append(_item_summary(item, max_len=150))
|
||||||
|
|
||||||
|
files = _extract_files(body)
|
||||||
|
|
||||||
|
summary_lines = [f"[Auto-compacted: {len(body)} earlier turns summarized to preserve context]"]
|
||||||
|
if user_queries:
|
||||||
|
summary_lines.append(f"User requests: {'; '.join(user_queries[-3:])}")
|
||||||
|
if assistant_msgs:
|
||||||
|
summary_lines.append(f"Assistant responses: {'; '.join(assistant_msgs[-3:])}")
|
||||||
|
if tool_summaries:
|
||||||
|
summary_lines.append(f"Actions taken ({len(tool_summaries)} steps):")
|
||||||
|
for ts in tool_summaries[-15:]:
|
||||||
|
summary_lines.append(f" {ts}")
|
||||||
|
if files:
|
||||||
|
summary_lines.append(f"Files touched: {', '.join(sorted(files)[-10:])}")
|
||||||
|
|
||||||
|
summary_text = "\n".join(summary_lines)
|
||||||
|
summary_msg = {
|
||||||
|
"type": "message",
|
||||||
|
"role": "user",
|
||||||
|
"content": [{"type": "input_text", "text": summary_text}]
|
||||||
|
}
|
||||||
|
|
||||||
|
print(f"[compact] {len(input_data)} items -> {len(head) + 1 + len(tail)} (compacted {len(body)} old items into summary)", file=sys.stderr)
|
||||||
|
return head + [summary_msg] + tail
|
||||||
|
|
||||||
# ═══════════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════════
|
||||||
# OpenAI-compat backend
|
# OpenAI-compat backend
|
||||||
# ═══════════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════════
|
||||||
@@ -750,7 +841,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
prev_id = body.get("previous_response_id")
|
prev_id = body.get("previous_response_id")
|
||||||
raw_input = body.get("input", "")
|
raw_input = body.get("input", "")
|
||||||
input_data = resolve_previous_response(body)
|
input_data = resolve_previous_response(body)
|
||||||
input_data = _trim_input(input_data)
|
input_data = _compact_input(input_data)
|
||||||
body["input"] = input_data
|
body["input"] = input_data
|
||||||
|
|
||||||
raw_types = [i.get("type") for i in raw_input] if isinstance(raw_input, list) else "str"
|
raw_types = [i.get("type") for i in raw_input] if isinstance(raw_input, list) else "str"
|
||||||
|
|||||||
Reference in New Issue
Block a user