From 360f88e3bde09e84932ab86f2ad3361e4bbd0411 Mon Sep 17 00:00:00 2001 From: Roman | RyzenAdvanced Date: Tue, 26 May 2026 14:01:04 +0400 Subject: [PATCH] feat: aggressive OpenAdapter policy + context-length retry with extreme compaction - Lowered tool_output_limit 6000 -> 2000, max_input_items 30 -> 15 - Switched compaction from balanced to aggressive - Added automatic retry with extreme compaction on context_length_exceeded errors - Prevents upstream context-length failures from killing sessions --- src/translate-proxy.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/translate-proxy.py b/src/translate-proxy.py index e3dda35..9701a28 100755 --- a/src/translate-proxy.py +++ b/src/translate-proxy.py @@ -1689,7 +1689,7 @@ _PROVIDER_POLICIES = { "openrouter": {"reasoning_mode": "provider_default", "max_tokens": 32768, "strip_reasoning": True, "tool_output_limit": 6000, "max_input_items": 35, "compaction": "balanced"}, "openadapter": {"reasoning_mode": "off", "max_tokens": 32768, "strip_reasoning": True, - "tool_output_limit": 6000, "max_input_items": 30, "compaction": "balanced"}, + "tool_output_limit": 2000, "max_input_items": 15, "compaction": "aggressive"}, "cloudcode-pa": {"compaction": "aggressive", "context_size": 1000000, "tool_output_limit": 6000, "max_input_items": 60}, "googleapis": {"compaction": "balanced", "context_size": 1000000, @@ -4762,6 +4762,22 @@ class Handler(http.server.BaseHTTPRequestHandler): upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream)) except urllib.error.HTTPError as e: err_body = e.read().decode() + if "context_length_exceeded" in err_body and attempt < max_retries: + print(f"[{self._session_id}] context_length_exceeded (attempt {attempt+1}/{max_retries}), retrying with extreme compaction!", file=sys.stderr) + policy = provider_policy() + if isinstance(input_data, list): + print(f"[{self._session_id}] applying extreme compaction to {len(input_data)} items", file=sys.stderr) + input_data = _crof_compact_for_retry(input_data, model) + body = dict(body) + body["input"] = input_data + messages = oa_input_to_messages(input_data) + messages = _inject_stored_reasoning(messages) + instructions = body.get("instructions", "").strip() + if instructions: + messages.insert(0, {"role": "system", "content": instructions}) + chat_body = self._build_chat_body(model, messages, body, stream) + chat_body_b = json.dumps(chat_body).encode() + continue if e.code in (429, 502, 503) and attempt < max_retries: if e.code == 429 and _api_key_pool: pool_acct = _api_key_pool.get()