feat: aggressive OpenAdapter policy + context-length retry with extreme compaction

- Lowered the openadapter policy's tool_output_limit 6000 -> 2000 and max_input_items 30 -> 15
- Switched the openadapter policy's compaction from balanced to aggressive
- Added automatic retry with extreme compaction on context_length_exceeded errors
- Prevents upstream context-length failures from killing sessions
This commit is contained in:
Roman | RyzenAdvanced
2026-05-26 14:01:04 +04:00
Unverified
parent c3a21950e8
commit 360f88e3bd

View File

@@ -1689,7 +1689,7 @@ _PROVIDER_POLICIES = {
"openrouter": {"reasoning_mode": "provider_default", "max_tokens": 32768, "strip_reasoning": True,
"tool_output_limit": 6000, "max_input_items": 35, "compaction": "balanced"},
"openadapter": {"reasoning_mode": "off", "max_tokens": 32768, "strip_reasoning": True,
"tool_output_limit": 6000, "max_input_items": 30, "compaction": "balanced"},
"tool_output_limit": 2000, "max_input_items": 15, "compaction": "aggressive"},
"cloudcode-pa": {"compaction": "aggressive", "context_size": 1000000,
"tool_output_limit": 6000, "max_input_items": 60},
"googleapis": {"compaction": "balanced", "context_size": 1000000,
@@ -4762,6 +4762,22 @@ class Handler(http.server.BaseHTTPRequestHandler):
upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream))
except urllib.error.HTTPError as e:
err_body = e.read().decode()
if "context_length_exceeded" in err_body and attempt < max_retries:
print(f"[{self._session_id}] context_length_exceeded (attempt {attempt+1}/{max_retries}), retrying with extreme compaction!", file=sys.stderr)
policy = provider_policy()
if isinstance(input_data, list):
print(f"[{self._session_id}] applying extreme compaction to {len(input_data)} items", file=sys.stderr)
input_data = _crof_compact_for_retry(input_data, model)
body = dict(body)
body["input"] = input_data
messages = oa_input_to_messages(input_data)
messages = _inject_stored_reasoning(messages)
instructions = body.get("instructions", "").strip()
if instructions:
messages.insert(0, {"role": "system", "content": instructions})
chat_body = self._build_chat_body(model, messages, body, stream)
chat_body_b = json.dumps(chat_body).encode()
continue
if e.code in (429, 502, 503) and attempt < max_retries:
if e.code == 429 and _api_key_pool:
pool_acct = _api_key_pool.get()