feat: aggressive OpenAdapter policy + context-length retry with extreme compaction
- Lowered the OpenAdapter tool_output_limit 6000 -> 2000 and max_input_items 30 -> 15
- Switched OpenAdapter compaction from balanced to aggressive
- Added automatic retry with extreme compaction on context_length_exceeded errors
- Prevents upstream context-length failures from killing sessions
This commit is contained in:
@@ -1689,7 +1689,7 @@ _PROVIDER_POLICIES = {
|
|||||||
"openrouter": {"reasoning_mode": "provider_default", "max_tokens": 32768, "strip_reasoning": True,
|
"openrouter": {"reasoning_mode": "provider_default", "max_tokens": 32768, "strip_reasoning": True,
|
||||||
"tool_output_limit": 6000, "max_input_items": 35, "compaction": "balanced"},
|
"tool_output_limit": 6000, "max_input_items": 35, "compaction": "balanced"},
|
||||||
"openadapter": {"reasoning_mode": "off", "max_tokens": 32768, "strip_reasoning": True,
|
"openadapter": {"reasoning_mode": "off", "max_tokens": 32768, "strip_reasoning": True,
|
||||||
"tool_output_limit": 6000, "max_input_items": 30, "compaction": "balanced"},
|
"tool_output_limit": 2000, "max_input_items": 15, "compaction": "aggressive"},
|
||||||
"cloudcode-pa": {"compaction": "aggressive", "context_size": 1000000,
|
"cloudcode-pa": {"compaction": "aggressive", "context_size": 1000000,
|
||||||
"tool_output_limit": 6000, "max_input_items": 60},
|
"tool_output_limit": 6000, "max_input_items": 60},
|
||||||
"googleapis": {"compaction": "balanced", "context_size": 1000000,
|
"googleapis": {"compaction": "balanced", "context_size": 1000000,
|
||||||
@@ -4762,6 +4762,22 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream))
|
upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream))
|
||||||
except urllib.error.HTTPError as e:
|
except urllib.error.HTTPError as e:
|
||||||
err_body = e.read().decode()
|
err_body = e.read().decode()
|
||||||
|
if "context_length_exceeded" in err_body and attempt < max_retries:
|
||||||
|
print(f"[{self._session_id}] context_length_exceeded (attempt {attempt+1}/{max_retries}), retrying with extreme compaction!", file=sys.stderr)
|
||||||
|
policy = provider_policy()
|
||||||
|
if isinstance(input_data, list):
|
||||||
|
print(f"[{self._session_id}] applying extreme compaction to {len(input_data)} items", file=sys.stderr)
|
||||||
|
input_data = _crof_compact_for_retry(input_data, model)
|
||||||
|
body = dict(body)
|
||||||
|
body["input"] = input_data
|
||||||
|
messages = oa_input_to_messages(input_data)
|
||||||
|
messages = _inject_stored_reasoning(messages)
|
||||||
|
instructions = body.get("instructions", "").strip()
|
||||||
|
if instructions:
|
||||||
|
messages.insert(0, {"role": "system", "content": instructions})
|
||||||
|
chat_body = self._build_chat_body(model, messages, body, stream)
|
||||||
|
chat_body_b = json.dumps(chat_body).encode()
|
||||||
|
continue
|
||||||
if e.code in (429, 502, 503) and attempt < max_retries:
|
if e.code in (429, 502, 503) and attempt < max_retries:
|
||||||
if e.code == 429 and _api_key_pool:
|
if e.code == 429 and _api_key_pool:
|
||||||
pool_acct = _api_key_pool.get()
|
pool_acct = _api_key_pool.get()
|
||||||
|
|||||||
Reference in New Issue
Block a user