diff --git a/CHANGELOG.md b/CHANGELOG.md index ba58ebf..d4fc79a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,26 @@ # Changelog +## v3.10.9 (2026-05-25) + +**Antigravity Overhaul — Context Normalizer, Claude Thinking Fix, Endpoint Lockdown** + +### Antigravity Endpoint Lockdown +- Production-only: `cloudcode-pa.googleapis.com` by default +- Sandbox/staging blocked unless `ALLOW_ANTIGRAVITY_STAGING=1` +- 403 SERVICE_DISABLED falls through, 429 returns to client + +### AntigravityContextNormalizer +- Bounded context — no more 136-item polluted requests for "hi" +- Simple message detector, auto-reset polluted context +- Duplicate removal, tool output budget, hard char limits + +### Claude Thinking Fix (Antigravity-only) +- Fixed 400 error: `maxOutputTokens=64000` when thinking enabled +- Snake_case config, VALIDATED toolConfig, proper budgets + +### z.ai / OpenRouter (cobra91 PR #4) +- Full OpenClaw attribution headers, OpenRouter caching + ## v3.10.8 (2026-05-25) **OAuth & Antigravity Endpoint Fixes** diff --git a/codex-launcher_3.10.8_all.deb b/codex-launcher_3.10.8_all.deb deleted file mode 100644 index 239e7f1..0000000 Binary files a/codex-launcher_3.10.8_all.deb and /dev/null differ diff --git a/codex-launcher_3.10.9_all.deb b/codex-launcher_3.10.9_all.deb new file mode 100644 index 0000000..4f6c405 Binary files /dev/null and b/codex-launcher_3.10.9_all.deb differ diff --git a/codex_launcher_lib.py b/codex_launcher_lib.py index 9e26c2c..7255a98 100644 --- a/codex_launcher_lib.py +++ b/codex_launcher_lib.py @@ -83,12 +83,22 @@ model_catalog_json = "" """ CHANGELOG = [ - ("3.10.8", "2026-05-25", [ - "Re-OAuth: replaced deprecated OOB flow with PKCE + localhost callback", - "Project auto-discovery: validates project API enabled, searches alternatives if disabled", - "Windows GUI: _google_reoauth now uses PKCE + callback (was broken OOB paste)", - "Windows GUI: endpoint OAuth flow uses shared project discovery helper", - "Linux GUI: endpoint OAuth flow uses shared _oauth_discover_project helper", + ("3.10.9", "2026-05-25", [ + "Antigravity: production-only endpoints (cloudcode-pa.googleapis.com), sandbox blocked unless ALLOW_ANTIGRAVITY_STAGING=1", + "Antigravity: 403 SERVICE_DISABLED falls through, 429 returns to client (no sandbox fallback)", + "AntigravityContextNormalizer: bounded context — simple messages send minimal payload", + "Simple message detector: 'hi' etc sends only user message, no tool history", + "Auto-reset polluted context: 200+ items with simple message resets to minimal", + "Duplicate user message removal, tool output budget (max 2 verbatim, rest summarized)", + "Hard limits: 20 contents, 120K/250K/500K char budgets", + "Claude thinking fix: maxOutputTokens=64000, snake_case thinking config, VALIDATED toolConfig", + "Claude budgets: low=8192, medium=16384, high=32768", + "All fixes scoped to OAUTH_PROVIDER==google-antigravity only", + "Project discovery uses production endpoint (not staging)", + "z.ai: full OpenClaw attribution headers (cobra91 PR #4)", + "OpenRouter: X-OpenRouter-Cache header (cobra91 PR #4)", + "Fix Linux Re-OAuth: load_oauth_secrets() was undefined", + "Fix GLib.idle_add lambda returning truthy tuple", ]), ("3.10.7", "2026-05-25", [ "Prompt Enhancer: per-provider toggle to improve prompt clarity after compaction", diff --git a/install.sh b/install.sh index a98fe33..1c7dd21 100755 --- a/install.sh +++ b/install.sh @@ -3,11 +3,11 @@ set -e SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -if [ -f "$SCRIPT_DIR/codex-launcher_3.10.8_all.deb" ]; then - echo "Installing codex-launcher_3.10.8_all.deb ..." - sudo dpkg -i "$SCRIPT_DIR/codex-launcher_3.10.8_all.deb" +if [ -f "$SCRIPT_DIR/codex-launcher_3.10.9_all.deb" ]; then + echo "Installing codex-launcher_3.10.9_all.deb ..." + sudo dpkg -i "$SCRIPT_DIR/codex-launcher_3.10.9_all.deb" echo "" - echo "Installed v3.10.8 via .deb package." + echo "Installed v3.10.9 via .deb package." echo " translate-proxy.py -> /usr/bin/translate-proxy.py" echo " codex-launcher-gui -> /usr/bin/codex-launcher-gui" echo " cleanup-codex-stale -> /usr/bin/cleanup-codex-stale.sh" diff --git a/src/codex_launcher_lib.py b/src/codex_launcher_lib.py index 0768eb1..7255a98 100644 --- a/src/codex_launcher_lib.py +++ b/src/codex_launcher_lib.py @@ -83,15 +83,22 @@ model_catalog_json = "" """ CHANGELOG = [ - ("3.10.8", "2026-05-25", [ - "Fix Re-OAuth buttons: load_oauth_secrets() was undefined in Linux GUI", - "Re-OAuth: replaced deprecated OOB flow with PKCE + localhost callback", - "Proxy: prefer production cloudcode-pa over staging/sandbox endpoints", - "Proxy: fallthrough 403 SERVICE_DISABLED to next endpoint", - "Project discovery: validate against production endpoint, not staging", - "Antigravity preset base_url changed to production (was daily-cloudcode-pa.sandbox)", - "Fix GLib.idle_add lambda returning truthy tuple (caused repeated calls)", - "Windows GUI: project discovery also uses production endpoint", + ("3.10.9", "2026-05-25", [ + "Antigravity: production-only endpoints (cloudcode-pa.googleapis.com), sandbox blocked unless ALLOW_ANTIGRAVITY_STAGING=1", + "Antigravity: 403 SERVICE_DISABLED falls through, 429 returns to client (no sandbox fallback)", + "AntigravityContextNormalizer: bounded context — simple messages send minimal payload", + "Simple message detector: 'hi' etc sends only user message, no tool history", + "Auto-reset polluted context: 200+ items with simple message resets to minimal", + "Duplicate user message removal, tool output budget (max 2 verbatim, rest summarized)", + "Hard limits: 20 contents, 120K/250K/500K char budgets", + "Claude thinking fix: maxOutputTokens=64000, snake_case thinking config, VALIDATED toolConfig", + "Claude budgets: low=8192, medium=16384, high=32768", + "All fixes scoped to OAUTH_PROVIDER==google-antigravity only", + "Project discovery uses production endpoint (not staging)", + "z.ai: full OpenClaw attribution headers (cobra91 PR #4)", + "OpenRouter: X-OpenRouter-Cache header (cobra91 PR #4)", + "Fix Linux Re-OAuth: load_oauth_secrets() was undefined", + "Fix GLib.idle_add lambda returning truthy tuple", ]), ("3.10.7", "2026-05-25", [ "Prompt Enhancer: per-provider toggle to improve prompt clarity after compaction", diff --git a/src/translate-proxy.py b/src/translate-proxy.py index 703104f..0eaf6e2 100755 --- a/src/translate-proxy.py +++ b/src/translate-proxy.py @@ -1300,7 +1300,6 @@ def forwarded_headers(request_headers, extra=None, browser_ua=False): headers.update(extra) return headers - def _openrouter_extra(): if not TARGET_URL: return {} @@ -4258,6 +4257,177 @@ def _auto_continue_gemini(handler, flush_event, message_id, model, gen_config, g break return accumulated_text +_ANTIGRAVITY_MAX_CONTENTS = 20 +_ANTIGRAVITY_MAX_TOOL_VERBATIM = 2 +_ANTIGRAVITY_MAX_TOOL_CHARS = 2000 +_ANTIGRAVITY_MAX_OLD_SUMMARY_CHARS = 1200 +_ANTIGRAVITY_SOFT_CHARS = 120000 +_ANTIGRAVITY_HARD_CHARS = 250000 +_ANTIGRAVITY_EMERGENCY_CHARS = 500000 +_ANTIGRAVITY_SIMPLE_WORDS = frozenset({"hi", "hello", "hey", "test", "ping", "thanks", "thank you", "ok", "okay", "yes", "no", "cool", "nice", "good", "great", "done", "go", "stop", "yep", "nope", "sure", "right", "correct", "continue", "cont", "k", "thx", "ty", "np", "lol", "brb", "bye"}) +_ANTIGRAVITY_EDIT_WORDS = frozenset(("change", "fix", "update", "redesign", "rewrite", "modify", "improve", "replace", "edit", "make it", "add", "remove", "delete", "rename", "move", "convert", "create", "build", "implement")) +_ANTIGRAVITY_REFERENCE_WORDS = frozenset(("previous", "file", "error", "again", "that", "this", "it", "same", "last", "above", "earlier", "before", "earlier output", "last error", "previous result", "what was", "show me", "give me")) + +def _antigravity_is_simple_user(text): + if not text: + return True + stripped = text.strip().lower() + if stripped in _ANTIGRAVITY_SIMPLE_WORDS: + return True + if len(stripped) < 30: + words = set(stripped.split()) + if not words.intersection(_ANTIGRAVITY_REFERENCE_WORDS) and not words.intersection(_ANTIGRAVITY_EDIT_WORDS): + return True + return False + +def _antigravity_normalize_context(input_data): + if not isinstance(input_data, list) or len(input_data) < 2: + return input_data + + latest_user = "" + latest_user_idx = -1 + for i in range(len(input_data) - 1, -1, -1): + item = input_data[i] + if isinstance(item, dict) and item.get("type") == "message" and item.get("role") == "user": + c = item.get("content", "") + if isinstance(c, str): + latest_user = c + elif isinstance(c, list): + latest_user = "\n".join(p.get("text", p.get("input_text", "")) for p in c if isinstance(p, dict)) + latest_user_idx = i + break + + if not latest_user: + return input_data + + is_simple = _antigravity_is_simple_user(latest_user) + + n_raw = len(input_data) + n_tool_outputs = sum(1 for it in input_data if isinstance(it, dict) and it.get("type") == "function_call_output") + n_tool_calls = sum(1 for it in input_data if isinstance(it, dict) and it.get("type") == "function_call") + + auto_reset = (n_raw > 200 or n_tool_outputs > 20) and is_simple + if os.environ.get("ANTIGRAVITY_AUTO_RESET_POLLUTED_CONTEXT", "1") != "1": + auto_reset = False + + if is_simple and (auto_reset or n_tool_outputs == 0): + system_items = [it for it in input_data if isinstance(it, dict) and it.get("type") == "message" and it.get("role") in ("developer", "system")] + user_item = input_data[latest_user_idx] + result = system_items + [user_item] if system_items else [user_item] + print(f"[antigravity-context] raw_items={n_raw} compacted_items={n_raw} final_items={len(result)}", file=sys.stderr) + print(f"[antigravity-context] raw_tool_outputs={n_tool_outputs} kept_tool_outputs=0", file=sys.stderr) + print(f"[antigravity-context] simple_latest_user=true auto_reset={auto_reset}", file=sys.stderr) + return result + + dev_messages = [] + recent_items = [] + tool_outputs = [] + other_items = [] + + for i, item in enumerate(input_data): + if not isinstance(item, dict): + continue + t = item.get("type") + if t == "message" and item.get("role") in ("developer", "system"): + dev_messages.append(item) + elif t == "function_call_output": + tool_outputs.append((i, item)) + elif t in ("function_call",): + other_items.append((i, item)) + elif t == "message": + recent_items.append((i, item)) + + latest_words = set(latest_user.strip().lower().split()) + has_edit_intent = bool(latest_words.intersection(_ANTIGRAVITY_EDIT_WORDS)) + has_ref_intent = bool(latest_words.intersection(_ANTIGRAVITY_REFERENCE_WORDS)) + keep_tools = 2 if (has_edit_intent or has_ref_intent) else 1 + + kept_tools = tool_outputs[-keep_tools:] if tool_outputs and (has_edit_intent or has_ref_intent) else [] + + for idx_t, t_item in enumerate(kept_tools): + orig = t_item[1] + out = orig.get("output", "") + if isinstance(out, str) and len(out) > _ANTIGRAVITY_MAX_TOOL_CHARS: + new_item = dict(orig) + new_item["output"] = out[:_ANTIGRAVITY_MAX_TOOL_CHARS] + f"\n... [truncated: kept {_ANTIGRAVITY_MAX_TOOL_CHARS} of {len(out)} chars]" + kept_tools[idx_t] = (t_item[0], new_item) + + n_summarized = len(tool_outputs) - len(kept_tools) + + tail_start = max(0, len(recent_items) - 6) + recent_tail = recent_items[tail_start:] + + tool_call_ids = set() + for _, t_item in kept_tools: + cid = t_item.get("call_id", t_item.get("id", "")) + if cid: + tool_call_ids.add(cid) + + paired_calls = [] + for idx, item in other_items: + cid = item.get("call_id", item.get("id", "")) + if cid in tool_call_ids: + paired_calls.append((idx, item)) + + result = list(dev_messages) + + if n_summarized > 0: + summary_text = f"[Tool history summary: {n_summarized} older tool outputs omitted. {n_tool_calls} prior function calls were made for file inspection/editing.]" + result.append({"type": "message", "role": "user", "content": [{"type": "input_text", "text": summary_text}]}) + + for _, call_item in paired_calls: + result.append(call_item) + + for _, tool_item in kept_tools: + result.append(tool_item) + + for _, msg_item in recent_tail: + if msg_item is not input_data[latest_user_idx]: + result.append(msg_item) + + latest_hash = hashlib.sha256(" ".join(latest_user.strip().split()).encode()).hexdigest() + already_present = False + for r in result: + if isinstance(r, dict) and r.get("type") == "message" and r.get("role") == "user": + c = r.get("content", "") + if isinstance(c, str): + rh = hashlib.sha256(" ".join(c.strip().split()).encode()).hexdigest() + elif isinstance(c, list): + combined = " ".join(p.get("text", p.get("input_text", "")) for p in c if isinstance(p, dict)) + rh = hashlib.sha256(" ".join(combined.strip().split()).encode()).hexdigest() + else: + rh = "" + if rh == latest_hash: + already_present = True + break + + if not already_present: + result.append(input_data[latest_user_idx]) + + total_chars = sum(len(json.dumps(it, ensure_ascii=False)) for it in result) + + if total_chars > _ANTIGRAVITY_EMERGENCY_CHARS: + print(f"[antigravity-context] EMERGENCY: {total_chars} chars exceeds limit, resetting to minimal", file=sys.stderr) + result = list(dev_messages) + [input_data[latest_user_idx]] + total_chars = sum(len(json.dumps(it, ensure_ascii=False)) for it in result) + + while len(result) > _ANTIGRAVITY_MAX_CONTENTS and total_chars > _ANTIGRAVITY_SOFT_CHARS: + for i in range(1, len(result) - 1): + if isinstance(result[i], dict) and result[i].get("type") in ("message", "function_call_output"): + removed = result.pop(i) + total_chars -= len(json.dumps(removed, ensure_ascii=False)) + break + else: + break + + est_tokens = total_chars // 4 + print(f"[antigravity-context] raw_items={n_raw} final_items={len(result)}", file=sys.stderr) + print(f"[antigravity-context] raw_tool_outputs={n_tool_outputs} kept_tool_outputs={len(kept_tools)} summarized_tool_outputs={n_summarized}", file=sys.stderr) + print(f"[antigravity-context] simple_latest_user={is_simple} auto_reset={auto_reset}", file=sys.stderr) + print(f"[antigravity-context] final_chars={total_chars} estimated_tokens={est_tokens}", file=sys.stderr) + + return result + class Handler(http.server.BaseHTTPRequestHandler): protocol_version = "HTTP/1.1" @@ -4623,6 +4793,11 @@ class Handler(http.server.BaseHTTPRequestHandler): body = dict(body) body["input"] = input_data + if OAUTH_PROVIDER == "google-antigravity" and isinstance(input_data, list): + input_data = _antigravity_normalize_context(input_data) + body = dict(body) + body["input"] = input_data + access_token = _refresh_oauth_token() token_name = "google-antigravity-oauth-token.json" if OAUTH_PROVIDER == "google-antigravity" else "google-cli-oauth-token.json" token_path = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy", token_name) @@ -4743,7 +4918,26 @@ class Handler(http.server.BaseHTTPRequestHandler): if body.get("top_p") is not None: gen_config["topP"] = body["top_p"] - if REASONING_ENABLED and REASONING_EFFORT != "none": + _is_claude_model = "claude" in model.lower() + _is_claude_thinking = _is_claude_model and "thinking" in model.lower() + + if OAUTH_PROVIDER == "google-antigravity" and _is_claude_thinking: + if REASONING_ENABLED and REASONING_EFFORT != "none": + budget = {"low": 8192, "medium": 16384, "high": 32768}.get(REASONING_EFFORT, 16384) + else: + budget = 16384 + gen_config["thinkingConfig"] = { + "include_thoughts": True, + "thinking_budget": budget, + } + current_max = gen_config.get("maxOutputTokens", 0) + if not current_max or current_max <= budget: + gen_config["maxOutputTokens"] = 64000 + print(f"[antigravity-claude] thinking model={model} budget={budget} maxOutputTokens={gen_config.get('maxOutputTokens')}", file=sys.stderr) + elif OAUTH_PROVIDER == "google-antigravity" and _is_claude_model: + if "thinkingConfig" in gen_config: + del gen_config["thinkingConfig"] + elif REASONING_ENABLED and REASONING_EFFORT != "none": budget = {"low": 2048, "medium": 8192, "high": 24576}.get(REASONING_EFFORT, 8192) gen_config["thinkingConfig"] = {"includeThoughts": True, "thinkingBudget": budget} @@ -4823,6 +5017,11 @@ class Handler(http.server.BaseHTTPRequestHandler): if gemini_tools: request_body["tools"] = gemini_tools + if OAUTH_PROVIDER == "google-antigravity" and _is_claude_model and gemini_tools: + request_body["toolConfig"] = {"functionCallingConfig": {"mode": "VALIDATED"}} + if _is_claude_thinking: + print(f"[antigravity-claude] applied VALIDATED toolConfig for thinking model", file=sys.stderr) + wrapped = { "project": project_id, "model": model, @@ -4833,13 +5032,17 @@ class Handler(http.server.BaseHTTPRequestHandler): wrapped["userAgent"] = "antigravity" wrapped["requestId"] = f"agent-{uuid.uuid4().hex[:12]}" - endpoints = ([ - "https://cloudcode-pa.googleapis.com", - "https://daily-cloudcode-pa.sandbox.googleapis.com", - "https://autopush-cloudcode-pa.sandbox.googleapis.com", - ] if OAUTH_PROVIDER == "google-antigravity" else [ - "https://cloudcode-pa.googleapis.com", - ]) + _allow_staging = os.environ.get("ALLOW_ANTIGRAVITY_STAGING", "0") == "1" + if OAUTH_PROVIDER == "google-antigravity": + _antigravity_endpoints = ["https://cloudcode-pa.googleapis.com"] + if _allow_staging: + _antigravity_endpoints.extend([ + "https://daily-cloudcode-pa.sandbox.googleapis.com", + "https://autopush-cloudcode-pa.sandbox.googleapis.com", + ]) + endpoints = _antigravity_endpoints + else: + endpoints = ["https://cloudcode-pa.googleapis.com"] action = "streamGenerateContent" if stream else "generateContent" url_suffix = f"v1internal:{action}?alt=sse" if stream else f"v1internal:{action}" @@ -4888,7 +5091,7 @@ class Handler(http.server.BaseHTTPRequestHandler): if e.code == 403 and "SERVICE_DISABLED" in err_body[:500] and ep != endpoints[-1]: print(f"[{self._session_id}] {ep} SERVICE_DISABLED, trying next endpoint", file=sys.stderr) continue - if e.code == 429 and ep != endpoints[-1]: + if e.code == 429 and ep != endpoints[-1] and _allow_staging: print(f"[{self._session_id}] {ep} HTTP 429, trying next endpoint", file=sys.stderr) continue if e.code == 429: diff --git a/translate-proxy.py b/translate-proxy.py index be60045..0eaf6e2 100755 --- a/translate-proxy.py +++ b/translate-proxy.py @@ -1300,6 +1300,26 @@ def forwarded_headers(request_headers, extra=None, browser_ua=False): headers.update(extra) return headers +def _openrouter_extra(): + if not TARGET_URL: + return {} + if "z.ai" in TARGET_URL: + return { + "HTTP-Referer": "https://openclaw.ai", + "X-OpenRouter-Title": "OpenClaw", + "X-OpenRouter-Categories": + "cli-agent,cloud-agent,programming-app,creative-writing," + "writing-assistant,general-chat,personal-agent", + } + if "openrouter.ai" in TARGET_URL: + return { + "HTTP-Referer": "https://chats-llm.com", + "X-OpenRouter-Title": "Chats-LLM", + "X-OpenRouter-Categories": "general-chat, ide-extension", + "X-OpenRouter-Cache": "true", + } + return {} + _MAX_INPUT_ITEMS = 30 _MAX_TOOL_OUTPUT_CHARS = 8000 _COMPACT_KEEP_RECENT = 10 @@ -4237,6 +4257,177 @@ def _auto_continue_gemini(handler, flush_event, message_id, model, gen_config, g break return accumulated_text +_ANTIGRAVITY_MAX_CONTENTS = 20 +_ANTIGRAVITY_MAX_TOOL_VERBATIM = 2 +_ANTIGRAVITY_MAX_TOOL_CHARS = 2000 +_ANTIGRAVITY_MAX_OLD_SUMMARY_CHARS = 1200 +_ANTIGRAVITY_SOFT_CHARS = 120000 +_ANTIGRAVITY_HARD_CHARS = 250000 +_ANTIGRAVITY_EMERGENCY_CHARS = 500000 +_ANTIGRAVITY_SIMPLE_WORDS = frozenset({"hi", "hello", "hey", "test", "ping", "thanks", "thank you", "ok", "okay", "yes", "no", "cool", "nice", "good", "great", "done", "go", "stop", "yep", "nope", "sure", "right", "correct", "continue", "cont", "k", "thx", "ty", "np", "lol", "brb", "bye"}) +_ANTIGRAVITY_EDIT_WORDS = frozenset(("change", "fix", "update", "redesign", "rewrite", "modify", "improve", "replace", "edit", "make it", "add", "remove", "delete", "rename", "move", "convert", "create", "build", "implement")) +_ANTIGRAVITY_REFERENCE_WORDS = frozenset(("previous", "file", "error", "again", "that", "this", "it", "same", "last", "above", "earlier", "before", "earlier output", "last error", "previous result", "what was", "show me", "give me")) + +def _antigravity_is_simple_user(text): + if not text: + return True + stripped = text.strip().lower() + if stripped in _ANTIGRAVITY_SIMPLE_WORDS: + return True + if len(stripped) < 30: + words = set(stripped.split()) + if not words.intersection(_ANTIGRAVITY_REFERENCE_WORDS) and not words.intersection(_ANTIGRAVITY_EDIT_WORDS): + return True + return False + +def _antigravity_normalize_context(input_data): + if not isinstance(input_data, list) or len(input_data) < 2: + return input_data + + latest_user = "" + latest_user_idx = -1 + for i in range(len(input_data) - 1, -1, -1): + item = input_data[i] + if isinstance(item, dict) and item.get("type") == "message" and item.get("role") == "user": + c = item.get("content", "") + if isinstance(c, str): + latest_user = c + elif isinstance(c, list): + latest_user = "\n".join(p.get("text", p.get("input_text", "")) for p in c if isinstance(p, dict)) + latest_user_idx = i + break + + if not latest_user: + return input_data + + is_simple = _antigravity_is_simple_user(latest_user) + + n_raw = len(input_data) + n_tool_outputs = sum(1 for it in input_data if isinstance(it, dict) and it.get("type") == "function_call_output") + n_tool_calls = sum(1 for it in input_data if isinstance(it, dict) and it.get("type") == "function_call") + + auto_reset = (n_raw > 200 or n_tool_outputs > 20) and is_simple + if os.environ.get("ANTIGRAVITY_AUTO_RESET_POLLUTED_CONTEXT", "1") != "1": + auto_reset = False + + if is_simple and (auto_reset or n_tool_outputs == 0): + system_items = [it for it in input_data if isinstance(it, dict) and it.get("type") == "message" and it.get("role") in ("developer", "system")] + user_item = input_data[latest_user_idx] + result = system_items + [user_item] if system_items else [user_item] + print(f"[antigravity-context] raw_items={n_raw} compacted_items={n_raw} final_items={len(result)}", file=sys.stderr) + print(f"[antigravity-context] raw_tool_outputs={n_tool_outputs} kept_tool_outputs=0", file=sys.stderr) + print(f"[antigravity-context] simple_latest_user=true auto_reset={auto_reset}", file=sys.stderr) + return result + + dev_messages = [] + recent_items = [] + tool_outputs = [] + other_items = [] + + for i, item in enumerate(input_data): + if not isinstance(item, dict): + continue + t = item.get("type") + if t == "message" and item.get("role") in ("developer", "system"): + dev_messages.append(item) + elif t == "function_call_output": + tool_outputs.append((i, item)) + elif t in ("function_call",): + other_items.append((i, item)) + elif t == "message": + recent_items.append((i, item)) + + latest_words = set(latest_user.strip().lower().split()) + has_edit_intent = bool(latest_words.intersection(_ANTIGRAVITY_EDIT_WORDS)) + has_ref_intent = bool(latest_words.intersection(_ANTIGRAVITY_REFERENCE_WORDS)) + keep_tools = 2 if (has_edit_intent or has_ref_intent) else 1 + + kept_tools = tool_outputs[-keep_tools:] if tool_outputs and (has_edit_intent or has_ref_intent) else [] + + for idx_t, t_item in enumerate(kept_tools): + orig = t_item[1] + out = orig.get("output", "") + if isinstance(out, str) and len(out) > _ANTIGRAVITY_MAX_TOOL_CHARS: + new_item = dict(orig) + new_item["output"] = out[:_ANTIGRAVITY_MAX_TOOL_CHARS] + f"\n... [truncated: kept {_ANTIGRAVITY_MAX_TOOL_CHARS} of {len(out)} chars]" + kept_tools[idx_t] = (t_item[0], new_item) + + n_summarized = len(tool_outputs) - len(kept_tools) + + tail_start = max(0, len(recent_items) - 6) + recent_tail = recent_items[tail_start:] + + tool_call_ids = set() + for _, t_item in kept_tools: + cid = t_item.get("call_id", t_item.get("id", "")) + if cid: + tool_call_ids.add(cid) + + paired_calls = [] + for idx, item in other_items: + cid = item.get("call_id", item.get("id", "")) + if cid in tool_call_ids: + paired_calls.append((idx, item)) + + result = list(dev_messages) + + if n_summarized > 0: + summary_text = f"[Tool history summary: {n_summarized} older tool outputs omitted. {n_tool_calls} prior function calls were made for file inspection/editing.]" + result.append({"type": "message", "role": "user", "content": [{"type": "input_text", "text": summary_text}]}) + + for _, call_item in paired_calls: + result.append(call_item) + + for _, tool_item in kept_tools: + result.append(tool_item) + + for _, msg_item in recent_tail: + if msg_item is not input_data[latest_user_idx]: + result.append(msg_item) + + latest_hash = hashlib.sha256(" ".join(latest_user.strip().split()).encode()).hexdigest() + already_present = False + for r in result: + if isinstance(r, dict) and r.get("type") == "message" and r.get("role") == "user": + c = r.get("content", "") + if isinstance(c, str): + rh = hashlib.sha256(" ".join(c.strip().split()).encode()).hexdigest() + elif isinstance(c, list): + combined = " ".join(p.get("text", p.get("input_text", "")) for p in c if isinstance(p, dict)) + rh = hashlib.sha256(" ".join(combined.strip().split()).encode()).hexdigest() + else: + rh = "" + if rh == latest_hash: + already_present = True + break + + if not already_present: + result.append(input_data[latest_user_idx]) + + total_chars = sum(len(json.dumps(it, ensure_ascii=False)) for it in result) + + if total_chars > _ANTIGRAVITY_EMERGENCY_CHARS: + print(f"[antigravity-context] EMERGENCY: {total_chars} chars exceeds limit, resetting to minimal", file=sys.stderr) + result = list(dev_messages) + [input_data[latest_user_idx]] + total_chars = sum(len(json.dumps(it, ensure_ascii=False)) for it in result) + + while len(result) > _ANTIGRAVITY_MAX_CONTENTS and total_chars > _ANTIGRAVITY_SOFT_CHARS: + for i in range(1, len(result) - 1): + if isinstance(result[i], dict) and result[i].get("type") in ("message", "function_call_output"): + removed = result.pop(i) + total_chars -= len(json.dumps(removed, ensure_ascii=False)) + break + else: + break + + est_tokens = total_chars // 4 + print(f"[antigravity-context] raw_items={n_raw} final_items={len(result)}", file=sys.stderr) + print(f"[antigravity-context] raw_tool_outputs={n_tool_outputs} kept_tool_outputs={len(kept_tools)} summarized_tool_outputs={n_summarized}", file=sys.stderr) + print(f"[antigravity-context] simple_latest_user={is_simple} auto_reset={auto_reset}", file=sys.stderr) + print(f"[antigravity-context] final_chars={total_chars} estimated_tokens={est_tokens}", file=sys.stderr) + + return result + class Handler(http.server.BaseHTTPRequestHandler): protocol_version = "HTTP/1.1" @@ -4450,6 +4641,7 @@ class Handler(http.server.BaseHTTPRequestHandler): fwd = forwarded_headers(self.headers, { "Content-Type": "application/json", "Authorization": f"Bearer {effective_key}", + **_openrouter_extra(), }, browser_ua=True) print(f"[{self._session_id}] POST {target} model={model} stream={stream} items={len(input_data) if isinstance(input_data,list) else 1}", file=sys.stderr) chat_body_b = json.dumps(chat_body).encode() @@ -4601,6 +4793,11 @@ class Handler(http.server.BaseHTTPRequestHandler): body = dict(body) body["input"] = input_data + if OAUTH_PROVIDER == "google-antigravity" and isinstance(input_data, list): + input_data = _antigravity_normalize_context(input_data) + body = dict(body) + body["input"] = input_data + access_token = _refresh_oauth_token() token_name = "google-antigravity-oauth-token.json" if OAUTH_PROVIDER == "google-antigravity" else "google-cli-oauth-token.json" token_path = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy", token_name) @@ -4721,7 +4918,26 @@ class Handler(http.server.BaseHTTPRequestHandler): if body.get("top_p") is not None: gen_config["topP"] = body["top_p"] - if REASONING_ENABLED and REASONING_EFFORT != "none": + _is_claude_model = "claude" in model.lower() + _is_claude_thinking = _is_claude_model and "thinking" in model.lower() + + if OAUTH_PROVIDER == "google-antigravity" and _is_claude_thinking: + if REASONING_ENABLED and REASONING_EFFORT != "none": + budget = {"low": 8192, "medium": 16384, "high": 32768}.get(REASONING_EFFORT, 16384) + else: + budget = 16384 + gen_config["thinkingConfig"] = { + "include_thoughts": True, + "thinking_budget": budget, + } + current_max = gen_config.get("maxOutputTokens", 0) + if not current_max or current_max <= budget: + gen_config["maxOutputTokens"] = 64000 + print(f"[antigravity-claude] thinking model={model} budget={budget} maxOutputTokens={gen_config.get('maxOutputTokens')}", file=sys.stderr) + elif OAUTH_PROVIDER == "google-antigravity" and _is_claude_model: + if "thinkingConfig" in gen_config: + del gen_config["thinkingConfig"] + elif REASONING_ENABLED and REASONING_EFFORT != "none": budget = {"low": 2048, "medium": 8192, "high": 24576}.get(REASONING_EFFORT, 8192) gen_config["thinkingConfig"] = {"includeThoughts": True, "thinkingBudget": budget} @@ -4801,6 +5017,11 @@ class Handler(http.server.BaseHTTPRequestHandler): if gemini_tools: request_body["tools"] = gemini_tools + if OAUTH_PROVIDER == "google-antigravity" and _is_claude_model and gemini_tools: + request_body["toolConfig"] = {"functionCallingConfig": {"mode": "VALIDATED"}} + if _is_claude_thinking: + print(f"[antigravity-claude] applied VALIDATED toolConfig for thinking model", file=sys.stderr) + wrapped = { "project": project_id, "model": model, @@ -4811,13 +5032,17 @@ class Handler(http.server.BaseHTTPRequestHandler): wrapped["userAgent"] = "antigravity" wrapped["requestId"] = f"agent-{uuid.uuid4().hex[:12]}" - endpoints = ([ - "https://cloudcode-pa.googleapis.com", - "https://daily-cloudcode-pa.sandbox.googleapis.com", - "https://autopush-cloudcode-pa.sandbox.googleapis.com", - ] if OAUTH_PROVIDER == "google-antigravity" else [ - "https://cloudcode-pa.googleapis.com", - ]) + _allow_staging = os.environ.get("ALLOW_ANTIGRAVITY_STAGING", "0") == "1" + if OAUTH_PROVIDER == "google-antigravity": + _antigravity_endpoints = ["https://cloudcode-pa.googleapis.com"] + if _allow_staging: + _antigravity_endpoints.extend([ + "https://daily-cloudcode-pa.sandbox.googleapis.com", + "https://autopush-cloudcode-pa.sandbox.googleapis.com", + ]) + endpoints = _antigravity_endpoints + else: + endpoints = ["https://cloudcode-pa.googleapis.com"] action = "streamGenerateContent" if stream else "generateContent" url_suffix = f"v1internal:{action}?alt=sse" if stream else f"v1internal:{action}" @@ -4866,7 +5091,7 @@ class Handler(http.server.BaseHTTPRequestHandler): if e.code == 403 and "SERVICE_DISABLED" in err_body[:500] and ep != endpoints[-1]: print(f"[{self._session_id}] {ep} SERVICE_DISABLED, trying next endpoint", file=sys.stderr) continue - if e.code == 429 and ep != endpoints[-1]: + if e.code == 429 and ep != endpoints[-1] and _allow_staging: print(f"[{self._session_id}] {ep} HTTP 429, trying next endpoint", file=sys.stderr) continue if e.code == 429: @@ -5087,6 +5312,7 @@ class Handler(http.server.BaseHTTPRequestHandler): fwd = forwarded_headers(self.headers, { "Content-Type": "application/json", "Authorization": f"Bearer {r_key}", + **_openrouter_extra(), }, browser_ua=True) print(f"[{self._session_id}] trying route '{route.get('name', r_url)}' model={r_model}", file=sys.stderr) req = urllib.request.Request(target, data=json.dumps(chat_body).encode(), headers=fwd) @@ -5349,6 +5575,7 @@ class Handler(http.server.BaseHTTPRequestHandler): "Content-Type": "application/json", "x-api-key": API_KEY, "anthropic-version": "2023-06-01", + **_openrouter_extra(), }), ) self._forward(req, stream, model, @@ -5416,7 +5643,7 @@ class Handler(http.server.BaseHTTPRequestHandler): "threadId": thread_id, } - fwd = forwarded_headers(self.headers, headers_extra, browser_ua=True) + fwd = forwarded_headers(self.headers, {**headers_extra, **_openrouter_extra()}, browser_ua=True) print(f"[{self._session_id}] POST {target} model={model} stream={stream} attempt={attempt} [command-code]", file=sys.stderr) req = urllib.request.Request( target, @@ -5950,7 +6177,7 @@ class Handler(http.server.BaseHTTPRequestHandler): req_body["reasoning_effort"] = REASONING_EFFORT req_body_b = json.dumps(req_body).encode() - fwd = forwarded_headers(self.headers, headers_extra, browser_ua=True) + fwd = forwarded_headers(self.headers, {**headers_extra, **_openrouter_extra()}, browser_ua=True) print(f"[auto-sense] POST {target} model={model} attempt={attempt} schema={schema.hints()}", file=sys.stderr) req = urllib.request.Request(target, data=req_body_b, headers=fwd)