diff --git a/CHANGELOG.md b/CHANGELOG.md index b4a7242..7547332 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,36 @@ # Changelog +## v3.3.0 (2026-05-20) + +**Antigravity + Gemini CLI OAuth — full Codex agent loop working** + +### Gemini CLI OAuth + Antigravity OAuth +- Split Google OAuth into separate Gemini CLI OAuth and Google Antigravity OAuth presets/backends. +- Gemini CLI OAuth uses the Gemini CLI public OAuth client and Code Assist endpoints. +- Antigravity OAuth uses Antigravity OAuth credentials, Code Assist daily/autopush/prod fallback, and Antigravity-style request wrapping. +- Added Antigravity version discovery from the updater/changelog with local caching. +- Added Antigravity model alias mapping from UI-facing `antigravity-*` IDs to upstream Code Assist model IDs. + +### Responses API + Tool Flow +- Added Gemini-style history hardening for Google OAuth requests: removes empty turns, coalesces adjacent roles, drops duplicate user repeats, and enforces user-start/user-end history. +- Preserves function-call IDs across turns and adds synthetic `thoughtSignature` for historical Gemini function calls, matching Gemini CLI hardening behavior. +- Fixed Antigravity streaming Responses API compatibility: single assistant message item, text done events, content part done, output item done, final completed event, and connection close. +- Added `response.function_call_arguments.delta` and `response.function_call_arguments.done` events so Codex can execute Antigravity tool calls and create files. +- Fixed functionResponse name matching — uses the original functionCall name instead of falling back to call_id. +- Strengthened Antigravity prompt policy: use tools immediately for file changes, avoid planning-only responses, and answer directly when no suitable tool exists. + +### Reliability + Routing +- Added BGP++ route scoring, route cooldowns, token buckets, and persisted route stats. +- Added provider policy layer and adaptive context compaction. +- Added tool-call pairing validation/repair for orphaned tool outputs. +- Added Endpoint Doctor in the endpoint editor. +- Added log redaction helper for common API key/token patterns. + +## v3.1.0 (2026-05-20) + +- Initial Antigravity/Gemini CLI OAuth backend split. +- Gemini-style history hardening, SSE streaming fixes. + ## v3.0.0 (2026-05-20) **Major architectural overhaul — Phase 0 + Phase 1 of engineering roadmap** diff --git a/README.md b/README.md index 785eb9d..5b4398d 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@
Run OpenAI Codex CLI & Desktop with any AI provider.
- OpenCode • Z.AI • Anthropic • Command Code • OpenRouter • Crof.ai • NVIDIA NIM • Kilo.ai • and more
+ Google Antigravity • Gemini CLI • OpenCode • Z.AI • Anthropic • Command Code • OpenRouter • Crof.ai • NVIDIA NIM • Kilo.ai • and more
@@ -43,14 +43,16 @@ OpenAI's Codex CLI v2.0+ exclusively uses the **Responses API** — a protocol t | Provider | API | Works with Codex? | |----------|-----|:-:| | OpenAI | Responses API | ✅ | -| Z.AI | Chat Completions | ❌ | -| OpenCode | Chat Completions | ❌ | -| Anthropic | Messages API | ❌ | -| Command Code | Custom `/alpha/generate` | ❌ | -| Ollama | Chat Completions | ❌ | -| OpenRouter | Chat Completions | ❌ | -| NVIDIA NIM | Chat Completions | ❌ | -| Crof.ai | Chat Completions | ❌ | +| Google Antigravity (OAuth) | Code Assist / Gemini Native | ✅ | +| Gemini CLI OAuth | Code Assist | ✅ | +| Z.AI | Chat Completions | ✅ | +| OpenCode | Chat Completions | ✅ | +| Anthropic | Messages API | ✅ | +| Command Code | Custom `/alpha/generate` | ✅ | +| Ollama | Chat Completions | ✅ | +| OpenRouter | Chat Completions | ✅ | +| NVIDIA NIM | Chat Completions | ✅ | +| Crof.ai | Chat Completions | ✅ | The protocols differ in **endpoint paths**, **message formats**, **tool-call structures**, **streaming events**, and **completion semantics**. You can't just swap a base URL. diff --git a/codex-launcher_2.7.0_all.deb b/codex-launcher_2.7.0_all.deb deleted file mode 100644 index 1ba20a6..0000000 Binary files a/codex-launcher_2.7.0_all.deb and /dev/null differ diff --git a/codex-launcher_3.3.0_all.deb b/codex-launcher_3.3.0_all.deb new file mode 100644 index 0000000..67c8d99 Binary files /dev/null and b/codex-launcher_3.3.0_all.deb differ diff --git a/src/codex-launcher-gui b/src/codex-launcher-gui index 604e4d3..c4de26a 100755 --- a/src/codex-launcher-gui +++ b/src/codex-launcher-gui @@ -4,8 +4,9 @@ import gi gi.require_version("Gtk", "3.0") from gi.repository import Gtk, GLib -import subprocess, os, signal, sys, threading, time, json, urllib.request, tempfile, shutil +import subprocess, os, signal, sys, threading, time, json, urllib.request, urllib.parse, tempfile, shutil import hashlib, socket, contextlib +import base64, secrets from pathlib import Path HOME = Path.home() @@ -25,6 +26,21 @@ model_catalog_json = "" """ CHANGELOG = [ + ("3.3.0", "2026-05-20", [ + "Added Google Antigravity OAuth backend with Code Assist endpoints and model alias mapping", + "Added Gemini CLI OAuth backend using public Gemini CLI OAuth client", + "Antigravity now creates files via tool calls — full Codex agent loop with Gemini-style history hardening", + "Fixed tool-call streaming: function_call_arguments delta/done events, thought signatures, functionResponse name matching", + "Added Endpoint Doctor, adaptive BGP scoring, provider policies, adaptive compaction, log redaction", + ]), + ("3.1.0", "2026-05-20", [ + "Initial Antigravity/Gemini CLI OAuth split, history hardening, SSE fixes", + ]), + ("3.0.0", "2026-05-20", [ + "ThreadingHTTPServer with dynamic proxy ports and health-gated Codex launch", + "Atomic config writes, safe cleanup registry, graceful shutdown, and buffered SSE streaming", + "Usage Dashboard v2, TCP_NODELAY streaming, Anthropic prompt caching, and batched usage stats", + ]), ("2.6.1", "2026-05-20", [ "Google OAuth rebuilt to emulate Gemini CLI — no client_secret.json needed", "Uses Google's public OAuth client_id (same as gemini-cli)", @@ -226,13 +242,25 @@ PROVIDER_PRESETS = { ], }, "Google Gemini (OAuth)": { - "backend_type": "openai-compat", - "base_url": "https://generativelanguage.googleapis.com/v1beta/openai", - "oauth_provider": "google", + "backend_type": "gemini-oauth-cli", + "base_url": "https://cloudcode-pa.googleapis.com", + "oauth_provider": "google-cli", "models": [ "gemini-2.5-flash", "gemini-2.5-pro", - "gemini-2.0-flash", "gemini-2.0-flash-lite", - "gemini-2.5-flash-preview-native-audio-dialog", + ], + }, + "Google Antigravity (OAuth)": { + "backend_type": "gemini-oauth-antigravity", + "base_url": "https://daily-cloudcode-pa.sandbox.googleapis.com", + "oauth_provider": "google-antigravity", + "models": [ + "antigravity-gemini-3-flash", + "antigravity-gemini-3-pro", + "antigravity-gemini-3.1-pro", + "antigravity-claude-sonnet-4-6", + "antigravity-claude-opus-4-6-thinking", + "gemini-2.5-flash", "gemini-2.5-pro", + "gemini-3-flash-preview", "gemini-3-pro-preview", "gemini-3.1-pro-preview", ], }, "OpenAdapter": { @@ -301,8 +329,10 @@ def apply_provider_preset(endpoint, preset_name): updated["base_url"] = normalize_base_url(preset["base_url"]) if preset.get("cc_version") and not updated.get("cc_version"): updated["cc_version"] = preset["cc_version"] - if not updated.get("models"): + if not updated.get("models") or (preset.get("backend_type") or "").startswith("gemini-oauth"): updated["models"] = list(preset.get("models", [])) + if preset.get("oauth_provider"): + updated["oauth_provider"] = preset["oauth_provider"] if not updated.get("default_model") and updated.get("models"): updated["default_model"] = updated["models"][0] return updated @@ -630,6 +660,18 @@ def _start_proxy_for(endpoint, logfn): port = _pick_free_port() _proxy_port = port + model_list = endpoint.get("models", []) + if (endpoint.get("backend_type") or "").startswith("gemini-oauth") and (endpoint.get("oauth_provider") or "").startswith("google"): + token_name = "google-antigravity-oauth-token.json" if endpoint.get("oauth_provider") == "google-antigravity" else "google-cli-oauth-token.json" + token_path = os.path.expanduser(f"~/.cache/codex-proxy/{token_name}") + try: + with open(token_path) as tf: + td = json.load(tf) + discovered = [] if endpoint.get("oauth_provider") == "google-antigravity" else td.get("available_models", []) + if discovered: + model_list = discovered + except Exception: + pass pcfg = { "port": port, "backend_type": endpoint["backend_type"], @@ -640,7 +682,7 @@ def _start_proxy_for(endpoint, logfn): "reasoning_enabled": endpoint.get("reasoning_enabled", True), "reasoning_effort": endpoint.get("reasoning_effort", "medium"), "models": [{"id": m, "object": "model", "created": 1700000000, "owned_by": endpoint["name"]} - for m in endpoint.get("models", [])], + for m in model_list], } pcfg_path = PROXY_CONFIG_DIR / f"proxy-{safe_name(endpoint['name'])}-{port}.json" pcfg_path.parent.mkdir(parents=True, exist_ok=True) @@ -763,7 +805,7 @@ class LauncherWin(Gtk.Window): # header row hdr = Gtk.Box(spacing=8) vbox.pack_start(hdr, False, False, 0) - lbl = Gtk.Label(label="Codex Launcher v3.0.0") + lbl = Gtk.Label(label="Codex Launcher v3.3.0") lbl.set_use_markup(True) hdr.pack_start(lbl, False, False, 0) changelog_btn = Gtk.Button(label="Changelog") @@ -1277,7 +1319,7 @@ class LauncherWin(Gtk.Window): self.log("ERROR: no model selected") return - is_bgp = name.startswith("🔀 ") + is_bgp = bool(name and name.startswith("🔀 ")) if is_bgp: pool_name = name[2:] pool = None @@ -1781,6 +1823,8 @@ class EditEndpointDialog(Gtk.Dialog): for val, lab in [("openai-compat", "OpenAI-compatible (needs proxy)"), ("anthropic", "Anthropic (needs proxy)"), ("command-code", "Command Code (needs proxy)"), + ("gemini-oauth-cli", "Gemini CLI OAuth (needs proxy)"), + ("gemini-oauth-antigravity", "Antigravity OAuth (needs proxy)"), ("native", "Native OpenAI (no proxy)")]: self._combo_type.append(val, lab) bt = self._data.get("backend_type", "openai-compat") @@ -1866,6 +1910,9 @@ class EditEndpointDialog(Gtk.Dialog): self._fetch_models_btn = Gtk.Button(label="Fetch from API") self._fetch_models_btn.connect("clicked", lambda b: self._fetch_models()) mbox.pack_start(self._fetch_models_btn, False, False, 0) + self._test_btn = Gtk.Button(label="Test Endpoint") + self._test_btn.connect("clicked", lambda b: self._diagnose_endpoint()) + mbox.pack_start(self._test_btn, False, False, 0) bulk_lbl = Gtk.Label(label="Bulk add models (one per line or comma-separated):", xalign=0) area.pack_start(bulk_lbl, False, False, 2) @@ -1970,29 +2017,52 @@ class EditEndpointDialog(Gtk.Dialog): preset_name = self._combo_preset.get_active_text() or "Custom" preset = PROVIDER_PRESETS.get(preset_name, {}) provider = preset.get("oauth_provider", "") - if provider == "google": - self._google_oauth_flow() + if (provider or "").startswith("google"): + self._google_oauth_flow(provider) - def _google_oauth_flow(self): - token_path = os.path.expanduser("~/.cache/codex-proxy/google-oauth-token.json") + def _google_oauth_flow(self, oauth_provider="google-cli"): + is_antigravity = oauth_provider == "google-antigravity" + token_path = os.path.expanduser("~/.cache/codex-proxy/google-antigravity-oauth-token.json" if is_antigravity else "~/.cache/codex-proxy/google-cli-oauth-token.json") - CLIENT_ID = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com" - CLIENT_SECRET = "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w" - SCOPES = [ - "https://www.googleapis.com/auth/cloud-platform", - "https://www.googleapis.com/auth/generative-language.retriever", - "https://www.googleapis.com/auth/userinfo.email", - "https://www.googleapis.com/auth/userinfo.profile", - ] - import http.server, hashlib, secrets, socket + if is_antigravity: + CLIENT_ID = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com" + CLIENT_SECRET = "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" + SCOPES = [ + "https://www.googleapis.com/auth/cloud-platform", + "https://www.googleapis.com/auth/userinfo.email", + "https://www.googleapis.com/auth/userinfo.profile", + "https://www.googleapis.com/auth/cclog", + "https://www.googleapis.com/auth/experimentsandconfigs", + ] + port = 51121 + redirect_uri = f"http://localhost:{port}/oauth-callback" + callback_path = "/oauth-callback" + provider_kind = "antigravity" + else: + CLIENT_ID = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com" + CLIENT_SECRET = "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" + SCOPES = [ + "https://www.googleapis.com/auth/cloud-platform", + "https://www.googleapis.com/auth/userinfo.email", + "https://www.googleapis.com/auth/userinfo.profile", + ] + port = 0 + redirect_uri = None + callback_path = "/oauth2callback" + provider_kind = "cli" + + import http.server - port = 8085 state = secrets.token_hex(32) - verifier = secrets.token_urlsafe(32) - challenge = hashlib.sha256(verifier.encode()).digest() - challenge_b64 = urllib.parse.quote_plus(__import__('base64').urlsafe_b64encode(challenge).rstrip(b'=').decode()) + verifier = secrets.token_urlsafe(64) + challenge = base64.urlsafe_b64encode(hashlib.sha256(verifier.encode()).digest()).rstrip(b"=").decode() + + if port == 0: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(("127.0.0.1", 0)) + port = s.getsockname()[1] + redirect_uri = f"http://127.0.0.1:{port}/oauth2callback" - redirect_uri = f"http://127.0.0.1:{port}/oauth2callback" scope_str = " ".join(SCOPES) auth_url = ( f"https://accounts.google.com/o/oauth2/v2/auth?" @@ -2001,9 +2071,9 @@ class EditEndpointDialog(Gtk.Dialog): f"&response_type=code" f"&scope={urllib.parse.quote(scope_str)}" f"&access_type=offline" - f"&prompt=consent" + f"&prompt=select_account%20consent" f"&state={state}" - f"&code_challenge={challenge_b64}" + f"&code_challenge={challenge}" f"&code_challenge_method=S256" ) @@ -2043,6 +2113,14 @@ class EditEndpointDialog(Gtk.Dialog): qs = urllib.parse.urlparse(self2.path).query params = urllib.parse.parse_qs(qs) received_state[0] = params.get("state", [None])[0] + with open("/tmp/codex-oauth-debug.log", "a") as _dbg: + _dbg.write(f"[{time.strftime('%H:%M:%S')}] GET {self2.path} state={received_state[0]} code={'code' in params}\n") + if self2.path.find(callback_path) == -1: + self2.send_response(302) + self2.send_header("Location", "https://developers.google.com/gemini-code-assist/auth_failure_gemini") + self2.end_headers() + error_holder[0] = "unexpected request" + return if "code" in params: if received_state[0] != state: self2.send_response(400) @@ -2061,63 +2139,171 @@ class EditEndpointDialog(Gtk.Dialog): self2.send_response(302) self2.send_header("Location", "https://developers.google.com/gemini-code-assist/auth_failure_gemini") self2.end_headers() - def log_message(self2, *a): pass + def log_message(self2, fmt, *args): + with open("/tmp/codex-oauth-debug.log", "a") as _dbg: + _dbg.write(f"[{time.strftime('%H:%M:%S')}] {fmt % args}\n") try: - server = http.server.HTTPServer(("127.0.0.1", port), OAuthHandler) + bind_host = "localhost" if is_antigravity else "127.0.0.1" + server = http.server.HTTPServer((bind_host, port), OAuthHandler) except OSError: self._oauth_status.set_text(f"Port {port} already in use — close other apps and retry.") spinner.stop() dlg.run(); dlg.destroy() return + def _oauth_log(msg): + with open("/tmp/codex-oauth-debug.log", "a") as _f: + _f.write(f"[{time.strftime('%H:%M:%S')}] {msg}\n") + + _oauth_log(f"Starting OAuth: port={port} redirect_uri={redirect_uri}") + def wait_for_code(): - server.handle_request() + _oauth_log("wait_for_code thread started") + deadline = time.time() + 120 + while code_holder[0] is None and error_holder[0] is None and time.time() < deadline: + server.handle_request() server.server_close() - GLib.idle_add(self._google_oauth_complete_gemini, dlg, code_holder, error_holder, - CLIENT_ID, CLIENT_SECRET, redirect_uri, token_path, spinner, verifier) + _oauth_log(f"Server closed. code={'yes' if code_holder[0] else 'no'} error={'yes' if error_holder[0] else 'no'}") + if code_holder[0]: + try: + _oauth_log("Exchanging code for token...") + token_data = urllib.parse.urlencode({ + "code": code_holder[0], + "client_id": CLIENT_ID, + "client_secret": CLIENT_SECRET, + "redirect_uri": redirect_uri, + "grant_type": "authorization_code", + "code_verifier": verifier, + }).encode() + req = urllib.request.Request("https://oauth2.googleapis.com/token", data=token_data, + headers={"Content-Type": "application/x-www-form-urlencoded"}) + resp = urllib.request.urlopen(req, timeout=30) + tokens = json.loads(resp.read()) + tokens["client_id"] = CLIENT_ID + tokens["client_secret"] = CLIENT_SECRET + tokens["provider_kind"] = provider_kind + tokens["expires_at"] = time.time() + tokens.get("expires_in", 3600) + os.makedirs(os.path.dirname(token_path), exist_ok=True) + with open(token_path, "w") as f: + json.dump(tokens, f, indent=2) + os.chmod(token_path, 0o600) + _oauth_log(f"Token saved to {token_path}") + project_id = "" + try: + _oauth_log("Discovering project ID via loadCodeAssist...") + lr = urllib.request.Request( + "https://cloudcode-pa.googleapis.com/v1internal:loadCodeAssist", + data=json.dumps({}).encode(), + headers={ + "Content-Type": "application/json", + "Authorization": f"Bearer {tokens['access_token']}", + "User-Agent": "google-api-nodejs-client/9.15.1", + }) + lresp = urllib.request.urlopen(lr, timeout=15) + ldata = json.loads(lresp.read()) + p = ldata.get("cloudaicompanionProject", "") + if isinstance(p, dict): + project_id = p.get("id", "") + elif isinstance(p, str): + project_id = p + _oauth_log(f"Project ID: {project_id or '(none)'}") + if project_id: + tokens["project_id"] = project_id + with open(token_path, "w") as f2: + json.dump(tokens, f2, indent=2) + os.chmod(token_path, 0o600) + except Exception as pe: + _oauth_log(f"loadCodeAssist failed (non-fatal): {pe}") + if is_antigravity: + found_models = [ + "gemini-2.5-flash", "gemini-2.5-pro", + "gemini-3-flash-preview", "gemini-3-pro-preview", "gemini-3.1-pro-preview", + "gemini-3-pro-low", "gemini-3-pro-high", + "gemini-3.1-pro-low", "gemini-3.1-pro-high", + "gemini-3-flash-low", "gemini-3-flash-medium", "gemini-3-flash-high", + "claude-sonnet-4-6", "claude-opus-4-6-thinking", + "claude-opus-4-6-thinking-low", "claude-opus-4-6-thinking-medium", "claude-opus-4-6-thinking-high", + "gemini-claude-sonnet-4-6", + "gemini-claude-opus-4-6-thinking-low", "gemini-claude-opus-4-6-thinking-medium", "gemini-claude-opus-4-6-thinking-high", + "gemini-3-pro-image", + ] + probe_candidates = [ + "gemini-2.5-flash", "gemini-2.5-pro", + "gemini-3-flash-preview", "gemini-3-pro-preview", "gemini-3.1-pro-preview", + ] + _oauth_log(f"Probing {len(probe_candidates)} model candidates...") + for mc in probe_candidates: + try: + pr = urllib.request.Request( + "https://daily-cloudcode-pa.sandbox.googleapis.com/v1internal:generateContent", + data=json.dumps({ + "project": project_id, + "model": mc, + "request": {"contents": [{"role": "user", "parts": [{"text": "x"}]}], + "generationConfig": {"maxOutputTokens": 1}}, + }).encode(), + headers={ + "Content-Type": "application/json", + "Authorization": f"Bearer {tokens['access_token']}", + "User-Agent": "google-api-nodejs-client/9.15.1", + "Client-Metadata": "ideType=IDE_UNSPECIFIED,platform=PLATFORM_UNSPECIFIED,pluginType=GEMINI", + }) + pr.get_method = lambda: "POST" + resp = urllib.request.urlopen(pr, timeout=10) + resp.read() + found_models.append(mc) + _oauth_log(f" {mc} → available") + except urllib.error.HTTPError as e: + if e.code == 429: + found_models.append(mc) + _oauth_log(f" {mc} → available (rate limited)") + else: + e.read() + _oauth_log(f" {mc} → HTTP {e.code}") + except Exception as e: + _oauth_log(f" {mc} → error: {e}") + else: + found_models = ["gemini-2.5-flash", "gemini-2.5-pro"] + if found_models: + tokens["available_models"] = found_models + with open(token_path, "w") as f3: + json.dump(tokens, f3, indent=2) + os.chmod(token_path, 0o600) + _oauth_log(f"Discovered {len(found_models)} models: {found_models}") + else: + _oauth_log("No models discovered (will use defaults)") + GLib.idle_add(self._oauth_success, dlg, tokens.get("access_token", ""), spinner) + return + except urllib.error.HTTPError as e: + body = e.read().decode(errors='replace') + _oauth_log(f"Token exchange HTTP {e.code}: {body}") + GLib.idle_add(self._oauth_failed, dlg, f"Token exchange failed ({e.code}): {body[:200]}", spinner) + return + except Exception as e: + _oauth_log(f"Token exchange FAILED: {e}") + GLib.idle_add(self._oauth_failed, dlg, f"Token exchange failed: {e}", spinner) + return + _oauth_log(f"OAuth failed: {error_holder[0] or 'timeout'}") + GLib.idle_add(self._oauth_failed, dlg, + error_holder[0] or "No authorization code received.", spinner) threading.Thread(target=wait_for_code, daemon=True).start() subprocess.Popen(["xdg-open", auth_url], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + dlg.connect("response", lambda d, r: d.destroy()) dlg.run() - dlg.destroy() - def _google_oauth_complete_gemini(self, dlg, code_holder, error_holder, - client_id, client_secret, redirect_uri, token_path, spinner, verifier): + def _oauth_success(self, dlg, access_token, spinner): spinner.stop() - if error_holder[0]: - self._oauth_status.set_markup(f'Error: {error_holder[0]}') - return - if not code_holder[0]: - self._oauth_status.set_text("No authorization code received.") - return + self._entry_key.set_text(access_token) + self._oauth_status.set_markup('Authorization successful! Token saved.') + dlg.set_title("Google OAuth — Success") + GLib.timeout_add(1500, lambda: dlg.response(Gtk.ResponseType.OK)) - self._oauth_status.set_text("Exchanging code for token…") - try: - token_data = urllib.parse.urlencode({ - "code": code_holder[0], - "client_id": client_id, - "client_secret": client_secret, - "redirect_uri": redirect_uri, - "grant_type": "authorization_code", - "code_verifier": verifier, - }).encode() - req = urllib.request.Request("https://oauth2.googleapis.com/token", data=token_data, - headers={"Content-Type": "application/x-www-form-urlencoded"}) - resp = urllib.request.urlopen(req, timeout=30) - tokens = json.loads(resp.read()) - tokens["client_id"] = client_id - tokens["client_secret"] = client_secret - tokens["expires_at"] = time.time() + tokens.get("expires_in", 3600) - os.makedirs(os.path.dirname(token_path), exist_ok=True) - with open(token_path, "w") as f: - json.dump(tokens, f, indent=2) - os.chmod(token_path, 0o600) - self._entry_key.set_text(tokens.get("access_token", "")) - self._oauth_status.set_markup('Authorization successful! Token saved.') - dlg.set_title("Google OAuth — Success") - except Exception as e: - self._oauth_status.set_markup(f'Token exchange failed: {e}') + def _oauth_failed(self, dlg, msg, spinner): + spinner.stop() + self._oauth_status.set_markup(f'{msg}') + GLib.timeout_add(3000, lambda: dlg.response(Gtk.ResponseType.CANCEL)) def _remove_model(self, path): current = self._combo_default.get_active_text() @@ -2163,6 +2349,70 @@ class EditEndpointDialog(Gtk.Dialog): return True, None return False, err or "No models returned by endpoint" + def _diagnose_endpoint(self): + url = self._entry_url.get_text().strip() + key = self._entry_key.get_text().strip() + bt = self._combo_type.get_active_id() or "openai-compat" + model = self._combo_default.get_active_text() or "" + + checks = [] + def add(name, ok, detail=""): + checks.append((name, ok, detail)) + + parsed = urllib.parse.urlparse(url) + add("URL format", bool(parsed.scheme and parsed.netloc), + url if parsed.scheme else "Missing scheme (https://)") + + try: + t0 = time.time() + ep = {"base_url": url, "api_key": key, "backend_type": bt} + ids, err = fetch_models_for_endpoint(ep) + lat = (time.time() - t0) * 1000 + if ids: + add("Network reachable", True, f"{lat:.0f}ms") + add("Auth valid", True) + add("/models endpoint", True, f"{len(ids)} models in {lat:.0f}ms") + if model: + add("Selected model exists", model in ids, + model if model in ids else f"'{model}' not in {ids[:5]}...") + else: + add("Selected model", False, "No model selected") + elif err and ("401" in str(err) or "403" in str(err)): + add("Network reachable", True, f"{lat:.0f}ms") + add("Auth valid", False, str(err)[:100]) + add("/models endpoint", False, "Auth failed") + else: + add("Network reachable", False, str(err or "no response")[:100]) + except Exception as e: + add("Network", False, str(e)[:100]) + + dlg = Gtk.Dialog(title="Endpoint Doctor", parent=self, modal=True) + dlg.add_button("Close", Gtk.ResponseType.CLOSE) + dlg.set_default_size(420, 300) + area = dlg.get_content_area() + area.set_margin_start(12) + area.set_margin_end(12) + area.set_margin_top(12) + area.set_margin_bottom(12) + area.set_spacing(4) + for name, ok, detail in checks: + row = Gtk.Box(spacing=6) + icon = Gtk.Label() + icon.set_markup(f'{"\u2713" if ok else "\u2717"}') + row.pack_start(icon, False, False, 0) + lbl = Gtk.Label() + lbl.set_markup(f'{name}') + row.pack_start(lbl, False, False, 0) + if detail: + det = Gtk.Label() + det.set_markup(f'{detail}') + row.pack_end(det, False, False, 0) + area.pack_start(row, False, False, 0) + dlg.show_all() + dlg.run() + dlg.destroy() + def _on_response(self, dialog, response): if response != Gtk.ResponseType.OK: self.destroy() @@ -2172,7 +2422,7 @@ class EditEndpointDialog(Gtk.Dialog): if not name: self._show_error("Name is required") return - bt = self._combo_type.get_active_id() + bt = self._combo_type.get_active_id() or PROVIDER_PRESETS.get(self._combo_preset.get_active_text() or "", {}).get("backend_type") or "openai-compat" url = self._entry_url.get_text().strip() key = self._entry_key.get_text().strip() models = [self._model_store[i][0] for i in range(len(self._model_store))] diff --git a/src/translate-proxy.py b/src/translate-proxy.py index 53996f6..d86e14d 100755 --- a/src/translate-proxy.py +++ b/src/translate-proxy.py @@ -11,7 +11,7 @@ Usage: python3 translate-proxy.py --backend openai-compat --target-url https://... --api-key sk-... """ -import json, http.server, socketserver, urllib.request, urllib.parse, urllib.error +import json, http.server, socketserver, urllib.request, urllib.parse, urllib.error, re import time, uuid, os, sys, argparse, threading, socket, collections, contextlib, signal # ═══════════════════════════════════════════════════════════════════ @@ -107,9 +107,57 @@ _active_connections = 0 _active_connections_lock = threading.Lock() _pool = uuid.uuid4().hex[:8] +_antigravity_version = "1.18.3" +_antigravity_version_checked = 0 +_antigravity_version_lock = threading.Lock() + +def _fetch_antigravity_version(): + cache_path = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy", "antigravity-version.json") + try: + with open(cache_path) as f: + cached = json.load(f) + if cached.get("version") and cached.get("checked_at", 0) > time.time() - 6 * 3600: + return cached["version"] + except Exception: + pass + urls = [ + ("https://antigravity-auto-updater-974169037036.us-central1.run.app", None), + ("https://antigravity.google/changelog", 5000), + ] + for url, limit in urls: + try: + req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"}) + resp = urllib.request.urlopen(req, timeout=5) + text = resp.read().decode(errors="replace") + if limit: + text = text[:limit] + m = re.search(r"\d+\.\d+\.\d+", text) + if m: + version = m.group(0) + try: + os.makedirs(os.path.dirname(cache_path), exist_ok=True) + with open(cache_path, "w") as f: + json.dump({"version": version, "checked_at": time.time()}, f) + except Exception: + pass + return version + except Exception: + pass + return _antigravity_version + +def _ensure_antigravity_version(): + global _antigravity_version, _antigravity_version_checked + if time.time() - _antigravity_version_checked < 6 * 3600: + return _antigravity_version + with _antigravity_version_lock: + if time.time() - _antigravity_version_checked < 6 * 3600: + return _antigravity_version + _antigravity_version = _fetch_antigravity_version() + _antigravity_version_checked = time.time() + return _antigravity_version def _init_runtime(): - global CONFIG, PORT, BACKEND, TARGET_URL, API_KEY, OAUTH_PROVIDER + global CONFIG, PORT, BACKEND, TARGET_URL, API_KEY, OAUTH_PROVIDER, _antigravity_version global MODELS, CC_VERSION, REASONING_ENABLED, REASONING_EFFORT, BGP_ROUTES CONFIG = load_config() @@ -117,12 +165,15 @@ def _init_runtime(): BACKEND = CONFIG["backend_type"] TARGET_URL = CONFIG["target_url"].rstrip("/") API_KEY = CONFIG["api_key"] - OAUTH_PROVIDER = CONFIG.get("oauth_provider", "") + OAUTH_PROVIDER = CONFIG.get("oauth_provider") or "" MODELS = CONFIG["models"] CC_VERSION = CONFIG.get("cc_version", "") REASONING_ENABLED = CONFIG.get("reasoning_enabled", True) REASONING_EFFORT = CONFIG.get("reasoning_effort", "medium") BGP_ROUTES = CONFIG.get("bgp_routes", []) + if OAUTH_PROVIDER == "google-antigravity": + _antigravity_version = _ensure_antigravity_version() + print(f"[antigravity] version={_antigravity_version}", file=sys.stderr) bgp_models = [] for _r in BGP_ROUTES: @@ -134,13 +185,33 @@ def _init_runtime(): MODELS = [{"id": m, "object": "model", "created": 1700000000, "owned_by": "bgp"} for m in bgp_models] CONFIG["models"] = MODELS + if (BACKEND or "").startswith("gemini-oauth") and (OAUTH_PROVIDER or "").startswith("google"): + token_name = "google-antigravity-oauth-token.json" if OAUTH_PROVIDER == "google-antigravity" else "google-cli-oauth-token.json" + token_path = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy", token_name) + try: + with open(token_path) as _tf: + _td = json.load(_tf) + _discovered = [] if OAUTH_PROVIDER == "google-antigravity" else _td.get("available_models", []) + if _discovered: + _seen = [] + for _m in _discovered: + if _m not in _seen: + _seen.append(_m) + MODELS = [{"id": m, "object": "model", "created": 1700000000, "owned_by": "gemini-oauth"} for m in _seen] + CONFIG["models"] = MODELS + print(f"[gemini-oauth] loaded {len(_seen)} discovered models: {_seen}", file=sys.stderr) + except Exception: + pass + def _refresh_oauth_token(): return _refresh_oauth_token_for(API_KEY, OAUTH_PROVIDER) def _refresh_oauth_token_for(api_key, oauth_provider): - if oauth_provider != "google": + oauth_provider = oauth_provider or "" + if not oauth_provider.startswith("google"): return api_key - token_path = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy", "google-oauth-token.json") + token_name = "google-antigravity-oauth-token.json" if oauth_provider == "google-antigravity" else "google-cli-oauth-token.json" + token_path = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy", token_name) if not os.path.exists(token_path): return api_key try: @@ -329,6 +400,70 @@ _CROF_ADAPTIVE = { "min_keep_recent": 4, } +_BGP_STATS_PATH = os.path.join(_LOG_DIR, "bgp-route-stats.json") +_bgp_stats_lock = threading.Lock() + +def _route_key(route): + return f"{route.get('name', '')}::{route.get('target_url', '')}::{route.get('model', '')}" + +def _load_bgp_stats(): + try: + if os.path.exists(_BGP_STATS_PATH): + return json.load(open(_BGP_STATS_PATH)) + except Exception: + pass + return {} + +def _save_bgp_stats(stats): + tmp = _BGP_STATS_PATH + ".tmp" + with open(tmp, "w") as f: + json.dump(stats, f, indent=2) + os.replace(tmp, _BGP_STATS_PATH) + +def _score_route(route, stats): + key = _route_key(route) + rs = stats.get(key, {}) + now = time.time() + if float(rs.get("open_until_ts", 0)) > now: + return 1_000_000 + priority = int(route.get("priority", 99)) + ewma = float(rs.get("ewma_latency_s", 0)) + failures = int(rs.get("consecutive_failures", 0)) + score = priority + min(ewma * 5, 50) + failures * 20 + if float(rs.get("rate_limited_until", 0)) > now: + score += 500 + return score + +def _update_route_stats(route, success, duration_s, http_code=None, error_type=None): + with _bgp_stats_lock: + stats = _load_bgp_stats() + key = _route_key(route) + rs = stats.setdefault(key, { + "ewma_latency_s": duration_s, "consecutive_failures": 0, + "last_success": None, "last_failure": None, + "open_until_ts": 0, "rate_limited_until": 0, "last_error": None, + }) + alpha = 0.25 + rs["ewma_latency_s"] = alpha * duration_s + (1 - alpha) * float(rs.get("ewma_latency_s", duration_s)) + if success: + rs["consecutive_failures"] = 0 + rs["last_success"] = time.time() + else: + rs["consecutive_failures"] = int(rs.get("consecutive_failures", 0)) + 1 + rs["last_failure"] = time.time() + rs["last_error"] = error_type or (f"http_{http_code}" if http_code else "unknown") + if http_code == 429: + rs["rate_limited_until"] = time.time() + 120 + if rs["consecutive_failures"] >= 3: + rs["open_until_ts"] = time.time() + 60 + rs["consecutive_failures"] = 0 + _save_bgp_stats(stats) + +def _sorted_bgp_routes(): + with _bgp_stats_lock: + stats = _load_bgp_stats() + return sorted(BGP_ROUTES, key=lambda r: _score_route(r, stats)) + def _crof_record(model, n_items, success): if not isinstance(n_items, int) or n_items < 1: return @@ -536,6 +671,193 @@ def _compact_input(input_data): print(f"[compact] {len(input_data)} items -> {len(head) + 1 + len(tail)} (compacted {len(body)} old items into summary)", file=sys.stderr) return head + [summary_msg] + tail +# ═══════════════════════════════════════════════════════════════════ +# Provider policies +# ═══════════════════════════════════════════════════════════════════ + +_PROVIDER_POLICIES = { + "crof": {"reasoning_mode": "off", "max_tokens": 32768, "strip_reasoning": True, + "tool_output_limit": 4000, "max_input_items": 18, "compaction": "aggressive"}, + "chats-llm": {"reasoning_mode": "off", "max_tokens": 32768, "strip_reasoning": True, + "tool_output_limit": 4000, "max_input_items": 20, "compaction": "aggressive"}, + "z.ai": {"reasoning_mode": "medium", "max_tokens": 65536, "strip_reasoning": True, + "tool_output_limit": 8000, "max_input_items": 40, "compaction": "balanced"}, + "openrouter": {"reasoning_mode": "provider_default", "max_tokens": 32768, "strip_reasoning": True, + "tool_output_limit": 6000, "max_input_items": 35, "compaction": "balanced"}, + "openadapter": {"reasoning_mode": "off", "max_tokens": 32768, "strip_reasoning": True, + "tool_output_limit": 6000, "max_input_items": 30, "compaction": "balanced"}, +} + +def provider_policy(target_url=None, backend=None): + host = urllib.parse.urlparse(target_url or TARGET_URL).netloc.lower() + for key, policy in _PROVIDER_POLICIES.items(): + if key in host: + return policy + return {} + +# ═══════════════════════════════════════════════════════════════════ +# Adaptive context compaction (model-aware) +# ═══════════════════════════════════════════════════════════════════ + +_MODEL_CONTEXT = { + "gpt-4o": 128000, "gpt-4o-mini": 128000, "gpt-5": 128000, + "claude-sonnet": 200000, "claude-haiku": 200000, + "glm-5.1": 128000, "glm-5": 128000, "glm-4": 128000, + "deepseek": 64000, "gemini-2.5-flash": 1000000, "gemini-2.5-pro": 2000000, + "mimo": 32768, "minimax": 32768, "kimi": 128000, + "_default": 32768, +} + +def _context_limit_for_model(model): + if not model: + return _MODEL_CONTEXT["_default"] + ml = model.lower() + for key, limit in _MODEL_CONTEXT.items(): + if key != "_default" and key in ml: + return limit + return _MODEL_CONTEXT["_default"] + +def _estimate_tokens(obj): + if obj is None: + return 0 + if isinstance(obj, str): + return max(1, len(obj) // 4) + try: + raw = json.dumps(obj, ensure_ascii=False) + except Exception: + raw = str(obj) + return max(1, len(raw) // 4) + +def _adaptive_compact(input_data, model, policy=None): + policy = policy or {} + context_size = int(policy.get("context_size", _context_limit_for_model(model))) + input_budget = int(context_size * 0.60) + estimated = _estimate_tokens(input_data) + if estimated <= input_budget: + return input_data, False + if not isinstance(input_data, list): + return input_data, False + reduction = max(0.15, input_budget / max(estimated, 1)) + target_items = max(int(len(input_data) * reduction), 6) + if target_items >= len(input_data): + return input_data, False + head_end = 0 + for i, item in enumerate(input_data): + t = item.get("type") + if t == "message" and item.get("role") in ("developer", "system"): + head_end = i + 1 + elif t == "message" and item.get("role") == "user" and head_end == i: + head_end = i + 1 + else: + break + head = input_data[:head_end] + keep = max(4, target_items // 3) + tail_start = max(head_end, len(input_data) - keep) + while tail_start > head_end: + t = input_data[tail_start].get("type") + if t in ("function_call_output", "function_call"): + tail_start -= 1 + elif t == "message" and input_data[tail_start].get("role") == "assistant": + tail_start -= 1 + else: + break + tail = input_data[tail_start:] + body = input_data[head_end:tail_start] + if not body: + return head + tail, True + summary_lines = [f"[Auto-compacted: {len(body)} turns removed (budget={input_budget}tok, model={model})]"] + for item in body[-5:]: + summary_lines.append(_item_summary(item, max_len=120)) + summary_msg = {"type": "message", "role": "user", + "content": [{"type": "input_text", "text": "\n".join(summary_lines)}]} + print(f"[adaptive-compact] model={model} est={estimated}tok budget={input_budget}tok " + f"items {len(input_data)}->{len(head)+1+len(tail)}", file=sys.stderr) + return head + [summary_msg] + tail, True + +# ═══════════════════════════════════════════════════════════════════ +# Tool-call pairing validator +# ═══════════════════════════════════════════════════════════════════ + +def validate_tool_pairs(input_items): + if not isinstance(input_items, list): + return [] + calls = {} + errors = [] + for idx, item in enumerate(input_items): + t = item.get("type") + if t == "function_call": + cid = item.get("call_id") or item.get("id") + if cid: + calls[cid] = idx + elif t == "function_call_output": + cid = item.get("call_id") or item.get("id") + if not cid or cid not in calls: + errors.append({"index": idx, "call_id": cid, "error": "orphan_function_call_output"}) + return errors + +def repair_orphan_tool_outputs(input_items, errors): + bad = {e["index"] for e in errors} + repaired = [] + for idx, item in enumerate(input_items): + if idx in bad: + output = item.get("output", "") + repaired.append({"type": "message", "role": "user", + "content": [{"type": "input_text", + "text": f"[Proxy: unmatched tool output]\n{str(output)[:4000]}"}]}) + else: + repaired.append(item) + return repaired + +# ═══════════════════════════════════════════════════════════════════ +# Log redaction +# ═══════════════════════════════════════════════════════════════════ + +_SECRET_PATTERNS = [ + (r"sk-[A-Za-z0-9_\-]{20,}", "[REDACTED:key]"), + (r"sk-ant-[A-Za-z0-9_\-]{20,}", "[REDACTED:anthropic]"), + (r"gh[pousr]_[A-Za-z0-9_]{20,}", "[REDACTED:github]"), + (r"Bearer\s+[A-Za-z0-9._\-]{20,}", "Bearer [REDACTED]"), +] + +def _redact(text): + if not text: + return text + import re + for pattern, replacement in _SECRET_PATTERNS: + text = re.sub(pattern, replacement, text) + return text + +# ═══════════════════════════════════════════════════════════════════ +# Rate-limit token buckets +# ═══════════════════════════════════════════════════════════════════ + +class TokenBucket: + def __init__(self, capacity=10, refill=1.0): + self.capacity = float(capacity) + self.tokens = float(capacity) + self.refill = float(refill) + self.updated = time.monotonic() + self.lock = threading.Lock() + def allow(self, cost=1): + with self.lock: + now = time.monotonic() + self.tokens = min(self.capacity, self.tokens + (now - self.updated) * self.refill) + self.updated = now + if self.tokens >= cost: + self.tokens -= cost + return True + return False + +_rate_buckets = {} +_rate_buckets_lock = threading.Lock() + +def _bucket_for_route(route): + name = route.get("name") or route.get("target_url") or "default" + with _rate_buckets_lock: + if name not in _rate_buckets: + _rate_buckets[name] = TokenBucket(capacity=10, refill=1.0) + return _rate_buckets[name] + # ═══════════════════════════════════════════════════════════════════ # OpenAI-compat backend # ═══════════════════════════════════════════════════════════════════ @@ -1154,14 +1476,31 @@ class Handler(http.server.BaseHTTPRequestHandler): self._handle_anthropic(body, model, stream) elif BACKEND == "command-code": self._handle_command_code(body, model, stream) + elif (BACKEND or "").startswith("gemini-oauth"): + self._handle_gemini_oauth(body, model, stream) else: self._handle_openai_compat(body, model, stream) def _handle_openai_compat(self, body, model, stream): input_data = body.get("input", "") + policy = provider_policy() + + pair_errors = validate_tool_pairs(input_data) + if pair_errors: + print(f"[tool-validator] repairing {len(pair_errors)} orphan tool outputs", file=sys.stderr) + input_data = repair_orphan_tool_outputs(input_data, pair_errors) + body = dict(body) + body["input"] = input_data + + compacted = False + if policy.get("compaction") and isinstance(input_data, list): + input_data, compacted = _adaptive_compact(input_data, model, policy) + if compacted: + body = dict(body) + body["input"] = input_data crof_limit = _crof_item_limit(model) - if isinstance(input_data, list) and len(input_data) > crof_limit: + if not compacted and isinstance(input_data, list) and len(input_data) > crof_limit: print(f"[crof-adaptive] proactive compact: {len(input_data)} items > limit {crof_limit}", file=sys.stderr) input_data = _crof_compact_for_retry(input_data, model) body = dict(body) @@ -1228,8 +1567,379 @@ class Handler(http.server.BaseHTTPRequestHandler): chat_body["reasoning_effort"] = REASONING_EFFORT return chat_body + def _handle_gemini_oauth(self, body, model, stream): + input_data = body.get("input", "") + policy = provider_policy() + if OAUTH_PROVIDER == "google-antigravity": + alias_map = { + "antigravity-gemini-3-flash": "gemini-3-flash", + "antigravity-gemini-3-pro": "gemini-3-pro-low", + "antigravity-gemini-3.1-pro": "gemini-3.1-pro-low", + "gemini-3-flash-preview": "gemini-3-flash", + "gemini-3-pro-preview": "gemini-3-pro-low", + "gemini-3.1-pro-preview": "gemini-3.1-pro-low", + "gemini-3-pro": "gemini-3-pro-low", + "gemini-3.1-pro": "gemini-3.1-pro-low", + "antigravity-claude-sonnet-4-6": "claude-sonnet-4-6", + "antigravity-claude-opus-4-6-thinking": "claude-opus-4-6-thinking", + } + model = alias_map.get(model, model) + + pair_errors = validate_tool_pairs(input_data) + if pair_errors: + input_data = repair_orphan_tool_outputs(input_data, pair_errors) + body = dict(body) + body["input"] = input_data + + compacted = False + if policy.get("compaction") and isinstance(input_data, list): + input_data, compacted = _adaptive_compact(input_data, model, policy) + if compacted: + body = dict(body) + body["input"] = input_data + + access_token = _refresh_oauth_token() + token_name = "google-antigravity-oauth-token.json" if OAUTH_PROVIDER == "google-antigravity" else "google-cli-oauth-token.json" + token_path = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy", token_name) + project_id = "" + try: + with open(token_path) as f: + project_id = json.load(f).get("project_id", "") + except Exception: + pass + + contents = [] + system_parts = [] + instructions = body.get("instructions", "").strip() + tool_call_names = {} + + if isinstance(input_data, list): + for item in input_data: + t = item.get("type") + if t == "message": + role = "user" if item.get("role") == "user" else "model" + content = item.get("content", "") + if isinstance(content, list): + parts = [] + for c in content: + ct = c.get("type") + if ct == "input_text": + parts.append({"text": c.get("text", "")}) + elif ct == "text": + parts.append({"text": c.get("text", "")}) + elif ct == "input_image" or ct == "image_url": + iu = c.get("image_url") or c.get("url", {}) + url = iu.get("url", iu) if isinstance(iu, dict) else iu + if isinstance(url, str) and url.startswith("data:"): + mime, _, b64 = url.partition(";base64,") + mime = mime.replace("data:", "") or "image/png" + parts.append({"inlineData": {"mimeType": mime, "data": b64}}) + else: + parts.append({"text": str(url)}) + if parts: + contents.append({"role": role, "parts": parts}) + elif isinstance(content, str): + contents.append({"role": role, "parts": [{"text": content}]}) + elif t == "function_call": + call_id = item.get("call_id") or item.get("id") or f"call_{uuid.uuid4().hex[:24]}" + fname = item.get("name", "") + if call_id and fname: + tool_call_names[call_id] = fname + args = item.get("arguments", "{}") + if isinstance(args, str): + try: + args = json.loads(args) + except Exception: + args = {} + contents.append({"role": "model", "parts": [{"functionCall": {"name": fname, "args": args, "id": call_id}, "thoughtSignature": "skip_thought_signature_validator"}]}) + elif t == "function_call_output": + call_id = item.get("call_id", item.get("id", "")) + output = item.get("output", "") + fname = item.get("name", "") or tool_call_names.get(call_id, "") + try: + output_parsed = json.loads(output) if isinstance(output, str) else output + except Exception: + output_parsed = output + resp_part = {"functionResponse": {"name": fname or "unknown", "response": {"result": output_parsed if isinstance(output_parsed, (dict, list)) else output}}} + if call_id: + resp_part["functionResponse"]["id"] = call_id + contents.append({"role": "user", "parts": [resp_part]}) + + if OAUTH_PROVIDER.startswith("google"): + sanitized = [] + last_user_text = None + last_role = None + for content in contents: + role = content.get("role") + parts = [p for p in content.get("parts", []) if isinstance(p, dict)] + if not parts: + continue + text_key = "\n".join([p.get("text", "") for p in parts if "text" in p]).strip() + if role == "user" and text_key and text_key == last_user_text: + continue + if role == last_role and role in ("user", "model") and sanitized: + sanitized[-1].setdefault("parts", []).extend(parts) + else: + sanitized.append({"role": role, "parts": parts}) + if role == "user" and text_key: + last_user_text = text_key + last_role = role + while sanitized and sanitized[0].get("role") != "user": + sanitized.pop(0) + while sanitized and sanitized[-1].get("role") != "user": + sanitized.pop() + contents = sanitized + + if instructions: + system_parts.append({"text": instructions}) + if OAUTH_PROVIDER == "google-antigravity": + system_parts.append({"text": ( + "You are connected through a Responses API translation proxy. " + "If tools are available and the user's request requires changing files, call the appropriate tool immediately. " + "Do not announce plans, do not say you will list files, browse, fetch, inspect, or start by exploring unless you are emitting the actual tool call in the same response. " + "For file creation requests, use tools to create or modify the file instead of only printing code in chat. " + "If no suitable tool is available, answer directly with the complete result. " + "Never answer only with a plan such as 'I will start by...' or 'I am going to...'." + )}) + + gen_config = {} + mot = body.get("max_output_tokens", 0) + if mot: + gen_config["maxOutputTokens"] = mot + if body.get("temperature") is not None: + gen_config["temperature"] = body["temperature"] + if body.get("top_p") is not None: + gen_config["topP"] = body["top_p"] + + if REASONING_ENABLED and REASONING_EFFORT != "none": + budget = {"low": 2048, "medium": 8192, "high": 24576}.get(REASONING_EFFORT, 8192) + gen_config["thinkingConfig"] = {"includeThoughts": True, "thinkingBudget": budget} + + oa_tools = body.get("tools", []) + gemini_tools = [] + if oa_tools: + func_decls = [] + for tool in oa_tools: + ttype = tool.get("type", "function") + fname = tool.get("name", "") + if ttype == "function": + fn = tool.get("function", tool) + name = fn.get("name", fname) + desc = fn.get("description", "") + params = fn.get("parameters", fn.get("input_schema", {})) + func_decls.append({"name": name, "description": desc, "parameters": params}) + elif fname: + func_decls.append({"name": fname, "description": tool.get("description", ""), "parameters": tool.get("parameters", {"type": "object", "properties": {}})}) + if func_decls: + gemini_tools = [{"functionDeclarations": func_decls}] + + request_body = {"contents": contents} + if system_parts: + request_body["systemInstruction"] = {"parts": system_parts} + if gen_config: + request_body["generationConfig"] = gen_config + if gemini_tools: + request_body["tools"] = gemini_tools + + wrapped = { + "project": project_id, + "model": model, + "request": request_body, + } + if OAUTH_PROVIDER == "google-antigravity": + wrapped["requestType"] = "agent" + wrapped["userAgent"] = "antigravity" + wrapped["requestId"] = f"agent-{uuid.uuid4().hex[:12]}" + + endpoints = ([ + "https://daily-cloudcode-pa.sandbox.googleapis.com", + "https://autopush-cloudcode-pa.sandbox.googleapis.com", + "https://cloudcode-pa.googleapis.com", + ] if OAUTH_PROVIDER == "google-antigravity" else [ + "https://cloudcode-pa.googleapis.com", + ]) + action = "streamGenerateContent" if stream else "generateContent" + url_suffix = f"v1internal:{action}?alt=sse" if stream else f"v1internal:{action}" + + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {access_token}", + } + if OAUTH_PROVIDER == "google-antigravity": + version = _ensure_antigravity_version() + headers["User-Agent"] = f"antigravity/{version} darwin/arm64" + else: + headers["User-Agent"] = "google-api-nodejs-client/9.15.1" + headers["X-Goog-Api-Client"] = "gl-node/22.17.0" + headers["Client-Metadata"] = "ideType=IDE_UNSPECIFIED,platform=PLATFORM_UNSPECIFIED,pluginType=GEMINI" + body_b = json.dumps(wrapped).encode() + print(f"[gemini-oauth] model={model} stream={stream} items={len(input_data) if isinstance(input_data, list) else 1} project={project_id}", file=sys.stderr) + + for ep in endpoints: + target = f"{ep}/{url_suffix}" + req = urllib.request.Request(target, data=body_b, headers=headers) + try: + upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream)) + break + except urllib.error.HTTPError as e: + err_body = e.read().decode() + if e.code == 400 and OAUTH_PROVIDER.startswith("google"): + try: + debug_path = os.path.join(_LOG_DIR, "gemini-last-400-request.json") + with open(debug_path, "w") as dbg: + json.dump({"endpoint": ep, "model": model, "wrapped": wrapped, "error": err_body}, dbg, indent=2) + print(f"[gemini-oauth] saved 400 debug request to {debug_path}", file=sys.stderr) + except Exception: + pass + if e.code == 429 and ep != endpoints[-1]: + print(f"[gemini-oauth] {ep} HTTP 429, trying next endpoint", file=sys.stderr) + continue + return self.send_json(e.code, {"error": {"type": "upstream_error", "message": err_body}}) + except Exception as e: + if ep == endpoints[-1]: + return self.send_json(502, {"error": {"type": "proxy_error", "message": str(e)}}) + print(f"[gemini-oauth] {ep} failed: {e}, trying next", file=sys.stderr) + continue + + if stream: + self._forward_gemini_sse(upstream, model, body, input_data) + else: + self._forward_gemini_json(upstream, model, body, input_data) + + def _forward_gemini_sse(self, upstream, model, body, input_data): + resp_id = f"resp-{uuid.uuid4().hex[:24]}" + created = int(time.time()) + self.send_response(200) + self.send_header("Content-Type", "text/event-stream") + self.send_header("Cache-Control", "no-cache") + self.send_header("Connection", "keep-alive") + self.end_headers() + + full_text = "" + output_items = [] + current_tool_calls = {} + message_started = False + message_id = f"msg-{uuid.uuid4().hex[:24]}" + + def flush_event(event_type, data): + self.wfile.write(f"event: {event_type}\ndata: {json.dumps(data)}\n\n".encode()) + self.wfile.flush() + + flush_event("response.created", {"type": "response.created", "response": {"id": resp_id, "object": "response", "model": model, "status": "in_progress", "created": created, "output": []}}) + flush_event("response.in_progress", {"type": "response.in_progress", "response": {"id": resp_id}}) + + buf = "" + stream_finished = False + for raw_line in upstream: + if stream_finished: + break + line = raw_line.decode(errors="replace") + if line.startswith("data: "): + buf += line[6:] + continue + if not line.strip() and buf: + try: + chunk = json.loads(buf) + except Exception: + buf = "" + continue + buf = "" + + candidates = chunk.get("response", chunk).get("candidates", []) + if not candidates: + if chunk.get("error"): + print(f"[gemini-oauth] stream error chunk: {str(chunk.get('error'))[:300]}", file=sys.stderr) + continue + if candidates[0].get("finishReason") and not candidates[0].get("content", {}).get("parts"): + print(f"[gemini-oauth] finish without parts: {candidates[0].get('finishReason')}", file=sys.stderr) + parts = candidates[0].get("content", {}).get("parts", []) + for part in parts: + if part.get("thought"): + continue + if "text" in part and not part.get("functionCall"): + text_delta = part["text"] + if not text_delta: + continue + full_text += text_delta + if not message_started: + flush_event("response.output_item.added", {"type": "response.output_item.added", "output_index": 0, "item": {"type": "message", "id": message_id, "role": "assistant", "content": []}}) + flush_event("response.content_part.added", {"type": "response.content_part.added", "output_index": 0, "content_index": 0, "part": {"type": "output_text", "text": ""}}) + output_items.append({"text": True}) + message_started = True + flush_event("response.output_text.delta", {"type": "response.output_text.delta", "output_index": 0, "content_index": 0, "delta": text_delta}) + elif part.get("functionCall"): + fc = part["functionCall"] + call_id = f"call_{uuid.uuid4().hex[:24]}" + args_str = json.dumps(fc.get("args", fc.get("arguments", {}))) + output_index = len(output_items) + flush_event("response.output_item.added", {"type": "response.output_item.added", "output_index": output_index, "item": {"type": "function_call", "id": call_id, "call_id": call_id, "name": fc.get("name", ""), "arguments": ""}}) + flush_event("response.function_call_arguments.delta", {"type": "response.function_call_arguments.delta", "output_index": output_index, "item_id": call_id, "delta": args_str}) + flush_event("response.function_call_arguments.done", {"type": "response.function_call_arguments.done", "output_index": output_index, "item_id": call_id, "arguments": args_str}) + current_tool_calls[call_id] = fc + output_items.append({"tool": True}) + if OAUTH_PROVIDER == "google-antigravity" and full_text and candidates[0].get("finishReason"): + stream_finished = True + break + + out = [] + if not full_text and not current_tool_calls: + print("[gemini-oauth] WARNING: completed with empty output", file=sys.stderr) + if full_text: + out.append({"type": "message", "id": message_id, "role": "assistant", "content": [{"type": "output_text", "text": full_text}]}) + tool_outputs = [] + for cid, fc in current_tool_calls.items(): + tool_outputs.append({"type": "function_call", "id": cid, "call_id": cid, "name": fc.get("name", ""), "arguments": json.dumps(fc.get("args", fc.get("arguments", {})))}) + out.extend(tool_outputs) + + final_resp = {"id": resp_id, "object": "response", "model": model, "status": "completed", "created": created, "output": out} + if full_text: + flush_event("response.output_text.done", {"type": "response.output_text.done", "output_index": 0, "content_index": 0, "text": full_text}) + flush_event("response.content_part.done", {"type": "response.content_part.done", "output_index": 0, "content_index": 0, "part": {"type": "output_text", "text": full_text}}) + flush_event("response.output_item.done", {"type": "response.output_item.done", "output_index": 0, "item": out[0]}) + for idx, item in enumerate(tool_outputs, start=(1 if full_text else 0)): + flush_event("response.output_item.done", {"type": "response.output_item.done", "output_index": idx, "item": item}) + flush_event("response.completed", {"type": "response.completed", "response": final_resp}) + self.close_connection = True + + with _response_store_lock: + _response_store[resp_id] = final_resp + while len(_response_store) > _MAX_STORED: + _response_store.popitem(last=False) + + def _forward_gemini_json(self, upstream, model, body, input_data): + data = json.loads(upstream.read().decode()) + resp_id = f"resp-{uuid.uuid4().hex[:24]}" + created = int(time.time()) + out = [] + full_text = "" + candidates = data.get("response", data).get("candidates", []) + if candidates: + parts = candidates[0].get("content", {}).get("parts", []) + text_parts = [] + for part in parts: + if part.get("thought"): + continue + if "text" in part and not part.get("functionCall"): + text_parts.append(part["text"]) + elif part.get("functionCall"): + fc = part["functionCall"] + call_id = f"call_{uuid.uuid4().hex[:24]}" + out.append({"type": "function_call", "id": call_id, "call_id": call_id, "name": fc.get("name", ""), "arguments": json.dumps(fc.get("args", fc.get("arguments", {})))}) + if text_parts: + full_text = "".join(text_parts) + out.insert(0, {"type": "message", "id": f"msg-{uuid.uuid4().hex[:24]}", "role": "assistant", "content": [{"type": "output_text", "text": full_text}]}) + resp = {"id": resp_id, "object": "response", "model": model, "status": "completed", "created": created, "output": out} + with _response_store_lock: + _response_store[resp_id] = resp + while len(_response_store) > _MAX_STORED: + _response_store.popitem(last=False) + self.send_json(200, resp) + def _handle_bgp(self, body, model, stream, messages, input_data): - routes = sorted(BGP_ROUTES, key=lambda r: r.get("priority", 99)) + routes = _sorted_bgp_routes() + routes = [r for r in routes if _bucket_for_route(r).allow()] + if not routes: + return self.send_json(503, {"error": {"type": "bgp_rate_limited", "message": "All routes rate-limited"}}) errors = [] for route in routes: r_model = route.get("model", model) @@ -1266,11 +1976,13 @@ class Handler(http.server.BaseHTTPRequestHandler): }, browser_ua=True) print(f"[bgp] trying route '{route.get('name', r_url)}' model={r_model}", file=sys.stderr) req = urllib.request.Request(target, data=json.dumps(chat_body).encode(), headers=fwd) + t0_route = time.time() route_ok = False for attempt in range(3): try: upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream)) print(f"[bgp] route '{route.get('name', r_url)}' connected OK", file=sys.stderr) + _update_route_stats(route, True, time.time() - t0_route) self._forward_oa_compat(upstream, stream, r_model, chat_body, body, input_data, fwd, target) return except urllib.error.HTTPError as e: @@ -1282,6 +1994,7 @@ class Handler(http.server.BaseHTTPRequestHandler): req = urllib.request.Request(target, data=json.dumps(chat_body).encode(), headers=fwd) continue print(f"[bgp] route '{route.get('name', r_url)}' FAILED: HTTP {e.code}: {err[:200]}", file=sys.stderr) + _update_route_stats(route, False, time.time() - t0_route, http_code=e.code) errors.append(f"{route.get('name','?')}: HTTP {e.code}") break except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError) as e: @@ -1291,10 +2004,12 @@ class Handler(http.server.BaseHTTPRequestHandler): time.sleep(wait) req = urllib.request.Request(target, data=json.dumps(chat_body).encode(), headers=fwd) continue + _update_route_stats(route, False, time.time() - t0_route, error_type=str(e)) errors.append(f"{route.get('name','?')}: {e}") break except Exception as e: print(f"[bgp] route '{route.get('name', r_url)}' FAILED: {e}", file=sys.stderr) + _update_route_stats(route, False, time.time() - t0_route, error_type=str(e)) errors.append(f"{route.get('name','?')}: {e}") break