diff --git a/CHANGELOG.md b/CHANGELOG.md index 34193d1..0056c63 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,27 @@ # Changelog +## v3.10.12 (2026-05-26) + +**Sticky Endpoint, Parallel Discovery, Smart Errors, Anti-Stall** + +### New Features +- **Sticky endpoint caching**: remembers which endpoint last succeeded, reuses it on every subsequent request (zero probe overhead) +- **Parallel endpoint discovery**: probes ALL endpoints simultaneously, first 200 wins and is cached +- **Automatic re-probe**: if the sticky endpoint fails (429/502/503), cache is cleared and all endpoints are probed in parallel again +- **Endpoint order swapped**: `daily-cloudcode-pa` tried first (matches agy-core behavior), `cloudcode-pa` as fallback +- **Anti-stall engine**: kills stale proxy processes and clears `__pycache__` on every new session start +- **Smart error classification**: distinguishes `quota_exhausted` vs `capacity_exhausted` vs `account_banned` vs `validation_required` vs `service_disabled` vs `auth_permanent` +- **Rate limit reset time parsing**: extracts cooldown from error body (`quotaResetDelay`, `Resets in ~1h27m`, etc.) for accurate cooldown +- **Missing Antigravity headers**: `X-Client-Name`, `X-Client-Version`, `x-goog-api-client`, platform-aware `User-Agent` +- **Session ID**: added `sessionId` to request wrapper for proper session tracking + +### How It Works +1. First request: parallel probe → daily-cloudcode-pa and cloudcode-pa hit simultaneously → first 200 wins, cached +2. All subsequent requests: go straight to the cached endpoint (no probing) +3. If cached endpoint returns 429/502/503 (or the connection fails): cache cleared, parallel re-probe +4. If cached endpoint returns any other HTTP error (e.g. 400/403): error returned to client immediately +5. 
On startup: anti-stall kills all stale translate-proxy processes, clears pycache + ## v3.10.11 (2026-05-26) **Hybrid Endpoint Fallback — Redundant Antigravity Endpoints** diff --git a/README.md b/README.md index fb31a47..6975940 100644 --- a/README.md +++ b/README.md @@ -554,6 +554,7 @@ The launcher generates model catalog JSON with dual field naming to satisfy both Codex Launcher includes special handling for Gemini 3 / Antigravity OAuth: +- **Sticky endpoint with parallel discovery**: First request probes `cloudcode-pa.googleapis.com` and `daily-cloudcode-pa.googleapis.com` simultaneously — first 200 wins and is cached. All subsequent requests go straight to the cached endpoint. If it fails (429/502/503), cache is cleared and all endpoints are re-probed in parallel. Zero wasted time on rate-limited endpoints. - **Thought signature preservation**: Captures `thoughtSignature` from Gemini responses and reattaches them on follow-up requests to maintain tool-call continuity. - **Edit-intent detection**: When follow-up requests contain edit keywords, a tool-use @@ -561,7 +562,7 @@ Codex Launcher includes special handling for Gemini 3 / Antigravity OAuth: - **User instruction enforcement**: The latest user message is guaranteed to be the final content turn sent to Gemini, even after compaction. - **Smart compaction**: Old tool outputs capped at 3000 chars, recent 6 at 20000 chars. -- **Context compaction**: Aggressive auto-trimming when approaching 60% of model context +- **Context compaction**: Aggressive auto-trimming when approaching 80% of model context limit (1M tokens Gemini, 200K Claude, 128K GPT-OSS). Prevents token limit errors. - **Model ID mapping**: Display names (e.g. `Gemini 3.5 Flash (High)`) mapped to REST API slugs (e.g. `gemini-3-flash`). See `docs/ANTIGRAVITY.md` for details. 
diff --git a/codex-launcher_3.10.12_all.deb b/codex-launcher_3.10.12_all.deb new file mode 100644 index 0000000..9f1733d Binary files /dev/null and b/codex-launcher_3.10.12_all.deb differ diff --git a/install.sh b/install.sh index 1af5b7d..5c24f87 100755 --- a/install.sh +++ b/install.sh @@ -3,11 +3,11 @@ set -e SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -if [ -f "$SCRIPT_DIR/codex-launcher_3.10.11_all.deb" ]; then - echo "Installing codex-launcher_3.10.11_all.deb ..." - sudo dpkg -i "$SCRIPT_DIR/codex-launcher_3.10.11_all.deb" +if [ -f "$SCRIPT_DIR/codex-launcher_3.10.12_all.deb" ]; then + echo "Installing codex-launcher_3.10.12_all.deb ..." + sudo dpkg -i "$SCRIPT_DIR/codex-launcher_3.10.12_all.deb" echo "" - echo "Installed v3.10.11 via .deb package." + echo "Installed v3.10.12 via .deb package." echo " translate-proxy.py -> /usr/bin/translate-proxy.py" echo " codex-launcher-gui -> /usr/bin/codex-launcher-gui" echo " cleanup-codex-stale -> /usr/bin/cleanup-codex-stale.sh" diff --git a/src/codex_launcher_lib.py b/src/codex_launcher_lib.py index 5286480..fc23cc7 100644 --- a/src/codex_launcher_lib.py +++ b/src/codex_launcher_lib.py @@ -83,6 +83,16 @@ model_catalog_json = "" """ CHANGELOG = [ + ("3.10.12", "2026-05-26", [ + "Sticky endpoint: caches last working endpoint, skips probing on subsequent requests", + "Parallel discovery: probes ALL endpoints simultaneously, first 200 wins", + "Auto re-probe: if sticky endpoint fails (429/502/503), parallel re-probe all", + "Endpoint order: daily-cloudcode-pa first (matches agy-core), cloudcode-pa fallback", + "Anti-stall engine: kills stale proxy processes + clears pycache on startup", + "Smart error classification: quota vs capacity vs banned vs validation vs auth", + "Rate limit reset parsing: extracts cooldown from error body for accuracy", + "Missing headers: X-Client-Name, X-Client-Version, x-goog-api-client, sessionId", + ]), ("3.10.11", "2026-05-26", [ "Hybrid endpoint fallback: cloudcode-pa then 
# Endpoint base URL that most recently answered successfully, or None when
# nothing is cached. Read and written only while holding the lock below.
_antigravity_preferred_endpoint = None
_antigravity_endpoint_lock = threading.Lock()


def _classify_antigravity_error(status_code, body):
    """Map a failed upstream response to a coarse error-class string.

    Args:
        status_code: HTTP status code of the failed response.
        body: Decoded response body. It is matched case-insensitively;
            ``None`` is treated as an empty body.

    Returns:
        One of ``"bad_request"``, ``"auth_permanent"``, ``"auth_transient"``,
        ``"validation_required"``, ``"account_banned"``,
        ``"service_disabled"``, ``"forbidden"``, ``"capacity_exhausted"``,
        ``"quota_exhausted"``, ``"rate_limited"``, ``"server_error"`` or
        ``"unknown"``.
    """
    # Tolerate a missing body instead of raising AttributeError on None.
    lower = (body or "").lower()
    if status_code == 400:
        return "bad_request"
    if status_code == 401:
        permanent_markers = ("invalid_grant", "token revoked", "token_revoked", "invalid_client")
        if any(marker in lower for marker in permanent_markers):
            return "auth_permanent"
        return "auth_transient"
    if status_code == 403:
        if "validation_required" in lower or "account_disabled" in lower:
            return "validation_required"
        if "has been disabled" in lower and "violation of terms of service" in lower:
            return "account_banned"
        if "service_disabled" in lower:
            return "service_disabled"
        return "forbidden"
    if status_code in (429, 503, 529):
        # Capacity markers are checked first so a body carrying both kinds of
        # marker is reported as a capacity problem.
        capacity_markers = (
            "model_capacity_exhausted",
            "capacity_exhausted",
            "model is currently overloaded",
            "service temporarily unavailable",
        )
        if any(marker in lower for marker in capacity_markers):
            return "capacity_exhausted"
        quota_markers = (
            "quota_exhausted",
            "resource_exhausted",
            "daily limit",
            "quota exceeded",
            "quotaresetdelay",
        )
        if any(marker in lower for marker in quota_markers):
            return "quota_exhausted"
        return "rate_limited"
    if status_code >= 500:
        return "server_error"
    return "unknown"


def _duration_to_seconds(text):
    """Sum a compact duration such as ``1h16m0.5s`` or ``500ms`` into seconds.

    Units are matched case-insensitively. ``ms`` is tried before ``m`` so a
    millisecond value is never read as minutes followed by a stray ``s``.
    """
    import re as _re
    multipliers = {"h": 3600, "m": 60, "s": 1}
    total = 0.0
    for number, unit in _re.findall(r'(\d+(?:\.\d+)?)(ms|h|m|s)', text.lower()):
        value = float(number)
        if unit == "ms":
            total += value / 1000
        else:
            total += value * multipliers[unit]
    return total


def _parse_rate_limit_reset(body):
    """Extract a cooldown, in seconds, from an upstream error body.

    The following forms are recognised, in this order:

    1. ``quotaResetDelay`` followed by a compact duration (``42s``,
       ``1500ms``, ``27m30s``, ``1h16m0.5s``).
    2. A free-standing ``<H>h<M>m<S>s`` duration anywhere in the body.
    3. ``Resets in ~<duration>`` (``~1h27m``, ``~45m``, ``~2h``); the tilde
       is optional.
    4. ``retry-after`` / ``retry_after`` / ``retryafter`` followed by an
       integer and ``s`` or ``sec``.

    Args:
        body: Decoded error body. ``None`` or an empty string yields ``None``.

    Returns:
        The delay in seconds (``float`` for form 1, ``int`` when the value is
        whole for the other forms), or ``None`` when nothing matched.
    """
    import re as _re
    if not body:
        return None

    # One or more <number><unit> tokens with no separators between them.
    span = r'((?:\d+(?:\.\d+)?(?:ms|h|m|s))+)'

    m = _re.search(r'quotaResetDelay[:"\s]+' + span, body, _re.IGNORECASE)
    if m:
        return _duration_to_seconds(m.group(1))

    m = _re.search(r'(\d+)h(\d+)m(\d+(?:\.\d+)?)s', body, _re.IGNORECASE)
    if m:
        total = int(m.group(1)) * 3600 + int(m.group(2)) * 60 + float(m.group(3))
        return int(total) if total.is_integer() else total

    m = _re.search(r'Resets in ~?\s*' + span, body, _re.IGNORECASE)
    if m:
        total = _duration_to_seconds(m.group(1))
        return int(total) if total.is_integer() else total

    m = _re.search(r'retry[-_]?after[:\s]+(\d+)\s*(?:sec|s\b)', body, _re.IGNORECASE)
    if m:
        return int(m.group(1))
    return None


# Cached value for the X-Client-Version header and the time it was last
# refreshed (0 means "never").
_antigravity_client_version = "1.110.0"
_antigravity_client_version_checked = 0


def _ensure_antigravity_client_version():
    """Return the client version string to send upstream.

    ``ANTIGRAVITY_CLIENT_VERSION`` wins whenever it is set to a non-blank
    value and is re-read on every call. Otherwise the cached value is
    returned, and it is refreshed from ``ANTIGRAVITY_CLIENT_VERSION_FALLBACK``
    (default ``"1.110.0"``) at most once every six hours.
    """
    global _antigravity_client_version, _antigravity_client_version_checked
    env_ver = os.environ.get("ANTIGRAVITY_CLIENT_VERSION", "").strip()
    if env_ver:
        return env_ver
    if time.time() - _antigravity_client_version_checked < 6 * 3600:
        return _antigravity_client_version
    # A blank fallback would produce an empty header value; use the default.
    fallback = os.environ.get("ANTIGRAVITY_CLIENT_VERSION_FALLBACK", "").strip()
    _antigravity_client_version = fallback or "1.110.0"
    _antigravity_client_version_checked = time.time()
    return _antigravity_client_version
_ensure_antigravity_version() - headers["User-Agent"] = f"antigravity/{version} darwin/arm64" + import platform as _plat + _os_name = _plat.system().lower() + _os_arch = _plat.machine().lower().replace("x86_64", "x64").replace("aarch64", "arm64") + headers["User-Agent"] = f"antigravity/{version} {_os_name}/{_os_arch}" + headers["X-Client-Name"] = "antigravity" + headers["X-Client-Version"] = _ensure_antigravity_client_version() + headers["x-goog-api-client"] = "gl-node/18.18.2 fire/0.8.6 grpc/1.10.x" else: headers["User-Agent"] = "google-api-nodejs-client/9.15.1" headers["X-Goog-Api-Client"] = "gl-node/22.17.0" @@ -5112,41 +5178,118 @@ class Handler(http.server.BaseHTTPRequestHandler): if OAUTH_PROVIDER == "google-antigravity": print(f"[antigravity-endpoint] endpoints={[e.replace('https://','') for e in endpoints]} project={project_id}", file=sys.stderr) - for ep in endpoints: + upstream = None + chosen_ep = None + global _antigravity_preferred_endpoint + + with _antigravity_endpoint_lock: + _pref = _antigravity_preferred_endpoint + + if _pref and _pref in endpoints: + ep = _pref target = f"{ep}/{url_suffix}" req = urllib.request.Request(target, data=body_b, headers=headers) try: upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream)) - break + chosen_ep = ep + print(f"[{self._session_id}] sticky OK: {ep.replace('https://','')}", file=sys.stderr) except urllib.error.HTTPError as e: err_body = e.read().decode() - if e.code == 400 and OAUTH_PROVIDER.startswith("google"): - try: - debug_path = os.path.join(_LOG_DIR, "gemini-last-400-request.json") - with open(debug_path, "w", encoding="utf-8") as dbg: - json.dump({"endpoint": ep, "model": model, "wrapped": wrapped, "error": err_body}, dbg, indent=2) - print(f"[{self._session_id}] saved 400 debug request to {debug_path}", file=sys.stderr) - except Exception: - pass - if e.code == 403 and "SERVICE_DISABLED" in err_body[:500] and ep != endpoints[-1]: - print(f"[{self._session_id}] {ep} 
SERVICE_DISABLED, trying next endpoint", file=sys.stderr) - continue - if e.code == 429 and OAUTH_PROVIDER.startswith("google"): - print(f"[{self._session_id}] 429 from {ep}, body: {err_body[:300]}", file=sys.stderr) - if ep != endpoints[-1]: - print(f"[{self._session_id}] {ep} HTTP 429, trying fallback endpoint", file=sys.stderr) - continue - pool = _google_antigravity_pool if OAUTH_PROVIDER == "google-antigravity" else _google_cli_pool - _, acct = _get_google_account(OAUTH_PROVIDER) - if acct: - pool.mark_rate_limited(acct, 60) - print(f"[{self._session_id}] all endpoints returned 429", file=sys.stderr) - return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}}) - except Exception as e: - if ep == endpoints[-1]: - return self.send_json(502, {"error": {"type": "proxy_error", "message": str(e)}}) - print(f"[{self._session_id}] {ep} failed: {e}, trying next", file=sys.stderr) - continue + if e.code in (429, 502, 503): + print(f"[{self._session_id}] sticky {ep.replace('https://','')} failed ({e.code}), parallel probing all", file=sys.stderr) + with _antigravity_endpoint_lock: + if _antigravity_preferred_endpoint == ep: + _antigravity_preferred_endpoint = None + upstream = None + else: + if e.code == 400 and OAUTH_PROVIDER.startswith("google"): + try: + debug_path = os.path.join(_LOG_DIR, "gemini-last-400-request.json") + with open(debug_path, "w", encoding="utf-8") as dbg: + json.dump({"endpoint": ep, "model": model, "wrapped": wrapped, "error": err_body}, dbg, indent=2) + print(f"[{self._session_id}] saved 400 debug request to {debug_path}", file=sys.stderr) + except Exception: + pass + return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}}) + except Exception: + with _antigravity_endpoint_lock: + if _antigravity_preferred_endpoint == ep: + _antigravity_preferred_endpoint = None + upstream = None + print(f"[{self._session_id}] sticky {ep.replace('https://','')} 
conn failed, parallel probing", file=sys.stderr) + + if upstream is None: + _probe_results = {} + _probe_winner = threading.Event() + _probe_data = [None, None] + + def _probe_try(ep): + if _probe_winner.is_set(): + return + target = f"{ep}/{url_suffix}" + req = urllib.request.Request(target, data=body_b, headers=headers) + try: + resp = urllib.request.urlopen(req, timeout=30) + if _probe_winner.is_set(): + try: resp.close() + except: pass + return + _probe_data[0] = resp + _probe_data[1] = ep + _probe_winner.set() + except urllib.error.HTTPError as e: + err_body = e.read().decode() + _probe_results[ep] = (e.code, err_body) + except Exception as e: + _probe_results[ep] = (0, str(e)) + + probe_threads = [] + for ep in endpoints: + t = threading.Thread(target=_probe_try, args=(ep,), daemon=True) + t.start() + probe_threads.append(t) + + _probe_winner.wait(timeout=30) + + if _probe_data[0] is not None: + upstream = _probe_data[0] + chosen_ep = _probe_data[1] + with _antigravity_endpoint_lock: + _antigravity_preferred_endpoint = chosen_ep + print(f"[{self._session_id}] parallel probe winner: {chosen_ep.replace('https://','')}", file=sys.stderr) + else: + for t in probe_threads: + t.join(timeout=5) + best_err = None + best_ep = None + for ep in endpoints: + if ep in _probe_results: + code, err_body = _probe_results[ep] + if code == 400 and OAUTH_PROVIDER.startswith("google"): + try: + debug_path = os.path.join(_LOG_DIR, "gemini-last-400-request.json") + with open(debug_path, "w", encoding="utf-8") as dbg: + json.dump({"endpoint": ep, "model": model, "wrapped": wrapped, "error": err_body}, dbg, indent=2) + except Exception: + pass + if best_err is None: + best_err = (code, err_body) + best_ep = ep + if best_err: + code, err_body = best_err + err_class = _classify_antigravity_error(code, err_body) + print(f"[{self._session_id}] all endpoints failed: {code} class={err_class}", file=sys.stderr) + if err_class in ("quota_exhausted", "rate_limited"): + reset_s = 
def _anti_stall_cleanup():
    """Best-effort startup cleanup of stale proxy processes and bytecode.

    Sends SIGTERM to every process that ``pgrep -f translate-proxy`` reports,
    except this process and its parent, then removes the ``__pycache__``
    directory that sits next to this file. If anything was signalled, a
    summary is printed and the function sleeps for one second.

    Every step is best-effort: a missing ``pgrep``, a timeout, or a process
    that cannot be signalled never aborts startup.

    NOTE(review): the pattern match is by command line only, so other proxy
    instances that are still in use are signalled too — confirm that is the
    intended behaviour.
    """
    import shutil
    import subprocess as _sp

    # The parent is protected as well: a wrapper such as a shell or launcher
    # that started us can carry "translate-proxy" in its own command line.
    protected = {os.getpid(), os.getppid()}
    killed = []

    try:
        listing = _sp.run(
            ["pgrep", "-f", "translate-proxy"],
            capture_output=True, text=True, timeout=5,
        ).stdout
    except (OSError, _sp.SubprocessError) as exc:
        # pgrep is missing or hung: skip the scan but say why.
        print(f"[anti-stall] process scan skipped: {exc}", flush=True)
        listing = ""

    for token in listing.split():
        if not token.isdigit():
            continue
        pid = int(token)
        if pid in protected:
            continue
        try:
            os.kill(pid, signal.SIGTERM)
        except OSError:
            # Already gone, or not ours to signal.
            continue
        killed.append(pid)

    _cache_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "__pycache__")
    if os.path.isdir(_cache_dir):
        shutil.rmtree(_cache_dir, ignore_errors=True)

    if killed:
        print(f"[anti-stall] killed {len(killed)} stale proxy process(es): {killed}", flush=True)
        time.sleep(1)