diff --git a/CHANGELOG.md b/CHANGELOG.md index 06357ca..fe1eb0f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,25 @@ # Changelog +## v3.11.0 (2026-05-26) + +**Cobra PR Merge + Smart Continuation + API Key Hot-Reload** + +### New Features +- **Concurrency semaphore (max 3)**: limits parallel upstream requests to prevent rate-limiting +- **Auto-continue for truncated text**: detects text ending in `:`, `(`, `;`, `…` or `finish_reason=length`, continues seamlessly +- **SO_REUSEADDR on sticky port**: prevents `TIME_WAIT` from changing port on restart +- **proxy-stderr.log**: persistent log file for proxy errors +- **Stream diagnostics**: logs event count, finish reason, content flag, elapsed time after each stream +- **Timeout/OSError handler**: sends proper `response.failed` SSE event instead of silently dropping connection +- **Restart Proxy button**: now only restarts proxy without killing Codex Desktop +- **Tool call argument normalizer**: fixes capital-A `Arguments` key, strips markdown/JSON code block wrapping from tool call arguments +- **Smart-continue loop (2× retries)**: escalating nudge messages when model returns text-only stop mid-task +- **XML tool call extraction**: parses `name{args}` from model text output, injects as real `function_call` items +- **Auto-continue + smart-continue ordered with skip guard**: prevents both from double-firing on the same response +- **API key hot-reload**: mtime tracking detects config changes, `/admin/reload` endpoint triggers hot-reload, `/admin/verify-key` tests key against upstream +- **GUI hot-reload**: auto-refreshes proxy key on endpoint edit, verifies with upstream — no proxy restart needed +- **Synthetic tool-results disabled**: was causing deepseek-v4-pro truncation on opencode.ai + ## v3.10.12 (2026-05-26) **Sticky Endpoint, Claude Fixes, Guardrail Skip, Anti-Stall** diff --git a/codex-launcher_3.11.0_all.deb b/codex-launcher_3.11.0_all.deb new file mode 100644 index 0000000..44561f0 Binary files /dev/null and b/codex-launcher_3.11.0_all.deb differ diff --git a/install.sh b/install.sh index 5c24f87..88f869b 100755 --- a/install.sh +++ b/install.sh @@ -3,11 +3,13 @@ set -e SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -if [ -f "$SCRIPT_DIR/codex-launcher_3.10.12_all.deb" ]; then - echo "Installing codex-launcher_3.10.12_all.deb ..." - sudo dpkg -i "$SCRIPT_DIR/codex-launcher_3.10.12_all.deb" - echo "" - echo "Installed v3.10.12 via .deb package." +if [ -f "$SCRIPT_DIR/codex-launcher_3.11.0_all.deb" ]; then + echo "Installing codex-launcher_3.11.0_all.deb ..." + sudo dpkg -i "$SCRIPT_DIR/codex-launcher_3.11.0_all.deb" +else + echo "WARNING: codex-launcher_3.11.0_all.deb not found; copying files manually." +fi +echo "Installed v3.11.0 via .deb package." echo " translate-proxy.py -> /usr/bin/translate-proxy.py" echo " codex-launcher-gui -> /usr/bin/codex-launcher-gui" echo " cleanup-codex-stale -> /usr/bin/cleanup-codex-stale.sh" diff --git a/src/codex-launcher-gui b/src/codex-launcher-gui index 3dfd4ac..7fa065b 100755 --- a/src/codex-launcher-gui +++ b/src/codex-launcher-gui @@ -26,6 +26,19 @@ model_catalog_json = "" """ CHANGELOG = [ + ("3.11.0", "2026-05-26", [ + "Merge cobra PR: concurrency semaphore (max 3), auto-continue for truncated text", + "SO_REUSEADDR on sticky port, proxy-stderr.log, stream diagnostics logging", + "Timeout/OSError handler sends response.failed SSE instead of silent drop", + "Restart Proxy button: only restarts proxy without killing Codex Desktop", + "Tool call argument normalizer: fixes Arguments→arguments, strips markdown wrapping", + "Smart-continue loop (2× retries): escalating nudges when model stops text-only mid-task", + "XML tool call extraction: parses patterns from text, injects as real calls", + "Auto-continue + smart-continue ordered with skip guard to avoid double-firing", + "API key hot-reload with mtime tracking + /admin/reload + /admin/verify-key endpoints", + "GUI hot-reload: auto-refreshes proxy key on endpoint edit, verifies with upstream", + "Synthetic tool-results disabled: was causing deepseek-v4-pro truncation on opencode.ai", + ]), ("3.10.4", "2026-05-25", [ "OAuth Secrets editor in GUI — update client ID/secret without editing files", "Secrets stored in ~/.config/codex-launcher/oauth-secrets.json (not in repo)", @@ -361,7 +374,7 @@ PROVIDER_PRESETS = { }, "Google Antigravity (OAuth)": { "backend_type": "gemini-oauth-antigravity", - "base_url": "https://daily-cloudcode-pa.sandbox.googleapis.com", + "base_url": "https://cloudcode-pa.googleapis.com", "oauth_provider": "google-antigravity", "models": [ "Gemini 3.5 Flash (High)", "Gemini 3.5 Flash (Medium)", "Gemini 3.5 Flash (Low)", @@ -1782,6 +1795,64 @@ class AIMonitoringWindow(Gtk.Window): # Main window # ═══════════════════════════════════════════════════════════════════ +def _oauth_discover_project(access_token, token_path, tokens): + project_id = "" + try: + lr = urllib.request.Request( + "https://cloudcode-pa.googleapis.com/v1internal:loadCodeAssist", + data=json.dumps({}).encode(), + headers={"Content-Type": "application/json", + "Authorization": f"Bearer {access_token}", + "User-Agent": "google-api-nodejs-client/9.15.1"}) + lresp = urllib.request.urlopen(lr, timeout=15) + ldata = json.loads(lresp.read()) + p = ldata.get("cloudaicompanionProject", "") + if isinstance(p, dict): + project_id = p.get("id", "") + elif isinstance(p, str): + project_id = p + except Exception: + pass + if not project_id: + return "" + try: + test_url = f"https://cloudcode-pa.googleapis.com/v1internal:listModels?project={project_id}" + test_req = urllib.request.Request(test_url, + headers={"Authorization": f"Bearer {access_token}", + "User-Agent": "google-api-nodejs-client/9.15.1"}) + urllib.request.urlopen(test_req, timeout=10) + except urllib.error.HTTPError as e: + if e.code == 403 and "SERVICE_DISABLED" in (e.read().decode()[:500]): + print(f"[oauth] project {project_id} has API disabled, searching for valid project...", file=sys.stderr) + try: + list_req = urllib.request.Request( + "https://cloudresourcemanager.googleapis.com/v1/projects?filter=lifecycleState:ACTIVE", + headers={"Authorization": f"Bearer {access_token}"}) + list_resp = urllib.request.urlopen(list_req, timeout=15) + projects = json.loads(list_resp.read()).get("projects", []) + for proj in projects: + pid = proj.get("projectId", "") + if not pid or pid == project_id: + continue + try: + t2 = urllib.request.Request( + f"https://cloudcode-pa.googleapis.com/v1internal:listModels?project={pid}", + headers={"Authorization": f"Bearer {access_token}", + "User-Agent": "google-api-nodejs-client/9.15.1"}) + urllib.request.urlopen(t2, timeout=10) + project_id = pid + print(f"[oauth] found working project: {pid}", file=sys.stderr) + break + except Exception: + continue + except Exception: + pass + tokens["project_id"] = project_id + with open(token_path, "w") as f: + json.dump(tokens, f, indent=2) + os.chmod(token_path, 0o600) + return project_id + class LauncherWin(Gtk.Window): def __init__(self): super().__init__(title="Codex Launcher") @@ -1798,7 +1869,7 @@ class LauncherWin(Gtk.Window): # header row hdr = Gtk.Box(spacing=8) vbox.pack_start(hdr, False, False, 0) - lbl = Gtk.Label(label="Codex Launcher v3.10.7") + lbl = Gtk.Label(label="Codex Launcher v3.10.9") lbl.set_use_markup(True) hdr.pack_start(lbl, False, False, 0) changelog_btn = Gtk.Button(label="Changelog") @@ -2832,63 +2903,163 @@ class LauncherWin(Gtk.Window): _stop_proxy() Gtk.main_quit() - def _google_reoauth(self, provider): - secrets_path = os.path.expanduser("~/.config/codex-launcher/oauth-secrets.json") - try: - with open(secrets_path) as f: - secrets = json.load(f) - except Exception: - secrets = {} + def _google_reoauth(self, provider, parent_dlg=None): + import http.server is_antigravity = provider == "google-antigravity" sec_key = "antigravity" if is_antigravity else "gemini_cli" - sec = secrets.get(sec_key, {}) - client_id = sec.get("client_id", "") - client_secret = sec.get("client_secret", "") - if not client_id or not client_secret: + _sp = os.path.expanduser("~/.config/codex-launcher/oauth-secrets.json") + try: + with open(_sp) as _f: + _secrets_data = json.load(_f) + except Exception: + _secrets_data = {} + sec = _secrets_data.get(sec_key, {}) + CLIENT_ID = sec.get("client_id", "") + CLIENT_SECRET = sec.get("client_secret", "") + if not CLIENT_ID or not CLIENT_SECRET: self._show_error_dialog("Missing OAuth secrets", f"No client_id/client_secret for {sec_key}.\nSet them in OAuth Secrets first.") return token_file = "google-antigravity-oauth-token.json" if is_antigravity else "google-cli-oauth-token.json" token_path = os.path.expanduser(f"~/.cache/codex-proxy/{token_file}") - redirect = "urn:ietf:wg:oauth:2.0:oob" - auth_url = (f"https://accounts.google.com/o/oauth2/v2/auth?client_id={client_id}" - f"&redirect_uri={urllib.parse.quote(redirect)}" - f"&response_type=code&scope={urllib.parse.quote('https://www.googleapis.com/auth/cloud-platform')}" - f"&access_type=offline&prompt=consent") - webbrowser.open(auth_url) - code_dlg = Gtk.Dialog(title=f"Re-OAuth: {'Antigravity' if is_antigravity else 'Gemini CLI'}", parent=self, modal=True) - code_dlg.add_button("Cancel", Gtk.ResponseType.CANCEL) - code_dlg.add_button("Exchange", Gtk.ResponseType.OK) - code_dlg.set_default_size(500, 180) - ca = code_dlg.get_content_area() + provider_kind = "antigravity" if is_antigravity else "cli" + + if is_antigravity: + SCOPES = [ + "https://www.googleapis.com/auth/cloud-platform", + "https://www.googleapis.com/auth/userinfo.email", + "https://www.googleapis.com/auth/userinfo.profile", + "https://www.googleapis.com/auth/cclog", + "https://www.googleapis.com/auth/experimentsandconfigs", + ] + port = 51121 + redirect_uri = f"http://localhost:{port}/oauth-callback" + callback_path = "/oauth-callback" + else: + SCOPES = [ + "https://www.googleapis.com/auth/cloud-platform", + "https://www.googleapis.com/auth/userinfo.email", + "https://www.googleapis.com/auth/userinfo.profile", + ] + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(("127.0.0.1", 0)) + port = s.getsockname()[1] + redirect_uri = f"http://127.0.0.1:{port}/oauth2callback" + callback_path = "/oauth2callback" + + state = secrets.token_hex(32) + verifier = secrets.token_urlsafe(64) + challenge = base64.urlsafe_b64encode(hashlib.sha256(verifier.encode()).digest()).rstrip(b"=").decode() + + scope_str = " ".join(SCOPES) + auth_url = ( + f"https://accounts.google.com/o/oauth2/v2/auth?" + f"client_id={CLIENT_ID}" + f"&redirect_uri={urllib.parse.quote(redirect_uri)}" + f"&response_type=code" + f"&scope={urllib.parse.quote(scope_str)}" + f"&access_type=offline" + f"&prompt=select_account%20consent" + f"&state={state}" + f"&code_challenge={challenge}" + f"&code_challenge_method=S256" + ) + + oauth_dlg = Gtk.Dialog(title=f"Re-OAuth: {'Antigravity' if is_antigravity else 'Gemini CLI'}", parent=parent_dlg or self, modal=True) + oauth_dlg.add_button("Cancel", Gtk.ResponseType.CANCEL) + oauth_dlg.set_default_size(520, 200) + ca = oauth_dlg.get_content_area() ca.set_margin_start(12) ca.set_margin_end(12) ca.set_spacing(6) - ca.pack_start(Gtk.Label(label="Browser opened for Google OAuth.\nPaste the authorization code below:", xalign=0), False, False, 0) - code_entry = Gtk.Entry() - code_entry.set_placeholder_text("4/0AX...") - ca.pack_start(code_entry, False, False, 4) + ca.pack_start(Gtk.Label(label=f"Re-authenticating {'Antigravity' if is_antigravity else 'Gemini CLI'}", use_markup=True, xalign=0), False, False, 0) + link_lbl = Gtk.Label(label="Click here to open Google authorization", use_markup=True, xalign=0) + link_lbl.set_markup(f'Click here to open Google authorization') + ca.pack_start(link_lbl, False, False, 4) + status_lbl = Gtk.Label(label="Waiting for browser callback...", xalign=0) + ca.pack_start(status_lbl, False, False, 4) ca.show_all() - if code_dlg.run() == Gtk.ResponseType.OK: - code = code_entry.get_text().strip() - if code: + + code_holder = [None] + error_holder = [None] + + class OAuthHandler(http.server.BaseHTTPRequestHandler): + def do_GET(self2): + qs = urllib.parse.urlparse(self2.path).query + params = urllib.parse.parse_qs(qs) + if "code" in params: + if params.get("state", [None])[0] != state: + self2.send_response(400) + self2.end_headers() + self2.wfile.write(b"CSRF state mismatch") + error_holder[0] = "CSRF state mismatch" + return + code_holder[0] = params["code"][0] + self2.send_response(302) + self2.send_header("Location", "https://developers.google.com/gemini-code-assist/auth_success_gemini") + self2.end_headers() + else: + error_holder[0] = params.get("error", ["unknown"])[0] + self2.send_response(302) + self2.send_header("Location", "https://developers.google.com/gemini-code-assist/auth_failure_gemini") + self2.end_headers() + def log_message(self2, fmt, *args): + pass + + try: + bind_host = "localhost" if is_antigravity else "127.0.0.1" + server = http.server.HTTPServer((bind_host, port), OAuthHandler) + except OSError: + status_lbl.set_text(f"Port {port} in use — close other apps and retry.") + oauth_dlg.run() + oauth_dlg.destroy() + return + + def _wait(): + deadline = time.time() + 120 + while code_holder[0] is None and error_holder[0] is None and time.time() < deadline: + server.handle_request() + server.server_close() + if code_holder[0]: try: - tok_req = urllib.request.Request("https://oauth2.googleapis.com/token", - data=urllib.parse.urlencode({ - "code": code, "client_id": client_id, "client_secret": client_secret, - "redirect_uri": redirect, "grant_type": "authorization_code" - }).encode(), + tok_data = urllib.parse.urlencode({ + "code": code_holder[0], "client_id": CLIENT_ID, "client_secret": CLIENT_SECRET, + "redirect_uri": redirect_uri, "grant_type": "authorization_code", + "code_verifier": verifier, + }).encode() + req = urllib.request.Request("https://oauth2.googleapis.com/token", data=tok_data, headers={"Content-Type": "application/x-www-form-urlencoded"}) - tok_resp = urllib.request.urlopen(tok_req, timeout=30) - tok_data = json.loads(tok_resp.read()) - tok_data["_updated"] = time.time() + resp = urllib.request.urlopen(req, timeout=30) + tokens = json.loads(resp.read()) + tokens["client_id"] = CLIENT_ID + tokens["client_secret"] = CLIENT_SECRET + tokens["provider_kind"] = provider_kind + tokens["expires_at"] = time.time() + tokens.get("expires_in", 3600) os.makedirs(os.path.dirname(token_path), exist_ok=True) with open(token_path, "w") as f: - json.dump(tok_data, f, indent=2) - self._log(f"[oauth] Refreshed {provider} token → {token_path}") + json.dump(tokens, f, indent=2) + os.chmod(token_path, 0o600) + project_id = _oauth_discover_project(tokens["access_token"], token_path, tokens) + def _on_success(): + status_lbl.set_text(f"Authorization successful! Project: {project_id or 'none'}") + GLib.timeout_add_seconds(2, lambda: oauth_dlg.destroy()) + return False + GLib.idle_add(_on_success) except Exception as e: - self._show_error_dialog("Token exchange failed", str(e)[:300]) - code_dlg.destroy() + def _on_err(exc=str(e)): + status_lbl.set_text(f"Token exchange failed: {exc[:200]}") + return False + GLib.idle_add(_on_err) + else: + def _on_fail(err=error_holder[0]): + status_lbl.set_text(f"Failed: {err or 'No code received'}") + return False + GLib.idle_add(_on_fail) + + webbrowser.open(auth_url) + threading.Thread(target=_wait, daemon=True).start() + oauth_dlg.run() + oauth_dlg.destroy() def _codebuff_reoauth(self): self._codebuff_oauth_standalone() @@ -3019,7 +3190,7 @@ class LauncherWin(Gtk.Window): hdr_row.pack_start(Gtk.Label(label=f"\n{section_label}", use_markup=True, xalign=0), True, True, 0) reauth_btn = Gtk.Button(label="Re-OAuth") reauth_btn.set_size_request(80, -1) - reauth_btn.connect("clicked", lambda b, p=oauth_prov: self._google_reoauth(p)) + reauth_btn.connect("clicked", lambda b, p=oauth_prov: self._google_reoauth(p, dlg)) hdr_row.pack_end(reauth_btn, False, False, 0) import_btn = Gtk.Button(label="Import JSON") import_btn.set_size_request(100, -1) @@ -3868,32 +4039,8 @@ class EditEndpointDialog(Gtk.Dialog): json.dump(tokens, f, indent=2) os.chmod(token_path, 0o600) _oauth_log(f"Token saved to {token_path}") - project_id = "" - try: - _oauth_log("Discovering project ID via loadCodeAssist...") - lr = urllib.request.Request( - "https://cloudcode-pa.googleapis.com/v1internal:loadCodeAssist", - data=json.dumps({}).encode(), - headers={ - "Content-Type": "application/json", - "Authorization": f"Bearer {tokens['access_token']}", - "User-Agent": "google-api-nodejs-client/9.15.1", - }) - lresp = urllib.request.urlopen(lr, timeout=15) - ldata = json.loads(lresp.read()) - p = ldata.get("cloudaicompanionProject", "") - if isinstance(p, dict): - project_id = p.get("id", "") - elif isinstance(p, str): - project_id = p - _oauth_log(f"Project ID: {project_id or '(none)'}") - if project_id: - tokens["project_id"] = project_id - with open(token_path, "w") as f2: - json.dump(tokens, f2, indent=2) - os.chmod(token_path, 0o600) - except Exception as pe: - _oauth_log(f"loadCodeAssist failed (non-fatal): {pe}") + project_id = _oauth_discover_project(tokens["access_token"], token_path, tokens) + _oauth_log(f"Project ID: {project_id or '(none)'}") if is_antigravity: found_models = [ "gemini-2.5-flash", "gemini-2.5-pro", @@ -3915,7 +4062,7 @@ class EditEndpointDialog(Gtk.Dialog): for mc in probe_candidates: try: pr = urllib.request.Request( - "https://daily-cloudcode-pa.sandbox.googleapis.com/v1internal:generateContent", + "https://cloudcode-pa.googleapis.com/v1internal:generateContent", data=json.dumps({ "project": project_id, "model": mc, @@ -4264,10 +4411,54 @@ class EditEndpointDialog(Gtk.Dialog): data["default"] = name save_endpoints(data) + self._hot_reload_proxy_key(new_ep) self._parent_mgr._rebuild() self._parent_mgr._parent._on_endpoints_updated() self.destroy() + def _hot_reload_proxy_key(self, ep): + try: + ep_name = ep.get("name", "") + proxy_port = None + import glob as _glob + for cfg_file in _glob.glob(str(PROXY_CONFIG_DIR / "proxy-*.json")): + try: + with open(cfg_file) as f: + pcfg = json.load(f) + if ep_name.lower().replace(" ", "-") in cfg_file.lower(): + proxy_port = pcfg.get("port") + pcfg["api_key"] = ep.get("api_key", "") + with open(cfg_file, "w") as f: + json.dump(pcfg, f, indent=2) + break + except Exception: + continue + if proxy_port: + import urllib.request as _ur + try: + url = f"http://127.0.0.1:{proxy_port}/admin/reload" + resp = _ur.urlopen(url, timeout=3) + result = json.loads(resp.read()) + reloaded = result.get("reloaded", False) + preview = result.get("api_key_preview", "?") + self._parent_mgr._parent.log( + f"[hot-reload] key {'updated' if reloaded else 'unchanged'}: {preview}") + if reloaded: + verify_url = f"http://127.0.0.1:{proxy_port}/admin/verify-key" + vresp = _ur.urlopen(verify_url, timeout=10) + vresult = json.loads(vresp.read()) + valid = vresult.get("valid", False) + if valid: + self._parent_mgr._parent.log( + f"[hot-reload] key verified OK ({vresult.get('models', '?')} models)") + else: + self._parent_mgr._parent.log( + f"[hot-reload] WARNING: key verification failed: {vresult.get('error', 'unknown')}") + except Exception: + pass + except Exception: + pass + def _show_error(self, msg): d = Gtk.MessageDialog(self, 0, Gtk.MessageType.ERROR, Gtk.ButtonsType.OK, msg) d.run(); d.destroy() diff --git a/src/codex_launcher_lib.py b/src/codex_launcher_lib.py index 2622e69..28e6e92 100644 --- a/src/codex_launcher_lib.py +++ b/src/codex_launcher_lib.py @@ -83,6 +83,19 @@ model_catalog_json = "" """ CHANGELOG = [ + ("3.11.0", "2026-05-26", [ + "Merge cobra PR: concurrency semaphore (max 3), auto-continue for truncated text", + "SO_REUSEADDR on sticky port, proxy-stderr.log, stream diagnostics logging", + "Timeout/OSError handler sends response.failed SSE instead of silent drop", + "Restart Proxy button: only restarts proxy without killing Codex Desktop", + "Tool call argument normalizer: fixes Arguments→arguments, strips markdown wrapping", + "Smart-continue loop (2× retries): escalating nudges when model stops text-only mid-task", + "XML tool call extraction: parses patterns from text, injects as real calls", + "Auto-continue + smart-continue ordered with skip guard to avoid double-firing", + "API key hot-reload with mtime tracking + /admin/reload + /admin/verify-key endpoints", + "GUI hot-reload: auto-refreshes proxy key on endpoint edit, verifies with upstream", + "Synthetic tool-results disabled: was causing deepseek-v4-pro truncation on opencode.ai", + ]), ("3.10.12", "2026-05-26", [ "Sticky endpoint: caches last working endpoint, sequential fallback on failure", "Endpoint order: cloudcode-pa first (matches agy CLI), daily-cloudcode-pa fallback", @@ -1468,6 +1481,7 @@ def _pick_free_port(): try: saved = int(_PROXY_PORT_FILE.read_text().strip()) with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) s.bind(("127.0.0.1", saved)) return saved except (ValueError, OSError, FileNotFoundError): @@ -1559,11 +1573,19 @@ def _start_proxy_with_config(pcfg_path, port, logfn): ) _register_pgid_entry("proxy", _proxy_proc.pid) + _proxy_log_path = PROXY_CONFIG_DIR / "proxy-stderr.log" + _proxy_log_file = open(_proxy_log_path, "a", encoding="utf-8") + def _pipe_stderr(): if not _proxy_proc.stderr: return for line in _proxy_proc.stderr: logfn(f"[proxy] {line.rstrip()}") + try: + _proxy_log_file.write(line) + _proxy_log_file.flush() + except Exception: + pass threading.Thread(target=_pipe_stderr, daemon=True).start() diff --git a/src/translate-proxy.py b/src/translate-proxy.py index 4cf20a9..6d7b966 100755 --- a/src/translate-proxy.py +++ b/src/translate-proxy.py @@ -323,6 +323,8 @@ _conn_pool_lock = threading.Lock() _conn_pool = {} _STREAM_IDLE_TIMEOUT = 300 +_MAX_CONCURRENT_REQUESTS = 3 +_request_semaphore = threading.Semaphore(_MAX_CONCURRENT_REQUESTS) _CODEBUFF_AUTH_URL = "https://www.codebuff.com" _CODEBUFF_API_URL = "https://www.codebuff.com" @@ -4829,6 +4831,11 @@ class Handler(http.server.BaseHTTPRequestHandler): _last_user_urls.append(url_m.group(0)) save_request_snapshot(request_id, body) _req_t0 = time.time() + wait_start = time.monotonic() + _request_semaphore.acquire() + wait_ms = (time.monotonic() - wait_start) * 1000 + if wait_ms > 100: + print(f"[{_sid}] waited {wait_ms:.0f}ms for upstream slot (concurrency gate)", file=sys.stderr) try: with RequestTracker(request_id) as tracker: if BACKEND == "auto": @@ -4847,6 +4854,8 @@ class Handler(http.server.BaseHTTPRequestHandler): except Exception as _snap_err: update_snapshot_response(request_id, "error", time.time() - _req_t0, _snap_err) raise + finally: + _request_semaphore.release() def _handle_openai_compat(self, body, model, stream, tracker=None): input_data = body.get("input", "") @@ -4859,7 +4868,8 @@ class Handler(http.server.BaseHTTPRequestHandler): body = dict(body) body["input"] = input_data - if (policy.get("synthetic_tool_results") or _provider_cap(model, "synthetic_tool_results", False)) and isinstance(input_data, list): + # synthetic tool-results disabled: causes deepseek-v4-pro truncation on opencode.ai + if False and (policy.get("synthetic_tool_results") or _provider_cap(model, "synthetic_tool_results", False)) and isinstance(input_data, list): input_data, synthesized = synthesize_tool_results_for_chat(input_data) if synthesized: print("[provider-adapter] using synthetic tool-result continuation", file=sys.stderr) @@ -5739,11 +5749,25 @@ class Handler(http.server.BaseHTTPRequestHandler): break collected_events.append(event) _observe_event(event) + print(f"[{self._session_id}] stream ended: events={len(collected_events)} finish={finish_reason} has_content={has_content} elapsed={time.time()-t0:.1f}s", file=sys.stderr) except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError): print("[translate-proxy] client disconnected during stream", file=sys.stderr) _crof_record(model, n_items, False) _log_resp(last_resp_id, "client_disconnect", last_output) return + except (TimeoutError, OSError, urllib.error.URLError) as e: + print(f"[translate-proxy] upstream error during stream: {type(e).__name__}: {e}", file=sys.stderr) + err_resp_id = body.get("request_id") or body.get("id") or uid("resp") + try: + self.wfile.write(emit("response.failed", {"type": "response.failed", + "response": {"id": err_resp_id, "error": {"type": "upstream_error", + "code": "stream_interrupted", "message": str(e)[:200]}}}).encode()) + self.wfile.flush() + except Exception: + pass + _crof_record(model, n_items, False) + _log_resp(last_resp_id, "upstream_error", last_output) + return # Record outcome success = (finish_reason != "length") @@ -5819,43 +5843,160 @@ class Handler(http.server.BaseHTTPRequestHandler): except Exception as e: print(f"[crof-adaptive] retry failed: {e}", file=sys.stderr) + # ── Auto-continue for truncated responses ── (cobra PR) + _ac_did_run = False + if stream and collected_events: + _ac_text = "" + _ac_msg_id = _ac_resp_id = None + for _ev in collected_events: + for _ln in _ev.strip().split("\n"): + if not _ln.startswith("data: "): + continue + try: + _d = json.loads(_ln[6:]) + _t = _d.get("type") + if _t == "response.output_text.done": + _ac_text = _d.get("text", "") + elif _t == "response.output_item.added" and _d.get("item",{}).get("type") == "message": + _ac_msg_id = _d.get("item",{}).get("id") + elif _t == "response.completed": + _ac_resp_id = _d.get("response",{}).get("id") + except Exception: + pass + + _ac_tc = reasoning_out.get("tool_calls", []) + _ac_truncated = False + if not _ac_tc and _ac_text: + _ac_stripped = _ac_text.rstrip() + if finish_reason == "length": + _ac_truncated = True + elif len(_ac_stripped) > 10 and _ac_stripped[-1] in "(:,;…": + _ac_truncated = True + + if _ac_truncated and _ac_text: + print(f"[{self._session_id}] auto-continue: truncated (finish={finish_reason}, ends '{_ac_text.rstrip()[-10:]}')", file=sys.stderr) + _ac_did_run = True + _ac_cut = len(collected_events) + for _i, _ev2 in enumerate(collected_events): + if "response.output_text.done" in _ev2: + _ac_cut = _i + break + collected_events = collected_events[:_ac_cut] + + _ac_accumulated = _ac_text + _ac_max = 3 + for _ac_attempt in range(_ac_max): + try: + _ac_cont_msgs = list(chat_body.get("messages", [])) + _ac_cont_msgs.append({"role": "assistant", "content": _ac_accumulated}) + _ac_cont_msgs.append({"role": "user", "content": "Continue exactly where you left off. Do not repeat anything already written."}) + _ac_cont_body = dict(chat_body) + _ac_cont_body["messages"] = _ac_cont_msgs + _ac_cont_body["stream"] = False + _ac_cont_req = urllib.request.Request(target, data=json.dumps(_ac_cont_body).encode(), headers=fwd) + _ac_cont_resp = json.loads(urllib.request.urlopen(_ac_cont_req, timeout=120).read()) + _ac_choices = _ac_cont_resp.get("choices", []) + if _ac_choices: + _ac_chunk = _ac_choices[0].get("message",{}).get("content","") + if not _ac_chunk: + _ac_chunk = _ac_choices[0].get("delta",{}).get("content","") + _ac_finish = _ac_choices[0].get("finish_reason") + if _ac_chunk: + _ac_accumulated += _ac_chunk + collected_events.append(emit("response.output_text.delta", { + "type": "response.output_text.delta", + "delta": _ac_chunk, "item_id": _ac_msg_id, "content_index": 0})) + if _ac_finish != "length": + break + _ac_text = _ac_accumulated + except Exception as _ac_e: + print(f"[{self._session_id}] auto-continue attempt {_ac_attempt+1} failed: {_ac_e}", file=sys.stderr) + break + + if _ac_msg_id: + collected_events.append(emit("response.output_text.done", { + "type": "response.output_text.done", + "text": _ac_accumulated, "item_id": _ac_msg_id, "content_index": 0})) + collected_events.append(emit("response.content_part.done", { + "type": "response.content_part.done", + "part": {"type": "output_text", "text": _ac_accumulated, "annotations": []}, "item_id": _ac_msg_id})) + collected_events.append(emit("response.output_item.done", { + "type": "response.output_item.done", + "item": {"type": "message", "id": _ac_msg_id, "role": "assistant", "status": "completed", + "content": [{"type": "output_text", "text": _ac_accumulated, "annotations": []}]}})) + if _ac_resp_id: + collected_events.append(emit("response.completed", { + "type": "response.completed", + "response": {"id": _ac_resp_id, "object": "response", "model": model, + "status": "completed", "created": int(time.time()), + "output": [{"type": "message", "id": _ac_msg_id, "role": "assistant", + "status": "completed", + "content": [{"type": "output_text", "text": _ac_accumulated, "annotations": []}]}]}})) + has_content = True + finish_reason = "stop" + print(f"[{self._session_id}] auto-continue done: {len(_ac_text)} -> {len(_ac_accumulated)} chars", file=sys.stderr) + # Smart continuation: loop with escalating nudges when model stops text-only mid-task. - _smart_max = 2 - _smart_attempt = 0 - while _smart_attempt < _smart_max: - _has_tool_calls_in_output = any(o.get("type") == "function_call" for o in (last_output or [])) - if not (finish_reason == "stop" and has_content and not _has_tool_calls_in_output - and isinstance(input_data, list) and len(input_data) >= 3 - and has_function_call_output(input_data)): - break - _smart_attempt += 1 - _nudges = [ - "Continue with the task using tool calls. Do NOT describe what to do — call the appropriate functions.", - "You MUST use tool calls to complete the task. Read files, run commands, and make changes using tools. Do NOT output XML tool calls as text.", - ] - nudge_text = _nudges[min(_smart_attempt - 1, len(_nudges) - 1)] - # Try extracting XML tool calls from text as fallback before nudging - last_text = "" - for o in (last_output or []): - if o.get("type") == "message": - for c in (o.get("content") or []): - if isinstance(c, dict) and c.get("type") == "output_text": - last_text += c.get("text", "") - xml_fc = _extract_xml_tool_calls(last_text) - if xml_fc: - print(f"[{self._session_id}] [smart-continue] extracted {len(xml_fc)} XML tool calls from text, injecting and retrying", file=sys.stderr) - fake_input = list(input_data) - for xfc in xml_fc: - fake_input.append({"type": "function_call", "id": uid("fcx"), "call_id": uid("fcx"), - "name": xfc["name"], "arguments": xfc["args"], "status": "completed"}) - fake_messages = oa_input_to_messages(fake_input) + # Skip if auto-continue already handled the response. + if not _ac_did_run: + _smart_max = 2 + _smart_attempt = 0 + while _smart_attempt < _smart_max: + _has_tool_calls_in_output = any(o.get("type") == "function_call" for o in (last_output or [])) + if not (finish_reason == "stop" and has_content and not _has_tool_calls_in_output + and isinstance(input_data, list) and len(input_data) >= 3 + and has_function_call_output(input_data)): + break + _smart_attempt += 1 + _nudges = [ + "Continue with the task using tool calls. Do NOT describe what to do — call the appropriate functions.", + "You MUST use tool calls to complete the task. Read files, run commands, and make changes using tools. Do NOT output XML tool calls as text.", + ] + nudge_text = _nudges[min(_smart_attempt - 1, len(_nudges) - 1)] + # Try extracting XML tool calls from text as fallback before nudging + last_text = "" + for o in (last_output or []): + if o.get("type") == "message": + for c in (o.get("content") or []): + if isinstance(c, dict) and c.get("type") == "output_text": + last_text += c.get("text", "") + xml_fc = _extract_xml_tool_calls(last_text) + if xml_fc: + print(f"[{self._session_id}] [smart-continue] extracted {len(xml_fc)} XML tool calls from text, injecting and retrying", file=sys.stderr) + fake_input = list(input_data) + for xfc in xml_fc: + fake_input.append({"type": "function_call", "id": uid("fcx"), "call_id": uid("fcx"), + "name": xfc["name"], "arguments": xfc["args"], "status": "completed"}) + fake_messages = oa_input_to_messages(fake_input) + instructions = body.get("instructions", "").strip() + if instructions: + fake_messages.insert(0, {"role": "system", "content": instructions}) + fake_chat_body = self._build_chat_body(model, fake_messages, body, stream) + fake_req = urllib.request.Request(target, data=json.dumps(fake_chat_body).encode(), headers=fwd) + try: + retry_upstream = urllib.request.urlopen(fake_req, timeout=_upstream_timeout(body, True)) + collected_events = [] + last_resp_id = last_output = last_status = None + finish_reason = None + has_content = False + for event in oa_stream_to_sse(retry_upstream, model, body.get("request_id") or body.get("id")): + collected_events.append(event) + _observe_event(event) + input_data = fake_input + continue + except Exception as e: + print(f"[{self._session_id}] [smart-continue] XML injection retry failed: {e}", file=sys.stderr) + break + _nudge_msg = {"role": "user", "content": nudge_text} + nudge_messages = oa_input_to_messages(input_data) + [_nudge_msg] instructions = body.get("instructions", "").strip() if instructions: - fake_messages.insert(0, {"role": "system", "content": instructions}) - fake_chat_body = self._build_chat_body(model, fake_messages, body, stream) - fake_req = urllib.request.Request(target, data=json.dumps(fake_chat_body).encode(), headers=fwd) + nudge_messages.insert(0, {"role": "system", "content": instructions}) + nudge_chat_body = self._build_chat_body(model, nudge_messages, body, stream) + nudge_req = urllib.request.Request(target, data=json.dumps(nudge_chat_body).encode(), headers=fwd) + print(f"[{self._session_id}] [smart-continue] attempt {_smart_attempt}/{_smart_max}: model stopped mid-task, nudging", file=sys.stderr) try: - retry_upstream = urllib.request.urlopen(fake_req, timeout=_upstream_timeout(body, True)) + retry_upstream = urllib.request.urlopen(nudge_req, timeout=_upstream_timeout(body, True)) collected_events = [] last_resp_id = last_output = last_status = None finish_reason = None @@ -5863,31 +6004,9 @@ class Handler(http.server.BaseHTTPRequestHandler): for event in oa_stream_to_sse(retry_upstream, model, body.get("request_id") or body.get("id")): collected_events.append(event) _observe_event(event) - input_data = fake_input - continue except Exception as e: - print(f"[{self._session_id}] [smart-continue] XML injection retry failed: {e}", file=sys.stderr) + print(f"[{self._session_id}] [smart-continue] nudge attempt {_smart_attempt} failed: {e}", file=sys.stderr) break - _nudge_msg = {"role": "user", "content": nudge_text} - nudge_messages = oa_input_to_messages(input_data) + [_nudge_msg] - instructions = body.get("instructions", "").strip() - if instructions: - nudge_messages.insert(0, {"role": "system", "content": instructions}) - nudge_chat_body = self._build_chat_body(model, nudge_messages, body, stream) - nudge_req = urllib.request.Request(target, data=json.dumps(nudge_chat_body).encode(), headers=fwd) - print(f"[{self._session_id}] [smart-continue] attempt {_smart_attempt}/{_smart_max}: model stopped mid-task, nudging", file=sys.stderr) - try: - retry_upstream = urllib.request.urlopen(nudge_req, timeout=_upstream_timeout(body, True)) - collected_events = [] - last_resp_id = last_output = last_status = None - finish_reason = None - has_content = False - for event in oa_stream_to_sse(retry_upstream, model, body.get("request_id") or body.get("id")): - collected_events.append(event) - _observe_event(event) - except Exception as e: - print(f"[{self._session_id}] [smart-continue] nudge attempt {_smart_attempt} failed: {e}", file=sys.stderr) - break self.stream_buffered_events(collected_events) else: diff --git a/translate-proxy.py b/translate-proxy.py index 19d6c44..6d7b966 100755 --- a/translate-proxy.py +++ b/translate-proxy.py @@ -188,6 +188,7 @@ DEFAULT_MODELS = { } def load_config(): + global _CONFIG_PATH, _CONFIG_MTIME p = argparse.ArgumentParser(description="Responses API translation proxy") p.add_argument("--config", help="JSON config file path") p.add_argument("--port", type=int, default=None) @@ -195,16 +196,21 @@ def load_config(): p.add_argument("--target-url", default=None) p.add_argument("--api-key", default=None) p.add_argument("--models-file", default=None, help="JSON file with model list array") - args = p.parse_args() + _args = p.parse_args() cfg = {} - if args.config: - with open(args.config) as f: + if _args.config: + _CONFIG_PATH = os.path.abspath(_args.config) + with open(_args.config) as f: cfg = json.load(f) + try: + _CONFIG_MTIME = os.path.getmtime(_CONFIG_PATH) + except OSError: + pass for ck, ak in [("port", "port"), ("backend_type", "backend"), ("target_url", "target_url"), ("api_key", "api_key")]: - v = getattr(args, ak, None) + v = getattr(_args, ak, None) if v is not None: cfg[ck] = v @@ -226,8 +232,8 @@ def load_config(): cfg.setdefault("api_key", "") models = cfg.get("models", []) - if not models and args.models_file: - with open(args.models_file) as f: + if not models and _args.models_file: + with open(_args.models_file) as f: models = json.load(f) if not models: models = DEFAULT_MODELS.get(cfg["backend_type"], []) @@ -236,6 +242,8 @@ def load_config(): return cfg CONFIG = None +_CONFIG_PATH = None +_CONFIG_MTIME = 0 PORT = 8080 BACKEND = "openai-compat" TARGET_URL = "" @@ -315,6 +323,8 @@ _conn_pool_lock = threading.Lock() _conn_pool = {} _STREAM_IDLE_TIMEOUT = 300 +_MAX_CONCURRENT_REQUESTS = 3 +_request_semaphore = threading.Semaphore(_MAX_CONCURRENT_REQUESTS) _CODEBUFF_AUTH_URL = "https://www.codebuff.com" _CODEBUFF_API_URL = "https://www.codebuff.com" @@ -616,6 +626,51 @@ class APIKeyPool(AccountPool): _cb_pool = CodebuffAccountPool("codebuff") _google_antigravity_pool = GoogleAccountPool("antigravity") _google_cli_pool = GoogleAccountPool("cli") +_antigravity_preferred_endpoint = None +_antigravity_endpoint_lock = threading.Lock() + +def _classify_antigravity_error(status_code, body): + lower = body.lower() + if status_code == 400: + return "bad_request" + if status_code == 401: + if any(x in lower for x in ["invalid_grant", "token revoked", "token_revoked", "invalid_client"]): + return "auth_permanent" + return "auth_transient" + if status_code == 403: + if "validation_required" in lower or "account_disabled" in lower: + return "validation_required" + if "has been disabled" in lower and "violation of terms of service" in lower: + return "account_banned" + if "service_disabled" in lower: + return "service_disabled" + return "forbidden" + if status_code in (429, 503, 529): + if any(x in lower for x in ["model_capacity_exhausted", "capacity_exhausted", "model is currently overloaded", "service temporarily unavailable"]): + return "capacity_exhausted" + if any(x in lower for x in ["quota_exhausted", "resource_exhausted", "daily limit", "quota exceeded", "quotaresetdelay"]): + return "quota_exhausted" + return "rate_limited" + if status_code >= 500: + return "server_error" + return "unknown" + +def _parse_rate_limit_reset(body): + import re as _re + m = _re.search(r'quotaResetDelay[:"\s]+(\d+(?:\.\d+)?)(ms|s)', body, _re.IGNORECASE) + if m: + val = float(m.group(1)) + return val / 1000 if m.group(2) == 'ms' else val + m = _re.search(r'(\d+)h(\d+)m(\d+)s', body, _re.IGNORECASE) + if m: + return int(m.group(1)) * 3600 + int(m.group(2)) * 60 + int(m.group(3)) + m = _re.search(r'Resets in ~(\d+)h(\d+)m', body, _re.IGNORECASE) + if m: + return int(m.group(1)) * 3600 + int(m.group(2)) * 60 + m = _re.search(r'retry[-_]?after[:\s]+(\d+)\s*(?:sec|s\b)', body, _re.IGNORECASE) + if m: + return int(m.group(1)) + return None def _get_codebuff_account(): """Return (token, account_dict) for best available codebuff account.""" @@ -714,14 +769,21 @@ def _gemini_reattach_sigs(contents): # Gemini follow-through guardrail _GEMINI_AGENT_GUARDRAIL = ( - "You are running inside Codex as an autonomous coding agent. " - "When the user asks for a change to existing files, do not merely describe the previous work or summarize. " - "You must inspect the existing files, apply edits with tools, and verify the result. " - "If a file path is known from prior context, reuse it. " - "If unsure, list files first. " - "After tool results, continue until the requested change is actually implemented. " - "Never answer only with a plan such as 'I will start by...' or 'I am going to...'. " - "Always emit the actual tool call in the same response." + "!!! ABSOLUTELY CRITICAL - DO NOT IGNORE THIS UNDER ANY CIRCUMSTANCES !!! " + "YOU ARE RUNNING INSIDE CODEX AS AN AUTONOMOUS CODING AGENT. " + "!!!!!! NEVER EVER CONTINUE, PARAPHRASE, COMPLETE, OR ADD ANYTHING TO THE USER'S INSTRUCTIONS !!!!!! " + "!!!!!! NEVER SAY 'LET\\'S FIRST VIEW' OR 'LET\\'S FIRST FIND' OR SIMILAR PHRASES - EMIT THE ACTUAL TOOL CALL NOW !!!!!! " + "WHEN THE USER ASKS FOR A CHANGE TO EXISTING FILES, YOU MUST " + "1. IMMEDIATELY INSPECT EXISTING FILES USING exec_command OR read_files TOOLS RIGHT NOW, " + "2. THEN APPLY EDITS USING write OR exec_command TOOLS, " + "3. THEN VERIFY THE RESULT. " + "IF A FILE PATH IS KNOWN, REUSE IT IMMEDIATELY. " + "IF UNSURE, LIST FILES FIRST USING exec_command (ls -la). " + "AFTER TOOL RESULTS, CONTINUE UNTIL THE REQUESTED CHANGE IS FULLY IMPLEMENTED AND FILES ARE MODIFIED. " + "NEVER ANSWER ONLY WITH A PLAN LIKE 'I WILL START BY...' OR 'I AM GOING TO...'. " + "NEVER SUMMARIZE THE USER'S REQUEST. NEVER CONTINUE THEIR SENTENCE. " + "ALWAYS, ALWAYS, ALWAYS EMIT THE ACTUAL TOOL CALL IN THE SAME RESPONSE. " + "!!! FAILURE TO FOLLOW THESE INSTRUCTIONS WILL RESULT IN A BROKEN USER EXPERIENCE !!!" ) _LOG_FILE_LOCK = threading.Lock() @@ -771,6 +833,20 @@ def _ensure_antigravity_version(): _antigravity_version_checked = time.time() return _antigravity_version +_antigravity_client_version = "1.110.0" +_antigravity_client_version_checked = 0 + +def _ensure_antigravity_client_version(): + global _antigravity_client_version, _antigravity_client_version_checked + env_ver = os.environ.get("ANTIGRAVITY_CLIENT_VERSION", "").strip() + if env_ver: + return env_ver + if time.time() - _antigravity_client_version_checked < 6 * 3600: + return _antigravity_client_version + _antigravity_client_version = os.environ.get("ANTIGRAVITY_CLIENT_VERSION_FALLBACK", "1.110.0") + _antigravity_client_version_checked = time.time() + return _antigravity_client_version + def _init_runtime(): global CONFIG, PORT, BACKEND, TARGET_URL, API_KEY, OAUTH_PROVIDER, _antigravity_version global MODELS, CC_VERSION, REASONING_ENABLED, REASONING_EFFORT, BGP_ROUTES @@ -801,6 +877,68 @@ def _init_runtime(): _antigravity_version = _ensure_antigravity_version() print(f"[antigravity] version={_antigravity_version}", file=sys.stderr) +def _verify_api_key(key, target_url): + if not key or not target_url: + return {"valid": False, "error": "missing key or url"} + test_url = upstream_target(target_url, "/models") + if not test_url: + return {"valid": False, "error": "invalid target url"} + try: + req = urllib.request.Request(test_url, headers={ + "Authorization": f"Bearer {key}", + "Content-Type": "application/json", + }) + resp = urllib.request.urlopen(req, timeout=10) + body = resp.read().decode() + model_count = 0 + try: + data = json.loads(body) + model_count = len(data.get("data", [])) + except Exception: + pass + return {"valid": True, "status": resp.status, "models": model_count} + except urllib.error.HTTPError as e: + err = e.read().decode()[:200] + return {"valid": False, "status": e.code, "error": err} + except Exception as e: + return {"valid": False, "error": str(e)[:200]} + +_HOT_RELOAD_LOCK = threading.Lock() + +def _hot_reload_api_key(): + global API_KEY, _api_key_pool, _CONFIG_MTIME + if not _CONFIG_PATH: + return False + try: + cur_mtime = os.path.getmtime(_CONFIG_PATH) + except OSError: + return False + if cur_mtime <= _CONFIG_MTIME: + return False + with _HOT_RELOAD_LOCK: + try: + cur_mtime2 = os.path.getmtime(_CONFIG_PATH) + if cur_mtime2 <= _CONFIG_MTIME: + return False + with open(_CONFIG_PATH) as f: + new_cfg = json.load(f) + new_key = (new_cfg.get("api_key") or "").strip() + if not new_key or new_key == API_KEY: + _CONFIG_MTIME = cur_mtime2 + return False + old_preview = API_KEY[:8] + "..." if len(API_KEY) > 8 else "(empty)" + new_preview = new_key[:8] + "..." if len(new_key) > 8 else "(empty)" + API_KEY = new_key + _CONFIG_MTIME = cur_mtime2 + if API_KEY and "," in API_KEY and not OAUTH_PROVIDER.startswith("google") and BACKEND not in ("codebuff", "freebuff"): + _api_key_pool = APIKeyPool(BACKEND, API_KEY) + print(f"[hot-reload] API key pool refreshed: {len(_api_key_pool._accounts)} keys", file=sys.stderr) + print(f"[hot-reload] API key updated: {old_preview} -> {new_preview}", file=sys.stderr) + return True + except Exception as e: + print(f"[hot-reload] error: {e}", file=sys.stderr) + return False + bgp_models = [] for _r in BGP_ROUTES: for _m in _r.get("models", [{"id": _r.get("model", "unknown")}]): @@ -1623,7 +1761,8 @@ _PROVIDER_POLICIES = { "openrouter": {"reasoning_mode": "provider_default", "max_tokens": 32768, "strip_reasoning": True, "tool_output_limit": 6000, "max_input_items": 35, "compaction": "balanced"}, "openadapter": {"reasoning_mode": "off", "max_tokens": 32768, "strip_reasoning": True, - "tool_output_limit": 6000, "max_input_items": 30, "compaction": "balanced"}, + "tool_output_limit": 1000, "max_input_items": 10, "compaction": "aggressive", + "synthetic_tool_results": True}, "cloudcode-pa": {"compaction": "aggressive", "context_size": 1000000, "tool_output_limit": 6000, "max_input_items": 60}, "googleapis": {"compaction": "balanced", "context_size": 1000000, @@ -2054,6 +2193,75 @@ def _inject_stored_reasoning(messages): msg["reasoning_content"] = reasoning return messages +def _normalize_tool_args(raw_args): + if not raw_args or raw_args == "{}": + return raw_args + try: + parsed = json.loads(raw_args) + if isinstance(parsed, dict): + if "Arguments" in parsed and "arguments" not in parsed: + inner = parsed["Arguments"] + if isinstance(inner, str): + inner = inner.strip() + for pfx in ("```json", "```"): + if inner.startswith(pfx): + inner = inner[len(pfx):].strip() + if inner.endswith("```"): + inner = inner[:-3].strip() + try: + inner_parsed = json.loads(inner) + if isinstance(inner_parsed, dict): + return json.dumps(inner_parsed) + except json.JSONDecodeError: + pass + if "cmd" not in parsed and "Arguments" in parsed: + inner = parsed["Arguments"] + if isinstance(inner, str): + inner = inner.strip() + for pfx in ("```json", "```"): + if inner.startswith(pfx): + inner = inner[len(pfx):].strip() + if inner.endswith("```"): + inner = inner[:-3].strip() + try: + inner_parsed = json.loads(inner) + if isinstance(inner_parsed, dict): + return json.dumps(inner_parsed) + except json.JSONDecodeError: + pass + return raw_args + except json.JSONDecodeError: + return raw_args + +_XML_TC_RE = re.compile(r'(\w+)(.*?)', re.DOTALL) +_XML_ARG_VALUE_RE = re.compile(r'\s*') + +def _extract_xml_tool_calls(text): + if not text: + return [] + results = [] + for m in _XML_TC_RE.finditer(text): + name = m.group(1) + rest = _XML_ARG_VALUE_RE.sub("", m.group(2)).strip() + args_str = "{}" + try: + for pfx in ("```json", "```"): + if rest.startswith(pfx): + rest = rest[len(pfx):].strip() + if rest.endswith("```"): + rest = rest[:-3].strip() + if rest.startswith("{"): + json.loads(rest) + args_str = rest + else: + json.loads(rest) + args_str = rest + except Exception: + if rest.startswith("{"): + args_str = rest + results.append({"name": name, "args": args_str, "call_id": f"xml_{len(results)}"}) + return results + def oa_input_to_messages(input_data): msgs = [] tool_name_by_id = {} @@ -2066,11 +2274,13 @@ def oa_input_to_messages(input_data): t = item.get("type") if t == "function_call": tcid = item.get("call_id") or item.get("id") or uid("tc") + raw_args = item.get("arguments", "{}") + normalized_args = _normalize_tool_args(raw_args) pending_tool_calls.append( {"id": tcid, "type": "function", "function": {"name": item.get("name", ""), - "arguments": item.get("arguments", "{}")}}) + "arguments": normalized_args}}) tool_name_by_id[tcid] = item.get("name", "") continue if pending_tool_calls: @@ -4280,9 +4490,10 @@ def _antigravity_is_simple_user(text): return True return False -def _antigravity_normalize_context(input_data): +def _antigravity_normalize_context(input_data, model=""): if not isinstance(input_data, list) or len(input_data) < 2: return input_data + is_claude_model = "claude" in model.lower() latest_user = "" latest_user_idx = -1 @@ -4310,13 +4521,19 @@ def _antigravity_normalize_context(input_data): if os.environ.get("ANTIGRAVITY_AUTO_RESET_POLLUTED_CONTEXT", "1") != "1": auto_reset = False - if is_simple and (auto_reset or n_tool_outputs == 0): + has_compaction_summary = any( + isinstance(it, dict) and it.get("type") == "message" and it.get("role") == "user" + and ("Auto-compacted" in str(it.get("content", "")) or "auto-compacted" in str(it.get("content", "")).lower()) + for it in input_data + ) + + if is_simple and auto_reset and not has_compaction_summary: system_items = [it for it in input_data if isinstance(it, dict) and it.get("type") == "message" and it.get("role") in ("developer", "system")] user_item = input_data[latest_user_idx] result = system_items + [user_item] if system_items else [user_item] print(f"[antigravity-context] raw_items={n_raw} compacted_items={n_raw} final_items={len(result)}", file=sys.stderr) print(f"[antigravity-context] raw_tool_outputs={n_tool_outputs} kept_tool_outputs=0", file=sys.stderr) - print(f"[antigravity-context] simple_latest_user=true auto_reset={auto_reset}", file=sys.stderr) + print(f"[antigravity-context] simple_latest_user=true auto_reset={auto_reset} has_compaction={has_compaction_summary}", file=sys.stderr) return result dev_messages = [] @@ -4340,9 +4557,15 @@ def _antigravity_normalize_context(input_data): latest_words = set(latest_user.strip().lower().split()) has_edit_intent = bool(latest_words.intersection(_ANTIGRAVITY_EDIT_WORDS)) has_ref_intent = bool(latest_words.intersection(_ANTIGRAVITY_REFERENCE_WORDS)) - keep_tools = 2 if (has_edit_intent or has_ref_intent) else 1 + if is_claude_model: + keep_tools = len(tool_outputs) + else: + keep_tools = 2 if (has_edit_intent or has_ref_intent) else 1 - kept_tools = tool_outputs[-keep_tools:] if tool_outputs and (has_edit_intent or has_ref_intent) else [] + if is_claude_model: + kept_tools = tool_outputs + else: + kept_tools = tool_outputs[-keep_tools:] if tool_outputs and (has_edit_intent or has_ref_intent) else [] for idx_t, t_item in enumerate(kept_tools): orig = t_item[1] @@ -4357,6 +4580,22 @@ def _antigravity_normalize_context(input_data): tail_start = max(0, len(recent_items) - 6) recent_tail = recent_items[tail_start:] + deduped_tail = [] + seen_goal_context = False + for idx, msg_item in recent_tail: + content_str = "" + c = msg_item.get("content", "") + if isinstance(c, str): + content_str = c + elif isinstance(c, list): + content_str = " ".join(p.get("text", p.get("input_text", "")) for p in c if isinstance(p, dict)) + if "" in content_str: + if seen_goal_context: + continue + seen_goal_context = True + deduped_tail.append((idx, msg_item)) + recent_tail = deduped_tail if deduped_tail else recent_tail + tool_call_ids = set() for _, t_item in kept_tools: cid = t_item.get("call_id", t_item.get("id", "")) @@ -4371,6 +4610,15 @@ def _antigravity_normalize_context(input_data): result = list(dev_messages) + compaction_summaries = [] + for idx, msg_item in recent_items: + if msg_item is input_data[latest_user_idx]: + continue + c = msg_item.get("content", "") + content_str = c if isinstance(c, str) else " ".join(p.get("text", p.get("input_text", "")) for p in c if isinstance(p, dict)) if isinstance(c, list) else "" + if "Auto-compacted" in content_str or "auto-compacted" in content_str.lower(): + compaction_summaries.append(msg_item) + if n_summarized > 0: summary_text = f"[Tool history summary: {n_summarized} older tool outputs omitted. {n_tool_calls} prior function calls were made for file inspection/editing.]" result.append({"type": "message", "role": "user", "content": [{"type": "input_text", "text": summary_text}]}) @@ -4381,6 +4629,9 @@ def _antigravity_normalize_context(input_data): for _, tool_item in kept_tools: result.append(tool_item) + for cs_item in compaction_summaries: + result.append(cs_item) + for _, msg_item in recent_tail: if msg_item is not input_data[latest_user_idx]: result.append(msg_item) @@ -4408,7 +4659,10 @@ def _antigravity_normalize_context(input_data): if total_chars > _ANTIGRAVITY_EMERGENCY_CHARS: print(f"[antigravity-context] EMERGENCY: {total_chars} chars exceeds limit, resetting to minimal", file=sys.stderr) - result = list(dev_messages) + [input_data[latest_user_idx]] + result = list(dev_messages) + if compaction_summaries: + result.extend(compaction_summaries) + result.append(input_data[latest_user_idx]) total_chars = sum(len(json.dumps(it, ensure_ascii=False)) for it in result) while len(result) > _ANTIGRAVITY_MAX_CONTENTS and total_chars > _ANTIGRAVITY_SOFT_CHARS: @@ -4475,12 +4729,23 @@ class Handler(http.server.BaseHTTPRequestHandler): pass _uptime = time.time() - _START_TIME if '_START_TIME' in dir() else 0 self.send_json(200, {"ok": True, "backend": BACKEND, - "target_url": TARGET_URL, - "models": [m.get("id") for m in MODELS], - "bgp_routes": len(BGP_ROUTES), - "uptime_s": round(_uptime, 1), - "memory_mb": round(_mem_mb, 1), - "requests_total": _STATS.get("requests", 0)}) + "target_url": TARGET_URL, + "models": [m.get("id") for m in MODELS], + "bgp_routes": len(BGP_ROUTES), + "uptime_s": round(_uptime, 1), + "memory_mb": round(_mem_mb, 1), + "requests_total": _STATS.get("requests", 0)}) + elif self.path == "/admin/reload": + reloaded = _hot_reload_api_key() + key_preview = API_KEY[:8] + "..." if len(API_KEY) > 8 else "(empty)" + self.send_json(200, {"ok": True, "reloaded": reloaded, + "api_key_preview": key_preview, + "config_path": _CONFIG_PATH or "none"}) + elif self.path == "/admin/verify-key": + result = _verify_api_key(API_KEY, TARGET_URL) + key_preview = API_KEY[:8] + "..." if len(API_KEY) > 8 else "(empty)" + result["api_key_preview"] = key_preview + self.send_json(200, result) else: self.send_error(404) @@ -4502,6 +4767,7 @@ class Handler(http.server.BaseHTTPRequestHandler): _logf = None def _handle(self): + _hot_reload_api_key() try: clen = int(self.headers.get("Content-Length", 0)) body = json.loads(self.rfile.read(clen)) @@ -4565,6 +4831,11 @@ class Handler(http.server.BaseHTTPRequestHandler): _last_user_urls.append(url_m.group(0)) save_request_snapshot(request_id, body) _req_t0 = time.time() + wait_start = time.monotonic() + _request_semaphore.acquire() + wait_ms = (time.monotonic() - wait_start) * 1000 + if wait_ms > 100: + print(f"[{_sid}] waited {wait_ms:.0f}ms for upstream slot (concurrency gate)", file=sys.stderr) try: with RequestTracker(request_id) as tracker: if BACKEND == "auto": @@ -4583,6 +4854,8 @@ class Handler(http.server.BaseHTTPRequestHandler): except Exception as _snap_err: update_snapshot_response(request_id, "error", time.time() - _req_t0, _snap_err) raise + finally: + _request_semaphore.release() def _handle_openai_compat(self, body, model, stream, tracker=None): input_data = body.get("input", "") @@ -4595,7 +4868,8 @@ class Handler(http.server.BaseHTTPRequestHandler): body = dict(body) body["input"] = input_data - if (policy.get("synthetic_tool_results") or _provider_cap(model, "synthetic_tool_results", False)) and isinstance(input_data, list): + # synthetic tool-results disabled: causes deepseek-v4-pro truncation on opencode.ai + if False and (policy.get("synthetic_tool_results") or _provider_cap(model, "synthetic_tool_results", False)) and isinstance(input_data, list): input_data, synthesized = synthesize_tool_results_for_chat(input_data) if synthesized: print("[provider-adapter] using synthetic tool-result continuation", file=sys.stderr) @@ -4603,7 +4877,7 @@ class Handler(http.server.BaseHTTPRequestHandler): body["input"] = input_data compacted = False - if policy.get("compaction") and isinstance(input_data, list): + if policy.get("compaction") and isinstance(input_data, list) and "claude" not in model.lower(): input_data, compacted = _adaptive_compact(input_data, model, policy) if compacted: body = dict(body) @@ -4652,6 +4926,22 @@ class Handler(http.server.BaseHTTPRequestHandler): upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream)) except urllib.error.HTTPError as e: err_body = e.read().decode() + if "context_length_exceeded" in err_body and attempt < max_retries: + print(f"[{self._session_id}] context_length_exceeded (attempt {attempt+1}/{max_retries}), retrying with extreme compaction!", file=sys.stderr) + policy = provider_policy() + if isinstance(input_data, list): + print(f"[{self._session_id}] applying extreme compaction to {len(input_data)} items", file=sys.stderr) + input_data = _crof_compact_for_retry(input_data, model) + body = dict(body) + body["input"] = input_data + messages = oa_input_to_messages(input_data) + messages = _inject_stored_reasoning(messages) + instructions = body.get("instructions", "").strip() + if instructions: + messages.insert(0, {"role": "system", "content": instructions}) + chat_body = self._build_chat_body(model, messages, body, stream) + chat_body_b = json.dumps(chat_body).encode() + continue if e.code in (429, 502, 503) and attempt < max_retries: if e.code == 429 and _api_key_pool: pool_acct = _api_key_pool.get() @@ -4782,7 +5072,7 @@ class Handler(http.server.BaseHTTPRequestHandler): body["input"] = input_data compacted = False - if policy.get("compaction") and isinstance(input_data, list): + if policy.get("compaction") and isinstance(input_data, list) and "claude" not in model.lower(): input_data, compacted = _adaptive_compact(input_data, model, policy) if compacted: body = dict(body) @@ -4793,8 +5083,8 @@ class Handler(http.server.BaseHTTPRequestHandler): body = dict(body) body["input"] = input_data - if OAUTH_PROVIDER == "google-antigravity" and isinstance(input_data, list): - input_data = _antigravity_normalize_context(input_data) + if OAUTH_PROVIDER == "google-antigravity" and isinstance(input_data, list) and "claude" not in model.lower(): + input_data = _antigravity_normalize_context(input_data, model) body = dict(body) body["input"] = input_data @@ -4872,7 +5162,7 @@ class Handler(http.server.BaseHTTPRequestHandler): resp_part["functionResponse"]["id"] = call_id contents.append({"role": "user", "parts": [resp_part]}) - if OAUTH_PROVIDER.startswith("google"): + if OAUTH_PROVIDER.startswith("google") and "claude" not in model.lower(): sanitized = [] last_user_text = None last_role = None @@ -4963,8 +5253,18 @@ class Handler(http.server.BaseHTTPRequestHandler): contents = _gemini_reattach_sigs(contents) if OAUTH_PROVIDER == "google-antigravity": - guardrail_found = any("autonomous coding agent" in json.dumps(c.get("parts", []), ensure_ascii=False) for c in contents[:2]) - if not guardrail_found: + latest_user = "" + if isinstance(input_data, list): + for item in reversed(input_data): + if item.get("type") == "message" and item.get("role") == "user": + c = item.get("content", "") + if isinstance(c, str): + latest_user = c + elif isinstance(c, list): + latest_user = "\n".join(p.get("text", p.get("input_text", "")) for p in c if isinstance(p, dict)) + break + is_latest_simple = _antigravity_is_simple_user(latest_user) + if not is_latest_simple: contents.insert(0, {"role": "user", "parts": [{"text": _GEMINI_AGENT_GUARDRAIL}]}) if OAUTH_PROVIDER == "google-antigravity" and isinstance(input_data, list): @@ -4976,11 +5276,10 @@ class Handler(http.server.BaseHTTPRequestHandler): if isinstance(c, str): latest_lower = c.lower() elif isinstance(c, list): latest_lower = " ".join(p.get("text", p.get("input_text", "")) for p in c if isinstance(p, dict)).lower() break - if latest_lower and any(w in latest_lower for w in _EDIT_WORDS) and len(input_data) > 6: + if latest_lower and any(w in latest_lower for w in _EDIT_WORDS): n_tool_calls = sum(1 for it in input_data if isinstance(it, dict) and it.get("type") == "function_call") - if n_tool_calls > 0: - contents.append({"role": "user", "parts": [{"text": "IMPORTANT: The user is requesting a modification to existing files. You MUST use tools (exec_command, write, etc.) to make the changes. Do NOT just describe what to do — actually call the tools to modify the files now."}]}) - print(f"[antigravity] edit-intent detected with {n_tool_calls} prior tool calls; injected tool-use nudge", file=sys.stderr) + contents.append({"role": "user", "parts": [{"text": "!!! ABSOLUTELY NO PLANNING - EMIT THE TOOL CALL NOW !!! IMPORTANT: The user is requesting a modification to existing files. You MUST use tools (exec_command, read_files, write, etc.) to make the changes RIGHT NOW. Do NOT just describe what to do — actually CALL THE TOOLS IN THIS RESPONSE. IMMEDIATELY INSPECT THE FILE OR LIST FILES USING exec_command TOOL CALL."}]}) + print(f"[antigravity] edit-intent detected; injected tool-use nudge", file=sys.stderr) if OAUTH_PROVIDER == "google-antigravity" and isinstance(input_data, list): latest_user = "" @@ -5031,10 +5330,14 @@ class Handler(http.server.BaseHTTPRequestHandler): wrapped["requestType"] = "agent" wrapped["userAgent"] = "antigravity" wrapped["requestId"] = f"agent-{uuid.uuid4().hex[:12]}" + wrapped["request"]["sessionId"] = f"{uuid.uuid4().hex}{int(time.time()*1000)}" _allow_staging = os.environ.get("ALLOW_ANTIGRAVITY_STAGING", "0") == "1" if OAUTH_PROVIDER == "google-antigravity": - _antigravity_endpoints = ["https://cloudcode-pa.googleapis.com"] + _antigravity_endpoints = [ + "https://cloudcode-pa.googleapis.com", + "https://daily-cloudcode-pa.googleapis.com", + ] if _allow_staging: _antigravity_endpoints.extend([ "https://daily-cloudcode-pa.sandbox.googleapis.com", @@ -5052,7 +5355,13 @@ class Handler(http.server.BaseHTTPRequestHandler): } if OAUTH_PROVIDER == "google-antigravity": version = _ensure_antigravity_version() - headers["User-Agent"] = f"antigravity/{version} darwin/arm64" + import platform as _plat + _os_name = _plat.system().lower() + _os_arch = _plat.machine().lower().replace("x86_64", "x64").replace("aarch64", "arm64") + headers["User-Agent"] = f"antigravity/{version} {_os_name}/{_os_arch}" + headers["X-Client-Name"] = "antigravity" + headers["X-Client-Version"] = _ensure_antigravity_client_version() + headers["x-goog-api-client"] = "gl-node/18.18.2 fire/0.8.6 grpc/1.10.x" else: headers["User-Agent"] = "google-api-nodejs-client/9.15.1" headers["X-Goog-Api-Client"] = "gl-node/22.17.0" @@ -5072,14 +5381,33 @@ class Handler(http.server.BaseHTTPRequestHandler): if OAUTH_PROVIDER == "google-antigravity": print(f"[antigravity-endpoint] endpoints={[e.replace('https://','') for e in endpoints]} project={project_id}", file=sys.stderr) - for ep in endpoints: + upstream = None + chosen_ep = None + global _antigravity_preferred_endpoint + + with _antigravity_endpoint_lock: + _pref = _antigravity_preferred_endpoint + + if _pref and _pref in endpoints: + ordered = [_pref] + [e for e in endpoints if e != _pref] + else: + ordered = list(endpoints) + + for ep in ordered: target = f"{ep}/{url_suffix}" req = urllib.request.Request(target, data=body_b, headers=headers) try: upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream)) + chosen_ep = ep + with _antigravity_endpoint_lock: + _antigravity_preferred_endpoint = ep + if ep != _pref: + print(f"[{self._session_id}] fallback OK: {ep.replace('https://','')}", file=sys.stderr) break except urllib.error.HTTPError as e: err_body = e.read().decode() + err_class = _classify_antigravity_error(e.code, err_body) + print(f"[{self._session_id}] {ep.replace('https://','')} {e.code} class={err_class}", file=sys.stderr) if e.code == 400 and OAUTH_PROVIDER.startswith("google"): try: debug_path = os.path.join(_LOG_DIR, "gemini-last-400-request.json") @@ -5088,23 +5416,38 @@ class Handler(http.server.BaseHTTPRequestHandler): print(f"[{self._session_id}] saved 400 debug request to {debug_path}", file=sys.stderr) except Exception: pass - if e.code == 403 and "SERVICE_DISABLED" in err_body[:500] and ep != endpoints[-1]: - print(f"[{self._session_id}] {ep} SERVICE_DISABLED, trying next endpoint", file=sys.stderr) + return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}}) + if err_class == "auth_permanent": + return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}}) + if err_class in ("quota_exhausted", "rate_limited"): + reset_s = _parse_rate_limit_reset(err_body) + if ep == ordered[-1]: + pool = _google_antigravity_pool if OAUTH_PROVIDER == "google-antigravity" else _google_cli_pool + _, acct = _get_google_account(OAUTH_PROVIDER) + if acct: + cooldown = reset_s if reset_s and reset_s > 10 else 60 + pool.mark_rate_limited(acct, cooldown) + print(f"[{self._session_id}] quota reset in ~{reset_s}s, cooldown={cooldown}s", file=sys.stderr) + return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}}) + print(f"[{self._session_id}] {ep.replace('https://','')} 429, trying next", file=sys.stderr) + with _antigravity_endpoint_lock: + _antigravity_preferred_endpoint = None continue - if e.code == 429 and ep != endpoints[-1] and _allow_staging: - print(f"[{self._session_id}] {ep} HTTP 429, trying next endpoint", file=sys.stderr) + if err_class in ("service_disabled", "forbidden", "account_banned", "validation_required"): + if ep == ordered[-1]: + return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}}) continue - if e.code == 429: - pool = _google_antigravity_pool if OAUTH_PROVIDER == "google-antigravity" else _google_cli_pool - _, acct = _get_google_account(OAUTH_PROVIDER) - if acct: - pool.mark_rate_limited(acct, 60) - return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}}) - except Exception as e: - if ep == endpoints[-1]: - return self.send_json(502, {"error": {"type": "proxy_error", "message": str(e)}}) - print(f"[{self._session_id}] {ep} failed: {e}, trying next", file=sys.stderr) + if ep == ordered[-1]: + return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}}) continue + except Exception as e: + print(f"[{self._session_id}] {ep.replace('https://','')} conn failed: {e}", file=sys.stderr) + if ep == ordered[-1]: + return self.send_json(502, {"error": {"type": "proxy_error", "message": str(e)}}) + continue + + if upstream is None: + return self.send_json(502, {"error": {"type": "proxy_error", "message": "All endpoints failed"}}) if stream: self._forward_gemini_sse(upstream, model, body, input_data, tracker) @@ -5406,11 +5749,25 @@ class Handler(http.server.BaseHTTPRequestHandler): break collected_events.append(event) _observe_event(event) + print(f"[{self._session_id}] stream ended: events={len(collected_events)} finish={finish_reason} has_content={has_content} elapsed={time.time()-t0:.1f}s", file=sys.stderr) except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError): print("[translate-proxy] client disconnected during stream", file=sys.stderr) _crof_record(model, n_items, False) _log_resp(last_resp_id, "client_disconnect", last_output) return + except (TimeoutError, OSError, urllib.error.URLError) as e: + print(f"[translate-proxy] upstream error during stream: {type(e).__name__}: {e}", file=sys.stderr) + err_resp_id = body.get("request_id") or body.get("id") or uid("resp") + try: + self.wfile.write(emit("response.failed", {"type": "response.failed", + "response": {"id": err_resp_id, "error": {"type": "upstream_error", + "code": "stream_interrupted", "message": str(e)[:200]}}}).encode()) + self.wfile.flush() + except Exception: + pass + _crof_record(model, n_items, False) + _log_resp(last_resp_id, "upstream_error", last_output) + return # Record outcome success = (finish_reason != "length") @@ -5486,6 +5843,171 @@ class Handler(http.server.BaseHTTPRequestHandler): except Exception as e: print(f"[crof-adaptive] retry failed: {e}", file=sys.stderr) + # ── Auto-continue for truncated responses ── (cobra PR) + _ac_did_run = False + if stream and collected_events: + _ac_text = "" + _ac_msg_id = _ac_resp_id = None + for _ev in collected_events: + for _ln in _ev.strip().split("\n"): + if not _ln.startswith("data: "): + continue + try: + _d = json.loads(_ln[6:]) + _t = _d.get("type") + if _t == "response.output_text.done": + _ac_text = _d.get("text", "") + elif _t == "response.output_item.added" and _d.get("item",{}).get("type") == "message": + _ac_msg_id = _d.get("item",{}).get("id") + elif _t == "response.completed": + _ac_resp_id = _d.get("response",{}).get("id") + except Exception: + pass + + _ac_tc = reasoning_out.get("tool_calls", []) + _ac_truncated = False + if not _ac_tc and _ac_text: + _ac_stripped = _ac_text.rstrip() + if finish_reason == "length": + _ac_truncated = True + elif len(_ac_stripped) > 10 and _ac_stripped[-1] in "(:,;…": + _ac_truncated = True + + if _ac_truncated and _ac_text: + print(f"[{self._session_id}] auto-continue: truncated (finish={finish_reason}, ends '{_ac_text.rstrip()[-10:]}')", file=sys.stderr) + _ac_did_run = True + _ac_cut = len(collected_events) + for _i, _ev2 in enumerate(collected_events): + if "response.output_text.done" in _ev2: + _ac_cut = _i + break + collected_events = collected_events[:_ac_cut] + + _ac_accumulated = _ac_text + _ac_max = 3 + for _ac_attempt in range(_ac_max): + try: + _ac_cont_msgs = list(chat_body.get("messages", [])) + _ac_cont_msgs.append({"role": "assistant", "content": _ac_accumulated}) + _ac_cont_msgs.append({"role": "user", "content": "Continue exactly where you left off. Do not repeat anything already written."}) + _ac_cont_body = dict(chat_body) + _ac_cont_body["messages"] = _ac_cont_msgs + _ac_cont_body["stream"] = False + _ac_cont_req = urllib.request.Request(target, data=json.dumps(_ac_cont_body).encode(), headers=fwd) + _ac_cont_resp = json.loads(urllib.request.urlopen(_ac_cont_req, timeout=120).read()) + _ac_choices = _ac_cont_resp.get("choices", []) + if _ac_choices: + _ac_chunk = _ac_choices[0].get("message",{}).get("content","") + if not _ac_chunk: + _ac_chunk = _ac_choices[0].get("delta",{}).get("content","") + _ac_finish = _ac_choices[0].get("finish_reason") + if _ac_chunk: + _ac_accumulated += _ac_chunk + collected_events.append(emit("response.output_text.delta", { + "type": "response.output_text.delta", + "delta": _ac_chunk, "item_id": _ac_msg_id, "content_index": 0})) + if _ac_finish != "length": + break + _ac_text = _ac_accumulated + except Exception as _ac_e: + print(f"[{self._session_id}] auto-continue attempt {_ac_attempt+1} failed: {_ac_e}", file=sys.stderr) + break + + if _ac_msg_id: + collected_events.append(emit("response.output_text.done", { + "type": "response.output_text.done", + "text": _ac_accumulated, "item_id": _ac_msg_id, "content_index": 0})) + collected_events.append(emit("response.content_part.done", { + "type": "response.content_part.done", + "part": {"type": "output_text", "text": _ac_accumulated, "annotations": []}, "item_id": _ac_msg_id})) + collected_events.append(emit("response.output_item.done", { + "type": "response.output_item.done", + "item": {"type": "message", "id": _ac_msg_id, "role": "assistant", "status": "completed", + "content": [{"type": "output_text", "text": _ac_accumulated, "annotations": []}]}})) + if _ac_resp_id: + collected_events.append(emit("response.completed", { + "type": "response.completed", + "response": {"id": _ac_resp_id, "object": "response", "model": model, + "status": "completed", "created": int(time.time()), + "output": [{"type": "message", "id": _ac_msg_id, "role": "assistant", + "status": "completed", + "content": [{"type": "output_text", "text": _ac_accumulated, "annotations": []}]}]}})) + has_content = True + finish_reason = "stop" + print(f"[{self._session_id}] auto-continue done: {len(_ac_text)} -> {len(_ac_accumulated)} chars", file=sys.stderr) + + # Smart continuation: loop with escalating nudges when model stops text-only mid-task. + # Skip if auto-continue already handled the response. + if not _ac_did_run: + _smart_max = 2 + _smart_attempt = 0 + while _smart_attempt < _smart_max: + _has_tool_calls_in_output = any(o.get("type") == "function_call" for o in (last_output or [])) + if not (finish_reason == "stop" and has_content and not _has_tool_calls_in_output + and isinstance(input_data, list) and len(input_data) >= 3 + and has_function_call_output(input_data)): + break + _smart_attempt += 1 + _nudges = [ + "Continue with the task using tool calls. Do NOT describe what to do — call the appropriate functions.", + "You MUST use tool calls to complete the task. Read files, run commands, and make changes using tools. Do NOT output XML tool calls as text.", + ] + nudge_text = _nudges[min(_smart_attempt - 1, len(_nudges) - 1)] + # Try extracting XML tool calls from text as fallback before nudging + last_text = "" + for o in (last_output or []): + if o.get("type") == "message": + for c in (o.get("content") or []): + if isinstance(c, dict) and c.get("type") == "output_text": + last_text += c.get("text", "") + xml_fc = _extract_xml_tool_calls(last_text) + if xml_fc: + print(f"[{self._session_id}] [smart-continue] extracted {len(xml_fc)} XML tool calls from text, injecting and retrying", file=sys.stderr) + fake_input = list(input_data) + for xfc in xml_fc: + fake_input.append({"type": "function_call", "id": uid("fcx"), "call_id": uid("fcx"), + "name": xfc["name"], "arguments": xfc["args"], "status": "completed"}) + fake_messages = oa_input_to_messages(fake_input) + instructions = body.get("instructions", "").strip() + if instructions: + fake_messages.insert(0, {"role": "system", "content": instructions}) + fake_chat_body = self._build_chat_body(model, fake_messages, body, stream) + fake_req = urllib.request.Request(target, data=json.dumps(fake_chat_body).encode(), headers=fwd) + try: + retry_upstream = urllib.request.urlopen(fake_req, timeout=_upstream_timeout(body, True)) + collected_events = [] + last_resp_id = last_output = last_status = None + finish_reason = None + has_content = False + for event in oa_stream_to_sse(retry_upstream, model, body.get("request_id") or body.get("id")): + collected_events.append(event) + _observe_event(event) + input_data = fake_input + continue + except Exception as e: + print(f"[{self._session_id}] [smart-continue] XML injection retry failed: {e}", file=sys.stderr) + break + _nudge_msg = {"role": "user", "content": nudge_text} + nudge_messages = oa_input_to_messages(input_data) + [_nudge_msg] + instructions = body.get("instructions", "").strip() + if instructions: + nudge_messages.insert(0, {"role": "system", "content": instructions}) + nudge_chat_body = self._build_chat_body(model, nudge_messages, body, stream) + nudge_req = urllib.request.Request(target, data=json.dumps(nudge_chat_body).encode(), headers=fwd) + print(f"[{self._session_id}] [smart-continue] attempt {_smart_attempt}/{_smart_max}: model stopped mid-task, nudging", file=sys.stderr) + try: + retry_upstream = urllib.request.urlopen(nudge_req, timeout=_upstream_timeout(body, True)) + collected_events = [] + last_resp_id = last_output = last_status = None + finish_reason = None + has_content = False + for event in oa_stream_to_sse(retry_upstream, model, body.get("request_id") or body.get("id")): + collected_events.append(event) + _observe_event(event) + except Exception as e: + print(f"[{self._session_id}] [smart-continue] nudge attempt {_smart_attempt} failed: {e}", file=sys.stderr) + break + self.stream_buffered_events(collected_events) else: result = oa_resp_to_responses(json.loads(upstream.read()), model) @@ -6392,12 +6914,45 @@ def _handle_shutdown_signal(sig, frame): if 'SERVER' in globals() and SERVER: SERVER.shutdown() +def _anti_stall_cleanup(): + my_pid = os.getpid() + my_port = PORT + killed = [] + try: + import subprocess as _sp + out = _sp.run(["pgrep", "-f", "translate-proxy"], capture_output=True, text=True, timeout=5).stdout.strip() + for pid_str in out.splitlines(): + pid_str = pid_str.strip() + if not pid_str or not pid_str.isdigit(): + continue + pid = int(pid_str) + if pid == my_pid: + continue + try: + os.kill(pid, signal.SIGTERM) + killed.append(pid) + except (ProcessLookupError, PermissionError): + pass + except Exception: + pass + try: + _cache_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "__pycache__") + if os.path.isdir(_cache_dir): + import shutil + shutil.rmtree(_cache_dir, ignore_errors=True) + except Exception: + pass + if killed: + print(f"[anti-stall] killed {len(killed)} stale proxy process(es): {killed}", flush=True) + time.sleep(1) + def main(): global SERVER, _START_TIME _START_TIME = time.time() + _anti_stall_cleanup() _init_runtime() try: - _current_cfg = os.path.basename(args.config) if args.config else "" + _current_cfg = os.path.basename(_CONFIG_PATH) if _CONFIG_PATH else "" for _f in os.listdir(_LOG_DIR): if _f.startswith("proxy-") and _f.endswith(".json") and _f != _current_cfg: os.remove(os.path.join(_LOG_DIR, _f))