From 7f04a2590c16587a257d206fc76c2bd70d183afa Mon Sep 17 00:00:00 2001 From: admin Date: Sun, 24 May 2026 19:29:44 +0000 Subject: [PATCH] v3.9.0: sync src/translate-proxy.py --- src/translate-proxy.py | 563 +++++++++++++++++++++++++++++++---------- 1 file changed, 430 insertions(+), 133 deletions(-) diff --git a/src/translate-proxy.py b/src/translate-proxy.py index 63b604d..e061c3e 100755 --- a/src/translate-proxy.py +++ b/src/translate-proxy.py @@ -335,7 +335,7 @@ def _codebuff_get_session(token, model): req = urllib.request.Request(url, data=body, headers={ "Content-Type": "application/json", "Authorization": f"Bearer {token}", - "User-Agent": "codex-launcher/3.8.4", + "User-Agent": "codex-launcher/3.9.0", "x-codebuff-model": model, }) resp = urllib.request.urlopen(req, timeout=15) @@ -360,7 +360,7 @@ def _codebuff_start_run(token, agent_id): req = urllib.request.Request(url, data=body, headers={ "Content-Type": "application/json", "Authorization": f"Bearer {token}", - "User-Agent": "codex-launcher/3.8.4", + "User-Agent": "codex-launcher/3.9.0", }) try: resp = urllib.request.urlopen(req, timeout=15) @@ -383,13 +383,226 @@ def _codebuff_finish_run(token, run_id, status="completed"): req = urllib.request.Request(url, data=body, headers={ "Content-Type": "application/json", "Authorization": f"Bearer {token}", - "User-Agent": "codex-launcher/3.8.4", + "User-Agent": "codex-launcher/3.9.0", }) try: urllib.request.urlopen(req, timeout=10) except Exception as e: print(f"[codebuff] finish run {run_id} error: {e}", file=sys.stderr) +# ═══════════════════════════════════════════════════════════════════ +# Multi-account rotation system +# ═══════════════════════════════════════════════════════════════════ + +class AccountPool: + """Manages multiple accounts for a provider. Rotates on rate-limit (429/426).""" + + def __init__(self, provider_name): + self.provider_name = provider_name + self._lock = threading.Lock() + self._accounts = [] + self._rate_limited = {} + self._current_idx = 0 + self._loaded_at = 0 + + def load_accounts(self, force=False): + with self._lock: + if not force and self._accounts and time.time() - self._loaded_at < 60: + return len(self._accounts) + accounts = self._do_load() + with self._lock: + if accounts: + self._accounts = accounts + self._loaded_at = time.time() + for a in accounts: + key = a.get("id", a.get("email", "")) + if key not in self._rate_limited: + self._rate_limited[key] = 0 + return len(self._accounts) if accounts else 0 + + def _do_load(self): + return [] + + def get(self): + """Return the best available account dict, or None.""" + self.load_accounts() + with self._lock: + if not self._accounts: + return None + now = time.time() + n = len(self._accounts) + for attempt in range(n): + idx = (self._current_idx + attempt) % n + acct = self._accounts[idx] + key = acct.get("id", acct.get("email", "")) + if self._rate_limited.get(key, 0) < now: + self._current_idx = idx + return acct + best_key = min(self._rate_limited, key=self._rate_limited.get) + wait = self._rate_limited[best_key] - now + print(f"[{self.provider_name}] all accounts rate-limited, earliest free in {wait:.0f}s", file=sys.stderr) + return self._accounts[self._current_idx] + + def mark_rate_limited(self, account, duration=120): + key = account.get("id", account.get("email", "")) + with self._lock: + self._rate_limited[key] = time.time() + duration + idx = None + for i, a in enumerate(self._accounts): + if a.get("id", a.get("email", "")) == key: + idx = i + break + if idx is not None: + self._current_idx = (idx + 1) % len(self._accounts) + print(f"[{self.provider_name}] account {key} rate-limited for {duration}s, rotating to next", file=sys.stderr) + + def advance(self): + with self._lock: + if self._accounts: + self._current_idx = (self._current_idx + 1) % len(self._accounts) + + def status(self): + with self._lock: + now = time.time() + result = [] + for a in self._accounts: + key = a.get("id", a.get("email", "")) + rl_until = self._rate_limited.get(key, 0) + info = {"id": key, "email": a.get("email", ""), "rate_limited": rl_until > now} + if rl_until > now: + info["rate_limited_until"] = rl_until + info["resets_in"] = int(rl_until - now) + result.append(info) + return result + +class CodebuffAccountPool(AccountPool): + def _do_load(self): + if not os.path.exists(_FREEBUFF_CREDS_PATH): + return None + try: + with open(_FREEBUFF_CREDS_PATH) as f: + creds = json.load(f) + except Exception: + return None + accounts = [] + if "accounts" in creds and isinstance(creds["accounts"], list): + for i, ac in enumerate(creds["accounts"]): + token = ac.get("authToken") or ac.get("apiKey") or "" + if token: + acct = {"id": ac.get("email") or ac.get("id") or f"account-{i}", "token": token, "email": ac.get("email", "")} + accounts.append(acct) + default = creds.get("default", {}) + default_token = default.get("authToken") or creds.get("apiKey") or "" + if default_token: + default_id = default.get("email") or default.get("id") or "default" + if not any(a["id"] == default_id for a in accounts): + accounts.insert(0, {"id": default_id, "token": default_token, "email": default.get("email", "")}) + return accounts if accounts else None + +class GoogleAccountPool(AccountPool): + def __init__(self, variant): + super().__init__(f"google-{variant}") + self.variant = variant + + def _do_load(self): + cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy") + accounts = [] + primary = f"google-{self.variant}-oauth-token.json" + primary_path = os.path.join(cache_dir, primary) + if os.path.exists(primary_path): + try: + with open(primary_path) as f: + tok = json.load(f) + token = tok.get("access_token", "") + if token: + accounts.append({"id": f"google-{self.variant}-primary", "token": token, "email": tok.get("email", ""), "_token_data": tok, "_path": primary_path}) + except Exception: + pass + idx = 1 + while True: + extra = f"google-{self.variant}-oauth-token-{idx}.json" + extra_path = os.path.join(cache_dir, extra) + if not os.path.exists(extra_path): + break + try: + with open(extra_path) as f: + tok = json.load(f) + token = tok.get("access_token", "") + if token: + accounts.append({"id": f"google-{self.variant}-{idx}", "token": token, "email": tok.get("email", ""), "_token_data": tok, "_path": extra_path}) + except Exception: + pass + idx += 1 + return accounts if accounts else None + +class APIKeyPool(AccountPool): + """Rotates through comma-separated API keys.""" + + def __init__(self, provider_name, keys_str): + super().__init__(provider_name) + self._raw_keys = [k.strip() for k in keys_str.split(",") if k.strip()] + self._accounts = [{"id": f"key-{i}", "token": k, "email": f"key-{i}"} for i, k in enumerate(self._raw_keys)] + for a in self._accounts: + self._rate_limited[a["id"]] = 0 + self._loaded_at = time.time() + + def load_accounts(self, force=False): + return len(self._accounts) + +_fb_pool = CodebuffAccountPool("codebuff") +_google_antigravity_pool = GoogleAccountPool("antigravity") +_google_cli_pool = GoogleAccountPool("cli") + +def _get_codebuff_account(): + """Return (token, account_dict) for best available codebuff account.""" + _fb_pool.load_accounts() + acct = _fb_pool.get() + if not acct: + return "", None + return acct["token"], acct + +def _get_google_account(oauth_provider): + """Return (access_token, account_dict) for best available Google account.""" + pool = _google_antigravity_pool if oauth_provider == "google-antigravity" else _google_cli_pool + pool.load_accounts() + acct = pool.get() + if not acct: + return None, None + token_data = acct.get("_token_data", {}) + token_path = acct.get("_path", "") + if token_data and token_path: + refreshed = _refresh_google_token(token_data, token_path) + return refreshed, acct + return acct.get("token", ""), acct + +def _refresh_google_token(token_data, token_path): + if token_data.get("expires_at", 0) > time.time() + 60: + return token_data.get("access_token", "") + client_id = token_data.get("client_id", "") + client_secret = token_data.get("client_secret", "") + refresh_token = token_data.get("refresh_token", "") + if not all([client_id, client_secret, refresh_token]): + return token_data.get("access_token", "") + print("[oauth] refreshing Google access token...", file=sys.stderr) + try: + data = urllib.parse.urlencode({ + "client_id": client_id, "client_secret": client_secret, + "refresh_token": refresh_token, "grant_type": "refresh_token", + }).encode() + req = urllib.request.Request("https://oauth2.googleapis.com/token", data=data, + headers={"Content-Type": "application/x-www-form-urlencoded"}) + resp = urllib.request.urlopen(req, timeout=30) + new_tokens = json.loads(resp.read()) + token_data["access_token"] = new_tokens.get("access_token", token_data.get("access_token")) + token_data["expires_at"] = time.time() + new_tokens.get("expires_in", 3600) + with open(token_path, "w") as f: + json.dump(token_data, f, indent=2) + print("[oauth] token refreshed OK", file=sys.stderr) + return token_data["access_token"] + except Exception as e: + print(f"[oauth] refresh failed: {e}", file=sys.stderr) + return token_data.get("access_token", "") + _LOG_FILE = None _LOG_FILE_LOCK = threading.Lock() @@ -441,6 +654,7 @@ def _ensure_antigravity_version(): def _init_runtime(): global CONFIG, PORT, BACKEND, TARGET_URL, API_KEY, OAUTH_PROVIDER, _antigravity_version global MODELS, CC_VERSION, REASONING_ENABLED, REASONING_EFFORT, BGP_ROUTES + global _api_key_pool CONFIG = load_config() PORT = CONFIG["port"] @@ -453,6 +667,10 @@ def _init_runtime(): REASONING_ENABLED = CONFIG.get("reasoning_enabled", True) REASONING_EFFORT = CONFIG.get("reasoning_effort", "medium") BGP_ROUTES = CONFIG.get("bgp_routes", []) + _api_key_pool = None + if API_KEY and "," in API_KEY and not OAUTH_PROVIDER.startswith("google") and BACKEND not in ("codebuff",): + _api_key_pool = APIKeyPool(BACKEND, API_KEY) + print(f"[multi-account] API key pool: {len(_api_key_pool._accounts)} keys for {BACKEND}", file=sys.stderr) if OAUTH_PROVIDER == "google-antigravity": _antigravity_version = _ensure_antigravity_version() print(f"[antigravity] version={_antigravity_version}", file=sys.stderr) @@ -614,6 +832,10 @@ def _refresh_oauth_token(): def _refresh_oauth_token_for(api_key, oauth_provider): oauth_provider = oauth_provider or "" + if oauth_provider.startswith("google"): + token, acct = _get_google_account(oauth_provider) + if token and acct: + return token if not oauth_provider.startswith("google"): return api_key token_name = "google-antigravity-oauth-token.json" if oauth_provider == "google-antigravity" else "google-cli-oauth-token.json" @@ -3715,9 +3937,25 @@ class Handler(http.server.BaseHTTPRequestHandler): protocol_version = "HTTP/1.1" def do_GET(self): - if self.path in ("/v1/models", "/models"): - self.send_json(200, {"object": "list", "data": MODELS}) - elif self.path in ("/health", "/v1/health"): + if self.path in ("/v1/models", "/models"): + self.send_json(200, {"object": "list", "data": MODELS}) + elif self.path in ("/v1/accounts", "/accounts"): + info = {"provider": BACKEND, "oauth_provider": OAUTH_PROVIDER} + if BACKEND == "codebuff": + info["accounts"] = _fb_pool.status() + info["total"] = len(_fb_pool._accounts) + elif OAUTH_PROVIDER and OAUTH_PROVIDER.startswith("google"): + pool = _google_antigravity_pool if OAUTH_PROVIDER == "google-antigravity" else _google_cli_pool + info["accounts"] = pool.status() + info["total"] = len(pool._accounts) + elif _api_key_pool: + info["accounts"] = _api_key_pool.status() + info["total"] = len(_api_key_pool._accounts) + else: + info["accounts"] = [] + info["total"] = 0 + self.send_json(200, info) + elif self.path in ("/health", "/v1/health"): import resource as _res _mem_mb = 0 try: @@ -3732,8 +3970,8 @@ class Handler(http.server.BaseHTTPRequestHandler): "uptime_s": round(_uptime, 1), "memory_mb": round(_mem_mb, 1), "requests_total": _STATS.get("requests", 0)}) - else: - self.send_error(404) + else: + self.send_error(404) def do_POST(self): if _shutdown_requested: @@ -3868,7 +4106,11 @@ class Handler(http.server.BaseHTTPRequestHandler): else: chat_body = self._build_chat_body(model, messages, body, stream) target = upstream_target(TARGET_URL, "/chat/completions") - effective_key = _refresh_oauth_token() + if _api_key_pool: + pool_acct = _api_key_pool.get() + effective_key = pool_acct["token"] if pool_acct else API_KEY + else: + effective_key = _refresh_oauth_token() fwd = forwarded_headers(self.headers, { "Content-Type": "application/json", "Authorization": f"Bearer {effective_key}", @@ -3883,6 +4125,15 @@ class Handler(http.server.BaseHTTPRequestHandler): except urllib.error.HTTPError as e: err_body = e.read().decode() if e.code in (429, 502, 503) and attempt < max_retries: + if e.code == 429 and _api_key_pool: + pool_acct = _api_key_pool.get() + if pool_acct: + _api_key_pool.mark_rate_limited(pool_acct, 60) + next_acct = _api_key_pool.get() + if next_acct: + effective_key = next_acct["token"] + fwd["Authorization"] = f"Bearer {effective_key}" + print(f"[multi-account] rotating to key {next_acct['id']}", file=sys.stderr) retry_after = e.headers.get("Retry-After") if retry_after: try: @@ -4153,6 +4404,11 @@ class Handler(http.server.BaseHTTPRequestHandler): if e.code == 429 and ep != endpoints[-1]: print(f"[{self._session_id}] {ep} HTTP 429, trying next endpoint", file=sys.stderr) continue + if e.code == 429: + pool = _google_antigravity_pool if OAUTH_PROVIDER == "google-antigravity" else _google_cli_pool + _, acct = _get_google_account(OAUTH_PROVIDER) + if acct: + pool.mark_rate_limited(acct, 60) return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}}) except Exception as e: if ep == endpoints[-1]: @@ -4750,11 +5006,6 @@ class Handler(http.server.BaseHTTPRequestHandler): store_response(rid, body.get("input", ""), result.get("output", [])) def _handle_codebuff(self, body, model, stream, tracker=None): - token = _get_codebuff_token() - if not token: - return self.send_json(401, {"error": {"type": "auth_error", - "message": "No codebuff credentials found. Install codebuff (npm i -g codebuff) and login first."}}) - agent_id = _FREEBUFF_AGENT_MAP.get(model) if not agent_id: matched = None @@ -4771,140 +5022,175 @@ class Handler(http.server.BaseHTTPRequestHandler): print(f"[codebuff] unknown model '{model}', falling back to {fallback_model}", file=sys.stderr) model = fallback_model - run_id = _codebuff_start_run(token, agent_id) - if not run_id: - return self.send_json(502, {"error": {"type": "upstream_error", - "message": "Failed to start codebuff agent run. Check credentials and network."}}) + _fb_pool.load_accounts() + pool_status = _fb_pool.status() + n_accounts = len(pool_status) + if n_accounts == 0: + return self.send_json(401, {"error": {"type": "auth_error", + "message": "No codebuff credentials found. Add accounts to ~/.config/manicode/credentials.json"}}) - instance_id = _codebuff_get_session(token, model) + last_err = None + for attempt in range(n_accounts): + token, acct = _get_codebuff_account() + if not token: + return self.send_json(401, {"error": {"type": "auth_error", + "message": "No codebuff credentials found. All accounts exhausted."}}) - input_data = body.get("input", "") - instructions = body.get("instructions", "").strip() - messages = _fb_input_to_messages(input_data, instructions) - messages = _ds_rebuild_tool_history(messages) + acct_id = acct.get("id", "?") if acct else "?" + if attempt > 0: + print(f"[codebuff] rotation attempt {attempt+1}/{n_accounts}, trying account {acct_id}", file=sys.stderr) - metadata = { - "run_id": run_id, - "cost_mode": "free", - } - if instance_id: - metadata["codebuff_instance_id"] = instance_id + run_id = _codebuff_start_run(token, agent_id) + if not run_id: + _fb_pool.mark_rate_limited(acct, 60) + last_err = ("upstream_error", 502, "Failed to start codebuff agent run. Check credentials and network.") + continue - chat_body = { - "model": model, - "messages": messages, - "stream": stream, - "max_tokens": max(body.get("max_output_tokens", 0), 64000), - "codebuff_metadata": metadata, - } - for k in ("temperature", "top_p"): - if k in body: - chat_body[k] = body[k] - tools = oa_convert_tools(body.get("tools")) - if tools: - chat_body["tools"] = tools - if body.get("tool_choice"): - chat_body["tool_choice"] = body["tool_choice"] + instance_id = _codebuff_get_session(token, model) - target = f"{_FREEBUFF_API_URL}/api/v1/chat/completions" - headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {token}", - "User-Agent": "codex-launcher/3.8.4", - "x-codebuff-model": model, - } - if instance_id: - headers["x-codebuff-instance-id"] = instance_id + input_data = body.get("input", "") + instructions = body.get("instructions", "").strip() + messages = _fb_input_to_messages(input_data, instructions) + messages = _ds_rebuild_tool_history(messages) - print(f"[{self._session_id}] [codebuff] POST {target} model={model} stream={stream} run={run_id}", file=sys.stderr) - chat_body_b = json.dumps(chat_body).encode() + metadata = { + "run_id": run_id, + "cost_mode": "free", + } + if instance_id: + metadata["codebuff_instance_id"] = instance_id - try: - req = urllib.request.Request(target, data=chat_body_b, headers=headers) - upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream)) - except urllib.error.HTTPError as e: - err_body = e.read().decode()[:1000] - _codebuff_finish_run(token, run_id, "failed") - if _is_reasoning_content_error(err_body): - print(f"[codebuff] reasoning_content error, retrying with thinking disabled (DeepSeek native format): {err_body[:200]}", file=sys.stderr) - result = self._fb_retry_thinking_disabled(body, model, token, agent_id, stream, tracker, input_data, instructions, err_body) - return result - print(f"[codebuff] HTTP {e.code}: {err_body[:300]}", file=sys.stderr) - return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}}) - except Exception as e: - _codebuff_finish_run(token, run_id, "failed") - return self.send_json(502, {"error": {"type": "proxy_error", "message": str(e)}}) + chat_body = { + "model": model, + "messages": messages, + "stream": stream, + "max_tokens": max(body.get("max_output_tokens", 0), 64000), + "codebuff_metadata": metadata, + } + for k in ("temperature", "top_p"): + if k in body: + chat_body[k] = body[k] + tools = oa_convert_tools(body.get("tools")) + if tools: + chat_body["tools"] = tools + if body.get("tool_choice"): + chat_body["tool_choice"] = body["tool_choice"] - t0 = time.time() - try: - if stream: - self.send_response(200) - self.send_header("Content-Type", "text/event-stream") - self.send_header("Cache-Control", "no-cache") - self.send_header("Connection", "keep-alive") - self.end_headers() - if hasattr(self, 'connection') and self.connection: + target = f"{_FREEBUFF_API_URL}/api/v1/chat/completions" + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {token}", + "User-Agent": "codex-launcher/3.9.0", + "x-codebuff-model": model, + } + if instance_id: + headers["x-codebuff-instance-id"] = instance_id + + print(f"[{self._session_id}] [codebuff] POST {target} model={model} stream={stream} run={run_id} acct={acct_id}", file=sys.stderr) + chat_body_b = json.dumps(chat_body).encode() + + try: + req = urllib.request.Request(target, data=chat_body_b, headers=headers) + upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream)) + except urllib.error.HTTPError as e: + err_body = e.read().decode()[:1000] + _codebuff_finish_run(token, run_id, "failed") + if e.code in (429, 426): + reset_ms = 0 try: - self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + err_json = json.loads(err_body) + reset_ms = err_json.get("retryAfterMs", 0) except Exception: pass + duration = max(reset_ms / 1000, 120) if reset_ms else 120 + _fb_pool.mark_rate_limited(acct, duration) + last_err = ("upstream_error", e.code, _sanitize_err_body(err_body)) + print(f"[codebuff] account {acct_id} got HTTP {e.code}, rotating", file=sys.stderr) + continue + if _is_reasoning_content_error(err_body): + print(f"[codebuff] reasoning_content error, retrying with thinking disabled", file=sys.stderr) + result = self._fb_retry_thinking_disabled(body, model, token, agent_id, stream, tracker, input_data, instructions, err_body, acct) + return result + print(f"[codebuff] HTTP {e.code}: {err_body[:300]}", file=sys.stderr) + return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}}) + except Exception as e: + _codebuff_finish_run(token, run_id, "failed") + return self.send_json(502, {"error": {"type": "proxy_error", "message": str(e)}}) - last_resp_id = [None] - last_output = [None] - last_status = [None] - finish_reason = [None] - reasoning_out = {} + t0 = time.time() + try: + if stream: + self.send_response(200) + self.send_header("Content-Type", "text/event-stream") + self.send_header("Cache-Control", "no-cache") + self.send_header("Connection", "keep-alive") + self.end_headers() + if hasattr(self, 'connection') and self.connection: + try: + self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + except Exception: + pass - def _on_fb_event(event): - if tracker and tracker.cancelled.is_set(): - return False - for line in event.strip().split("\n"): - if line.startswith("data: "): - try: - d = json.loads(line[6:]) - if d.get("type") == "response.completed": - last_resp_id[0] = d.get("response", {}).get("id") - last_output[0] = d.get("response", {}).get("output", []) - last_status[0] = d.get("response", {}).get("status") - finish_reason[0] = "length" if last_status[0] == "incomplete" else "stop" - except Exception: - pass - return None + last_resp_id = [None] + last_output = [None] + last_status = [None] + finish_reason = [None] + reasoning_out = {} - try: - self.stream_buffered_events( - oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id"), - _reasoning_out=reasoning_out), - on_event=_on_fb_event) - except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError): - print(f"[{self._session_id}] [codebuff] client disconnected", file=sys.stderr) - return + def _on_fb_event(event): + if tracker and tracker.cancelled.is_set(): + return False + for line in event.strip().split("\n"): + if line.startswith("data: "): + try: + d = json.loads(line[6:]) + if d.get("type") == "response.completed": + last_resp_id[0] = d.get("response", {}).get("id") + last_output[0] = d.get("response", {}).get("output", []) + last_status[0] = d.get("response", {}).get("status") + finish_reason[0] = "length" if last_status[0] == "incomplete" else "stop" + except Exception: + pass + return None - success = finish_reason[0] != "length" - _record_usage("codebuff", model, success, time.time() - t0) - if last_resp_id[0] and input_data is not None: - store_response(last_resp_id[0], input_data, last_output[0]) - if last_resp_id[0] and reasoning_out.get("text") or reasoning_out.get("tool_calls"): - asm = {"role": "assistant", "content": reasoning_out.get("text", "") or ""} - if reasoning_out.get("tool_calls"): - asm["tool_calls"] = reasoning_out["tool_calls"] - if reasoning_out.get("text"): - asm["reasoning_content"] = reasoning_out["text"] - _ds_store_assistant(last_resp_id[0], asm) - print(f"[{self._session_id}] [codebuff] stream done status={last_status[0]} in {time.time()-t0:.1f}s", file=sys.stderr) - else: - raw = upstream.read().decode() - chat_resp = json.loads(raw) - result = oa_resp_to_responses(chat_resp, model) - self.send_json(200, result) - rid = result.get("id") - if rid: - store_response(rid, input_data, result.get("output", [])) - print(f"[{self._session_id}] [codebuff] non-stream done in {time.time()-t0:.1f}s", file=sys.stderr) - finally: - _codebuff_finish_run(token, run_id, "completed") + try: + self.stream_buffered_events( + oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id"), + _reasoning_out=reasoning_out), + on_event=_on_fb_event) + except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError): + print(f"[{self._session_id}] [codebuff] client disconnected", file=sys.stderr) + return - def _fb_retry_thinking_disabled(self, body, model, token, agent_id, stream, tracker, input_data, instructions, original_error): + success = finish_reason[0] != "length" + _record_usage("codebuff", model, success, time.time() - t0) + if last_resp_id[0] and input_data is not None: + store_response(last_resp_id[0], input_data, last_output[0]) + if last_resp_id[0] and reasoning_out.get("text") or reasoning_out.get("tool_calls"): + asm = {"role": "assistant", "content": reasoning_out.get("text", "") or ""} + if reasoning_out.get("tool_calls"): + asm["tool_calls"] = reasoning_out["tool_calls"] + if reasoning_out.get("text"): + asm["reasoning_content"] = reasoning_out["text"] + _ds_store_assistant(last_resp_id[0], asm) + print(f"[{self._session_id}] [codebuff] stream done status={last_status[0]} in {time.time()-t0:.1f}s acct={acct_id}", file=sys.stderr) + else: + raw = upstream.read().decode() + chat_resp = json.loads(raw) + result = oa_resp_to_responses(chat_resp, model) + self.send_json(200, result) + rid = result.get("id") + if rid: + store_response(rid, input_data, result.get("output", [])) + print(f"[{self._session_id}] [codebuff] non-stream done in {time.time()-t0:.1f}s acct={acct_id}", file=sys.stderr) + finally: + _codebuff_finish_run(token, run_id, "completed") + return + + if last_err: + return self.send_json(last_err[1], {"error": {"type": last_err[0], "message": f"All {n_accounts} accounts exhausted. {last_err[2]}"}}) + + def _fb_retry_thinking_disabled(self, body, model, token, agent_id, stream, tracker, input_data, instructions, original_error, acct=None): run_id = _codebuff_start_run(token, agent_id) if not run_id: return self.send_json(502, {"error": {"type": "upstream_error", @@ -4930,7 +5216,7 @@ class Handler(http.server.BaseHTTPRequestHandler): if body.get("tool_choice"): chat_body["tool_choice"] = body["tool_choice"] target = f"{_FREEBUFF_API_URL}/api/v1/chat/completions" - headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}", "User-Agent": "codex-launcher/3.8.4", "x-codebuff-model": model} + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}", "User-Agent": "codex-launcher/3.9.0", "x-codebuff-model": model} if instance_id: headers["x-codebuff-instance-id"] = instance_id print(f"[codebuff] retry POST {target} model={model} stream={stream} run={run_id} (thinking disabled via DeepSeek native)", file=sys.stderr) @@ -5352,6 +5638,17 @@ def main(): print(f"translate-proxy ({BACKEND}) listening on http://127.0.0.1:{PORT}", flush=True) print(f"Target: {TARGET_URL}", flush=True) print(f"Models: {[m['id'] for m in MODELS]}", flush=True) + if BACKEND == "codebuff": + _fb_pool.load_accounts(force=True) + fb_status = _fb_pool.status() + print(f"[multi-account] codebuff: {len(fb_status)} accounts loaded {[a['id'] for a in fb_status]}", flush=True) + if OAUTH_PROVIDER and OAUTH_PROVIDER.startswith("google"): + pool = _google_antigravity_pool if OAUTH_PROVIDER == "google-antigravity" else _google_cli_pool + pool.load_accounts(force=True) + g_status = pool.status() + print(f"[multi-account] {OAUTH_PROVIDER}: {len(g_status)} accounts loaded {[a['id'] for a in g_status]}", flush=True) + if _api_key_pool: + print(f"[multi-account] API keys: {len(_api_key_pool._accounts)} keys loaded", flush=True) if BGP_ROUTES: print(f"BGP routes: {len(BGP_ROUTES)} ({[r.get('name','?') for r in BGP_ROUTES]})", flush=True) try: