v3.9.0: sync src/translate-proxy.py
This commit is contained in:
@@ -335,7 +335,7 @@ def _codebuff_get_session(token, model):
|
||||
req = urllib.request.Request(url, data=body, headers={
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {token}",
|
||||
"User-Agent": "codex-launcher/3.8.4",
|
||||
"User-Agent": "codex-launcher/3.9.0",
|
||||
"x-codebuff-model": model,
|
||||
})
|
||||
resp = urllib.request.urlopen(req, timeout=15)
|
||||
@@ -360,7 +360,7 @@ def _codebuff_start_run(token, agent_id):
|
||||
req = urllib.request.Request(url, data=body, headers={
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {token}",
|
||||
"User-Agent": "codex-launcher/3.8.4",
|
||||
"User-Agent": "codex-launcher/3.9.0",
|
||||
})
|
||||
try:
|
||||
resp = urllib.request.urlopen(req, timeout=15)
|
||||
@@ -383,13 +383,226 @@ def _codebuff_finish_run(token, run_id, status="completed"):
|
||||
req = urllib.request.Request(url, data=body, headers={
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {token}",
|
||||
"User-Agent": "codex-launcher/3.8.4",
|
||||
"User-Agent": "codex-launcher/3.9.0",
|
||||
})
|
||||
try:
|
||||
urllib.request.urlopen(req, timeout=10)
|
||||
except Exception as e:
|
||||
print(f"[codebuff] finish run {run_id} error: {e}", file=sys.stderr)
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
# Multi-account rotation system
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
class AccountPool:
|
||||
"""Manages multiple accounts for a provider. Rotates on rate-limit (429/426)."""
|
||||
|
||||
def __init__(self, provider_name):
|
||||
self.provider_name = provider_name
|
||||
self._lock = threading.Lock()
|
||||
self._accounts = []
|
||||
self._rate_limited = {}
|
||||
self._current_idx = 0
|
||||
self._loaded_at = 0
|
||||
|
||||
def load_accounts(self, force=False):
|
||||
with self._lock:
|
||||
if not force and self._accounts and time.time() - self._loaded_at < 60:
|
||||
return len(self._accounts)
|
||||
accounts = self._do_load()
|
||||
with self._lock:
|
||||
if accounts:
|
||||
self._accounts = accounts
|
||||
self._loaded_at = time.time()
|
||||
for a in accounts:
|
||||
key = a.get("id", a.get("email", ""))
|
||||
if key not in self._rate_limited:
|
||||
self._rate_limited[key] = 0
|
||||
return len(self._accounts) if accounts else 0
|
||||
|
||||
def _do_load(self):
|
||||
return []
|
||||
|
||||
def get(self):
|
||||
"""Return the best available account dict, or None."""
|
||||
self.load_accounts()
|
||||
with self._lock:
|
||||
if not self._accounts:
|
||||
return None
|
||||
now = time.time()
|
||||
n = len(self._accounts)
|
||||
for attempt in range(n):
|
||||
idx = (self._current_idx + attempt) % n
|
||||
acct = self._accounts[idx]
|
||||
key = acct.get("id", acct.get("email", ""))
|
||||
if self._rate_limited.get(key, 0) < now:
|
||||
self._current_idx = idx
|
||||
return acct
|
||||
best_key = min(self._rate_limited, key=self._rate_limited.get)
|
||||
wait = self._rate_limited[best_key] - now
|
||||
print(f"[{self.provider_name}] all accounts rate-limited, earliest free in {wait:.0f}s", file=sys.stderr)
|
||||
return self._accounts[self._current_idx]
|
||||
|
||||
def mark_rate_limited(self, account, duration=120):
|
||||
key = account.get("id", account.get("email", ""))
|
||||
with self._lock:
|
||||
self._rate_limited[key] = time.time() + duration
|
||||
idx = None
|
||||
for i, a in enumerate(self._accounts):
|
||||
if a.get("id", a.get("email", "")) == key:
|
||||
idx = i
|
||||
break
|
||||
if idx is not None:
|
||||
self._current_idx = (idx + 1) % len(self._accounts)
|
||||
print(f"[{self.provider_name}] account {key} rate-limited for {duration}s, rotating to next", file=sys.stderr)
|
||||
|
||||
def advance(self):
|
||||
with self._lock:
|
||||
if self._accounts:
|
||||
self._current_idx = (self._current_idx + 1) % len(self._accounts)
|
||||
|
||||
def status(self):
|
||||
with self._lock:
|
||||
now = time.time()
|
||||
result = []
|
||||
for a in self._accounts:
|
||||
key = a.get("id", a.get("email", ""))
|
||||
rl_until = self._rate_limited.get(key, 0)
|
||||
info = {"id": key, "email": a.get("email", ""), "rate_limited": rl_until > now}
|
||||
if rl_until > now:
|
||||
info["rate_limited_until"] = rl_until
|
||||
info["resets_in"] = int(rl_until - now)
|
||||
result.append(info)
|
||||
return result
|
||||
|
||||
class CodebuffAccountPool(AccountPool):
|
||||
def _do_load(self):
|
||||
if not os.path.exists(_FREEBUFF_CREDS_PATH):
|
||||
return None
|
||||
try:
|
||||
with open(_FREEBUFF_CREDS_PATH) as f:
|
||||
creds = json.load(f)
|
||||
except Exception:
|
||||
return None
|
||||
accounts = []
|
||||
if "accounts" in creds and isinstance(creds["accounts"], list):
|
||||
for i, ac in enumerate(creds["accounts"]):
|
||||
token = ac.get("authToken") or ac.get("apiKey") or ""
|
||||
if token:
|
||||
acct = {"id": ac.get("email") or ac.get("id") or f"account-{i}", "token": token, "email": ac.get("email", "")}
|
||||
accounts.append(acct)
|
||||
default = creds.get("default", {})
|
||||
default_token = default.get("authToken") or creds.get("apiKey") or ""
|
||||
if default_token:
|
||||
default_id = default.get("email") or default.get("id") or "default"
|
||||
if not any(a["id"] == default_id for a in accounts):
|
||||
accounts.insert(0, {"id": default_id, "token": default_token, "email": default.get("email", "")})
|
||||
return accounts if accounts else None
|
||||
|
||||
class GoogleAccountPool(AccountPool):
|
||||
def __init__(self, variant):
|
||||
super().__init__(f"google-{variant}")
|
||||
self.variant = variant
|
||||
|
||||
def _do_load(self):
|
||||
cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy")
|
||||
accounts = []
|
||||
primary = f"google-{self.variant}-oauth-token.json"
|
||||
primary_path = os.path.join(cache_dir, primary)
|
||||
if os.path.exists(primary_path):
|
||||
try:
|
||||
with open(primary_path) as f:
|
||||
tok = json.load(f)
|
||||
token = tok.get("access_token", "")
|
||||
if token:
|
||||
accounts.append({"id": f"google-{self.variant}-primary", "token": token, "email": tok.get("email", ""), "_token_data": tok, "_path": primary_path})
|
||||
except Exception:
|
||||
pass
|
||||
idx = 1
|
||||
while True:
|
||||
extra = f"google-{self.variant}-oauth-token-{idx}.json"
|
||||
extra_path = os.path.join(cache_dir, extra)
|
||||
if not os.path.exists(extra_path):
|
||||
break
|
||||
try:
|
||||
with open(extra_path) as f:
|
||||
tok = json.load(f)
|
||||
token = tok.get("access_token", "")
|
||||
if token:
|
||||
accounts.append({"id": f"google-{self.variant}-{idx}", "token": token, "email": tok.get("email", ""), "_token_data": tok, "_path": extra_path})
|
||||
except Exception:
|
||||
pass
|
||||
idx += 1
|
||||
return accounts if accounts else None
|
||||
|
||||
class APIKeyPool(AccountPool):
|
||||
"""Rotates through comma-separated API keys."""
|
||||
|
||||
def __init__(self, provider_name, keys_str):
|
||||
super().__init__(provider_name)
|
||||
self._raw_keys = [k.strip() for k in keys_str.split(",") if k.strip()]
|
||||
self._accounts = [{"id": f"key-{i}", "token": k, "email": f"key-{i}"} for i, k in enumerate(self._raw_keys)]
|
||||
for a in self._accounts:
|
||||
self._rate_limited[a["id"]] = 0
|
||||
self._loaded_at = time.time()
|
||||
|
||||
def load_accounts(self, force=False):
|
||||
return len(self._accounts)
|
||||
|
||||
_fb_pool = CodebuffAccountPool("codebuff")
|
||||
_google_antigravity_pool = GoogleAccountPool("antigravity")
|
||||
_google_cli_pool = GoogleAccountPool("cli")
|
||||
|
||||
def _get_codebuff_account():
|
||||
"""Return (token, account_dict) for best available codebuff account."""
|
||||
_fb_pool.load_accounts()
|
||||
acct = _fb_pool.get()
|
||||
if not acct:
|
||||
return "", None
|
||||
return acct["token"], acct
|
||||
|
||||
def _get_google_account(oauth_provider):
|
||||
"""Return (access_token, account_dict) for best available Google account."""
|
||||
pool = _google_antigravity_pool if oauth_provider == "google-antigravity" else _google_cli_pool
|
||||
pool.load_accounts()
|
||||
acct = pool.get()
|
||||
if not acct:
|
||||
return None, None
|
||||
token_data = acct.get("_token_data", {})
|
||||
token_path = acct.get("_path", "")
|
||||
if token_data and token_path:
|
||||
refreshed = _refresh_google_token(token_data, token_path)
|
||||
return refreshed, acct
|
||||
return acct.get("token", ""), acct
|
||||
|
||||
def _refresh_google_token(token_data, token_path):
|
||||
if token_data.get("expires_at", 0) > time.time() + 60:
|
||||
return token_data.get("access_token", "")
|
||||
client_id = token_data.get("client_id", "")
|
||||
client_secret = token_data.get("client_secret", "")
|
||||
refresh_token = token_data.get("refresh_token", "")
|
||||
if not all([client_id, client_secret, refresh_token]):
|
||||
return token_data.get("access_token", "")
|
||||
print("[oauth] refreshing Google access token...", file=sys.stderr)
|
||||
try:
|
||||
data = urllib.parse.urlencode({
|
||||
"client_id": client_id, "client_secret": client_secret,
|
||||
"refresh_token": refresh_token, "grant_type": "refresh_token",
|
||||
}).encode()
|
||||
req = urllib.request.Request("https://oauth2.googleapis.com/token", data=data,
|
||||
headers={"Content-Type": "application/x-www-form-urlencoded"})
|
||||
resp = urllib.request.urlopen(req, timeout=30)
|
||||
new_tokens = json.loads(resp.read())
|
||||
token_data["access_token"] = new_tokens.get("access_token", token_data.get("access_token"))
|
||||
token_data["expires_at"] = time.time() + new_tokens.get("expires_in", 3600)
|
||||
with open(token_path, "w") as f:
|
||||
json.dump(token_data, f, indent=2)
|
||||
print("[oauth] token refreshed OK", file=sys.stderr)
|
||||
return token_data["access_token"]
|
||||
except Exception as e:
|
||||
print(f"[oauth] refresh failed: {e}", file=sys.stderr)
|
||||
return token_data.get("access_token", "")
|
||||
|
||||
_LOG_FILE = None
|
||||
_LOG_FILE_LOCK = threading.Lock()
|
||||
|
||||
@@ -441,6 +654,7 @@ def _ensure_antigravity_version():
|
||||
def _init_runtime():
|
||||
global CONFIG, PORT, BACKEND, TARGET_URL, API_KEY, OAUTH_PROVIDER, _antigravity_version
|
||||
global MODELS, CC_VERSION, REASONING_ENABLED, REASONING_EFFORT, BGP_ROUTES
|
||||
global _api_key_pool
|
||||
|
||||
CONFIG = load_config()
|
||||
PORT = CONFIG["port"]
|
||||
@@ -453,6 +667,10 @@ def _init_runtime():
|
||||
REASONING_ENABLED = CONFIG.get("reasoning_enabled", True)
|
||||
REASONING_EFFORT = CONFIG.get("reasoning_effort", "medium")
|
||||
BGP_ROUTES = CONFIG.get("bgp_routes", [])
|
||||
_api_key_pool = None
|
||||
if API_KEY and "," in API_KEY and not OAUTH_PROVIDER.startswith("google") and BACKEND not in ("codebuff",):
|
||||
_api_key_pool = APIKeyPool(BACKEND, API_KEY)
|
||||
print(f"[multi-account] API key pool: {len(_api_key_pool._accounts)} keys for {BACKEND}", file=sys.stderr)
|
||||
if OAUTH_PROVIDER == "google-antigravity":
|
||||
_antigravity_version = _ensure_antigravity_version()
|
||||
print(f"[antigravity] version={_antigravity_version}", file=sys.stderr)
|
||||
@@ -614,6 +832,10 @@ def _refresh_oauth_token():
|
||||
|
||||
def _refresh_oauth_token_for(api_key, oauth_provider):
|
||||
oauth_provider = oauth_provider or ""
|
||||
if oauth_provider.startswith("google"):
|
||||
token, acct = _get_google_account(oauth_provider)
|
||||
if token and acct:
|
||||
return token
|
||||
if not oauth_provider.startswith("google"):
|
||||
return api_key
|
||||
token_name = "google-antigravity-oauth-token.json" if oauth_provider == "google-antigravity" else "google-cli-oauth-token.json"
|
||||
@@ -3715,9 +3937,25 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
protocol_version = "HTTP/1.1"
|
||||
|
||||
def do_GET(self):
|
||||
if self.path in ("/v1/models", "/models"):
|
||||
self.send_json(200, {"object": "list", "data": MODELS})
|
||||
elif self.path in ("/health", "/v1/health"):
|
||||
if self.path in ("/v1/models", "/models"):
|
||||
self.send_json(200, {"object": "list", "data": MODELS})
|
||||
elif self.path in ("/v1/accounts", "/accounts"):
|
||||
info = {"provider": BACKEND, "oauth_provider": OAUTH_PROVIDER}
|
||||
if BACKEND == "codebuff":
|
||||
info["accounts"] = _fb_pool.status()
|
||||
info["total"] = len(_fb_pool._accounts)
|
||||
elif OAUTH_PROVIDER and OAUTH_PROVIDER.startswith("google"):
|
||||
pool = _google_antigravity_pool if OAUTH_PROVIDER == "google-antigravity" else _google_cli_pool
|
||||
info["accounts"] = pool.status()
|
||||
info["total"] = len(pool._accounts)
|
||||
elif _api_key_pool:
|
||||
info["accounts"] = _api_key_pool.status()
|
||||
info["total"] = len(_api_key_pool._accounts)
|
||||
else:
|
||||
info["accounts"] = []
|
||||
info["total"] = 0
|
||||
self.send_json(200, info)
|
||||
elif self.path in ("/health", "/v1/health"):
|
||||
import resource as _res
|
||||
_mem_mb = 0
|
||||
try:
|
||||
@@ -3732,8 +3970,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
"uptime_s": round(_uptime, 1),
|
||||
"memory_mb": round(_mem_mb, 1),
|
||||
"requests_total": _STATS.get("requests", 0)})
|
||||
else:
|
||||
self.send_error(404)
|
||||
else:
|
||||
self.send_error(404)
|
||||
|
||||
def do_POST(self):
|
||||
if _shutdown_requested:
|
||||
@@ -3868,7 +4106,11 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
else:
|
||||
chat_body = self._build_chat_body(model, messages, body, stream)
|
||||
target = upstream_target(TARGET_URL, "/chat/completions")
|
||||
effective_key = _refresh_oauth_token()
|
||||
if _api_key_pool:
|
||||
pool_acct = _api_key_pool.get()
|
||||
effective_key = pool_acct["token"] if pool_acct else API_KEY
|
||||
else:
|
||||
effective_key = _refresh_oauth_token()
|
||||
fwd = forwarded_headers(self.headers, {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {effective_key}",
|
||||
@@ -3883,6 +4125,15 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
except urllib.error.HTTPError as e:
|
||||
err_body = e.read().decode()
|
||||
if e.code in (429, 502, 503) and attempt < max_retries:
|
||||
if e.code == 429 and _api_key_pool:
|
||||
pool_acct = _api_key_pool.get()
|
||||
if pool_acct:
|
||||
_api_key_pool.mark_rate_limited(pool_acct, 60)
|
||||
next_acct = _api_key_pool.get()
|
||||
if next_acct:
|
||||
effective_key = next_acct["token"]
|
||||
fwd["Authorization"] = f"Bearer {effective_key}"
|
||||
print(f"[multi-account] rotating to key {next_acct['id']}", file=sys.stderr)
|
||||
retry_after = e.headers.get("Retry-After")
|
||||
if retry_after:
|
||||
try:
|
||||
@@ -4153,6 +4404,11 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
if e.code == 429 and ep != endpoints[-1]:
|
||||
print(f"[{self._session_id}] {ep} HTTP 429, trying next endpoint", file=sys.stderr)
|
||||
continue
|
||||
if e.code == 429:
|
||||
pool = _google_antigravity_pool if OAUTH_PROVIDER == "google-antigravity" else _google_cli_pool
|
||||
_, acct = _get_google_account(OAUTH_PROVIDER)
|
||||
if acct:
|
||||
pool.mark_rate_limited(acct, 60)
|
||||
return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}})
|
||||
except Exception as e:
|
||||
if ep == endpoints[-1]:
|
||||
@@ -4750,11 +5006,6 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
store_response(rid, body.get("input", ""), result.get("output", []))
|
||||
|
||||
def _handle_codebuff(self, body, model, stream, tracker=None):
|
||||
token = _get_codebuff_token()
|
||||
if not token:
|
||||
return self.send_json(401, {"error": {"type": "auth_error",
|
||||
"message": "No codebuff credentials found. Install codebuff (npm i -g codebuff) and login first."}})
|
||||
|
||||
agent_id = _FREEBUFF_AGENT_MAP.get(model)
|
||||
if not agent_id:
|
||||
matched = None
|
||||
@@ -4771,140 +5022,175 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
print(f"[codebuff] unknown model '{model}', falling back to {fallback_model}", file=sys.stderr)
|
||||
model = fallback_model
|
||||
|
||||
run_id = _codebuff_start_run(token, agent_id)
|
||||
if not run_id:
|
||||
return self.send_json(502, {"error": {"type": "upstream_error",
|
||||
"message": "Failed to start codebuff agent run. Check credentials and network."}})
|
||||
_fb_pool.load_accounts()
|
||||
pool_status = _fb_pool.status()
|
||||
n_accounts = len(pool_status)
|
||||
if n_accounts == 0:
|
||||
return self.send_json(401, {"error": {"type": "auth_error",
|
||||
"message": "No codebuff credentials found. Add accounts to ~/.config/manicode/credentials.json"}})
|
||||
|
||||
instance_id = _codebuff_get_session(token, model)
|
||||
last_err = None
|
||||
for attempt in range(n_accounts):
|
||||
token, acct = _get_codebuff_account()
|
||||
if not token:
|
||||
return self.send_json(401, {"error": {"type": "auth_error",
|
||||
"message": "No codebuff credentials found. All accounts exhausted."}})
|
||||
|
||||
input_data = body.get("input", "")
|
||||
instructions = body.get("instructions", "").strip()
|
||||
messages = _fb_input_to_messages(input_data, instructions)
|
||||
messages = _ds_rebuild_tool_history(messages)
|
||||
acct_id = acct.get("id", "?") if acct else "?"
|
||||
if attempt > 0:
|
||||
print(f"[codebuff] rotation attempt {attempt+1}/{n_accounts}, trying account {acct_id}", file=sys.stderr)
|
||||
|
||||
metadata = {
|
||||
"run_id": run_id,
|
||||
"cost_mode": "free",
|
||||
}
|
||||
if instance_id:
|
||||
metadata["codebuff_instance_id"] = instance_id
|
||||
run_id = _codebuff_start_run(token, agent_id)
|
||||
if not run_id:
|
||||
_fb_pool.mark_rate_limited(acct, 60)
|
||||
last_err = ("upstream_error", 502, "Failed to start codebuff agent run. Check credentials and network.")
|
||||
continue
|
||||
|
||||
chat_body = {
|
||||
"model": model,
|
||||
"messages": messages,
|
||||
"stream": stream,
|
||||
"max_tokens": max(body.get("max_output_tokens", 0), 64000),
|
||||
"codebuff_metadata": metadata,
|
||||
}
|
||||
for k in ("temperature", "top_p"):
|
||||
if k in body:
|
||||
chat_body[k] = body[k]
|
||||
tools = oa_convert_tools(body.get("tools"))
|
||||
if tools:
|
||||
chat_body["tools"] = tools
|
||||
if body.get("tool_choice"):
|
||||
chat_body["tool_choice"] = body["tool_choice"]
|
||||
instance_id = _codebuff_get_session(token, model)
|
||||
|
||||
target = f"{_FREEBUFF_API_URL}/api/v1/chat/completions"
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {token}",
|
||||
"User-Agent": "codex-launcher/3.8.4",
|
||||
"x-codebuff-model": model,
|
||||
}
|
||||
if instance_id:
|
||||
headers["x-codebuff-instance-id"] = instance_id
|
||||
input_data = body.get("input", "")
|
||||
instructions = body.get("instructions", "").strip()
|
||||
messages = _fb_input_to_messages(input_data, instructions)
|
||||
messages = _ds_rebuild_tool_history(messages)
|
||||
|
||||
print(f"[{self._session_id}] [codebuff] POST {target} model={model} stream={stream} run={run_id}", file=sys.stderr)
|
||||
chat_body_b = json.dumps(chat_body).encode()
|
||||
metadata = {
|
||||
"run_id": run_id,
|
||||
"cost_mode": "free",
|
||||
}
|
||||
if instance_id:
|
||||
metadata["codebuff_instance_id"] = instance_id
|
||||
|
||||
try:
|
||||
req = urllib.request.Request(target, data=chat_body_b, headers=headers)
|
||||
upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream))
|
||||
except urllib.error.HTTPError as e:
|
||||
err_body = e.read().decode()[:1000]
|
||||
_codebuff_finish_run(token, run_id, "failed")
|
||||
if _is_reasoning_content_error(err_body):
|
||||
print(f"[codebuff] reasoning_content error, retrying with thinking disabled (DeepSeek native format): {err_body[:200]}", file=sys.stderr)
|
||||
result = self._fb_retry_thinking_disabled(body, model, token, agent_id, stream, tracker, input_data, instructions, err_body)
|
||||
return result
|
||||
print(f"[codebuff] HTTP {e.code}: {err_body[:300]}", file=sys.stderr)
|
||||
return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}})
|
||||
except Exception as e:
|
||||
_codebuff_finish_run(token, run_id, "failed")
|
||||
return self.send_json(502, {"error": {"type": "proxy_error", "message": str(e)}})
|
||||
chat_body = {
|
||||
"model": model,
|
||||
"messages": messages,
|
||||
"stream": stream,
|
||||
"max_tokens": max(body.get("max_output_tokens", 0), 64000),
|
||||
"codebuff_metadata": metadata,
|
||||
}
|
||||
for k in ("temperature", "top_p"):
|
||||
if k in body:
|
||||
chat_body[k] = body[k]
|
||||
tools = oa_convert_tools(body.get("tools"))
|
||||
if tools:
|
||||
chat_body["tools"] = tools
|
||||
if body.get("tool_choice"):
|
||||
chat_body["tool_choice"] = body["tool_choice"]
|
||||
|
||||
t0 = time.time()
|
||||
try:
|
||||
if stream:
|
||||
self.send_response(200)
|
||||
self.send_header("Content-Type", "text/event-stream")
|
||||
self.send_header("Cache-Control", "no-cache")
|
||||
self.send_header("Connection", "keep-alive")
|
||||
self.end_headers()
|
||||
if hasattr(self, 'connection') and self.connection:
|
||||
target = f"{_FREEBUFF_API_URL}/api/v1/chat/completions"
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {token}",
|
||||
"User-Agent": "codex-launcher/3.9.0",
|
||||
"x-codebuff-model": model,
|
||||
}
|
||||
if instance_id:
|
||||
headers["x-codebuff-instance-id"] = instance_id
|
||||
|
||||
print(f"[{self._session_id}] [codebuff] POST {target} model={model} stream={stream} run={run_id} acct={acct_id}", file=sys.stderr)
|
||||
chat_body_b = json.dumps(chat_body).encode()
|
||||
|
||||
try:
|
||||
req = urllib.request.Request(target, data=chat_body_b, headers=headers)
|
||||
upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream))
|
||||
except urllib.error.HTTPError as e:
|
||||
err_body = e.read().decode()[:1000]
|
||||
_codebuff_finish_run(token, run_id, "failed")
|
||||
if e.code in (429, 426):
|
||||
reset_ms = 0
|
||||
try:
|
||||
self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
|
||||
err_json = json.loads(err_body)
|
||||
reset_ms = err_json.get("retryAfterMs", 0)
|
||||
except Exception:
|
||||
pass
|
||||
duration = max(reset_ms / 1000, 120) if reset_ms else 120
|
||||
_fb_pool.mark_rate_limited(acct, duration)
|
||||
last_err = ("upstream_error", e.code, _sanitize_err_body(err_body))
|
||||
print(f"[codebuff] account {acct_id} got HTTP {e.code}, rotating", file=sys.stderr)
|
||||
continue
|
||||
if _is_reasoning_content_error(err_body):
|
||||
print(f"[codebuff] reasoning_content error, retrying with thinking disabled", file=sys.stderr)
|
||||
result = self._fb_retry_thinking_disabled(body, model, token, agent_id, stream, tracker, input_data, instructions, err_body, acct)
|
||||
return result
|
||||
print(f"[codebuff] HTTP {e.code}: {err_body[:300]}", file=sys.stderr)
|
||||
return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}})
|
||||
except Exception as e:
|
||||
_codebuff_finish_run(token, run_id, "failed")
|
||||
return self.send_json(502, {"error": {"type": "proxy_error", "message": str(e)}})
|
||||
|
||||
last_resp_id = [None]
|
||||
last_output = [None]
|
||||
last_status = [None]
|
||||
finish_reason = [None]
|
||||
reasoning_out = {}
|
||||
t0 = time.time()
|
||||
try:
|
||||
if stream:
|
||||
self.send_response(200)
|
||||
self.send_header("Content-Type", "text/event-stream")
|
||||
self.send_header("Cache-Control", "no-cache")
|
||||
self.send_header("Connection", "keep-alive")
|
||||
self.end_headers()
|
||||
if hasattr(self, 'connection') and self.connection:
|
||||
try:
|
||||
self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _on_fb_event(event):
|
||||
if tracker and tracker.cancelled.is_set():
|
||||
return False
|
||||
for line in event.strip().split("\n"):
|
||||
if line.startswith("data: "):
|
||||
try:
|
||||
d = json.loads(line[6:])
|
||||
if d.get("type") == "response.completed":
|
||||
last_resp_id[0] = d.get("response", {}).get("id")
|
||||
last_output[0] = d.get("response", {}).get("output", [])
|
||||
last_status[0] = d.get("response", {}).get("status")
|
||||
finish_reason[0] = "length" if last_status[0] == "incomplete" else "stop"
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
last_resp_id = [None]
|
||||
last_output = [None]
|
||||
last_status = [None]
|
||||
finish_reason = [None]
|
||||
reasoning_out = {}
|
||||
|
||||
try:
|
||||
self.stream_buffered_events(
|
||||
oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id"),
|
||||
_reasoning_out=reasoning_out),
|
||||
on_event=_on_fb_event)
|
||||
except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
|
||||
print(f"[{self._session_id}] [codebuff] client disconnected", file=sys.stderr)
|
||||
return
|
||||
def _on_fb_event(event):
|
||||
if tracker and tracker.cancelled.is_set():
|
||||
return False
|
||||
for line in event.strip().split("\n"):
|
||||
if line.startswith("data: "):
|
||||
try:
|
||||
d = json.loads(line[6:])
|
||||
if d.get("type") == "response.completed":
|
||||
last_resp_id[0] = d.get("response", {}).get("id")
|
||||
last_output[0] = d.get("response", {}).get("output", [])
|
||||
last_status[0] = d.get("response", {}).get("status")
|
||||
finish_reason[0] = "length" if last_status[0] == "incomplete" else "stop"
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
success = finish_reason[0] != "length"
|
||||
_record_usage("codebuff", model, success, time.time() - t0)
|
||||
if last_resp_id[0] and input_data is not None:
|
||||
store_response(last_resp_id[0], input_data, last_output[0])
|
||||
if last_resp_id[0] and reasoning_out.get("text") or reasoning_out.get("tool_calls"):
|
||||
asm = {"role": "assistant", "content": reasoning_out.get("text", "") or ""}
|
||||
if reasoning_out.get("tool_calls"):
|
||||
asm["tool_calls"] = reasoning_out["tool_calls"]
|
||||
if reasoning_out.get("text"):
|
||||
asm["reasoning_content"] = reasoning_out["text"]
|
||||
_ds_store_assistant(last_resp_id[0], asm)
|
||||
print(f"[{self._session_id}] [codebuff] stream done status={last_status[0]} in {time.time()-t0:.1f}s", file=sys.stderr)
|
||||
else:
|
||||
raw = upstream.read().decode()
|
||||
chat_resp = json.loads(raw)
|
||||
result = oa_resp_to_responses(chat_resp, model)
|
||||
self.send_json(200, result)
|
||||
rid = result.get("id")
|
||||
if rid:
|
||||
store_response(rid, input_data, result.get("output", []))
|
||||
print(f"[{self._session_id}] [codebuff] non-stream done in {time.time()-t0:.1f}s", file=sys.stderr)
|
||||
finally:
|
||||
_codebuff_finish_run(token, run_id, "completed")
|
||||
try:
|
||||
self.stream_buffered_events(
|
||||
oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id"),
|
||||
_reasoning_out=reasoning_out),
|
||||
on_event=_on_fb_event)
|
||||
except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
|
||||
print(f"[{self._session_id}] [codebuff] client disconnected", file=sys.stderr)
|
||||
return
|
||||
|
||||
def _fb_retry_thinking_disabled(self, body, model, token, agent_id, stream, tracker, input_data, instructions, original_error):
|
||||
success = finish_reason[0] != "length"
|
||||
_record_usage("codebuff", model, success, time.time() - t0)
|
||||
if last_resp_id[0] and input_data is not None:
|
||||
store_response(last_resp_id[0], input_data, last_output[0])
|
||||
if last_resp_id[0] and reasoning_out.get("text") or reasoning_out.get("tool_calls"):
|
||||
asm = {"role": "assistant", "content": reasoning_out.get("text", "") or ""}
|
||||
if reasoning_out.get("tool_calls"):
|
||||
asm["tool_calls"] = reasoning_out["tool_calls"]
|
||||
if reasoning_out.get("text"):
|
||||
asm["reasoning_content"] = reasoning_out["text"]
|
||||
_ds_store_assistant(last_resp_id[0], asm)
|
||||
print(f"[{self._session_id}] [codebuff] stream done status={last_status[0]} in {time.time()-t0:.1f}s acct={acct_id}", file=sys.stderr)
|
||||
else:
|
||||
raw = upstream.read().decode()
|
||||
chat_resp = json.loads(raw)
|
||||
result = oa_resp_to_responses(chat_resp, model)
|
||||
self.send_json(200, result)
|
||||
rid = result.get("id")
|
||||
if rid:
|
||||
store_response(rid, input_data, result.get("output", []))
|
||||
print(f"[{self._session_id}] [codebuff] non-stream done in {time.time()-t0:.1f}s acct={acct_id}", file=sys.stderr)
|
||||
finally:
|
||||
_codebuff_finish_run(token, run_id, "completed")
|
||||
return
|
||||
|
||||
if last_err:
|
||||
return self.send_json(last_err[1], {"error": {"type": last_err[0], "message": f"All {n_accounts} accounts exhausted. {last_err[2]}"}})
|
||||
|
||||
def _fb_retry_thinking_disabled(self, body, model, token, agent_id, stream, tracker, input_data, instructions, original_error, acct=None):
|
||||
run_id = _codebuff_start_run(token, agent_id)
|
||||
if not run_id:
|
||||
return self.send_json(502, {"error": {"type": "upstream_error",
|
||||
@@ -4930,7 +5216,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
if body.get("tool_choice"):
|
||||
chat_body["tool_choice"] = body["tool_choice"]
|
||||
target = f"{_FREEBUFF_API_URL}/api/v1/chat/completions"
|
||||
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}", "User-Agent": "codex-launcher/3.8.4", "x-codebuff-model": model}
|
||||
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}", "User-Agent": "codex-launcher/3.9.0", "x-codebuff-model": model}
|
||||
if instance_id:
|
||||
headers["x-codebuff-instance-id"] = instance_id
|
||||
print(f"[codebuff] retry POST {target} model={model} stream={stream} run={run_id} (thinking disabled via DeepSeek native)", file=sys.stderr)
|
||||
@@ -5352,6 +5638,17 @@ def main():
|
||||
print(f"translate-proxy ({BACKEND}) listening on http://127.0.0.1:{PORT}", flush=True)
|
||||
print(f"Target: {TARGET_URL}", flush=True)
|
||||
print(f"Models: {[m['id'] for m in MODELS]}", flush=True)
|
||||
if BACKEND == "codebuff":
|
||||
_fb_pool.load_accounts(force=True)
|
||||
fb_status = _fb_pool.status()
|
||||
print(f"[multi-account] codebuff: {len(fb_status)} accounts loaded {[a['id'] for a in fb_status]}", flush=True)
|
||||
if OAUTH_PROVIDER and OAUTH_PROVIDER.startswith("google"):
|
||||
pool = _google_antigravity_pool if OAUTH_PROVIDER == "google-antigravity" else _google_cli_pool
|
||||
pool.load_accounts(force=True)
|
||||
g_status = pool.status()
|
||||
print(f"[multi-account] {OAUTH_PROVIDER}: {len(g_status)} accounts loaded {[a['id'] for a in g_status]}", flush=True)
|
||||
if _api_key_pool:
|
||||
print(f"[multi-account] API keys: {len(_api_key_pool._accounts)} keys loaded", flush=True)
|
||||
if BGP_ROUTES:
|
||||
print(f"BGP routes: {len(BGP_ROUTES)} ({[r.get('name','?') for r in BGP_ROUTES]})", flush=True)
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user