v2.5.0: AI BGP multi-provider routing with automatic failover
- New AI BGP pool manager (create/edit/delete pools)
- Each pool has ordered routes from any configured endpoint
- Failover: tries the primary route, then falls back to the next route on error
- Pools appear in the endpoint dropdown with a shuffle icon
- Pool editor with route add/remove/reorder
- Fixed TOML breakage caused by multi-line paste
- Added OpenAdapter preset with 0G models
This commit is contained in:
@@ -84,23 +84,35 @@ MODELS = CONFIG["models"]
|
||||
CC_VERSION = CONFIG.get("cc_version", "")
|
||||
REASONING_ENABLED = CONFIG.get("reasoning_enabled", True)
|
||||
REASONING_EFFORT = CONFIG.get("reasoning_effort", "medium")
|
||||
BGP_ROUTES = CONFIG.get("bgp_routes", [])
# Unique model ids advertised by the BGP routes, in first-seen order.
BGP_MODELS = []
for _r in BGP_ROUTES:
    # A route either lists several "models" or names a single "model".
    for _m in _r.get("models", [{"id": _r.get("model", "unknown")}]):
        # Entries may be dicts ({"id": ...}) or bare id strings. Resolve the id
        # once, type-safely: calling .get() on a string would raise
        # AttributeError, and a dict without "id" must not be used as an id.
        _mid = _m.get("id") if isinstance(_m, dict) else _m
        if _mid and _mid not in BGP_MODELS:
            BGP_MODELS.append(_mid)
if BGP_ROUTES and not MODELS:
    # No explicit model list is configured: expose the route models instead.
    MODELS = [
        {"id": m, "object": "model", "created": 1700000000, "owned_by": "bgp"}
        for m in BGP_MODELS
    ]
    CONFIG["models"] = MODELS
|
||||
|
||||
def _refresh_oauth_token():
    """Return the bearer token to use for the default upstream.

    When the configured OAuth provider is not ``"google"`` the static
    ``API_KEY`` is returned as-is; otherwise the token is resolved through
    ``_refresh_oauth_token_for`` using the module-level key and provider.
    """
    uses_google_oauth = OAUTH_PROVIDER == "google"
    return _refresh_oauth_token_for(API_KEY, OAUTH_PROVIDER) if uses_google_oauth else API_KEY
|
||||
|
||||
def _refresh_oauth_token_for(api_key, oauth_provider):
|
||||
if oauth_provider != "google":
|
||||
return api_key
|
||||
token_path = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy", "google-oauth-token.json")
|
||||
if not os.path.exists(token_path):
|
||||
return API_KEY
|
||||
return api_key
|
||||
try:
|
||||
with open(token_path) as f:
|
||||
tokens = json.load(f)
|
||||
if tokens.get("expires_at", 0) > time.time() + 60:
|
||||
return tokens.get("access_token", API_KEY)
|
||||
return tokens.get("access_token", api_key)
|
||||
client_id = tokens.get("client_id", "")
|
||||
client_secret = tokens.get("client_secret", "")
|
||||
refresh_token = tokens.get("refresh_token", "")
|
||||
if not all([client_id, client_secret, refresh_token]):
|
||||
return tokens.get("access_token", API_KEY)
|
||||
return tokens.get("access_token", api_key)
|
||||
print("[oauth] refreshing Google access token...", file=sys.stderr)
|
||||
data = urllib.parse.urlencode({
|
||||
"client_id": client_id, "client_secret": client_secret,
|
||||
@@ -1006,7 +1018,6 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
def _handle_openai_compat(self, body, model, stream):
|
||||
input_data = body.get("input", "")
|
||||
|
||||
# Adaptive: proactively compact if above learned Crof limit
|
||||
crof_limit = _crof_item_limit(model)
|
||||
if isinstance(input_data, list) and len(input_data) > crof_limit:
|
||||
print(f"[crof-adaptive] proactive compact: {len(input_data)} items > limit {crof_limit}", file=sys.stderr)
|
||||
@@ -1018,6 +1029,29 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
instructions = body.get("instructions", "").strip()
|
||||
if instructions:
|
||||
messages.insert(0, {"role": "system", "content": instructions})
|
||||
|
||||
if BGP_ROUTES:
|
||||
self._handle_bgp(body, model, stream, messages, input_data)
|
||||
else:
|
||||
chat_body = self._build_chat_body(model, messages, body, stream)
|
||||
target = upstream_target(TARGET_URL, "/chat/completions")
|
||||
effective_key = _refresh_oauth_token()
|
||||
fwd = forwarded_headers(self.headers, {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {effective_key}",
|
||||
}, browser_ua=True)
|
||||
print(f"[translate-proxy] POST {target} model={model} stream={stream} items={len(input_data) if isinstance(input_data,list) else 1}", file=sys.stderr)
|
||||
req = urllib.request.Request(target, data=json.dumps(chat_body).encode(), headers=fwd)
|
||||
try:
|
||||
upstream = urllib.request.urlopen(req, timeout=180)
|
||||
except urllib.error.HTTPError as e:
|
||||
err = e.read().decode()
|
||||
return self.send_json(e.code, {"error": {"type": "upstream_error", "message": err}})
|
||||
except Exception as e:
|
||||
return self.send_json(500, {"error": {"type": "proxy_error", "message": str(e)}})
|
||||
self._forward_oa_compat(upstream, stream, model, chat_body, body, input_data, fwd, target)
|
||||
|
||||
def _build_chat_body(self, model, messages, body, stream):
|
||||
chat_body = {"model": model, "messages": messages}
|
||||
for k in ("temperature", "top_p"):
|
||||
if k in body:
|
||||
@@ -1034,31 +1068,63 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
chat_body["reasoning_effort"] = "none"
|
||||
else:
|
||||
chat_body["reasoning_effort"] = REASONING_EFFORT
|
||||
return chat_body
|
||||
|
||||
target = upstream_target(TARGET_URL, "/chat/completions")
|
||||
effective_key = _refresh_oauth_token()
|
||||
fwd = forwarded_headers(self.headers, {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {effective_key}",
|
||||
}, browser_ua=True)
|
||||
print(f"[translate-proxy] POST {target} model={model} stream={stream} items={len(input_data) if isinstance(input_data,list) else 1} ua={fwd.get('User-Agent','')[:50]}", file=sys.stderr)
|
||||
def _handle_bgp(self, body, model, stream, messages, input_data):
    """Send the chat request through the BGP routes with failover.

    Routes from ``BGP_ROUTES`` are tried in ascending ``priority`` order
    (routes without a priority sort as 99). For each route a chat-completions
    body is built from ``messages`` and the route's own model / reasoning
    settings, and a connection to the route's ``target_url`` is attempted.
    The first route that connects has its response relayed through
    ``_forward_oa_compat`` and the method returns. If every route fails to
    connect, a 502 ``bgp_all_routes_failed`` JSON error is sent.

    Args:
        body: The decoded incoming request body (dict).
        model: Model requested by the client; used when a route sets none.
        stream: Whether the client asked for a streamed response.
        messages: Chat messages already converted for the upstream API.
        input_data: The original ``input`` value, passed through to
            ``_forward_oa_compat``.
    """
    routes = sorted(BGP_ROUTES, key=lambda r: r.get("priority", 99))
    # Tool conversion does not depend on the route, so do it once.
    tools = oa_convert_tools(body.get("tools"))
    errors = []

    for route in routes:
        err_name = route.get("name", "?")
        r_url = (route.get("target_url") or "").rstrip("/")
        if not r_url:
            # A misconfigured route must not abort failover for the others.
            print(f"[bgp] route '{err_name}' SKIPPED: missing target_url", file=sys.stderr)
            errors.append(f"{err_name}: missing target_url")
            continue
        log_name = route.get("name", r_url)

        r_model = route.get("model", model)
        r_key = route.get("api_key", "")
        r_reasoning = route.get("reasoning_enabled", True)
        r_effort = route.get("reasoning_effort", "medium")
        r_oauth = route.get("oauth_provider", "")

        # Copy the message list so each route gets its own request body.
        chat_body = dict(messages=list(messages))
        chat_body["model"] = r_model
        for k in ("temperature", "top_p"):
            if k in body:
                chat_body[k] = body[k]
        # `or 0` guards against an explicit JSON null for max_output_tokens.
        # NOTE(review): max() makes 64000 a floor, not a cap — confirm that
        # min() was not intended.
        chat_body["max_tokens"] = max(body.get("max_output_tokens") or 0, 64000)
        if tools:
            chat_body["tools"] = tools
            if body.get("tool_choice"):
                chat_body["tool_choice"] = body["tool_choice"]
        chat_body["stream"] = stream
        if not r_reasoning or r_effort == "none":
            chat_body["enable_thinking"] = False
            chat_body["reasoning_effort"] = "none"
        else:
            chat_body["reasoning_effort"] = r_effort

        target = upstream_target(r_url, "/chat/completions")
        if r_oauth == "google":
            r_key = _refresh_oauth_token_for(r_key, r_oauth)
        fwd = forwarded_headers(self.headers, {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {r_key}",
        }, browser_ua=True)
        print(f"[bgp] trying route '{log_name}' model={r_model}", file=sys.stderr)
        req = urllib.request.Request(target, data=json.dumps(chat_body).encode(), headers=fwd)

        # Only the connection attempt is guarded. Once a response is being
        # relayed to the client, an error must not trigger another route
        # (that would write a second response on the same connection).
        try:
            upstream = urllib.request.urlopen(req, timeout=180)
        except urllib.error.HTTPError as e:
            err = e.read().decode(errors="replace")
            print(f"[bgp] route '{log_name}' FAILED: HTTP {e.code}: {err[:200]}", file=sys.stderr)
            errors.append(f"{err_name}: HTTP {e.code}")
            continue
        except Exception as e:
            print(f"[bgp] route '{log_name}' FAILED: {e}", file=sys.stderr)
            errors.append(f"{err_name}: {e}")
            continue

        print(f"[bgp] route '{log_name}' connected OK", file=sys.stderr)
        self._forward_oa_compat(upstream, stream, r_model, chat_body, body, input_data, fwd, target)
        return

    print(f"[bgp] ALL ROUTES FAILED: {errors}", file=sys.stderr)
    self.send_json(502, {"error": {"type": "bgp_all_routes_failed", "message": f"All BGP routes failed: {'; '.join(errors)}"}})
|
||||
|
||||
def _forward_oa_compat(self, upstream, stream, model, chat_body, body, input_data, fwd, target):
|
||||
n_items = len(input_data) if isinstance(input_data, list) else 1
|
||||
|
||||
if stream:
|
||||
|
||||
Reference in New Issue
Block a user