v2.5.0: AI BGP multi-provider routing with automatic failover

- New AI BGP pool manager (create/edit/delete pools)
- Each pool has ordered routes from any configured endpoint
- Failover: tries primary, falls back to next route on error
- Pools appear in endpoint dropdown with shuffle icon
- Pool editor with route add/remove/reorder
- Fixed TOML breakage from multi-line paste
- Added OpenAdapter preset with 0G models
This commit is contained in:
Roman
2026-05-20 16:40:57 +04:00
Unverified
parent 0f333aab6e
commit 12ca136fba
5 changed files with 606 additions and 34 deletions

View File

@@ -84,23 +84,35 @@ MODELS = CONFIG["models"]
# Optional settings read from CONFIG, with the defaults used when a key is absent.
CC_VERSION = CONFIG.get("cc_version", "")
REASONING_ENABLED = CONFIG.get("reasoning_enabled", True)
REASONING_EFFORT = CONFIG.get("reasoning_effort", "medium")

# BGP routes: ordered upstream endpoints that are tried in turn (see _handle_bgp).
BGP_ROUTES = CONFIG.get("bgp_routes", [])

# Unique model ids advertised by the routes, in first-seen order.
BGP_MODELS = []
for _r in BGP_ROUTES:
    # A route lists its models under "models"; when that key is missing or
    # empty, fall back to its single "model" entry ("unknown" if absent too).
    _route_models = _r.get("models") or [{"id": _r.get("model", "unknown")}]
    for _m in _route_models:
        # Entries may be dicts carrying an "id" or bare id strings. Resolve the
        # id before any lookup so a bare string never hits `.get`.
        _model_id = _m.get("id") if isinstance(_m, dict) else _m
        # Entries without an id are skipped instead of being stored as dicts.
        if _model_id and _model_id not in BGP_MODELS:
            BGP_MODELS.append(_model_id)

# With routes configured but no explicit model list, synthesize model entries
# from the route models and publish them back into CONFIG.
if BGP_ROUTES and not MODELS:
    MODELS = [
        {"id": m, "object": "model", "created": 1700000000, "owned_by": "bgp"}
        for m in BGP_MODELS
    ]
    CONFIG["models"] = MODELS
def _refresh_oauth_token():
    """Return the bearer credential for the default upstream.

    Yields ``API_KEY`` unchanged unless ``OAUTH_PROVIDER`` is ``"google"``;
    in that case the lookup is delegated to ``_refresh_oauth_token_for``
    with the module-level key and provider.
    """
    uses_google_oauth = OAUTH_PROVIDER == "google"
    if uses_google_oauth:
        return _refresh_oauth_token_for(API_KEY, OAUTH_PROVIDER)
    return API_KEY
def _refresh_oauth_token_for(api_key, oauth_provider):
if oauth_provider != "google":
return api_key
token_path = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy", "google-oauth-token.json")
if not os.path.exists(token_path):
return API_KEY
return api_key
try:
with open(token_path) as f:
tokens = json.load(f)
if tokens.get("expires_at", 0) > time.time() + 60:
return tokens.get("access_token", API_KEY)
return tokens.get("access_token", api_key)
client_id = tokens.get("client_id", "")
client_secret = tokens.get("client_secret", "")
refresh_token = tokens.get("refresh_token", "")
if not all([client_id, client_secret, refresh_token]):
return tokens.get("access_token", API_KEY)
return tokens.get("access_token", api_key)
print("[oauth] refreshing Google access token...", file=sys.stderr)
data = urllib.parse.urlencode({
"client_id": client_id, "client_secret": client_secret,
@@ -1006,7 +1018,6 @@ class Handler(http.server.BaseHTTPRequestHandler):
def _handle_openai_compat(self, body, model, stream):
input_data = body.get("input", "")
# Adaptive: proactively compact if above learned Crof limit
crof_limit = _crof_item_limit(model)
if isinstance(input_data, list) and len(input_data) > crof_limit:
print(f"[crof-adaptive] proactive compact: {len(input_data)} items > limit {crof_limit}", file=sys.stderr)
@@ -1018,6 +1029,29 @@ class Handler(http.server.BaseHTTPRequestHandler):
instructions = body.get("instructions", "").strip()
if instructions:
messages.insert(0, {"role": "system", "content": instructions})
if BGP_ROUTES:
self._handle_bgp(body, model, stream, messages, input_data)
else:
chat_body = self._build_chat_body(model, messages, body, stream)
target = upstream_target(TARGET_URL, "/chat/completions")
effective_key = _refresh_oauth_token()
fwd = forwarded_headers(self.headers, {
"Content-Type": "application/json",
"Authorization": f"Bearer {effective_key}",
}, browser_ua=True)
print(f"[translate-proxy] POST {target} model={model} stream={stream} items={len(input_data) if isinstance(input_data,list) else 1}", file=sys.stderr)
req = urllib.request.Request(target, data=json.dumps(chat_body).encode(), headers=fwd)
try:
upstream = urllib.request.urlopen(req, timeout=180)
except urllib.error.HTTPError as e:
err = e.read().decode()
return self.send_json(e.code, {"error": {"type": "upstream_error", "message": err}})
except Exception as e:
return self.send_json(500, {"error": {"type": "proxy_error", "message": str(e)}})
self._forward_oa_compat(upstream, stream, model, chat_body, body, input_data, fwd, target)
def _build_chat_body(self, model, messages, body, stream):
chat_body = {"model": model, "messages": messages}
for k in ("temperature", "top_p"):
if k in body:
@@ -1034,31 +1068,63 @@ class Handler(http.server.BaseHTTPRequestHandler):
chat_body["reasoning_effort"] = "none"
else:
chat_body["reasoning_effort"] = REASONING_EFFORT
return chat_body
target = upstream_target(TARGET_URL, "/chat/completions")
effective_key = _refresh_oauth_token()
fwd = forwarded_headers(self.headers, {
"Content-Type": "application/json",
"Authorization": f"Bearer {effective_key}",
}, browser_ua=True)
print(f"[translate-proxy] POST {target} model={model} stream={stream} items={len(input_data) if isinstance(input_data,list) else 1} ua={fwd.get('User-Agent','')[:50]}", file=sys.stderr)
def _handle_bgp(self, body, model, stream, messages, input_data):
    """Send the chat request through the BGP routes, failing over on error.

    Routes from ``BGP_ROUTES`` are tried in ascending ``priority`` order
    (a route without one sorts as 99). For each route a chat-completions
    body is built with that route's model and reasoning settings and POSTed
    to the route's ``target_url``. The first route whose connection succeeds
    has its response handed to ``_forward_oa_compat`` and the method returns.
    If every route fails, a 502 ``bgp_all_routes_failed`` error listing the
    per-route failures is sent.

    Failover happens only while connecting. Once a route has answered, any
    exception raised while forwarding propagates to the caller rather than
    triggering another route, because a response may already be in flight
    on this connection.

    Args:
        body: Decoded request body; ``temperature``, ``top_p``,
            ``max_output_tokens``, ``tools`` and ``tool_choice`` are read.
        model: Requested model id, used when a route names no ``model``.
        stream: Whether the response should be streamed.
        messages: Chat messages already translated from the request input.
        input_data: Original ``input`` value, passed through to the forwarder.
    """
    routes = sorted(BGP_ROUTES, key=lambda r: r.get("priority", 99))

    # Request-level settings are the same for every route; compute them once.
    sampling = {k: body[k] for k in ("temperature", "top_p") if k in body}
    # `or 0` guards against an explicit JSON null, which max() cannot compare.
    max_tokens = max(body.get("max_output_tokens") or 0, 64000)
    tools = oa_convert_tools(body.get("tools"))
    tool_choice = body.get("tool_choice")

    errors = []
    for route in routes:
        err_name = route.get("name", "?")
        r_url = (route.get("target_url") or "").rstrip("/")
        if not r_url:
            # A misconfigured route counts as a failed route, not a crash.
            print(f"[bgp] route '{err_name}' FAILED: missing target_url", file=sys.stderr)
            errors.append(f"{err_name}: missing target_url")
            continue
        label = route.get("name", r_url)
        r_model = route.get("model", model)
        r_key = route.get("api_key", "")
        r_reasoning = route.get("reasoning_enabled", True)
        r_effort = route.get("reasoning_effort", "medium")
        r_oauth = route.get("oauth_provider", "")

        # Shallow-copy the message list so each attempt has its own body.
        chat_body = dict(messages=list(messages))
        chat_body["model"] = r_model
        chat_body.update(sampling)
        chat_body["max_tokens"] = max_tokens
        if tools:
            chat_body["tools"] = tools
            # tool_choice is only forwarded together with a tool list.
            if tool_choice:
                chat_body["tool_choice"] = tool_choice
        chat_body["stream"] = stream
        if not r_reasoning or r_effort == "none":
            chat_body["enable_thinking"] = False
            chat_body["reasoning_effort"] = "none"
        else:
            chat_body["reasoning_effort"] = r_effort

        target = upstream_target(r_url, "/chat/completions")
        if r_oauth == "google":
            r_key = _refresh_oauth_token_for(r_key, r_oauth)
        fwd = forwarded_headers(self.headers, {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {r_key}",
        }, browser_ua=True)
        print(f"[bgp] trying route '{label}' model={r_model}", file=sys.stderr)
        req = urllib.request.Request(target, data=json.dumps(chat_body).encode(), headers=fwd)

        # Only the connection attempt is guarded, so a failure while relaying
        # the response cannot start a second response on another route.
        try:
            upstream = urllib.request.urlopen(req, timeout=180)
        except urllib.error.HTTPError as e:
            # errors="replace" keeps a non-UTF-8 error body from raising here.
            err = e.read().decode(errors="replace")
            print(f"[bgp] route '{label}' FAILED: HTTP {e.code}: {err[:200]}", file=sys.stderr)
            errors.append(f"{err_name}: HTTP {e.code}")
            continue
        except Exception as e:
            print(f"[bgp] route '{label}' FAILED: {e}", file=sys.stderr)
            errors.append(f"{err_name}: {e}")
            continue

        print(f"[bgp] route '{label}' connected OK", file=sys.stderr)
        self._forward_oa_compat(upstream, stream, r_model, chat_body, body, input_data, fwd, target)
        return

    print(f"[bgp] ALL ROUTES FAILED: {errors}", file=sys.stderr)
    self.send_json(502, {"error": {"type": "bgp_all_routes_failed", "message": f"All BGP routes failed: {'; '.join(errors)}"}})
def _forward_oa_compat(self, upstream, stream, model, chat_body, body, input_data, fwd, target):
n_items = len(input_data) if isinstance(input_data, list) else 1
if stream: