v2.5.0: AI BGP multi-provider routing with automatic failover

- New AI BGP pool manager (create/edit/delete pools)
- Each pool has ordered routes from any configured endpoint
- Failover: tries primary, falls back to next route on error
- Pools appear in endpoint dropdown with shuffle icon
- Pool editor with route add/remove/reorder
- Fixed TOML breakage from multi-line paste
- Added OpenAdapter preset with 0G models
2026-05-20 16:40:57 +04:00
parent 0f333aab6e
commit 12ca136fba
5 changed files with 606 additions and 34 deletions
--- a/src/translate-proxy.py
+++ b/src/translate-proxy.py
@@ -84,23 +84,35 @@ MODELS = CONFIG["models"]
 CC_VERSION = CONFIG.get("cc_version", "")
 REASONING_ENABLED = CONFIG.get("reasoning_enabled", True)
 REASONING_EFFORT = CONFIG.get("reasoning_effort", "medium")
+BGP_ROUTES = CONFIG.get("bgp_routes", [])
+# Unique model ids advertised by the BGP routes, in first-seen order.
+BGP_MODELS = []
+for _r in BGP_ROUTES:
+    # A route without a "models" list contributes its single "model" (or "unknown").
+    for _m in _r.get("models", [{"id": _r.get("model", "unknown")}]):
+        # Entries may be {"id": ...} dicts or bare id strings; resolve the id
+        # before the membership test so string entries never hit .get().
+        _mid = _m.get("id", _m) if isinstance(_m, dict) else _m
+        if _mid not in BGP_MODELS:
+            BGP_MODELS.append(_mid)
+# With routes configured but no explicit model list, advertise the route models.
+if BGP_ROUTES and not MODELS:
+    MODELS = [{"id": m, "object": "model", "created": 1700000000, "owned_by": "bgp"} for m in BGP_MODELS]
+    CONFIG["models"] = MODELS

 def _refresh_oauth_token():
-    if OAUTH_PROVIDER != "google":
-        return API_KEY
+    return _refresh_oauth_token_for(API_KEY, OAUTH_PROVIDER)
+
+def _refresh_oauth_token_for(api_key, oauth_provider):
+    if oauth_provider != "google":
+        return api_key
    token_path = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy", "google-oauth-token.json")
    if not os.path.exists(token_path):
-        return API_KEY
+        return api_key
    try:
        with open(token_path) as f:
            tokens = json.load(f)
        if tokens.get("expires_at", 0) > time.time() + 60:
-            return tokens.get("access_token", API_KEY)
+            return tokens.get("access_token", api_key)
        client_id = tokens.get("client_id", "")
        client_secret = tokens.get("client_secret", "")
        refresh_token = tokens.get("refresh_token", "")
        if not all([client_id, client_secret, refresh_token]):
-            return tokens.get("access_token", API_KEY)
+            return tokens.get("access_token", api_key)
        print("[oauth] refreshing Google access token...", file=sys.stderr)
        data = urllib.parse.urlencode({
            "client_id": client_id, "client_secret": client_secret,
@@ -1006,7 +1018,6 @@ class Handler(http.server.BaseHTTPRequestHandler):
    def _handle_openai_compat(self, body, model, stream):
        input_data = body.get("input", "")

-        # Adaptive: proactively compact if above learned Crof limit
        crof_limit = _crof_item_limit(model)
        if isinstance(input_data, list) and len(input_data) > crof_limit:
            print(f"[crof-adaptive] proactive compact: {len(input_data)} items > limit {crof_limit}", file=sys.stderr)
@@ -1018,6 +1029,29 @@ class Handler(http.server.BaseHTTPRequestHandler):
        instructions = body.get("instructions", "").strip()
        if instructions:
            messages.insert(0, {"role": "system", "content": instructions})
+
+        if BGP_ROUTES:
+            self._handle_bgp(body, model, stream, messages, input_data)
+        else:
+            chat_body = self._build_chat_body(model, messages, body, stream)
+            target = upstream_target(TARGET_URL, "/chat/completions")
+            effective_key = _refresh_oauth_token()
+            fwd = forwarded_headers(self.headers, {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {effective_key}",
+            }, browser_ua=True)
+            print(f"[translate-proxy] POST {target} model={model} stream={stream} items={len(input_data) if isinstance(input_data,list) else 1}", file=sys.stderr)
+            req = urllib.request.Request(target, data=json.dumps(chat_body).encode(), headers=fwd)
+            try:
+                upstream = urllib.request.urlopen(req, timeout=180)
+            except urllib.error.HTTPError as e:
+                err = e.read().decode()
+                return self.send_json(e.code, {"error": {"type": "upstream_error", "message": err}})
+            except Exception as e:
+                return self.send_json(500, {"error": {"type": "proxy_error", "message": str(e)}})
+            self._forward_oa_compat(upstream, stream, model, chat_body, body, input_data, fwd, target)
+
+    def _build_chat_body(self, model, messages, body, stream):
        chat_body = {"model": model, "messages": messages}
        for k in ("temperature", "top_p"):
            if k in body:
@@ -1034,31 +1068,63 @@ class Handler(http.server.BaseHTTPRequestHandler):
            chat_body["reasoning_effort"] = "none"
        else:
            chat_body["reasoning_effort"] = REASONING_EFFORT
+        return chat_body

-        target = upstream_target(TARGET_URL, "/chat/completions")
-        effective_key = _refresh_oauth_token()
-        fwd = forwarded_headers(self.headers, {
-            "Content-Type": "application/json",
-            "Authorization": f"Bearer {effective_key}",
-        }, browser_ua=True)
-        print(f"[translate-proxy] POST {target} model={model} stream={stream} items={len(input_data) if isinstance(input_data,list) else 1} ua={fwd.get('User-Agent','')[:50]}", file=sys.stderr)
+    def _handle_bgp(self, body, model, stream, messages, input_data):
+        """Try each BGP route in ascending priority and forward the first that connects.
+
+        Failover happens only when opening the upstream connection fails. Once a
+        route has connected, errors raised while forwarding propagate to the
+        caller: part of the response may already have been sent, so retrying on
+        another route would start a second response on the same connection.
+
+        Sends a 502 listing every route's error when no route connects.
+        """
+        routes = sorted(BGP_ROUTES, key=lambda r: r.get("priority", 99))
+        errors = []
+        for route in routes:
+            r_model = route.get("model", model)
+            r_url = route["target_url"].rstrip("/")
+            r_name = route.get("name", r_url)
+            r_key = route.get("api_key", "")
+            r_reasoning = route.get("reasoning_enabled", True)
+            r_effort = route.get("reasoning_effort", "medium")
+            r_oauth = route.get("oauth_provider", "")

-        req = urllib.request.Request(
-            target,
-            data=json.dumps(chat_body).encode(),
-            headers=fwd,
-        )
-        self._forward_oa_compat(req, stream, model, chat_body, body, input_data, fwd, target, tools)
+            # Copy the message list so each attempt serializes its own body.
+            chat_body = dict(messages=list(messages))
+            chat_body["model"] = r_model
+            for k in ("temperature", "top_p"):
+                if k in body:
+                    chat_body[k] = body[k]
+            # "or 0" tolerates an explicit null max_output_tokens from the client.
+            chat_body["max_tokens"] = max(body.get("max_output_tokens") or 0, 64000)
+            tools = oa_convert_tools(body.get("tools"))
+            if tools:
+                chat_body["tools"] = tools
+            if body.get("tool_choice"):
+                chat_body["tool_choice"] = body["tool_choice"]
+            chat_body["stream"] = stream
+            if not r_reasoning or r_effort == "none":
+                chat_body["enable_thinking"] = False
+                chat_body["reasoning_effort"] = "none"
+            else:
+                chat_body["reasoning_effort"] = r_effort

-    def _forward_oa_compat(self, req, stream, model, chat_body, body, input_data, fwd, target, tools):
-        try:
-            upstream = urllib.request.urlopen(req, timeout=180)
-        except urllib.error.HTTPError as e:
-            err = e.read().decode()
-            return self.send_json(e.code, {"error": {"type": "upstream_error", "message": err}})
-        except Exception as e:
-            return self.send_json(500, {"error": {"type": "proxy_error", "message": str(e)}})
+            target = upstream_target(r_url, "/chat/completions")
+            if r_oauth == "google":
+                r_key = _refresh_oauth_token_for(r_key, r_oauth)
+            fwd = forwarded_headers(self.headers, {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {r_key}",
+            }, browser_ua=True)
+            print(f"[bgp] trying route '{r_name}' model={r_model}", file=sys.stderr)
+            req = urllib.request.Request(target, data=json.dumps(chat_body).encode(), headers=fwd)
+            try:
+                upstream = urllib.request.urlopen(req, timeout=180)
+            except urllib.error.HTTPError as e:
+                # errors="replace": a non-UTF-8 error body must not abort failover.
+                err = e.read().decode(errors="replace")
+                print(f"[bgp] route '{r_name}' FAILED: HTTP {e.code}: {err[:200]}", file=sys.stderr)
+                errors.append(f"{route.get('name','?')}: HTTP {e.code}")
+            except Exception as e:
+                print(f"[bgp] route '{r_name}' FAILED: {e}", file=sys.stderr)
+                errors.append(f"{route.get('name','?')}: {e}")
+            else:
+                # Connected: forward outside the try so a failure while relaying the
+                # response is not mistaken for a route failure and retried.
+                print(f"[bgp] route '{r_name}' connected OK", file=sys.stderr)
+                self._forward_oa_compat(upstream, stream, r_model, chat_body, body, input_data, fwd, target)
+                return

+        print(f"[bgp] ALL ROUTES FAILED: {errors}", file=sys.stderr)
+        self.send_json(502, {"error": {"type": "bgp_all_routes_failed", "message": f"All BGP routes failed: {'; '.join(errors)}"}})
+
+    def _forward_oa_compat(self, upstream, stream, model, chat_body, body, input_data, fwd, target):
        n_items = len(input_data) if isinstance(input_data, list) else 1

        if stream: