diff --git a/.gitignore b/.gitignore
index 0745a38..7086f64 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,5 @@ config.toml
 *.swp
 *~
 .DS_Store
+DEBIAN/
+usr/
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 20111dd..39fcac9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,36 @@
 # Changelog
 
+## v3.8.4 (2026-05-24)
+
+**Critical Fix — Codebuff DeepSeek V4 Tool-Call Sessions Now Work**
+
+### Root Cause
+Codebuff proxies requests to DeepSeek V4, which defaults to **thinking mode enabled**. When DeepSeek returns `reasoning_content` in a streaming response that includes tool calls, subsequent requests must include that same `reasoning_content` in the assistant message history — otherwise DeepSeek's API rejects the request with HTTP 400: `"The reasoning_content in the thinking mode must be passed back to the API."`
+
+The previous approach tried to **disable thinking** (`enable_thinking: false`, `reasoning_effort: "none"`), but Codebuff doesn't reliably forward those parameters to DeepSeek. The retry system then tried stripping assistant messages from the history — which guarantees failure because DeepSeek needs the full context.
+
+### Fix — Full Reasoning Round-Trip System
+1. **Capture**: After each codebuff streaming response completes, extract `reasoning_content` + `tool_calls` from the stream deltas
+2. **Store**: Index by `tool_call_id` in `_deepseek_reasoning_store` (thread-safe dict with TTL)
+3. **Rebuild**: Before every codebuff POST, `_ds_rebuild_tool_history()` re-inserts stored assistant messages (with `reasoning_content`) before their matching `tool` messages
+4. **Fallback retry**: If the reasoning error still occurs, retry with DeepSeek's native `{"thinking": {"type": "disabled"}}` format
+5. **Primary path no longer disables thinking** — lets Codebuff/DeepSeek use default thinking mode with proper round-trip
+
+### Changes
+- **translate-proxy.py**: New `_ds_store_assistant()`, `_ds_rebuild_tool_history()` functions; `_deepseek_reasoning_store` / `_deepseek_reasoning_lock` globals
+- **translate-proxy.py**: `oa_stream_to_sse()` now captures tool_calls in `_reasoning_out` dict alongside reasoning text
+- **translate-proxy.py**: `_handle_codebuff()` stores assistant messages after stream completes; calls `_ds_rebuild_tool_history()` before POST
+- **translate-proxy.py**: Replaced broken `_fb_retry_no_reasoning()` + `_fb_retry_stripped()` with single `_fb_retry_thinking_disabled()` using native DeepSeek format
+- **translate-proxy.py**: Removed `enable_thinking`/`reasoning_effort` from primary codebuff chat_body
+- **codex-launcher-gui**: Version bumped to 3.8.4
+
+### Confirmed Working
+- Codebuff first request: 200 OK (always worked)
+- Codebuff second request after tool call: **now 200 OK** (was 400 reasoning_content error)
+- Multi-turn Codex CLI sessions with function calls complete successfully
+
+---
+
 ## v3.8.3 (2026-05-24)
 
 **Critical Fix — Codebuff Streaming Now Works End-to-End**
diff --git a/codex-launcher-gui b/codex-launcher-gui
new file mode 100755
index 0000000..d73189f
--- /dev/null
+++ b/codex-launcher-gui
@@ -0,0 +1,5051 @@
+#!/usr/bin/env python3
+"""Codex Launcher GUI — manage endpoints, launch Desktop or CLI with any provider."""
+
+import gi
+gi.require_version("Gtk", "3.0")
+from gi.repository import Gtk, GLib
+import subprocess, os, signal, sys, threading, time, json, urllib.request, urllib.parse, urllib.error, tempfile, shutil
+import hashlib, socket, ssl, contextlib, re, collections
+import base64, secrets, uuid, webbrowser
+from pathlib import Path
+
+HOME = Path.home()  # current user's home directory; base for the per-user paths below
+START_SH = Path("/opt/codex-desktop/start.sh")  # presumably the Codex Desktop start script — confirm against the install layout
+CONFIG = HOME / ".codex/config.toml"  # Codex config file under the user's home
+CONFIG_BAK = HOME / ".codex/config.toml.launcher-bak"  # sibling of CONFIG; presumably the launcher's backup copy — usage not visible here
+CLEANUP = HOME / ".local/bin/cleanup-codex-stale.sh"  # per-user cleanup script path
+PROXY = HOME / ".local/bin/translate-proxy.py"  # per-user translation proxy script path
+ENDPOINTS_FILE = HOME / ".codex/endpoints.json"  # JSON file for endpoint definitions (schema not visible here)
+BGP_POOLS_FILE = HOME / ".codex/bgp-pools.json"  # JSON file for BGP pool definitions (schema not visible here)
+LOG_DIR = HOME / ".cache/codex-desktop"  # directory that holds LAUNCH_LOG
+LAUNCH_LOG = LOG_DIR / "launcher.log"  # launcher log file
+PROXY_CONFIG_DIR = HOME / ".cache/codex-proxy"  # proxy cache/config directory
+DEFAULT_CONFIG = """model = ""
+model_provider = ""
+model_catalog_json = ""
+"""  # DEFAULT_CONFIG: TOML text with empty model, model_provider and model_catalog_json keys
+
+CHANGELOG = [  # release notes, newest first: (version, date "YYYY-MM-DD", [one-line change notes])
+    ("3.8.4", "2026-05-24", [  # NOTE(review): these items differ from the v3.8.4 section of CHANGELOG.md (DeepSeek reasoning round-trip) — confirm
+        "FIXED: Freebuff streaming — SSE events now reach Codex client",
+        "Root cause: stream_buffered_events was never called for freebuff",
+        "Freebuff stream uses buffered flushing (30ms / 4KB / urgent)",
+        "Freebuff OAuth — built-in login flow (no external CLI needed)",
+        "Freebuff API: reverse-engineered www.codebuff.com endpoints",
+        "Freebuff session management with instance ID (waiting room)",
+        "Freebuff agent run lifecycle (start/finish) with model routing",
+        "Free DeepSeek V4 Pro, V4 Flash, Kimi K2.6, MiniMax M2.7",
+        "Reasoning mode works with freebuff (thinking tokens supported)",
+        "GUI: Sandbox mode selector (Read-only / Workspace / Full Access)",
+        "GUI: Approval mode selector (Untrusted / On Request / Full Auto)",
+        "GUI: Freebuff Login button in endpoint editor",
+        "Fixed _STATS undefined error in /health endpoint",
+        "Fixed freebuff credential path (reads default account)",
+    ]),
+    ("3.8.1", "2026-05-24", [
+        "Freebuff integration — free DeepSeek V4 Pro, V4 Flash, Kimi K2.6, MiniMax M2.7",
+        "Freebuff backend: auto agent-run lifecycle, credential detection, model routing",
+        "Restored all provider presets (Command Code, Crof, OpenAdapter, OpenRouter, etc.)",
+        "AI Monitoring — self-healing watchdog with 3-tier response system",
+        "HealthWatcher: monitors proxy health every 5s, auto-restarts on crash",
+        "LogAnalyzer: tails debug logs for 18 failure signal patterns",
+        "Tier 1: 14 rule-based auto-recovery rules (< 1 s response)",
+        "Tier 2: Incident pattern store with success rate tracking",
+        "Tier 3: AI diagnostic agent — configurable provider/model for novel failures",
+        "30 fault types catalogued across 5 categories (A-E)",
+        "GUI: AI Monitor panel with ON/OFF, provider selector, incident log",
+        "Enhanced /health endpoint with memory and uptime metrics",
+    ]),
+    ("3.7.0", "2026-05-22", [
+        "Intelligence Routing — self-healing parser system for Command Code",
+        "Layer 1: Deep URL extraction from nested JSON in explore_agent blocks",
+        "Layer 2: Auto-proceed on require_escalation / request_escalation_permission blocks",
+        "Layer 3: Intent-based command synthesis when all parsers fail (5 heuristics)",
+        "Module-level _build_explore_cmd() — reuses URL extraction across parser + stream",
+        "54 self-test patterns covering all three Intelligence Routing layers",
+    ]),
+    ("3.6.0", "2026-05-22", [
+        "Connection pooling — persistent HTTPS connections per host",
+        "Stream idle timeout (300s) — kills silent streams instead of hanging",
+        "Retry-After header support on all retry paths",
+        "Bounded stream buffers (8MB) — prevents OOM",
+        "Dual logging to proxy.log + stderr",
+    ]),
+    ("3.5.0", "2026-05-22", [
+        "Command Code adapter overhaul — 17 patches for multi-format tool-call parsing",
+        "DSML, XML, explore_agent, bash blocks, raw JSON parser chain",
+        "Self-revive watchdog — auto-restarts proxy on crash",
+        "Debug-to-file logging in cc-debug.log",
+        "Inline self-test (19 patterns)",
+    ]),
+    ("3.3.0", "2026-05-20", [
+        "Antigravity + Gemini CLI OAuth — full Codex agent loop working",
+        "Auto-continue on MAX_TOKENS for Gemini/Antigravity",
+        "BGP++ route scoring and provider policy layer",
+    ]),
+    ("3.0.0", "2026-05-20", [
+        "Major overhaul — ThreadingHTTPServer, thread-safe state, graceful shutdown",
+        "Dynamic port allocation, proxy health gating, atomic config",
+        "Usage Dashboard v2 with dark theme",
+    ]),
+    ("2.7.0", "2026-05-20", [
+        "Usage Dashboard redesigned (OpenUsage-inspired dark theme)",
+        "TCP_NODELAY streaming, Anthropic prompt caching",
+    ]),
+    ("2.6.1", "2026-05-20", [
+        "Google OAuth rebuilt to emulate Gemini CLI — no client_secret.json needed",
+        "Uses Google's public OAuth client_id (same as gemini-cli)",
+        "PKCE + CSRF state protection for secure auth",
+        "Just click OAuth Login → browser opens → authorize → done",
+        "Includes cloud-platform scope for Gemini Code Assist compatibility",
+    ]),
+    ("2.6.0", "2026-05-20", [
+        "Usage Dashboard — per-provider request/token/latency tracking",
+        "Visual cards with success rate bars, model breakdown, error tracking",
+        "Google OAuth: browse for client_secret.json instead of fixed path",
+    ]),
+    ("2.5.1", "2026-05-20", [
+        "Adaptive retry for 429/502/503 errors with exponential backoff",
+        "BGP routes also retry transient errors before failing over",
+        "Proxy socket reuse — no more 'Address already in use' crashes",
+        "BGP route count shown at proxy startup",
+    ]),
+    ("2.5.0", "2026-05-20", [
+        "AI BGP — multi-provider routing with automatic failover",
+        "Create BGP pools with ordered routes from any configured endpoint",
+        "Each route uses its own endpoint URL, API key, and model",
+        "Failover strategy: tries primary, falls back on error/timeout",
+        "BGP pools appear in endpoint dropdown with shuffle icon",
+        "Up/down reordering for route priority in pool editor",
+        "Fixed TOML config breakage from multi-line paste in fields",
+    ]),
+    ("2.4.0", "2026-05-20", [
+        "Added OpenAdapter provider preset (api.openadapter.in)",
+        "One API key access to 40+ models — GLM, DeepSeek, Kimi, Qwen, Claude, GPT, Gemini",
+        "Fixed Add/Edit dialog crash (missing _on_reasoning_toggled method)",
+        "Redesigned Google OAuth flow with live status dialog",
+    ]),
+    ("2.3.2", "2026-05-20", [
+        "Added Google Gemini provider with OAuth support",
+        "Two presets: 'Google Gemini (API Key)' and 'Google Gemini (OAuth)'",
+        "OAuth Login button in endpoint editor — full Google OAuth2 flow with auto-refresh",
+        "Auto-refreshes OAuth access tokens when expired (no manual re-login needed)",
+        "Supports gemini-2.5-flash, gemini-2.5-pro, gemini-2.0-flash, and more",
+        "Uses Gemini's OpenAI-compatible endpoint — works with existing proxy",
+    ]),
+    ("2.3.0", "2026-05-20", [
+        "Adaptive Crof self-healing system — auto-adjusts to Crof model limits",
+        "Tracks per-model success/failure history, learns item count limits dynamically",
+        "Proactively compacts input when above learned limit before sending to Crof",
+        "Auto-retries on finish_reason=length — aggressively compacts and resends",
+        "Prevents 'stream disconnected' and 'incomplete' errors on long conversations",
+    ]),
+    ("2.2.1", "2026-05-20", [
+        "Fixed compaction orphaning function_call_output items — root cause of Crof incomplete responses",
+        "Compaction now respects function_call/function_call_output pairs — no more dangling tool results",
+        "Fixed reasoning control: reasoning_effort=none now always sends enable_thinking=false too",
+    ]),
+    ("2.2.0", "2026-05-20", [
+        "Added per-provider Reasoning On/Off toggle in endpoint editor",
+        "Added Reasoning Effort level per provider: None, Minimal, Low, Medium, High, Max",
+        "When reasoning is OFF: sends enable_thinking=false + reasoning_effort=none to upstream API",
+        "When reasoning is ON: sends user-selected effort level (default: Medium)",
+        "Fixes Crof mimo-v2.5-pro and similar reasoning models exhausting output tokens",
+        "Strip reasoning_content from proxy output — Codex doesn't use it",
+        "Force max_tokens=64000 minimum for openai-compat providers",
+    ]),
+    ("2.1.3", "2026-05-19", [
+        "Fixed Crof mimo-v2.5-pro stopping: reasoning_content exhausted all output tokens",
+        "Strip reasoning_content from proxy output — Codex doesn't use it, avoids token waste",
+        "Force max_tokens=64000 minimum for openai-compat providers — gives models room for both reasoning and content",
+    ]),
+    ("2.1.2", "2026-05-19", [
+        "Fixed Crof.ai and providers stopping after first tool call (root cause: None tool IDs)",
+        "Codex sends function_call items with id=None — proxy now matches tool results to calls by position",
+        "Fixed orphan message output item when response has only tool calls (no text)",
+        "Auto-trims long conversations (>30 items) to prevent context overflow on providers like Crof",
+        "Added request/response logging to ~/.cache/codex-proxy/requests.log",
+    ]),
+    ("2.1.1", "2026-05-19", [
+        "Fixed proxy: map 'developer' role to 'system' for Chat Completions providers",
+        "Fixed proxy: map 'developer' role to 'user' for Anthropic providers",
+        "Forward 'instructions' field from Responses API as system message/param",
+        "Fixes DeepSeek and other providers rejecting unknown 'developer' role",
+    ]),
+    ("2.1.0", "2026-05-19", [
+        "Added Codex auth status detection (codex login status)",
+        "Added Re-login button to re-authenticate via codex login",
+        "Auto-checks auth before launching Codex Default mode",
+        "Warns if OAuth token expired or missing before launch",
+    ]),
+    ("2.0.1", "2026-05-19", [
+        "Added Codex CLI/Desktop installation verifier to main page",
+        "Disables Desktop/CLI launch buttons when corresponding tool is missing",
+        "Shows install instructions in status area on startup",
+    ]),
+    ("2.0.0", "2026-05-19", [
+        "Initial release: multi-provider Codex Launcher",
+        "Translation proxy: Responses API to Chat Completions + Anthropic Messages",
+        "GTK endpoint manager with 10+ provider presets",
+        "Codex Default mode (built-in OAuth, zero config)",
+        "Browser UA injection for Cloudflare-protected providers (OpenCode)",
+        "Streaming SSE, tool calls, reasoning content support",
+        "Profile backup/import, model auto-fetch, bulk import",
+        "Refresh Models in background thread",
+        "URL normalization to prevent double-path bugs",
+        "Config backup/restore around sessions",
+        ".deb installer package",
+    ]),
+]
+
+PROVIDER_PRESETS = {  # preset display name -> endpoint defaults: backend_type, base_url, models, plus optional cc_version / oauth_provider
+    "Custom": {
+        "backend_type": "openai-compat",
+        "base_url": "",
+        "models": [],
+    },
+    "OpenAI": {
+        "backend_type": "native",
+        "base_url": "https://api.openai.com/v1",
+        "models": ["gpt-4o", "gpt-4o-mini"],
+    },
+    "Anthropic": {
+        "backend_type": "anthropic",
+        "base_url": "https://api.anthropic.com/v1",
+        "models": ["claude-sonnet-4-5", "claude-3-5-haiku-latest"],
+    },
+    "OpenCode Zen (OpenAI-compatible)": {
+        "backend_type": "openai-compat",
+        "base_url": "https://opencode.ai/zen/v1",
+        "models": [
+            "glm-5.1", "glm-5", "kimi-k2.5", "kimi-k2.6",
+            "minimax-m2.7", "minimax-m2.5", "minimax-m2.5-free",
+            "deepseek-v4-flash-free", "nemotron-3-super-free",
+            "qwen3.6-plus", "qwen3.5-plus", "big-pickle",
+        ],
+    },
+    "OpenCode Zen (Anthropic)": {
+        "backend_type": "anthropic",
+        "base_url": "https://opencode.ai/zen/v1",
+        "models": [
+            "claude-opus-4-7", "claude-opus-4-6", "claude-opus-4-5",
+            "claude-opus-4-1", "claude-sonnet-4-6", "claude-sonnet-4-5",
+            "claude-sonnet-4", "claude-haiku-4-5", "claude-3-5-haiku",
+        ],
+    },
+    "OpenCode Go (OpenAI-compatible)": {
+        "backend_type": "openai-compat",
+        "base_url": "https://opencode.ai/zen/go/v1",
+        "models": [
+            "glm-5.1", "glm-5", "kimi-k2.5", "kimi-k2.6",
+            "mimo-v2.5", "mimo-v2.5-pro", "minimax-m2.7", "minimax-m2.5",
+            "qwen3.6-plus", "qwen3.5-plus", "deepseek-v4-pro", "deepseek-v4-flash",
+        ],
+    },
+    "OpenCode Go (Anthropic)": {
+        "backend_type": "anthropic",
+        "base_url": "https://opencode.ai/zen/go/v1",
+        "models": ["minimax-m2.7", "minimax-m2.5"],
+    },
+    "Crof.ai": {
+        "backend_type": "openai-compat",
+        "base_url": "https://crof.ai/v1",
+        "models": [],
+    },
+    "NVIDIA NIM": {
+        "backend_type": "openai-compat",
+        "base_url": "https://integrate.api.nvidia.com/v1",
+        "models": [],
+    },
+    "Kilo.ai Gateway": {
+        "backend_type": "openai-compat",
+        "base_url": "https://api.kilo.ai/api/gateway",
+        "models": [],
+    },
+    "Command Code": {
+        "backend_type": "command-code",
+        "base_url": "https://api.commandcode.ai",
+        "cc_version": "0.26.8",
+        "models": [
+            "deepseek/deepseek-v4-flash", "deepseek/deepseek-v4-pro",
+            "anthropic:claude-sonnet-4-6", "anthropic:claude-haiku-4-5-20251001",
+            "anthropic:claude-opus-4-7", "anthropic:claude-opus-4-6",
+            "openai:gpt-5.5", "openai:gpt-5.4", "openai:gpt-5.4-mini", "openai:gpt-5.3-codex",
+            "moonshotai/Kimi-K2.6", "moonshotai/Kimi-K2.5",
+            "zai-org/GLM-5.1", "zai-org/GLM-5",
+            "MiniMaxAI/MiniMax-M2.7", "MiniMaxAI/MiniMax-M2.5",
+            "Qwen/Qwen3.6-Max-Preview", "Qwen/Qwen3.6-Plus",
+            "stepfun/Step-3.5-Flash", "google/gemini-3.1-flash-lite",
+        ],
+    },
+    "OpenRouter": {
+        "backend_type": "openai-compat",
+        "base_url": "https://openrouter.ai/api/v1",
+        "models": [],
+    },
+    "Google Gemini (API Key)": {
+        "backend_type": "openai-compat",
+        "base_url": "https://generativelanguage.googleapis.com/v1beta/openai",
+        "models": [
+            "gemini-2.5-flash", "gemini-2.5-pro",
+            "gemini-2.0-flash", "gemini-2.0-flash-lite",
+            "gemini-2.5-flash-preview-native-audio-dialog",
+        ],
+    },
+    "Google Gemini (OAuth)": {
+        "backend_type": "gemini-oauth-cli",
+        "base_url": "https://cloudcode-pa.googleapis.com",
+        "oauth_provider": "google-cli",
+        "models": [
+            "gemini-2.5-flash", "gemini-2.5-pro",
+        ],
+    },
+    "Google Antigravity (OAuth)": {
+        "backend_type": "gemini-oauth-antigravity",
+        "base_url": "https://daily-cloudcode-pa.sandbox.googleapis.com",
+        "oauth_provider": "google-antigravity",
+        "models": [
+            "antigravity-gemini-3-flash",
+            "antigravity-gemini-3-pro",
+            "antigravity-gemini-3.1-pro",
+            "antigravity-claude-sonnet-4-6",
+            "antigravity-claude-opus-4-6-thinking",
+            "gemini-2.5-flash", "gemini-2.5-pro",
+            "gemini-3-flash-preview", "gemini-3-pro-preview", "gemini-3.1-pro-preview",
+        ],
+    },
+    "OpenAdapter": {
+        "backend_type": "openai-compat",
+        "base_url": "https://api.openadapter.in/v1",
+        "models": [
+            "0G-DeepSeek-V3",
+            "0G-DeepSeek-v4-Pro",
+            "0G-GLM-5",
+            "0G-GLM-5.1",
+            "0G-Qwen3.6",
+            "0G-Qwen-VL",
+        ],
+    },
+    "Z.ai Coding": {
+        "backend_type": "openai-compat",
+        "base_url": "https://api.z.ai/api/coding/paas/v4",
+        "models": [
+            "glm-5.1", "glm-4.7", "GLM-4-Plus", "GLM-4-Long",
+            "GLM-4-Flash", "GLM-4-FlashX", "GLM-Z1-Flash",
+        ],
+    },
+    "Freebuff (Free DeepSeek/Kimi)": {
+        "backend_type": "freebuff",
+        "base_url": "https://freebuff.com",
+        "oauth_provider": "freebuff",
+        "models": [
+            "deepseek/deepseek-v4-pro", "deepseek/deepseek-v4-flash",
+            "moonshotai/kimi-k2.6", "minimax/minimax-m2.7",
+        ],
+    },
+}
+
+def safe_name(name):  # returns "<sanitized name>-<8 hex chars>" derived from an endpoint name
+    base = "".join(ch if ch.isalnum() or ch in "._-" else "_" for ch in name).strip("._-") or "endpoint"  # chars outside alnum/._- become "_"; an empty result falls back to "endpoint"
+    digest = hashlib.sha1(name.encode("utf-8")).hexdigest()[:8]  # hash of the raw name, so names that sanitize to the same base still differ (barring collisions)
+    return f"{base}-{digest}"
+
+def label_for_backend(backend_type):  # map a backend_type key to its display label
+    return {
+        "openai-compat": "OpenAI-compatible",
+        "anthropic": "Anthropic",
+        "command-code": "Command Code",
+        "freebuff": "Freebuff (Free AI)",
+        "native": "Native",
+    }.get(backend_type, backend_type)  # unknown backend types are returned unchanged
+
+def normalize_model_id(text):  # lowercase slug of a model name using only alphanumerics, "." and "-"; "" for blank input
+    value = text.strip().lower()
+    if not value:
+        return ""
+    value = value.replace("/", "-")  # NOTE(review): "vendor/model" ids lose their slash and case here — confirm callers expect a slug, not the upstream id
+    value = value.replace("+", "plus")
+    value = "".join(ch if ch.isalnum() or ch in ".-" else "-" for ch in value)  # every other character becomes "-"
+    while "--" in value:  # collapse runs of "-" into a single "-"
+        value = value.replace("--", "-")
+    return value.strip("-.")  # drop leading/trailing "-" and "."
+
+def normalize_base_url(url):  # trim whitespace, trailing slashes and one trailing API path suffix from a base URL
+    base = (url or "").strip().rstrip("/")  # None or empty input is treated as ""
+    for suffix in ("/chat/completions", "/responses", "/messages"):
+        if base.endswith(suffix):
+            base = base[: -len(suffix)]
+            break  # at most one suffix is removed
+    return base.rstrip("/")
+
+def parse_model_list(text):  # parse comma- or newline-separated model names into normalized, de-duplicated ids
+    out = []  # result, in first-seen order
+    seen = set()  # normalized ids already emitted
+    for raw in text.replace(",", "\n").splitlines():  # commas and newlines both act as separators
+        mid = normalize_model_id(raw)
+        if mid and mid not in seen:  # skip blanks and repeats
+            seen.add(mid)
+            out.append(mid)
+    return out
+
+def apply_provider_preset(endpoint, preset_name):  # return a shallow copy of endpoint with the named preset's defaults applied
+    preset = PROVIDER_PRESETS.get(preset_name)
+    if not preset:
+        return endpoint  # unknown preset: the original dict itself is returned, not a copy
+    updated = dict(endpoint)  # shallow copy, so the caller's dict is not modified
+    updated["provider_preset"] = preset_name
+    updated["backend_type"] = preset["backend_type"]  # backend_type and base_url always come from the preset
+    updated["base_url"] = normalize_base_url(preset["base_url"])
+    if preset.get("cc_version") and not updated.get("cc_version"):  # keep an existing cc_version if the endpoint has one
+        updated["cc_version"] = preset["cc_version"]
+    if not updated.get("models") or (preset.get("backend_type") or "").startswith("gemini-oauth"):  # gemini-oauth presets always replace the model list
+        updated["models"] = list(preset.get("models", []))  # copy, so the preset's list is not shared
+    if preset.get("oauth_provider"):
+        updated["oauth_provider"] = preset["oauth_provider"]
+    if not updated.get("default_model") and updated.get("models"):  # default to the first model when none is set
+        updated["default_model"] = updated["models"][0]
+    return updated
+
+def _doctor_check_streaming(base_url, key, bt, model, add):
+    if bt == "anthropic":
+        test_url = f"{base_url}/v1/messages"
+        headers = {"x-api-key": key, "anthropic-version": "2023-06-01", "content-type": "application/json"}
+        body = json.dumps({"model": model or "claude-3-5-haiku-20241022", "max_tokens": 1, "stream": True,
+                           "messages": [{"role": "user", "content": "hi"}]}).encode()
+    else:
+        test_url = f"{base_url}/chat/completions"
+        headers = {"Authorization": f"Bearer {key}", "content-type": "application/json"}
+        body = json.dumps({"model": model, "max_tokens": 1, "stream": True,
+                           "messages": [{"role": "user", "content": "hi"}]}).encode()
+    try:
+        req = urllib.request.Request(test_url, data=body, headers=headers, method="POST")
+        t0 = time.time()
+        resp = urllib.request.urlopen(req, timeout=20)
+        content_type = resp.headers.get("content-type", "")
+        first_chunk = resp.read(512)
+        lat = (time.time() - t0) * 1000
+        is_sse = "text/event-stream" in content_type or first_chunk.startswith(b"data:")
+        if is_sse:
+            add("Streaming support", True, f"SSE OK in {lat:.0f}ms")
+        else:
+            add("Streaming support", False, f"Expected SSE, got {content_type[:60]}")
+    except urllib.error.HTTPError as e:
+        body_text = ""
+        try:
+            body_text = e.read(200).decode(errors="replace")
+        except Exception:
+            pass
+        if e.code == 429:
+            add("Streaming support", None, "Rate limited (skipped)")
+        elif e.code in (400, 404, 422):
+            add("Streaming support", False, f"HTTP {e.code}: {body_text[:80]}")
+        else:
+            add("Streaming support", False, f"HTTP {e.code}")
+    except Exception as e:
+        add("Streaming support", False, str(e)[:100])
+
+def _doctor_check_toolcall(base_url, key, bt, model, add):
+    tool = {"type": "function", "function": {"name": "test_tool", "parameters": {"type": "object", "properties": {"x": {"type": "string"}}}}}
+    if bt == "anthropic":
+        test_url = f"{base_url}/v1/messages"
+        headers = {"x-api-key": key, "anthropic-version": "2023-06-01", "content-type": "application/json"}
+        body = json.dumps({"model": model or "claude-3-5-haiku-20241022", "max_tokens": 50, "stream": False,
+                           "tools": [tool], "messages": [{"role": "user", "content": "Use the test_tool with x=hello"}]}).encode()
+    else:
+        test_url = f"{base_url}/chat/completions"
+        headers = {"Authorization": f"Bearer {key}", "content-type": "application/json"}
+        body = json.dumps({"model": model, "max_tokens": 50, "stream": False, "tools": [tool],
+                           "messages": [{"role": "user", "content": "Use the test_tool with x=hello"}]}).encode()
+    try:
+        req = urllib.request.Request(test_url, data=body, headers=headers, method="POST")
+        t0 = time.time()
+        resp = urllib.request.urlopen(req, timeout=30)
+        raw = resp.read()
+        lat = (time.time() - t0) * 1000
+        payload = json.loads(raw)
+        has_tools = False
+        if bt == "anthropic":
+            for block in (payload.get("content") or []):
+                if block.get("type") == "tool_use":
+                    has_tools = True
+                    break
+        else:
+            choices = payload.get("choices") or []
+            for ch in choices:
+                if (ch.get("message", {}).get("tool_calls")):
+                    has_tools = True
+                    break
+        if has_tools:
+            add("Tool-call support", True, f"Tool call received in {lat:.0f}ms")
+        else:
+            add("Tool-call support", None, f"Responded but no tool_call ({lat:.0f}ms)")
+    except urllib.error.HTTPError as e:
+        if e.code == 429:
+            add("Tool-call support", None, "Rate limited (skipped)")
+        elif e.code in (400, 404, 422):
+            err_body = ""
+            try:
+                err_body = e.read(200).decode(errors="replace")
+            except Exception:
+                pass
+            add("Tool-call support", False, f"HTTP {e.code}: {err_body[:80]}")
+        else:
+            add("Tool-call support", False, f"HTTP {e.code}")
+    except Exception as e:
+        add("Tool-call support", False, str(e)[:100])
+
+def run_endpoint_doctor(endpoint):
+    """Comprehensive health checks for an endpoint. Returns [(name, ok, detail), ...].
+    ok: True=pass, False=fail, None=warn/skip."""
+    checks = []
+    def add(name, ok, detail=""):
+        checks.append((name, ok, detail))
+
+    url = normalize_base_url(endpoint.get("base_url") or "")
+    key = (endpoint.get("api_key") or "").strip()
+    bt = endpoint.get("backend_type", "openai-compat")
+    model = endpoint.get("default_model") or endpoint.get("models", [""])[0] if endpoint.get("models") else ""
+
+    # 1. URL format
+    parsed = urllib.parse.urlparse(url)
+    has_url = bool(parsed.scheme and parsed.netloc)
+    add("URL format", has_url, url if has_url else "Missing scheme or host")
+    if not has_url:
+        return checks
+
+    host = parsed.hostname
+    port = parsed.port or (443 if parsed.scheme == "https" else 80)
+
+    # 2. DNS resolution
+    try:
+        t0 = time.time()
+        addrs = socket.getaddrinfo(host, port, socket.AF_UNSPEC, socket.SOCK_STREAM)
+        dns_ms = (time.time() - t0) * 1000
+        add("DNS resolution", True, f"{addrs[0][4][0]} ({dns_ms:.0f}ms)")
+    except socket.gaierror as e:
+        add("DNS resolution", False, str(e))
+        return checks
+
+    # 3. TCP/TLS connection
+    try:
+        t0 = time.time()
+        sock = socket.create_connection((host, port), timeout=10)
+        tcp_ms = (time.time() - t0) * 1000
+        if parsed.scheme == "https":
+            ctx = ssl.create_default_context()
+            try:
+                ssock = ctx.wrap_socket(sock, server_hostname=host)
+                tls_ms = (time.time() - t0) * 1000
+                add("TLS connection", True, f"TCP {tcp_ms:.0f}ms + handshake {tls_ms:.0f}ms")
+                ssock.close()
+            except ssl.SSLError as e:
+                add("TLS certificate", False, str(e)[:120])
+                sock.close()
+                return checks
+        else:
+            add("TCP connection", True, f"{tcp_ms:.0f}ms")
+            sock.close()
+    except (socket.timeout, ConnectionRefusedError, OSError) as e:
+        add("TCP connection", False, str(e)[:100])
+        return checks
+
+    # 4. Auth + /models (backend-aware)
+    if bt == "anthropic":
+        add("/models endpoint", None, "Anthropic has no /models endpoint — testing via /messages")
+        try:
+            t0 = time.time()
+            msg_url = f"{url}/v1/messages"
+            body = json.dumps({"model": model or "claude-3-5-haiku-20241022", "max_tokens": 1,
+                               "messages": [{"role": "user", "content": "hi"}]}).encode()
+            req = urllib.request.Request(msg_url, data=body, headers={
+                "x-api-key": key, "anthropic-version": "2023-06-01", "content-type": "application/json",
+            }, method="POST")
+            urllib.request.urlopen(req, timeout=15)
+            lat = (time.time() - t0) * 1000
+            add("Auth valid", True, f"Responded in {lat:.0f}ms")
+        except urllib.error.HTTPError as e:
+            if e.code in (401, 403):
+                add("Auth valid", False, f"HTTP {e.code} — check API key")
+            elif e.code == 400:
+                add("Auth valid", True, "Authenticated (model or param error)")
+            else:
+                add("Auth valid", False, f"HTTP {e.code}")
+        except Exception as e:
+            add("Auth valid", False, str(e)[:100])
+    elif bt.startswith("gemini-oauth"):
+        token_name = "google-antigravity-oauth-token.json" if "antigravity" in bt else "google-cli-oauth-token.json"
+        token_path = Path.home() / f".cache/codex-proxy/{token_name}"
+        if token_path.exists():
+            try:
+                td = json.loads(token_path.read_text())
+                exp = td.get("expires_at", 0)
+                if exp > time.time():
+                    remaining = exp - time.time()
+                    add("OAuth token", True, f"Valid ({remaining / 60:.0f} min remaining)")
+                else:
+                    add("OAuth token", False, "Token expired — re-login required")
+            except Exception as e:
+                add("OAuth token", False, str(e)[:80])
+        else:
+            add("OAuth token", False, f"No token file ({token_name})")
+        try:
+            t0 = time.time()
+            ids, err = fetch_models_for_endpoint(endpoint)
+            lat = (time.time() - t0) * 1000
+            if ids:
+                add("Network reachable", True, f"{lat:.0f}ms")
+                add("/models endpoint", True, f"{len(ids)} models ({lat:.0f}ms)")
+                if model:
+                    add("Selected model exists", model in ids,
+                        model if model in ids else f"'{model}' not in {ids[:5]}...")
+            elif err and ("401" in str(err) or "403" in str(err)):
+                add("Network reachable", True, f"{lat:.0f}ms")
+                add("Auth valid", False, str(err)[:100])
+            else:
+                add("Network reachable", False, str(err or "no response")[:100])
+        except Exception as e:
+            add("Network", False, str(e)[:100])
+    else:
+        try:
+            t0 = time.time()
+            ids, err = fetch_models_for_endpoint(endpoint)
+            lat = (time.time() - t0) * 1000
+            if ids:
+                add("Network reachable", True, f"{lat:.0f}ms")
+                add("Auth valid", True)
+                add("/models endpoint", True, f"{len(ids)} models ({lat:.0f}ms)")
+                if model:
+                    add("Selected model exists", model in ids,
+                        model if model in ids else f"'{model}' not found in {len(ids)} models")
+                else:
+                    add("Selected model", False, "No model selected")
+            elif err and ("401" in str(err) or "403" in str(err)):
+                add("Network reachable", True, f"{lat:.0f}ms")
+                add("Auth valid", False, f"HTTP 401/403 — check API key")
+            elif err and "429" in str(err):
+                add("Network reachable", True, f"{lat:.0f}ms")
+                add("Auth valid", True, "Authenticated but rate-limited")
+                add("/models endpoint", None, "Rate limited — skipped")
+            else:
+                add("Network reachable", False, str(err or "no response")[:100])
+        except Exception as e:
+            add("Network", False, str(e)[:100])
+
+    # 5. Streaming smoke test
+    if bt not in ("native", "command-code"):
+        _doctor_check_streaming(url, key, bt, model, add)
+
+    # 6. Tool-call support test
+    if bt not in ("native", "command-code"):
+        _doctor_check_toolcall(url, key, bt, model, add)
+
+    return checks
+
+def _show_doctor_results(parent, endpoint_name, checks):
+    """Show the doctor results for one endpoint in a modal dialog.
+
+    ``checks`` is a sequence of ``(name, ok, detail)`` tuples where ``ok`` is
+    True (passed), False (failed) or None (warning/skipped).  The call blocks
+    until the dialog is closed.
+    """
+    def esc(value):
+        # Details frequently carry raw exception text such as
+        # "<urlopen error ...>" or URLs with "&"; unescaped, Pango rejects the
+        # whole markup string and the label renders empty.
+        return GLib.markup_escape_text(str(value), -1)
+
+    dlg = Gtk.Dialog(title=f"Doctor: {endpoint_name}", parent=parent, modal=True)
+    dlg.add_button("Close", Gtk.ResponseType.CLOSE)
+    dlg.set_default_size(480, 400)
+    area = dlg.get_content_area()
+    area.set_margin_start(12)
+    area.set_margin_end(12)
+    area.set_margin_top(12)
+    area.set_margin_bottom(12)
+    area.set_spacing(4)
+    passed = sum(1 for _, ok, _ in checks if ok is True)
+    failed = sum(1 for _, ok, _ in checks if ok is False)
+    warned = sum(1 for _, ok, _ in checks if ok is None)
+    hdr = Gtk.Label()
+    hdr.set_markup(f'<b>{esc(endpoint_name)}</b>  '
+                   f'<span foreground="#27ae60">{passed} passed</span>  '
+                   f'<span foreground="#e74c3c">{failed} failed</span>  '
+                   f'<span foreground="#f39c12">{warned} warnings</span>')
+    area.pack_start(hdr, False, False, 6)
+    sep = Gtk.Separator()
+    area.pack_start(sep, False, False, 4)
+    for name, ok, detail in checks:
+        row = Gtk.Box(spacing=6)
+        if ok is True:
+            color, sym = "#27ae60", "\u2713"
+        elif ok is False:
+            color, sym = "#e74c3c", "\u2717"
+        else:
+            color, sym = "#f39c12", "\u25CB"
+        icon = Gtk.Label()
+        icon.set_markup(f'<span foreground="{color}" weight="bold">{sym}</span>')
+        row.pack_start(icon, False, False, 0)
+        lbl = Gtk.Label()
+        lbl.set_markup(f'<b>{esc(name)}</b>')
+        row.pack_start(lbl, False, False, 0)
+        if detail:
+            det = Gtk.Label()
+            det.set_markup(f'<span foreground="#7f8c8d" size="small">{esc(detail)}</span>')
+            det.set_line_wrap(True)
+            row.pack_end(det, False, False, 0)
+        area.pack_start(row, False, False, 2)
+    dlg.show_all()
+    dlg.run()
+    dlg.destroy()
+
+def endpoint_models_url(endpoint):
+    """Return the ``/models`` listing URL for *endpoint*, or "" when it has no base URL."""
+    root = normalize_base_url(endpoint.get("base_url") or "")
+    return f"{root}/models" if root else ""
+
+def endpoint_model_headers(endpoint):
+    """Build the auth headers used when listing an endpoint's models.
+
+    Anthropic backends get ``x-api-key`` (only when a key is set) plus a fixed
+    ``anthropic-version``; every other backend gets a bearer token when a key
+    is set and no headers otherwise.
+    """
+    api_key = (endpoint.get("api_key") or "").strip()
+    if endpoint.get("backend_type", "openai-compat") == "anthropic":
+        hdrs = {"x-api-key": api_key} if api_key else {}
+        hdrs["anthropic-version"] = "2023-06-01"
+        return hdrs
+    return {"Authorization": f"Bearer {api_key}"} if api_key else {}
+
+def fetch_models_for_endpoint(endpoint, timeout=10):
+    """Fetch the de-duplicated model ids advertised by *endpoint*.
+
+    Returns ``(ids, None)`` on success or ``(None, error_message)`` on any
+    failure.  The error message is ``str(exception)``; callers inspect it for
+    HTTP status codes, so it is passed through unchanged.
+    """
+    url = endpoint_models_url(endpoint)
+    if not url:
+        return None, "Base URL is empty"
+    try:
+        req = urllib.request.Request(url, headers=endpoint_model_headers(endpoint))
+        # Close the response deterministically instead of leaking the socket
+        # until garbage collection.
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            raw = resp.read()
+        payload = json.loads(raw)
+        if isinstance(payload, dict):
+            items = payload.get("data") or payload.get("models") or []
+        elif isinstance(payload, list):
+            # Some servers return the model list as a bare JSON array.
+            items = payload
+        else:
+            items = []
+        ids = []
+        seen = set()
+        for item in items:
+            mid = item.get("id") if isinstance(item, dict) else None
+            if mid and mid not in seen:
+                seen.add(mid)
+                ids.append(mid)
+        if not ids:
+            return None, "No models returned"
+        return ids, None
+    except Exception as e:
+        return None, str(e)
+
+def refresh_endpoint_models(endpoint):
+    """Return ``(updated_copy, None)`` with a freshly fetched model list, or ``(None, error)``.
+
+    The input dict is not mutated.  If the current default model is not in
+    the fetched list, the first fetched id becomes the default.
+    """
+    model_ids, error = fetch_models_for_endpoint(endpoint)
+    if not model_ids:
+        return None, error
+    fresh = {**endpoint, "models": model_ids}
+    if fresh.get("default_model") not in model_ids:
+        fresh["default_model"] = model_ids[0]
+    return fresh, None
+
+# ═══════════════════════════════════════════════════════════════════
+# Endpoint storage
+# ═══════════════════════════════════════════════════════════════════
+
+def load_endpoints():
+    """Load the endpoint registry, falling back to an empty one if missing or unreadable."""
+    empty = {"default": None, "endpoints": []}
+    if not ENDPOINTS_FILE.exists():
+        return empty
+    try:
+        return json.loads(ENDPOINTS_FILE.read_text())
+    except Exception:
+        return empty
+
+def save_endpoints(data):
+    """Persist the endpoint registry as indented JSON.
+
+    Endpoint entries carry API keys, so the file is written through
+    ``write_secure_text``: owner-only permissions and an atomic replace, so a
+    crash mid-write cannot leave a truncated registry behind.
+    """
+    write_secure_text(ENDPOINTS_FILE, json.dumps(data, indent=2))
+
+def load_bgp_pools():
+    """Load the BGP pool definitions, or an empty pool list if missing or unreadable."""
+    empty = {"pools": []}
+    if not BGP_POOLS_FILE.exists():
+        return empty
+    try:
+        return json.loads(BGP_POOLS_FILE.read_text())
+    except Exception:
+        return empty
+
+def save_bgp_pools(data):
+    """Write the BGP pool definitions as indented JSON, creating the directory if needed."""
+    target = BGP_POOLS_FILE
+    target.parent.mkdir(parents=True, exist_ok=True)
+    serialized = json.dumps(data, indent=2)
+    target.write_text(serialized)
+
+def get_endpoint(name):
+    """Return the first stored endpoint whose ``name`` equals *name*, or None."""
+    return next(
+        (ep for ep in load_endpoints()["endpoints"] if ep["name"] == name),
+        None,
+    )
+
+def now_utc_iso():
+    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ``."""
+    utc_now = time.gmtime()
+    return time.strftime("%Y-%m-%dT%H:%M:%SZ", utc_now)
+
+def build_profile_bundle():
+    """Collect the endpoint registry and the current Codex config text into an export dict."""
+    exported_at = now_utc_iso()
+    endpoints = load_endpoints()
+    if CONFIG.exists():
+        config_text = CONFIG.read_text(encoding="utf-8")
+    else:
+        config_text = ""
+    return {
+        "version": 1,
+        "exported_at": exported_at,
+        "endpoints": endpoints,
+        "codex_config_toml": config_text,
+    }
+
+def save_profile_bundle(path):
+    """Export the current profile bundle to *path* as UTF-8 JSON."""
+    serialized = json.dumps(build_profile_bundle(), indent=2)
+    Path(path).write_text(serialized, encoding="utf-8")
+
+def import_profile_bundle(path):
+    """Replace the local profile with the bundle stored at *path*.
+
+    Returns the imported endpoint registry.  Raises ``ValueError`` when the
+    bundle is structurally invalid; nothing on disk is touched in that case.
+    The current config and endpoint file are copied aside first as a
+    rollback point.
+    """
+    data = json.loads(Path(path).read_text(encoding="utf-8"))
+    if not isinstance(data, dict):
+        raise ValueError("Invalid profile bundle")
+
+    endpoints = data.get("endpoints")
+    if not isinstance(endpoints, dict) or "endpoints" not in endpoints:
+        raise ValueError("Profile bundle missing endpoints")
+    # Readers iterate endpoints["endpoints"]; reject a non-list here instead
+    # of persisting a registry that breaks them later.
+    if not isinstance(endpoints["endpoints"], list):
+        raise ValueError("Profile bundle missing endpoints")
+
+    # Keep a local rollback point before overwriting the current profile.
+    if CONFIG.exists():
+        shutil.copy2(str(CONFIG), str(CONFIG_BAK))
+    if ENDPOINTS_FILE.exists():
+        shutil.copy2(str(ENDPOINTS_FILE), str(ENDPOINTS_FILE.with_suffix(".json.import-bak")))
+
+    save_endpoints(endpoints)
+
+    cfg = data.get("codex_config_toml", "")
+    if isinstance(cfg, str) and cfg.strip():
+        # The config may embed a bearer token, so write it owner-only and
+        # atomically like the other config writers in this file.
+        write_secure_text(CONFIG, cfg)
+    return endpoints
+
+# ═══════════════════════════════════════════════════════════════════
+# Config management
+# ═══════════════════════════════════════════════════════════════════
+
+def backup_config():
+    """Copy the Codex config to its backup path via a temp file and atomic replace."""
+    if not CONFIG.exists():
+        return
+    staging = CONFIG_BAK.with_suffix(".tmp")
+    shutil.copy2(str(CONFIG), str(staging))
+    os.replace(str(staging), str(CONFIG_BAK))
+
+def restore_config():
+    """Copy the backup over the Codex config via a temp file and atomic replace."""
+    if not CONFIG_BAK.exists():
+        return
+    staging = CONFIG.with_suffix(".tmp")
+    shutil.copy2(str(CONFIG_BAK), str(staging))
+    os.replace(str(staging), str(CONFIG))
+
+def write_secure_text(path, text):
+    """Atomically write *text* to *path* with owner-only (0600) permissions.
+
+    The temp file is created with mode 0600 *before* any content is written,
+    so the secret is never readable through umask-default permissions, then
+    moved into place with ``os.replace``.
+    """
+    path.parent.mkdir(parents=True, exist_ok=True)
+    tmp = path.with_suffix(path.suffix + ".tmp")
+    fd = os.open(str(tmp), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+    with os.fdopen(fd, "w", encoding="utf-8") as fh:
+        # The mode passed to os.open is ignored when a stale temp file already
+        # exists, so tighten permissions explicitly before writing.
+        os.chmod(str(tmp), 0o600)
+        fh.write(text)
+    os.replace(str(tmp), str(path))
+
+# Marker file written by begin_config_transaction() and removed by
+# end_config_transaction(); recover_config_if_needed() treats its presence as
+# an interrupted session.
+CONFIG_TXN = HOME / ".codex/config.toml.launcher-txn.json"
+
+def begin_config_transaction(reason):
+    """Back up the config (if any) and drop a marker recording that a session started."""
+    marker = {
+        "started_at": time.time(),
+        "reason": reason,
+        "config_existed": CONFIG.exists(),
+        "backup_path": str(CONFIG_BAK),
+    }
+    if CONFIG.exists():
+        backup_config()
+    CONFIG_TXN.parent.mkdir(parents=True, exist_ok=True)
+    serialized = json.dumps(marker, indent=2)
+    CONFIG_TXN.write_text(serialized)
+
+def end_config_transaction():
+    # Remove the transaction marker; a missing marker is not an error.
+    CONFIG_TXN.unlink(missing_ok=True)
+
+def recover_config_if_needed(logfn=None):
+    """Undo config changes left behind by an interrupted session.
+
+    Does nothing when no transaction marker exists.  Otherwise:
+
+    * the config existed before the session and a backup is present:
+      restore the backup;
+    * the config existed but the backup is gone: leave the current config
+      alone (deleting it would destroy the only copy);
+    * the config did not exist before the session: remove the generated one.
+
+    The marker is always removed.  ``logfn``, if given, receives a one-line
+    description of what happened.
+    """
+    if not CONFIG_TXN.exists():
+        return
+    try:
+        try:
+            txn = json.loads(CONFIG_TXN.read_text())
+        except (OSError, ValueError) as e:
+            # The marker is written non-atomically, so a crash can leave it
+            # truncated; without it we cannot know what to undo.
+            if logfn:
+                logfn(f"Ignoring unreadable config transaction marker: {e}")
+            return
+        if not isinstance(txn, dict):
+            if logfn:
+                logfn("Ignoring malformed config transaction marker.")
+            return
+        if txn.get("config_existed"):
+            if CONFIG_BAK.exists():
+                restore_config()
+                if logfn:
+                    logfn("Recovered Codex config from interrupted session.")
+            elif logfn:
+                logfn("Config backup missing; leaving current Codex config untouched.")
+        elif CONFIG.exists():
+            CONFIG.unlink()
+            if logfn:
+                logfn("Removed generated config from interrupted session.")
+    finally:
+        CONFIG_TXN.unlink(missing_ok=True)
+
+def write_config_for_native(endpoint, selected_model):
+    """Write the Codex config for a native endpoint (no proxy needed).
+
+    Backs up the existing config, writes the endpoint's model catalog JSON
+    next to the proxy configs, then writes a config pointing Codex directly
+    at the endpoint's base URL with its resolved API key.
+    """
+    backup_config()
+    model_catalog = _gen_model_catalog(endpoint, selected_model)
+    mc_path = PROXY_CONFIG_DIR / f"models-{safe_name(endpoint['name'])}.json"
+    mc_path.parent.mkdir(parents=True, exist_ok=True)
+    mc_path.write_text(json.dumps(model_catalog, indent=2))
+
+    # The endpoint name is also used as a quoted TOML table key; escape it
+    # there too, otherwise a quote in the name yields an invalid config.
+    name = _toml_safe(endpoint["name"])
+    model = _toml_safe(selected_model)
+    lines = [
+        f'model = "{model}"\n',
+        f'model_provider = "{name}"\n',
+        f'model_catalog_json = "{mc_path}"\n',
+        f'\n[model_providers."{name}"]\n',
+        f'name = "{name}"\n',
+        f'base_url = "{_toml_safe(endpoint["base_url"])}"\n',
+        f'experimental_bearer_token = "{_toml_safe(_resolve_secret(endpoint["api_key"]))}"\n',
+        f'\n[profiles."{name}"]\n',
+        f'model_provider = "{name}"\n',
+        f'model = "{model}"\n',
+        f'model_catalog_json = "{mc_path}"\n',
+        f'service_tier = "default"\n',
+        f'approvals_reviewer = "user"\n',
+    ]
+    write_secure_text(CONFIG, "".join(lines))
+
+def _toml_safe(val):
+    """Return *val* as text safe to place inside a TOML basic ("...") string.
+
+    Only the first line is kept and surrounding whitespace is stripped.
+    Backslashes are escaped before quotes: otherwise a value ending in a
+    backslash would escape the closing quote, and sequences like ``\\d`` would
+    be rejected as invalid TOML escapes.
+    """
+    first_line = str(val).split('\n', 1)[0].strip()
+    return first_line.replace('\\', '\\\\').replace('"', '\\"')
+
+def _resolve_secret(value):
+    """Resolve a stored secret.
+
+    A value of the exact form ``${ENV:NAME}`` is replaced by that environment
+    variable ("" when unset); anything else is returned stripped.
+    """
+    text = (value or "").strip()
+    env_ref = re.fullmatch(r"\$\{ENV:([A-Z0-9_]+)\}", text)
+    if env_ref is None:
+        return text
+    return os.environ.get(env_ref.group(1), "")
+
+def write_config_for_translated(endpoint, selected_model, proxy_port=8080):
+    """Write the Codex config for an endpoint reached through the local proxy.
+
+    Backs up the existing config, writes the endpoint's model catalog JSON,
+    then writes a config pointing Codex at ``http://127.0.0.1:<proxy_port>``
+    with a fixed local bearer token (the real key stays with the proxy).
+    """
+    backup_config()
+    model_catalog = _gen_model_catalog(endpoint, selected_model)
+    mc_path = PROXY_CONFIG_DIR / f"models-{safe_name(endpoint['name'])}.json"
+    mc_path.parent.mkdir(parents=True, exist_ok=True)
+    mc_path.write_text(json.dumps(model_catalog, indent=2))
+
+    # The endpoint name is also used as a quoted TOML table key; escape it
+    # there too, otherwise a quote in the name yields an invalid config.
+    name = _toml_safe(endpoint["name"])
+    model = _toml_safe(selected_model)
+    lines = [
+        f'model = "{model}"\n',
+        f'model_provider = "{name}"\n',
+        f'model_catalog_json = "{mc_path}"\n',
+        f'\n[model_providers."{name}"]\n',
+        f'name = "{name}"\n',
+        f'base_url = "http://127.0.0.1:{proxy_port}"\n',
+        f'experimental_bearer_token = "codex-launcher-local"\n',
+        f'\n[profiles."{name}"]\n',
+        f'model_provider = "{name}"\n',
+        f'model = "{model}"\n',
+        f'model_catalog_json = "{mc_path}"\n',
+        f'service_tier = "fast"\n',
+        f'approvals_reviewer = "user"\n',
+    ]
+    write_secure_text(CONFIG, "".join(lines))
+
+def _gen_model_catalog(endpoint, selected_model=None):
+    """Build the model catalog dict for every model listed on *endpoint*.
+
+    The model equal to *selected_model* (or, when that is falsy, the
+    endpoint's ``default_model``) is flagged ``isDefault``.  All other fields
+    are fixed defaults.
+    """
+    chosen = selected_model or endpoint.get("default_model")
+    efforts = (
+        ("low", "Fast"),
+        ("medium", "Balanced"),
+        ("high", "Deep"),
+        ("xhigh", "Extra deep"),
+    )
+
+    def describe(mid):
+        # Fresh lists per entry so catalog entries never share mutable state.
+        return {
+            "slug": mid, "model": mid, "display_name": mid,
+            "description": f"{endpoint['name']} {mid}",
+            "hidden": False, "isDefault": mid == chosen,
+            "shell_type": "shell_command", "visibility": "list",
+            "default_reasoning_level": "medium",
+            "supported_reasoning_levels": [
+                {"effort": level, "description": label} for level, label in efforts
+            ],
+            "supportedReasoningEfforts": [
+                {"reasoningEffort": level, "description": label} for level, label in efforts
+            ],
+            "priority": 30, "context_size": 128000,
+            "additional_speed_tiers": [], "service_tiers": [],
+            "supports_reasoning_summaries": True, "support_verbosity": True,
+            "reasoning": True, "tool_call": True,
+            "supports_parallel_tool_calls": True,
+            "experimental_supported_tools": [], "supported_in_api": True,
+            "truncation_policy": {"mode": "tokens", "limit": 128000},
+            "base_instructions": "You are Codex, a coding agent.",
+        }
+
+    return {"models": [describe(mid) for mid in endpoint.get("models", [])]}
+
+# ═══════════════════════════════════════════════════════════════════
+# Proxy management
+# ═══════════════════════════════════════════════════════════════════
+
+# Popen handle of the proxy started by this launcher (None when not running);
+# assigned in _start_proxy_with_config() and cleared in _stop_proxy().
+_proxy_proc = None
+# Port picked for the most recently started proxy (set in _start_proxy_for()).
+_proxy_port = None
+
+# JSON file mapping a process kind (e.g. "proxy") to the pid/pgid this
+# launcher started, so safe_cleanup_owned() can stop them later.
+PID_REGISTRY = HOME / ".cache" / "codex-launcher" / "pids.json"
+
+def _pick_free_port():
+    """Ask the OS for a currently unused loopback TCP port and return its number."""
+    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    with probe:
+        # Port 0 lets the kernel choose; the socket is closed again on exit.
+        probe.bind(("127.0.0.1", 0))
+        _host, port = probe.getsockname()
+        return port
+
+def _load_pid_registry():
+    """Return the PID registry contents, or {} when the file is missing or unreadable."""
+    if not PID_REGISTRY.exists():
+        return {}
+    try:
+        return json.loads(PID_REGISTRY.read_text())
+    except Exception:
+        return {}
+
+def _save_pid_registry(data):
+    """Write the PID registry through a temp file and an atomic replace."""
+    PID_REGISTRY.parent.mkdir(parents=True, exist_ok=True)
+    staging = PID_REGISTRY.with_suffix(".tmp")
+    serialized = json.dumps(data, indent=2)
+    staging.write_text(serialized)
+    os.replace(str(staging), str(PID_REGISTRY))
+
+def _register_pgid(kind, pid):
+    """Record *pid* and its process group under *kind* in the PID registry.
+
+    Silently does nothing when the process has already gone away.
+    """
+    registry = _load_pid_registry()
+    try:
+        group = os.getpgid(pid)
+    except ProcessLookupError:
+        return
+    registry[kind] = {"pid": pid, "pgid": group, "ts": time.time()}
+    _save_pid_registry(registry)
+
+def safe_cleanup_owned(logfn=None):
+    """Send SIGTERM to every process group recorded in the PID registry.
+
+    Entries that were stopped (or whose group no longer exists) are dropped
+    from the registry.  Entries that could not be signalled for another
+    reason are kept so a later cleanup can retry them.  ``logfn``, if given,
+    receives one line per handled entry.
+    """
+    data = _load_pid_registry()
+    if not isinstance(data, dict):
+        return
+    own_pgid = os.getpgrp()
+    remaining = {}
+    changed = False
+    for kind, meta in data.items():
+        pgid = meta.get("pgid") if isinstance(meta, dict) else None
+        if not isinstance(pgid, int) or pgid <= 1:
+            # Malformed entry; nothing safe to signal.
+            continue
+        if pgid == own_pgid:
+            # A recycled id that now names our own group: signalling it would
+            # terminate the launcher itself.
+            changed = True
+            continue
+        try:
+            os.killpg(pgid, signal.SIGTERM)
+            if logfn:
+                logfn(f"Stopped {kind} (pgid {pgid})")
+            changed = True
+        except ProcessLookupError:
+            changed = True
+        except Exception as e:
+            remaining[kind] = meta
+            if logfn:
+                logfn(f"Could not stop {kind}: {e}")
+    if changed:
+        _save_pid_registry(remaining)
+
+def _start_proxy_for(endpoint, logfn):
+    """Start a fresh translation proxy for *endpoint* and return its port.
+
+    Stops any proxy this launcher already runs, picks a free loopback port,
+    writes the proxy's JSON config and launches it.  Raises whatever
+    ``_start_proxy_with_config`` raises when the proxy does not come up.
+    """
+    global _proxy_proc, _proxy_port
+    # Clear stale Python bytecode cache so proxy picks up latest source changes
+    import shutil
+    pycache = os.path.join(os.path.dirname(os.path.abspath(__file__)), '__pycache__')
+    if os.path.isdir(pycache):
+        shutil.rmtree(pycache, ignore_errors=True)
+    _stop_proxy()
+    port = _pick_free_port()
+    _proxy_port = port
+
+    model_list = endpoint.get("models", [])
+    if (endpoint.get("backend_type") or "").startswith("gemini-oauth") and (endpoint.get("oauth_provider") or "").startswith("google"):
+        token_name = "google-antigravity-oauth-token.json" if endpoint.get("oauth_provider") == "google-antigravity" else "google-cli-oauth-token.json"
+        token_path = os.path.expanduser(f"~/.cache/codex-proxy/{token_name}")
+        try:
+            with open(token_path) as tf:
+                td = json.load(tf)
+            # Only the non-antigravity token file's "available_models" list
+            # is allowed to override the configured models.
+            discovered = [] if endpoint.get("oauth_provider") == "google-antigravity" else td.get("available_models", [])
+            if discovered:
+                model_list = discovered
+        except Exception:
+            pass
+    pcfg = {
+        "port": port,
+        "backend_type": endpoint["backend_type"],
+        "target_url": normalize_base_url(endpoint["base_url"]),
+        "api_key": endpoint["api_key"],
+        "cc_version": endpoint.get("cc_version", ""),
+        "oauth_provider": endpoint.get("oauth_provider", ""),
+        "reasoning_enabled": endpoint.get("reasoning_enabled", True),
+        "reasoning_effort": endpoint.get("reasoning_effort", "medium"),
+        "models": [{"id": m, "object": "model", "created": 1700000000, "owned_by": endpoint["name"]}
+                   for m in model_list],
+    }
+    pcfg_path = PROXY_CONFIG_DIR / f"proxy-{safe_name(endpoint['name'])}-{port}.json"
+    # The proxy config embeds the upstream API key: write it owner-only and
+    # atomically rather than with umask-default permissions.
+    write_secure_text(pcfg_path, json.dumps(pcfg, indent=2))
+    _start_proxy_with_config(pcfg_path, port, logfn)
+    return port
+
+def _start_proxy_with_config(pcfg_path, port, logfn):
+    """Launch the proxy with *pcfg_path* and wait until it answers on *port*.
+
+    The proxy runs in its own session/process group, its stderr is forwarded
+    line by line to *logfn* via the GLib main loop, and ``/v1/models`` is
+    polled for up to 15 seconds.  Raises ``RuntimeError`` when the process
+    exits early or never becomes healthy (it is killed in the latter case).
+    """
+    global _proxy_proc
+    # Work on a local handle: _stop_proxy() may reset the global to None
+    # while the reader thread or the polling loop below is still running.
+    proc = subprocess.Popen(
+        ["python3", str(PROXY), "--config", str(pcfg_path)],
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.PIPE,
+        preexec_fn=os.setsid,
+        text=True,
+    )
+    _proxy_proc = proc
+    _register_pgid("proxy", proc.pid)
+
+    def _pipe_stderr():
+        if not proc.stderr:
+            return
+        for line in proc.stderr:
+            GLib.idle_add(logfn, f"[proxy] {line.rstrip()}")
+    threading.Thread(target=_pipe_stderr, daemon=True).start()
+
+    deadline = time.time() + 15
+    last_err = None
+    while time.time() < deadline:
+        if proc.poll() is not None:
+            raise RuntimeError(f"Proxy exited early with code {proc.returncode}")
+        try:
+            with urllib.request.urlopen(f"http://127.0.0.1:{port}/v1/models", timeout=2):
+                pass
+            logfn(f"Proxy ready on port {port}")
+            return
+        except Exception as e:
+            last_err = e
+            time.sleep(0.3)
+    # Health check never succeeded: terminate the whole group, escalating to
+    # SIGKILL if it does not exit within 3 seconds.
+    try:
+        os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
+        proc.wait(timeout=3)
+    except Exception:
+        with contextlib.suppress(Exception):
+            os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
+    raise RuntimeError(f"Proxy failed health check on port {port}: {last_err}")
+
+def _stop_proxy():
+    """Stop the proxy started by this launcher, if it is still running.
+
+    Sends SIGTERM to the proxy's process group, waits up to 0.5 s for the
+    proxy to exit, then escalates to SIGKILL.  The child is waited on in both
+    cases so it does not linger as a zombie.
+    """
+    global _proxy_proc
+    proc = _proxy_proc
+    if proc and proc.poll() is None:
+        try:
+            pgid = os.getpgid(proc.pid)
+            os.killpg(pgid, signal.SIGTERM)
+            try:
+                proc.wait(timeout=0.5)
+            except subprocess.TimeoutExpired:
+                os.killpg(pgid, signal.SIGKILL)
+                # Reap the killed child; give up quietly if it is stuck.
+                with contextlib.suppress(subprocess.TimeoutExpired):
+                    proc.wait(timeout=2)
+        except (ProcessLookupError, PermissionError):
+            pass
+        _proxy_proc = None
+
+def _kill_existing_desktop(logfn=None):
+    """Terminate a running Codex Desktop, if ``pgrep`` finds one.
+
+    The process group of the first matching pid gets SIGTERM, then SIGKILL
+    two seconds later.  Any failure is reported through *logfn* (when given)
+    and otherwise ignored.
+    """
+    import subprocess as _sp
+    try:
+        found = _sp.run(["pgrep", "-f", "/opt/codex-desktop/electron"], capture_output=True, text=True, timeout=5)
+        candidates = [ln for ln in found.stdout.strip().splitlines() if ln.strip().isdigit()]
+        if not candidates:
+            return
+        main_pid = int(candidates[0])
+        pgid = os.getpgid(main_pid)
+        if not pgid > 0:
+            return
+        os.killpg(pgid, signal.SIGTERM)
+        if logfn:
+            logfn(f"Killed existing Codex Desktop (pid {main_pid}, pgid {pgid})")
+        time.sleep(2)
+        try:
+            os.killpg(pgid, signal.SIGKILL)
+        except (ProcessLookupError, PermissionError):
+            pass
+    except Exception as e:
+        if logfn:
+            logfn(f"Note: could not kill existing Desktop: {e}")
+
+def _run_cleanup(logfn=None):
+    # Thin wrapper: delegates to safe_cleanup_owned() with the same logger.
+    safe_cleanup_owned(logfn)
+
+def _last_log_lines(n=15):
+    """Return the last *n* lines of the launch log, or a placeholder if it cannot be read."""
+    try:
+        all_lines = LAUNCH_LOG.read_text().splitlines()
+        tail = all_lines[-n:]
+        return "\n".join(tail)
+    except Exception:
+        return "(no log file)"
+
+def _detect_codex_cli():
+    """Return ``(path, version_text)`` for the ``codex`` CLI, or None if it is unavailable.
+
+    The version text is the first non-empty of stdout and stderr from
+    ``codex --version``, or "unknown" when both are empty.
+    """
+    try:
+        location = shutil.which("codex")
+        if not location:
+            return None
+        proc = subprocess.run(["codex", "--version"], capture_output=True, text=True, timeout=5)
+        for stream in (proc.stdout, proc.stderr):
+            version = (stream or "").strip()
+            if version:
+                break
+        else:
+            version = "unknown"
+        return (location, version)
+    except Exception:
+        return None
+
+def _detect_codex_desktop():
+    """Return the Desktop start script path as a string when it exists, else None."""
+    return str(START_SH) if START_SH.exists() else None
+
+def _check_codex_auth():
+    """Run ``codex login status`` and classify the outcome.
+
+    Returns ``(state, text)`` where state is "logged_in" (exit code 0 with
+    output), "error" (output with a non-zero exit, or any other exception),
+    "unknown" (no output at all) or "not_installed" (executable missing).
+    """
+    try:
+        proc = subprocess.run(
+            ["codex", "login", "status"],
+            capture_output=True, text=True, timeout=10,
+        )
+        # Prefer stdout; fall back to stderr when stdout is empty.
+        text = (proc.stdout or "").strip() or (proc.stderr or "").strip()
+        if not text:
+            return ("unknown", "No output from codex login status")
+        state = "logged_in" if proc.returncode == 0 else "error"
+        return (state, text)
+    except FileNotFoundError:
+        return ("not_installed", "codex not found")
+    except Exception as e:
+        return ("error", str(e))
+
+# ═══════════════════════════════════════════════════════════════════
+# AI Monitoring — Self-Healing Watchdog
+# ═══════════════════════════════════════════════════════════════════
+
+# Persisted monitoring settings (see _load_monitoring_config()).
+MONITORING_FILE = Path.home() / ".cache/codex-proxy/monitoring-config.json"
+# Persisted incident history and AI-call statistics (see IncidentStore).
+INCIDENT_STORE_FILE = Path.home() / ".cache/codex-proxy/incident-store.json"
+# Append-only text log written by _monitoring_log().
+MONITORING_LOG = Path.home() / ".cache/codex-proxy/monitoring.log"
+
+# Static rule table consulted by HealthWatcher._handle_failure(): each entry
+# is (trigger, action, cooldown).  The cooldown is compared against
+# time.time() values, so it is presumably in seconds; confirm in
+# _handle_failure().
+_TIER1_RULES = [
+    ("proxy_health_fail",      "restart_proxy",         30),
+    ("proxy_port_conflict",    "kill_stale_restart",    60),
+    ("upstream_429",           "wait_retry",             0),
+    ("upstream_502_503",       "retry_backoff",         30),
+    ("upstream_500_repeat",    "switch_provider",       60),
+    ("upstream_timeout",       "retry_increase_timeout",30),
+    ("upstream_401_403",       "alert_bad_key",          0),
+    ("stream_broken_pipe",     "restart_proxy",         30),
+    ("stream_reset",           "restart_proxy",         30),
+    ("parsed_tool_calls_0_x3", "clear_schema_cache",   300),
+    ("sanitizer_suspicious_5x","alert_model_issue",      0),
+    ("stuck_recovery_x5",      "suggest_switch_model",   0),
+    ("codex_process_dead",     "alert_restart",           0),
+    ("schema_corrupt",         "delete_provider_caps",    0),
+]
+
+# Log-text marker -> (fault id, category).  The categories are the values
+# HealthWatcher._on_log_signal() switches on.
+# NOTE(review): the matcher is not visible here.  Most keys look like plain
+# substrings, but "content_type.*array" looks like a regex; confirm how keys
+# are matched.
+_FAILURE_SIGNALS = {
+    "parsed_tool_calls=0":      ("C1", "parser_empty"),
+    "[STUCK-RECOVERY]":         ("C3", "stuck_recovery"),
+    "suspicious cmd":           ("C4", "sanitizer_flag"),
+    "empty cmd recovered":      ("C6", "empty_cmd"),
+    "HTTP 429":                 ("B1", "rate_limited"),
+    "HTTP 500":                 ("B2", "server_error"),
+    "HTTP 502":                 ("B2", "server_error"),
+    "HTTP 503":                 ("B2", "server_error"),
+    "HTTP 401":                 ("B3", "auth_failure"),
+    "HTTP 403":                 ("B4", "forbidden"),
+    "Connection refused":       ("A1", "proxy_dead"),
+    "Address already in use":   ("A2", "port_conflict"),
+    "Broken pipe":              ("B7", "broken_pipe"),
+    "Connection reset":         ("B6", "connection_reset"),
+    "timed out":                ("B5", "timeout"),
+    "SELF-REVIVE CRASH":        ("A5", "proxy_crash"),
+    "stream error":             ("B6", "stream_error"),
+    "content_type.*array":      ("E1", "schema_corrupt"),
+}
+
+# System prompt sent by AIDiagnosticAgent._call_model(); it instructs the
+# model to reply with a single JSON object holding action/reason/confidence.
+_DIAGNOSTIC_SYSTEM_PROMPT = (
+    'You are a diagnostic agent for "Codex Launcher" — a desktop app that runs a local '
+    'translation proxy between OpenAI Codex CLI/Desktop and AI providers.\n\n'
+    'Analyze the incident and respond with ONLY a JSON object:\n'
+    '{"action": "...", "reason": "...", "confidence": 0.0-1.0}\n\n'
+    'Available actions: restart_proxy, kill_stale_processes, clear_schema_cache, '
+    'switch_provider, increase_timeout, regenerate_config, cleanup_stale, '
+    'alert_user, ignore, retry_now\n\n'
+    'Rules:\n'
+    '- upstream 401/403 with auth error -> alert_user\n'
+    '- proxy dead -> restart_proxy\n'
+    '- same error 5+ times -> switch_provider or alert_user\n'
+    '- schema/content_type error -> clear_schema_cache\n'
+    '- "Address already in use" -> kill_stale_processes then restart_proxy\n'
+    '- timeout on slow upstream -> increase_timeout\n'
+    '- single transient 429/502/503 -> ignore\n'
+    '- "stream disconnected" + proxy healthy -> ignore\n'
+    '- no extra text, no markdown, just the JSON object'
+)
+
+def _load_monitoring_config():
+    """Load the monitoring settings, or the built-in defaults if the file is missing or unreadable."""
+    defaults = {
+        "enabled": False,
+        "provider_url": "",
+        "model": "",
+        "api_key": "",
+        "health_check_interval_s": 5,
+        "auto_restart_proxy": True,
+        "auto_switch_provider": False,
+    }
+    if not MONITORING_FILE.exists():
+        return defaults
+    try:
+        return json.loads(MONITORING_FILE.read_text())
+    except Exception:
+        return defaults
+
+def _save_monitoring_config(cfg):
+    """Write the monitoring settings as indented JSON, creating the directory if needed."""
+    target = MONITORING_FILE
+    target.parent.mkdir(parents=True, exist_ok=True)
+    serialized = json.dumps(cfg, indent=2)
+    target.write_text(serialized)
+
+def _load_incident_store():
+    """Load the incident store, or a fresh empty one if the file is missing or unreadable."""
+    fresh = {"version": 1, "incidents": {}, "stats": {"ai_calls": 0, "tokens_used": 0}}
+    if not INCIDENT_STORE_FILE.exists():
+        return fresh
+    try:
+        return json.loads(INCIDENT_STORE_FILE.read_text())
+    except Exception:
+        return fresh
+
+def _save_incident_store(store):
+    """Write the incident store as indented JSON, creating the directory if needed."""
+    target = INCIDENT_STORE_FILE
+    target.parent.mkdir(parents=True, exist_ok=True)
+    serialized = json.dumps(store, indent=2)
+    target.write_text(serialized)
+
+def _monitoring_log(msg):
+    """Append a time-stamped line to the monitoring log; failures are ignored."""
+    with contextlib.suppress(Exception):
+        with open(str(MONITORING_LOG), "a") as log:
+            log.write(f"[{time.strftime('%H:%M:%S')}] {msg}\n")
+
+
+class IncidentStore:
+    """In-memory view of the persisted incident history, written back on flush()."""
+
+    def __init__(self):
+        self._store = _load_incident_store()
+        self._dirty = False
+
+    def lookup(self, pattern):
+        """Return the incident for *pattern* if its recorded success rate exceeds 50%, else None."""
+        entry = self._store.get("incidents", {}).get(pattern)
+        if not entry:
+            return None
+        if not entry.get("success_count", 0) > 0:
+            return None
+        wins = entry["success_count"]
+        attempts = max(wins + entry.get("fail_count", 0), 1)
+        return entry if wins / attempts > 0.5 else None
+
+    def record(self, pattern, fix, success=True):
+        """Count one more occurrence of *pattern* and whether the applied fix succeeded.
+
+        *fix* is only stored when the pattern is seen for the first time.
+        """
+        stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
+        incidents = self._store.setdefault("incidents", {})
+        if pattern not in incidents:
+            incidents[pattern] = {
+                "fix": fix, "success_count": 0, "fail_count": 0,
+                "last_seen": stamp,
+                "occurrences": 0,
+            }
+        entry = incidents[pattern]
+        entry["last_seen"] = stamp
+        entry["occurrences"] = entry.get("occurrences", 0) + 1
+        counter = "success_count" if success else "fail_count"
+        entry[counter] = entry.get(counter, 0) + 1
+        self._dirty = True
+
+    def record_ai_call(self, tokens=0):
+        """Add one AI call and *tokens* used to the running statistics."""
+        totals = self._store.setdefault("stats", {"ai_calls": 0, "tokens_used": 0})
+        totals["ai_calls"] = totals.get("ai_calls", 0) + 1
+        totals["tokens_used"] = totals.get("tokens_used", 0) + tokens
+        self._dirty = True
+
+    def flush(self):
+        """Persist the store if anything changed since the last flush."""
+        if not self._dirty:
+            return
+        _save_incident_store(self._store)
+        self._dirty = False
+
+    @property
+    def stats(self):
+        """AI-call statistics dict (zeros when none were recorded)."""
+        return self._store.get("stats", {"ai_calls": 0, "tokens_used": 0})
+
+
+class AIDiagnosticAgent:
+    """Diagnose proxy incidents from the incident store, falling back to a chat model.
+
+    ``provider_url`` is POSTed an OpenAI-style chat-completions body and is
+    expected to answer in that format; ``api_key`` is sent as a bearer token.
+    """
+
+    def __init__(self, provider_url, model, api_key):
+        self.provider_url = provider_url
+        self.model = model
+        self.api_key = api_key
+        self.incident_store = IncidentStore()
+
+    def diagnose(self, context):
+        """Return an action dict for *context*.
+
+        A pattern with a good track record in the incident store is answered
+        from the store (tier 2).  Otherwise the model is asked (tier 3) and
+        its answer is remembered, unless the call itself failed.
+        """
+        pattern = self._extract_pattern(context)
+        known = self.incident_store.lookup(pattern)
+        if known:
+            _monitoring_log(f"Tier 2 HIT: pattern={pattern} fix={known['fix']}")
+            return {"action": known["fix"], "reason": "known_pattern", "confidence": 0.9, "tier": 2}
+        action = self._call_model(context)
+        # Never cache the fallback produced by a failed model call: it would
+        # be stored as a successful fix and every later occurrence of this
+        # pattern would answer "alert_user" without consulting the model.
+        if action and not action.get("failed"):
+            self.incident_store.record(pattern, action.get("action", "unknown"))
+            self.incident_store.flush()
+        return action
+
+    def _extract_pattern(self, context):
+        """Build a lookup key from at most three of the sorted signals plus the HTTP code."""
+        parts = []
+        for k in sorted(context.get("signals", [])):
+            parts.append(k)
+        if context.get("http_code"):
+            parts.append(f"http_{context['http_code']}")
+        return "+".join(parts[:3]) or "unknown"
+
+    @staticmethod
+    def _extract_json_object(text):
+        """Return the outermost ``{...}`` span of *text* (or *text* itself if there is none).
+
+        Models sometimes wrap the JSON in markdown fences or add a sentence
+        around it despite the prompt.
+        """
+        start, end = text.find("{"), text.rfind("}")
+        if start != -1 and end > start:
+            return text[start:end + 1]
+        return text
+
+    def _call_model(self, context):
+        """Ask the model for a diagnosis.
+
+        Returns the parsed action dict with ``tier`` set to 3.  On any failure
+        returns an ``alert_user`` fallback carrying ``"failed": True``.
+        """
+        prompt = (
+            f"INCIDENT REPORT:\n"
+            f"Time: {time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}\n"
+            f"Proxy health: {context.get('proxy_alive', 'unknown')}\n"
+            f"Upstream: {context.get('upstream_url', 'unknown')}\n"
+            f"Model: {context.get('model', 'unknown')}\n"
+            f"Last HTTP code: {context.get('http_code', 'n/a')}\n"
+            f"Recent signals: {context.get('signals', [])}\n"
+            f"Recent log tail:\n{context.get('log_tail', '')[:1500]}\n"
+        )
+        body = {
+            "model": self.model,
+            "messages": [
+                {"role": "system", "content": _DIAGNOSTIC_SYSTEM_PROMPT},
+                {"role": "user", "content": prompt},
+            ],
+            "max_tokens": 200,
+            "temperature": 0.1,
+        }
+        try:
+            req = urllib.request.Request(
+                self.provider_url,
+                data=json.dumps(body).encode(),
+                headers={
+                    "Content-Type": "application/json",
+                    "Authorization": f"Bearer {self.api_key}",
+                },
+            )
+            # Close the connection deterministically once the body is read.
+            with urllib.request.urlopen(req, timeout=15) as resp:
+                result = json.loads(resp.read())
+            text = result["choices"][0]["message"]["content"].strip()
+            self.incident_store.record_ai_call(tokens=800)
+            action = json.loads(self._extract_json_object(text))
+            if not isinstance(action, dict):
+                raise ValueError("model reply is not a JSON object")
+            action["tier"] = 3
+            _monitoring_log(f"Tier 3 AI: action={action.get('action')} reason={action.get('reason')}")
+            return action
+        except Exception as e:
+            _monitoring_log(f"Tier 3 AI FAILED: {e}")
+            return {"action": "alert_user", "reason": f"ai_diag_failed: {e}",
+                    "confidence": 0.0, "tier": 3, "failed": True}
+
+
+class HealthWatcher(threading.Thread):
+    def __init__(self, on_failure, on_recovery, on_signal, on_action):
+        """Create the daemon watcher thread and remember the four callbacks."""
+        super().__init__(daemon=True)
+        # Callbacks supplied by the owner.
+        self.on_failure, self.on_recovery = on_failure, on_recovery
+        self.on_signal, self.on_action = on_signal, on_action
+        # Settings snapshot; run() reloads it on every loop iteration.
+        self.cfg = _load_monitoring_config()
+        # Runtime state.
+        self.running = False
+        self.failures = 0
+        self._signal_counts = collections.defaultdict(int)
+        self._last_actions = {}
+        self._restart_count = 0
+        self._last_restart_time = 0
+
+    def run(self):
+        """Thread body: poll the proxy's health until stop() is called.
+
+        Settings are reloaded on every iteration.  While monitoring is
+        disabled the loop idles in 5-second steps.  After three consecutive
+        failed health checks "proxy_health_fail" is handled (and again on
+        every further failure); the first healthy check after failures
+        resets the counter and fires ``on_recovery``.
+        """
+        self.running = True
+        self.incident_store = IncidentStore()
+        self._log_analyzer = _LogAnalyzerThread(self._on_log_signal)
+        self._log_analyzer.start()
+        while self.running:
+            self.cfg = _load_monitoring_config()
+            if not self.cfg.get("enabled"):
+                time.sleep(5)
+                continue
+            proxy_port = self._get_proxy_port()
+            if proxy_port:
+                if not self._check_health(proxy_port):
+                    self.failures += 1
+                    if self.failures >= 3:
+                        self._handle_failure("proxy_health_fail")
+                elif self.failures > 0:
+                    self.failures = 0
+                    self.on_recovery()
+            self.incident_store.flush()
+            time.sleep(self.cfg.get("health_check_interval_s", 5))
+
+    def stop(self):
+        self.running = False
+        if hasattr(self, '_log_analyzer'):
+            self._log_analyzer.running = False
+
+    def _get_proxy_port(self):
+        try:
+            cfg_path = Path.home() / ".cache/codex-proxy/proxy-config.json"
+            if cfg_path.exists():
+                d = json.loads(cfg_path.read_text())
+                return d.get("port")
+        except Exception:
+            pass
+        return None
+
+    def _check_health(self, port):
+        try:
+            req = urllib.request.Request(f"http://localhost:{port}/health")
+            resp = urllib.request.urlopen(req, timeout=5)
+            return resp.status == 200
+        except Exception:
+            return False
+
+    def _on_log_signal(self, fault_id, category, line):
+        self._signal_counts[category] += 1
+        self.on_signal(fault_id, category, line[:200])
+        count = self._signal_counts[category]
+        if category in ("proxy_dead", "port_conflict") and count >= 2:
+            self._handle_failure(category)
+        elif category in ("server_error", "timeout") and count >= 3:
+            self._handle_failure(category + "_repeat")
+        elif category in ("sanitizer_flag",) and count >= 5:
+            self._handle_failure("sanitizer_suspicious_5x")
+        elif category in ("stuck_recovery",) and count >= 5:
+            self._handle_failure("stuck_recovery_x5")
+        elif category in ("parser_empty",) and count >= 3:
+            self._handle_failure("parsed_tool_calls_0_x3")
+        elif category in ("schema_corrupt",):
+            self._handle_failure("schema_corrupt")
+
+    def _handle_failure(self, trigger):
+        now = time.time()
+        for rule_trigger, action, cooldown in _TIER1_RULES:
+            if rule_trigger == trigger:
+                last_t = self._last_actions.get(action, 0)
+                if now - last_t < cooldown:
+                    return
+                self._last_actions[action] = now
+                _monitoring_log(f"Tier 1: trigger={trigger} action={action}")
+                self.on_action(action, trigger)
+                self.incident_store.record(trigger, action, success=True)
+                return
+        self._try_tier2_3(trigger)
+
+    def _try_tier2_3(self, trigger):
+        cfg = self.cfg
+        if not cfg.get("provider_url") or not cfg.get("model") or not cfg.get("api_key"):
+            _monitoring_log(f"No AI configured for Tier 2/3 — alerting user for trigger={trigger}")
+            self.on_action("alert_user", trigger)
+            return
+        agent = AIDiagnosticAgent(cfg["provider_url"], cfg["model"], cfg["api_key"])
+        context = {
+            "signals": [trigger],
+            "proxy_alive": self.failures == 0,
+            "log_tail": self._get_recent_log(),
+        }
+        result = agent.diagnose(context)
+        if result:
+            action = result.get("action", "alert_user")
+            _monitoring_log(f"Tier {result.get('tier', '?')}: action={action}")
+            self.on_action(action, trigger)
+
+
+class _LogAnalyzerThread(threading.Thread):
+    """Daemon thread that tails the proxy log files and reports failure patterns.
+
+    Only lines appended after the thread starts are examined.  Each new line is
+    matched against the regex keys of ``_FAILURE_SIGNALS``; the first match is
+    reported as ``on_signal(fault_id, category, stripped_line)``.
+    """
+
+    def __init__(self, on_signal):
+        super().__init__(daemon=True)
+        self.on_signal = on_signal
+        self.running = False
+
+    def run(self):
+        """Tail the logs until ``running`` is set to False, then close them."""
+        self.running = True
+        log_paths = [
+            str(Path.home() / ".cache/codex-proxy/cc-debug.log"),
+            str(Path.home() / ".cache/codex-proxy/proxy.log"),
+        ]
+        fhs = {}
+        for p in log_paths:
+            try:
+                # errors="replace": one undecodable byte must not make every
+                # later readline() raise and stall tailing of that file.
+                f = open(p, "r", errors="replace")
+            except OSError:
+                # Missing or unreadable logs are simply not watched.
+                continue
+            try:
+                f.seek(0, 2)  # start at end of file: report new lines only
+            except OSError:
+                f.close()
+                continue
+            fhs[p] = f
+        try:
+            while self.running:
+                activity = False
+                for fh in list(fhs.values()):
+                    try:
+                        line = fh.readline()
+                        if line:
+                            activity = True
+                            for pattern, (fault_id, category) in _FAILURE_SIGNALS.items():
+                                if re.search(pattern, line):
+                                    self.on_signal(fault_id, category, line.strip())
+                                    break
+                    except Exception as e:
+                        # Keep tailing, but surface the failure instead of hiding it.
+                        _monitoring_log(f"Log analyzer error: {e}")
+                if not activity:
+                    time.sleep(0.5)
+        finally:
+            # Release the file handles when the thread is stopped.
+            for fh in fhs.values():
+                try:
+                    fh.close()
+                except OSError:
+                    pass
+
+
+class AIMonitoringWindow(Gtk.Window):
+    """Settings and status window for the AI monitoring feature.
+
+    Shows an enable switch, the diagnostic-agent connection settings, a summary
+    line built from the incident store, and a read-only list of recorded
+    incidents.  Config is read with ``_load_monitoring_config()`` and written
+    with ``_save_monitoring_config()``.
+    """
+
+    def __init__(self, parent=None):
+        super().__init__(title="AI Monitoring")
+        self.set_transient_for(parent)
+        self.set_default_size(580, 520)
+        self.set_border_width(12)
+        self._cfg = _load_monitoring_config()
+        self._store = _load_incident_store()
+
+        vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=8)
+        self.add(vbox)
+
+        # Header: title on the left, "Enabled" label and switch on the right.
+        hdr = Gtk.Box(spacing=8)
+        vbox.pack_start(hdr, False, False, 0)
+        lbl = Gtk.Label()
+        lbl.set_markup("<b>AI Monitoring</b>")
+        lbl.set_use_markup(True)
+        hdr.pack_start(lbl, False, False, 0)
+        self._toggle = Gtk.Switch()
+        self._toggle.set_active(self._cfg.get("enabled", False))
+        self._toggle.connect("state-set", self._on_toggle)
+        hdr.pack_end(self._toggle, False, False, 0)
+        lbl2 = Gtk.Label(label="Enabled")
+        hdr.pack_end(lbl2, False, False, 0)
+
+        # Diagnostic-agent settings form.
+        frame = Gtk.Frame(label="Diagnostic Agent")
+        vbox.pack_start(frame, False, False, 0)
+        grid = Gtk.Grid(column_spacing=8, row_spacing=6, margin=8)
+        frame.add(grid)
+
+        grid.attach(Gtk.Label(label="Provider URL:", halign=Gtk.Align.END), 0, 0, 1, 1)
+        self._url_entry = Gtk.Entry(hexpand=True)
+        self._url_entry.set_text(self._cfg.get("provider_url", ""))
+        self._url_entry.set_placeholder_text("https://api.openai.com/v1/chat/completions")
+        grid.attach(self._url_entry, 1, 0, 2, 1)
+
+        grid.attach(Gtk.Label(label="Model:", halign=Gtk.Align.END), 0, 1, 1, 1)
+        self._model_entry = Gtk.Entry(hexpand=True)
+        self._model_entry.set_text(self._cfg.get("model", ""))
+        self._model_entry.set_placeholder_text("gpt-4o-mini or Qwen/Qwen3-32B")
+        grid.attach(self._model_entry, 1, 1, 2, 1)
+
+        grid.attach(Gtk.Label(label="API Key:", halign=Gtk.Align.END), 0, 2, 1, 1)
+        # visibility=False masks the key until the "Show" toggle is pressed.
+        self._key_entry = Gtk.Entry(hexpand=True, visibility=False)
+        self._key_entry.set_text(self._cfg.get("api_key", ""))
+        self._key_entry.set_placeholder_text("sk-...")
+        grid.attach(self._key_entry, 1, 2, 1, 1)
+        self._reveal_btn = Gtk.ToggleButton(label="Show")
+        self._reveal_btn.connect("toggled", lambda b: self._key_entry.set_visibility(b.get_active()))
+        grid.attach(self._reveal_btn, 2, 2, 1, 1)
+
+        grid.attach(Gtk.Label(label="Health Check:", halign=Gtk.Align.END), 0, 3, 1, 1)
+        adj = Gtk.Adjustment(value=self._cfg.get("health_check_interval_s", 5), lower=2, upper=30, step_increment=1)
+        self._interval_spin = Gtk.SpinButton(adjustment=adj)
+        self._interval_spin.set_numeric(True)
+        grid.attach(self._interval_spin, 1, 3, 1, 1)
+        grid.attach(Gtk.Label(label="seconds"), 2, 3, 1, 1)
+
+        opts_box = Gtk.Box(spacing=12, margin_top=4)
+        grid.attach(opts_box, 0, 4, 3, 1)
+        self._auto_restart_cb = Gtk.CheckButton(label="Auto-restart proxy on crash")
+        self._auto_restart_cb.set_active(self._cfg.get("auto_restart_proxy", True))
+        opts_box.pack_start(self._auto_restart_cb, False, False, 0)
+        self._auto_switch_cb = Gtk.CheckButton(label="Auto-switch provider on repeated failure")
+        self._auto_switch_cb.set_active(self._cfg.get("auto_switch_provider", False))
+        opts_box.pack_start(self._auto_switch_cb, False, False, 0)
+
+        save_btn = Gtk.Button(label="Save Configuration")
+        save_btn.get_style_context().add_class("suggested-action")
+        save_btn.connect("clicked", self._on_save)
+        grid.attach(save_btn, 0, 5, 3, 1)
+
+        # Summary line; rendered by _refresh_stats so it can be redrawn later.
+        stats_box = Gtk.Box(spacing=16)
+        vbox.pack_start(stats_box, False, False, 0)
+        self._stats_lbl = Gtk.Label()
+        self._stats_lbl.set_use_markup(True)
+        self._refresh_stats()
+        stats_box.pack_start(self._stats_lbl, False, False, 0)
+
+        # Read-only incident list.
+        frame2 = Gtk.Frame(label="Recent Incidents")
+        vbox.pack_start(frame2, True, True, 0)
+        sw = Gtk.ScrolledWindow()
+        sw.set_policy(Gtk.PolicyType.AUTOMATIC, Gtk.PolicyType.AUTOMATIC)
+        frame2.add(sw)
+        self._inc_buf = Gtk.TextBuffer()
+        tv = Gtk.TextView(buffer=self._inc_buf)
+        tv.set_editable(False)
+        tv.set_cursor_visible(False)
+        tv.set_wrap_mode(Gtk.WrapMode.WORD_CHAR)
+        sw.add(tv)
+        self._refresh_incidents()
+
+        # Bottom button bar.
+        bb = Gtk.Box(spacing=8)
+        vbox.pack_start(bb, False, False, 0)
+        view_btn = Gtk.Button(label="View Monitoring Log")
+        view_btn.connect("clicked", lambda b: subprocess.Popen(["xdg-open", str(MONITORING_LOG)]))
+        bb.pack_start(view_btn, False, False, 0)
+        clear_btn = Gtk.Button(label="Clear Incident Store")
+        clear_btn.connect("clicked", self._on_clear_store)
+        bb.pack_start(clear_btn, False, False, 0)
+        close_btn = Gtk.Button(label="Close")
+        close_btn.connect("clicked", lambda b: self.destroy())
+        bb.pack_end(close_btn, False, False, 0)
+
+        self.show_all()
+
+    @staticmethod
+    def _empty_store():
+        """Return a fresh, empty incident-store dict."""
+        return {"version": 1, "incidents": {}, "stats": {"ai_calls": 0, "tokens_used": 0}}
+
+    def _on_toggle(self, switch, state):
+        """Persist the new enabled state as soon as the switch is flipped."""
+        self._cfg["enabled"] = state
+        _save_monitoring_config(self._cfg)
+
+    def _on_save(self, btn):
+        """Copy the form fields into the config and save it."""
+        self._cfg["provider_url"] = self._url_entry.get_text().strip()
+        self._cfg["model"] = self._model_entry.get_text().strip()
+        self._cfg["api_key"] = self._key_entry.get_text().strip()
+        self._cfg["health_check_interval_s"] = int(self._interval_spin.get_value())
+        self._cfg["auto_restart_proxy"] = self._auto_restart_cb.get_active()
+        self._cfg["auto_switch_provider"] = self._auto_switch_cb.get_active()
+        _save_monitoring_config(self._cfg)
+        # The incident pane doubles as the confirmation area.
+        self._inc_buf.set_text("Configuration saved.\n")
+
+    def _on_clear_store(self, btn):
+        """Reset the incident store on disk and in memory, then redraw both views."""
+        _save_incident_store(self._empty_store())
+        self._store = self._empty_store()
+        # Without this the summary line kept showing the pre-clear counts.
+        self._refresh_stats()
+        self._refresh_incidents()
+
+    def _refresh_stats(self):
+        """Render the summary line from the current in-memory incident store."""
+        stats = self._store.get("stats", {"ai_calls": 0, "tokens_used": 0})
+        self._stats_lbl.set_markup(
+            f"<small>AI diagnostic calls: <b>{stats.get('ai_calls', 0)}</b>  |  "
+            f"Tokens used: <b>{stats.get('tokens_used', 0):,}</b>  |  "
+            f"Known patterns: <b>{len(self._store.get('incidents', {}))}</b></small>"
+        )
+
+    def _refresh_incidents(self):
+        """Fill the incident pane, most recently seen pattern first."""
+        lines = []
+        for pattern, inc in sorted(self._store.get("incidents", {}).items(),
+                                    key=lambda x: x[1].get("last_seen", ""), reverse=True):
+            sc = inc.get("success_count", 0)
+            fc = inc.get("fail_count", 0)
+            # max(..., 1) avoids dividing by zero for never-attempted fixes.
+            rate = sc / max(sc + fc, 1)
+            filled = min(int(rate * 10), 10)
+            bar = "+" * filled + "-" * (10 - filled)
+            lines.append(
+                f"[{inc.get('last_seen', '?')[:16]}] {pattern}\n"
+                f"  fix={inc.get('fix', '?')}  success_rate={rate:.0%} [{bar}]  "
+                f"seen={inc.get('occurrences', 0)}x\n"
+            )
+        if not lines:
+            lines.append("No incidents recorded yet.\n")
+            lines.append("\nEnable AI Monitoring and use Codex to populate the store.\n")
+        self._inc_buf.set_text("\n".join(lines))
+
+
+# ═══════════════════════════════════════════════════════════════════
+# Main window
+# ═══════════════════════════════════════════════════════════════════
+
+class LauncherWin(Gtk.Window):
+    def __init__(self):
+        """Build the main launcher window and start background checks.
+
+        Lays out, top to bottom: header buttons, dependency status, auth
+        status, profile operations, endpoint/model/sandbox/approval selectors,
+        launch buttons, the status log and the bottom bar.  Afterwards it fills
+        the endpoint combo, logs dependency status and starts the watcher.
+        """
+        super().__init__(title="Codex Launcher")
+        self.set_default_size(560, 460)
+        self.set_border_width(12)
+        self.set_position(Gtk.WindowPosition.CENTER)
+        self._proc = None
+        self._endpoints_data = load_endpoints()
+        recover_config_if_needed()
+
+        vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=8)
+        self.add(vbox)
+
+        # header row
+        hdr = Gtk.Box(spacing=8)
+        vbox.pack_start(hdr, False, False, 0)
+        lbl = Gtk.Label(label="<b>Codex Launcher v3.8.4</b>")
+        lbl.set_use_markup(True)
+        hdr.pack_start(lbl, False, False, 0)
+        # pack_end places each button to the left of the previously packed one.
+        changelog_btn = Gtk.Button(label="Changelog")
+        changelog_btn.connect("clicked", lambda b: self._show_changelog())
+        hdr.pack_end(changelog_btn, False, False, 0)
+        history_btn = Gtk.Button(label="History")
+        history_btn.connect("clicked", lambda b: self._open_history())
+        hdr.pack_end(history_btn, False, False, 0)
+        bench_btn = Gtk.Button(label="Benchmark")
+        bench_btn.connect("clicked", lambda b: self._open_benchmark())
+        hdr.pack_end(bench_btn, False, False, 0)
+        usage_btn = Gtk.Button(label="Usage")
+        usage_btn.connect("clicked", lambda b: self._open_usage())
+        hdr.pack_end(usage_btn, False, False, 0)
+        bgp_btn = Gtk.Button(label="AI BGP")
+        bgp_btn.connect("clicked", lambda b: self._open_bgp())
+        hdr.pack_end(bgp_btn, False, False, 0)
+        mon_btn = Gtk.Button(label="AI Monitor")
+        mon_btn.connect("clicked", lambda b: self._open_monitoring())
+        hdr.pack_end(mon_btn, False, False, 0)
+        mgr_btn = Gtk.Button(label="Manage Endpoints")
+        mgr_btn.connect("clicked", lambda b: self._open_mgr())
+        hdr.pack_end(mgr_btn, False, False, 0)
+
+        # verification status bar
+        self._cli_info = _detect_codex_cli()
+        self._desktop_info = _detect_codex_desktop()
+        ver_box = Gtk.Box(spacing=12)
+        vbox.pack_start(ver_box, False, False, 0)
+
+        if self._cli_info:
+            cli_path, cli_ver = self._cli_info
+            cli_lbl = Gtk.Label()
+            # Detected version/path are external text: escape them so '&' or
+            # '<' cannot invalidate the Pango markup and blank the label.
+            cli_ver_markup = GLib.markup_escape_text(str(cli_ver))
+            cli_path_markup = GLib.markup_escape_text(str(cli_path))
+            cli_lbl.set_markup(f"<span foreground='#2ea043'>✔ Codex CLI</span>  <small>{cli_ver_markup} ({cli_path_markup})</small>")
+            cli_lbl.set_use_markup(True)
+            ver_box.pack_start(cli_lbl, False, False, 0)
+        else:
+            cli_lbl = Gtk.Label()
+            cli_lbl.set_markup("<span foreground='#d29922'>✘ Codex CLI — not found</span>")
+            cli_lbl.set_use_markup(True)
+            ver_box.pack_start(cli_lbl, False, False, 0)
+            cli_install_btn = Gtk.Button(label="Install")
+            cli_install_btn.connect("clicked", lambda b: self._show_install_guide("cli"))
+            ver_box.pack_start(cli_install_btn, False, False, 0)
+
+        ver_box.pack_start(Gtk.Label(label="  "), False, False, 0)
+
+        if self._desktop_info:
+            desk_lbl = Gtk.Label()
+            # Same escaping as for the CLI label above.
+            desktop_markup = GLib.markup_escape_text(str(self._desktop_info))
+            desk_lbl.set_markup(f"<span foreground='#2ea043'>✔ Codex Desktop</span>  <small>({desktop_markup})</small>")
+            desk_lbl.set_use_markup(True)
+            ver_box.pack_start(desk_lbl, False, False, 0)
+        else:
+            desk_lbl = Gtk.Label()
+            desk_lbl.set_markup("<span foreground='#d29922'>✘ Codex Desktop — not found</span>")
+            desk_lbl.set_use_markup(True)
+            ver_box.pack_start(desk_lbl, False, False, 0)
+            desk_install_btn = Gtk.Button(label="Install")
+            desk_install_btn.connect("clicked", lambda b: self._show_install_guide("desktop"))
+            ver_box.pack_start(desk_install_btn, False, False, 0)
+
+        # Names of missing tools ("cli" / "desktop"); gates the launch buttons.
+        self._missing = []
+        if not self._cli_info:
+            self._missing.append("cli")
+        if not self._desktop_info:
+            self._missing.append("desktop")
+
+        auth_box = Gtk.Box(spacing=12)
+        vbox.pack_start(auth_box, False, False, 0)
+        self._auth_label = Gtk.Label()
+        self._auth_label.set_markup("<span foreground='#888'>Checking auth…</span>")
+        self._auth_label.set_use_markup(True)
+        self._auth_label.set_ellipsize(3)  # 3 == Pango.EllipsizeMode.END
+        auth_box.pack_start(self._auth_label, False, False, 0)
+        self._relogin_btn = Gtk.Button(label="Re-login")
+        self._relogin_btn.set_sensitive(False)
+        self._relogin_btn.connect("clicked", lambda b: self._codex_relogin())
+        auth_box.pack_end(self._relogin_btn, False, False, 0)
+        # Auth check runs off the main thread; it reports back via GLib.idle_add.
+        threading.Thread(target=self._check_auth_async, daemon=True).start()
+
+        ops_box = Gtk.Box(spacing=8)
+        vbox.pack_start(ops_box, False, False, 0)
+        self._refresh_all_btn = Gtk.Button(label="Refresh Models")
+        self._refresh_all_btn.connect("clicked", lambda b: self._refresh_all_models())
+        ops_box.pack_start(self._refresh_all_btn, False, False, 0)
+        self._backup_btn = Gtk.Button(label="Backup Profile")
+        self._backup_btn.connect("clicked", lambda b: self._backup_profile())
+        ops_box.pack_start(self._backup_btn, False, False, 0)
+        self._import_btn = Gtk.Button(label="Import Profile")
+        self._import_btn.connect("clicked", lambda b: self._import_profile())
+        ops_box.pack_start(self._import_btn, False, False, 0)
+
+        # endpoint selector
+        sel_box = Gtk.Box(spacing=6)
+        vbox.pack_start(sel_box, False, False, 4)
+        sel_box.pack_start(Gtk.Label(label="Endpoint:"), False, False, 0)
+        self._combo = Gtk.ComboBoxText()
+        self._combo.connect("changed", lambda c: self._on_endpoint_changed())
+        sel_box.pack_start(self._combo, True, True, 0)
+
+        # model selector
+        sel_box.pack_start(Gtk.Label(label="Model:"), False, False, 0)
+        self._model_combo = Gtk.ComboBoxText()
+        sel_box.pack_start(self._model_combo, True, True, 0)
+
+        # sandbox mode selector
+        sel_box.pack_start(Gtk.Label(label="Sandbox:"), False, False, 0)
+        self._sandbox_combo = Gtk.ComboBoxText()
+        for v, l in [("read-only", "Read-only"),
+                      ("workspace-write", "Workspace"),
+                      ("danger-full-access", "Full Access")]:
+            self._sandbox_combo.append(v, l)
+        self._sandbox_combo.set_active_id("workspace-write")
+        sel_box.pack_start(self._sandbox_combo, True, True, 0)
+
+        # approval mode selector
+        sel_box.pack_start(Gtk.Label(label="Approval:"), False, False, 0)
+        self._approval_combo = Gtk.ComboBoxText()
+        for v, l in [("untrusted", "Untrusted"),
+                      ("on-request", "On Request"),
+                      ("never", "Never (Full Auto)")]:
+            self._approval_combo.append(v, l)
+        self._approval_combo.set_active_id("on-request")
+        sel_box.pack_start(self._approval_combo, True, True, 0)
+
+        # launch buttons
+        btn_box = Gtk.Box(spacing=8, homogeneous=True)
+        vbox.pack_start(btn_box, False, False, 8)
+        self._btn_desktop = Gtk.Button(label="Launch Desktop")
+        self._btn_desktop.connect("clicked", lambda b: self._launch("desktop"))
+        if "desktop" in self._missing:
+            self._btn_desktop.set_tooltip_text("Codex Desktop is not installed")
+            self._btn_desktop.set_sensitive(False)
+        btn_box.pack_start(self._btn_desktop, True, True, 0)
+        self._btn_cli = Gtk.Button(label="Launch CLI")
+        self._btn_cli.connect("clicked", lambda b: self._launch("cli"))
+        if "cli" in self._missing:
+            self._btn_cli.set_tooltip_text("Codex CLI is not installed")
+            self._btn_cli.set_sensitive(False)
+        btn_box.pack_start(self._btn_cli, True, True, 0)
+
+        btn_box2 = Gtk.Box(spacing=8, homogeneous=True)
+        vbox.pack_start(btn_box2, False, False, 0)
+        self._btn_codex_desktop = Gtk.Button(label="Codex Default (Desktop)")
+        self._btn_codex_desktop.connect("clicked", lambda b: self._launch_codex_default("desktop"))
+        if "desktop" in self._missing:
+            self._btn_codex_desktop.set_tooltip_text("Codex Desktop is not installed")
+            self._btn_codex_desktop.set_sensitive(False)
+        btn_box2.pack_start(self._btn_codex_desktop, True, True, 0)
+        self._btn_codex_cli = Gtk.Button(label="Codex Default (CLI)")
+        self._btn_codex_cli.connect("clicked", lambda b: self._launch_codex_default("cli"))
+        if "cli" in self._missing:
+            self._btn_codex_cli.set_tooltip_text("Codex CLI is not installed")
+            self._btn_codex_cli.set_sensitive(False)
+        btn_box2.pack_start(self._btn_codex_cli, True, True, 0)
+
+        # status
+        sw = Gtk.ScrolledWindow()
+        sw.set_policy(Gtk.PolicyType.AUTOMATIC, Gtk.PolicyType.AUTOMATIC)
+        vbox.pack_start(sw, True, True, 0)
+        self._buf = Gtk.TextBuffer()
+        self._tv = Gtk.TextView(buffer=self._buf)
+        self._tv.set_editable(False)
+        self._tv.set_cursor_visible(False)
+        self._tv.set_wrap_mode(Gtk.WrapMode.WORD_CHAR)
+        sw.add(self._tv)
+
+        # bottom bar
+        bb = Gtk.Box(spacing=8)
+        vbox.pack_start(bb, False, False, 0)
+        assist_btn = Gtk.Button(label="AI Assistant")
+        assist_btn.get_style_context().add_class("suggested-action")
+        assist_btn.connect("clicked", lambda b: self._open_assistant())
+        assist_btn.set_tooltip_text("Open AI coding assistant with streaming, tools, and session management")
+        bb.pack_start(assist_btn, False, False, 0)
+        self._kill_btn = Gtk.Button(label="Kill && Cleanup")
+        self._kill_btn.connect("clicked", lambda b: self._kill())
+        self._kill_btn.set_sensitive(False)
+        bb.pack_start(self._kill_btn, True, True, 0)
+        self._view_log_btn = Gtk.Button(label="View Log")
+        self._view_log_btn.connect("clicked", lambda b: subprocess.Popen(["xdg-open", str(LAUNCH_LOG)]))
+        bb.pack_start(self._view_log_btn, False, False, 0)
+        self._close_btn = Gtk.Button(label="Close")
+        self._close_btn.connect("clicked", lambda b: self._do_close())
+        bb.pack_start(self._close_btn, False, False, 0)
+
+        self.show_all()
+        self._rebuild_combo()
+        self._log_dependency_status()
+        self._start_watcher()
+
+    # ── helpers ──────────────────────────────────────────────────
+
+    def log(self, msg):
+        """Queue *msg* for the status view; the append is scheduled via ``GLib.idle_add``."""
+        append = self._append_log
+        GLib.idle_add(append, msg)
+
+    def _append_log(self, msg):
+        """Append *msg* and a newline to the status buffer, then scroll to it."""
+        buf = self._buf
+        end_iter = buf.get_end_iter()
+        buf.insert(end_iter, msg + "\n")
+        # A temporary mark gives scroll_to_mark a target; it is dropped right after.
+        tail_mark = buf.create_mark(None, end_iter, False)
+        self._tv.scroll_to_mark(tail_mark, 0.0, True, 0.0, 0.5)
+        buf.delete_mark(tail_mark)
+
+    def _log_dependency_status(self):
+        """Log one line per dependency (CLI, Desktop) followed by an overall summary."""
+        cli_info = self._cli_info
+        if not cli_info:
+            self.log("✘ Codex CLI NOT found — CLI launch disabled. Click 'Install' above.")
+        else:
+            _path, version = cli_info
+            self.log(f"✔ Codex CLI detected ({version})")
+
+        desktop_info = self._desktop_info
+        if not desktop_info:
+            self.log("✘ Codex Desktop NOT found — Desktop launch disabled. Click 'Install' above.")
+        else:
+            self.log(f"✔ Codex Desktop detected ({desktop_info})")
+
+        summary = (
+            "⚠  Install missing tools before using the launcher."
+            if self._missing
+            else "All dependencies OK."
+        )
+        self.log(summary)
+
+    def _check_auth_async(self):
+        """Run the auth check and schedule ``_update_auth_status`` with its result."""
+        auth_result = _check_codex_auth()
+        status, msg = auth_result
+        GLib.idle_add(self._update_auth_status, status, msg)
+
+    def _update_auth_status(self, status, msg):
+        """Show the auth check result and enable Re-login when the CLI is present.
+
+        Args:
+            status: ``"logged_in"``, ``"not_installed"``, or any other value
+                (treated as a warning state).
+            msg: human-readable detail shown after ``Auth:``.
+
+        Returns:
+            False, so a ``GLib.idle_add`` source running this is not rescheduled.
+        """
+        # msg comes from the auth check and is embedded in Pango markup; escape
+        # it so characters such as '&' or '<' cannot invalidate the markup.
+        safe_msg = GLib.markup_escape_text(str(msg))
+        can_relogin = "cli" not in self._missing
+        if status == "logged_in":
+            self._auth_label.set_markup(f"<span foreground='#2ea043'>✔ Auth: {safe_msg}</span>")
+            self._relogin_btn.set_sensitive(can_relogin)
+        elif status == "not_installed":
+            self._auth_label.set_markup("<span foreground='#888'>Auth: N/A (CLI not installed)</span>")
+        else:
+            self._auth_label.set_markup(f"<span foreground='#d29922'>⚠ Auth: {safe_msg}</span>")
+            self._relogin_btn.set_sensitive(can_relogin)
+        return False
+
+    def _codex_relogin(self):
+        """Open ``codex login`` in the first available terminal and re-check auth later.
+
+        Logs an error and returns if no known terminal emulator is installed or
+        the terminal cannot be started.
+        """
+        self.log("Opening codex login in terminal…")
+        # (executable, arguments that introduce the command to run), in preference order
+        terms = [
+            ("x-terminal-emulator", ["-e"]),
+            ("kgx", ["--"]),
+            ("gnome-terminal", ["--"]),
+            ("konsole", ["-e"]),
+            ("xterm", ["-e"]),
+        ]
+        found = next(((name, args) for name, args in terms if shutil.which(name)), None)
+        if found is None:
+            self.log("ERROR: no terminal emulator found for re-login")
+            return
+        term, term_args = found
+        cmd_parts = [term, *term_args, "codex", "login"]
+        try:
+            # start_new_session=True performs the setsid() in the child without
+            # preexec_fn, which is unsafe in a process that runs threads.
+            subprocess.Popen(cmd_parts, start_new_session=True)
+        except OSError as exc:
+            self.log(f"ERROR: failed to start terminal for re-login: {exc}")
+            return
+        self.log("Login flow started in terminal. Re-checking auth in 30s…")
+        self._auth_label.set_markup("<span foreground='#888'>Auth: waiting for login…</span>")
+        threading.Thread(target=self._delayed_auth_check, daemon=True).start()
+
+    def _delayed_auth_check(self):
+        """Wait 30 seconds, then run the auth check again."""
+        delay_s = 30
+        time.sleep(delay_s)
+        self._check_auth_async()
+
+    def _set_busy(self, busy):
+        """Schedule a sensitivity update: launch buttons off while busy, Kill on."""
+        def _apply():
+            idle = not busy
+            cli_ok = "cli" not in self._missing
+            desktop_ok = "desktop" not in self._missing
+            # A launch button is usable only when idle and its tool is installed.
+            for button, available in (
+                (self._btn_desktop, desktop_ok),
+                (self._btn_cli, cli_ok),
+                (self._btn_codex_desktop, desktop_ok),
+                (self._btn_codex_cli, cli_ok),
+            ):
+                button.set_sensitive(idle and available)
+            self._kill_btn.set_sensitive(busy)
+        GLib.idle_add(_apply)
+
+    def _rebuild_combo(self):
+        """Reload endpoints and BGP pools into the endpoint combo and select a default."""
+        self._endpoints_data = load_endpoints()
+        combo = self._combo
+        combo.remove_all()
+        endpoint_names = [entry["name"] for entry in self._endpoints_data["endpoints"]]
+        for label in endpoint_names:
+            combo.append_text(label)
+        # Pools are listed after endpoints, marked with a "🔀 " prefix.
+        pool_names = [pool["name"] for pool in load_bgp_pools().get("pools", [])]
+        for label in pool_names:
+            combo.append_text(f"🔀 {label}")
+        if endpoint_names or pool_names:
+            preferred = self._endpoints_data.get("default")
+            if preferred and preferred in endpoint_names:
+                active_index = endpoint_names.index(preferred)
+            else:
+                active_index = 0
+            combo.set_active(active_index)
+        self._on_endpoint_changed()
+
+    def _on_endpoint_changed(self):
+        """Repopulate the model combo for the selected endpoint or BGP pool."""
+        selected = self._combo.get_active_text()
+        is_pool = selected and selected.startswith("🔀 ")
+        # Drop the two-character "🔀 " marker to recover the pool name.
+        pool_name = selected[2:] if is_pool else None
+        endpoint = get_endpoint(selected) if selected and not is_pool else None
+        model_combo = self._model_combo
+        model_combo.remove_all()
+        if is_pool:
+            pool = next(
+                (p for p in load_bgp_pools().get("pools", []) if p["name"] == pool_name),
+                None,
+            )
+            if pool:
+                # Unique, non-empty route models in first-seen order.
+                added = set()
+                for route in pool.get("routes", []):
+                    model = route.get("model", "")
+                    if not model or model in added:
+                        continue
+                    model_combo.append_text(model)
+                    added.add(model)
+                if added:
+                    model_combo.set_active(0)
+        elif endpoint:
+            for model in endpoint.get("models", []):
+                model_combo.append_text(model)
+            GLib.idle_add(self._select_default_model, endpoint)
+
+    def _select_default_model(self, ep):
+        """Activate the endpoint's default model, or the first model if there is none."""
+        preferred = ep.get("default_model", "")
+        available = ep.get("models", [])
+        if preferred in available:
+            target = available.index(preferred)
+        elif available:
+            target = 0
+        else:
+            return
+        self._model_combo.set_active(target)
+
+    # ── endpoint mgr ─────────────────────────────────────────────
+
+    def _open_mgr(self):
+        """Open the endpoint manager window; show an error dialog if that fails."""
+        try:
+            window = EndpointMgr(self)
+            self._mgr_window = window
+            # Clear the reference once the window is destroyed.
+            window.connect("destroy", lambda *_args: setattr(self, "_mgr_window", None))
+        except Exception as exc:
+            import traceback
+            traceback.print_exc()
+            dialog = Gtk.MessageDialog(self, 0, Gtk.MessageType.ERROR, Gtk.ButtonsType.OK, f"Error: {exc}")
+            dialog.run()
+            dialog.destroy()
+
+    def _open_bgp(self):
+        """Open the BGP pool manager window; show an error dialog if that fails."""
+        try:
+            window = BGPPoolMgr(self)
+            self._bgp_window = window
+            # Clear the reference once the window is destroyed.
+            window.connect("destroy", lambda *_args: setattr(self, "_bgp_window", None))
+        except Exception as exc:
+            import traceback
+            traceback.print_exc()
+            dialog = Gtk.MessageDialog(self, 0, Gtk.MessageType.ERROR, Gtk.ButtonsType.OK, f"Error: {exc}")
+            dialog.run()
+            dialog.destroy()
+
+    def _open_monitoring(self):
+        """Open the AI monitoring window; show an error dialog if that fails."""
+        try:
+            window = AIMonitoringWindow(self)
+            self._monitoring_window = window
+            # Clear the reference once the window is destroyed.
+            window.connect("destroy", lambda *_args: setattr(self, "_monitoring_window", None))
+        except Exception as exc:
+            import traceback
+            traceback.print_exc()
+            dialog = Gtk.MessageDialog(self, 0, Gtk.MessageType.ERROR, Gtk.ButtonsType.OK, f"Error: {exc}")
+            dialog.run()
+            dialog.destroy()
+
+    def _start_watcher(self):
+        """Start the ``HealthWatcher`` thread when monitoring is enabled in the config."""
+        if not _load_monitoring_config().get("enabled"):
+            return
+        callbacks = {
+            "on_failure": self._on_watcher_failure,
+            "on_recovery": self._on_watcher_recovery,
+            "on_signal": self._on_watcher_signal,
+            "on_action": self._on_watcher_action,
+        }
+        watcher = HealthWatcher(**callbacks)
+        self._watcher = watcher
+        watcher.start()
+        self.log("AI Monitoring: watchdog started")
+
+    def _on_watcher_failure(self, count):
+        """Watcher callback: log that the proxy is unresponsive, with the failure count."""
+        message = f"[AI Monitor] Proxy unresponsive (failures={count})"
+        GLib.idle_add(self.log, message)
+
+    def _on_watcher_recovery(self):
+        """Watcher callback: log that the proxy answered its health check again."""
+        message = "[AI Monitor] Proxy recovered"
+        GLib.idle_add(self.log, message)
+
+    def _on_watcher_signal(self, fault_id, category, line):
+        """Watcher callback for individual log signals; intentionally a no-op."""
+        return None
+
+    def _on_watcher_action(self, action, trigger):
+        """Carry out a remediation *action* chosen by the watcher for *trigger*.
+
+        Called from watcher threads, so everything that touches the UI or the
+        proxy lifecycle is scheduled with ``GLib.idle_add``.  Unknown actions,
+        and ``restart_proxy`` while auto-restart is disabled, are only logged
+        as alerts.
+        """
+        cfg = _load_monitoring_config()
+        # Both cache actions delete the same file and differ only in wording:
+        # action -> (success message, failure message prefix)
+        cache_actions = {
+            "clear_schema_cache": (
+                "[AI Monitor] Cleared corrupt schema cache",
+                "[AI Monitor] Failed to clear cache",
+            ),
+            "delete_provider_caps": (
+                "[AI Monitor] Deleted corrupted provider-caps.json",
+                "[AI Monitor] Failed",
+            ),
+        }
+        if action == "restart_proxy" and cfg.get("auto_restart_proxy"):
+            GLib.idle_add(self.log, f"[AI Monitor] Auto-restarting proxy (trigger: {trigger})")
+            GLib.idle_add(self._restart_proxy_from_watcher)
+        elif action in cache_actions:
+            done_msg, fail_prefix = cache_actions[action]
+            try:
+                cap_file = Path.home() / ".cache/codex-proxy/provider-caps.json"
+                if cap_file.exists():
+                    cap_file.unlink()
+                    GLib.idle_add(self.log, done_msg)
+            except Exception as e:
+                GLib.idle_add(self.log, f"{fail_prefix}: {e}")
+        elif action == "kill_stale_restart":
+            GLib.idle_add(self.log, f"[AI Monitor] Killing stale processes + restarting (trigger: {trigger})")
+
+            def _kill_then_restart():
+                # _kill is also the Kill button handler, so run it on the GTK
+                # main loop like the other actions, and restart only afterwards.
+                self._kill()
+                self._restart_proxy_from_watcher()
+                return False  # one-shot idle callback
+
+            GLib.idle_add(_kill_then_restart)
+        else:
+            GLib.idle_add(self.log, f"[AI Monitor] Alert: {action} (trigger: {trigger})")
+
+    def _restart_proxy_from_watcher(self):
+        """Restart the proxy for the default endpoint; log when that is impossible.
+
+        Returns None in every case, so it can be used directly as a one-shot
+        ``GLib.idle_add`` callback.
+        """
+        try:
+            # Read the endpoint file once so the default name and the endpoint
+            # list come from the same snapshot.
+            data = load_endpoints()
+            ep_name = data.get("default")
+            if not ep_name:
+                self.log("[AI Monitor] Proxy restart skipped: no default endpoint configured")
+                return
+            for ep in data.get("endpoints", []):
+                if ep.get("name") == ep_name:
+                    self._start_proxy(ep)
+                    break
+            else:
+                self.log(f"[AI Monitor] Proxy restart skipped: default endpoint '{ep_name}' not found")
+        except Exception as e:
+            self.log(f"[AI Monitor] Proxy restart failed: {e}")
+
+    def _open_usage(self):
+        """Open the usage window; show an error dialog if that fails."""
+        try:
+            window = UsageWindow(self)
+            self._usage_window = window
+            # Clear the reference once the window is destroyed.
+            window.connect("destroy", lambda *_args: setattr(self, "_usage_window", None))
+        except Exception as exc:
+            import traceback
+            traceback.print_exc()
+            dialog = Gtk.MessageDialog(self, 0, Gtk.MessageType.ERROR, Gtk.ButtonsType.OK, f"Error: {exc}")
+            dialog.run()
+            dialog.destroy()
+
+    def _open_history(self):
+        """Open the request-history window and track it until it is destroyed.
+
+        Any construction error is printed to stderr and shown in a modal
+        error dialog.
+        """
+        try:
+            window = RequestHistoryWindow(self)
+            self._history_window = window
+            window.connect("destroy", lambda *_args: setattr(self, "_history_window", None))
+        except Exception as exc:
+            import traceback
+            traceback.print_exc()
+            self._show_message(Gtk.MessageType.ERROR, f"Error: {exc}")
+
+    def _open_benchmark(self):
+        """Open the benchmark window and track it until it is destroyed.
+
+        Any construction error is printed to stderr and shown in a modal
+        error dialog.
+        """
+        try:
+            window = BenchmarkWindow(self)
+            self._benchmark_window = window
+            window.connect("destroy", lambda *_args: setattr(self, "_benchmark_window", None))
+        except Exception as exc:
+            import traceback
+            traceback.print_exc()
+            self._show_message(Gtk.MessageType.ERROR, f"Error: {exc}")
+
+    def _open_assistant(self):
+        """Spawn the assistant script as a detached process.
+
+        The script ``flet-codex-assist.py`` is resolved next to this file and
+        run with the current Python interpreter in its own session.
+        """
+        import subprocess
+        import sys
+        script = Path(__file__).resolve().parent / "flet-codex-assist.py"
+        command = [sys.executable, str(script)]
+        subprocess.Popen(command, start_new_session=True)
+
+    def _backup_profile(self):
+        """Ask for a destination file and write the profile bundle to it.
+
+        Cancelling the chooser is a no-op; a failed save is reported in an
+        error dialog.
+        """
+        dialog = Gtk.FileChooserDialog(
+            title="Backup Codex Profile",
+            parent=self,
+            action=Gtk.FileChooserAction.SAVE,
+        )
+        dialog.add_buttons(
+            Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL,
+            Gtk.STOCK_SAVE, Gtk.ResponseType.OK,
+        )
+        dialog.set_do_overwrite_confirmation(True)
+        # Suggest a timestamped file name.
+        stamp = time.strftime('%Y%m%d-%H%M%S')
+        dialog.set_current_name(f"codex-profile-{stamp}.json")
+
+        filename = None
+        if dialog.run() == Gtk.ResponseType.OK:
+            filename = dialog.get_filename()
+        dialog.destroy()
+        if not filename:
+            return
+
+        try:
+            save_profile_bundle(filename)
+            self.log(f"Profile backed up to {filename}")
+        except Exception as exc:
+            self._show_message(Gtk.MessageType.ERROR, f"Backup failed:\n{exc}")
+
+    def _refresh_all_models(self):
+        """Start a background refresh of every provider's model list.
+
+        Does nothing while a previous refresh is still flagged as running.
+        The refresh button is disabled until one of the finish callbacks
+        re-enables it.
+        """
+        already_running = getattr(self, "_refresh_running", False)
+        if not already_running:
+            self._refresh_running = True
+            self._refresh_all_btn.set_sensitive(False)
+            self.log("Refreshing models for all providers...")
+            worker = threading.Thread(target=self._refresh_all_models_worker, daemon=True)
+            worker.start()
+
+    def _refresh_all_models_worker(self):
+        """Refresh each endpoint's models and hand the result to the main loop.
+
+        Thread target of ``_refresh_all_models``. Endpoints that refresh are
+        replaced in the loaded data, which is saved only when at least one
+        changed. The outcome is delivered through ``GLib.idle_add``.
+        """
+        try:
+            data = load_endpoints()
+            failed = []
+            updated = 0
+
+            # Walk a copy because entries of data["endpoints"] are replaced.
+            for idx, ep in enumerate(list(data["endpoints"])):
+                refreshed, err = refresh_endpoint_models(ep)
+                if not refreshed:
+                    failed.append(f"{ep['name']}: {err}")
+                    continue
+                data["endpoints"][idx] = refreshed
+                updated += 1
+
+            if updated:
+                save_endpoints(data)
+
+            GLib.idle_add(self._finish_refresh_all_models, updated, failed)
+        except Exception as exc:
+            GLib.idle_add(self._finish_refresh_all_models_error, str(exc))
+
+    def _finish_refresh_all_models(self, updated, failed):
+        """Report the outcome of a refresh-all run and re-enable the button.
+
+        Args:
+            updated: Number of providers whose model list was refreshed.
+            failed: List of "<name>: <error>" strings for providers that
+                could not be refreshed.
+
+        Returns:
+            False, so a ``GLib.idle_add`` registration runs this only once.
+        """
+        try:
+            if updated:
+                self._rebuild_combo()
+                mgr = getattr(self, "_mgr_window", None)
+                if mgr:
+                    try:
+                        mgr._rebuild()
+                    except Exception as e:
+                        # Best effort: a stale manager window must not block
+                        # the report, but the failure should not vanish.
+                        self.log(f"Could not refresh endpoint manager: {e}")
+                self.log(f"Refreshed models for {updated} provider(s)")
+
+            if failed:
+                self._show_message(
+                    Gtk.MessageType.WARNING,
+                    "Some providers could not auto-fetch models.\n\n"
+                    + "\n".join(failed)
+                    + "\n\nThose providers were left unchanged so you can manage them manually."
+                )
+            elif updated:
+                self._show_message(Gtk.MessageType.INFO, f"Refreshed models for {updated} provider(s).")
+            else:
+                self._show_message(Gtk.MessageType.INFO, "No providers were refreshed.")
+        finally:
+            # Always release the guard set by _refresh_all_models.
+            self._refresh_running = False
+            self._refresh_all_btn.set_sensitive(True)
+        return False
+
+    def _finish_refresh_all_models_error(self, err):
+        """Show a refresh failure and re-enable the refresh button.
+
+        Args:
+            err: Error text to display.
+
+        Returns:
+            False, so a ``GLib.idle_add`` registration runs this only once.
+        """
+        message = f"Refresh failed:\n{err}"
+        try:
+            self._show_message(Gtk.MessageType.ERROR, message)
+        finally:
+            # Release the guard even if the dialog could not be shown.
+            self._refresh_running = False
+            self._refresh_all_btn.set_sensitive(True)
+        return False
+
+    def _import_profile(self):
+        """Import a profile bundle chosen by the user, after confirmation.
+
+        Refuses to run while the tracked Codex process is alive. Cancelling
+        the chooser or the confirmation is a no-op; an import failure is
+        reported in an error dialog.
+        """
+        codex_running = self._proc and self._proc.poll() is None
+        if codex_running:
+            self._show_message(Gtk.MessageType.WARNING, "Stop Codex before importing a profile.")
+            return
+
+        picker = Gtk.FileChooserDialog(
+            title="Import Codex Profile",
+            parent=self,
+            action=Gtk.FileChooserAction.OPEN,
+        )
+        picker.add_buttons(
+            Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL,
+            Gtk.STOCK_OPEN, Gtk.ResponseType.OK,
+        )
+        filename = None
+        if picker.run() == Gtk.ResponseType.OK:
+            filename = picker.get_filename()
+        picker.destroy()
+        if not filename:
+            return
+
+        question = Gtk.MessageDialog(
+            self, 0, Gtk.MessageType.QUESTION, Gtk.ButtonsType.YES_NO,
+            "Importing will replace the current endpoints and Codex config. Continue?"
+        )
+        answer = question.run()
+        question.destroy()
+        if answer != Gtk.ResponseType.YES:
+            return
+
+        try:
+            import_profile_bundle(filename)
+            self._rebuild_combo()
+            self.log(f"Profile imported from {filename}")
+            self._show_message(Gtk.MessageType.INFO, "Profile imported successfully.")
+        except Exception as exc:
+            self._show_message(Gtk.MessageType.ERROR, f"Import failed:\n{exc}")
+
+    def _on_endpoints_updated(self):
+        """Rebuild the endpoint combo after the endpoint list has changed."""
+        self._rebuild_combo()
+
+    def _show_message(self, msg_type, text):
+        """Show a modal message dialog with a single OK button.
+
+        Args:
+            msg_type: ``Gtk.MessageType`` value selecting the dialog kind.
+            text: Message to display.
+        """
+        dialog = Gtk.MessageDialog(self, 0, msg_type, Gtk.ButtonsType.OK, text)
+        dialog.run()
+        dialog.destroy()
+
+    def _show_changelog(self):
+        """Show the ``CHANGELOG`` entries in a modal, read-only dialog.
+
+        Each entry is a ``(version, date, items)`` triple. Version headings
+        are rendered bold through a text tag: ``Gtk.TextBuffer.insert`` adds
+        plain text and does not interpret markup, so literal ``<b>`` tags
+        would otherwise be displayed verbatim.
+        """
+        d = Gtk.Dialog(title="Changelog", transient_for=self, modal=True)
+        d.set_default_size(520, 480)
+        d.add_button("Close", Gtk.ResponseType.CLOSE)
+        area = d.get_content_area()
+        area.set_margin_start(12)
+        area.set_margin_end(12)
+        area.set_margin_top(12)
+        area.set_margin_bottom(12)
+        sw = Gtk.ScrolledWindow()
+        sw.set_policy(Gtk.PolicyType.AUTOMATIC, Gtk.PolicyType.AUTOMATIC)
+        area.pack_start(sw, True, True, 0)
+        buf = Gtk.TextBuffer()
+        # 700 is the numeric value of Pango's bold weight; using the number
+        # keeps this block independent of a Pango import.
+        heading = buf.create_tag("changelog-heading", weight=700)
+        tv = Gtk.TextView(buffer=buf)
+        tv.set_editable(False)
+        tv.set_cursor_visible(False)
+        tv.set_wrap_mode(Gtk.WrapMode.WORD_CHAR)
+        sw.add(tv)
+        first = True
+        for ver, date, items in CHANGELOG:
+            if not first:
+                # Blank line between entries, none after the last one.
+                buf.insert(buf.get_end_iter(), "\n\n")
+            first = False
+            buf.insert_with_tags(buf.get_end_iter(), f"v{ver}", heading)
+            buf.insert(buf.get_end_iter(), f"  ({date})")
+            for item in items:
+                buf.insert(buf.get_end_iter(), f"\n  \u2022 {item}")
+        d.show_all()
+        d.run()
+        d.destroy()
+
+    def _show_install_guide(self, which):
+        """Show install instructions in a modal info dialog.
+
+        Args:
+            which: ``"cli"`` selects the Codex CLI guide; any other value
+                selects the Codex Desktop guide.
+        """
+        cli_guide = (
+            "Codex CLI is required to use CLI launch features.\n\n"
+            "Install with npm:\n"
+            "  npm install -g @openai/codex\n\n"
+            "Or download from:\n"
+            "  https://github.com/openai/codex\n\n"
+            "After installing, restart the launcher."
+        )
+        desktop_guide = (
+            "Codex Desktop is required to use Desktop launch features.\n\n"
+            "Expected location: /opt/codex-desktop/start.sh\n\n"
+            "Download from:\n"
+            "  https://codex.desktop.openai.com\n\n"
+            "After installing, restart the launcher."
+        )
+        if which == "cli":
+            title, guide = "Install Codex CLI", cli_guide
+        else:
+            title, guide = "Install Codex Desktop", desktop_guide
+
+        dialog = Gtk.MessageDialog(self, 0, Gtk.MessageType.INFO, Gtk.ButtonsType.OK, guide)
+        dialog.set_title(title)
+        dialog.run()
+        dialog.destroy()
+
+    # ── launch ───────────────────────────────────────────────────
+
+    def _launch(self, target):
+        """Launch Codex for the selected endpoint (or BGP pool) and model.
+
+        Validates the combo selections, marks the UI busy and runs the launch
+        on a daemon thread (``_run_bgp`` for pool entries, ``_run`` otherwise).
+
+        Args:
+            target: ``"desktop"`` for Codex Desktop; anything else means CLI.
+        """
+        name = self._combo.get_active_text()
+        if not name:
+            self.log("ERROR: no endpoint selected")
+            return
+        model = self._model_combo.get_active_text()
+        if not model:
+            self.log("ERROR: no model selected")
+            return
+
+        target_label = 'Desktop' if target == 'desktop' else 'CLI'
+
+        # Pool entries are marked in the combo by a two-character prefix.
+        if name.startswith("🔀 "):
+            pool_name = name[2:]
+            pools = load_bgp_pools().get("pools", [])
+            pool = next((p for p in pools if p["name"] == pool_name), None)
+            if not pool:
+                self.log(f"ERROR: BGP pool '{pool_name}' not found")
+                return
+            self._set_busy(True)
+            self.log(f"=== 🔀 BGP: {pool_name} / {model} → {target_label} ===")
+            bgp_worker = threading.Thread(target=self._run_bgp, args=(pool, model, target), daemon=True)
+            bgp_worker.start()
+            return
+
+        ep = get_endpoint(name)
+        if not ep:
+            self.log("ERROR: endpoint not found")
+            return
+        self._set_busy(True)
+        self.log(f"=== {ep['name']} / {model} → {target_label} ===")
+        worker = threading.Thread(target=self._run, args=(ep, model, target), daemon=True)
+        worker.start()
+
+    def _launch_codex_default(self, target):
+        """Launch Codex with its default (OAuth) configuration.
+
+        When the CLI is not listed as missing, the auth status is checked
+        first and the user may abort if it is not ``"logged_in"``. The launch
+        itself runs on a daemon thread.
+
+        Args:
+            target: ``"desktop"`` for Codex Desktop; anything else means CLI.
+        """
+        cli_present = "cli" not in self._missing
+        if cli_present:
+            status, msg = _check_codex_auth()
+            if status != "logged_in":
+                prompt = Gtk.MessageDialog(
+                    self, 0, Gtk.MessageType.WARNING, Gtk.ButtonsType.YES_NO,
+                    f"Codex auth check: {msg}\n\n"
+                    "Launch may fail without valid authentication.\n"
+                    "Continue anyway?"
+                )
+                answer = prompt.run()
+                prompt.destroy()
+                if answer != Gtk.ResponseType.YES:
+                    self._set_busy(False)
+                    return
+
+        target_label = 'Desktop' if target == 'desktop' else 'CLI'
+        self._set_busy(True)
+        self.log(f"=== Codex Default (OAuth) → {target_label} ===")
+        worker = threading.Thread(target=self._run_codex_default, args=(target,), daemon=True)
+        worker.start()
+
+    def _run(self, ep, model, target):
+        """Launch Codex against one endpoint and clean up when it exits.
+
+        Started as a thread target by ``_launch``. Blocks until the launched
+        Desktop/CLI process has exited (see ``_launch_desktop`` / ``_launch_cli``).
+
+        Args:
+            ep: Endpoint dict; ``name`` and ``backend_type`` are read here.
+            model: Model id written into the Codex config.
+            target: ``"desktop"`` for Codex Desktop; anything else means CLI.
+        """
+        # Set to True only when _launch_desktop reports a warm-start handoff.
+        keep_session_alive = False
+        try:
+            self.log("Cleaning up stale processes…")
+            _run_cleanup(self.log)
+            recover_config_if_needed(self.log)
+
+            # Every backend type other than "native" goes through the proxy.
+            needs_proxy = ep["backend_type"] != "native"
+
+            if needs_proxy:
+                self.log("Starting translation proxy…")
+                try:
+                    proxy_port = _start_proxy_for(ep, self.log)
+                except RuntimeError as e:
+                    # Show the dialog on the main loop; the finally block
+                    # below still runs on this early return.
+                    GLib.idle_add(self._show_error_dialog, "Proxy startup failed", str(e))
+                    return
+                self.log(f"Configuring Codex for {ep['name']} (proxied on :{proxy_port})…")
+                begin_config_transaction(f"launch:{ep['name']}")
+                write_config_for_translated(ep, model, proxy_port)
+            else:
+                self.log(f"Configuring Codex for {ep['name']} (native)…")
+                begin_config_transaction(f"launch:{ep['name']}")
+                write_config_for_native(ep, model)
+
+            if target == "desktop":
+                if needs_proxy:
+                    _kill_existing_desktop(self.log)
+                keep_session_alive = self._launch_desktop(ep, model)
+            else:
+                self._launch_cli(ep, model)
+
+        except Exception as e:
+            self.log(f"ERROR: {e}")
+        finally:
+            if keep_session_alive:
+                # Leave proxy and config in place for the still-running Desktop.
+                self.log("Warm-start handoff detected; keeping proxy/config active for running Desktop.")
+                self._set_busy(False)
+                self.log("Ready. Use Kill && Cleanup when finished.")
+            else:
+                # Normal teardown: stop the proxy, then restore the config.
+                _stop_proxy()
+                restore_config()
+                end_config_transaction()
+                self._set_busy(False)
+                self.log("Ready.")
+
+    def _run_bgp(self, pool, model, target):
+        """Launch Codex through a BGP proxy built from a route pool.
+
+        Started as a thread target by ``_launch``. Writes a proxy config file
+        for the pool, starts the proxy on a free port, points Codex at it and
+        blocks until the launched Desktop/CLI process has exited.
+
+        Args:
+            pool: Pool dict; ``name`` and ``routes`` are read here.
+            model: Model id used as the default and as the fallback for
+                routes that carry no ``model`` key.
+            target: ``"desktop"`` for Codex Desktop; anything else means CLI.
+        """
+        # Set to True only when _launch_desktop reports a warm-start handoff.
+        keep_session_alive = False
+        try:
+            self.log("Cleaning up stale processes…")
+            _run_cleanup(self.log)
+            recover_config_if_needed(self.log)
+
+            port = _pick_free_port()
+            self.log(f"Starting BGP proxy with {len(pool.get('routes', []))} routes on :{port}…")
+            # Synthetic endpoint dict standing in for the pool; base_url is a
+            # placeholder value.
+            bgp_ep = {
+                "name": pool["name"],
+                "backend_type": "openai-compat",
+                "base_url": "http://bgp.placeholder",
+                "api_key": "",
+                "default_model": model,
+                # dict.fromkeys de-duplicates while keeping first-seen order.
+                "models": list(dict.fromkeys(r.get("model", model) for r in pool.get("routes", []))),
+            }
+            # Proxy config serialized to JSON below.
+            pcfg = {
+                "port": port,
+                "backend_type": "openai-compat",
+                "target_url": "http://bgp.placeholder",
+                "api_key": "",
+                "bgp_routes": pool.get("routes", []),
+                "models": [{"id": m, "object": "model", "created": 1700000000, "owned_by": "bgp"} for m in bgp_ep["models"]],
+            }
+            pcfg_path = PROXY_CONFIG_DIR / f"proxy-{safe_name(pool['name'])}-{port}.json"
+            pcfg_path.parent.mkdir(parents=True, exist_ok=True)
+            pcfg_path.write_text(json.dumps(pcfg, indent=2))
+            try:
+                _start_proxy_with_config(pcfg_path, port, self.log)
+            except RuntimeError as e:
+                # Show the dialog on the main loop; the finally block below
+                # still runs on this early return.
+                GLib.idle_add(self._show_error_dialog, "BGP proxy startup failed", str(e))
+                return
+
+            begin_config_transaction(f"launch:bgp:{pool['name']}")
+            write_config_for_translated(bgp_ep, model, port)
+
+            if target == "desktop":
+                _kill_existing_desktop(self.log)
+                keep_session_alive = self._launch_desktop(bgp_ep, model)
+            else:
+                self._launch_cli(bgp_ep, model)
+
+        except Exception as e:
+            self.log(f"ERROR: {e}")
+        finally:
+            if keep_session_alive:
+                # Leave proxy and config in place for the still-running Desktop.
+                self.log("Warm-start handoff detected; keeping proxy/config active for running Desktop.")
+                self._set_busy(False)
+                self.log("Ready. Use Kill && Cleanup when finished.")
+            else:
+                # Normal teardown: stop the proxy, then restore the config.
+                _stop_proxy()
+                restore_config()
+                end_config_transaction()
+                self._set_busy(False)
+                self.log("Ready.")
+
+    def _run_codex_default(self, target):
+        """Launch Codex with no custom config (default OAuth) and clean up.
+
+        Started as a thread target by ``_launch_codex_default``. Removes the
+        ``CONFIG`` file for the duration of the run and restores the config
+        afterwards.
+
+        Args:
+            target: ``"desktop"`` for Codex Desktop; anything else means CLI.
+        """
+        try:
+            self.log("Cleaning up stale processes…")
+            _run_cleanup(self.log)
+            _stop_proxy()
+            recover_config_if_needed(self.log)
+
+            self.log("Resetting config to Codex defaults (OAuth)…")
+            begin_config_transaction("launch:default")
+            # Delete the config file after the transaction has begun.
+            if CONFIG.exists():
+                CONFIG.unlink()
+
+            if target == "desktop":
+                self._launch_desktop_direct()
+            else:
+                self._launch_cli_default()
+        except Exception as e:
+            self.log(f"ERROR: {e}")
+        finally:
+            # Runs on success and failure alike.
+            restore_config()
+            end_config_transaction()
+            self._set_busy(False)
+            self.log("Ready.")
+
+    def _show_error_dialog(self, title, message):
+        """Show a modal error dialog with a primary and a secondary text.
+
+        Args:
+            title: Shown as the dialog's primary text (converted with ``str``).
+            message: Shown as the secondary text (converted with ``str``).
+        """
+        primary = str(title)
+        secondary = str(message)
+        dlg = Gtk.MessageDialog(
+            transient_for=self,
+            flags=0,
+            message_type=Gtk.MessageType.ERROR,
+            buttons=Gtk.ButtonsType.CLOSE,
+            text=primary,
+        )
+        dlg.format_secondary_text(secondary)
+        dlg.run()
+        dlg.destroy()
+
+    def _launch_desktop(self, ep, model):
+        """Start Codex Desktop, wait for it to exit and report the outcome.
+
+        Args:
+            ep: Endpoint dict; only ``backend_type`` is read here.
+            model: Not used in this method.
+
+        Returns:
+            True when the process exited with code 0 in under 12 s and the
+            last launch-log lines mention "warm-start"; False otherwise,
+            including when the tracked process was replaced or cleared
+            while waiting.
+        """
+        args = [str(START_SH)]
+        if ep["backend_type"] != "native":
+            args += ["--", "--ozone-platform=wayland"]
+
+        # start_new_session=True puts the child in its own session (and
+        # process group) like preexec_fn=os.setsid did, without running
+        # Python code between fork and exec, which the subprocess
+        # documentation warns is unsafe in threaded programs.
+        proc = subprocess.Popen(
+            args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
+            start_new_session=True,
+        )
+        self._proc = proc
+        self.log(f"Desktop started (PID {proc.pid})")
+        self.log(f"Log: {LAUNCH_LOG}")
+
+        t0 = time.time()
+        stall_warned = False
+        # Poll the local handle: _kill() resets self._proc to None, and
+        # re-reading the attribute could hit None between the two checks.
+        while self._proc is proc and proc.poll() is None:
+            time.sleep(1.5)
+            el = time.time() - t0
+            if el > 20 and not stall_warned:
+                self.log("⚠  Still starting after 20 s — possible stall. Click Kill if window doesn't appear.")
+                self.log(f"--- last log lines ---\n{_last_log_lines()}")
+                stall_warned = True
+
+        if self._proc is not proc:
+            # Someone else took over process tracking; nothing to report.
+            return False
+
+        rc = proc.poll()
+        el = time.time() - t0
+        self.log(f"Desktop exited (code {rc}) after {el:.0f}s")
+        warm_start = False
+        if el < 12:
+            self.log("TIP: Quick exit — may be warm-start handoff (normal) or crash. Kill && retry if needed.")
+            last_lines = _last_log_lines()
+            self.log(f"--- last log lines ---\n{last_lines}")
+            warm_start = rc == 0 and "warm-start" in last_lines.lower()
+        self._proc = None
+        return warm_start
+
+    def _launch_cli(self, ep, model):
+        """Launch codex CLI in a terminal with the selected endpoint.
+
+        Picks the first available terminal emulator, starts ``codex`` inside
+        it and blocks until the terminal process exits.
+
+        Args:
+            ep: Endpoint dict; ``name`` and ``backend_type`` are read here.
+            model: Model id passed to codex as ``-c model=<model>``.
+        """
+        self.log(f"Launching Codex CLI with {ep['name']}…")
+
+        # (executable, arguments that introduce the command to run)
+        terms = [
+            ("x-terminal-emulator", ["-e"]),
+            ("kgx", ["--"]),
+            ("gnome-terminal", ["--"]),
+            ("konsole", ["-e"]),
+            ("xterm", ["-e"]),
+        ]
+        term, term_args = next(
+            ((exe, exec_args) for exe, exec_args in terms if shutil.which(exe)),
+            (None, None),
+        )
+        if not term:
+            self.log("ERROR: no terminal emulator found (tried x-terminal-emulator, kgx, gnome-terminal, konsole, xterm)")
+            return
+
+        sandbox = self._sandbox_combo.get_active_id() or "workspace-write"
+        approval = self._approval_combo.get_active_id() or "on-request"
+
+        cmd_parts = [term, *term_args, "codex"]
+        if ep["backend_type"] != "native":
+            # Non-native endpoints select their profile by endpoint name.
+            cmd_parts += ["--profile", ep["name"]]
+        cmd_parts += ["-c", f"model={model}", "-s", sandbox, "-a", approval]
+
+        self.log(f"Running: {' '.join(cmd_parts)}")
+        # start_new_session=True replaces preexec_fn=os.setsid: same new
+        # session/process group, but without running Python code between
+        # fork and exec, which the subprocess documentation warns is unsafe
+        # in threaded programs.
+        proc = subprocess.Popen(cmd_parts, start_new_session=True)
+        self._proc = proc
+        self.log(f"CLI started in terminal (PID {proc.pid})")
+
+        # Wait for terminal process. Poll the local handle: _kill() resets
+        # self._proc to None, and re-reading the attribute could hit None
+        # between the two checks.
+        # NOTE(review): some terminal launchers may return before the command
+        # inside them finishes — confirm this wait covers the CLI lifetime.
+        while self._proc is proc and proc.poll() is None:
+            time.sleep(1.5)
+
+        if self._proc is proc:
+            self.log(f"CLI exited (code {proc.poll()})")
+            self._proc = None
+
+    def _launch_desktop_direct(self):
+        """Start Codex Desktop with the default (OAuth) setup and wait for exit.
+
+        Logs a stall warning after 20 s without exit and a hint plus the last
+        launch-log lines when the process exits in under 12 s.
+        """
+        self.log("Launching Codex Desktop (default OAuth)…")
+        # start_new_session=True replaces preexec_fn=os.setsid: same new
+        # session/process group, but without running Python code between
+        # fork and exec, which the subprocess documentation warns is unsafe
+        # in threaded programs.
+        proc = subprocess.Popen(
+            [str(START_SH), "--", "--ozone-platform=wayland"],
+            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
+            start_new_session=True,
+        )
+        self._proc = proc
+        self.log(f"Desktop started (PID {proc.pid})")
+        self.log(f"Log: {LAUNCH_LOG}")
+
+        t0 = time.time()
+        stall_warned = False
+        # Poll the local handle: _kill() resets self._proc to None, and
+        # re-reading the attribute could hit None between the two checks.
+        while self._proc is proc and proc.poll() is None:
+            time.sleep(1.5)
+            el = time.time() - t0
+            if el > 20 and not stall_warned:
+                self.log("Still starting after 20s — possible stall. Click Kill if window doesn't appear.")
+                self.log(f"--- last log lines ---\n{_last_log_lines()}")
+                stall_warned = True
+
+        if self._proc is proc:
+            rc = proc.poll()
+            el = time.time() - t0
+            self.log(f"Desktop exited (code {rc}) after {el:.0f}s")
+            if el < 12:
+                self.log("TIP: Quick exit — may be warm-start handoff (normal) or crash.")
+                self.log(f"--- last log lines ---\n{_last_log_lines()}")
+            self._proc = None
+
+    def _launch_cli_default(self):
+        """Launch the Codex CLI with the default (OAuth) setup in a terminal.
+
+        Picks the first available terminal emulator, starts ``codex`` with
+        the selected sandbox and approval modes, and blocks until the
+        terminal process exits.
+        """
+        self.log("Launching Codex CLI (default OAuth)…")
+        # (executable, arguments that introduce the command to run)
+        terms = [
+            ("x-terminal-emulator", ["-e"]),
+            ("kgx", ["--"]),
+            ("gnome-terminal", ["--"]),
+            ("konsole", ["-e"]),
+            ("xterm", ["-e"]),
+        ]
+        term, term_args = next(
+            ((exe, exec_args) for exe, exec_args in terms if shutil.which(exe)),
+            (None, None),
+        )
+        if not term:
+            self.log("ERROR: no terminal emulator found")
+            return
+
+        sandbox = self._sandbox_combo.get_active_id() or "workspace-write"
+        approval = self._approval_combo.get_active_id() or "on-request"
+        cmd_parts = [term, *term_args, "codex", "-s", sandbox, "-a", approval]
+        self.log(f"Running: {' '.join(cmd_parts)}")
+        # start_new_session=True replaces preexec_fn=os.setsid: same new
+        # session/process group, but without running Python code between
+        # fork and exec, which the subprocess documentation warns is unsafe
+        # in threaded programs.
+        proc = subprocess.Popen(cmd_parts, start_new_session=True)
+        self._proc = proc
+        self.log(f"CLI started in terminal (PID {proc.pid})")
+
+        # Poll the local handle: _kill() resets self._proc to None, and
+        # re-reading the attribute could hit None between the two checks.
+        while self._proc is proc and proc.poll() is None:
+            time.sleep(1.5)
+
+        if self._proc is proc:
+            self.log(f"CLI exited (code {proc.poll()})")
+            self._proc = None
+
+    # ── kill ─────────────────────────────────────────────────────
+
+    def _kill(self):
+        """Terminate the tracked process group and reset launcher state.
+
+        Sends SIGTERM to the process group, escalates to SIGKILL if the
+        process is still alive one second later, then stops the proxy, runs
+        cleanup, restores the config and removes the launch log.
+        """
+        self.log("=== Killing ===")
+        proc = self._proc
+        if proc and proc.poll() is None:
+            try:
+                group_id = os.getpgid(proc.pid)
+                os.killpg(group_id, signal.SIGTERM)
+                # Grace period before escalating.
+                time.sleep(1)
+                still_alive = proc.poll() is None
+                if still_alive:
+                    os.killpg(group_id, signal.SIGKILL)
+            except (ProcessLookupError, PermissionError):
+                # Process already gone or not ours to signal.
+                pass
+            self._proc = None
+
+        _stop_proxy()
+        _run_cleanup(self.log)
+        restore_config()
+        end_config_transaction()
+
+        LOG_DIR.mkdir(parents=True, exist_ok=True)
+        LAUNCH_LOG.unlink(missing_ok=True)
+        self.log("Cleanup complete")
+        self._set_busy(False)
+        self.log("Ready.")
+
+    def _do_close(self):
+        """Quit the application, asking first if Codex is still running.
+
+        Answering anything but Yes to the prompt aborts the close.
+        """
+        codex_running = self._proc and self._proc.poll() is None
+        if codex_running:
+            prompt = Gtk.MessageDialog(
+                self, 0, Gtk.MessageType.QUESTION, Gtk.ButtonsType.YES_NO,
+                "Codex is still running. Kill it?",
+            )
+            answer = prompt.run()
+            prompt.destroy()
+            if answer != Gtk.ResponseType.YES:
+                return
+            self._kill()
+        _stop_proxy()
+        Gtk.main_quit()
+
+# ═══════════════════════════════════════════════════════════════════
+# Endpoint manager dialog
+# ═══════════════════════════════════════════════════════════════════
+
+class EndpointMgr(Gtk.Window):
+    """Modal window that lists the configured endpoints and manages them.
+
+    Offers Add / Edit / Delete / Set Default plus "Doctor" diagnostics for
+    the selected endpoint or for all of them. Changes are persisted with
+    ``save_endpoints`` and the parent is notified through its
+    ``_on_endpoints_updated`` method.
+    """
+
+    def __init__(self, parent):
+        """Build the window, populate the list and show it.
+
+        Args:
+            parent: Owning window; used as transient parent and notified via
+                ``_on_endpoints_updated()`` after delete / set-default.
+        """
+        super().__init__(title="Manage Endpoints")
+        self.set_transient_for(parent)
+        self.set_modal(True)
+        self._parent = parent
+        self.set_default_size(500, 350)
+        self.set_border_width(12)
+        self.set_position(Gtk.WindowPosition.CENTER_ON_PARENT)
+
+        vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=8)
+        self.add(vbox)
+
+        title_lbl = Gtk.Label(label="<b>Endpoints</b>")
+        title_lbl.set_use_markup(True)
+        vbox.pack_start(title_lbl, False, False, 0)
+
+        sw = Gtk.ScrolledWindow()
+        vbox.pack_start(sw, True, True, 0)
+        self._store = Gtk.ListStore(str, str, str, str)  # name, provider, backend, default_model
+        self._tree = Gtk.TreeView(model=self._store)
+        for i, title in enumerate(["Name", "Provider", "Type", "Default Model"]):
+            col = Gtk.TreeViewColumn(title, Gtk.CellRendererText(), text=i)
+            col.set_resizable(True)
+            self._tree.append_column(col)
+        sw.add(self._tree)
+
+        btn_bar = Gtk.Box(spacing=8)
+        vbox.pack_start(btn_bar, False, False, 0)
+        self._add_btn = Gtk.Button(label="Add")
+        self._add_btn.connect("clicked", lambda b: self._add())
+        btn_bar.pack_start(self._add_btn, False, False, 0)
+        self._edit_btn = Gtk.Button(label="Edit")
+        self._edit_btn.connect("clicked", lambda b: self._edit())
+        btn_bar.pack_start(self._edit_btn, False, False, 0)
+        self._delete_btn = Gtk.Button(label="Delete")
+        self._delete_btn.connect("clicked", lambda b: self._delete())
+        btn_bar.pack_start(self._delete_btn, False, False, 0)
+        self._default_btn = Gtk.Button(label="Set Default")
+        self._default_btn.connect("clicked", lambda b: self._set_default())
+        btn_bar.pack_start(self._default_btn, False, False, 0)
+        self._doctor_btn = Gtk.Button(label="Doctor")
+        self._doctor_btn.connect("clicked", lambda b: self._doctor_selected())
+        btn_bar.pack_start(self._doctor_btn, False, False, 0)
+        self._doctor_all_btn = Gtk.Button(label="Doctor All")
+        self._doctor_all_btn.connect("clicked", lambda b: self._doctor_all())
+        btn_bar.pack_start(self._doctor_all_btn, False, False, 0)
+        self._mgr_close_btn = Gtk.Button(label="Close")
+        self._mgr_close_btn.connect("clicked", lambda b: self.destroy())
+        btn_bar.pack_end(self._mgr_close_btn, False, False, 0)
+
+        self._rebuild()
+        self.show_all()
+
+    def _rebuild(self):
+        """Reload the endpoints from storage and refill the list store."""
+        data = load_endpoints()
+        self._store.clear()
+        for ep in data["endpoints"]:
+            provider = ep.get("provider_preset", "Custom")
+            bt = label_for_backend(ep["backend_type"])
+            self._store.append([ep["name"], provider, bt, ep.get("default_model", "")])
+
+    def _selected(self):
+        """Return the name of the selected endpoint, or None if none is selected."""
+        sel = self._tree.get_selection()
+        _model, tree_iter = sel.get_selected()
+        if tree_iter is None:
+            return None
+        return self._store[tree_iter][0]
+
+    def _add(self):
+        """Open the edit dialog for a new endpoint."""
+        try:
+            self._dialog = EditEndpointDialog(self, None)
+            self._dialog.connect("destroy", lambda *_: setattr(self, "_dialog", None))
+        except Exception as e:
+            import traceback
+            traceback.print_exc()
+            d = Gtk.MessageDialog(self, 0, Gtk.MessageType.ERROR, Gtk.ButtonsType.OK, f"Error: {e}")
+            d.run()
+            d.destroy()
+
+    def _edit(self):
+        """Open the edit dialog for the selected endpoint, if any."""
+        name = self._selected()
+        if not name:
+            return
+        try:
+            self._dialog = EditEndpointDialog(self, name)
+            self._dialog.connect("destroy", lambda *_: setattr(self, "_dialog", None))
+        except Exception as e:
+            import traceback
+            traceback.print_exc()
+            d = Gtk.MessageDialog(self, 0, Gtk.MessageType.ERROR, Gtk.ButtonsType.OK, f"Error: {e}")
+            d.run()
+            d.destroy()
+
+    def _delete(self):
+        """Delete the selected endpoint after confirmation.
+
+        When the deleted endpoint was the default, the first remaining
+        endpoint becomes the default (or None when the list is empty).
+        """
+        name = self._selected()
+        if not name:
+            return
+        d = Gtk.MessageDialog(self, 0, Gtk.MessageType.QUESTION, Gtk.ButtonsType.YES_NO,
+                              f'Delete endpoint "{name}"?')
+        r = d.run()
+        d.destroy()
+        if r != Gtk.ResponseType.YES:
+            return
+        data = load_endpoints()
+        data["endpoints"] = [e for e in data["endpoints"] if e["name"] != name]
+        if data.get("default") == name:
+            data["default"] = data["endpoints"][0]["name"] if data["endpoints"] else None
+        save_endpoints(data)
+        self._rebuild()
+        self._parent._on_endpoints_updated()
+
+    def _set_default(self):
+        """Persist the selected endpoint as the default one."""
+        name = self._selected()
+        if not name:
+            return
+        data = load_endpoints()
+        data["default"] = name
+        save_endpoints(data)
+        self._rebuild()
+        self._parent._on_endpoints_updated()
+
+    def _doctor_selected(self):
+        """Run diagnostics for the selected endpoint behind a wait dialog.
+
+        The checks run on a daemon thread; the wait dialog is destroyed and
+        the results are shown from the GTK main loop.
+        """
+        name = self._selected()
+        if not name:
+            return
+        ep = get_endpoint(name)
+        if not ep:
+            return
+        wait_dlg = Gtk.Dialog(title=f"Doctor: {name}…", parent=self, modal=True)
+        wait_dlg.set_default_size(280, 80)
+        lbl = Gtk.Label(label=f"Running diagnostics for {name}…")
+        lbl.set_margin_top(16)
+        lbl.set_margin_bottom(16)
+        wait_dlg.get_content_area().pack_start(lbl, True, True, 0)
+        wait_dlg.show_all()
+
+        def _run():
+            try:
+                checks = run_endpoint_doctor(ep)
+            except Exception as e:
+                # Same fallback as _doctor_all: without it an exception would
+                # end the thread before the wait dialog is dismissed.
+                checks = [("Doctor run", False, str(e)[:100])]
+            GLib.idle_add(wait_dlg.destroy)
+            GLib.idle_add(_show_doctor_results, self, name, checks)
+
+        threading.Thread(target=_run, daemon=True).start()
+        wait_dlg.run()
+
+    def _doctor_all(self):
+        """Run diagnostics for every endpoint behind a wait dialog.
+
+        A failing diagnostic run is recorded as a single failed check for
+        that endpoint instead of aborting the whole batch.
+        """
+        data = load_endpoints()
+        endpoints = data.get("endpoints", [])
+        if not endpoints:
+            d = Gtk.MessageDialog(self, 0, Gtk.MessageType.INFO, Gtk.ButtonsType.OK, "No endpoints configured.")
+            d.run()
+            d.destroy()
+            return
+        wait_dlg = Gtk.Dialog(title="Doctor All…", parent=self, modal=True)
+        wait_dlg.set_default_size(320, 80)
+        lbl = Gtk.Label(label=f"Testing {len(endpoints)} endpoints…")
+        lbl.set_margin_top(16)
+        lbl.set_margin_bottom(16)
+        wait_dlg.get_content_area().pack_start(lbl, True, True, 0)
+        wait_dlg.show_all()
+
+        all_results = {}
+
+        def _run():
+            for ep in endpoints:
+                try:
+                    all_results[ep["name"]] = run_endpoint_doctor(ep)
+                except Exception as e:
+                    all_results[ep["name"]] = [("Doctor run", False, str(e)[:100])]
+            GLib.idle_add(wait_dlg.destroy)
+            GLib.idle_add(self._show_doctor_all_results, all_results)
+
+        threading.Thread(target=_run, daemon=True).start()
+        wait_dlg.run()
+
+    def _show_doctor_all_results(self, all_results):
+        """Show the per-endpoint diagnostic results in a scrollable dialog.
+
+        Args:
+            all_results: Mapping of endpoint name to a list of
+                ``(check_name, ok, detail)`` tuples, where ``ok`` is True,
+                False, or another value rendered as a neutral marker.
+        """
+        def esc(value):
+            # Names and details are free text (e.g. error messages); escape
+            # them so characters like "<" or "&" cannot break the markup.
+            return GLib.markup_escape_text(str(value), -1)
+
+        dlg = Gtk.Dialog(title="Doctor All Results", parent=self, modal=True)
+        dlg.add_button("Close", Gtk.ResponseType.CLOSE)
+        dlg.set_default_size(560, 450)
+        sw = Gtk.ScrolledWindow()
+        sw.set_policy(Gtk.PolicyType.NEVER, Gtk.PolicyType.AUTOMATIC)
+        area = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=8)
+        area.set_margin_start(12)
+        area.set_margin_end(12)
+        area.set_margin_top(12)
+        area.set_margin_bottom(12)
+        sw.add(area)
+        for ep_name, checks in all_results.items():
+            passed = sum(1 for _, ok, _ in checks if ok is True)
+            failed = sum(1 for _, ok, _ in checks if ok is False)
+            if failed:
+                color, status = "#e74c3c", f"{failed} failed"
+            else:
+                color, status = "#27ae60", f"{passed} passed"
+            hdr = Gtk.Label()
+            hdr.set_markup(f'<b>{esc(ep_name)}</b>  <span foreground="{color}">{status}</span>')
+            hdr.set_xalign(0)
+            area.pack_start(hdr, False, False, 4)
+            for name, ok, detail in checks:
+                if ok is True:
+                    sym, sc = "\u2713", "#27ae60"
+                elif ok is False:
+                    sym, sc = "\u2717", "#e74c3c"
+                else:
+                    sym, sc = "\u25CB", "#f39c12"
+                row = Gtk.Box(spacing=4)
+                row.set_margin_start(12)
+                icon = Gtk.Label()
+                icon.set_markup(f'<span foreground="{sc}" weight="bold">{sym}</span>')
+                lbl = Gtk.Label()
+                lbl.set_markup(f'<span size="small"><b>{esc(name)}</b>'
+                               + (f'  <span foreground="#7f8c8d">{esc(detail)}</span>' if detail else '')
+                               + '</span>')
+                lbl.set_xalign(0)
+                row.pack_start(icon, False, False, 0)
+                row.pack_start(lbl, False, False, 0)
+                area.pack_start(row, False, False, 1)
+            sep = Gtk.Separator()
+            area.pack_start(sep, False, False, 4)
+        dlg.get_content_area().pack_start(sw, True, True, 0)
+        dlg.show_all()
+        dlg.run()
+        dlg.destroy()
+
+class EditEndpointDialog(Gtk.Dialog):
+    def __init__(self, parent, existing_name):
+        """Build the add/edit endpoint dialog and show it.
+
+        Args:
+            parent: Window this dialog is transient for; also stored as
+                ``self._parent_mgr``.
+            existing_name: Name of the endpoint to edit (loaded through
+                ``get_endpoint``), or a falsy value to start a new endpoint
+                from the built-in defaults.
+        """
+        title = "Edit Endpoint" if existing_name else "Add Endpoint"
+        Gtk.Dialog.__init__(self, title=title)
+        self.set_transient_for(parent)
+        self.set_modal(True)
+        self._parent_mgr = parent
+        self._existing_name = existing_name
+        self._data = get_endpoint(existing_name) if existing_name else {
+            "name": "", "backend_type": "openai-compat",
+            "base_url": "", "api_key": "", "default_model": "", "models": [],
+            "provider_preset": "Custom",
+        }
+        self.set_default_size(480, 520)
+
+        area = self.get_content_area()
+        area.set_spacing(6)
+        area.set_margin_start(12)
+        area.set_margin_end(12)
+        area.set_margin_top(12)
+        area.set_margin_bottom(12)
+
+        grid = Gtk.Grid(column_spacing=8, row_spacing=6)
+        area.pack_start(grid, False, False, 0)
+
+        def add_row(row, label, widget):
+            # Right-aligned caption in column 0, editor widget in column 1.
+            grid.attach(Gtk.Label(label=label, xalign=1), 0, row, 1, 1)
+            grid.attach(widget, 1, row, 1, 1)
+
+        self._entry_name = Gtk.Entry(text=self._data.get("name", ""))
+        add_row(0, "Name:", self._entry_name)
+
+        self._combo_preset = Gtk.ComboBoxText()
+        self._preset_names = list(PROVIDER_PRESETS.keys())
+        for preset_name in self._preset_names:
+            self._combo_preset.append_text(preset_name)
+        # Select the saved preset, falling back to the first entry when the
+        # saved name is unknown. Done before connecting "changed" so the
+        # initial selection does not trigger the preset handler.
+        saved_preset = self._data.get("provider_preset", "Custom")
+        self._combo_preset.set_active(
+            self._preset_names.index(saved_preset) if saved_preset in self._preset_names else 0)
+        self._combo_preset.connect("changed", lambda c: self._apply_selected_preset())
+        add_row(1, "Preset:", self._combo_preset)
+
+        self._combo_type = Gtk.ComboBoxText()
+        for val, lab in [("openai-compat", "OpenAI-compatible (needs proxy)"),
+                          ("anthropic", "Anthropic (needs proxy)"),
+                          ("command-code", "Command Code (needs proxy)"),
+                          ("freebuff", "Freebuff - Free DeepSeek/Kimi (needs proxy)"),
+                          ("gemini-oauth-cli", "Gemini CLI OAuth (needs proxy)"),
+                          ("gemini-oauth-antigravity", "Antigravity OAuth (needs proxy)"),
+                          ("native", "Native OpenAI (no proxy)")]:
+            self._combo_type.append(val, lab)
+        bt = self._data.get("backend_type", "openai-compat")
+        self._combo_type.set_active_id(bt)
+        add_row(2, "Type:", self._combo_type)
+
+        self._entry_url = Gtk.Entry(text=self._data.get("base_url", ""))
+        add_row(3, "Base URL:", self._entry_url)
+
+        self._entry_key = Gtk.Entry(text=self._data.get("api_key", ""))
+        self._entry_key.set_visibility(False)  # mask the key like a password
+        key_box = Gtk.Box(spacing=6)
+        key_box.pack_start(self._entry_key, True, True, 0)
+        self._oauth_btn = Gtk.Button(label="OAuth Login")
+        self._oauth_btn.connect("clicked", lambda b: self._do_oauth_login())
+        key_box.pack_start(self._oauth_btn, False, False, 0)
+        add_row(4, "API Key:", key_box)
+        # The show_all() at the end of __init__ would re-show a hidden button;
+        # opting out keeps it hidden until _apply_selected_preset() shows it
+        # for an OAuth preset.
+        self._oauth_btn.set_no_show_all(True)
+        self._oauth_btn.set_visible(False)
+
+        self._entry_cc_ver = Gtk.Entry(text=self._data.get("cc_version", ""))
+        self._entry_cc_ver.set_placeholder_text("e.g. 0.26.8 (Command Code only)")
+        add_row(5, "CC Version:", self._entry_cc_ver)
+
+        reasoning_css = b"""
+        switch.reasoning-toggle {
+            min-width: 56px; min-height: 28px;
+            border-radius: 14px;
+            background: #e67e22;
+            border: 2px solid #cf6d17;
+        }
+        switch.reasoning-toggle:checked {
+            background: #2ecc71;
+            border: 2px solid #27ae60;
+        }
+        switch.reasoning-toggle slider {
+            min-width: 24px; min-height: 24px;
+            border-radius: 12px;
+            background: white;
+            border: 1px solid #bbb;
+        }
+        """
+        reasoning_box = Gtk.Box(spacing=10)
+        self._switch_reasoning = Gtk.Switch()
+        self._switch_reasoning.set_name("reasoning-toggle")
+        ctx = self._switch_reasoning.get_style_context()
+        ctx.add_class("reasoning-toggle")
+        try:
+            css_prov = Gtk.CssProvider()
+            css_prov.load_from_data(reasoning_css)
+            ctx.add_provider(css_prov, Gtk.STYLE_PROVIDER_PRIORITY_USER)
+        except Exception:
+            # Styling is cosmetic only; fall back to the theme's default switch.
+            pass
+        self._switch_reasoning.set_active(self._data.get("reasoning_enabled", True))
+        self._switch_reasoning.connect("notify::active", lambda *a: self._on_reasoning_toggled())
+        reasoning_box.pack_start(self._switch_reasoning, False, False, 0)
+        self._lbl_reasoning = Gtk.Label()
+        reasoning_box.pack_start(self._lbl_reasoning, False, False, 0)
+        add_row(6, "Reasoning:", reasoning_box)
+
+        self._combo_effort = Gtk.ComboBoxText()
+        for ev, el in [("none", "None"), ("minimal", "Minimal"), ("low", "Low"),
+                       ("medium", "Medium"), ("high", "High"), ("max", "Max")]:
+            self._combo_effort.append(ev, el)
+        saved_effort = self._data.get("reasoning_effort", "medium")
+        self._combo_effort.set_active_id(saved_effort if saved_effort in ("none","minimal","low","medium","high","max") else "medium")
+        add_row(7, "Effort:", self._combo_effort)
+        # Sync the ON/OFF label and effort-combo sensitivity with the switch.
+        self._on_reasoning_toggled()
+
+        # Models
+        mlbl = Gtk.Label(label="Models:", xalign=0)
+        area.pack_start(mlbl, False, False, 4)
+
+        mbox = Gtk.Box(spacing=6)
+        area.pack_start(mbox, False, False, 0)
+        self._entry_model = Gtk.Entry()
+        mbox.pack_start(self._entry_model, True, True, 0)
+        self._add_model_btn = Gtk.Button(label="Add")
+        self._add_model_btn.connect("clicked", lambda b: self._add_model())
+        mbox.pack_start(self._add_model_btn, False, False, 0)
+        self._add_list_btn = Gtk.Button(label="Add List")
+        self._add_list_btn.connect("clicked", lambda b: self._add_models_from_text())
+        mbox.pack_start(self._add_list_btn, False, False, 0)
+        self._fetch_models_btn = Gtk.Button(label="Fetch from API")
+        self._fetch_models_btn.connect("clicked", lambda b: self._fetch_models())
+        mbox.pack_start(self._fetch_models_btn, False, False, 0)
+        self._test_btn = Gtk.Button(label="Test Endpoint")
+        self._test_btn.connect("clicked", lambda b: self._diagnose_endpoint())
+        mbox.pack_start(self._test_btn, False, False, 0)
+
+        bulk_lbl = Gtk.Label(label="Bulk add models (one per line or comma-separated):", xalign=0)
+        area.pack_start(bulk_lbl, False, False, 2)
+        bulk_sw = Gtk.ScrolledWindow()
+        bulk_sw.set_min_content_height(72)
+        area.pack_start(bulk_sw, False, False, 0)
+        self._bulk_buf = Gtk.TextBuffer()
+        self._bulk_text = Gtk.TextView(buffer=self._bulk_buf)
+        self._bulk_text.set_wrap_mode(Gtk.WrapMode.WORD_CHAR)
+        bulk_sw.add(self._bulk_text)
+
+        sw = Gtk.ScrolledWindow()
+        sw.set_min_content_height(120)
+        area.pack_start(sw, True, True, 0)
+        self._model_store = Gtk.ListStore(str)
+        self._model_tree = Gtk.TreeView(model=self._model_store)
+        self._model_tree.append_column(Gtk.TreeViewColumn("Model ID", Gtk.CellRendererText(), text=0))
+        self._model_tree.set_rules_hint(True)
+        sw.add(self._model_tree)
+        # Activating (e.g. double-clicking) a row hands its path to _remove_model.
+        self._model_tree.connect("row-activated", lambda t, p, c: self._remove_model(p))
+
+        for m in self._data.get("models", []):
+            self._model_store.append([m])
+
+        # Default model combo
+        dbox = Gtk.Box(spacing=6)
+        area.pack_start(dbox, False, False, 0)
+        dbox.pack_start(Gtk.Label(label="Default Model:"), False, False, 0)
+        self._combo_default = Gtk.ComboBoxText()
+        self._refresh_default_combo()
+        dbox.pack_start(self._combo_default, True, True, 0)
+        dm = self._data.get("default_model", "")
+        if dm:
+            self._combo_default.set_active_id(dm)
+
+        # Apply preset-dependent UI state (OAuth button, placeholders, defaults).
+        self._apply_selected_preset(initial=True)
+
+        # Buttons
+        self.add_button("Cancel", Gtk.ResponseType.CANCEL)
+        self.add_button("Save", Gtk.ResponseType.OK)
+        self.connect("response", self._on_response)
+        self.show_all()
+
+    def _add_model(self):
+        """Add the model ID typed into the single-model entry to the list.
+
+        The text is passed through ``normalize_model_id``; an empty result is
+        ignored. An ID that is already listed is not appended again (matching
+        ``_add_models_from_text``). The entry is cleared whenever a non-empty
+        ID was submitted.
+        """
+        m = normalize_model_id(self._entry_model.get_text())
+        if not m:
+            return
+        existing = {self._model_store[i][0] for i in range(len(self._model_store))}
+        if m not in existing:
+            # Keep the current default selection; fall back to the new model
+            # when nothing was selected yet.
+            current = self._combo_default.get_active_text()
+            self._model_store.append([m])
+            self._refresh_default_combo(current or m)
+        self._entry_model.set_text("")
+
+    def _add_models_from_text(self):
+        """Append every not-yet-listed model ID from the bulk text box.
+
+        The box content is parsed with ``parse_model_list``. When nothing is
+        parsed the method returns without touching the box. Otherwise new IDs
+        are appended in order, the default-model combo is refreshed if at
+        least one was added, and the box is cleared.
+        """
+        text_buf = self._bulk_buf
+        raw_text = text_buf.get_text(text_buf.get_start_iter(), text_buf.get_end_iter(), True)
+        parsed_ids = parse_model_list(raw_text)
+        if not parsed_ids:
+            return
+
+        previous_default = self._combo_default.get_active_text()
+        known_ids = {row[0] for row in self._model_store}
+        new_count = 0
+        for model_id in parsed_ids:
+            if model_id in known_ids:
+                continue
+            self._model_store.append([model_id])
+            known_ids.add(model_id)
+            new_count += 1
+
+        if new_count:
+            # Keep the earlier default if there was one, else the first parsed ID.
+            self._refresh_default_combo(previous_default or parsed_ids[0])
+        text_buf.set_text("")
+
+    def _apply_selected_preset(self, initial=False):
+        """Update the form to reflect the preset chosen in the preset combo.
+
+        Args:
+            initial: ``True`` when called from ``__init__``. In that case the
+                preset's backend type, URL, CC version and models are applied
+                only if no existing endpoint is being edited
+                (``self._existing_name is None``), and the default-model combo
+                is re-synced to the saved default when saved models exist.
+        """
+        chosen = self._combo_preset.get_active_text() or "Custom"
+        preset = PROVIDER_PRESETS.get(chosen, PROVIDER_PRESETS["Custom"])
+        oauth_provider = preset.get("oauth_provider", "")
+        uses_oauth = bool(oauth_provider)
+
+        # The OAuth button and key placeholder depend only on the preset.
+        self._oauth_btn.set_visible(uses_oauth)
+        if not uses_oauth:
+            self._entry_key.set_placeholder_text("")
+        elif oauth_provider == "freebuff":
+            self._oauth_btn.set_label("Freebuff Login")
+            self._entry_key.set_placeholder_text("Auto-filled by freebuff login")
+        else:
+            self._oauth_btn.set_label("OAuth Login")
+            self._entry_key.set_placeholder_text("Auto-filled by OAuth")
+
+        # Skip the preset values only on the initial pass over an existing
+        # endpoint, so its saved settings are kept.
+        keep_saved_values = initial and self._existing_name is not None
+        if not keep_saved_values:
+            self._combo_type.set_active_id(preset.get("backend_type", "openai-compat"))
+            self._entry_url.set_text(preset.get("base_url", ""))
+            key_text = self._entry_key.get_text()
+            if not key_text.strip():
+                # A blank or whitespace-only key is reset to an empty string.
+                self._entry_key.set_text("")
+            preset_cc_version = preset.get("cc_version", "")
+            if preset_cc_version and not self._entry_cc_ver.get_text().strip():
+                self._entry_cc_ver.set_text(preset_cc_version)
+            # Seed the model list from the preset only when it is still empty.
+            if preset.get("models") and len(self._model_store) == 0:
+                preset_models = preset["models"]
+                for model_id in preset_models:
+                    self._model_store.append([model_id])
+                self._refresh_default_combo(preset_models[0])
+
+        if initial and self._data.get("models"):
+            self._refresh_default_combo(self._data.get("default_model", ""))
+
+    def _on_reasoning_toggled(self, *_):
+        """Mirror the reasoning switch into the ON/OFF label and effort combo.
+
+        Extra positional arguments (signal parameters) are ignored.
+        """
+        enabled = self._switch_reasoning.get_active()
+        # The effort choice can only be changed while reasoning is on.
+        self._combo_effort.set_sensitive(enabled)
+        markup = (
+            '<span foreground="#27ae60" weight="bold">ON</span>'
+            if enabled else
+            '<span foreground="#e67e22" weight="bold">OFF</span>'
+        )
+        self._lbl_reasoning.set_markup(markup)
+
+    def _do_oauth_login(self):
+        """Start the login flow matching the selected preset's OAuth provider.
+
+        ``"freebuff"`` runs the Freebuff flow, providers whose name starts
+        with ``"google"`` run the Google flow, anything else does nothing.
+        """
+        selected = self._combo_preset.get_active_text() or "Custom"
+        provider = PROVIDER_PRESETS.get(selected, {}).get("oauth_provider", "")
+        if provider == "freebuff":
+            self._freebuff_oauth_flow()
+            return
+        # `provider or ""` guards against a preset storing None.
+        if (provider or "").startswith("google"):
+            self._google_oauth_flow(provider)
+
+    def _google_oauth_flow(self, oauth_provider="google-cli"):
+        """Run the Google OAuth (PKCE) login in a modal dialog and store tokens.
+
+        Starts a local HTTP server for the redirect, opens the authorization
+        URL with ``xdg-open`` and, in a background thread, waits up to 120
+        seconds for the callback. On success the code is exchanged for tokens,
+        which are written (together with the discovered project ID and model
+        list) to the provider's JSON file under ``~/.cache/codex-proxy``. The
+        outcome is reported on the GTK main loop through
+        ``self._oauth_success`` / ``self._oauth_failed``.
+
+        Args:
+            oauth_provider: ``"google-antigravity"`` selects the Antigravity
+                client, the fixed port 51121 and model probing; any other
+                value uses the Gemini CLI client on a free loopback port.
+        """
+        is_antigravity = oauth_provider == "google-antigravity"
+        token_path = os.path.expanduser("~/.cache/codex-proxy/google-antigravity-oauth-token.json" if is_antigravity else "~/.cache/codex-proxy/google-cli-oauth-token.json")
+
+        if is_antigravity:
+            CLIENT_ID = "REDACTED_ANTIGRAVITY_CLIENT_ID"
+            CLIENT_SECRET = "REDACTED_ANTIGRAVITY_SECRET"
+            SCOPES = [
+                "https://www.googleapis.com/auth/cloud-platform",
+                "https://www.googleapis.com/auth/userinfo.email",
+                "https://www.googleapis.com/auth/userinfo.profile",
+                "https://www.googleapis.com/auth/cclog",
+                "https://www.googleapis.com/auth/experimentsandconfigs",
+            ]
+            port = 51121
+            redirect_uri = f"http://localhost:{port}/oauth-callback"
+            callback_path = "/oauth-callback"
+            provider_kind = "antigravity"
+        else:
+            CLIENT_ID = "REDACTED_GEMINI_CLI_CLIENT_ID"
+            CLIENT_SECRET = "REDACTED_GEMINI_CLI_SECRET"
+            SCOPES = [
+                "https://www.googleapis.com/auth/cloud-platform",
+                "https://www.googleapis.com/auth/userinfo.email",
+                "https://www.googleapis.com/auth/userinfo.profile",
+            ]
+            port = 0
+            redirect_uri = None
+            callback_path = "/oauth2callback"
+            provider_kind = "cli"
+
+        import http.server
+
+        # CSRF state plus PKCE verifier/challenge (S256).
+        state = secrets.token_hex(32)
+        verifier = secrets.token_urlsafe(64)
+        challenge = base64.urlsafe_b64encode(hashlib.sha256(verifier.encode()).digest()).rstrip(b"=").decode()
+
+        if port == 0:
+            # Ask the OS for a free loopback port; the redirect URI must be
+            # known before the authorization URL is built.
+            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+                s.bind(("127.0.0.1", 0))
+                port = s.getsockname()[1]
+            redirect_uri = f"http://127.0.0.1:{port}/oauth2callback"
+
+        scope_str = " ".join(SCOPES)
+        auth_url = (
+            f"https://accounts.google.com/o/oauth2/v2/auth?"
+            f"client_id={CLIENT_ID}"
+            f"&redirect_uri={urllib.parse.quote(redirect_uri)}"
+            f"&response_type=code"
+            f"&scope={urllib.parse.quote(scope_str)}"
+            f"&access_type=offline"
+            f"&prompt=select_account%20consent"
+            f"&state={state}"
+            f"&code_challenge={challenge}"
+            f"&code_challenge_method=S256"
+        )
+
+        dlg = Gtk.Dialog(title="Google OAuth (Gemini Mode)", parent=self, modal=True)
+        dlg.add_button("Cancel", Gtk.ResponseType.CANCEL)
+        dlg.set_default_size(520, 280)
+        area = dlg.get_content_area()
+        area.set_margin_start(16)
+        area.set_margin_end(16)
+        area.set_margin_top(12)
+        area.set_margin_bottom(12)
+        area.set_spacing(8)
+
+        area.pack_start(Gtk.Label(label="<b>Sign in with Google</b>", use_markup=True, xalign=0), False, False, 0)
+        area.pack_start(Gtk.Label(label="Emulating Gemini CLI OAuth — no client_secret.json needed.", xalign=0), False, False, 0)
+
+        link_lbl = Gtk.Label()
+        # The URL is full of '&' separators, which must be written as '&amp;'
+        # inside Pango markup or the whole label fails to parse.
+        link_lbl.set_markup(
+            f'<a href="{GLib.markup_escape_text(auth_url, -1)}">Click here to open Google authorization</a>')
+        link_lbl.set_line_wrap(True)
+        area.pack_start(link_lbl, False, False, 4)
+
+        self._oauth_status = Gtk.Label(label="Opening browser…", xalign=0)
+        area.pack_start(self._oauth_status, False, False, 4)
+
+        spinner = Gtk.Spinner()
+        spinner.start()
+        area.pack_start(spinner, False, False, 8)
+
+        area.show_all()
+
+        # One-element lists let the handler and worker thread share results.
+        code_holder = [None]
+        error_holder = [None]
+        received_state = [None]
+        # Set when the dialog is dismissed so the worker stops and frees the port.
+        cancelled = threading.Event()
+
+        def _oauth_log(msg):
+            # NOTE(review): fixed, predictable path in /tmp; consider moving
+            # the debug log under the user's cache directory.
+            with open("/tmp/codex-oauth-debug.log", "a") as _f:
+                _f.write(f"[{time.strftime('%H:%M:%S')}] {msg}\n")
+
+        def _save_tokens(data):
+            # Create the file as 0600 so the tokens are never world-readable,
+            # then chmod to tighten a pre-existing file as well.
+            os.makedirs(os.path.dirname(token_path), exist_ok=True)
+            fd = os.open(token_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+            with os.fdopen(fd, "w") as fh:
+                json.dump(data, fh, indent=2)
+            os.chmod(token_path, 0o600)
+
+        class OAuthHandler(http.server.BaseHTTPRequestHandler):
+            def do_GET(self2):
+                qs = urllib.parse.urlparse(self2.path).query
+                params = urllib.parse.parse_qs(qs)
+                received_state[0] = params.get("state", [None])[0]
+                _oauth_log(f"GET {self2.path} state={received_state[0]} code={'code' in params}")
+                if self2.path.find(callback_path) == -1:
+                    # Any request other than the callback aborts the flow.
+                    self2.send_response(302)
+                    self2.send_header("Location", "https://developers.google.com/gemini-code-assist/auth_failure_gemini")
+                    self2.end_headers()
+                    error_holder[0] = "unexpected request"
+                    return
+                if "code" in params:
+                    if received_state[0] != state:
+                        self2.send_response(400)
+                        self2.send_header("Content-Type", "text/html")
+                        self2.end_headers()
+                        self2.wfile.write(b"<html><body style='font-family:sans-serif;text-align:center;padding-top:80px'>"
+                                         b"<h2 style='color:#e74c3c'>CSRF state mismatch.</h2></body></html>")
+                        error_holder[0] = "CSRF state mismatch"
+                        return
+                    code_holder[0] = params["code"][0]
+                    self2.send_response(302)
+                    self2.send_header("Location", "https://developers.google.com/gemini-code-assist/auth_success_gemini")
+                    self2.end_headers()
+                else:
+                    error_holder[0] = params.get("error", ["unknown"])[0]
+                    self2.send_response(302)
+                    self2.send_header("Location", "https://developers.google.com/gemini-code-assist/auth_failure_gemini")
+                    self2.end_headers()
+            def log_message(self2, fmt, *args):
+                # Send the server's access log to the debug file, not stderr.
+                _oauth_log(fmt % args)
+
+        try:
+            bind_host = "localhost" if is_antigravity else "127.0.0.1"
+            server = http.server.HTTPServer((bind_host, port), OAuthHandler)
+        except OSError:
+            self._oauth_status.set_text(f"Port {port} already in use — close other apps and retry.")
+            spinner.stop()
+            dlg.run(); dlg.destroy()
+            return
+        # Without a timeout handle_request() blocks until a request arrives,
+        # so neither the deadline nor a cancel would ever be noticed.
+        server.timeout = 1.0
+
+        _oauth_log(f"Starting OAuth: port={port} redirect_uri={redirect_uri}")
+
+        def wait_for_code():
+            _oauth_log("wait_for_code thread started")
+            deadline = time.time() + 120
+            while (code_holder[0] is None and error_holder[0] is None
+                   and not cancelled.is_set() and time.time() < deadline):
+                server.handle_request()
+            server.server_close()
+            _oauth_log(f"Server closed. code={'yes' if code_holder[0] else 'no'} error={'yes' if error_holder[0] else 'no'}")
+            if cancelled.is_set():
+                # The dialog is already destroyed; nothing left to report to.
+                _oauth_log("OAuth cancelled by user")
+                return
+            if code_holder[0]:
+                try:
+                    _oauth_log("Exchanging code for token...")
+                    token_data = urllib.parse.urlencode({
+                        "code": code_holder[0],
+                        "client_id": CLIENT_ID,
+                        "client_secret": CLIENT_SECRET,
+                        "redirect_uri": redirect_uri,
+                        "grant_type": "authorization_code",
+                        "code_verifier": verifier,
+                    }).encode()
+                    req = urllib.request.Request("https://oauth2.googleapis.com/token", data=token_data,
+                                                 headers={"Content-Type": "application/x-www-form-urlencoded"})
+                    resp = urllib.request.urlopen(req, timeout=30)
+                    tokens = json.loads(resp.read())
+                    tokens["client_id"] = CLIENT_ID
+                    tokens["client_secret"] = CLIENT_SECRET
+                    tokens["provider_kind"] = provider_kind
+                    tokens["expires_at"] = time.time() + tokens.get("expires_in", 3600)
+                    _save_tokens(tokens)
+                    _oauth_log(f"Token saved to {token_path}")
+                    project_id = ""
+                    try:
+                        _oauth_log("Discovering project ID via loadCodeAssist...")
+                        lr = urllib.request.Request(
+                            "https://cloudcode-pa.googleapis.com/v1internal:loadCodeAssist",
+                            data=json.dumps({}).encode(),
+                            headers={
+                                "Content-Type": "application/json",
+                                "Authorization": f"Bearer {tokens['access_token']}",
+                                "User-Agent": "google-api-nodejs-client/9.15.1",
+                            })
+                        lresp = urllib.request.urlopen(lr, timeout=15)
+                        ldata = json.loads(lresp.read())
+                        # The project may come back as an object or a bare ID.
+                        p = ldata.get("cloudaicompanionProject", "")
+                        if isinstance(p, dict):
+                            project_id = p.get("id", "")
+                        elif isinstance(p, str):
+                            project_id = p
+                        _oauth_log(f"Project ID: {project_id or '(none)'}")
+                        if project_id:
+                            tokens["project_id"] = project_id
+                            _save_tokens(tokens)
+                    except Exception as pe:
+                        _oauth_log(f"loadCodeAssist failed (non-fatal): {pe}")
+                    if is_antigravity:
+                        found_models = [
+                            "gemini-2.5-flash", "gemini-2.5-pro",
+                            "gemini-3-flash-preview", "gemini-3-pro-preview", "gemini-3.1-pro-preview",
+                            "gemini-3-pro-low", "gemini-3-pro-high",
+                            "gemini-3.1-pro-low", "gemini-3.1-pro-high",
+                            "gemini-3-flash-low", "gemini-3-flash-medium", "gemini-3-flash-high",
+                            "claude-sonnet-4-6", "claude-opus-4-6-thinking",
+                            "claude-opus-4-6-thinking-low", "claude-opus-4-6-thinking-medium", "claude-opus-4-6-thinking-high",
+                            "gemini-claude-sonnet-4-6",
+                            "gemini-claude-opus-4-6-thinking-low", "gemini-claude-opus-4-6-thinking-medium", "gemini-claude-opus-4-6-thinking-high",
+                            "gemini-3-pro-image",
+                        ]
+                        probe_candidates = [
+                            "gemini-2.5-flash", "gemini-2.5-pro",
+                            "gemini-3-flash-preview", "gemini-3-pro-preview", "gemini-3.1-pro-preview",
+                        ]
+                        _oauth_log(f"Probing {len(probe_candidates)} model candidates...")
+                        for mc in probe_candidates:
+                            try:
+                                pr = urllib.request.Request(
+                                    "https://daily-cloudcode-pa.sandbox.googleapis.com/v1internal:generateContent",
+                                    data=json.dumps({
+                                        "project": project_id,
+                                        "model": mc,
+                                        "request": {"contents": [{"role": "user", "parts": [{"text": "x"}]}],
+                                                    "generationConfig": {"maxOutputTokens": 1}},
+                                    }).encode(),
+                                    headers={
+                                        "Content-Type": "application/json",
+                                        "Authorization": f"Bearer {tokens['access_token']}",
+                                        "User-Agent": "google-api-nodejs-client/9.15.1",
+                                        "Client-Metadata": "ideType=IDE_UNSPECIFIED,platform=PLATFORM_UNSPECIFIED,pluginType=GEMINI",
+                                    })
+                                pr.get_method = lambda: "POST"
+                                resp = urllib.request.urlopen(pr, timeout=10)
+                                resp.read()
+                                # Candidates may already be in the static list;
+                                # avoid storing the same model twice.
+                                if mc not in found_models:
+                                    found_models.append(mc)
+                                _oauth_log(f"  {mc} → available")
+                            except urllib.error.HTTPError as e:
+                                if e.code == 429:
+                                    # Rate limited means the model exists.
+                                    if mc not in found_models:
+                                        found_models.append(mc)
+                                    _oauth_log(f"  {mc} → available (rate limited)")
+                                else:
+                                    e.read()
+                                    _oauth_log(f"  {mc} → HTTP {e.code}")
+                            except Exception as e:
+                                _oauth_log(f"  {mc} → error: {e}")
+                    else:
+                        found_models = ["gemini-2.5-flash", "gemini-2.5-pro"]
+                    if found_models:
+                        tokens["available_models"] = found_models
+                        _save_tokens(tokens)
+                        _oauth_log(f"Discovered {len(found_models)} models: {found_models}")
+                    else:
+                        _oauth_log("No models discovered (will use defaults)")
+                    GLib.idle_add(self._oauth_success, dlg, tokens.get("access_token", ""), spinner)
+                    return
+                except urllib.error.HTTPError as e:
+                    body = e.read().decode(errors='replace')
+                    _oauth_log(f"Token exchange HTTP {e.code}: {body}")
+                    GLib.idle_add(self._oauth_failed, dlg, f"Token exchange failed ({e.code}): {body[:200]}", spinner)
+                    return
+                except Exception as e:
+                    _oauth_log(f"Token exchange FAILED: {e}")
+                    GLib.idle_add(self._oauth_failed, dlg, f"Token exchange failed: {e}", spinner)
+                    return
+            _oauth_log(f"OAuth failed: {error_holder[0] or 'timeout'}")
+            GLib.idle_add(self._oauth_failed, dlg,
+                          error_holder[0] or "No authorization code received.", spinner)
+
+        def _on_dialog_response(d, _response):
+            # Tell the worker to stop listening, then tear the dialog down.
+            cancelled.set()
+            d.destroy()
+
+        threading.Thread(target=wait_for_code, daemon=True).start()
+        subprocess.Popen(["xdg-open", auth_url], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+        dlg.connect("response", _on_dialog_response)
+        dlg.run()
+
+    def _freebuff_oauth_flow(self):
+        """Run the Freebuff login flow in a modal dialog.
+
+        A background thread requests a login URL from freebuff.com, shows and
+        opens it in the browser, then polls the status endpoint every 2
+        seconds for up to 5 minutes. The outcome is stored in
+        ``self._fb_oauth_result`` and handed to ``self._freebuff_oauth_done``
+        on the GTK main loop. Dismissing the dialog stops the polling.
+        """
+        dlg = Gtk.Dialog(title="Freebuff Login", parent=self, modal=True)
+        dlg.add_button("Cancel", Gtk.ResponseType.CANCEL)
+        dlg.set_default_size(500, 240)
+        area = dlg.get_content_area()
+        area.set_margin_start(16)
+        area.set_margin_end(16)
+        area.set_margin_top(12)
+        area.set_margin_bottom(12)
+        area.set_spacing(8)
+
+        area.pack_start(Gtk.Label(label="<b>Sign in with GitHub via Freebuff</b>", use_markup=True, xalign=0), False, False, 0)
+
+        self._oauth_status = Gtk.Label(label="Requesting login URL…", xalign=0)
+        self._oauth_status.set_line_wrap(True)
+        self._oauth_status.set_max_width_chars(60)
+        area.pack_start(self._oauth_status, False, False, 4)
+
+        link_lbl = Gtk.Label(xalign=0)
+        link_lbl.set_line_wrap(True)
+        link_lbl.set_max_width_chars(60)
+        area.pack_start(link_lbl, False, False, 4)
+
+        spinner = Gtk.Spinner()
+        spinner.start()
+        area.pack_start(spinner, False, False, 8)
+
+        area.show_all()
+        # Hidden until the server has returned a login URL.
+        link_lbl.set_visible(False)
+
+        self._fb_oauth_result = {"success": False, "user": None, "error": None}
+        # Set when the dialog is dismissed so the worker stops polling and
+        # never calls back into a destroyed dialog.
+        cancelled = threading.Event()
+
+        def _freebuff_auth_thread():
+            try:
+                fingerprint_id = str(uuid.uuid4())
+                auth_url = "https://freebuff.com/api/auth/cli/code"
+                body = json.dumps({"fingerprintId": fingerprint_id}).encode()
+                req = urllib.request.Request(auth_url, data=body,
+                    headers={"Content-Type": "application/json", "User-Agent": "codex-launcher/3.8.4"})
+                resp = urllib.request.urlopen(req, timeout=30)
+                data = json.loads(resp.read())
+                # Accept both camelCase and snake_case response keys.
+                login_url = data.get("loginUrl", "") or data.get("login_url", "")
+                fingerprint_hash = data.get("fingerprintHash", "") or data.get("fingerprint_hash", "")
+                expires_at = data.get("expiresAt", 0) or data.get("expires_at", 0)
+                if cancelled.is_set():
+                    return
+                if not login_url:
+                    self._fb_oauth_result["error"] = "Server returned no login URL"
+                    GLib.idle_add(self._freebuff_oauth_done, dlg, spinner)
+                    return
+
+                def _set_link():
+                    if cancelled.is_set():
+                        return
+                    self._oauth_status.set_text("Open this URL in your browser to log in:")
+                    # Query strings contain '&', which must be escaped for
+                    # Pango markup or the link label fails to render.
+                    safe_url = GLib.markup_escape_text(login_url, -1)
+                    link_lbl.set_markup(f'<a href="{safe_url}">{safe_url}</a>')
+                    link_lbl.set_visible(True)
+                GLib.idle_add(_set_link)
+
+                webbrowser.open(login_url)
+
+                poll_url = f"https://freebuff.com/api/auth/cli/status?fingerprintId={urllib.parse.quote(fingerprint_id)}&fingerprintHash={urllib.parse.quote(fingerprint_hash)}&expiresAt={expires_at}"
+                deadline = time.time() + 300
+                while time.time() < deadline:
+                    # Sleep 2 s between polls, waking at once on cancel.
+                    if cancelled.wait(2):
+                        return
+                    try:
+                        poll_req = urllib.request.Request(poll_url,
+                            headers={"User-Agent": "codex-launcher/3.8.4"})
+                        poll_resp = urllib.request.urlopen(poll_req, timeout=10)
+                        poll_data = json.loads(poll_resp.read())
+                        user = poll_data.get("user")
+                        if user and user.get("authToken"):
+                            self._fb_oauth_result["success"] = True
+                            self._fb_oauth_result["user"] = user
+                            GLib.idle_add(self._freebuff_oauth_done, dlg, spinner)
+                            return
+                    except Exception:
+                        # HTTP errors, network hiccups and malformed bodies
+                        # are all treated as "not ready yet"; keep polling
+                        # until the deadline.
+                        continue
+                self._fb_oauth_result["error"] = "Login timed out after 5 minutes."
+                GLib.idle_add(self._freebuff_oauth_done, dlg, spinner)
+            except Exception as e:
+                if cancelled.is_set():
+                    return
+                self._fb_oauth_result["error"] = str(e)[:200]
+                GLib.idle_add(self._freebuff_oauth_done, dlg, spinner)
+
+        def _on_dialog_response(d, _response):
+            cancelled.set()
+            d.destroy()
+
+        threading.Thread(target=_freebuff_auth_thread, daemon=True).start()
+        dlg.connect("response", _on_dialog_response)
+        dlg.run()
+
+    def _freebuff_oauth_done(self, dlg, spinner):
+        spinner.stop()
+        if self._fb_oauth_result["success"] and self._fb_oauth_result["user"]:
+            user = self._fb_oauth_result["user"]
+            creds_path = os.path.expanduser("~/.config/manicode/credentials.json")
+            os.makedirs(os.path.dirname(creds_path), exist_ok=True)
+            creds = {"default": {
+                "id": user.get("id", ""),
+                "name": user.get("name", ""),
+                "email": user.get("email", ""),
+                "authToken": user.get("authToken", ""),
+                "fingerprintId": user.get("fingerprintId", ""),
+                "fingerprintHash": user.get("fingerprintHash", ""),
+            }}
+            with open(creds_path, "w") as f:
+                json.dump(creds, f, indent=2)
+            os.chmod(creds_path, 0o600)
+            self._entry_key.set_text(user.get("authToken", ""))
+            self._oauth_status.set_markup('<span foreground="#27ae60" weight="bold">Authorization successful! Credentials saved.</span>')
+            dlg.set_title("Freebuff Login – Success")
+            GLib.timeout_add(1500, lambda: dlg.response(Gtk.ResponseType.OK))
+        else:
+            self._oauth_status.set_markup(f'<span foreground="#e74c3c">{self._fb_oauth_result["error"] or "Login failed."}</span>')
+            GLib.timeout_add(3000, lambda: dlg.response(Gtk.ResponseType.CANCEL))
+
+    def _oauth_success(self, dlg, access_token, spinner):
+        spinner.stop()
+        self._entry_key.set_text(access_token)
+        self._oauth_status.set_markup('<span foreground="#27ae60" weight="bold">Authorization successful! Token saved.</span>')
+        dlg.set_title("Google OAuth — Success")
+        GLib.timeout_add(1500, lambda: dlg.response(Gtk.ResponseType.OK))
+
+    def _oauth_failed(self, dlg, msg, spinner):
+        spinner.stop()
+        self._oauth_status.set_markup(f'<span foreground="#e74c3c">{msg}</span>')
+        GLib.timeout_add(3000, lambda: dlg.response(Gtk.ResponseType.CANCEL))
+
+    def _remove_model(self, path):
+        current = self._combo_default.get_active_text()
+        self._model_store.remove(self._model_store.get_iter(path))
+        self._refresh_default_combo(current)
+
+    def _refresh_default_combo(self, active=None):
+        if active is None:
+            active = self._combo_default.get_active_text()
+        self._combo_default.remove_all()
+        for row in self._model_store:
+            self._combo_default.append(row[0], row[0])
+        if active and any(row[0] == active for row in self._model_store):
+            self._combo_default.set_active_id(active)
+        elif len(self._model_store) > 0:
+            self._combo_default.set_active(0)
+
+    def _fetch_models(self):
+        ok, err = self._try_fetch_models()
+        if not ok:
+            d = Gtk.MessageDialog(self, 0, Gtk.MessageType.ERROR, Gtk.ButtonsType.OK,
+                                  f"Failed to fetch models:\n{err}")
+            d.run()
+            d.destroy()
+
+    def _try_fetch_models(self):
+        endpoint = {
+            "base_url": self._entry_url.get_text().strip(),
+            "api_key": self._entry_key.get_text().strip(),
+            "backend_type": self._combo_type.get_active_id() or "openai-compat",
+        }
+        ids, err = fetch_models_for_endpoint(endpoint)
+        if ids:
+            current = self._combo_default.get_active_text()
+            added = 0
+            for mid in ids:
+                # check dupes
+                found = any(self._model_store[i][0] == mid for i in range(len(self._model_store)))
+                if not found:
+                    self._model_store.append([mid])
+                    added += 1
+            self._refresh_default_combo(current)
+            return True, None
+        return False, err or "No models returned by endpoint"
+
+    def _diagnose_endpoint(self):
+        ep = {
+            "base_url": self._entry_url.get_text().strip(),
+            "api_key": self._entry_key.get_text().strip(),
+            "backend_type": self._combo_type.get_active_id() or "openai-compat",
+            "default_model": self._combo_default.get_active_text() or "",
+        }
+        name = ep.get("default_model") or "endpoint"
+        wait_dlg = Gtk.Dialog(title="Running Doctor…", parent=self, modal=True)
+        wait_dlg.set_default_size(280, 80)
+        lbl = Gtk.Label(label="Running endpoint diagnostics…")
+        lbl.set_margin_top(16)
+        lbl.set_margin_bottom(16)
+        wait_dlg.get_content_area().pack_start(lbl, True, True, 0)
+        wait_dlg.show_all()
+
+        def _run():
+            checks = run_endpoint_doctor(ep)
+            GLib.idle_add(wait_dlg.destroy)
+            GLib.idle_add(_show_doctor_results, self, name, checks)
+
+        threading.Thread(target=_run, daemon=True).start()
+        wait_dlg.run()
+
+    def _on_response(self, dialog, response):
+        """Handle the dialog response: validate the form and save the endpoint.
+
+        Any response other than ``OK`` simply destroys the dialog. On ``OK``
+        the form is validated, the endpoint list is loaded, the entry is
+        replaced or appended, the list is saved and the parent views are
+        refreshed. Validation failures show an error and keep the dialog open.
+        """
+        if response != Gtk.ResponseType.OK:
+            self.destroy()
+            return
+
+        name = self._entry_name.get_text().strip()
+        if not name:
+            self._show_error("Name is required")
+            return
+        # Backend type: explicit combo choice, else the preset's, else the generic default.
+        bt = self._combo_type.get_active_id() or PROVIDER_PRESETS.get(self._combo_preset.get_active_text() or "", {}).get("backend_type") or "openai-compat"
+        url = self._entry_url.get_text().strip()
+        key = self._entry_key.get_text().strip()
+        models = [self._model_store[i][0] for i in range(len(self._model_store))]
+        if not models:
+            # No models entered by hand: try to fetch them from the endpoint.
+            ok, err = self._try_fetch_models()
+            if ok:
+                models = [self._model_store[i][0] for i in range(len(self._model_store))]
+            else:
+                d = Gtk.MessageDialog(
+                    self, 0, Gtk.MessageType.QUESTION, Gtk.ButtonsType.YES_NO,
+                    f"Auto-fetch failed ({err}).\n\nAdd models manually now?"
+                )
+                r = d.run()
+                d.destroy()
+                if r == Gtk.ResponseType.YES:
+                    # Keep the dialog open so the user can type a model id.
+                    self._entry_model.grab_focus()
+                    return
+                # Declined: close the dialog without saving anything.
+                self.destroy()
+                return
+
+        if not models:
+            self._show_error("At least one model is required")
+            self._entry_model.grab_focus()
+            return
+        default = self._combo_default.get_active_text() or models[0]
+
+        data = load_endpoints()
+
+        # If renaming, remove old entry
+        if self._existing_name and self._existing_name != name:
+            data["endpoints"] = [e for e in data["endpoints"] if e["name"] != self._existing_name]
+
+        # Check for duplicate name
+        existing = [e for e in data["endpoints"] if e["name"] == name and e != self._data]
+        if existing:
+            self._show_error(f'Endpoint "{name}" already exists')
+            return
+
+        new_ep = {"name": name, "backend_type": bt, "base_url": url,
+                  "api_key": key, "default_model": default, "models": models,
+                  "provider_preset": self._combo_preset.get_active_text() or "Custom"}
+        # cc_version is only stored when the field is non-empty.
+        cc_ver = self._entry_cc_ver.get_text().strip()
+        if cc_ver:
+            new_ep["cc_version"] = cc_ver
+        new_ep["reasoning_enabled"] = self._switch_reasoning.get_active()
+        new_ep["reasoning_effort"] = self._combo_effort.get_active_id() or "medium"
+        # Carry over the preset's OAuth provider, if it declares one.
+        preset_name = self._combo_preset.get_active_text() or "Custom"
+        preset = PROVIDER_PRESETS.get(preset_name, {})
+        if preset.get("oauth_provider"):
+            new_ep["oauth_provider"] = preset["oauth_provider"]
+        new_ep["base_url"] = normalize_base_url(new_ep["base_url"])
+
+        # Update or append
+        found = False
+        for i, e in enumerate(data["endpoints"]):
+            if e["name"] == name:
+                data["endpoints"][i] = new_ep
+                found = True
+                break
+        if not found:
+            data["endpoints"].append(new_ep)
+            # A newly appended endpoint becomes the default when none is set.
+            if data.get("default") is None:
+                data["default"] = name
+
+        save_endpoints(data)
+        # Refresh the endpoint manager and the window that owns it.
+        self._parent_mgr._rebuild()
+        self._parent_mgr._parent._on_endpoints_updated()
+        self.destroy()
+
+    def _show_error(self, msg):
+        d = Gtk.MessageDialog(self, 0, Gtk.MessageType.ERROR, Gtk.ButtonsType.OK, msg)
+        d.run(); d.destroy()
+
+# ═══════════════════════════════════════════════════════════════════
+# Entry point
+# ═══════════════════════════════════════════════════════════════════
+
+# ═══════════════════════════════════════════════════════════════════
+# BGP Pool Manager
+# ═══════════════════════════════════════════════════════════════════
+
+class BGPPoolMgr(Gtk.Window):
+    def __init__(self, parent):
+        super().__init__(title="AI BGP — Pool Manager")
+        self.set_transient_for(parent)
+        self.set_default_size(620, 440)
+        self._parent = parent
+
+        vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=8)
+        vbox.set_margin_start(12)
+        vbox.set_margin_end(12)
+        vbox.set_margin_top(12)
+        vbox.set_margin_bottom(12)
+        self.add(vbox)
+
+        hdr = Gtk.Box(spacing=8)
+        vbox.pack_start(hdr, False, False, 0)
+        hdr.pack_start(Gtk.Label(label="<b>AI BGP Pools</b>  —  multi-provider routing with automatic failover", use_markup=True), False, False, 0)
+
+        self._store = Gtk.ListStore(str, str, str)
+        self._tree = Gtk.TreeView(model=self._store)
+        for i, (title, w) in enumerate([("Pool Name", 200), ("Routes", 250), ("Strategy", 100)]):
+            r = Gtk.CellRendererText()
+            c = Gtk.TreeViewColumn(title, r, text=i)
+            c.set_min_width(w)
+            self._tree.append_column(c)
+        self._tree.set_headers_visible(True)
+        sw = Gtk.ScrolledWindow()
+        sw.add(self._tree)
+        vbox.pack_start(sw, True, True, 0)
+
+        sel = self._tree.get_selection()
+        sel.connect("changed", lambda *_: self._on_select())
+
+        bbox = Gtk.Box(spacing=8)
+        vbox.pack_start(bbox, False, False, 0)
+        self._add_btn = Gtk.Button(label="Create Pool")
+        self._add_btn.connect("clicked", lambda b: self._add_pool())
+        bbox.pack_start(self._add_btn, True, True, 0)
+        self._edit_btn = Gtk.Button(label="Edit Pool")
+        self._edit_btn.connect("clicked", lambda b: self._edit_pool())
+        self._edit_btn.set_sensitive(False)
+        bbox.pack_start(self._edit_btn, True, True, 0)
+        self._del_btn = Gtk.Button(label="Delete Pool")
+        self._del_btn.connect("clicked", lambda b: self._del_pool())
+        self._del_btn.set_sensitive(False)
+        bbox.pack_start(self._del_btn, True, True, 0)
+        close_btn = Gtk.Button(label="Close")
+        close_btn.connect("clicked", lambda b: self.destroy())
+        bbox.pack_start(close_btn, True, True, 0)
+
+        self._rebuild()
+        self.show_all()
+
+    def _rebuild(self):
+        self._store.clear()
+        for pool in load_bgp_pools().get("pools", []):
+            routes_str = " → ".join(f'{r.get("name","?")}/{r.get("model","?")}' for r in pool.get("routes", []))
+            self._store.append([pool["name"], routes_str, pool.get("strategy", "failover")])
+
+    def _selected_name(self):
+        sel = self._tree.get_selection()
+        m, i = sel.get_selected()
+        return self._store[i][0] if i else None
+
+    def _on_select(self):
+        name = self._selected_name()
+        self._edit_btn.set_sensitive(bool(name))
+        self._del_btn.set_sensitive(bool(name))
+
+    def _add_pool(self):
+        d = BGPPoolEditDialog(self, None)
+        d.connect("response", lambda *_: self._rebuild())
+
+    def _edit_pool(self):
+        name = self._selected_name()
+        if name:
+            d = BGPPoolEditDialog(self, name)
+            d.connect("response", lambda *_: self._rebuild())
+
+    def _del_pool(self):
+        name = self._selected_name()
+        if not name:
+            return
+        d = Gtk.MessageDialog(self, 0, Gtk.MessageType.QUESTION, Gtk.ButtonsType.YES_NO,
+                              f'Delete BGP pool "{name}"?')
+        r = d.run(); d.destroy()
+        if r != Gtk.ResponseType.YES:
+            return
+        data = load_bgp_pools()
+        data["pools"] = [p for p in data["pools"] if p["name"] != name]
+        save_bgp_pools(data)
+        self._rebuild()
+        self._parent._on_endpoints_updated()
+
+
+class BGPPoolEditDialog(Gtk.Dialog):
+    def __init__(self, parent, existing_name):
+        title = "Edit BGP Pool" if existing_name else "Create BGP Pool"
+        Gtk.Dialog.__init__(self, title=title, parent=parent, modal=True)
+        self.add_button("Cancel", Gtk.ResponseType.CANCEL)
+        self.add_button("Save", Gtk.ResponseType.OK)
+        self.set_default_size(580, 480)
+
+        self._existing_name = existing_name
+        self._parent_mgr = parent
+
+        data = load_bgp_pools()
+        pool = None
+        if existing_name:
+            for p in data.get("pools", []):
+                if p["name"] == existing_name:
+                    pool = p
+                    break
+        if not pool:
+            pool = {"name": "", "strategy": "failover", "routes": []}
+
+        area = self.get_content_area()
+        area.set_margin_start(12)
+        area.set_margin_end(12)
+        area.set_margin_top(12)
+        area.set_margin_bottom(12)
+        area.set_spacing(8)
+
+        grid = Gtk.Grid(column_spacing=8, row_spacing=6)
+        area.pack_start(grid, False, False, 0)
+
+        grid.attach(Gtk.Label(label="Pool Name:", xalign=1), 0, 0, 1, 1)
+        self._entry_name = Gtk.Entry(text=pool["name"])
+        grid.attach(self._entry_name, 1, 0, 1, 1)
+
+        grid.attach(Gtk.Label(label="Strategy:", xalign=1), 0, 1, 1, 1)
+        self._combo_strategy = Gtk.ComboBoxText()
+        self._combo_strategy.append("failover", "Failover (try primary, fall back on error)")
+        self._combo_strategy.append("race", "Race (send to all, return fastest)")
+        self._combo_strategy.set_active_id(pool.get("strategy", "failover"))
+        grid.attach(self._combo_strategy, 1, 1, 1, 1)
+
+        area.pack_start(Gtk.Label(label="<b>Routes</b>  (drag to reorder priority)", use_markup=True, xalign=0), False, False, 8)
+
+        self._route_store = Gtk.ListStore(str, str, str, str, str, str)
+        for r in pool.get("routes", []):
+            self._route_store.append([
+                r.get("name", ""), r.get("endpoint_name", ""),
+                r.get("target_url", ""), r.get("api_key", ""),
+                r.get("model", ""), str(r.get("priority", 99))
+            ])
+
+        self._route_tree = Gtk.TreeView(model=self._route_store)
+        for i, (title, w) in enumerate([
+            ("Route Name", 120), ("Endpoint", 120), ("URL", 150),
+            ("API Key", 80), ("Model", 120), ("Priority", 60)
+        ]):
+            renderer = Gtk.CellRendererText()
+            renderer.set_property("editable", False)
+            col = Gtk.TreeViewColumn(title, renderer, text=i)
+            col.set_min_width(w)
+            col.set_resizable(True)
+            self._route_tree.append_column(col)
+        self._route_tree.set_headers_visible(True)
+        sw = Gtk.ScrolledWindow()
+        sw.set_policy(Gtk.PolicyType.AUTOMATIC, Gtk.PolicyType.AUTOMATIC)
+        sw.add(self._route_tree)
+        sw.set_min_content_height(200)
+        area.pack_start(sw, True, True, 0)
+
+        bbox = Gtk.Box(spacing=6)
+        area.pack_start(bbox, False, False, 0)
+        add_r = Gtk.Button(label="Add Route")
+        add_r.connect("clicked", lambda b: self._add_route())
+        bbox.pack_start(add_r, True, True, 0)
+        edit_r = Gtk.Button(label="Edit Route")
+        edit_r.connect("clicked", lambda b: self._edit_route())
+        bbox.pack_start(edit_r, True, True, 0)
+        rm_r = Gtk.Button(label="Remove Route")
+        rm_r.connect("clicked", lambda b: self._remove_route())
+        bbox.pack_start(rm_r, True, True, 0)
+        up_r = Gtk.Button(label="↑ Up")
+        up_r.connect("clicked", lambda b: self._move_route(-1))
+        bbox.pack_start(up_r, True, True, 0)
+        down_r = Gtk.Button(label="↓ Down")
+        down_r.connect("clicked", lambda b: self._move_route(1))
+        bbox.pack_start(down_r, True, True, 0)
+
+        self.show_all()
+
+        if self.run() == Gtk.ResponseType.OK:
+            self._save()
+
+        self.destroy()
+
+    def _save(self):
+        name = self._entry_name.get_text().strip()
+        if not name:
+            return
+        strategy = self._combo_strategy.get_active_id() or "failover"
+        routes = []
+        for i, row in enumerate(self._route_store):
+            if not row[2]:
+                continue
+            routes.append({
+                "name": row[0] or f"Route {i+1}",
+                "endpoint_name": row[1],
+                "target_url": row[2],
+                "api_key": row[3],
+                "model": row[4],
+                "priority": i + 1,
+                "reasoning_enabled": True,
+                "reasoning_effort": "medium",
+            })
+        data = load_bgp_pools()
+        if self._existing_name:
+            data["pools"] = [p for p in data["pools"] if p["name"] != self._existing_name]
+        data["pools"].append({"name": name, "strategy": strategy, "routes": routes})
+        save_bgp_pools(data)
+        self._parent_mgr._parent._on_endpoints_updated()
+
+    def _add_route(self):
+        endpoints = load_endpoints().get("endpoints", [])
+        if not endpoints:
+            d = Gtk.MessageDialog(self, 0, Gtk.MessageType.INFO, Gtk.ButtonsType.OK,
+                                  "No endpoints configured. Add endpoints in Manage Endpoints first.")
+            d.run(); d.destroy()
+            return
+        d = BGPRouteDialog(self, endpoints, None)
+        if d.result:
+            r = d.result
+            self._route_store.append([
+                r.get("name", ""), r.get("endpoint_name", ""),
+                r.get("target_url", ""), r.get("api_key", ""),
+                r.get("model", ""), str(r.get("priority", 99))
+            ])
+
+    def _edit_route(self):
+        sel = self._route_tree.get_selection()
+        m, i = sel.get_selected()
+        if not i:
+            return
+        endpoints = load_endpoints().get("endpoints", [])
+        existing = {
+            "name": m[i][0], "endpoint_name": m[i][1],
+            "target_url": m[i][2], "api_key": m[i][3],
+            "model": m[i][4], "priority": int(m[i][5]) if m[i][5] else 99,
+        }
+        d = BGPRouteDialog(self, endpoints, existing)
+        if d.result:
+            r = d.result
+            m[i][0] = r.get("name", "")
+            m[i][1] = r.get("endpoint_name", "")
+            m[i][2] = r.get("target_url", "")
+            m[i][3] = r.get("api_key", "")
+            m[i][4] = r.get("model", "")
+            m[i][5] = str(r.get("priority", 99))
+
+    def _remove_route(self):
+        sel = self._route_tree.get_selection()
+        m, i = sel.get_selected()
+        if i:
+            self._route_store.remove(i)
+
+    def _move_route(self, direction):
+        sel = self._route_tree.get_selection()
+        m, i = sel.get_selected()
+        if not i:
+            return
+        path = m.get_path(i)
+        idx = path.get_indices()[0]
+        new_idx = idx + direction
+        if new_idx < 0 or new_idx >= len(self._route_store):
+            return
+        row_data = [m[idx][c] for c in range(6)]
+        self._route_store.remove(m.get_iter(Gtk.TreePath(idx)))
+        new_iter = self._route_store.insert(new_idx)
+        for c, v in enumerate(row_data):
+            self._route_store.set_value(new_iter, c, v)
+
+
+class BGPRouteDialog(Gtk.Dialog):
+    def __init__(self, parent, endpoints, existing):
+        Gtk.Dialog.__init__(self, title="BGP Route", parent=parent, modal=True)
+        self.add_button("Cancel", Gtk.ResponseType.CANCEL)
+        self.add_button("OK", Gtk.ResponseType.OK)
+        self.set_default_size(440, 300)
+        self.result = None
+
+        area = self.get_content_area()
+        area.set_margin_start(12)
+        area.set_margin_end(12)
+        area.set_margin_top(12)
+        area.set_margin_bottom(12)
+        area.set_spacing(6)
+
+        grid = Gtk.Grid(column_spacing=8, row_spacing=6)
+        area.pack_start(grid, False, False, 0)
+
+        def add_row(row, label, widget):
+            grid.attach(Gtk.Label(label=label, xalign=1), 0, row, 1, 1)
+            grid.attach(widget, 1, row, 1, 1)
+
+        self._entry_name = Gtk.Entry(text=existing.get("name", "") if existing else "")
+        add_row(0, "Route Name:", self._entry_name)
+
+        self._combo_ep = Gtk.ComboBoxText()
+        ep_names = [e["name"] for e in endpoints]
+        for en in ep_names:
+            self._combo_ep.append(en, en)
+        if existing and existing.get("endpoint_name") in ep_names:
+            self._combo_ep.set_active_id(existing["endpoint_name"])
+        elif ep_names:
+            self._combo_ep.set_active(0)
+        self._combo_ep.connect("changed", lambda b: self._on_ep_changed(endpoints))
+        add_row(1, "Endpoint:", self._combo_ep)
+
+        self._entry_url = Gtk.Entry()
+        add_row(2, "URL:", self._entry_url)
+
+        self._entry_key = Gtk.Entry()
+        self._entry_key.set_visibility(False)
+        add_row(3, "API Key:", self._entry_key)
+
+        self._combo_model = Gtk.ComboBoxText()
+        add_row(4, "Model:", self._combo_model)
+
+        if existing:
+            self._entry_url.set_text(existing.get("target_url", ""))
+            self._entry_key.set_text(existing.get("api_key", ""))
+        self._on_ep_changed(endpoints)
+        if existing and existing.get("model"):
+            self._combo_model.set_active_id(existing["model"])
+
+        self.show_all()
+        if self.run() == Gtk.ResponseType.OK:
+            ep_name = self._combo_ep.get_active_text() or ""
+            ep = None
+            for e in endpoints:
+                if e["name"] == ep_name:
+                    ep = e
+                    break
+            self.result = {
+                "name": self._entry_name.get_text().strip() or ep_name,
+                "endpoint_name": ep_name,
+                "target_url": self._entry_url.get_text().strip(),
+                "api_key": self._entry_key.get_text().strip(),
+                "model": self._combo_model.get_active_text() or "",
+                "priority": 99,
+            }
+            if ep:
+                self.result["reasoning_enabled"] = ep.get("reasoning_enabled", True)
+                self.result["reasoning_effort"] = ep.get("reasoning_effort", "medium")
+                self.result["oauth_provider"] = ep.get("oauth_provider", "")
+        self.destroy()
+
+    def _on_ep_changed(self, endpoints):
+        ep_name = self._combo_ep.get_active_text()
+        ep = None
+        for e in endpoints:
+            if e["name"] == ep_name:
+                ep = e
+                break
+        if ep:
+            self._entry_url.set_text(normalize_base_url(ep.get("base_url", "")))
+            self._entry_key.set_text(ep.get("api_key", ""))
+            self._combo_model.remove_all()
+            for m in ep.get("models", []):
+                mid = normalize_model_id(m) if m else ""
+                self._combo_model.append(mid, m)
+            if ep.get("default_model"):
+                self._combo_model.set_active_id(normalize_model_id(ep["default_model"]))
+            elif len(ep.get("models", [])) > 0:
+                self._combo_model.set_active(0)
+
+
+# Colour theme for the usage dashboard: named hex colours plus
+# "model_palette", a list of hex colours.
+_U = {
+    "base": "#0C0E16", "surface0": "#161928", "surface1": "#1E2235",
+    "surface2": "#2A2F47", "text": "#E4E6F0", "subtext": "#B0B4C8",
+    "dim": "#5C6180", "accent": "#7EB8F7", "blue": "#5DA4E8",
+    "sapphire": "#4EC5C1", "green": "#59D4A0", "yellow": "#F0C75E",
+    "red": "#F06A77", "peach": "#F09860", "teal": "#4EC5C1",
+    "lavender": "#A899F0", "sky": "#70C8E8", "maroon": "#C44B5C",
+    "flamingo": "#E878B0", "rosewater": "#F0D0C0",
+    "model_palette": ["#F09860", "#4EC5C1", "#5DA4E8", "#59D4A0",
+                      "#F0C75E", "#A899F0", "#70C8E8", "#E878B0",
+                      "#C44B5C", "#F0D0C0", "#7EB8F7", "#F06A77"],
+}
+
+# JSON file read by _load_usage_stats() to populate the usage dashboard.
+_USAGE_STATS_FILE = HOME / ".cache/codex-proxy/usage-stats.json"
+
+def _load_usage_stats():
+    try:
+        if _USAGE_STATS_FILE.exists():
+            return json.loads(_USAGE_STATS_FILE.read_text())
+    except Exception:
+        pass
+    return {"providers": {}, "updated": None}
+
+def _fmt_tok(n):
+    if n >= 1_000_000:
+        return f"{n/1_000_000:.1f}M"
+    if n >= 1_000:
+        return f"{n/1_000:.1f}K"
+    return str(n)
+
+def _fmt_dur(s):
+    if s >= 3600:
+        return f"{s/3600:.1f}h"
+    if s >= 60:
+        return f"{s/60:.1f}m"
+    return f"{s:.1f}s"
+
+def _status_pill(success_rate, fail_pct):
+    if fail_pct > 0.15:
+        return ("ERR", _U["red"])
+    if fail_pct > 0.05:
+        return ("WARN", _U["yellow"])
+    return ("OK", _U["green"])
+
+def _make_css_widget(css_str):
+    p = Gtk.CssProvider()
+    p.load_from_data(css_str.encode())
+    return p
+
+def _apply_css(widget, css_str):
+    ctx = widget.get_style_context()
+    ctx.add_provider(_make_css_widget(css_str), Gtk.STYLE_PROVIDER_PRIORITY_USER)
+
+
+class UsageWindow(Gtk.Window):
+    def __init__(self, parent):
+        super().__init__(title="Usage Dashboard")
+        self.set_transient_for(parent)
+        self.set_default_size(720, 640)
+        self.set_position(Gtk.WindowPosition.CENTER)
+        self._parent = parent
+
+        _apply_css(self, f"""
+            window {{ background-color: {_U["base"]}; }}
+            separator {{ background-color: {_U["surface1"]}; }}
+        """)
+
+        vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=0)
+        self.add(vbox)
+
+        self._build_header(vbox)
+        self._build_summary_strip(vbox)
+        sep = Gtk.Separator()
+        vbox.pack_start(sep, False, False, 0)
+
+        self._cards_box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=6)
+        self._cards_box.set_margin_top(8)
+        sw = Gtk.ScrolledWindow()
+        sw.set_policy(Gtk.PolicyType.NEVER, Gtk.PolicyType.AUTOMATIC)
+        sw.add(self._cards_box)
+        vbox.pack_start(sw, True, True, 0)
+
+        self._refresh()
+        self.show_all()
+
+    def _build_header(self, parent):
+        hdr = Gtk.Box(spacing=8)
+        hdr.set_margin_start(16)
+        hdr.set_margin_end(16)
+        hdr.set_margin_top(12)
+        hdr.set_margin_bottom(6)
+        parent.pack_start(hdr, False, False, 0)
+
+        bolt = Gtk.Label()
+        bolt.set_markup(f'<span foreground="{_U["accent"]}" font="14">\u26A1</span>')
+        hdr.pack_start(bolt, False, False, 0)
+
+        title = Gtk.Label()
+        title.set_markup(f'<span font="14" weight="bold" foreground="{_U["text"]}">Usage Dashboard</span>')
+        hdr.pack_start(title, False, False, 0)
+
+        self._status_dots = Gtk.Label()
+        hdr.pack_start(self._status_dots, False, False, 8)
+
+        self._updated_lbl = Gtk.Label()
+        self._updated_lbl.set_markup(f'<span foreground="{_U["dim"]}" size="small">Never</span>')
+        hdr.pack_end(self._updated_lbl, False, False, 4)
+
+        refresh_btn = Gtk.Button(label="Refresh")
+        _apply_css(refresh_btn, f"""
+            button {{ color: {_U["text"]}; background-color: {_U["surface0"]};
+                     border: 1px solid {_U["surface1"]}; border-radius: 6px; padding: 4px 12px; }}
+            button:hover {{ background-color: {_U["surface1"]}; }}
+        """)
+        refresh_btn.connect("clicked", lambda b: self._refresh())
+        hdr.pack_end(refresh_btn, False, False, 0)
+
+    def _build_summary_strip(self, parent):
+        strip = Gtk.Box(spacing=0)
+        strip.set_margin_start(16)
+        strip.set_margin_end(16)
+        strip.set_margin_bottom(6)
+        _apply_css(strip, f"box {{ background-color: {_U["surface0"]}; border-radius: 8px; padding: 8px 12px; }}")
+        parent.pack_start(strip, False, False, 0)
+
+        self._kpi_boxes = {}
+        for key, label, icon in [
+            ("providers", "Providers", "\U0001F4CA"),
+            ("requests", "Requests", "\u26A1"),
+            ("tokens", "Tokens", "\U0001F9E0"),
+            ("latency", "Avg Latency", "\u23F1"),
+        ]:
+            box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=1)
+            lbl = Gtk.Label()
+            lbl.set_markup(f'<span foreground="{_U["dim"]}" size="x-small">{icon} {label}</span>')
+            lbl.set_xalign(0)
+            box.pack_start(lbl, False, False, 0)
+            val = Gtk.Label()
+            val.set_markup(f'<span weight="bold" foreground="{_U["text"]}" size="small">-</span>')
+            val.set_xalign(0)
+            box.pack_start(val, False, False, 0)
+            box.set_margin_end(20)
+            strip.pack_start(box, False, False, 0)
+            self._kpi_boxes[key] = val
+
+    def _refresh(self):
+        for c in self._cards_box.get_children():
+            self._cards_box.remove(c)
+        stats = _load_usage_stats()
+        updated = stats.get("updated")
+        if updated:
+            self._updated_lbl.set_markup(f'<span foreground="{_U["dim"]}" size="small">{updated}</span>')
+        providers = stats.get("providers", {})
+        if not providers:
+            empty = Gtk.Label()
+            empty.set_markup(f'<span foreground="{_U["dim"]}" size="large">No usage data yet.\nLaunch a session to start tracking.</span>')
+            empty.set_margin_top(60)
+            self._cards_box.pack_start(empty, False, False, 0)
+            self._cards_box.show_all()
+            return
+
+        total_req = 0
+        total_tok_in = 0
+        total_tok_out = 0
+        total_dur = 0.0
+        n_ok = 0
+        n_warn = 0
+        n_err = 0
+
+        sorted_providers = sorted(providers.items(), key=lambda x: x[1].get("total_requests", 0), reverse=True)
+        for prov_name, prov_data in sorted_providers:
+            t = prov_data.get("total_requests", 0)
+            total_req += t
+            total_tok_in += prov_data.get("total_tokens_in", 0)
+            total_tok_out += prov_data.get("total_tokens_out", 0)
+            total_dur += prov_data.get("total_duration_s", 0.0)
+            fail = prov_data.get("failures", 0)
+            fail_pct = fail / t if t > 0 else 0
+            _, sc = _status_pill(0, fail_pct)
+            if fail_pct > 0.15:
+                n_err += 1
+            elif fail_pct > 0.05:
+                n_warn += 1
+            else:
+                n_ok += 1
+
+        self._kpi_boxes["providers"].set_markup(
+            f'<span weight="bold" foreground="{_U["text"]}" size="small">{len(providers)}</span>')
+        self._kpi_boxes["requests"].set_markup(
+            f'<span weight="bold" foreground="{_U["text"]}" size="small">{total_req:,}</span>')
+        tok_sum = total_tok_in + total_tok_out
+        tok_str = f"{_fmt_tok(tok_sum)} in:{_fmt_tok(total_tok_in)} out:{_fmt_tok(total_tok_out)}" if tok_sum else "N/A"
+        self._kpi_boxes["tokens"].set_markup(
+            f'<span weight="bold" foreground="{_U["text"]}" size="small">{tok_str}</span>')
+        avg_lat = total_dur / total_req if total_req > 0 else 0
+        self._kpi_boxes["latency"].set_markup(
+            f'<span weight="bold" foreground="{_U["text"]}" size="small">{_fmt_dur(avg_lat)}</span>')
+
+        dots_parts = []
+        if n_ok:
+            dots_parts.append(f'<span foreground="{_U["green"]}">\u25CF{n_ok}</span>')
+        if n_warn:
+            dots_parts.append(f'<span foreground="{_U["yellow"]}">\u25D0{n_warn}</span>')
+        if n_err:
+            dots_parts.append(f'<span foreground="{_U["red"]}">\u2717{n_err}</span>')
+        if dots_parts:
+            self._status_dots.set_markup(" ".join(dots_parts))
+
+        for prov_name, prov_data in sorted_providers:
+            card = self._build_card(prov_name, prov_data)
+            self._cards_box.pack_start(card, False, False, 0)
+        self._cards_box.show_all()
+
+    def _build_card(self, name, data):
+        """Build the dashboard card widget for one provider.
+
+        ``name`` is the provider key (a URL is cut down to its host) and
+        ``data`` its stats dict (total_requests, successes, failures, token
+        and duration totals, last_used, models, last_error).  Returns the
+        card, a vertical Gtk.Box; packing and showing it is up to the caller.
+        """
+        def esc(value):
+            # Free text (host, timestamp, error message): a raw '<' or '&'
+            # would make Pango reject the whole markup string.
+            return GLib.markup_escape_text(str(value), -1)
+
+        total = data.get("total_requests", 0)
+        ok = data.get("successes", 0)
+        fail = data.get("failures", 0)
+        success_rate = ok / total if total > 0 else 1.0
+        fail_pct = fail / total if total > 0 else 0
+        status_text, status_color = _status_pill(success_rate, fail_pct)
+
+        card = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=0)
+        card.set_margin_start(12)
+        card.set_margin_end(12)
+        # Styled once, with the border already tinted by the status colour.
+        _apply_css(card, f"""
+            box {{ background-color: {_U["surface0"]}; border-radius: 10px;
+                  border: 1px solid {status_color}; }}
+        """)
+
+        inner = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=3)
+        inner.set_margin_start(14)
+        inner.set_margin_end(14)
+        inner.set_margin_top(10)
+        inner.set_margin_bottom(10)
+        card.pack_start(inner, False, False, 0)
+
+        # Header row: status dot, host, status pill, request count, last use.
+        top = Gtk.Box(spacing=6)
+        inner.pack_start(top, False, False, 0)
+        dot = Gtk.Label()
+        dot.set_markup(f'<span foreground="{status_color}" font="10">\u25CF</span>')
+        top.pack_start(dot, False, False, 0)
+        name_lbl = Gtk.Label()
+        short = name.replace("https://", "").replace("http://", "").split("/")[0]
+        name_lbl.set_markup(f'<span weight="bold" foreground="{_U["text"]}" size="medium">{esc(short)}</span>')
+        top.pack_start(name_lbl, False, False, 0)
+
+        pill = Gtk.Label()
+        pill.set_markup(f'<span foreground="{_U["base"]}" bgalpha="80%" background="{status_color}" weight="bold" size="x-small"> {status_text} </span>')
+        top.pack_start(pill, False, False, 4)
+
+        req_lbl = Gtk.Label()
+        req_lbl.set_markup(f'<span foreground="{_U["subtext"]}" size="small">{total} req</span>')
+        top.pack_start(req_lbl, False, False, 6)
+
+        last_used = data.get("last_used", "")
+        if last_used:
+            lu_lbl = Gtk.Label()
+            lu_lbl.set_markup(f'<span foreground="{_U["dim"]}" size="x-small">{esc(last_used)}</span>')
+            top.pack_end(lu_lbl, False, False, 0)
+
+        sep1 = Gtk.Separator()
+        _apply_css(sep1, f"separator {{ background-color: {status_color}; margin-top: 4px; }}")
+        inner.pack_start(sep1, False, False, 0)
+
+        # Success-rate gauge, with the failure count appended when non-zero.
+        gauge_box = Gtk.Box(spacing=4)
+        gauge_box.set_margin_top(4)
+        inner.pack_start(gauge_box, False, False, 0)
+        gauge_label = Gtk.Label()
+        gauge_label.set_markup(f'<span foreground="{_U["yellow"]}" size="x-small">\u26A1</span>')
+        gauge_box.pack_start(gauge_label, False, False, 0)
+
+        bar = Gtk.ProgressBar()
+        bar.set_fraction(success_rate)
+        bar.set_text(f"{int(success_rate * 100)}%")
+        bar.set_show_text(True)
+        _apply_css(bar, f"""
+            progress {{ background-color: {status_color}; border-radius: 6px; }}
+            trough {{ background-color: {_U["surface1"]}; border-radius: 6px; min-height: 12px; }}
+        """)
+        bar.set_hexpand(True)
+        gauge_box.pack_start(bar, True, True, 0)
+        if fail > 0:
+            fail_lbl = Gtk.Label()
+            fail_lbl.set_markup(f'<span foreground="{_U["red"]}" size="x-small">{fail} fail</span>')
+            gauge_box.pack_end(fail_lbl, False, False, 0)
+
+        metrics_box = Gtk.Box(spacing=0)
+        metrics_box.set_margin_top(4)
+        inner.pack_start(metrics_box, False, False, 0)
+
+        dur = data.get("total_duration_s", 0.0)
+        avg_dur = dur / total if total > 0 else 0
+        for label, value, color in [
+            ("Tokens In", _fmt_tok(data.get("total_tokens_in", 0)), _U["sapphire"]),
+            ("Tokens Out", _fmt_tok(data.get("total_tokens_out", 0)), _U["peach"]),
+            ("Avg Latency", _fmt_dur(avg_dur), _U["sky"]),
+            ("Duration", _fmt_dur(dur), _U["lavender"]),
+        ]:
+            box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=0)
+            key_lbl = Gtk.Label()
+            key_lbl.set_markup(f'<span foreground="{_U["dim"]}" size="x-small">{label}</span>')
+            key_lbl.set_xalign(0)
+            box.pack_start(key_lbl, False, False, 0)
+            val_lbl = Gtk.Label()
+            val_lbl.set_markup(f'<span weight="bold" foreground="{color}" size="small">{value}</span>')
+            val_lbl.set_xalign(0)
+            box.pack_start(val_lbl, False, False, 0)
+            box.set_margin_end(16)
+            metrics_box.pack_start(box, False, False, 0)
+
+        models = data.get("models", {})
+        if models:
+            self._build_models_section(inner, models, total)
+
+        last_err = data.get("last_error")
+        if last_err:
+            err_box = Gtk.Box(spacing=4)
+            err_box.set_margin_top(4)
+            inner.pack_start(err_box, False, False, 0)
+            icon = Gtk.Label()
+            icon.set_markup(f'<span foreground="{_U["red"]}" size="x-small">\u26A0</span>')
+            err_box.pack_start(icon, False, False, 0)
+            err_lbl = Gtk.Label()
+            err_lbl.set_markup(f'<span foreground="{_U["red"]}" size="x-small">{esc(last_err)}</span>')
+            err_lbl.set_xalign(0)
+            err_lbl.set_line_wrap(True)
+            err_box.pack_start(err_lbl, False, False, 0)
+
+        return card
+
+    def _build_models_section(self, parent, models, total_req):
+        """Append the per-model breakdown of one provider to ``parent``.
+
+        Packs a separator, a "Models" header, a composition strip (only if
+        ``total_req`` > 0; models below 1% are skipped) and a row for each of
+        the six busiest models.  ``models`` maps model name -> stats dict.
+        """
+        palette = _U["model_palette"]
+        sep_m = Gtk.Separator()
+        # Single quotes outside: nesting the f-string's own quote needs Python >= 3.12.
+        _apply_css(sep_m, f'separator {{ background-color: {_U["lavender"]}; margin-top: 4px; margin-bottom: 2px; }}')
+        parent.pack_start(sep_m, False, False, 0)
+        header = Gtk.Box(spacing=4)
+        header.set_margin_top(2)
+        parent.pack_start(header, False, False, 0)
+        icon = Gtk.Label()
+        icon.set_markup(f'<span foreground="{_U["lavender"]}" size="x-small">\U0001F916</span>')
+        header.pack_start(icon, False, False, 0)
+        lbl = Gtk.Label()
+        lbl.set_markup(f'<span foreground="{_U["lavender"]}" weight="bold" size="x-small">Models</span>')
+        header.pack_start(lbl, False, False, 0)
+
+        sorted_models = sorted(models.items(), key=lambda x: x[1].get("requests", 0), reverse=True)
+        if total_req > 0:
+            comp_bar = Gtk.Box(spacing=0)
+            _apply_css(comp_bar, f'box {{ background-color: {_U["surface1"]}; border-radius: 4px; min-height: 8px; margin-top: 2px; }}')
+            parent.pack_start(comp_bar, False, False, 0)
+            for i, (_, mdata) in enumerate(sorted_models):
+                pct = mdata.get("requests", 0) / total_req
+                if pct >= 0.01:
+                    seg = Gtk.Box()
+                    _apply_css(seg, f"box {{ background-color: {palette[i % len(palette)]}; min-height: 8px; }}")
+                    seg.set_size_request(max(int(pct * 400), 4), 8)
+                    comp_bar.pack_start(seg, False, False, 0)
+
+        models_box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=1)
+        models_box.set_margin_top(2)
+        parent.pack_start(models_box, False, False, 0)
+        for i, (mname, mdata) in enumerate(sorted_models[:6]):
+            row = Gtk.Box(spacing=6)
+            models_box.pack_start(row, False, False, 0)
+            color = palette[i % len(palette)]
+            dot = Gtk.Label()
+            dot.set_markup(f'<span foreground="{color}" size="x-small">\u25CF</span>')
+            row.pack_start(dot, False, False, 0)
+            m_lbl = Gtk.Label()
+            # Model ids are external text, so escape them for Pango markup.
+            safe_name = GLib.markup_escape_text(str(mname), -1)
+            m_lbl.set_markup(f'<span foreground="{_U["subtext"]}" size="x-small">{safe_name}</span>')
+            m_lbl.set_xalign(0)
+            m_lbl.set_size_request(120, -1)
+            row.pack_start(m_lbl, False, False, 0)
+            m_req = mdata.get("requests", 0)
+            share = m_req / total_req if total_req > 0 else 0
+            m_bar = Gtk.ProgressBar()
+            m_bar.set_fraction(share)
+            _apply_css(m_bar, f"""
+                progress {{ background-color: {color}; border-radius: 3px; }}
+                trough {{ background-color: {_U["surface1"]}; border-radius: 3px; min-height: 6px; }}
+            """)
+            m_bar.set_size_request(80, -1)
+            row.pack_start(m_bar, False, False, 0)
+            pct_lbl = Gtk.Label()
+            pct_lbl.set_markup(f'<span foreground="{_U["dim"]}" size="x-small">{share * 100:.0f}% ({m_req})</span>')
+            row.pack_start(pct_lbl, False, False, 0)
+            m_in = mdata.get("tokens_in", 0)
+            m_out = mdata.get("tokens_out", 0)
+            if m_in or m_out:
+                tok_lbl = Gtk.Label()
+                tok_lbl.set_markup(f'<span foreground="{_U["dim"]}" size="x-small">in:{_fmt_tok(m_in)} out:{_fmt_tok(m_out)}</span>')
+                row.pack_end(tok_lbl, False, False, 0)
+
+
+def main():
+    """Create the app directories, seed the endpoints file once, run the GUI."""
+    LOG_DIR.mkdir(parents=True, exist_ok=True)
+    PROXY_CONFIG_DIR.mkdir(parents=True, exist_ok=True)
+
+    if not ENDPOINTS_FILE.exists():
+        # First run: write two starter endpoints with empty API keys.
+        openai_ep = {"name": "OpenAI", "backend_type": "native",
+                     "base_url": "https://api.openai.com/v1", "api_key": "",
+                     "default_model": "gpt-4o", "models": ["gpt-4o", "gpt-4o-mini"],
+                     "provider_preset": "OpenAI"}
+        zai_ep = {"name": "Z.AI", "backend_type": "openai-compat",
+                  "base_url": "https://api.z.ai/api/coding/paas/v4", "api_key": "",
+                  "default_model": "glm-5.1",
+                  "models": ["glm-4.5", "glm-4.5-air", "glm-4.6", "glm-4.7",
+                             "glm-5", "glm-5-turbo", "glm-5.1"],
+                  "provider_preset": "Custom"}
+        save_endpoints({"default": "OpenAI", "endpoints": [openai_ep, zai_ep]})
+
+    # Leave the GTK main loop once the launcher window is destroyed.
+    launcher = LauncherWin()
+    launcher.connect("destroy", Gtk.main_quit)
+    Gtk.main()
+
+class RequestHistoryWindow(Gtk.Window):
+    """List the saved request snapshots and show the selected one as JSON."""
+    _SNAP_DIR = Path.home() / ".cache/codex-proxy/requests"
+
+    def __init__(self, parent):
+        """Build the window (transient for ``parent``), load it and show it."""
+        Gtk.Window.__init__(self, title="Request History")
+        self.set_transient_for(parent)
+        self.set_default_size(720, 500)
+        self.set_position(Gtk.WindowPosition.CENTER)
+        vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=6)
+        vbox.set_margin_start(10)
+        vbox.set_margin_end(10)
+        vbox.set_margin_top(10)
+        vbox.set_margin_bottom(10)
+        self.add(vbox)
+
+        hdr = Gtk.Box(spacing=8)
+        vbox.pack_start(hdr, False, False, 0)
+        lbl = Gtk.Label(label="<b>Request History</b>")
+        lbl.set_use_markup(True)
+        hdr.pack_start(lbl, False, False, 0)
+        refresh_btn = Gtk.Button(label="Refresh")
+        refresh_btn.connect("clicked", lambda b: self._load())
+        hdr.pack_end(refresh_btn, False, False, 0)
+        clear_btn = Gtk.Button(label="Clear All")
+        clear_btn.connect("clicked", lambda b: self._clear_all())
+        hdr.pack_end(clear_btn, False, False, 0)
+
+        paned = Gtk.Paned(orientation=Gtk.Orientation.VERTICAL)
+        vbox.pack_start(paned, True, True, 0)
+        top_sw = Gtk.ScrolledWindow()
+        top_sw.set_policy(Gtk.PolicyType.AUTOMATIC, Gtk.PolicyType.AUTOMATIC)
+        paned.pack1(top_sw, resize=True, shrink=False)
+        self._store = Gtk.ListStore(str, str, str, str, str, str)
+        self._tree = Gtk.TreeView(model=self._store)
+        for i, (title, w) in enumerate([("Time", 140), ("Model", 140), ("Status", 80), ("Duration", 70), ("ID", 180), ("Error", 120)]):
+            col = Gtk.TreeViewColumn(title, Gtk.CellRendererText(), text=i)
+            col.set_resizable(True)
+            col.set_min_width(w)
+            self._tree.append_column(col)
+        self._tree.connect("row-activated", self._on_row_activated)
+        top_sw.add(self._tree)
+
+        self._detail = Gtk.TextView()
+        self._detail.set_editable(False)
+        self._detail.set_monospace(True)
+        self._detail.set_wrap_mode(Gtk.WrapMode.WORD_CHAR)
+        bottom_sw = Gtk.ScrolledWindow()
+        bottom_sw.set_policy(Gtk.PolicyType.AUTOMATIC, Gtk.PolicyType.AUTOMATIC)
+        bottom_sw.add(self._detail)
+        paned.pack2(bottom_sw, resize=True, shrink=False)
+        self._snapshots = []  # _snapshots[i] is the data behind row i of _store
+        self._load()
+        self.show_all()
+
+    def _load(self):
+        """Reload the newest 200 snapshots, skipping unreadable files.
+
+        A snapshot is recorded only after its row was added, so indexes stay aligned.
+        """
+        self._store.clear()
+        self._snapshots = []
+        if not self._SNAP_DIR.exists():
+            return
+        files = sorted(self._SNAP_DIR.glob("*.json"), key=lambda p: p.stat().st_mtime, reverse=True)
+        for f in files[:200]:
+            try:
+                data = json.loads(f.read_text())
+                meta = data.get("_meta", {})
+                ts = (meta.get("ts_iso") or "")[:19].replace("T", " ")
+                model = meta.get("model", "?")
+                status = meta.get("status", "unknown")
+                dur = f"{meta['duration_s']:.1f}s" if meta.get("duration_s") is not None else "-"
+                rid = (meta.get("request_id") or "")[:28]
+                err = (meta.get("error") or "")[:60]
+                self._store.append([ts, model, status, dur, rid, err])
+            except Exception:
+                continue  # best effort: one bad file must not hide the rest
+            self._snapshots.append(data)
+
+    def _on_row_activated(self, tree, path, column):
+        """Show the activated row's snapshot as JSON (first 50000 chars)."""
+        idx = path[0]
+        if idx < len(self._snapshots):
+            self._detail.get_buffer().set_text(json.dumps(self._snapshots[idx], indent=2, ensure_ascii=False)[:50000])
+
+    def _clear_all(self):
+        """After a yes/no confirmation, delete every snapshot file and reset."""
+        d = Gtk.MessageDialog(transient_for=self, flags=0, message_type=Gtk.MessageType.WARNING,
+                              buttons=Gtk.ButtonsType.YES_NO, text="Delete all request snapshots?")
+        r = d.run()
+        d.destroy()
+        if r != Gtk.ResponseType.YES:
+            return
+        if self._SNAP_DIR.exists():
+            for f in self._SNAP_DIR.glob("*.json"):
+                try:
+                    f.unlink()
+                except Exception:
+                    pass
+        self._store.clear()
+        self._snapshots = []
+        self._detail.get_buffer().set_text("")
+
+class BenchmarkWindow(Gtk.Window):
+    """Compare two or three endpoint/model "lanes" on latency and throughput."""
+    _BENCH_PROMPT = "In exactly 3 bullet points, explain why the sky is blue."
+    _BENCH_TOOLS = [{"type": "function", "function": {"name": "get_weather",
+                    "parameters": {"type": "object", "properties": {"city": {"type": "string"}}}}}]
+
+    def __init__(self, parent):
+        """Build lane pickers, test toggles and the results table, then show."""
+        Gtk.Window.__init__(self, title="Model Benchmark")
+        self.set_transient_for(parent)
+        self.set_default_size(820, 560)
+        self.set_position(Gtk.WindowPosition.CENTER)
+        self._running = False
+        self._ep_data = load_endpoints()
+
+        vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=8)
+        vbox.set_margin_start(10)
+        vbox.set_margin_end(10)
+        vbox.set_margin_top(10)
+        vbox.set_margin_bottom(10)
+        self.add(vbox)
+
+        hdr = Gtk.Box(spacing=8)
+        vbox.pack_start(hdr, False, False, 0)
+        lbl = Gtk.Label(label="<b>Multi-Provider Benchmark</b>")
+        lbl.set_use_markup(True)
+        hdr.pack_start(lbl, False, False, 0)
+        self._run_btn = Gtk.Button(label="Run Benchmark")
+        self._run_btn.connect("clicked", lambda b: self._run())
+        hdr.pack_end(self._run_btn, False, False, 0)
+
+        lanes_box = Gtk.Box(spacing=6)
+        vbox.pack_start(lanes_box, False, False, 0)
+
+        self._lanes = []
+        for i in range(3):
+            frame = Gtk.Frame(label=("A", "B", None)[i])
+            inner = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=4)
+            inner.set_margin_start(6)
+            inner.set_margin_end(6)
+            inner.set_margin_top(4)
+            inner.set_margin_bottom(4)
+            frame.add(inner)
+            lanes_box.pack_start(frame, True, True, 0)
+            if i == 2:
+                self._c_frame = frame
+                self._c_check = Gtk.CheckButton(label="Enable Lane C")
+                self._c_check.set_active(False)
+                frame.set_label_widget(self._c_check)
+                # Grey out the lane body only.  The checkbox is the frame's
+                # label widget, hence a child of the frame: an insensitive
+                # frame would lock the checkbox too and lane C could never
+                # be enabled.
+                inner.set_sensitive(False)
+                self._c_check.connect("toggled", lambda b, body=inner: body.set_sensitive(b.get_active()))
+
+            row_ep = Gtk.Box(spacing=4)
+            inner.pack_start(row_ep, False, False, 0)
+            row_ep.pack_start(Gtk.Label(label="Endpoint:"), False, False, 0)
+            ep_combo = Gtk.ComboBoxText()
+            for ep in self._ep_data.get("endpoints", []):
+                ep_combo.append(ep["name"], ep["name"])
+            row_ep.pack_start(ep_combo, True, True, 0)
+
+            row_m = Gtk.Box(spacing=4)
+            inner.pack_start(row_m, False, False, 0)
+            row_m.pack_start(Gtk.Label(label="Model:"), False, False, 0)
+            m_combo = Gtk.ComboBoxText()
+            m_combo.set_entry_text_column(0)
+            row_m.pack_start(m_combo, True, True, 0)
+
+            ep_combo.connect("changed", lambda b, mc=m_combo: self._update_lane_models(b, mc))
+            self._lanes.append({"ep": ep_combo, "model": m_combo})
+
+        default_name = self._ep_data.get("default")
+        if default_name:
+            self._lanes[0]["ep"].set_active_id(default_name)
+        eps = self._ep_data.get("endpoints", [])
+        if len(eps) > 1:
+            self._lanes[1]["ep"].set_active_id(eps[1]["name"])
+        elif eps:
+            self._lanes[1]["ep"].set_active_id(eps[0]["name"])
+        if len(eps) > 2:
+            self._lanes[2]["ep"].set_active_id(eps[2]["name"])
+        elif len(eps) > 1:
+            self._lanes[2]["ep"].set_active_id(eps[1]["name"])
+
+        tests_box = Gtk.Box(spacing=6)
+        vbox.pack_start(tests_box, False, False, 0)
+        self._test_ttft = Gtk.CheckButton(label="Time to First Token")
+        self._test_total = Gtk.CheckButton(label="Total Latency")
+        self._test_tools = Gtk.CheckButton(label="Tool Call")
+        self._test_tps = Gtk.CheckButton(label="Tokens/sec")
+        for check in (self._test_ttft, self._test_total, self._test_tools, self._test_tps):
+            check.set_active(True)
+            tests_box.pack_start(check, False, False, 0)
+
+        results_sw = Gtk.ScrolledWindow()
+        results_sw.set_policy(Gtk.PolicyType.AUTOMATIC, Gtk.PolicyType.AUTOMATIC)
+        vbox.pack_start(results_sw, True, True, 0)
+
+        self._results_store = Gtk.ListStore(str, str, str, str, str)
+        self._results_tree = Gtk.TreeView(model=self._results_store)
+        for i, title in enumerate(["Test", "Lane A", "Lane B", "Lane C", "Winner"]):
+            col = Gtk.TreeViewColumn(title, Gtk.CellRendererText(), text=i)
+            col.set_resizable(True)
+            self._results_tree.append_column(col)
+        results_sw.add(self._results_tree)
+
+        self._status = Gtk.Label(label="Select endpoints and models per lane, then Run Benchmark.")
+        self._status.set_xalign(0)
+        vbox.pack_start(self._status, False, False, 0)
+
+        self.show_all()
+
+    def _update_lane_models(self, ep_combo, model_combo):
+        """Refill ``model_combo`` with the models of the endpoint picked in ``ep_combo``."""
+        ep_name = ep_combo.get_active_text()
+        if not ep_name:
+            return
+        choices = (get_endpoint(ep_name) or {}).get("models", [])
+        previous = model_combo.get_active_text()
+        model_combo.remove_all()
+        for model_id in choices:
+            model_combo.append(model_id, model_id)
+        if previous and previous in choices:  # keep the old pick if still offered
+            model_combo.set_active_id(previous)
+        elif choices:
+            model_combo.set_active(0)
+
+    def _collect_lanes(self):
+        """Return the runnable lanes as dicts with ``ep``, ``model``, ``label``.
+
+        Skips lane C unless ticked, and lanes with no selection or unknown endpoint.
+        """
+        runnable = []
+        for idx, widgets in enumerate(self._lanes):
+            if idx == 2 and not self._c_check.get_active():
+                continue
+            ep_name, model = widgets["ep"].get_active_text(), widgets["model"].get_active_text()
+            ep = get_endpoint(ep_name) if ep_name and model else None
+            if ep:
+                runnable.append({"ep": ep, "model": model, "label": f"{ep_name}/{model}"})
+        return runnable
+
+    def _run(self):
+        """Start the benchmark on a daemon thread; needs two usable lanes."""
+        if self._running:
+            return
+        lanes = self._collect_lanes()
+        ready = len(lanes) >= 2
+        self._status.set_text("Running benchmark…" if ready else "Need at least 2 lanes with endpoint + model selected.")
+        if ready:
+            self._running = True
+            self._run_btn.set_sensitive(False)
+            self._results_store.clear()
+            threading.Thread(target=self._run_bench, args=(lanes,), daemon=True).start()
+
+    def _bench_single(self, ep, model, stream, with_tools=False):
+        """Send one chat request to ``ep`` and time it.
+
+        Args:
+            ep: Endpoint dict (``base_url``, ``api_key``, ``backend_type``).
+            model: Model id placed in the request body.
+            stream: Ask for a streamed reply and record time to first chunk.
+            with_tools: Offer the weather tool and ask the model to call it.
+
+        Returns:
+            Dict with ``ttft`` and ``total`` in seconds (``ttft`` equals
+            ``total`` when no first-chunk time was recorded) and a short
+            ``detail`` string that starts with "Error: " on failure.
+        """
+        url = normalize_base_url(ep.get("base_url", ""))
+        key = (ep.get("api_key") or "").strip()
+        bt = ep.get("backend_type", "openai-compat")
+        tools = self._BENCH_TOOLS
+        if bt == "anthropic":
+            test_url = f"{url}/v1/messages"
+            headers = {"x-api-key": key, "anthropic-version": "2023-06-01", "content-type": "application/json"}
+            # The Messages API expects flat {"name", "input_schema"} tools,
+            # not OpenAI's {"type": "function", "function": {...}} wrapper.
+            tools = [{"name": t["function"]["name"], "input_schema": t["function"]["parameters"]} for t in tools]
+        elif bt.startswith("gemini-oauth"):
+            token_name = "google-antigravity-oauth-token.json" if "antigravity" in bt else "google-cli-oauth-token.json"
+            token_path = Path.home() / f".cache/codex-proxy/{token_name}"
+            oauth_token = ""
+            if token_path.exists():
+                try:
+                    td = json.loads(token_path.read_text())
+                    oauth_token = td.get("access_token", "")
+                except Exception:
+                    pass
+            test_url = f"{url}/v1/chat/completions"
+            headers = {"Authorization": f"Bearer {oauth_token}", "content-type": "application/json"}
+        else:
+            test_url = f"{url}/chat/completions"
+            headers = {"Authorization": f"Bearer {key}", "content-type": "application/json"}
+        prompt = "Use get_weather for Paris" if with_tools else self._BENCH_PROMPT
+        body = {"model": model, "max_tokens": 100, "stream": stream,
+                "messages": [{"role": "user", "content": prompt}]}
+        if with_tools:
+            body["tools"] = tools
+        req = urllib.request.Request(test_url, data=json.dumps(body).encode(), headers=headers, method="POST")
+
+        t0 = time.time()
+        ttft = None
+        try:
+            # "with" closes the HTTP response on every path, errors included.
+            with urllib.request.urlopen(req, timeout=60) as resp:
+                if stream:
+                    chunks = []
+                    while True:
+                        chunk = resp.read(4096)
+                        if not chunk:
+                            break
+                        if ttft is None:
+                            ttft = time.time() - t0
+                        chunks.append(chunk)
+                    raw = b"".join(chunks)
+                else:
+                    raw = resp.read()
+                total = time.time() - t0
+            timing = {"ttft": ttft or total, "total": total}
+            payload = {} if stream else json.loads(raw)
+            choices = payload.get("choices", [])
+            if not choices:
+                return {**timing, "detail": raw.decode(errors="replace")[:60]}
+            msg = choices[0].get("message", {})
+            tok = (payload.get("usage") or {}).get("total_tokens", "?")
+            # "tool_calls", "content" and "usage" may be missing or null; treat them as empty.
+            if with_tools:
+                return {**timing, "detail": f"tools={bool(msg.get('tool_calls'))}, tok={tok}"}
+            content = (msg.get("content") or "")[:40]
+            return {**timing, "detail": f"{content}… tok={tok}"}
+        except Exception as e:
+            total = time.time() - t0
+            return {"ttft": ttft or total, "total": total, "detail": f"Error: {str(e)[:40]}"}
+
+    def _bench_tps(self, ep, model):
+        """Stream one longer completion and estimate output tokens per second.
+
+        Tokens are approximated as characters / 4 over the streamed text
+        deltas (over the raw body if none parsed).  Returns a dict with
+        ``tps``, ``tokens``, ``gen_time``, ``total`` and ``detail``.
+        """
+        url = normalize_base_url(ep.get("base_url", ""))
+        key = (ep.get("api_key") or "").strip()
+        bt = ep.get("backend_type", "openai-compat")
+        prompt = "Write a detailed paragraph about artificial intelligence in at least 150 words."
+        max_tok = 512
+        if bt == "anthropic":
+            test_url = f"{url}/v1/messages"
+            headers = {"x-api-key": key, "anthropic-version": "2023-06-01", "content-type": "application/json"}
+        elif bt.startswith("gemini-oauth"):
+            token_name = "google-antigravity-oauth-token.json" if "antigravity" in bt else "google-cli-oauth-token.json"
+            token_path = Path.home() / f".cache/codex-proxy/{token_name}"
+            oauth_token = ""
+            if token_path.exists():
+                try:
+                    td = json.loads(token_path.read_text())
+                    oauth_token = td.get("access_token", "")
+                except Exception:
+                    pass
+            test_url = f"{url}/v1/chat/completions"
+            headers = {"Authorization": f"Bearer {oauth_token}", "content-type": "application/json"}
+        else:
+            test_url = f"{url}/chat/completions"
+            headers = {"Authorization": f"Bearer {key}", "content-type": "application/json"}
+        body = json.dumps({"model": model, "max_tokens": max_tok, "stream": True,
+                           "messages": [{"role": "user", "content": prompt}]}).encode()
+
+        req = urllib.request.Request(test_url, data=body, headers=headers, method="POST")
+        t0 = time.time()
+        first_token_t = None
+        token_count = 0
+        try:
+            chunks = []  # joined once at the end; bytes += is quadratic
+            with urllib.request.urlopen(req, timeout=90) as resp:
+                while True:
+                    chunk = resp.read(4096)
+                    if not chunk:
+                        break
+                    if first_token_t is None:
+                        first_token_t = time.time()
+                    chunks.append(chunk)
+                t_end = time.time()
+            total = t_end - t0
+            text = b"".join(chunks).decode(errors="replace")
+            if bt == "anthropic":
+                for line in text.split("\n"):
+                    if "content_block_delta" in line and "text_delta" in line:
+                        try:
+                            delta = json.loads(line[line.index("{"):]).get("delta", {})
+                            token_count += len(delta.get("text", "")) / 4
+                        except Exception:
+                            pass
+            else:
+                for line in text.split("\n"):
+                    if line.startswith("data: ") and line != "data: [DONE]":
+                        try:
+                            d = json.loads(line[6:])
+                            content = d.get("choices", [{}])[0].get("delta", {}).get("content", "")
+                            if content:
+                                token_count += max(1, len(content) / 4)
+                        except Exception:
+                            pass
+            if token_count == 0:
+                token_count = max(1, len(text) / 4)
+            # Generation ends at the last byte received (parsing is not billed); use total if nothing was timed.
+            gen_time = ((t_end - first_token_t) if first_token_t else 0) or total
+            tps = token_count / gen_time if gen_time > 0 else 0
+            return {"tps": tps, "tokens": int(token_count), "gen_time": gen_time, "total": total,
+                    "detail": f"{int(token_count)} tok / {gen_time:.1f}s"}
+        except Exception as e:
+            total = time.time() - t0
+            return {"tps": 0, "tokens": 0, "gen_time": total, "total": total, "detail": f"Error: {str(e)[:40]}"}
+
+    def _run_bench(self, lanes):
+        """Worker thread: run the ticked tests on every lane, then publish.
+
+        ``lanes`` is the list built by ``_collect_lanes()``.  Each result row is
+        (test, lane A, lane B, lane C, winner).  A lane wins only with a 15%
+        margin over the runner-up, and lanes whose request failed never compete:
+        a fast error is not a fast model.
+        """
+        def failed(res):
+            # The bench helpers report failures through the detail text.
+            return res["detail"].startswith("Error:")
+
+        def pick_winner(scored, lower_is_better):
+            # scored: (label, value) pairs of the lanes that succeeded.
+            if not scored:
+                return "—"
+            ranked = sorted(scored, key=lambda x: x[1], reverse=not lower_is_better)
+            if len(ranked) == 1:
+                return ranked[0][0]
+            best, second = ranked[0][1], ranked[1][1]
+            clear = best < second * 0.85 if lower_is_better else best > second * 1.15
+            return ranked[0][0] if clear else "Tie"
+
+        results = []
+        status = "Benchmark complete."
+        try:
+            # NOTE(review): these GTK reads run off the main thread; consider passing them in from _run().
+            tests = []
+            if self._test_ttft.get_active():
+                tests.append(("TTFT (stream)", True, False))
+            if self._test_total.get_active():
+                tests.append(("Total latency", False, False))
+            if self._test_tools.get_active():
+                tests.append(("Tool call", False, True))
+            run_tps = self._test_tps.get_active()
+
+            for test_name, stream, tools in tests:
+                metric = "ttft" if stream else "total"
+                lane_results = []
+                for lane in lanes:
+                    GLib.idle_add(self._status.set_text, f"{test_name}: {lane['label']}…")
+                    r = self._bench_single(lane["ep"], lane["model"], stream, tools)
+                    lane_results.append((lane["label"], r))
+                cols = [f"{r[metric]:.2f}s ({r['detail'][:30]})" for _, r in lane_results]
+                cols += ["—"] * (3 - len(cols))
+                winner = pick_winner([(lb, r[metric]) for lb, r in lane_results if not failed(r)], True)
+                results.append(tuple([test_name] + cols + [winner]))
+
+            if run_tps:
+                lane_tps = []
+                for lane in lanes:
+                    GLib.idle_add(self._status.set_text, f"Tokens/sec: {lane['label']}…")
+                    lane_tps.append((lane["label"], self._bench_tps(lane["ep"], lane["model"])))
+                cols = [f"{r['tps']:.1f} t/s ({r['detail'][:25]})" for _, r in lane_tps]
+                cols += ["—"] * (3 - len(cols))
+                # A rate of 0 means nothing was measured, so it cannot compete.
+                winner = pick_winner([(lb, r["tps"]) for lb, r in lane_tps if r["tps"] > 0], False)
+                results.append(tuple(["Tokens/sec"] + cols + [winner]))
+        except Exception as e:
+            # Without this the thread would die silently and leave the Run
+            # button disabled and _running stuck at True.
+            status = f"Benchmark failed: {str(e)[:60]}"
+
+        def _show():
+            # Scheduled via idle_add; returning None means it is called once.
+            for row in results:
+                self._results_store.append(row)
+            self._status.set_text(status)
+            self._running = False
+            self._run_btn.set_sensitive(True)
+
+        GLib.idle_add(_show)
+
+if __name__ == "__main__":  # start the launcher only when run as a script
+    main()
diff --git a/codex-launcher_3.8.4_all.deb b/codex-launcher_3.8.4_all.deb
new file mode 100644
index 0000000..7c4e4bd
Binary files /dev/null and b/codex-launcher_3.8.4_all.deb differ
diff --git a/translate-proxy.py b/translate-proxy.py
new file mode 100755
index 0000000..4f93e85
--- /dev/null
+++ b/translate-proxy.py
@@ -0,0 +1,5595 @@
+#!/usr/bin/env python3
+"""
+translate-proxy.py — Responses API → backend API translation proxy.
+
+Backends:
+  openai-compat — any OpenAI-compatible Chat Completions API
+  anthropic     — Anthropic Messages API
+  command-code   — CommandCode /alpha/generate (Z.AI GLM Coding Plan)
+
+Usage:
+  python3 translate-proxy.py --config proxy-config.json
+  python3 translate-proxy.py --backend command-code --target-url https://... --api-key sk-...
+
+═══════════════════════════════════════════════════════════════════
+COMMANDCODE ADAPTER — FIX HISTORY (2026-05-22)
+═══════════════════════════════════════════════════════════════════
+
+This file contains multiple rounds of fixes for the CommandCode adapter.
+Each fix addresses a specific failure mode observed in production.
+They are documented here for future maintainability.
+
+FIX 1: Content blocks rejected by CC API (root cause of initial 400 errors)
+  Symptom: {"error":{"message":"params.messages[i].content expected string, received array"}}
+  Cause: cc_input_to_messages emitted tool results as content blocks [{"type":"tool_result",...}]
+  Fix: All messages now use string content. Tool results as role="user" with plain text.
+  Location: cc_input_to_messages() ~line 1085
+
+FIX 2: x-command-code-version header dropped during rewrite
+  Symptom: HTTP 403 upgrade_required from CommandCode API
+  Cause: _handle_command_code rewrite removed the header line
+  Fix: Always send x-command-code-version header with fallback "0.26.8"
+  Location: _handle_command_code() header setup block
+
+FIX 3: Stale schema cache with wrong content_type=array
+  Symptom: SchemaAdapter used content_type="array" causing content blocks in auto path
+  Cause: ErrorAnalyzer learned incorrect schema from error message text
+  Fix: Cleared provider-caps.json; added 24h staleness TTL to _load_schema()
+  Location: _load_schema(), provider-caps.json
+
+FIX 4: Stream disconnect before completion (client-side "stream disconnected")
+  Symptom: Client sees partial SSE then connection close, no response.completed event
+  Cause: No try/except around streaming path; exceptions crashed handler mid-stream
+  Fix: Wrapped stream_buffered_events in try/except; sends response.completed(status:"failed") on crash
+  Location: _handle_command_code() streaming section
+
+FIX 5: Tool calls echoed as text instead of being parsed (THE BIG ONE)
+  Symptom: Model generates inline JSON tool calls like {"type":"tool-call","id":"...","name":"exec_command","arguments":"{...}"}
+        These appear as raw text in the conversation. The tool is never executed.
+  Root cause chain:
+    a) cc_input_to_messages sends tool calls as inline JSON text in assistant messages
+    b) The CC model echoes back similar JSON in its text-delta response
+    c) _parse_commandcode_text_tool_calls only handled XML format (`<tool>`)
+    d) Raw JSON tool calls passed through as plain text → client shows them unparsed
+  Fix: Added _extract_raw_json_tool_calls() with field-level regex extraction.
+      Handles BOTH malformed (unescaped inner quotes) AND properly escaped JSON.
+      Three-tier parse: direct json.loads → unescape \"→\" → unicode_escape decode.
+  Location: _extract_args(), _extract_field(), _extract_raw_json_tool_calls()
+
+FIX 6: Double-wrapped arguments (nested {"cmd": "{\"cmd\": \"curl...\"}"}")
+  Symptom: args={"cmd": "{\\\"cmd\\\": \\\"curl...\\\"}"}
+        Tool executor receives cmd = the literal string '{"cmd": "curl..."', not the actual curl command.
+  Root cause: When model generates properly escaped JSON ("arguments": "{\\"cmd\\": \\"...\\"}"),
+         _extract_args naive brace-counting returns raw text with escaped quotes.
+         json.loads(raw) fails on \\ at structural level.
+         Fallback sets args["cmd"] = raw_string → double-wrapped.
+  Fix: _extract_args now tries 3 parse strategies before returning.
+         Also normalizes sandbox_permissions from parsed args dict (not raw snippet).
+  Location: _extract_args() three-tier parser, sandbox_permissions normalization
+
+FIX 7: _extract_field can't read values starting with \"
+  Symptom: sandbox_permissions="allow_all" passes through unnormalized because
+        _extract_field sees val_start=\ (backslash) which != " or { → returns None
+  Fix: Skip leading backslash before checking for " or { value type.
+  Location: _extract_field() leading-\ skip
+
+FIX 8: Adaptive probing caused format mismatch (REVERTED)
+  Symptom: Probe system discovered OpenAI tool_calls+role=tool format but CC API couldn't
+        process multi-turn tool loops correctly with it.
+  Fix: Removed probe system entirely. Use conservative format only:
+        - Inline JSON text for tool calls (cc_input_to_messages default)
+        - role="user" for all tool results
+        - ErrorAnalyzer learning on retries (not proactive probes)
+  Location: Reverted to cc_input_to_messages(), removed _build_cc_messages + _probe_cc_format
+
+FIX 21: DSML parser silently drops tool calls when model uses name="cmd" (THE HALT BUG)
+  Symptom: Codex CLI stops mid-task. Model generates valid DSML exec_command with
+        <｜｜DSML｜｜parameter name="cmd" string="true">curl ...
+        Parser returns parsed_tool_calls=0. Client sees text output but no tool to execute.
+        CLI has nothing to do and halts.
+  Root cause: Line 1798 had `if key == "command":` — only matching parameter name="command".
+        The actual tool schema defines the parameter as "cmd" (see exec_command schema).
+        When DeepSeek generates name="cmd", the key "cmd" != "command", so cmd stays None,
+        and line 1825-1826 `if not cmd: continue` silently skips the entire tool call.
+        The XML parser (line 2205) already handled both: `params.get("command") or params.get("cmd")`
+        but the DSML parser did not.
+  Fix: Changed to `if key in ("command", "cmd"):` in the DSML parameter loop.
+  Test: Pattern L self-test verifies DSML with name="cmd" is parsed correctly.
+  Location: _parse_commandcode_text_tool_calls() DSML parameter loop, self-test Pattern L
+
+════════════════════════════════════════════════════════════════════
+INTELLIGENCE ROUTING — Self-Healing Parser System (v3.7.0)
+════════════════════════════════════════════════════════════════════
+
+Problem: The Command Code model produces output in unpredictable formats
+that change between sessions and models. When the multi-format parser chain
+(DSML → <bash> → <explore_agent> → <tool_call type=...> → XML → raw JSON →
+fallback regex) returns empty, the Codex agent loop has zero tool calls and
+STALLS — the user sees the model "thinking" but nothing happens.
+
+Intelligence Routing is a three-layer self-healing system:
+
+LAYER 1 — Deep URL Extraction (FIX 23)
+  The <explore_agent> handler was failing because URLs were hidden inside
+  nested JSON: messages: [{"content": "https://..."}]. The regex couldn't
+  find them because it excluded the " character that terminates JSON values.
+  
+  Solution: _build_explore_cmd() is now a module-level function (was a
+  closure). After the initial regex fails, it tries json.loads() on the
+  text, iterates list items, and extracts the "content" field to find URLs.
+  Also added " to the regex exclusion set and rstrip characters.
+
+LAYER 2 — Escalation Block Handling (FIX 24)
+  The model produces <require_escalation> and <request_escalation_permission>
+  blocks when it wants elevated permissions. The CC adapter doesn't support
+  escalation — these blocks were silently dropped, causing parsed_tool_calls=0.
+  
+  Solution: Two handlers:
+    - FIX 24a: Closed-tag blocks — extracts URL if present, runs explore cmd;
+      otherwise echoes auto-proceed message.
+    - FIX 24b: Bare/unclosed tags (<require_escalation />) — auto-proceeds.
+
+LAYER 3 — Intent-Based Command Synthesis (FIX 25, THE CORE)
+  When ALL parsers return empty and text has content, the system plays
+  detective using 5 heuristics in priority order:
+  
+    1. URL detected in text → curl to fetch it
+    2. File path reference → cat or ls that file
+    3. Shell command in backticks/quotes → extract and run
+    4. "explore"/"fetch"/"investigate" intent + last user URL → explore cmd
+    5. "I need to"/"let me"/"please" intent text → echo diagnostic
+
+  This ensures the agent loop ALWAYS has a tool call to execute, even when
+  the model's output format is completely unrecognized. The loop never stalls.
+
+Architecture:
+  _parse_commandcode_text_tool_calls() — LAYER 1 + LAYER 2
+  cc_stream_to_sse() — LAYER 3 (runs after parser chain + fallback)
+  
+  The _last_user_urls deque (maxlen=20) tracks URLs from user messages
+  across the session, giving Layer 3 heuristic 4 a URL to work with.
+
+  Self-tests: 54 patterns (was 41) covering all three layers.
+
+════════════════════════════════════════════════════════════════════
+"""
+
+import json, http.server, socketserver, urllib.request, urllib.parse, urllib.error, re
+import time, uuid, os, sys, argparse, threading, socket, collections, contextlib, signal
+import dataclasses
+import http.client
+import selectors
+
+# ═══════════════════════════════════════════════════════════════════
+# Config
+# ═══════════════════════════════════════════════════════════════════
+
+# Built-in model catalogue per backend type. load_config() falls back to the
+# entry for the selected backend when no other model list was supplied.
+# Each row below is (backend type, "owned_by" value, model ids).
+DEFAULT_MODELS = {
+    backend: [
+        {"id": model_id, "object": "model", "created": 1700000000, "owned_by": owner}
+        for model_id in model_ids
+    ]
+    for backend, owner, model_ids in (
+        ("openai-compat", "custom", ("gpt-4o-mini",)),
+        ("anthropic", "anthropic", ("claude-sonnet-4-20250514",)),
+        ("freebuff", "freebuff", (
+            "deepseek/deepseek-v4-pro",
+            "deepseek/deepseek-v4-flash",
+            "moonshotai/kimi-k2.6",
+            "minimax/minimax-m2.7",
+        )),
+        ("auto", "auto", ("default-model",)),
+    )
+}
+
+def load_config():
+    """Assemble the proxy configuration dict.
+
+    Values are taken, in decreasing priority, from command-line flags, the
+    JSON file named by --config, environment variables, and finally the
+    built-in defaults. The model list comes from the config's "models" key,
+    else from --models-file, else from DEFAULT_MODELS for the chosen backend.
+
+    Returns:
+        dict with at least "port", "backend_type", "target_url", "api_key"
+        and "models".
+    """
+    parser = argparse.ArgumentParser(description="Responses API translation proxy")
+    parser.add_argument("--config", help="JSON config file path")
+    parser.add_argument("--port", type=int, default=None)
+    parser.add_argument("--backend", default=None, choices=["openai-compat", "anthropic", "command-code", "freebuff", "auto"])
+    parser.add_argument("--target-url", default=None)
+    parser.add_argument("--api-key", default=None)
+    parser.add_argument("--models-file", default=None, help="JSON file with model list array")
+    args = parser.parse_args()
+
+    cfg = {}
+    if args.config:
+        with open(args.config) as fh:
+            cfg = json.load(fh)
+
+    # Explicit command-line flags win over whatever the config file provided.
+    cli_values = {
+        "port": args.port,
+        "backend_type": args.backend,
+        "target_url": args.target_url,
+        "api_key": args.api_key,
+    }
+    for key, value in cli_values.items():
+        if value is not None:
+            cfg[key] = value
+
+    # Environment variables only fill keys that are still unset.
+    # Each row: (config key, primary variable, fallback variable, converter).
+    env_sources = (
+        ("port", "PROXY_PORT", "ZAI_PROXY_PORT", int),
+        ("backend_type", "PROXY_BACKEND", None, str),
+        ("target_url", "PROXY_TARGET_URL", "ZAI_BASE_URL", str),
+        ("api_key", "PROXY_API_KEY", "ZAI_API_KEY", str),
+    )
+    for key, primary, fallback, convert in env_sources:
+        if key in cfg:
+            continue
+        raw = os.environ.get(primary)
+        if not raw and fallback:
+            raw = os.environ.get(fallback)
+        if raw:
+            cfg[key] = convert(raw) if convert == int else raw
+
+    for key, default in (
+        ("port", 8080),
+        ("backend_type", "openai-compat"),
+        ("target_url", "http://localhost:11434/v1"),
+        ("api_key", ""),
+    ):
+        cfg.setdefault(key, default)
+
+    models = cfg.get("models", [])
+    if not models and args.models_file:
+        with open(args.models_file) as fh:
+            models = json.load(fh)
+    cfg["models"] = models or DEFAULT_MODELS.get(cfg["backend_type"], [])
+
+    return cfg
+
+# Runtime settings. These are import-time placeholders; _init_runtime()
+# overwrites CONFIG through BGP_ROUTES from the dict returned by load_config().
+CONFIG = None
+PORT = 8080
+BACKEND = "openai-compat"
+TARGET_URL = ""
+API_KEY = ""
+OAUTH_PROVIDER = ""
+MODELS = []
+CC_VERSION = ""
+REASONING_ENABLED = True
+REASONING_EFFORT = "medium"
+BGP_ROUTES = []
+SERVER = None
+
+# Per-user cache directory (~/.cache/codex-proxy) and its "requests"
+# subdirectory. Both are created at import time.
+_LOG_DIR = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy")
+os.makedirs(_LOG_DIR, exist_ok=True)
+_REQUESTS_DIR = os.path.join(_LOG_DIR, "requests")
+os.makedirs(_REQUESTS_DIR, exist_ok=True)
+# JSON files holding aggregated usage statistics and learned provider capabilities.
+_stats_path = os.path.join(_LOG_DIR, "usage-stats.json")
+_provider_caps_path = os.path.join(_LOG_DIR, "provider-caps.json")
+# _stats_lock guards _stats_pending and _stats_flush_timer.
+_stats_lock = threading.Lock()
+# Usage entries queued by _record_usage() until _flush_stats() writes them out.
+_stats_pending = []
+# Pending threading.Timer for the next flush, or None when none is scheduled.
+_stats_flush_timer = None
+# Delay in seconds between the first queued entry and the flush.
+_STATS_FLUSH_INTERVAL = 5.0
+_STATS = {}
+
+# Best-effort append-mode log file; None when it cannot be opened.
+try:
+    _LOG_FILE = open(os.path.join(_LOG_DIR, "proxy.log"), "a")
+except Exception:
+    _LOG_FILE = None
+
+# resp_id -> {"input", "output", "ts"} in insertion order. store_response()
+# caps it at _MAX_STORED entries; entries older than _RESPONSE_TTL seconds
+# are removed by _response_store_evict().
+_response_store = collections.OrderedDict()
+_response_store_lock = threading.Lock()
+_MAX_STORED = 50
+_RESPONSE_TTL = 600
+
+# resp_id -> {"reasoning", "ts"}; uses the same size cap and TTL as above.
+_fb_reasoning_store = collections.OrderedDict()
+_fb_reasoning_store_lock = threading.Lock()
+
+# Per the changelog: stored assistant messages with reasoning_content, indexed
+# by tool_call_id. NOTE(review): the code that fills it and applies
+# _MAX_DS_STORED is not in this part of the file - confirm there.
+_deepseek_reasoning_store = {}
+_deepseek_reasoning_lock = threading.Lock()
+_MAX_DS_STORED = 100
+
+_crof_lock = threading.Lock()
+# Guards the lazy load of _provider_caps in _load_provider_caps().
+_provider_caps_lock = threading.Lock()
+# Parsed provider-caps.json contents; None until first loaded.
+_provider_caps = None
+
+_shutdown_requested = False
+_active_connections = 0
+_active_connections_lock = threading.Lock()
+_active_requests = {}
+_active_requests_lock = threading.Lock()
+
+_pool = uuid.uuid4().hex[:8]
+# Fallback version string returned by _fetch_antigravity_version() when no
+# cached or fetched version is available.
+_antigravity_version = "1.18.3"
+# time.time() of the last refresh performed by _ensure_antigravity_version().
+_antigravity_version_checked = 0
+_antigravity_version_lock = threading.Lock()
+# Bounded history of recently seen URLs (see the module docstring, Layer 3).
+_last_user_urls = collections.deque(maxlen=20)
+
+# "scheme://host:port" -> http.client connection reused by _pooled_urlopen().
+_conn_pool_lock = threading.Lock()
+_conn_pool = {}
+
+# Default idle limit in seconds for _stream_with_idle_timeout().
+_STREAM_IDLE_TIMEOUT = 300
+
+_FREEBUFF_AUTH_URL = "https://freebuff.com"
+# Base URL for the session and agent-run endpoints called below.
+_FREEBUFF_API_URL = "https://www.codebuff.com"
+# Model id -> agent id. Presumably the value passed as agent_id to
+# _freebuff_start_run(); verify against the caller.
+_FREEBUFF_AGENT_MAP = {
+    "deepseek/deepseek-v4-pro": "base2-free-deepseek",
+    "deepseek/deepseek-v4-flash": "base2-free-deepseek-flash",
+    "moonshotai/kimi-k2.6": "base2-free-kimi",
+    "minimax/minimax-m2.7": "base2-free",
+}
+# Credentials file read by _get_freebuff_token().
+_FREEBUFF_CREDS_PATH = os.path.join(os.path.expanduser("~"), ".config", "manicode", "credentials.json")
+# Last token read from the credentials file and when it was read.
+_freebuff_token_cache = {"token": None, "checked": 0}
+# Last session instance id, its expiry time and the model it was created for.
+_freebuff_session_cache = {"instance_id": None, "expires": 0, "model": None}
+# Guards both caches above.
+_freebuff_token_lock = threading.Lock()
+
+def _get_freebuff_token():
+    """Return the FreeBuff auth token, or "" when it cannot be read.
+
+    A non-empty token read less than 300 seconds ago is served from
+    _freebuff_token_cache. Otherwise the credentials file is re-read: the
+    "authToken" of the "default" account is preferred, then the top-level
+    "apiKey".
+    """
+    with _freebuff_token_lock:
+        cached = _freebuff_token_cache["token"]
+        still_fresh = _freebuff_token_cache["checked"] > time.time() - 300
+        if cached and still_fresh:
+            return cached
+    try:
+        with open(_FREEBUFF_CREDS_PATH) as fh:
+            creds = json.load(fh)
+        token = creds.get("default", {}).get("authToken") or creds.get("apiKey") or ""
+        with _freebuff_token_lock:
+            _freebuff_token_cache.update(token=token, checked=time.time())
+    except Exception as e:
+        print(f"[freebuff] no credentials at {_FREEBUFF_CREDS_PATH}: {e}", file=sys.stderr)
+        return ""
+    return token
+
+def _freebuff_get_session(token, model):
+    """Return a FreeBuff session instance id for *model*, or None on failure.
+
+    A cached instance id is reused while it was created for the same model
+    and has more than 60 seconds left. Otherwise a new session is requested
+    and cached; the cached lifetime is the server's "remainingMs" capped at
+    one hour.
+
+    Args:
+        token: bearer token sent in the Authorization header.
+        model: model id sent in the request body.
+    """
+    with _freebuff_token_lock:
+        sc = _freebuff_session_cache
+        if sc["instance_id"] and sc["expires"] > time.time() + 60 and sc["model"] == model:
+            return sc["instance_id"]
+    try:
+        url = f"{_FREEBUFF_API_URL}/api/v1/freebuff/session"
+        body = json.dumps({"model": model}).encode()
+        req = urllib.request.Request(url, data=body, headers={
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {token}",
+            "User-Agent": "codex-launcher/3.8.4",
+        })
+        # The context manager closes the response (and its socket) even when
+        # reading or decoding the body fails.
+        with urllib.request.urlopen(req, timeout=15) as resp:
+            data = json.loads(resp.read())
+        instance_id = data.get("instanceId", "")
+        # A missing or null remainingMs must not discard a valid instance id.
+        remaining_ms = data.get("remainingMs") or 0
+        if instance_id:
+            with _freebuff_token_lock:
+                _freebuff_session_cache["instance_id"] = instance_id
+                _freebuff_session_cache["expires"] = time.time() + min(remaining_ms / 1000, 3600)
+                _freebuff_session_cache["model"] = model
+            print(f"[freebuff] session active, instance={instance_id[:8]}...", file=sys.stderr)
+            return instance_id
+        return None
+    except Exception as e:
+        print(f"[freebuff] session failed: {e}", file=sys.stderr)
+        return None
+
+def _freebuff_start_run(token, agent_id):
+    """Start an agent run and return its run id, or None on failure.
+
+    Args:
+        token: bearer token sent in the Authorization header.
+        agent_id: value sent as "agentId" in the START request.
+    """
+    url = f"{_FREEBUFF_API_URL}/api/v1/agent-runs"
+    body = json.dumps({"action": "START", "agentId": agent_id, "ancestorRunIds": []}).encode()
+    req = urllib.request.Request(url, data=body, headers={
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {token}",
+        "User-Agent": "codex-launcher/3.8.4",
+    })
+    try:
+        # Close the response deterministically instead of leaking the socket.
+        with urllib.request.urlopen(req, timeout=15) as resp:
+            data = json.loads(resp.read())
+        run_id = data.get("runId")
+        print(f"[freebuff] started run {run_id} for agent {agent_id}", file=sys.stderr)
+        return run_id
+    except urllib.error.HTTPError as e:
+        # HTTPError wraps an open response: read it leniently (an undecodable
+        # error body must not raise out of this handler) and close it.
+        try:
+            err = e.read().decode(errors="replace")[:300]
+        finally:
+            e.close()
+        print(f"[freebuff] start run failed: HTTP {e.code}: {err}", file=sys.stderr)
+        return None
+    except Exception as e:
+        print(f"[freebuff] start run error: {e}", file=sys.stderr)
+        return None
+
+def _freebuff_finish_run(token, run_id, status="completed"):
+    """Report an agent run as finished; failures are logged, never raised.
+
+    Args:
+        token: bearer token sent in the Authorization header.
+        run_id: id of the run to finish.
+        status: value sent as "status" in the FINISH request.
+    """
+    url = f"{_FREEBUFF_API_URL}/api/v1/agent-runs"
+    body = json.dumps({"action": "FINISH", "runId": run_id, "status": status,
+                       "totalSteps": 1, "directCredits": 0, "totalCredits": 0}).encode()
+    req = urllib.request.Request(url, data=body, headers={
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {token}",
+        "User-Agent": "codex-launcher/3.8.4",
+    })
+    try:
+        # The response body is not needed, but the response must still be
+        # closed so its socket is released.
+        with urllib.request.urlopen(req, timeout=10):
+            pass
+    except Exception as e:
+        print(f"[freebuff] finish run {run_id} error: {e}", file=sys.stderr)
+
+# `_LOG_FILE` is opened once during module setup further up. It is deliberately
+# not reset to None here: doing so would discard that handle and make
+# _log_dual() skip the log file entirely.
+# Serializes writes to `_LOG_FILE` made by _log_dual().
+_LOG_FILE_LOCK = threading.Lock()
+
+def _fetch_antigravity_version():
+    """Return a version string for Antigravity.
+
+    Lookup order:
+      1. the on-disk cache, when it is less than 6 hours old;
+      2. the first "N.N.N" pattern found in the body of each candidate URL
+         (the second URL is only searched in its first 5000 characters);
+      3. the module-level `_antigravity_version` value.
+
+    A version found online is written back to the cache on a best-effort basis.
+    """
+    cache_path = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy", "antigravity-version.json")
+    try:
+        with open(cache_path) as f:
+            cached = json.load(f)
+        if cached.get("version") and cached.get("checked_at", 0) > time.time() - 6 * 3600:
+            return cached["version"]
+    except Exception:
+        # Missing or unreadable cache: fall through to the online lookup.
+        pass
+    urls = [
+        ("https://antigravity-auto-updater-974169037036.us-central1.run.app", None),
+        ("https://antigravity.google/changelog", 5000),
+    ]
+    for url, limit in urls:
+        try:
+            req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
+            # Close the response deterministically instead of leaking the socket.
+            with urllib.request.urlopen(req, timeout=5) as resp:
+                text = resp.read().decode(errors="replace")
+        except Exception:
+            # This source is unavailable; try the next one.
+            continue
+        if limit:
+            text = text[:limit]
+        m = re.search(r"\d+\.\d+\.\d+", text)
+        if not m:
+            continue
+        version = m.group(0)
+        try:
+            os.makedirs(os.path.dirname(cache_path), exist_ok=True)
+            with open(cache_path, "w") as f:
+                json.dump({"version": version, "checked_at": time.time()}, f)
+        except Exception:
+            # The cache is an optimisation only; a failed write is not fatal.
+            pass
+        return version
+    return _antigravity_version
+
+def _ensure_antigravity_version():
+    """Return the Antigravity version, refreshing it at most every 6 hours.
+
+    The freshness check is made once without the lock and again after
+    acquiring it, so the fetch is skipped if another thread already refreshed
+    the value while this one was waiting for the lock.
+    """
+    global _antigravity_version, _antigravity_version_checked
+    max_age = 6 * 3600
+
+    def _is_fresh():
+        return time.time() - _antigravity_version_checked < max_age
+
+    if _is_fresh():
+        return _antigravity_version
+    with _antigravity_version_lock:
+        if not _is_fresh():
+            _antigravity_version = _fetch_antigravity_version()
+            _antigravity_version_checked = time.time()
+        return _antigravity_version
+
+def _init_runtime():
+    """Load the configuration and publish it into the module-level globals.
+
+    After copying the values from load_config(), it also:
+      * resolves the Antigravity version when the OAuth provider is
+        "google-antigravity";
+      * derives a model list from the BGP routes when the configuration
+        produced none;
+      * for "gemini-oauth*" backends with a Google OAuth provider, replaces
+        the model list with the models recorded in the cached token file.
+    """
+    global CONFIG, PORT, BACKEND, TARGET_URL, API_KEY, OAUTH_PROVIDER, _antigravity_version
+    global MODELS, CC_VERSION, REASONING_ENABLED, REASONING_EFFORT, BGP_ROUTES
+
+    CONFIG = load_config()
+    PORT = CONFIG["port"]
+    BACKEND = CONFIG["backend_type"]
+    # Drop trailing slashes from the configured base URL.
+    TARGET_URL = CONFIG["target_url"].rstrip("/")
+    API_KEY = CONFIG["api_key"]
+    OAUTH_PROVIDER = CONFIG.get("oauth_provider") or ""
+    MODELS = CONFIG["models"]
+    CC_VERSION = CONFIG.get("cc_version", "")
+    REASONING_ENABLED = CONFIG.get("reasoning_enabled", True)
+    REASONING_EFFORT = CONFIG.get("reasoning_effort", "medium")
+    BGP_ROUTES = CONFIG.get("bgp_routes", [])
+    if OAUTH_PROVIDER == "google-antigravity":
+        _antigravity_version = _ensure_antigravity_version()
+        print(f"[antigravity] version={_antigravity_version}", file=sys.stderr)
+
+    # Collect the distinct model ids named by the BGP routes, in first-seen
+    # order. A route without a "models" list contributes its single "model"
+    # field (or "unknown"); list items may be dicts with an "id" or bare ids.
+    bgp_models = []
+    for _r in BGP_ROUTES:
+        for _m in _r.get("models", [{"id": _r.get("model", "unknown")}]):
+            mid = _m.get("id", _m) if isinstance(_m, dict) else _m
+            if mid not in bgp_models:
+                bgp_models.append(mid)
+    # Route-derived models are only used when no model list exists yet.
+    if BGP_ROUTES and not MODELS:
+        MODELS = [{"id": m, "object": "model", "created": 1700000000, "owned_by": "bgp"} for m in bgp_models]
+        CONFIG["models"] = MODELS
+
+    if (BACKEND or "").startswith("gemini-oauth") and (OAUTH_PROVIDER or "").startswith("google"):
+        token_name = "google-antigravity-oauth-token.json" if OAUTH_PROVIDER == "google-antigravity" else "google-cli-oauth-token.json"
+        token_path = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy", token_name)
+        _preemptive_refresh_token(token_path)
+        try:
+            with open(token_path) as _tf:
+                _td = json.load(_tf)
+            # Discovered models are ignored for google-antigravity.
+            _discovered = [] if OAUTH_PROVIDER == "google-antigravity" else _td.get("available_models", [])
+            if _discovered:
+                # De-duplicate while keeping the original order.
+                _seen = []
+                for _m in _discovered:
+                    if _m not in _seen:
+                        _seen.append(_m)
+                MODELS = [{"id": m, "object": "model", "created": 1700000000, "owned_by": "gemini-oauth"} for m in _seen]
+                CONFIG["models"] = MODELS
+                print(f"[gemini-oauth] loaded {len(_seen)} discovered models: {_seen}", file=sys.stderr)
+        except Exception:
+            # Missing or unreadable token file: keep the configured model list.
+            pass
+
+def _preemptive_refresh_token(token_path):
+    """Log a notice when the token stored at *token_path* is close to expiry.
+
+    Reads "expires_at" from the JSON token file and prints a message when it
+    is set and less than 300 seconds away (or already past). This function
+    itself only logs; it does not request a new token. Any error while
+    reading the file is ignored.
+    """
+    try:
+        with open(token_path) as fh:
+            token_data = json.load(fh)
+        expires_at = token_data.get("expires_at", 0)
+        if not expires_at:
+            return
+        if time.time() > expires_at - 300:
+            print(f"[oauth] preemptive refresh: token expires in {int(expires_at - time.time())}s", file=sys.stderr)
+    except Exception:
+        pass
+
+def _pooled_urlopen(url, data=None, headers=None, timeout=180):
+    """Send an HTTP(S) request over a pooled keep-alive connection.
+
+    One connection per "scheme://host:port" is kept in `_conn_pool` and reused.
+    The request is a POST when *data* is truthy, otherwise a GET.
+
+    A pooled connection is replaced by a new one when:
+      * it no longer has an open socket;
+      * it is still busy with another request/response (it is left untouched
+        so that exchange can finish);
+      * the peer dropped it while idle (it is closed and the request is sent
+        once more on the new connection).
+    Errors on a connection created by this call are raised to the caller.
+
+    Args:
+        url: absolute http:// or https:// URL.
+        data: request body, or None.
+        headers: dict of request headers, or None.
+        timeout: socket timeout in seconds for newly created connections.
+
+    Returns:
+        The http.client.HTTPResponse for the request.
+    """
+    parsed = urllib.parse.urlparse(url)
+    host = parsed.hostname
+    port = parsed.port or (443 if parsed.scheme == "https" else 80)
+    pool_key = f"{parsed.scheme}://{host}:{port}"
+
+    def _new_connection():
+        # Create a connection and make it the pooled one for this origin.
+        if parsed.scheme == "https":
+            fresh = http.client.HTTPSConnection(host, port, timeout=timeout)
+        else:
+            fresh = http.client.HTTPConnection(host, port, timeout=timeout)
+        with _conn_pool_lock:
+            _conn_pool[pool_key] = fresh
+        return fresh
+
+    with _conn_pool_lock:
+        conn = _conn_pool.get(pool_key)
+    if conn is not None:
+        try:
+            sock = conn.sock
+            # The original one-line conditional parsed as
+            # `(a or b) if hasattr(...) else False`, so a connection whose
+            # socket was None was never dropped.
+            if sock is None or getattr(sock, "_closed", False):
+                conn = None
+        except Exception:
+            conn = None
+
+    reused = conn is not None
+    if conn is None:
+        conn = _new_connection()
+
+    path = parsed.path or "/"
+    if parsed.query:
+        path += "?" + parsed.query
+    method = "POST" if data else "GET"
+    request_headers = headers or {}
+
+    try:
+        conn.request(method, path, body=data, headers=request_headers)
+        return conn.getresponse()
+    except (http.client.CannotSendRequest, http.client.ResponseNotReady):
+        if not reused:
+            raise
+        # The pooled connection is mid-exchange for another caller. Do not
+        # close it (that would cut off their response); use a new one instead.
+    except ConnectionError:
+        if not reused:
+            raise
+        # The server closed the idle keep-alive socket; discard it.
+        conn.close()
+
+    conn = _new_connection()
+    conn.request(method, path, body=data, headers=request_headers)
+    return conn.getresponse()
+
+def _response_store_evict():
+    """Remove entries older than _RESPONSE_TTL seconds from _response_store.
+
+    Only dict entries are considered; an entry without a "ts" field is
+    treated as having timestamp 0.
+    """
+    with _response_store_lock:
+        now = time.time()
+        # Iterate over a snapshot so entries can be deleted while looping.
+        for resp_id, entry in list(_response_store.items()):
+            if not isinstance(entry, dict):
+                continue
+            if now - entry.get("ts", 0) > _RESPONSE_TTL:
+                del _response_store[resp_id]
+
+def _log_dual(msg, level="INFO"):
+    """Write a timestamped log line to stderr and, if open, to the log file.
+
+    Args:
+        msg: message text.
+        level: label placed in the second bracket of the line.
+
+    Errors while writing to the log file are ignored.
+    """
+    ts = time.strftime("%H:%M:%S")
+    line = f"[{ts}] [{level}] {msg}"
+    print(line, file=sys.stderr, flush=True)
+    with _LOG_FILE_LOCK:
+        if not _LOG_FILE:
+            return
+        with contextlib.suppress(Exception):
+            _LOG_FILE.write(line + "\n")
+            _LOG_FILE.flush()
+
+def _stream_with_idle_timeout(response, timeout_seconds=None):
+    """Yield lines from *response*, failing if the stream goes idle.
+
+    Args:
+        response: object to read from; it is read with readline(), or simply
+            iterated when no underlying socket can be located.
+        timeout_seconds: maximum wait for the socket to become readable
+            before each read. Defaults to _STREAM_IDLE_TIMEOUT.
+
+    Raises:
+        TimeoutError: when the socket is not readable within the timeout.
+    """
+    if timeout_seconds is None:
+        timeout_seconds = _STREAM_IDLE_TIMEOUT
+    sel = selectors.DefaultSelector()
+    try:
+        # NOTE(review): both branches of this conditional evaluate to `response`.
+        sock = response if hasattr(response, 'fp') and response.fp else response
+        # Locate the underlying socket: `response.fp.raw`, else `response._sock`.
+        raw_sock = getattr(getattr(sock, 'fp', None), 'raw', None) or getattr(sock, '_sock', None)
+        if raw_sock is None:
+            # No socket to watch: plain iteration, with no idle timeout applied.
+            for chunk in response:
+                yield chunk
+            return
+        sel.register(raw_sock, selectors.EVENT_READ)
+        # NOTE(review): the selector watches the raw socket while readline()
+        # goes through `response`. If `response` buffers its reads, lines
+        # already buffered would not make the socket readable, so the wait
+        # below could time out with data still pending - confirm.
+        while True:
+            ready = sel.select(timeout=timeout_seconds)
+            if not ready:
+                raise TimeoutError(f"Stream idle for {timeout_seconds}s")
+            chunk = response.readline()
+            if not chunk:
+                # Empty read: end of stream.
+                break
+            yield chunk
+    finally:
+        try:
+            sel.close()
+        except Exception:
+            pass
+
+def _provider_cap_key(target_url=None, backend=None, model=None):
+    """Build the "backend|host|model" key used in the provider-caps mapping.
+
+    Falsy arguments fall back to TARGET_URL, BACKEND and "*" respectively.
+    The host part is the lower-cased network location of the URL.
+    """
+    netloc = urllib.parse.urlparse(target_url or TARGET_URL).netloc
+    host = netloc.lower()
+    backend_name = backend or BACKEND
+    model_name = model or '*'
+    return f"{backend_name}|{host}|{model_name}"
+
+def _load_provider_caps():
+    """Return the provider-caps mapping, loading it from disk on first use.
+
+    If the file cannot be read or parsed, an empty dict is used instead.
+    Later calls return the same cached object.
+    """
+    global _provider_caps
+    with _provider_caps_lock:
+        if _provider_caps is None:
+            try:
+                with open(_provider_caps_path) as fh:
+                    loaded = json.load(fh)
+            except Exception:
+                loaded = {}
+            _provider_caps = loaded
+        return _provider_caps
+
+def _save_provider_caps():
+    """Persist the in-memory provider-caps mapping; failures are only logged.
+
+    The file is written through _atomic_write_json() (temporary file, then
+    rename) rather than truncated in place. An interrupted or failed write can
+    therefore no longer leave a partial file, which _load_provider_caps()
+    would silently read as "no capabilities learned".
+    """
+    try:
+        os.makedirs(os.path.dirname(_provider_caps_path), exist_ok=True)
+        _atomic_write_json(_provider_caps_path, _provider_caps or {})
+    except Exception as e:
+        print(f"[provider-sensor] failed to save caps: {e}", file=sys.stderr)
+
+def _provider_cap(model, key, default=None):
+    """Look up a learned capability value for *model*.
+
+    The model-specific entry is consulted first, then the wildcard ("*")
+    entry; *default* is returned when neither contains *key*.
+    """
+    caps = _load_provider_caps()
+    lookup_order = (_provider_cap_key(model=model), _provider_cap_key(model="*"))
+    for cap_key in lookup_order:
+        entry = caps.get(cap_key, {})
+        if key in entry:
+            return entry[key]
+    return default
+
+def _set_provider_cap(model, key, value, reason=""):
+    """Record a learned capability for *model* and save the caps file.
+
+    Besides *key*, the entry's "reason" and "updated_at" fields are
+    overwritten on every call.
+    """
+    cap_key = _provider_cap_key(model=model)
+    entry = _load_provider_caps().setdefault(cap_key, {})
+    entry.update({key: value, "reason": reason, "updated_at": time.time()})
+    _save_provider_caps()
+    print(f"[provider-sensor] learned {cap_key}: {key}={value} reason={reason}", file=sys.stderr)
+
+def _refresh_oauth_token():
+    """Return the access token for the globally configured API key and OAuth provider."""
+    return _refresh_oauth_token_for(API_KEY, OAUTH_PROVIDER)
+
+def _refresh_oauth_token_for(api_key, oauth_provider):
+    """Return a usable access token for the given key and OAuth provider.
+
+    *api_key* is returned unchanged when the provider name does not start
+    with "google" or no cached token file exists. Otherwise the cached access
+    token is returned while it has more than 60 seconds left. When it is
+    about to expire and the client id, client secret and refresh token are
+    all present, a new token is requested from Google's token endpoint and
+    written back to the token file.
+
+    Args:
+        api_key: fallback credential belonging to this caller.
+        oauth_provider: provider name, may be None or empty.
+
+    Returns:
+        The access token, or *api_key* when none can be obtained.
+    """
+    oauth_provider = oauth_provider or ""
+    if not oauth_provider.startswith("google"):
+        return api_key
+    token_name = "google-antigravity-oauth-token.json" if oauth_provider == "google-antigravity" else "google-cli-oauth-token.json"
+    token_path = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy", token_name)
+    if not os.path.exists(token_path):
+        return api_key
+    try:
+        with open(token_path) as f:
+            tokens = json.load(f)
+        if tokens.get("expires_at", 0) > time.time() + 60:
+            return tokens.get("access_token", api_key)
+        client_id = tokens.get("client_id", "")
+        client_secret = tokens.get("client_secret", "")
+        refresh_token = tokens.get("refresh_token", "")
+        if not all([client_id, client_secret, refresh_token]):
+            # Cannot refresh without full client credentials; use what is cached.
+            return tokens.get("access_token", api_key)
+        print("[oauth] refreshing Google access token...", file=sys.stderr)
+        data = urllib.parse.urlencode({
+            "client_id": client_id, "client_secret": client_secret,
+            "refresh_token": refresh_token, "grant_type": "refresh_token",
+        }).encode()
+        req = urllib.request.Request("https://oauth2.googleapis.com/token", data=data,
+                                     headers={"Content-Type": "application/x-www-form-urlencoded"})
+        # Close the response deterministically instead of leaking the socket.
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            new_tokens = json.loads(resp.read())
+        tokens["access_token"] = new_tokens.get("access_token", tokens.get("access_token"))
+        tokens["expires_at"] = time.time() + new_tokens.get("expires_in", 3600)
+        with open(token_path, "w") as f:
+            json.dump(tokens, f, indent=2)
+        print("[oauth] token refreshed OK", file=sys.stderr)
+        return tokens["access_token"]
+    except Exception as e:
+        print(f"[oauth] refresh failed: {e}", file=sys.stderr)
+        # Fall back to the caller's own key, as every other exit path does.
+        # Returning the global API_KEY here would hand back the wrong
+        # credential when this is called for a different key.
+        return api_key
+
+# ═══════════════════════════════════════════════════════════════════
+# Shared helpers
+# ═══════════════════════════════════════════════════════════════════
+
+# NOTE(review): `_pool` is already assigned earlier at module level; this
+# second assignment replaces it with a new random 8-hex-character value.
+_pool = uuid.uuid4().hex[:8]
+
+def _load_stats():
+    """Return the persisted usage statistics, or an empty structure.
+
+    The empty structure {"providers": {}, "updated": None} is returned when
+    the stats file is missing, unreadable, not valid JSON, or does not
+    contain a "providers" dict. _flush_stats() indexes stats["providers"]
+    directly, so a malformed file must not reach it.
+    """
+    try:
+        # `with` closes the file; the original left the handle open.
+        with open(_stats_path) as f:
+            stats = json.load(f)
+        if isinstance(stats, dict) and isinstance(stats.get("providers"), dict):
+            return stats
+    except Exception:
+        # Best effort: any read/parse problem falls back to empty stats.
+        pass
+    return {"providers": {}, "updated": None}
+
+def _atomic_write_json(path, obj):
+    """Write *obj* as JSON to *path* by writing a temp file and renaming it.
+
+    The data is first written to "<path>.tmp" and then moved over *path*
+    with os.replace(), so readers see either the old or the new file.
+
+    Raises:
+        Whatever open(), json.dump() or os.replace() raise. In that case the
+        temporary file is removed and *path* is left untouched.
+    """
+    tmp = path + ".tmp"
+    try:
+        with open(tmp, "w") as f:
+            json.dump(obj, f, indent=2, ensure_ascii=False)
+        os.replace(tmp, path)
+    except BaseException:
+        # Do not leave a partial temp file behind after a failed write.
+        with contextlib.suppress(OSError):
+            os.remove(tmp)
+        raise
+
+def _flush_stats():
+    """Fold all queued usage entries into the on-disk usage statistics.
+
+    The pending queue is drained under _stats_lock; aggregation and the file
+    write happen after the lock is released. Per provider it accumulates
+    request, success/failure, token and duration totals plus per-model
+    request and token counts.
+    """
+    global _stats_flush_timer
+    with _stats_lock:
+        drained = _stats_pending[:]
+        del _stats_pending[:]
+        # Clearing the handle lets the next _record_usage() arm a new timer.
+        _stats_flush_timer = None
+    if not drained:
+        return
+
+    stats = _load_stats()
+    providers = stats["providers"]
+    for item in drained:
+        tokens_in = item["tokens_in"]
+        tokens_out = item["tokens_out"]
+        totals = providers.setdefault(item["provider"], {
+            "total_requests": 0, "successes": 0, "failures": 0,
+            "total_tokens_in": 0, "total_tokens_out": 0,
+            "total_duration_s": 0.0, "models": {}, "last_used": None, "last_error": None,
+        })
+        totals["total_requests"] += 1
+        totals["total_tokens_in"] += tokens_in
+        totals["total_tokens_out"] += tokens_out
+        totals["total_duration_s"] += item["duration_s"]
+        totals["last_used"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(item["ts"]))
+        if item["success"]:
+            totals["successes"] += 1
+        else:
+            totals["failures"] += 1
+            totals["last_error"] = item.get("error_type") or "unknown"
+        per_model = totals["models"].setdefault(item["model"], {"requests": 0, "tokens_in": 0, "tokens_out": 0})
+        per_model["requests"] += 1
+        per_model["tokens_in"] += tokens_in
+        per_model["tokens_out"] += tokens_out
+    stats["updated"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
+    _atomic_write_json(_stats_path, stats)
+
+def _record_usage(provider, model, success, duration_s, tokens_in=0, tokens_out=0, error_type=None):
+    """Queue one usage record and make sure a flush is scheduled.
+
+    Missing values are normalised ("unknown" names, zero counts). If no flush
+    timer is pending, a daemon timer is started that calls _flush_stats()
+    after _STATS_FLUSH_INTERVAL seconds.
+    """
+    global _stats_flush_timer
+    record = dict(
+        provider=provider or "unknown",
+        model=model or "unknown",
+        success=bool(success),
+        duration_s=float(duration_s or 0),
+        tokens_in=int(tokens_in or 0),
+        tokens_out=int(tokens_out or 0),
+        error_type=error_type,
+        ts=time.time(),
+    )
+    with _stats_lock:
+        _stats_pending.append(record)
+        if _stats_flush_timer is not None:
+            # A flush is already scheduled and will pick this record up.
+            return
+        _stats_flush_timer = threading.Timer(_STATS_FLUSH_INTERVAL, _flush_stats)
+        _stats_flush_timer.daemon = True
+        _stats_flush_timer.start()
+
+def store_response(resp_id, input_data, output_items):
+    """Remember the input and output of a response under *resp_id*.
+
+    Does nothing for a falsy id. Expired entries are evicted first; after
+    inserting, the oldest entries are dropped until at most _MAX_STORED remain.
+    """
+    if resp_id:
+        _response_store_evict()
+        with _response_store_lock:
+            _response_store[resp_id] = {"input": input_data, "output": output_items, "ts": time.time()}
+            overflow = len(_response_store) - _MAX_STORED
+            for _ in range(overflow):
+                _response_store.popitem(last=False)
+
+def resolve_previous_response(body):
+    """Expand ``previous_response_id`` into a full input item list.
+
+    Args:
+        body: Request body dict; ``previous_response_id`` and ``input`` are read.
+
+    Returns:
+        ``body["input"]`` unchanged when there is no previous id or the id is
+        not in ``_response_store``. Otherwise a new list made of the stored
+        input, the stored output and the new input, in that order.
+
+    A plain-string input (stored or new) is wrapped as a user message with one
+    ``input_text`` part. Previously a new string input was silently dropped
+    whenever a stored response was found.
+    """
+    prev_id = body.get("previous_response_id")
+    input_data = body.get("input", "")
+    if not prev_id:
+        return input_data
+    with _response_store_lock:
+        stored = _response_store.get(prev_id)
+    if not stored:
+        return input_data
+
+    def _text_item(text):
+        """Build a Responses-style user message carrying ``text``."""
+        return {"type": "message", "role": "user",
+                "content": [{"type": "input_text", "text": text}]}
+
+    prev_input = stored["input"]
+    if isinstance(prev_input, list):
+        history = list(prev_input)
+    else:
+        history = [_text_item(str(prev_input))]
+
+    if isinstance(input_data, list):
+        new_items = input_data
+    elif isinstance(input_data, str) and input_data:
+        # Keep the caller's new prompt instead of discarding it.
+        new_items = [_text_item(input_data)]
+    else:
+        new_items = []
+    return history + list(stored["output"]) + new_items
+
+def _fb_store_reasoning(resp_id, reasoning_text):
+    """Cache ``reasoning_text`` for ``resp_id`` and prune the cache.
+
+    Nothing is stored when either argument is falsy. After inserting, entries
+    are popped from the front while the store exceeds ``_MAX_STORED``, then any
+    entry older than ``_RESPONSE_TTL`` seconds is deleted.
+    """
+    if not (resp_id and reasoning_text):
+        return
+    with _fb_reasoning_store_lock:
+        _fb_reasoning_store[resp_id] = {"reasoning": reasoning_text, "ts": time.time()}
+        while len(_fb_reasoning_store) > _MAX_STORED:
+            _fb_reasoning_store.popitem(last=False)
+        # Iterate over a copy so entries can be deleted while scanning.
+        for key, record in list(_fb_reasoning_store.items()):
+            if time.time() - record["ts"] > _RESPONSE_TTL:
+                del _fb_reasoning_store[key]
+
+def _fb_get_reasoning(resp_id):
+    """Return the reasoning text cached for ``resp_id``, or "" when absent."""
+    if not resp_id:
+        return ""
+    with _fb_reasoning_store_lock:
+        record = _fb_reasoning_store.get(resp_id)
+        if record:
+            return record["reasoning"]
+    return ""
+
+def _fb_get_any_reasoning():
+    """Return the reasoning text of the first cached entry, or "" if the cache is empty.
+
+    "First" means first in the store's own iteration order.
+    """
+    with _fb_reasoning_store_lock:
+        first = next(iter(_fb_reasoning_store.values()), None)
+        if first is None:
+            return ""
+        return first["reasoning"]
+
+def _freebuff_hard_disable_reasoning(messages):
+    """Remove every reasoning/thinking field from each message, in place.
+
+    FreeBuff rejects mixed reasoning_content histories, so the final chat body
+    must be clean before POST. Entries that are not dicts are left untouched.
+    """
+    reasoning_fields = ("reasoning_content", "reasoning", "thinking",
+                        "thinking_content", "thoughts")
+    for message in messages:
+        if isinstance(message, dict):
+            for field in reasoning_fields:
+                message.pop(field, None)
+
+def _is_reasoning_content_error(error_text):
+    """Tell whether an upstream error message complains about reasoning round-trips.
+
+    The match is case-insensitive and looks for any of three marker phrases.
+    Empty or missing text is never a match.
+    """
+    if not error_text:
+        return False
+    lowered = error_text.lower()
+    markers = ("reasoning_content", "thinking mode", "must be passed back")
+    return any(marker in lowered for marker in markers)
+
+def _ds_store_assistant(resp_id, assistant_msg):
+    """Index an assistant tool-call message under each of its tool-call ids.
+
+    Only messages that carry both ``tool_calls`` and ``reasoning_content`` are
+    stored. Each entry holds a shallow copy of the message plus a timestamp.
+    Afterwards the earliest-inserted keys are dropped until at most
+    ``_MAX_DS_STORED`` entries remain.
+    """
+    if not resp_id or not isinstance(assistant_msg, dict):
+        return
+    calls = assistant_msg.get("tool_calls") or []
+    reasoning = assistant_msg.get("reasoning_content")
+    if not (calls and reasoning):
+        return
+    with _deepseek_reasoning_lock:
+        for call in calls:
+            call_id = call.get("id") or call.get("call_id", "")
+            if not call_id:
+                continue
+            _deepseek_reasoning_store[call_id] = {
+                "resp_id": resp_id,
+                "assistant": dict(assistant_msg),
+                "reasoning_content": reasoning,
+                "ts": time.time(),
+            }
+        overflow = len(_deepseek_reasoning_store) - _MAX_DS_STORED
+        if overflow > 0:
+            for stale_key in list(_deepseek_reasoning_store)[:overflow]:
+                del _deepseek_reasoning_store[stale_key]
+
+def _ds_rebuild_tool_history(messages):
+    """Re-insert stored assistant tool-call messages ahead of their tool results.
+
+    Args:
+        messages: Chat messages about to be sent upstream.
+
+    Returns:
+        ``messages`` itself when the reasoning store is empty after expiry,
+        otherwise a new list in which each stored assistant message (with its
+        ``reasoning_content``) precedes the first ``tool`` message that answers
+        one of its tool calls.
+
+    An assistant turn that issued several tool calls is inserted only once:
+    all of its tool-call ids are marked as handled on insertion. Previously
+    it was re-inserted before every one of its tool results.
+    """
+    with _deepseek_reasoning_lock:
+        snapshot = dict(_deepseek_reasoning_store)
+        now = time.time()
+        # Entries older than 900 seconds are purged from the store and ignored.
+        for key in [k for k, v in snapshot.items() if now - v["ts"] > 900]:
+            _deepseek_reasoning_store.pop(key, None)
+            snapshot.pop(key, None)
+    if not snapshot:
+        return messages
+    rebuilt = []
+    inserted_ids = set()
+    for msg in messages:
+        # Non-dict entries are passed through rather than crashing on .get().
+        if isinstance(msg, dict) and msg.get("role") == "tool":
+            tc_id = msg.get("tool_call_id", "")
+            stored = snapshot.get(tc_id)
+            if stored and tc_id not in inserted_ids:
+                am = dict(stored["assistant"])
+                if am.get("reasoning_content"):
+                    rebuilt.append(am)
+                    inserted_ids.add(tc_id)
+                    # The same assistant message answers all of its sibling
+                    # tool calls, so none of them may trigger another insert.
+                    for tc in am.get("tool_calls") or []:
+                        if isinstance(tc, dict):
+                            sibling = tc.get("id") or tc.get("call_id")
+                            if sibling:
+                                inserted_ids.add(sibling)
+        rebuilt.append(msg)
+    return rebuilt
+
+def _fb_input_to_messages(input_data, instructions=""):
+    """Convert Responses-style input into chat-completions messages.
+
+    Args:
+        input_data: A plain string (one user message) or a list of Responses
+            items (``message``, ``function_call``, ``function_call_output``,
+            ``reasoning``). Any other type produces no messages.
+        instructions: Optional system prompt, inserted as the first message.
+
+    Returns:
+        A list of chat message dicts. Consecutive ``function_call`` items are
+        merged into one assistant message with ``tool_calls``; ``reasoning``
+        items are skipped; ``developer`` roles become ``system``. Assistant
+        text messages carry the original item id under ``_fb_orig_id``.
+
+    A ``function_call_output`` without an id is paired positionally with the
+    most recently flushed tool-call group. The position counts outputs since
+    that flush; it used to count every tool message in the whole history, so
+    the fallback only worked for the first tool turn.
+    """
+    msgs = []
+    tool_name_by_id = {}
+    pending_tool_calls = []
+    last_flushed_ids = []
+    outputs_since_flush = 0
+    if isinstance(input_data, str):
+        msgs.append({"role": "user", "content": input_data})
+    elif isinstance(input_data, list):
+        for item in input_data:
+            t = item.get("type")
+            if t == "reasoning":
+                continue
+            if t == "function_call":
+                tcid = item.get("call_id") or item.get("id") or uid("tc")
+                pending_tool_calls.append(
+                    {"id": tcid, "type": "function",
+                     "function": {"name": item.get("name", ""),
+                                   "arguments": item.get("arguments", "{}")}})
+                tool_name_by_id[tcid] = item.get("name", "")
+                continue
+            if pending_tool_calls:
+                # Any non-call item closes the current group of tool calls.
+                last_flushed_ids = [tc["id"] for tc in pending_tool_calls]
+                outputs_since_flush = 0
+                msgs.append({"role": "assistant", "content": None, "tool_calls": pending_tool_calls})
+                pending_tool_calls = []
+            if t == "message":
+                role = item.get("role", "user")
+                if role == "developer":
+                    role = "system"
+                text = ""
+                content = item.get("content", [])
+                if isinstance(content, str):
+                    text = content
+                else:
+                    for part in content:
+                        if isinstance(part, str):
+                            text += part
+                            continue
+                        pt = part.get("type", "")
+                        if pt in ("input_text", "output_text"):
+                            text += part.get("text", "")
+                am = {"role": role, "content": text}
+                if role == "assistant":
+                    am["_fb_orig_id"] = item.get("id", "")
+                msgs.append(am)
+            elif t == "function_call_output":
+                tcid = item.get("call_id") or item.get("id") or ""
+                if not tcid and outputs_since_flush < len(last_flushed_ids):
+                    tcid = last_flushed_ids[outputs_since_flush]
+                outputs_since_flush += 1
+                msgs.append({"role": "tool", "tool_call_id": tcid,
+                             "tool_name": tool_name_by_id.get(tcid, ""),
+                             "content": item.get("output", "")})
+        if pending_tool_calls:
+            msgs.append({"role": "assistant", "content": None, "tool_calls": pending_tool_calls})
+    if instructions:
+        msgs.insert(0, {"role": "system", "content": instructions})
+    return msgs
+
+def _fb_strip_reasoning_from_messages(messages):
+    """Return shallow copies of ``messages`` with ``reasoning_content`` removed.
+
+    The input messages themselves are not modified.
+    """
+    return [
+        {field: value for field, value in message.items() if field != "reasoning_content"}
+        for message in messages
+    ]
+
+# Lower-cased request header names that forwarded_headers() drops instead of
+# copying to the upstream request. Lookups lower-case the incoming name first.
+_HOP_BY_HOP_HEADERS = {
+    "connection",
+    "keep-alive",
+    "proxy-authenticate",
+    "proxy-authorization",
+    "te",
+    "trailers",
+    "transfer-encoding",
+    "upgrade",
+    "host",
+    "content-length",
+}
+
+def uid(prefix="id"):
+    """Build an identifier of the form ``<prefix>-<_pool>-<12 hex chars>``.
+
+    The hex part is the first 12 characters of a fresh ``uuid4``.
+    """
+    random_part = uuid.uuid4().hex[:12]
+    return "{}-{}-{}".format(prefix, _pool, random_part)
+
+def emit(event, data):
+    """Format one server-sent event: an ``event:`` line, a JSON ``data:`` line, a blank line."""
+    payload = json.dumps(data)
+    return "event: {}\ndata: {}\n\n".format(event, payload)
+
+def upstream_target(base_url, suffix):
+    """Join ``base_url`` and ``suffix`` without duplicating the suffix.
+
+    Trailing slashes are stripped from ``base_url`` first; if the result
+    already ends with ``suffix`` it is returned as is.
+    """
+    trimmed = base_url.rstrip("/")
+    return trimmed if trimmed.endswith(suffix) else trimmed + suffix
+
+# Header set that forwarded_headers() seeds the outgoing request with when
+# browser_ua is true; in that mode the client's own User-Agent is not copied.
+_BROWSER_HEADERS = {
+    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
+    "Accept": "application/json, text/event-stream, */*",
+    "Accept-Language": "en-US,en;q=0.9",
+    "Sec-Ch-Ua": '"Chromium";v="137", "Not/A)Brand";v="99"',
+    "Sec-Ch-Ua-Mobile": "?0",
+    "Sec-Ch-Ua-Platform": '"Linux"',
+    "Sec-Fetch-Dest": "empty",
+    "Sec-Fetch-Mode": "cors",
+    "Sec-Fetch-Site": "same-origin",
+}
+
+def forwarded_headers(request_headers, extra=None, browser_ua=False):
+    """Build the header dict to send upstream from the client's request headers.
+
+    Headers named in ``_HOP_BY_HOP_HEADERS`` are dropped. With ``browser_ua``
+    the result starts from ``_BROWSER_HEADERS`` and the client's User-Agent is
+    skipped. ``extra`` entries are applied last and win on conflict.
+    """
+    outgoing = dict(_BROWSER_HEADERS) if browser_ua else {}
+    for name, value in request_headers.items():
+        lowered = name.lower()
+        if lowered in _HOP_BY_HOP_HEADERS or (browser_ua and lowered == "user-agent"):
+            continue
+        outgoing[name] = value
+    if extra:
+        outgoing.update(extra)
+    return outgoing
+
+# _compact_input() thresholds: lists longer than _MAX_INPUT_ITEMS are
+# summarized, tool outputs longer than _MAX_TOOL_OUTPUT_CHARS are clipped, and
+# _COMPACT_KEEP_RECENT is the starting size of the tail that is kept verbatim.
+_MAX_INPUT_ITEMS = 30
+_MAX_TOOL_OUTPUT_CHARS = 8000
+_COMPACT_KEEP_RECENT = 10
+
+# Mutable state shared by the _crof_* helpers: a rolling outcome history,
+# per-model item limits learned by _crof_record(), the smallest of those
+# limits, and the minimum tail size used by _crof_compact_for_retry().
+_CROF_ADAPTIVE = {
+    "fail_history": [],
+    "model_limits": {},
+    "global_item_limit": 30,
+    "min_keep_recent": 4,
+}
+
+# JSON file holding per-route statistics; _update_route_stats() and
+# _sorted_bgp_routes() take _bgp_stats_lock around their access to it.
+_BGP_STATS_PATH = os.path.join(_LOG_DIR, "bgp-route-stats.json")
+_bgp_stats_lock = threading.Lock()
+
+def _route_key(route):
+    """Return the stats key for a route: ``name::target_url::model`` (missing parts empty)."""
+    parts = (route.get("name", ""), route.get("target_url", ""), route.get("model", ""))
+    return "{}::{}::{}".format(*parts)
+
+def _load_bgp_stats():
+    """Load the per-route statistics mapping from ``_BGP_STATS_PATH``.
+
+    Returns:
+        The parsed JSON object, or ``{}`` when the file is missing, unreadable,
+        not valid JSON, or does not contain a JSON object.
+
+    The file is opened with a context manager so the handle is always closed;
+    the previous ``json.load(open(...))`` form leaked it.
+    """
+    try:
+        with open(_BGP_STATS_PATH) as f:
+            data = json.load(f)
+    except (OSError, ValueError):
+        # Best effort: missing or corrupt stats just mean "no history yet".
+        return {}
+    # Callers use .get() and .setdefault(), so anything but a dict is unusable.
+    return data if isinstance(data, dict) else {}
+
+def _save_bgp_stats(stats):
+    """Write ``stats`` as indented JSON to ``_BGP_STATS_PATH``.
+
+    The data goes to a sibling ``.tmp`` file first and is then moved into
+    place with ``os.replace``.
+    """
+    staging_path = f"{_BGP_STATS_PATH}.tmp"
+    with open(staging_path, "w") as handle:
+        json.dump(stats, handle, indent=2)
+    os.replace(staging_path, _BGP_STATS_PATH)
+
+def _score_route(route, stats):
+    """Compute a sort score for ``route``; lower scores sort first.
+
+    A route whose ``open_until_ts`` lies in the future scores 1_000_000.
+    Otherwise the score is the route priority (default 99), plus five times
+    the latency average capped at 50, plus 20 per consecutive failure, plus
+    500 while ``rate_limited_until`` lies in the future.
+    """
+    record = stats.get(_route_key(route), {})
+    now = time.time()
+    if float(record.get("open_until_ts", 0)) > now:
+        return 1_000_000
+    base = int(route.get("priority", 99))
+    latency_penalty = min(float(record.get("ewma_latency_s", 0)) * 5, 50)
+    failure_penalty = int(record.get("consecutive_failures", 0)) * 20
+    total = base + latency_penalty + failure_penalty
+    if float(record.get("rate_limited_until", 0)) > now:
+        total += 500
+    return total
+
+def _update_route_stats(route, success, duration_s, http_code=None, error_type=None):
+    """Fold one request outcome into the persisted statistics for ``route``.
+
+    Under ``_bgp_stats_lock`` the stats file is loaded, the record for this
+    route is created if missing, updated, and the whole mapping saved again.
+
+    Args:
+        route: Route dict; identified through ``_route_key``.
+        success: Whether the request succeeded.
+        duration_s: Request duration, blended into ``ewma_latency_s``.
+        http_code: Optional HTTP status; 429 sets a 120-second rate-limit mark.
+        error_type: Optional error label; falls back to ``http_<code>`` or "unknown".
+    """
+    with _bgp_stats_lock:
+        stats = _load_bgp_stats()
+        key = _route_key(route)
+        rs = stats.setdefault(key, {
+            "ewma_latency_s": duration_s, "consecutive_failures": 0,
+            "last_success": None, "last_failure": None,
+            "open_until_ts": 0, "rate_limited_until": 0, "last_error": None,
+        })
+        # Weighted average: 25% new sample, 75% previous value.
+        alpha = 0.25
+        rs["ewma_latency_s"] = alpha * duration_s + (1 - alpha) * float(rs.get("ewma_latency_s", duration_s))
+        if success:
+            rs["consecutive_failures"] = 0
+            rs["last_success"] = time.time()
+        else:
+            rs["consecutive_failures"] = int(rs.get("consecutive_failures", 0)) + 1
+            rs["last_failure"] = time.time()
+            rs["last_error"] = error_type or (f"http_{http_code}" if http_code else "unknown")
+            if http_code == 429:
+                rs["rate_limited_until"] = time.time() + 120
+            # Third failure in a row: set open_until_ts 60 s ahead (which
+            # _score_route ranks last) and restart the failure count.
+            if rs["consecutive_failures"] >= 3:
+                rs["open_until_ts"] = time.time() + 60
+                rs["consecutive_failures"] = 0
+        _save_bgp_stats(stats)
+
+def _sorted_bgp_routes():
+    """Return ``BGP_ROUTES`` ordered by ascending ``_score_route`` score.
+
+    The statistics are loaded once under ``_bgp_stats_lock``; scoring happens
+    after the lock is released.
+    """
+    with _bgp_stats_lock:
+        snapshot = _load_bgp_stats()
+
+    def _rank(route):
+        return _score_route(route, snapshot)
+
+    return sorted(BGP_ROUTES, key=_rank)
+
+def _crof_record(model, n_items, success):
+    """Record one request outcome and re-derive the adaptive item limits.
+
+    Updates ``_CROF_ADAPTIVE`` in place: appends to ``fail_history``, adjusts
+    the per-model ``ok_max`` / ``fail_min`` / ``limit`` values, recomputes
+    ``global_item_limit`` as the smallest per-model limit (at most 30), and
+    prints a summary line to stderr. Calls with a non-int or ``n_items < 1``
+    are ignored.
+    """
+    if not isinstance(n_items, int) or n_items < 1:
+        return
+    entry = {"model": model, "items": n_items, "ok": success}
+    hist = _CROF_ADAPTIVE["fail_history"]
+    hist.append(entry)
+    # Once the history passes 200 entries, keep only the latest 100.
+    if len(hist) > 200:
+        _CROF_ADAPTIVE["fail_history"] = hist[-100:]
+
+    # ok_max: largest item count seen to succeed; fail_min: smallest seen to
+    # fail (0 means no failure recorded yet).
+    ml = _CROF_ADAPTIVE["model_limits"].setdefault(model, {"ok_max": 30, "fail_min": 0, "limit": 30})
+    if success and n_items > ml["ok_max"]:
+        ml["ok_max"] = n_items
+    if not success and (ml["fail_min"] == 0 or n_items < ml["fail_min"]):
+        ml["fail_min"] = n_items
+
+    # NOTE(review): the first branch has no lower bound, so a failure at one
+    # item yields limit 0; the second branch is floored at min_keep_recent + 2.
+    if ml["fail_min"] > 0 and ml["ok_max"] >= ml["fail_min"]:
+        ml["limit"] = ml["fail_min"] - 1
+    elif ml["fail_min"] > 0:
+        ml["limit"] = max(ml["fail_min"] - 2, _CROF_ADAPTIVE["min_keep_recent"] + 2)
+
+    global_limit = 30
+    for m, v in _CROF_ADAPTIVE["model_limits"].items():
+        if v.get("limit", 30) < global_limit:
+            global_limit = v["limit"]
+    _CROF_ADAPTIVE["global_item_limit"] = global_limit
+
+    print(f"[crof-adaptive] model={model} items={n_items} {'OK' if success else 'FAIL'} -> limit={ml.get('limit',30)} global={global_limit}", file=sys.stderr)
+
+def _crof_item_limit(model):
+    """Return the effective item limit for ``model``.
+
+    This is the smaller of the model's learned limit (30 when none is
+    recorded) and the global limit in ``_CROF_ADAPTIVE``.
+    """
+    model_limit = _CROF_ADAPTIVE["model_limits"].get(model, {}).get("limit", 30)
+    global_limit = _CROF_ADAPTIVE["global_item_limit"]
+    return min(model_limit, global_limit)
+
+def _crof_compact_for_retry(input_data, model):
+    """Shrink an over-limit input list for a retry against ``model``.
+
+    Input that is not a list, or has no more items than the adaptive limit,
+    is returned unchanged. Otherwise the list is split into a head (the
+    leading run of developer/system/user messages), a recent tail and the
+    body between them; the body is replaced by one synthetic user message
+    that lists summaries of its last five items.
+    """
+    limit = _crof_item_limit(model)
+    if not isinstance(input_data, list) or len(input_data) <= limit:
+        return input_data
+
+    keep = max(_CROF_ADAPTIVE["min_keep_recent"], limit // 3)
+
+    # Head: every leading message whose role is developer, system or user.
+    head_end = 0
+    for entry in input_data:
+        if entry.get("type") != "message" or entry.get("role") not in ("developer", "system", "user"):
+            break
+        head_end += 1
+
+    # Move the cut backwards while it sits on a tool call, a tool result or an
+    # assistant message.
+    tail_start = max(head_end, len(input_data) - keep)
+    while tail_start > head_end:
+        probe = input_data[tail_start]
+        kind = probe.get("type")
+        on_tool_item = kind in ("function_call_output", "function_call")
+        on_assistant = kind == "message" and probe.get("role", "") == "assistant"
+        if not (on_tool_item or on_assistant):
+            break
+        tail_start -= 1
+
+    head = input_data[:head_end]
+    body = input_data[head_end:tail_start]
+    tail = input_data[tail_start:]
+    if not body:
+        return head + tail
+
+    summary_lines = [f"[Auto-compacted: {len(body)} turns removed (adaptive limit={limit})]"]
+    summary_lines.extend(_item_summary(entry, max_len=120) for entry in body[-5:])
+
+    summary_msg = {
+        "type": "message",
+        "role": "user",
+        "content": [{"type": "input_text", "text": "\n".join(summary_lines)}],
+    }
+    print(f"[crof-adaptive] RETRY compact: {len(input_data)} -> {len(head)+1+len(tail)} (limit={limit}, keep={len(tail)})", file=sys.stderr)
+    return head + [summary_msg] + tail
+
+def _item_summary(item, max_len=200):
+    """Return a one-line, length-capped description of a Responses input item.
+
+    Args:
+        item: Item dict with a ``type`` of ``message``, ``function_call`` or
+            ``function_call_output``; other types yield ``[<type>]``.
+        max_len: Maximum number of characters of text, command, arguments or
+            output included in the summary.
+
+    Message content may be a plain string or a list mixing strings and part
+    dicts. Tool arguments and outputs that are not strings are stringified
+    first, and a list-valued command is joined with spaces so the cap applies
+    to characters rather than list elements. These shapes used to raise.
+    """
+    t = item.get("type")
+    if t == "message":
+        role = item.get("role", "?")
+        content = item.get("content", [])
+        if isinstance(content, str):
+            text = content
+        else:
+            pieces = []
+            for p in content or []:
+                if isinstance(p, str):
+                    pieces.append(p)
+                elif isinstance(p, dict) and p.get("type") in ("input_text", "output_text"):
+                    pieces.append(str(p.get("text") or ""))
+            text = "".join(pieces)
+        return f"[{role}] {text[:max_len]}"
+    if t == "function_call":
+        name = item.get("name", "?")
+        args = item.get("arguments", "{}")
+        if not isinstance(args, str):
+            try:
+                args = json.dumps(args)
+            except (TypeError, ValueError):
+                args = str(args)
+        try:
+            parsed = json.loads(args)
+        except ValueError:
+            parsed = None
+        if isinstance(parsed, dict):
+            cmd = parsed.get("cmd", parsed.get("command", ""))
+            if isinstance(cmd, (list, tuple)):
+                cmd = " ".join(str(part) for part in cmd)
+            if cmd:
+                return f"[tool call] {name}: {str(cmd)[:max_len]}"
+        return f"[tool call] {name}({args[:max_len]})"
+    if t == "function_call_output":
+        output = item.get("output", "")
+        output = "" if output is None else str(output)
+        if len(output) > max_len:
+            return f"[tool result] {output[:max_len]}..."
+        return f"[tool result] {output}"
+    return f"[{t}]"
+
+def _extract_files(items):
+    """Collect file paths that shell redirections in tool calls write to.
+
+    For every ``function_call`` item the ``cmd`` (or ``command``) argument is
+    scanned for ``>`` and ``>>`` redirections; the token after each one is
+    kept when it contains a ``/`` and does not start with ``-``.
+
+    Returns:
+        A set of path strings (possibly empty).
+
+    Empty fragments between the two ``>`` of an append redirect are skipped.
+    They used to raise IndexError, which the blanket handler swallowed along
+    with every file of that command. List-valued commands are joined with
+    spaces before scanning.
+    """
+    files = set()
+    for item in items:
+        if item.get("type") != "function_call":
+            continue
+        try:
+            args = json.loads(item.get("arguments", "{}"))
+        except (TypeError, ValueError):
+            continue
+        if not isinstance(args, dict):
+            continue
+        cmd = args.get("cmd", args.get("command", ""))
+        if isinstance(cmd, (list, tuple)):
+            cmd = " ".join(str(part) for part in cmd)
+        if not isinstance(cmd, str):
+            continue
+        # Splitting on ">" also covers ">>": it just adds an empty fragment.
+        for fragment in cmd.split(">")[1:]:
+            tokens = fragment.split()
+            if not tokens:
+                continue
+            target = tokens[0].strip("'\"")
+            if target and not target.startswith("-") and "/" in target:
+                files.add(target)
+    return files
+
+def _compact_input(input_data):
+    """Shrink a Responses input list by clipping tool output and summarizing old turns.
+
+    Args:
+        input_data: Request input; anything that is not a list (for example a
+            plain string) is returned unchanged.
+
+    Returns:
+        For lists of at most ``_MAX_INPUT_ITEMS`` items: a new list in which
+        oversized ``function_call_output`` payloads are clipped to
+        ``_MAX_TOOL_OUTPUT_CHARS``. For longer lists: head + one synthetic
+        summary message + recent tail, where the head is the leading run of
+        developer/system/user messages.
+
+    Fixes over the previous version: non-list input no longer crashes or is
+    iterated by mistake; the tail can no longer start inside the head, which
+    duplicated items when the head covered the recent window; tail items are
+    clipped on copies so the caller's dicts stay untouched.
+    """
+    if not isinstance(input_data, list):
+        return input_data
+
+    def _clip(item, log=False):
+        """Return ``item``, or a clipped copy when its tool output is too long."""
+        if not (isinstance(item, dict) and item.get("type") == "function_call_output"):
+            return item
+        o = item.get("output", "")
+        if not isinstance(o, str) or len(o) <= _MAX_TOOL_OUTPUT_CHARS:
+            return item
+        clipped = dict(item)
+        clipped["output"] = o[:_MAX_TOOL_OUTPUT_CHARS] + f"\n... [truncated {len(o) - _MAX_TOOL_OUTPUT_CHARS} chars]"
+        if log:
+            print(f"[compact] tool output truncated {len(o)} -> {_MAX_TOOL_OUTPUT_CHARS}", file=sys.stderr)
+        return clipped
+
+    def _texts(item, part_type):
+        """Return up to 300 characters of each ``part_type`` text part of a message."""
+        content = item.get("content", [])
+        if isinstance(content, str):
+            return [content[:300]]
+        return [str(p.get("text", ""))[:300] for p in content or []
+                if isinstance(p, dict) and p.get("type") == part_type]
+
+    if len(input_data) <= _MAX_INPUT_ITEMS:
+        return [_clip(item, log=True) for item in input_data]
+
+    head_end = 0
+    for i, item in enumerate(input_data):
+        t = item.get("type")
+        if t == "message" and item.get("role") in ("developer", "system"):
+            head_end = i + 1
+        elif t == "message" and item.get("role") == "user" and head_end == i:
+            head_end = i + 1
+        else:
+            break
+
+    head = input_data[:head_end]
+    # Clamp so the tail never overlaps the head (same rule as the other
+    # compaction helpers in this file).
+    tail_start = max(head_end, len(input_data) - _COMPACT_KEEP_RECENT)
+    # Move the cut backwards while it sits on a tool call, a tool result or an
+    # assistant message.
+    while tail_start > head_end:
+        t = input_data[tail_start].get("type")
+        r = input_data[tail_start].get("role", "")
+        if t in ("function_call_output", "function_call"):
+            tail_start -= 1
+        elif t == "message" and r == "assistant":
+            tail_start -= 1
+        else:
+            break
+    tail = input_data[tail_start:]
+    body = input_data[head_end:tail_start]
+
+    if not body:
+        return head + tail
+
+    tail = [_clip(item) for item in tail]
+
+    user_queries = []
+    assistant_msgs = []
+    tool_summaries = []
+    for item in body:
+        kind = item.get("type")
+        if kind == "message" and item.get("role") == "user":
+            user_queries.extend(_texts(item, "input_text"))
+        elif kind == "message" and item.get("role") == "assistant":
+            assistant_msgs.extend(_texts(item, "output_text"))
+        elif kind in ("function_call", "function_call_output"):
+            tool_summaries.append(_item_summary(item, max_len=150))
+
+    files = _extract_files(body)
+
+    summary_lines = [f"[Auto-compacted: {len(body)} earlier turns summarized to preserve context]"]
+    if user_queries:
+        summary_lines.append(f"User requests: {'; '.join(user_queries[-3:])}")
+    if assistant_msgs:
+        summary_lines.append(f"Assistant responses: {'; '.join(assistant_msgs[-3:])}")
+    if tool_summaries:
+        summary_lines.append(f"Actions taken ({len(tool_summaries)} steps):")
+        for ts in tool_summaries[-15:]:
+            summary_lines.append(f"  {ts}")
+    if files:
+        summary_lines.append(f"Files touched: {', '.join(sorted(files)[-10:])}")
+
+    summary_text = "\n".join(summary_lines)
+    summary_msg = {
+        "type": "message",
+        "role": "user",
+        "content": [{"type": "input_text", "text": summary_text}]
+    }
+
+    print(f"[compact] {len(input_data)} items -> {len(head) + 1 + len(tail)} (compacted {len(body)} old items into summary)", file=sys.stderr)
+    return head + [summary_msg] + tail
+
+# ═══════════════════════════════════════════════════════════════════
+# Provider policies
+# ═══════════════════════════════════════════════════════════════════
+
+# Per-provider request settings. Each key is matched as a substring of the
+# upstream host by provider_policy(), which returns the first matching entry
+# in insertion order.
+_PROVIDER_POLICIES = {
+    "crof": {"reasoning_mode": "off", "max_tokens": 32768, "strip_reasoning": True,
+             "tool_output_limit": 4000, "max_input_items": 18, "compaction": "aggressive",
+             "synthetic_tool_results": True},
+    "chats-llm": {"reasoning_mode": "off", "max_tokens": 32768, "strip_reasoning": True,
+                  "tool_output_limit": 4000, "max_input_items": 20, "compaction": "aggressive"},
+    "z.ai": {"reasoning_mode": "medium", "max_tokens": 65536, "strip_reasoning": True,
+             "tool_output_limit": 8000, "max_input_items": 40, "compaction": "balanced"},
+    "openrouter": {"reasoning_mode": "provider_default", "max_tokens": 32768, "strip_reasoning": True,
+                   "tool_output_limit": 6000, "max_input_items": 35, "compaction": "balanced"},
+    "openadapter": {"reasoning_mode": "off", "max_tokens": 32768, "strip_reasoning": True,
+                    "tool_output_limit": 6000, "max_input_items": 30, "compaction": "balanced"},
+}
+
+def provider_policy(target_url=None, backend=None):
+    """Return the policy dict for the provider behind ``target_url``.
+
+    The host of ``target_url`` (or of ``TARGET_URL`` when none is given) is
+    lower-cased and the first ``_PROVIDER_POLICIES`` key contained in it wins.
+    An empty dict is returned when nothing matches. ``backend`` is not used.
+    """
+    host = urllib.parse.urlparse(target_url or TARGET_URL).netloc.lower()
+    matches = (policy for key, policy in _PROVIDER_POLICIES.items() if key in host)
+    return next(matches, {})
+
+# ═══════════════════════════════════════════════════════════════════
+# Adaptive context compaction (model-aware)
+# ═══════════════════════════════════════════════════════════════════
+
+# Context size per model-name fragment. _context_limit_for_model() matches
+# fragments as substrings of the lower-cased model name in insertion order and
+# falls back to "_default"; _adaptive_compact() compares the value against
+# its token estimate.
+_MODEL_CONTEXT = {
+    "gpt-4o": 128000, "gpt-4o-mini": 128000, "gpt-5": 128000,
+    "claude-sonnet": 200000, "claude-haiku": 200000,
+    "glm-5.1": 128000, "glm-5": 128000, "glm-4": 128000,
+    "deepseek": 64000, "gemini-2.5-flash": 1000000, "gemini-2.5-pro": 2000000,
+    "mimo": 32768, "minimax": 32768, "kimi": 128000,
+    "_default": 32768,
+}
+
+def _context_limit_for_model(model):
+    """Look up the context size for ``model`` in ``_MODEL_CONTEXT``.
+
+    The first table key (other than ``_default``) contained in the lower-cased
+    model name decides; a falsy or unknown model gets the ``_default`` value.
+    """
+    fallback = _MODEL_CONTEXT["_default"]
+    if not model:
+        return fallback
+    lowered = model.lower()
+    candidates = (
+        limit for key, limit in _MODEL_CONTEXT.items()
+        if key != "_default" and key in lowered
+    )
+    return next(candidates, fallback)
+
+def _estimate_tokens(obj):
+    """Roughly estimate a token count as one token per four characters.
+
+    ``None`` counts as 0; anything else counts as at least 1. Non-string
+    objects are measured by their JSON text, or by ``str(obj)`` when they
+    cannot be serialized.
+    """
+    if obj is None:
+        return 0
+    if isinstance(obj, str):
+        text = obj
+    else:
+        try:
+            text = json.dumps(obj, ensure_ascii=False)
+        except Exception:
+            text = str(obj)
+    return max(1, len(text) // 4)
+
+def _adaptive_compact(input_data, model, policy=None):
+    """Compact ``input_data`` when its estimated size exceeds 60% of the context size.
+
+    The context size comes from ``policy["context_size"]`` or, failing that,
+    from ``_context_limit_for_model(model)``.
+
+    Returns:
+        ``(data, compacted)``. ``compacted`` is False when the input is within
+        budget, is not a list, or the computed target item count does not
+        shrink it; in those cases ``data`` is the original input. Otherwise the
+        result is head + one summary message + recent tail (or head + tail
+        when nothing lies between them) and ``compacted`` is True.
+    """
+    policy = policy or {}
+    context_size = int(policy.get("context_size", _context_limit_for_model(model)))
+    input_budget = int(context_size * 0.60)
+    estimated = _estimate_tokens(input_data)
+    if estimated <= input_budget or not isinstance(input_data, list):
+        return input_data, False
+
+    total = len(input_data)
+    reduction = max(0.15, input_budget / max(estimated, 1))
+    target_items = max(int(total * reduction), 6)
+    if target_items >= total:
+        return input_data, False
+
+    # Head: every leading message whose role is developer, system or user.
+    head_end = 0
+    for entry in input_data:
+        if entry.get("type") != "message" or entry.get("role") not in ("developer", "system", "user"):
+            break
+        head_end += 1
+
+    keep = max(4, target_items // 3)
+    tail_start = max(head_end, total - keep)
+    # Move the cut backwards while it sits on a tool call, a tool result or an
+    # assistant message.
+    while tail_start > head_end:
+        probe = input_data[tail_start]
+        kind = probe.get("type")
+        on_tool_item = kind in ("function_call_output", "function_call")
+        if not (on_tool_item or (kind == "message" and probe.get("role") == "assistant")):
+            break
+        tail_start -= 1
+
+    head = input_data[:head_end]
+    body = input_data[head_end:tail_start]
+    tail = input_data[tail_start:]
+    if not body:
+        return head + tail, True
+
+    summary_lines = [f"[Auto-compacted: {len(body)} turns removed (budget={input_budget}tok, model={model})]"]
+    summary_lines.extend(_item_summary(entry, max_len=120) for entry in body[-5:])
+    summary_msg = {
+        "type": "message",
+        "role": "user",
+        "content": [{"type": "input_text", "text": "\n".join(summary_lines)}],
+    }
+    print(f"[adaptive-compact] model={model} est={estimated}tok budget={input_budget}tok "
+          f"items {len(input_data)}->{len(head)+1+len(tail)}", file=sys.stderr)
+    return head + [summary_msg] + tail, True
+
+# ═══════════════════════════════════════════════════════════════════
+# Tool-call pairing validator
+# ═══════════════════════════════════════════════════════════════════
+
+def validate_tool_pairs(input_items):
+    """Find ``function_call_output`` items that have no earlier matching ``function_call``.
+
+    Returns a list of ``{"index", "call_id", "error"}`` dicts, one per orphan,
+    in input order. Non-list input yields an empty list.
+    """
+    if not isinstance(input_items, list):
+        return []
+    seen_call_ids = set()
+    orphans = []
+    for position, entry in enumerate(input_items):
+        kind = entry.get("type")
+        if kind == "function_call":
+            call_id = entry.get("call_id") or entry.get("id")
+            if call_id:
+                seen_call_ids.add(call_id)
+        elif kind == "function_call_output":
+            call_id = entry.get("call_id") or entry.get("id")
+            if call_id and call_id in seen_call_ids:
+                continue
+            orphans.append({"index": position, "call_id": call_id,
+                            "error": "orphan_function_call_output"})
+    return orphans
+
+def repair_orphan_tool_outputs(input_items, errors):
+    """Replace orphaned tool outputs with plain user messages.
+
+    ``errors`` is the list produced by ``validate_tool_pairs``; each item at
+    a reported index becomes a user message holding up to 4000 characters of
+    its output. All other items are passed through as the same objects.
+    """
+    orphan_positions = {problem["index"] for problem in errors}
+
+    def _as_user_text(entry):
+        payload = str(entry.get("output", ""))[:4000]
+        return {"type": "message", "role": "user",
+                "content": [{"type": "input_text",
+                             "text": f"[Proxy: unmatched tool output]\n{payload}"}]}
+
+    return [
+        _as_user_text(entry) if position in orphan_positions else entry
+        for position, entry in enumerate(input_items)
+    ]
+
+def synthesize_tool_results_for_chat(input_items):
+    """Flatten function_call / function_call_output items into plain user text.
+
+    Some OpenAI-compatible providers accept tool calls on the first turn but
+    fail on the next request when role=tool messages are present. For those
+    providers each tool output is re-encoded as an ordinary user message that
+    names the tool, its arguments and its output, so the model can continue.
+
+    Returns ``(items, changed)``. ``function_call`` items are removed from the
+    result and only remembered for lookup; ``changed`` is True when at least
+    one call or output item was seen. Non-list input is returned unchanged
+    with ``changed`` False.
+    """
+    if not isinstance(input_items, list):
+        return input_items, False
+    preamble = (
+        "Tool execution result. Continue the task using this result. "
+        "Do not repeat the same tool call unless more information is required.\n\n"
+    )
+    call_by_id = {}
+    rewritten = []
+    touched = False
+    for entry in input_items:
+        kind = entry.get("type")
+        if kind not in ("function_call", "function_call_output"):
+            rewritten.append(entry)
+            continue
+        touched = True
+        call_id = entry.get("call_id") or entry.get("id") or ""
+        if kind == "function_call":
+            call_by_id[call_id] = entry
+            continue
+        origin = call_by_id.get(call_id, {})
+        tool_name = origin.get("name", "tool")
+        tool_args = str(origin.get("arguments", "{}"))[:2000]
+        tool_output = str(entry.get("output", ""))[:8000]
+        text = (
+            preamble
+            + f"Tool: {tool_name}\nArguments:\n```json\n{tool_args}\n```\n"
+            + f"Output:\n```\n{tool_output}\n```"
+        )
+        rewritten.append({"type": "message", "role": "user",
+                          "content": [{"type": "input_text", "text": text}]})
+    return rewritten, touched
+
+def has_function_call_output(input_items):
+    """Return True when ``input_items`` is a list containing a ``function_call_output`` item."""
+    if not isinstance(input_items, list):
+        return False
+    for entry in input_items:
+        if entry.get("type") == "function_call_output":
+            return True
+    return False
+
+# ═══════════════════════════════════════════════════════════════════
+# Log redaction
+# ═══════════════════════════════════════════════════════════════════
+
+# (regex, replacement) pairs applied in order by _redact(). Order matters:
+# the Anthropic rule must precede the generic "sk-" rule, which would
+# otherwise consume "sk-ant-..." keys first and the specific label would
+# never be produced.
+_SECRET_PATTERNS = [
+    (r"sk-ant-[A-Za-z0-9_\-]{20,}", "[REDACTED:anthropic]"),
+    (r"sk-[A-Za-z0-9_\-]{20,}", "[REDACTED:key]"),
+    (r"gh[pousr]_[A-Za-z0-9_]{20,}", "[REDACTED:github]"),
+    (r"Bearer\s+[A-Za-z0-9._\-]{20,}", "Bearer [REDACTED]"),
+]
+
+def _redact(text):
+    """Replace every match of each ``_SECRET_PATTERNS`` regex in ``text``.
+
+    Patterns are applied one after another in list order. Falsy input is
+    returned as is.
+    """
+    if not text:
+        return text
+    import re
+    cleaned = text
+    for secret_regex, placeholder in _SECRET_PATTERNS:
+        cleaned = re.sub(secret_regex, placeholder, cleaned)
+    return cleaned
+
+def _redact_json(obj):
+    """Serialize ``obj`` to text and run it through ``_redact``.
+
+    JSON (non-ASCII kept as is) is preferred; objects that cannot be
+    serialized fall back to ``str(obj)``.
+    """
+    try:
+        serialized = json.dumps(obj, ensure_ascii=False)
+    except Exception:
+        serialized = str(obj)
+    return _redact(serialized)
+
+# Maximum number of request snapshot files that _rotate_snapshots() leaves
+# in _REQUESTS_DIR; older ones (by modification time) are removed.
+_MAX_SNAPSHOTS = 200
+
+def save_request_snapshot(request_id, body):
+    """Write a redacted snapshot of a request body to ``_REQUESTS_DIR``.
+
+    The file ``<request_id>.json`` holds a ``_meta`` section (id, model,
+    stream flag, timestamps, status "pending") and the redacted request. It
+    is written to a ``.tmp`` sibling and moved into place, after which old
+    snapshots are rotated. Returns ``request_id``; a falsy id writes nothing.
+    """
+    if not request_id:
+        return request_id
+    meta = {
+        "request_id": request_id,
+        "model": body.get("model", ""),
+        "stream": body.get("stream", False),
+        "ts": time.time(),
+        "ts_iso": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
+        "status": "pending",
+        "duration_s": None,
+        "error": None,
+    }
+    snapshot = {"_meta": meta, "request": json.loads(_redact_json(body))}
+    final_path = os.path.join(_REQUESTS_DIR, f"{request_id}.json")
+    staging_path = final_path + ".tmp"
+    with open(staging_path, "w") as handle:
+        json.dump(snapshot, handle, ensure_ascii=False, indent=2)
+    os.replace(staging_path, final_path)
+    _rotate_snapshots()
+    return request_id
+
+def update_snapshot_response(request_id, status, duration_s=None, error=None):
+    """Record the outcome of a request in its existing snapshot file.
+
+    Sets ``_meta.status`` and, when given, ``_meta.duration_s`` (rounded to
+    three decimals) and ``_meta.error`` (first 200 characters). Missing ids or
+    files are ignored, and any failure while rewriting is swallowed.
+    """
+    if not request_id:
+        return
+    final_path = os.path.join(_REQUESTS_DIR, f"{request_id}.json")
+    if not os.path.exists(final_path):
+        return
+    try:
+        with open(final_path) as source:
+            snapshot = json.load(source)
+        meta = snapshot.get("_meta", {})
+        changes = {"status": status}
+        if duration_s is not None:
+            changes["duration_s"] = round(duration_s, 3)
+        if error is not None:
+            changes["error"] = str(error)[:200]
+        meta.update(changes)
+        snapshot["_meta"] = meta
+        staging_path = final_path + ".tmp"
+        with open(staging_path, "w") as sink:
+            json.dump(snapshot, sink, ensure_ascii=False, indent=2)
+        os.replace(staging_path, final_path)
+    except Exception:
+        pass
+
+def _rotate_snapshots():
+    """Delete the oldest ``.json`` files in ``_REQUESTS_DIR`` beyond ``_MAX_SNAPSHOTS``.
+
+    Age is judged by modification time. Any error stops the rotation silently.
+    """
+    try:
+        candidates = [
+            os.path.join(_REQUESTS_DIR, name)
+            for name in os.listdir(_REQUESTS_DIR)
+            if name.endswith(".json")
+        ]
+        candidates.sort(key=os.path.getmtime)
+        excess = len(candidates) - _MAX_SNAPSHOTS
+        for stale_path in candidates[:max(excess, 0)]:
+            os.remove(stale_path)
+    except Exception:
+        pass
+
+# ═══════════════════════════════════════════════════════════════════
+# Rate-limit token buckets
+# ═══════════════════════════════════════════════════════════════════
+
+class TokenBucket:
+    """Token-bucket limiter: starts full and refills at ``refill`` tokens per second."""
+
+    def __init__(self, capacity=10, refill=1.0):
+        """Create a full bucket holding at most ``capacity`` tokens."""
+        self.capacity = float(capacity)
+        self.tokens = self.capacity
+        self.refill = float(refill)
+        self.updated = time.monotonic()
+        self.lock = threading.Lock()
+
+    def allow(self, cost=1):
+        """Refill for the elapsed time, then try to spend ``cost`` tokens.
+
+        Returns True and deducts the tokens when enough are available,
+        otherwise returns False and deducts nothing.
+        """
+        with self.lock:
+            now = time.monotonic()
+            elapsed = now - self.updated
+            self.tokens = min(self.capacity, self.tokens + elapsed * self.refill)
+            self.updated = now
+            if self.tokens < cost:
+                return False
+            self.tokens -= cost
+            return True
+
+# TokenBucket instances created on demand by _bucket_for_route(), keyed by
+# route name (or target URL); _rate_buckets_lock guards the dict.
+_rate_buckets = {}
+_rate_buckets_lock = threading.Lock()
+
+def _bucket_for_route(route):
+    """Return the ``TokenBucket`` for ``route``, creating it on first use.
+
+    Buckets are keyed by the route's ``name``, else its ``target_url``, else
+    "default". New buckets hold 10 tokens and refill at 1 token per second.
+    """
+    bucket_key = route.get("name") or route.get("target_url") or "default"
+    with _rate_buckets_lock:
+        bucket = _rate_buckets.get(bucket_key)
+        if bucket is None:
+            bucket = _rate_buckets[bucket_key] = TokenBucket(capacity=10, refill=1.0)
+        return bucket
+
+# ═══════════════════════════════════════════════════════════════════
+# OpenAI-compat backend
+# ═══════════════════════════════════════════════════════════════════
+
+def oa_input_to_messages(input_data):
+    """Convert Responses API ``input`` into Chat Completions ``messages``.
+
+    ``input_data`` is either a plain string (one user message) or a list of
+    Responses items.  Handled item types:
+
+    * ``function_call`` - consecutive calls are batched into one assistant
+      message carrying ``tool_calls``.
+    * ``message`` - text parts are concatenated; the ``developer`` role maps
+      to ``system``; reasoning parts of assistant messages are forwarded as
+      ``reasoning_content``.
+    * ``function_call_output`` - becomes a ``tool`` message.  When the item
+      carries no ``call_id``/``id`` the id is recovered positionally from
+      the most recently flushed batch of tool calls.
+
+    Any other input type yields an empty list; list entries that are not
+    dicts are skipped.
+    """
+    msgs = []
+    if isinstance(input_data, str):
+        msgs.append({"role": "user", "content": input_data})
+        return msgs
+    if not isinstance(input_data, list):
+        return msgs
+
+    tool_name_by_id = {}
+    pending_tool_calls = []
+    last_flushed_ids = []
+    # Tool outputs seen since the last flush.  Indexing the positional
+    # fallback with this (instead of the total number of tool messages)
+    # keeps it working for the second and later tool rounds.
+    outputs_since_flush = 0
+    for item in input_data:
+        if not isinstance(item, dict):
+            continue
+        t = item.get("type")
+        if t == "function_call":
+            tcid = item.get("call_id") or item.get("id") or uid("tc")
+            pending_tool_calls.append(
+                {"id": tcid,
+                 "type": "function",
+                 "function": {"name": item.get("name", ""),
+                               "arguments": item.get("arguments", "{}")}})
+            tool_name_by_id[tcid] = item.get("name", "")
+            continue
+        # Any non-call item closes the current batch of tool calls.
+        if pending_tool_calls:
+            last_flushed_ids = [tc["id"] for tc in pending_tool_calls]
+            outputs_since_flush = 0
+            msgs.append({"role": "assistant", "content": None, "tool_calls": pending_tool_calls})
+            pending_tool_calls = []
+        if t == "message":
+            role = item.get("role", "user")
+            if role == "developer":
+                role = "system"
+            text = ""
+            reasoning_text = ""
+            content = item.get("content") or []
+            if isinstance(content, str):
+                text = content
+            else:
+                for part in content:
+                    if isinstance(part, str):
+                        text += part
+                        continue
+                    if not isinstance(part, dict):
+                        continue
+                    pt = part.get("type", "")
+                    if pt in ("input_text", "output_text"):
+                        text += part.get("text") or ""
+                    elif pt == "reasoning":
+                        for rp in part.get("content") or []:
+                            if isinstance(rp, dict):
+                                reasoning_text += rp.get("text") or ""
+                    elif pt == "input_image":
+                        # NOTE(review): the message is emitted at the first
+                        # image, so parts after it are dropped, and the
+                        # image_url value is forwarded exactly as received.
+                        img = part.get("image_url", part)
+                        msgs.append({"role": role, "content": [{"type": "text", "text": text},
+                                    {"type": "image_url", "image_url": img}]})
+                        text = None
+                        break
+            if text is not None:
+                msg = {"role": role, "content": text}
+                if reasoning_text and role == "assistant":
+                    msg["reasoning_content"] = reasoning_text
+                msgs.append(msg)
+        elif t == "function_call_output":
+            tcid = item.get("call_id") or item.get("id") or ""
+            if not tcid and outputs_since_flush < len(last_flushed_ids):
+                tcid = last_flushed_ids[outputs_since_flush]
+            outputs_since_flush += 1
+            msgs.append({"role": "tool", "tool_call_id": tcid,
+                         "tool_name": tool_name_by_id.get(tcid, ""),
+                         "content": item.get("output", "")})
+    if pending_tool_calls:
+        msgs.append({"role": "assistant", "content": None, "tool_calls": pending_tool_calls})
+    return msgs
+
+def cc_input_to_messages(input_data, instructions="", schema=None):
+    """Translate Responses API input into CommandCode /alpha/generate messages.
+
+    Every emitted message carries plain-string ``content`` (never a list of
+    content blocks):
+
+    * ``instructions``, when non-empty, lead the list as a ``user`` message;
+    * message roles other than ``user``/``assistant`` are coerced to ``user``;
+    * a run of ``function_call`` items is serialised as newline-separated
+      JSON objects (``{"type": "tool-call", ...}``) inside one assistant
+      message;
+    * a ``function_call_output`` becomes a ``user`` message holding the
+      output (JSON-encoded when it is not a string) cut to 8000 characters.
+
+    ``schema`` is accepted for interface compatibility and is not consulted.
+    """
+    out = []
+    queued_calls = []
+
+    def _plain_text(content):
+        # Collapse a string or a list of text-like parts into one string.
+        if isinstance(content, str):
+            return content
+        pieces = []
+        for chunk in content or []:
+            if isinstance(chunk, str):
+                pieces.append(chunk)
+            elif isinstance(chunk, dict) and chunk.get("type") in ("input_text", "output_text", "text"):
+                pieces.append(chunk.get("text", ""))
+        return "".join(pieces)
+
+    def _emit_queued():
+        # Write the buffered tool calls as inline JSON text, one per line.
+        if queued_calls:
+            rendered = [json.dumps(call, ensure_ascii=False) for call in queued_calls]
+            out.append({"role": "assistant", "content": "\n".join(rendered)})
+            queued_calls.clear()
+
+    if instructions:
+        out.append({"role": "user", "content": instructions})
+
+    if isinstance(input_data, str):
+        out.append({"role": "user", "content": input_data})
+        return out
+    if not isinstance(input_data, list):
+        return out
+
+    for entry in input_data:
+        if not isinstance(entry, dict):
+            continue
+        kind = entry.get("type")
+        if kind == "function_call":
+            queued_calls.append({
+                "type": "tool-call",
+                "id": entry.get("call_id") or entry.get("id") or uid("call"),
+                "name": entry.get("name") or "exec_command",
+                "arguments": entry.get("arguments") or "{}",
+            })
+            continue
+        _emit_queued()
+        if kind == "message":
+            speaker = entry.get("role", "user")
+            out.append({
+                "role": speaker if speaker in ("user", "assistant") else "user",
+                "content": _plain_text(entry.get("content", [])),
+            })
+        elif kind == "function_call_output":
+            result = entry.get("output", "")
+            if not isinstance(result, str):
+                result = json.dumps(result, ensure_ascii=False)
+            # String content is required for every message, tool results included.
+            out.append({"role": "user", "content": result[:8000]})
+    _emit_queued()
+    return out
+
+def oa_convert_tools(tools, strict=False):
+    """Normalise tool definitions into Chat Completions ``tools`` entries.
+
+    Only ``type == "function"`` tools are kept.  A tool may be nested
+    (``{"type": "function", "function": {...}}``, copied through as-is) or
+    flat (``name``/``description``/``parameters`` at the top level, wrapped
+    into the nested form).  Tools whose name is empty or ``"null"`` are
+    dropped.  With ``strict=True`` a ``strict: True`` flag is added unless a
+    nested tool already defines one.
+
+    Returns the converted list, or ``None`` when nothing remains.
+    """
+    if not tools:
+        return None
+    converted = []
+    for tool in tools:
+        if tool.get("type") != "function":
+            continue
+        nested = tool.get("function", {})
+        raw_name = nested.get("name") if nested else tool.get("name")
+        label = (raw_name or "").strip()
+        if label in ("", "null"):
+            continue
+        if nested:
+            spec = dict(tool)
+            if strict and "strict" not in nested:
+                spec["function"] = dict(nested, strict=True)
+        else:
+            fn_spec = {"name": label, "description": tool.get("description", ""),
+                       "parameters": tool.get("parameters", {})}
+            if strict:
+                fn_spec["strict"] = True
+            spec = {"type": "function", "function": fn_spec}
+        converted.append(spec)
+    return converted or None
+
+def oa_resp_to_responses(chat_resp, model, resp_id=None):
+    """Convert a non-streaming Chat Completions response into a Responses object.
+
+    Only the first choice is translated: its text becomes one ``message``
+    output item and each tool call becomes a ``function_call`` item.  The
+    finish reason maps to ``completed`` (``stop``/``tool_calls``) or
+    ``incomplete`` (anything else).
+
+    Args:
+        chat_resp: decoded Chat Completions response body.
+        model: model name echoed into the result.
+        resp_id: response id to use; a fresh one is generated when falsy.
+
+    Raises:
+        KeyError / IndexError: when ``choices[0].message`` is missing.
+    """
+    choice = chat_resp["choices"][0]
+    msg = choice["message"]
+    content = msg.get("content") or ""
+    finish = choice.get("finish_reason", "stop")
+    fm = {"stop": "completed", "length": "incomplete", "tool_calls": "completed", "content_filter": "incomplete"}
+    status = fm.get(finish, "incomplete")
+    outputs = []
+    if content:
+        outputs.append({"type": "message", "id": uid("msg"), "role": "assistant", "status": "completed",
+                        "content": [{"type": "output_text", "text": content, "annotations": []}]})
+    for tc in msg.get("tool_calls") or []:
+        fn = tc.get("function") or {}
+        outputs.append({"type": "function_call", "id": uid("fc"), "call_id": tc.get("id"),
+                        "name": fn.get("name"), "arguments": fn.get("arguments") or "{}", "status": "completed"})
+    # Backends may send ``"usage": null`` or null sub-fields; treat every
+    # missing or null counter as zero instead of raising.
+    usage = chat_resp.get("usage") or {}
+    prompt_details = usage.get("prompt_tokens_details") or {}
+    return {"id": resp_id or uid("resp"), "object": "response", "created": int(time.time()),
+            "model": model, "status": status, "output": outputs,
+            "usage": {"input_tokens": usage.get("prompt_tokens") or 0,
+                      "output_tokens": usage.get("completion_tokens") or 0,
+                      "total_tokens": usage.get("total_tokens") or 0,
+                      "input_tokens_details": {"cached_tokens": prompt_details.get("cached_tokens") or 0}}}
+
+def oa_stream_to_sse(chat_stream, model, req_id, _reasoning_out=None):
+    """Re-emit a Chat Completions SSE stream as Responses API SSE events.
+
+    Generator.  Lines obtained from ``_stream_with_idle_timeout(chat_stream)``
+    are decoded, ``data: {...}`` payloads are parsed, and the result of
+    ``emit(...)`` is yielded for each translated event:
+    ``response.created`` / ``response.in_progress`` up front, text,
+    reasoning and function-call-argument deltas while the stream runs, then
+    the ``*.done`` events and a final ``response.completed`` carrying the
+    assembled output items.
+
+    Args:
+        chat_stream: upstream stream handed to ``_stream_with_idle_timeout``.
+        model: model name echoed into the response envelopes.
+        req_id: response id to use; a fresh one is generated when falsy.
+        _reasoning_out: optional dict.  After the generator has been fully
+            consumed it receives ``"text"`` (the accumulated reasoning) and
+            ``"tool_calls"`` (the accumulated call buffers in index order).
+    """
+    resp_id = req_id or uid("resp")
+    msg_id = uid("msg")
+    text_buf = ""
+    reasoning_buf = ""
+    tc_buf = {}
+    fr = None
+    msg_opened = False
+
+    yield emit("response.created", {"type": "response.created",
+        "response": {"id": resp_id, "object": "response", "model": model,
+                     "status": "in_progress", "created": int(time.time()), "output": []}})
+    yield emit("response.in_progress", {"type": "response.in_progress", "response": {"id": resp_id}})
+
+    for line in _stream_with_idle_timeout(chat_stream):
+        line = line.decode("utf-8", errors="replace").strip()
+        # Skip blanks, SSE comments/keep-alives and the terminator.
+        if not line or line.startswith(":") or line == "data: [DONE]":
+            continue
+        if not line.startswith("data: "):
+            continue
+        try:
+            chunk = json.loads(line[6:])
+        except json.JSONDecodeError:
+            continue
+        choices = chunk.get("choices", [])
+        if not choices:
+            continue
+        delta = choices[0].get("delta") or {}
+        # Keep the last non-null finish reason so a trailing chunk without
+        # one cannot turn a finished response into "incomplete".
+        fr = choices[0].get("finish_reason") or fr
+
+        rc = delta.get("reasoning_content") or delta.get("reasoning")
+        if rc:
+            reasoning_buf += rc
+            yield emit("response.reasoning.delta", {"type": "response.reasoning.delta", "delta": rc})
+
+        content = delta.get("content")
+        if content:
+            if not msg_opened:
+                msg_id = uid("msg")
+                yield emit("response.output_item.added", {"type": "response.output_item.added",
+                    "item": {"type": "message", "id": msg_id, "role": "assistant", "status": "in_progress", "content": []}})
+                yield emit("response.content_part.added", {"type": "response.content_part.added",
+                    "part": {"type": "output_text", "text": "", "annotations": []}, "item_id": msg_id})
+                msg_opened = True
+            text_buf += content
+            yield emit("response.output_text.delta", {"type": "response.output_text.delta",
+                        "delta": content, "item_id": msg_id, "content_index": 0})
+
+        for tc in delta.get("tool_calls") or []:
+            idx = tc.get("index", 0)
+            if idx not in tc_buf:
+                fid = uid("fc")
+                # Fall back to the generated id when the upstream id is missing or null.
+                tc_buf[idx] = {"id": fid, "call_id": tc.get("id") or fid, "name": "", "args": ""}
+                yield emit("response.output_item.added", {"type": "response.output_item.added",
+                    "item": {"type": "function_call", "id": fid, "call_id": tc_buf[idx]["call_id"],
+                             "name": "", "arguments": "", "status": "in_progress"}})
+            fn = tc.get("function") or {}
+            if fn.get("name"):
+                tc_buf[idx]["name"] = fn["name"]
+            if fn.get("arguments"):
+                tc_buf[idx]["args"] += fn["arguments"]
+                # The SSE event name must match the payload type.
+                yield emit("response.function_call_arguments.delta", {"type": "response.function_call_arguments.delta",
+                            "delta": fn["arguments"], "item_id": tc_buf[idx]["id"]})
+
+    reasoning_rsn_id = uid("rsn") if reasoning_buf else None
+    if reasoning_buf:
+        yield emit("response.reasoning.done", {"type": "response.reasoning.done",
+                    "item_id": reasoning_rsn_id, "text": reasoning_buf})
+
+    if msg_opened:
+        yield emit("response.output_text.done", {"type": "response.output_text.done",
+                    "text": text_buf, "item_id": msg_id, "content_index": 0})
+        yield emit("response.content_part.done", {"type": "response.content_part.done",
+                    "part": {"type": "output_text", "text": text_buf, "annotations": []}, "item_id": msg_id})
+        yield emit("response.output_item.done", {"type": "response.output_item.done",
+            "item": {"type": "message", "id": msg_id, "role": "assistant", "status": "completed",
+                     "content": [{"type": "output_text", "text": text_buf, "annotations": []}]}})
+
+    for idx in sorted(tc_buf):
+        t = tc_buf[idx]
+        yield emit("response.function_call_arguments.done", {"type": "response.function_call_arguments.done",
+                    "item_id": t["id"], "name": t["name"], "arguments": t["args"]})
+        yield emit("response.output_item.done", {"type": "response.output_item.done",
+            "item": {"type": "function_call", "id": t["id"], "call_id": t["call_id"],
+                     "name": t["name"], "arguments": t["args"], "status": "completed"}})
+
+    fm = {"stop": "completed", "length": "incomplete", "tool_calls": "completed", "content_filter": "incomplete"}
+    status = fm.get(fr, "incomplete")
+    # Final output order: reasoning item, message item, then function calls.
+    final_out = []
+    if reasoning_buf:
+        final_out.append({"type": "reasoning", "id": reasoning_rsn_id, "status": "completed",
+                          "content": [{"type": "text", "text": reasoning_buf}]})
+    if msg_opened:
+        final_out.append({"type": "message", "id": msg_id, "role": "assistant", "status": "completed",
+                          "content": [{"type": "output_text", "text": text_buf, "annotations": []}]})
+    for idx in sorted(tc_buf):
+        t = tc_buf[idx]
+        final_out.append({"type": "function_call", "id": t["id"], "call_id": t["call_id"],
+                          "name": t["name"], "arguments": t["args"], "status": "completed"})
+    yield emit("response.completed", {"type": "response.completed",
+        "response": {"id": resp_id, "object": "response", "model": model,
+                     "status": status, "created": int(time.time()), "output": final_out}})
+    if _reasoning_out is not None:
+        _reasoning_out["text"] = reasoning_buf
+        _reasoning_out["tool_calls"] = [tc_buf[i] for i in sorted(tc_buf)]
+
+# ═══════════════════════════════════════════════════════════════════
+# Anthropic backend
+# ═══════════════════════════════════════════════════════════════════
+
+def an_input_to_messages(input_data):
+    """Convert Responses API ``input`` into Anthropic Messages ``messages``.
+
+    ``input_data`` is either a plain string (one user message) or a list of
+    Responses items:
+
+    * ``message`` - text parts are concatenated.  Assistant messages become
+      a list of ``thinking`` blocks (from reasoning/thinking parts) followed
+      by a ``text`` block; every other role is sent as ``user`` with plain
+      string content.
+    * ``function_call`` - an assistant ``tool_use`` block whose ``input`` is
+      the decoded arguments (``{}`` when they are not a valid JSON object).
+    * ``function_call_output`` - a user ``tool_result`` block whose
+      ``tool_use_id`` is the item's ``call_id``, the same id used for the
+      matching ``tool_use`` block.
+
+    Non-dict list entries are skipped; other input types yield ``[]``.
+    """
+    msgs = []
+    if isinstance(input_data, str):
+        msgs.append({"role": "user", "content": input_data})
+        return msgs
+    if not isinstance(input_data, list):
+        return msgs
+    for item in input_data:
+        if not isinstance(item, dict):
+            continue
+        t = item.get("type")
+        if t == "message":
+            role = item.get("role", "user")
+            if role == "developer":
+                role = "user"
+            text = ""
+            thinking_blocks = []
+            content = item.get("content") or []
+            if isinstance(content, str):
+                # Plain-string content carries no parts to walk.
+                text = content
+                content = []
+            for part in content:
+                if isinstance(part, str):
+                    text += part
+                    continue
+                if not isinstance(part, dict):
+                    continue
+                pt = part.get("type", "")
+                if pt in ("input_text", "output_text"):
+                    text += part.get("text") or ""
+                elif pt in ("reasoning", "thinking"):
+                    thinking_text = "".join(
+                        rp.get("text") or ""
+                        for rp in part.get("content") or []
+                        if isinstance(rp, dict)
+                    )
+                    if thinking_text:
+                        thinking_blocks.append({"type": "thinking", "thinking": thinking_text,
+                                                "signature": part.get("signature", "")})
+            if role == "assistant":
+                content_parts = list(thinking_blocks)
+                if text:
+                    content_parts.append({"type": "text", "text": text})
+                msgs.append({"role": "assistant", "content": content_parts if content_parts else text})
+            else:
+                msgs.append({"role": "user", "content": text})
+        elif t == "function_call":
+            raw_args = item.get("arguments") or "{}"
+            if isinstance(raw_args, dict):
+                tool_input = raw_args
+            else:
+                try:
+                    tool_input = json.loads(raw_args)
+                except (TypeError, ValueError):
+                    tool_input = {}
+                # tool_use input must be a JSON object.
+                if not isinstance(tool_input, dict):
+                    tool_input = {}
+            msgs.append({"role": "assistant", "content": [
+                {"type": "tool_use",
+                 "id": item.get("call_id") or item.get("id") or uid("tu"),
+                 "name": item.get("name", ""),
+                 "input": tool_input}
+            ]})
+        elif t == "function_call_output":
+            # The output links back through call_id, not through its own item id.
+            msgs.append({"role": "user", "content": [
+                {"type": "tool_result",
+                 "tool_use_id": item.get("call_id") or item.get("id") or "",
+                 "content": item.get("output", "")}
+            ]})
+    return msgs
+
+def an_convert_tools(tools):
+    """Convert function tool definitions into Anthropic ``tools`` entries.
+
+    Accepts both the nested (``{"function": {...}}``) and the flat tool
+    layout; tools whose ``type`` is not ``"function"`` are ignored.  A
+    missing ``parameters`` schema defaults to an empty object schema.
+
+    Returns the converted list, or ``None`` when nothing remains.
+    """
+    if not tools:
+        return None
+    empty_schema = {"type": "object", "properties": {}}
+    converted = []
+    for tool in tools:
+        if tool.get("type") != "function":
+            continue
+        # A non-empty nested definition wins; otherwise read the flat fields.
+        spec = tool.get("function", {}) or tool
+        converted.append({
+            "name": spec.get("name"),
+            "description": spec.get("description", ""),
+            "input_schema": spec.get("parameters", dict(empty_schema)),
+        })
+    return converted if converted else None
+
+def an_resp_to_responses(anthro_resp, model, resp_id=None):
+    """Convert a non-streaming Anthropic Messages response into a Responses object.
+
+    ``text`` blocks become ``message`` items, ``tool_use`` blocks become
+    ``function_call`` items (arguments JSON-encoded) and ``thinking`` blocks
+    become ``reasoning`` items; other block types are ignored.  The stop
+    reason ``max_tokens`` (or any unknown one) maps to ``incomplete``.
+    ``total_tokens`` is the sum of input and output tokens and
+    ``cached_tokens`` is always reported as 0.
+    """
+    status_by_stop = {"end_turn": "completed", "max_tokens": "incomplete",
+                      "stop_sequence": "completed", "tool_use": "completed"}
+    status = status_by_stop.get(anthro_resp.get("stop_reason", "end_turn"), "incomplete")
+
+    items = []
+    for block in anthro_resp.get("content", []):
+        kind = block.get("type", "")
+        if kind == "text":
+            items.append({"type": "message", "id": uid("msg"), "role": "assistant", "status": "completed",
+                          "content": [{"type": "output_text", "text": block.get("text", ""), "annotations": []}]})
+        elif kind == "tool_use":
+            items.append({"type": "function_call", "id": uid("fc"), "call_id": block.get("id", ""),
+                          "name": block.get("name", ""), "arguments": json.dumps(block.get("input", {})),
+                          "status": "completed"})
+        elif kind == "thinking":
+            items.append({"type": "reasoning", "id": uid("rsn"), "status": "completed",
+                          "content": [{"type": "text", "text": block.get("thinking", "")}]})
+
+    usage = anthro_resp.get("usage", {})
+    tokens_in = usage.get("input_tokens", 0)
+    tokens_out = usage.get("output_tokens", 0)
+    return {
+        "id": resp_id or uid("resp"),
+        "object": "response",
+        "created": int(time.time()),
+        "model": model,
+        "status": status,
+        "output": items,
+        "usage": {
+            "input_tokens": tokens_in,
+            "output_tokens": tokens_out,
+            "total_tokens": tokens_in + tokens_out,
+            "input_tokens_details": {"cached_tokens": 0},
+        },
+    }
+
+def an_stream_to_sse(stream, model, req_id):
+    """Re-emit an Anthropic Messages SSE stream as Responses API SSE events.
+
+    Generator.  Each ``data: {...}`` line of ``stream`` is parsed and
+    dispatched on its ``type``; the result of ``emit(...)`` is yielded for
+    every translated event.  Text blocks and tool-use blocks are opened on
+    ``content_block_start``, streamed on ``content_block_delta`` and closed
+    (and recorded for the final output) on ``content_block_stop``.
+    ``response.completed`` is emitted when ``message_stop`` arrives.
+
+    Args:
+        stream: iterable of raw byte lines from the upstream response.
+        model: model name echoed into the response envelopes.
+        req_id: response id to use; a fresh one is generated when falsy.
+    """
+    resp_id = req_id or uid("resp")
+    completed = []
+    msg_id = uid("msg")
+    text_buf = ""
+    tc_id = None
+    tc_call_id = None
+    tc_name = ""
+    tc_args = ""
+    block_type = None
+    stop_reason = "end_turn"
+
+    yield emit("response.created", {"type": "response.created",
+        "response": {"id": resp_id, "object": "response", "model": model,
+                     "status": "in_progress", "created": int(time.time()), "output": []}})
+    yield emit("response.in_progress", {"type": "response.in_progress", "response": {"id": resp_id}})
+
+    for raw in stream:
+        line = raw.decode("utf-8", errors="replace").strip()
+        if not line:
+            continue
+        # "event:" lines are ignored; the JSON payload's "type" drives dispatch.
+        if line.startswith("event: "):
+            continue
+        if not line.startswith("data: "):
+            continue
+        try:
+            data = json.loads(line[6:])
+        except json.JSONDecodeError:
+            continue
+
+        et = data.get("type", "")
+
+        if et == "message_start":
+            pass
+
+        elif et == "content_block_start":
+            cb_type = data.get("content_block", {}).get("type", "")
+            block_type = cb_type
+            if cb_type == "text":
+                msg_id = uid("msg")
+                yield emit("response.output_item.added", {"type": "response.output_item.added",
+                    "item": {"type": "message", "id": msg_id, "role": "assistant",
+                             "status": "in_progress", "content": []}})
+                yield emit("response.content_part.added", {"type": "response.content_part.added",
+                    "part": {"type": "output_text", "text": "", "annotations": []}, "item_id": msg_id})
+            elif cb_type == "tool_use":
+                cb = data.get("content_block", {})
+                tc_id = uid("fc")
+                tc_call_id = cb.get("id", tc_id)
+                tc_name = cb.get("name", "")
+                yield emit("response.output_item.added", {"type": "response.output_item.added",
+                    "item": {"type": "function_call", "id": tc_id, "call_id": tc_call_id,
+                             "name": tc_name, "arguments": "", "status": "in_progress"}})
+            elif cb_type == "thinking":
+                pass
+
+        elif et == "content_block_delta":
+            dd = data.get("delta", {})
+            dt = dd.get("type", "")
+            if dt == "text_delta":
+                txt = dd.get("text", "")
+                text_buf += txt
+                yield emit("response.output_text.delta", {"type": "response.output_text.delta",
+                            "delta": txt, "item_id": msg_id, "content_index": 0})
+            elif dt == "input_json_delta":
+                pj = dd.get("partial_json", "")
+                tc_args += pj
+                # The SSE event name must match the payload type.
+                yield emit("response.function_call_arguments.delta", {"type": "response.function_call_arguments.delta",
+                            "delta": pj, "item_id": tc_id})
+            elif dt == "thinking_delta":
+                tk = dd.get("thinking", "")
+                yield emit("response.reasoning.delta", {"type": "response.reasoning.delta", "delta": tk})
+
+        elif et == "content_block_stop":
+            if block_type == "text":
+                yield emit("response.output_text.done", {"type": "response.output_text.done",
+                            "text": text_buf, "item_id": msg_id, "content_index": 0})
+                yield emit("response.content_part.done", {"type": "response.content_part.done",
+                    "part": {"type": "output_text", "text": text_buf, "annotations": []}, "item_id": msg_id})
+                yield emit("response.output_item.done", {"type": "response.output_item.done",
+                    "item": {"type": "message", "id": msg_id, "role": "assistant", "status": "completed",
+                             "content": [{"type": "output_text", "text": text_buf, "annotations": []}]}})
+                completed.append({"type": "message", "id": msg_id, "role": "assistant", "status": "completed",
+                                  "content": [{"type": "output_text", "text": text_buf, "annotations": []}]})
+                # Each text block is its own message item; start the next one empty.
+                text_buf = ""
+            elif block_type == "tool_use":
+                yield emit("response.function_call_arguments.done", {"type": "response.function_call_arguments.done",
+                            "item_id": tc_id, "name": tc_name, "arguments": tc_args})
+                yield emit("response.output_item.done", {"type": "response.output_item.done",
+                    "item": {"type": "function_call", "id": tc_id, "call_id": tc_call_id,
+                             "name": tc_name, "arguments": tc_args, "status": "completed"}})
+                completed.append({"type": "function_call", "id": tc_id, "call_id": tc_call_id,
+                                  "name": tc_name, "arguments": tc_args, "status": "completed"})
+                tc_id = None
+                tc_args = ""
+            block_type = None
+
+        elif et == "message_delta":
+            stop_reason = data.get("delta", {}).get("stop_reason", "end_turn")
+
+        elif et == "message_stop":
+            sm = {"end_turn": "completed", "max_tokens": "incomplete",
+                  "stop_sequence": "completed", "tool_use": "completed"}
+            status = sm.get(stop_reason, "incomplete")
+            yield emit("response.completed", {"type": "response.completed",
+                "response": {"id": resp_id, "object": "response", "model": model,
+                             "status": status, "created": int(time.time()), "output": completed}})
+
+# Baseline config payload; ``date`` is a placeholder filled in by _cc_config().
+_DEFAULT_CC_CONFIG = {
+    "workingDir": "/tmp",
+    "date": "",
+    "environment": "linux",
+    "shell": "bash",
+    "files": [],
+    "structure": [],
+    "isGitRepo": False,
+    "currentBranch": "",
+    "mainBranch": "",
+    "gitStatus": "",
+    "recentCommits": [],
+}
+
+def _cc_config():
+    """Return a shallow copy of ``_DEFAULT_CC_CONFIG`` stamped with today's date.
+
+    ``date`` is the local date formatted as ``YYYY-MM-DD``.
+    """
+    return {**_DEFAULT_CC_CONFIG, "date": time.strftime("%Y-%m-%d")}
+
+def cc_convert_tools(tools):
+    """Convert tools for the CommandCode backend.
+
+    Delegates to ``oa_convert_tools`` with its default (non-strict) setting.
+    """
+    converted = oa_convert_tools(tools)
+    return converted
+
+def _strip_xmlish_tags(text):
+    """Remove every ``<...>`` span from ``text``; falsy input yields ``""``."""
+    source = text if text else ""
+    return re.sub(r"<[^>]+>", "", source)
+
+def _unwrap_cmd(cmd_val):
+    """Peel JSON wrappers off a double-wrapped ``cmd`` value.
+
+    The model sometimes produces ``{"cmd": "{\\"cmd\\": \\"actual\\"}"}``.
+    While the value parses as a JSON object holding a string ``"cmd"``
+    entry, that inner string replaces it, for at most three levels.
+    Anything that is not a string starting with ``{`` is returned unchanged.
+    """
+    if not (isinstance(cmd_val, str) and cmd_val.startswith("{")):
+        return cmd_val
+    remaining = 3
+    while remaining > 0:
+        remaining -= 1
+        try:
+            decoded = json.loads(cmd_val)
+        except Exception:
+            return cmd_val
+        nested = decoded.get("cmd") if isinstance(decoded, dict) else None
+        if not isinstance(nested, str):
+            return cmd_val
+        cmd_val = nested
+    return cmd_val
+
+def _build_explore_cmd(text_for_url):
+    """Build a shell pipeline that explores the repository mentioned in text.
+
+    The first http(s) URL found in ``text_for_url`` (or, failing that, in the
+    ``content`` of the entries when the text parses as a JSON list) is
+    rewritten to a ``/api/v1/repos/...`` base and used for three curl calls:
+    the decoded README, the root contents listing (first 50 entries) and the
+    first three releases.
+
+    Used by _parse_commandcode_text_tool_calls and by cc_stream_to_sse
+    (stuck recovery heuristic).
+
+    Returns:
+        ``(cmd, justification)``, or ``(None, None)`` when no URL is found.
+    """
+    if not text_for_url:
+        return None, None
+    # The pattern never matches a quote character, so the URL cannot break
+    # out of the single-quoted shell words it is embedded in below.
+    url_pattern = r"https?://[^\s\]'\\>\",]+"
+    trailing_junk = ")].,;'\\\""
+    url_m = re.search(url_pattern, text_for_url)
+    repo_url = url_m.group(0).rstrip(trailing_junk) if url_m else ""
+    if not repo_url and isinstance(text_for_url, str):
+        # Second chance: the text may be a JSON list of messages whose
+        # decoded content holds the URL (e.g. with escaped slashes).
+        try:
+            _parsed = json.loads(text_for_url)
+            if isinstance(_parsed, list):
+                for _item in _parsed:
+                    _c = _item.get("content", "") if isinstance(_item, dict) else str(_item)
+                    url_m2 = re.search(url_pattern, _c)
+                    if url_m2:
+                        repo_url = url_m2.group(0).rstrip(trailing_junk)
+                        break
+        except Exception:
+            pass
+    if not repo_url:
+        return None, None
+    # A trailing slash would otherwise produce ".../repo//contents".
+    repo_url = repo_url.rstrip("/")
+    if repo_url.endswith(".git"):
+        repo_url = repo_url[:-4]
+    if "/api/v1/repos/" not in repo_url:
+        host_m = re.match(r"(https?://[^/]+)/(.*)", repo_url)
+        if host_m:
+            host, path = host_m.groups()
+            api_base = f"{host}/api/v1/repos/{path}"
+        else:
+            api_base = repo_url.replace("/admin/", "/api/v1/repos/")
+    else:
+        api_base = repo_url
+    # The listing snippet joins with str() rather than an f-string that
+    # reuses single quotes: that form only parses on Python 3.12+, and on
+    # older interpreters the failure also short-circuits the "&&" chain.
+    cmd = (
+        f"cd /tmp && "
+        f"curl -sL --max-time 15 '{api_base}/contents/README.md' 2>/dev/null | "
+        f"python3 -c \"import sys,json,base64; d=json.load(sys.stdin); print(base64.b64decode(d['content']).decode())\" 2>/dev/null | head -600 && "
+        f"curl -sL --max-time 15 '{api_base}/contents' 2>/dev/null | python3 -c \"import sys,json; d=json.load(sys.stdin); print('\\n'.join(str(x.get('path')) + ' ' + str(x.get('type')) for x in d[:50]))\" 2>/dev/null && "
+        f"curl -sL --max-time 15 '{api_base}/releases' 2>/dev/null | python3 -c \"import sys,json; d=json.load(sys.stdin); print(json.dumps(d[:3], indent=2)[:2000])\" 2>/dev/null"
+    )
+    return cmd, "Explore repository to understand the app and gather README, root contents, and releases for the landing page."
+
+def _parse_commandcode_text_tool_calls(text):
+    """Parse CommandCode's text-form tool calls into Responses function calls.
+
+    Handles THREE formats:
+      1. XML: ``<tool_call name="bash"><parameter name="command">...</parameter>`` (original)
+      2. Function: ``<function=bash>...</function>`` (original)
+      3. [FIX 5] Raw JSON inline: {"type":"tool-call","id":"...","name":"exec_command","arguments":"{...}"}
+
+    Format 3 exists because cc_input_to_messages sends tool calls as inline JSON text.
+    The CC model echoes this format back in its response.
+    Extraction is done by _extract_raw_json_tool_calls() which is appended after the
+    XML pattern loop. See that function for details on malformed-JSON handling.
+
+    Tolerant of: unescaped inner quotes, unbalanced braces, missing type/id fields,
+    sandbox_permissions at top level vs nested inside arguments, etc.
+    """
+    calls = []
+    if not text:
+        return calls
+
+    _build_explore_cmd_local = _build_explore_cmd
+
+    # [FIX 17] DSML tool_call blocks used by the model now.
+    # Example:
+    #   <｜｜DSML｜｜tool_calls>
+    #   <｜｜DSML｜｜invoke name="exec">
+    #   <｜｜DSML｜｜parameter name="command" string="true">curl ...</｜｜DSML｜｜parameter>
+    #   <｜｜DSML｜｜parameter name="sandbox_permissions" string="true">require_escalated</｜｜DSML｜｜parameter>
+    #   <｜｜DSML｜｜parameter name="justification" string="true">...</｜｜DSML｜｜parameter>
+    #   <｜｜DSML｜｜parameter name="prefix_rule" string="true">["/bin/bash", "-lc", "curl ..."]</｜｜DSML｜｜parameter>
+    #   </｜｜DSML｜｜invoke>
+    #   </｜｜DSML｜｜tool_calls>
+    for m in re.finditer(r"<[^>]*tool_calls[^>]*>(.*?)</[^>]*tool_calls[^>]*>", text, re.DOTALL | re.IGNORECASE):
+        block = m.group(1) or ""
+        for im in re.finditer(r"<[^>]*invoke[^>]*name=\"([^\"]+)\"[^>]*>(.*?)</[^>]*invoke>", block, re.DOTALL | re.IGNORECASE):
+            raw_name = (im.group(1) or "").strip()
+            body = (im.group(2) or "").strip()
+            if not body:
+                continue
+            cmd = None
+            sandbox_permissions = None
+            justification = None
+            # Parameter tags are the canonical source.
+            for pm in re.finditer(r"<[^>]*parameter[^>]*name=\"([^\"]+)\"[^>]*>(.*?)</[^>]*parameter>", body, re.DOTALL | re.IGNORECASE):
+                key = (pm.group(1) or "").strip().lower()
+                val = _strip_xmlish_tags(pm.group(2)).strip()
+                # [FIX 21] Accept both "command" and "cmd" parameter names.
+                # The tool schema defines the parameter as "cmd" (see exec_command schema),
+                # but the model sometimes uses "command" (especially from prefix_rule fallback).
+                # Previously only "command" was accepted, so DSML blocks with name="cmd"
+                # were silently dropped — causing Codex CLI to stop mid-task.
+                if key in ("command", "cmd"):
+                    cmd = val
+                elif key == "prefix_rule" and not cmd:
+                    try:
+                        pr_obj = json.loads(val)
+                    except Exception:
+                        pr_obj = None
+                    if isinstance(pr_obj, list) and pr_obj and isinstance(pr_obj[-1], str):
+                        cmd = pr_obj[-1]
+                elif key == "sandbox_permissions":
+                    sandbox_permissions = val
+                elif key == "justification":
+                    justification = val
+
+            # [FIX 20] Support explore / explore_agent in DSML blocks
+            is_explore = raw_name.lower() in ("explore", "explore_agent")
+            if is_explore:
+                explore_cmd, explore_just = _build_explore_cmd_local(body)
+                if explore_cmd:
+                    cmd = explore_cmd
+                    justification = explore_just
+
+            # Fallback: if the body contains a raw JSON command.
+            if not cmd:
+                jm = re.search(r'"(?:command|cmd)"\s*:\s*"((?:[^"\\]|\\.)*)"', body, re.DOTALL)
+                if jm:
+                    cmd = jm.group(1).replace('\\n', '\n').replace('\\"', '"').strip()
+            if not cmd:
+                continue
+            # [FIX 19] Translate execute_request and other variations to exec_command (CLI only supports exec_command)
+            # [FIX 20] Translate explore and explore_agent to exec_command
+            tool_name = "exec_command" if raw_name.lower() in ("exec", "bash", "shell", "terminal", "run_command", "execute_request", "execute_command", "run_shell_command", "run_shell", "run", "explore", "explore_agent") else raw_name
+            args = {"cmd": _unwrap_cmd(cmd)}
+            if sandbox_permissions:
+                args["sandbox_permissions"] = sandbox_permissions if sandbox_permissions in ("use_default", "require_escalated", "with_user_approval") else "require_escalated"
+            if justification:
+                args["justification"] = justification
+            calls.append({
+                "full_match": m.group(0),
+                "name": tool_name,
+                "arguments": json.dumps(args, ensure_ascii=False),
+            })
+
+    # [FIX 16] Native <bash> blocks from CommandCode.
+    # Example:
+    #   <bash>
+    #   sandbox_permissions: require_escalated
+    #   justification: ...
+    #   prefix_rule: ["/bin/bash", "-lc", "curl ..."]
+    #   </bash>
+    # Convert into exec_command calls by extracting the command from prefix_rule.
+    for m in re.finditer(r"<bash>(.*?)</bash>", text, re.DOTALL | re.IGNORECASE):
+        body = (m.group(1) or "").strip()
+        if not body:
+            continue
+        sandbox_permissions = None
+        justification = None
+        cmd = None
+        # Try line-oriented parsing first.
+        for line in body.splitlines():
+            s = line.strip()
+            if s.lower().startswith("sandbox_permissions:"):
+                sandbox_permissions = s.split(":", 1)[1].strip()
+            elif s.lower().startswith("justification:"):
+                justification = s.split(":", 1)[1].strip()
+            elif s.lower().startswith("prefix_rule:"):
+                pr = s.split(":", 1)[1].strip()
+                try:
+                    pr_obj = json.loads(pr)
+                except Exception:
+                    pr_obj = None
+                if isinstance(pr_obj, list) and pr_obj:
+                    # If the last arg exists, it is typically the shell command.
+                    cmd = pr_obj[-1] if isinstance(pr_obj[-1], str) else None
+                elif pr.startswith("[") and pr.endswith("]"):
+                    parts = re.findall(r'"((?:[^"\\]|\\.)*)"', pr)
+                    if parts:
+                        cmd = parts[-1].encode().decode("unicode_escape")
+        # Fallback: grab a shell-looking line if prefix_rule wasn't parseable.
+        if not cmd:
+            for line in body.splitlines():
+                s = line.strip()
+                if re.match(r"^(curl|wget|python3?|node|npm|pnpm|yarn|cat|ls|find|grep|rg|sed|awk|git|mkdir|touch|printf|echo)\b", s):
+                    cmd = s
+                    break
+        if not cmd:
+            continue
+        args = {"cmd": cmd}
+        if sandbox_permissions:
+            args["sandbox_permissions"] = sandbox_permissions if sandbox_permissions in ("use_default", "require_escalated", "with_user_approval") else "require_escalated"
+        if justification:
+            args["justification"] = justification
+        calls.append({
+            "full_match": m.group(0),
+            "name": "exec_command",
+            "arguments": json.dumps(args, ensure_ascii=False),
+        })
+
+    # [FIX 15] Native <explore_agent> blocks from CommandCode.
+    # Format seen in logs:
+    #   <explore_agent>\nmessages: [{...}]\n</explore_agent>
+    # Treat as an assistant-requested agent call so the loop can continue.
+    for m in re.finditer(r"<explore_agent>(.*?)</explore_agent>|<explore_agent>\s*messages:\s*(\[.*?\])", text, re.DOTALL | re.IGNORECASE):
+        body = m.group(1) or m.group(2) or ""
+        body = body.strip()
+        msgs = None
+        if body:
+            try:
+                msgs = json.loads(body) if body.startswith("[") else None
+            except Exception:
+                msgs = None
+        if msgs is None and body:
+            mm = re.search(r"(\[.*\])", body, re.DOTALL)
+            if mm:
+                try:
+                    msgs = json.loads(mm.group(1))
+                except Exception:
+                    msgs = None
+        if msgs is None:
+            msgs = body
+        text_for_url = body if isinstance(body, str) else json.dumps(body, ensure_ascii=False)
+        cmd, justification = _build_explore_cmd_local(text_for_url)
+        if not cmd:
+            cmd = "echo 'explore_agent: unable to extract repository URL'"
+            justification = "Fallback for explore_agent block without URL."
+        args = {"cmd": cmd}
+        if justification:
+            args["justification"] = justification
+        calls.append({
+            "full_match": m.group(0),
+            "name": "exec_command",
+            "arguments": json.dumps(args, ensure_ascii=False),
+        })
+
+    if not calls and text.count("<explore_agent>") >= 2:
+        url_m = re.search(r"https?://[^\s\]'\\>\"]+", text)
+        if not url_m:
+            for prev_url in _last_user_urls:
+                url_m = re.search(r"https?://[^\s\]'\\>\"]+", prev_url)
+                if url_m:
+                    break
+        if url_m:
+            explore_url = url_m.group(0).rstrip(")].,;'\\")
+            cmd, justification = _build_explore_cmd_local(explore_url)
+            if cmd:
+                calls.append({
+                    "full_match": "<explore_agent>...",
+                    "name": "exec_command",
+                    "arguments": json.dumps({"cmd": cmd, "justification": justification or "Explore repository"}, ensure_ascii=False),
+                })
+
+    # [FIX 24] Handle <require_escalation> and <request_escalation_permission> blocks.
+    # The model produces these when it wants elevated permissions but the CC
+    # adapter doesn't support them. Synthesize a proceed command so the loop continues.
+    if not calls:
+        for m in re.finditer(r"<(?:require_escalation|request_escalation_permission)>(.*?)</(?:require_escalation|request_escalation_permission)>", text, re.DOTALL | re.IGNORECASE):
+            body_escal = (m.group(1) or "").strip()
+            _inner_url_m = re.search(r"https?://[^\s\]'\\>\",]+", body_escal)
+            if _inner_url_m:
+                _e_url = _inner_url_m.group(0).rstrip(")].,;'\\\"")
+                _e_cmd, _e_just = _build_explore_cmd_local(_e_url)
+                if _e_cmd:
+                    calls.append({
+                        "full_match": m.group(0),
+                        "name": "exec_command",
+                        "arguments": json.dumps({"cmd": _e_cmd, "justification": _e_just or "Escalation block with URL — auto-proceed"}, ensure_ascii=False),
+                    })
+                    continue
+            if not calls:
+                calls.append({
+                    "full_match": m.group(0),
+                    "name": "exec_command",
+                    "arguments": json.dumps({"cmd": "echo 'escalation: auto-proceeding — no specific command in escalation block'", "justification": "Auto-proceed past escalation request"}, ensure_ascii=False),
+                })
+
+    # [FIX 24b] Bare <require_escalation ... /> or <request_escalation_permission ... />
+    # without closing tags. Just auto-proceed.
+    if not calls and re.search(r"<(?:require_escalation|request_escalation_permission)[\s/>]", text, re.IGNORECASE):
+        calls.append({
+            "full_match": "<escalation_bare/>",
+            "name": "exec_command",
+            "arguments": json.dumps({"cmd": "echo 'escalation: auto-proceeding past bare escalation tag'", "justification": "Auto-proceed past bare escalation tag"}, ensure_ascii=False),
+        })
+
+    patterns = [
+        r"<tool_call(?:\s+name=['\"]?([^'\">\s]+)['\"]?)?>(.*?)</tool_call[)]?>",
+        r"<function=(\w+)>(.*?)</function>",
+        # [FIX 14] CC model actual output: <tool_call type="bash">\n{"command":"...", "description":"..."}
+        # No </tool_call) closing tag — body is a raw JSON object
+        r"<tool_call(?:\s+type=['\"]?(\w+)['\"]?)?>\s*(\{.*?\})(?:\s*</tool_call)?",
+    ]
+
+    def _find_balanced_brace(text, start):
+        """Return the index of the '}' that closes the '{' at text[start].
+
+        Braces that appear between double quotes are not counted, and a
+        backslash makes the scanner skip the character that follows it.
+        Returns -1 when text[start] is not '{' or no matching brace exists.
+        """
+        if start >= len(text) or text[start] != '{':
+            return -1
+        nesting = 0
+        inside_quotes = False
+        skip_next = False
+        for pos in range(start, len(text)):
+            c = text[pos]
+            if skip_next:
+                # Character right after a backslash: consumed verbatim.
+                skip_next = False
+                continue
+            if c == '\\':
+                skip_next = True
+                continue
+            if c == '"':
+                inside_quotes = not inside_quotes
+                continue
+            if inside_quotes:
+                continue
+            if c == '{':
+                nesting += 1
+            elif c == '}':
+                nesting -= 1
+                if nesting == 0:
+                    return pos
+        return -1
+
+    def _extract_field(text, key, end_chars=',}'):
+        """Pull the value that follows "key": out of loosely-JSON text.
+
+        [FIX 7] A single backslash directly in front of the value is skipped,
+        so values written as an escaped quote (nested JSON inside a string)
+        are still recognised.
+
+        String values are read up to the next unescaped double quote; inside
+        the string a backslash is dropped and the character after it is kept
+        as-is. If the text ends before a closing quote, whatever was collected
+        is returned. Object values ('{...}') are returned as their raw text
+        via _find_balanced_brace().
+
+        Returns None when the key is absent, when a string value begins with
+        one of end_chars, or when the value is neither a string nor an object.
+        """
+        key_match = re.search(r'"' + re.escape(key) + r'"\s*:\s*', text, re.DOTALL)
+        if key_match is None:
+            return None
+        size = len(text)
+        pos = key_match.end()
+        # Step over one leading backslash (value starts with an escaped quote).
+        if pos < size and text[pos] == '\\':
+            pos += 1
+        if pos >= size:
+            return None
+        lead = text[pos]
+        if lead == '"':
+            collected = []
+            cursor = pos + 1
+            while cursor < size:
+                c = text[cursor]
+                if c == '\\' and cursor + 1 < size:
+                    collected.append(text[cursor + 1])
+                    cursor += 2
+                    continue
+                if c == '"':
+                    return ''.join(collected)
+                if c in end_chars and not collected:
+                    return None
+                collected.append(c)
+                cursor += 1
+            return ''.join(collected)
+        if lead == '{':
+            close = _find_balanced_brace(text, pos)
+            if close > pos:
+                return text[pos:close + 1]
+        return None
+
+    def _extract_args(text):
+        """Extract the "arguments"/"input" object of a tool call as JSON text.
+
+        [FIX 6] THREE-TIER PARSER for the two shapes the model produces:
+          A) the object written directly (or inside unescaped quotes), and
+          B) the object written as an escaped JSON string.
+
+        The object is delimited by plain brace counting from the first '{'
+        after the key (quotes are NOT tracked here). The candidate is then
+        validated, in order:
+          1. as-is;
+          2. with every backslash-quote replaced by a bare quote;
+          3. after decoding backslash escapes (unicode_escape).
+        The first form accepted by json.loads() is returned. If none is
+        accepted the raw candidate is returned so the caller can fall back.
+
+        Returns None when the key is missing, the value does not start with
+        '{', or the braces never balance. The caller runs json.loads() on the
+        returned string.
+        """
+        m = re.search(r'"(?:arguments|input)"\s*:\s*"?', text)
+        if not m:
+            return None
+        start = m.end()
+        if start < len(text) and text[start] == '"':
+            start += 1
+        if start >= len(text) or text[start] != '{':
+            return None
+
+        # Locate the brace that brings the nesting depth back to zero.
+        depth = 0
+        end = -1
+        for i in range(start, len(text)):
+            ch = text[i]
+            if ch == '{':
+                depth += 1
+            elif ch == '}':
+                depth -= 1
+                if depth == 0:
+                    end = i
+                    break
+        if end < 0:
+            return None
+        raw = text[start:end + 1]
+
+        # Tier 1: already valid JSON.
+        try:
+            json.loads(raw)
+            return raw
+        except json.JSONDecodeError:
+            pass
+
+        # Tier 2: unescape inner \" -> "
+        unescaped = raw.replace('\\"', '"')
+        try:
+            json.loads(unescaped)
+            return unescaped
+        except json.JSONDecodeError:
+            pass
+
+        # Tier 3: decode backslash escapes. unicode_escape reads its input as
+        # Latin-1, so encoding with UTF-8 first would turn every non-ASCII
+        # character into mojibake. Encoding as Latin-1 with backslashreplace
+        # keeps Latin-1 characters as single bytes and rewrites everything
+        # else as \uXXXX escapes, which the decoder restores losslessly.
+        try:
+            fixed = raw.encode('latin-1', 'backslashreplace').decode('unicode_escape')
+            json.loads(fixed)
+            return fixed
+        except Exception:
+            pass
+
+        # Give up — hand back the raw text.
+        return raw
+
+    def _extract_raw_json_tool_calls(t):
+        """[FIX 5] Extract raw JSON tool-call objects embedded in free text.
+
+        Scans ``t`` for ``"type": "tool-call"`` markers (``tool_call`` and
+        ``function_call`` are accepted too). For every marker the text from
+        the marker to the end of ``t`` is searched field by field, because the
+        surrounding JSON is often malformed:
+          - name / sandbox_permissions / justification via _extract_field()
+          - arguments via _extract_args(), falling back to _extract_field()
+            on "arguments" and then "input"
+
+        Markers without a name are skipped. Shell-like tool names and
+        explore / explore_agent ([FIX 20]) are mapped to exec_command; for
+        explore calls the command is built by _build_explore_cmd_local().
+        [FIX 6]/[FIX 11] cmd values are passed through _unwrap_cmd() so a
+        double-wrapped JSON payload does not end up as the command.
+
+        sandbox_permissions is normalised to one of
+        use_default | require_escalated | with_user_approval. A valid value
+        found at the top level of the tool call is kept as written; it is no
+        longer escalated just because it is a non-empty string.
+
+        Returns a list of dicts with keys "full_match" (text from the marker
+        to the end of ``t``), "name" and "arguments" (a JSON-encoded string).
+        """
+        valid_sp = frozenset({"use_default", "require_escalated", "with_user_approval"})
+        marker = re.compile(r'"type"\s*:\s*"(tool-call|tool_call|function_call)"')
+        results = []
+        idx = 0
+        while True:
+            # Search from idx directly instead of re-slicing t on every pass.
+            m = marker.search(t, idx)
+            if not m:
+                break
+            tc_pos = m.start()
+            snippet = t[tc_pos:]
+            idx = tc_pos + 1
+            tc_name = _extract_field(snippet, "name")
+            if not tc_name:
+                continue
+
+            # [FIX 20] Support explore / explore_agent in raw JSON tool calls
+            is_explore = tc_name.lower() in ("explore", "explore_agent")
+
+            if is_explore:
+                # Build explore command from the whole snippet/arguments
+                explore_cmd, explore_just = _build_explore_cmd_local(snippet)
+                if explore_cmd:
+                    args = {"cmd": explore_cmd}
+                    if explore_just:
+                        args["justification"] = explore_just
+                else:
+                    args = {"cmd": "echo 'explore: unable to extract repository URL'", "justification": "Fallback for explore tool call without URL."}
+                tool_name = "exec_command"
+            else:
+                # [FIX 19] Translate execute_request and other variations to exec_command (CLI only supports exec_command)
+                tool_name = "exec_command" if tc_name.lower() in ("exec", "bash", "shell", "terminal", "run_command", "execute_request", "execute_command", "run_shell_command", "run_shell", "run") else tc_name
+                args_raw = _extract_args(snippet) or _extract_field(snippet, "arguments") or _extract_field(snippet, "input") or "{}"
+                try:
+                    args = json.loads(args_raw) if args_raw.startswith('{') else {"cmd": args_raw}
+                except Exception:
+                    # Unparseable arguments: treat the raw text as the command.
+                    args = {"cmd": args_raw}
+                if "cmd" not in args or not args["cmd"]:
+                    args["cmd"] = str(args)
+                # [FIX 11] Self-healing: unwrap double-wrapped cmd values
+                args["cmd"] = _unwrap_cmd(args.get("cmd", ""))
+
+            # Normalize sandbox_permissions to valid values
+            if "sandbox_permissions" in args:
+                spv = args["sandbox_permissions"]
+                if isinstance(spv, dict):
+                    args["sandbox_permissions"] = "require_escalated" if spv.get("require_escalated") else "use_default"
+                elif isinstance(spv, str) and spv not in valid_sp:
+                    args["sandbox_permissions"] = "require_escalated"
+            else:
+                # Fallback: extract from raw snippet (model puts it at top level)
+                sp_raw = _extract_field(snippet, "sandbox_permissions")
+                if sp_raw in valid_sp:
+                    # Already a valid value — keep it, matching the in-args
+                    # branch above instead of silently escalating.
+                    args["sandbox_permissions"] = sp_raw
+                elif sp_raw:
+                    try:
+                        sp_obj = json.loads(sp_raw) if sp_raw.startswith('{') else {"require_escalated": bool(sp_raw)}
+                        if isinstance(sp_obj, dict) and sp_obj.get("require_escalated"):
+                            args["sandbox_permissions"] = "require_escalated"
+                    except Exception:
+                        # Best effort: leave sandbox_permissions unset.
+                        pass
+            if "justification" not in args:
+                just_raw = _extract_field(snippet, "justification")
+                if just_raw:
+                    args["justification"] = just_raw
+            results.append({
+                "full_match": snippet,
+                "name": tool_name,
+                "arguments": json.dumps(args, ensure_ascii=False),
+            })
+        return results
+
+    for pat in patterns:
+        for m in re.finditer(pat, text, re.DOTALL | re.IGNORECASE):
+            if pat.startswith("<function"):
+                raw_name = m.group(1)
+                body = m.group(2)
+            else:
+                raw_name = m.group(1) or ""
+                body = m.group(2)
+                nm = re.search(r"<tool\s+name=[\"']?([^\"'>\s]+)", body, re.IGNORECASE)
+                raw_name = raw_name or (nm.group(1) if nm else "bash")
+            params = {}
+            body_stripped = body.strip()
+            if body_stripped.startswith("{"):
+                try:
+                    obj = json.loads(body_stripped)
+                    cmd = obj.get("command") or obj.get("cmd") or ""
+                    cmd = _unwrap_cmd(cmd)  # [FIX 11]
+                    if cmd:
+                        # [FIX 19] Translate execute_request and other variations to exec_command (CLI only supports exec_command)
+                        tool_name = "exec_command" if raw_name.lower() in ("exec", "bash", "shell", "terminal", "run_command", "execute_request", "execute_command", "run_shell_command", "run_shell", "run") else raw_name
+                        args = {"cmd": cmd}
+                        sp = obj.get("sandbox_permissions")
+                        if isinstance(sp, dict) and sp.get("require_escalated"):
+                            args["sandbox_permissions"] = "require_escalated"
+                        elif isinstance(sp, str):
+                            args["sandbox_permissions"] = sp
+                        if obj.get("justification"):
+                            args["justification"] = obj.get("justification")
+                        calls.append({"full_match": m.group(0), "name": tool_name, "arguments": json.dumps(args)})
+                        continue
+                except Exception:
+                    pass
+            for pm in re.finditer(r"<parameter(?:\s+name=[\"']?(\w+)[\"']?|=(\w+))>(.*?)</parameter>", body, re.DOTALL | re.IGNORECASE):
+                key = pm.group(1) or pm.group(2) or "text"
+                params[key] = _strip_xmlish_tags(pm.group(3)).strip()
+            
+            # [FIX 20] Support explore / explore_agent in XML tool calls
+            is_explore = raw_name.lower() in ("explore", "explore_agent")
+            if is_explore:
+                explore_cmd, explore_just = _build_explore_cmd_local(body)
+                if explore_cmd:
+                    cmd = explore_cmd
+                    params["justification"] = explore_just
+                else:
+                    cmd = ""
+            else:
+                cmd = params.get("command") or params.get("cmd") or ""
+
+            if not cmd and body_stripped.startswith("{"):
+                cm = re.search(r'"(?:command|cmd)"\s*:\s*"(.*?)"\s*,\s*"(?:sandbox_permissions|justification|prefix_rule)"', body, re.DOTALL)
+                if not cm:
+                    cm = re.search(r'"(?:command|cmd)"\s*:\s*"(.*?)"\s*}', body, re.DOTALL)
+                if cm:
+                    cmd = cm.group(1)
+                    cmd = cmd.replace('\\n', '\n').replace('\\"', '"').strip()
+                    cmd = _unwrap_cmd(cmd)  # [FIX 11]
+                    if re.search(r'"sandbox_permissions"\s*:\s*\{\s*"require_escalated"\s*:\s*true\s*\}', body, re.DOTALL):
+                        params["sandbox_permissions"] = "require_escalated"
+                    jm = re.search(r'"justification"\s*:\s*"(.*?)"\s*(?:,|})', body, re.DOTALL)
+                    if jm:
+                        params["justification"] = jm.group(1).replace('\\n', '\n').replace('\\"', '"').strip()
+            if not cmd:
+                stripped = _strip_xmlish_tags(body)
+                lines = [ln.strip() for ln in stripped.splitlines() if ln.strip()]
+                for i, ln in enumerate(lines):
+                    if re.match(r"^(curl|wget|python3?|node|npm|pnpm|yarn|cat|ls|find|grep|rg|sed|awk|git|mkdir|touch|printf|echo)\b", ln):
+                        cmd = "\n".join(lines[i:])
+                        break
+                if not cmd and lines:
+                    cmd = "\n".join(lines)
+            if not cmd:
+                continue
+            # [FIX 19] Translate execute_request and other variations to exec_command (CLI only supports exec_command)
+            # [FIX 20] Translate explore and explore_agent to exec_command
+            tool_name = "exec_command" if raw_name.lower() in ("exec", "bash", "shell", "terminal", "run_command", "execute_request", "execute_command", "run_shell_command", "run_shell", "run", "explore", "explore_agent") else raw_name
+            args = {"cmd": _unwrap_cmd(cmd)}  # [FIX 11] all paths must unwrap
+            if params.get("sandbox_permissions"):
+                args["sandbox_permissions"] = params["sandbox_permissions"]
+            if params.get("justification"):
+                args["justification"] = params["justification"]
+            calls.append({"full_match": m.group(0), "name": tool_name, "arguments": json.dumps(args)})
+
+    # Also extract raw JSON tool-call objects embedded in free text
+    calls.extend(_extract_raw_json_tool_calls(text))
+
+    # [FIX 18] Native <todo_write> blocks from the model (used for checklist/task tracking)
+    # The model outputs a task checklist in a custom <todo_write> XML tag block:
+    #   <todo_write>
+    #     <todos>[{"id":"1","status":"in_progress","description":"..."}]</todos>
+    #   </todo_write>
+    # We parse this and map it to a standard 'TodoWrite' tool call so the CLI agent loop continues execution.
+    for m in re.finditer(r"<todo_write>(.*?)</todo_write>", text, re.DOTALL | re.IGNORECASE):
+        body = (m.group(1) or "").strip()
+        if not body:
+            continue
+        todos_match = re.search(r"<todos>(.*?)</todos>", body, re.DOTALL | re.IGNORECASE)
+        if not todos_match:
+            continue
+        raw_todos_json = todos_match.group(1).strip()
+        try:
+            raw_todos = json.loads(raw_todos_json)
+        except Exception as e:
+            print(f"[translate-proxy] [FIX 18] Failed to parse <todos> JSON: {e}", file=sys.stderr)
+            raw_todos = None
+        if isinstance(raw_todos, list):
+            parsed_todos = []
+            for item in raw_todos:
+                if isinstance(item, dict):
+                    desc = item.get("description") or item.get("content") or ""
+                    parsed_todos.append({
+                        "content": desc,
+                        "activeForm": item.get("activeForm") or desc,
+                        "status": item.get("status") or "pending"
+                    })
+            calls.append({
+                "full_match": m.group(0),
+                "name": "TodoWrite",
+                "arguments": json.dumps({"todos": parsed_todos}, ensure_ascii=False)
+            })
+
+    # [FIX 11] Self-healing: last-chance sanitization pass on ALL extracted calls
+    calls = _sanitize_tool_calls(calls)
+    return calls
+
+def _sanitize_tool_calls(calls):
+    """[FIX 11/T3] Post-extraction self-healing validation layer.
+
+    Runs AFTER all extraction paths (XML, raw JSON, regex) have produced their
+    tool calls. This is the final safety net before calls are returned to the
+    streaming/response builder.
+
+    Only calls named "exec_command" are inspected. For those it repairs:
+      - Double/triple-wrapped cmd values (via _unwrap_cmd)
+      - cmd that is still a JSON object carrying cmd/command/c
+      - cmd containing escaped newlines or quotes that would break bash
+      - Empty cmd, or cmd that still looks like JSON -> replaced with a
+        commented diagnostic string
+
+    Calls with any other name, with arguments that cannot be decoded, or with
+    arguments that do not decode to an object are passed through untouched.
+
+    Logs a line to stderr for every repair. Returns a list with one entry per
+    input call, in the same order; repaired call dicts are updated in place
+    (their "arguments" is rewritten as a JSON string).
+    """
+    cleaned = []
+    for i, call in enumerate(calls):
+        # [FIX 18] Skip sanitization pass for non-shell tool calls (e.g., TodoWrite)
+        # Sanitization specifically validates and repairs command shell executions (the 'cmd' argument).
+        # Running it on other tools without a 'cmd' parameter (like TodoWrite) would falsely flag
+        # them as containing JSON garbage or empty commands, corrupting their actual parameters.
+        if call.get("name") != "exec_command":
+            cleaned.append(call)
+            continue
+
+        args_raw = call.get("arguments", "{}")
+        try:
+            args = json.loads(args_raw) if isinstance(args_raw, str) else dict(args_raw)
+        except Exception:
+            cleaned.append(call)
+            continue
+        if not isinstance(args, dict):
+            # e.g. arguments == "[]": there is no cmd to repair, and the
+            # .get() calls below would raise on a non-dict.
+            cleaned.append(call)
+            continue
+        cmd = args.get("cmd", "")
+        repaired = False
+
+        # Detect and unwrap nested JSON cmd values
+        unwrapped = _unwrap_cmd(cmd)
+        if unwrapped != cmd:
+            cmd = unwrapped
+            args["cmd"] = cmd
+            repaired = True
+
+        # Detect cmd that is still a JSON object (unwrap missed it or deeper nesting)
+        if isinstance(cmd, str) and cmd.strip().startswith("{"):
+            try:
+                inner = json.loads(cmd)
+                if isinstance(inner, dict):
+                    for key in ("cmd", "command", "c"):
+                        if key in inner and isinstance(inner[key], str):
+                            args["cmd"] = inner[key]
+                            repaired = True
+                            break
+            except Exception:
+                pass
+
+        # Detect cmd that looks like a JSON-encoded string with backslash escapes
+        _cmd = args.get("cmd", "")
+        if _cmd and ('\\"' in _cmd or "\\n" in _cmd or _cmd.count("{") > _cmd.count("}")):
+            try:
+                # unicode_escape reads its input as Latin-1; going through
+                # UTF-8 would garble every non-ASCII character. Latin-1 with
+                # backslashreplace round-trips all code points intact.
+                decoded = _cmd.encode("latin-1", "backslashreplace").decode("unicode_escape")
+                if decoded != _cmd and not decoded.startswith("{"):
+                    args["cmd"] = decoded
+                    repaired = True
+            except Exception:
+                pass
+
+        # Final guard: if cmd is empty or just JSON garbage, make it obvious
+        _final_cmd = args.get("cmd", "")
+        if not _final_cmd or _final_cmd.strip() in ("{}", "null", "None", ""):
+            # str() because arguments may have been supplied as a dict,
+            # which cannot be sliced.
+            _safe_preview = str(args_raw)[:200].replace('"', "'").replace('\\', '/')
+            args["cmd"] = f"# [CC-SANITIZER] empty cmd recovered from: {_safe_preview}"
+            repaired = True
+        elif _final_cmd.startswith("{") and len(_final_cmd) < 500:
+            # Still looks like JSON — likely unrecoverable, flag it
+            _safe_preview = _final_cmd.replace('"', "'").replace('\\', '/')
+            args["cmd"] = f"# [CC-SANITIZER] suspicious cmd (still JSON): {_safe_preview}"
+            repaired = True
+
+        if repaired:
+            print(f"[translate-proxy] [CC-SANITIZER] repaired tool call #{i}: "
+                  f"name={call.get('name')} cmd_preview={str(args.get('cmd',''))[:120]}",
+                  file=sys.stderr)
+
+        call["arguments"] = json.dumps(args, ensure_ascii=False)
+        cleaned.append(call)
+
+    return cleaned
+
+def _parse_cc_line(line):
+    """Decode one line of a CommandCode /alpha/generate response.
+
+    Surrounding whitespace and an optional SSE "data:" prefix are removed
+    before the remainder is parsed as JSON. Returns the parsed value, or None
+    for blank lines, the "[DONE]" sentinel, and anything that is not valid JSON.
+    """
+    payload = line.strip()
+    for prefix in ("data: ", "data:"):
+        if payload.startswith(prefix):
+            payload = payload[len(prefix):]
+            break
+    if payload in ("", "[DONE]"):
+        return None
+    try:
+        parsed = json.loads(payload)
+    except json.JSONDecodeError:
+        return None
+    return parsed
+
+
+def _iter_cc_events(stream):
+    """Yield parsed JSON events from a CommandCode /alpha/generate stream.
+
+    Chunks come from _stream_with_idle_timeout(stream) and are decoded as
+    UTF-8, split on newlines, and each line is handed to _parse_cc_line();
+    lines it rejects (returns None for) are skipped. This covers raw JSON
+    lines, SSE "data:" events, and chunks that carry several events.
+
+    Decoding is incremental, so a multi-byte character that is split across
+    two chunks is reassembled instead of being replaced by U+FFFD on both
+    sides of the boundary. Genuinely invalid bytes are still replaced.
+
+    Text left over after the last newline (e.g. a non-streaming single-JSON
+    response) is parsed as one final line.
+    """
+    import codecs  # function-scope import keeps this block self-contained
+
+    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
+    buf = ""
+    for chunk in _stream_with_idle_timeout(stream):
+        buf += decoder.decode(chunk)
+        while "\n" in buf:
+            line, buf = buf.split("\n", 1)
+            d = _parse_cc_line(line)
+            if d is not None:
+                yield d
+    # Flush bytes of an incomplete trailing sequence (becomes U+FFFD).
+    buf += decoder.decode(b"", final=True)
+    # The loop above consumed every newline, so whatever remains is a single
+    # line; no further splitting is needed.
+    if buf.strip():
+        d = _parse_cc_line(buf)
+        if d is not None:
+            yield d
+
+
+def cc_resp_to_responses(cc_lines, model, resp_id=None):
+    """Build a completed "response" object from CommandCode output lines.
+
+    cc_lines is a single string or an iterable of strings; each one is parsed
+    with _parse_cc_line(). "text-delta" events are concatenated into the
+    assistant message text and the usage of the last "finish-step" event is
+    reported. Lines that do not parse to a JSON object are ignored, and a
+    missing or null usage block / token count is treated as 0.
+
+    Returns a dict with id (resp_id, or a fresh uid("resp")), object, created,
+    model, status, output (one assistant message, or empty when no text was
+    produced) and usage.
+    """
+    if isinstance(cc_lines, str):
+        cc_lines = [cc_lines]
+    text_parts = []
+    input_tokens = 0
+    output_tokens = 0
+    for line in cc_lines:
+        d = _parse_cc_line(line)
+        if not isinstance(d, dict):
+            # None (unparseable) or a bare JSON scalar/array: not an event.
+            continue
+        t = d.get("type", "")
+        if t == "text-delta":
+            piece = d.get("text")
+            if isinstance(piece, str):
+                text_parts.append(piece)
+        elif t == "finish-step":
+            u = d.get("usage")
+            if not isinstance(u, dict):
+                u = {}
+            # "or 0" also covers an explicit null token count.
+            input_tokens = u.get("inputTokens") or 0
+            output_tokens = u.get("outputTokens") or 0
+    text = "".join(text_parts)
+    outputs = []
+    if text:
+        outputs.append({"type": "message", "id": uid("msg"), "role": "assistant",
+                         "status": "completed",
+                         "content": [{"type": "output_text", "text": text, "annotations": []}]})
+    return {"id": resp_id or uid("resp"), "object": "response", "created": int(time.time()),
+            "model": model, "status": "completed", "output": outputs,
+            "usage": {"input_tokens": input_tokens,
+                      "output_tokens": output_tokens,
+                      "total_tokens": input_tokens + output_tokens,
+                      "input_tokens_details": {"cached_tokens": 0}}}
+
+def cc_stream_to_sse(cc_stream, model, req_id):
+    """Translate a CommandCode event stream into Responses-API SSE events (generator).
+
+    "text-delta" text is buffered until upstream ends; tool calls are then
+    recovered from it (parser, raw-JSON/regex fallback, stuck-recovery
+    heuristics) and emitted as function_call items after the leftover text.
+    """
+    resp_id = req_id or uid("resp")
+    msg_id = uid("msg")
+    text_buf = ""
+
+    yield emit("response.created", {"type": "response.created",
+        "response": {"id": resp_id, "object": "response", "model": model,
+                     "status": "in_progress", "created": int(time.time()), "output": []}})
+    yield emit("response.in_progress", {"type": "response.in_progress", "response": {"id": resp_id}})
+
+    total_usage = {}
+    _event_types_seen = set()
+    _debug_log_path = os.path.expanduser("~/.cache/codex-proxy/cc-debug.log")
+
+    def _deflog(*a, **kw):
+        # [FIX 14] always write debug to FILE (stderr may be piped). Best-effort and
+        # opened per call: a missing/unwritable log must not abort the stream or leak.
+        try:
+            os.makedirs(os.path.dirname(_debug_log_path), exist_ok=True)
+            with open(_debug_log_path, "a", encoding="utf-8", errors="replace") as _fh:
+                print(*a, file=_fh, **kw)
+        except OSError:
+            pass
+
+    for d in _iter_cc_events(cc_stream):
+        t = d.get("type", "")
+        _event_types_seen.add(t)
+        if t == "text-delta":
+            text_buf += d.get("text") or ""
+        elif t == "finish-step":
+            u = d.get("usage") or {}
+            _in, _out = u.get("inputTokens", 0), u.get("outputTokens", 0)
+            total_usage = {"input_tokens": _in, "output_tokens": _out, "total_tokens": _in + _out}
+        else:
+            _deflog(f"[CC-DEBUG] unexpected event type: {t} keys={list(d.keys())[:5]} data={str(d)[:200]}")
+
+    _deflog(f"[CC-DEBUG] stream ended. event_types={_event_types_seen} text_buf_len={len(text_buf)}")
+
+    parsed_tool_calls = _parse_commandcode_text_tool_calls(text_buf)
+    _deflog(f"[CC-DEBUG] text_buf len={len(text_buf)} parsed_tool_calls={len(parsed_tool_calls)} "
+          f"text_preview={text_buf[:500]!r}")
+    for ti, tc in enumerate(parsed_tool_calls):
+        _deflog(f"[CC-DEBUG]   tool_call[{ti}] name={tc.get('name')} args_preview={tc.get('arguments','')[:150]!r}")
+
+    # [FIX 13] FALLBACK: if parser returned empty but text contains tool-call patterns,
+    # force-extract using regex. This catches cases where model output format
+    # doesn't match any of our named patterns (XML/raw JSON/function=).
+    if not parsed_tool_calls and len(text_buf) > 20:
+        _has_tc_signals = (
+            ('"type"' in text_buf and any(s in text_buf for s in ('tool-call', 'tool_call', 'function_call')))
+            or ('<tool' in text_buf.lower() and '<parameter' in text_buf.lower())
+            or '<function=' in text_buf
+            or '{"cmd":' in text_buf or '{"command":' in text_buf
+        )
+        if _has_tc_signals:
+            _deflog(f"[CC-DEBUG] Parser returned empty but text has tool-call signals! Attempting fallback...")
+            # Try direct raw JSON extraction on entire buffer
+            _fallback_calls = _extract_raw_json_tool_calls(text_buf) or []
+            if not _fallback_calls:
+                # [FIX 14b] Match BOTH "cmd" and "command" keys (model uses both).
+                # The pattern must run through the closing "}" or json.loads() can never succeed.
+                for _m in re.finditer(r'\{[^{}]*"(?:command|cmd)"\s*:\s*"(?:[^"\\]|\\.)*"[^{}]*\}', text_buf):
+                    try:
+                        _args = json.loads(_m.group(0))
+                        if isinstance(_args, dict) and ("cmd" in _args or "command" in _args):
+                            _cmd_val = _unwrap_cmd(_args.get("cmd") or _args.get("command", ""))
+                            _args["cmd"] = _cmd_val
+                            # Copy description as justification if present
+                            if "description" in _args:
+                                _args["justification"] = _args["description"]
+                            _fallback_calls.append({
+                                "full_match": _m.group(0),
+                                "name": "exec_command",
+                                "arguments": json.dumps(_args, ensure_ascii=False),
+                            })
+                    except Exception:
+                        continue
+            if _fallback_calls:
+                _deflog(f"[CC-DEBUG] Fallback extracted {len(_fallback_calls)} tool calls!")
+                for _fi, _fc in enumerate(_fallback_calls):
+                    _deflog(f"[CC-DEBUG]   fallback[{_fi}] name={_fc.get('name')} args={_fc.get('arguments','')[:120]!r}")
+                parsed_tool_calls = _fallback_calls
+            else:
+                _deflog(f"[CC-DEBUG] Fallback also failed. text_buf first 500: {text_buf[:500]!r}")
+
+    # [FIX 25] SELF-HEALING STUCK DETECTOR
+    # When ALL parsers returned empty and text has intent signals, synthesize a
+    # command so the agent loop doesn't stall. This catches:
+    #   - Bare text with no tool call format at all
+    #   - Unrecognized XML-ish blocks
+    #   - Partial JSON (bare "{")
+    #   - Model explaining what it wants to do but not producing a tool call
+    if not parsed_tool_calls and len(text_buf) > 10:
+        _synth_cmd = None
+        _synth_just = None
+        _tl = text_buf.lower()
+
+        # Heuristic 1: URL in text → fetch it
+        _url_in_text = re.search(r"https?://[^\s\]'\\>\",]+", text_buf)
+        if _url_in_text:
+            _synth_url = _url_in_text.group(0).rstrip(")].,;'\\\"")
+            _synth_cmd = f"curl -sL --max-time 15 '{_synth_url}' 2>/dev/null | head -200"
+            _synth_just = "Auto-synthesized: URL detected in text, fetching"
+
+        # Heuristic 2: File path references → list or read
+        if not _synth_cmd:
+            _file_m = re.search(r"(?:read|open|view|check|examine|cat|show)\s+(?:the\s+)?(?:file\s+)?[`'\"]?(/[^\s'\"]+\.\w+)", _tl)
+            if _file_m:
+                _fpath = _file_m.group(1)
+                _synth_cmd = f"cat '{_fpath}' 2>/dev/null | head -200 || ls -la '{_fpath}'"
+                _synth_just = f"Auto-synthesized: file reference detected ({_fpath})"
+
+        # Heuristic 3: Shell command mentioned in backticks or quotes (NOTE(review): model-derived text becomes the command verbatim)
+        if not _synth_cmd:
+            _shell_m = re.search(r"[`'\"]((?:curl|wget|git|npm|pip|python|ls|cat|grep|find|mkdir|cd|rm|cp|mv|chmod|docker|make|cargo|go)\s[^\s`'\"]+)", text_buf)
+            if _shell_m:
+                _synth_cmd = _shell_m.group(1)
+                _synth_just = "Auto-synthesized: shell command detected in text"
+
+        # Heuristic 4: "explore" or "fetch" intent + last user URL
+        if not _synth_cmd and ("explore" in _tl or "fetch" in _tl or "investigate" in _tl or "repository" in _tl):
+            for _prev_url in _last_user_urls:
+                _url_m2 = re.search(r"https?://[^\s\]'\\>\",]+", _prev_url)
+                if _url_m2:
+                    _pu = _url_m2.group(0).rstrip(")].,;'\\\"")
+                    _ecmd, _ejust = _build_explore_cmd(_pu)
+                    if _ecmd:
+                        _synth_cmd = _ecmd
+                        _synth_just = _ejust or "Auto-synthesized: explore intent with last user URL"
+                    break
+
+        # Heuristic 5: Generic "I need to" / "let me" / "I'll" intent with command-like text
+        if not _synth_cmd:
+            _intent_m = re.search(r"(?:I(?:'ll| will| need to| should)|let me|please)\s+(.+?)(?:\.|!|\n|$)", _tl, re.IGNORECASE)
+            if _intent_m:
+                _intent_text = _intent_m.group(1).strip()
+                if len(_intent_text) > 10 and len(_intent_text) < 200:
+                    import shlex  # intent text is untrusted model output: quote it for the shell
+                    _synth_cmd = "echo " + shlex.quote(f"Stuck recovery: model intent was: {_intent_text[:100]}")
+                    _synth_just = f"Auto-synthesized from intent text: {_intent_text[:80]}"
+
+        if _synth_cmd:
+            parsed_tool_calls = [{
+                "full_match": "__synth_stuck_recovery__",
+                "name": "exec_command",
+                "arguments": json.dumps({"cmd": _synth_cmd, "justification": _synth_just or "Auto-synthesized stuck recovery"}, ensure_ascii=False),
+            }]
+            _deflog(f"[CC-DEBUG] [STUCK-RECOVERY] Synthesized: cmd={_synth_cmd[:120]!r}")
+            print(f"[CC-DEBUG] [STUCK-RECOVERY] Synthesized command from text intent", file=sys.stderr, flush=True)
+
+    # Also log to stderr for visibility when not piped
+    print(f"[CC-DEBUG] text_buf={len(text_buf)} chars, tool_calls={len(parsed_tool_calls)}", file=sys.stderr, flush=True)
+
+    clean_text = text_buf
+    for tc in parsed_tool_calls:
+        clean_text = clean_text.replace(tc["full_match"], "")
+    clean_text = clean_text.strip()
+
+    if clean_text:
+        yield emit("response.output_item.added", {"type": "response.output_item.added",
+            "item": {"type": "message", "id": msg_id, "role": "assistant", "status": "in_progress", "content": []}})
+        yield emit("response.content_part.added", {"type": "response.content_part.added",
+            "part": {"type": "output_text", "text": "", "annotations": []}, "item_id": msg_id})
+        yield emit("response.output_text.delta", {"type": "response.output_text.delta",
+                    "delta": clean_text, "item_id": msg_id, "content_index": 0})
+        yield emit("response.output_text.done", {"type": "response.output_text.done",
+                    "text": clean_text, "item_id": msg_id, "content_index": 0})
+        yield emit("response.content_part.done", {"type": "response.content_part.done",
+                    "part": {"type": "output_text", "text": clean_text, "annotations": []}, "item_id": msg_id})
+        yield emit("response.output_item.done", {"type": "response.output_item.done",
+            "item": {"type": "message", "id": msg_id, "role": "assistant", "status": "completed",
+                     "content": [{"type": "output_text", "text": clean_text, "annotations": []}]}})
+
+    function_outputs = []
+    for tc in parsed_tool_calls:
+        fid = uid("fc")
+        call_id = uid("call")
+        item = {"type": "function_call", "id": fid, "call_id": call_id,
+                "name": tc["name"], "arguments": tc["arguments"], "status": "completed"}
+        function_outputs.append(item)
+        yield emit("response.output_item.added", {"type": "response.output_item.added", "item": item})
+        yield emit("response.function_call_arguments.done", {"type": "response.function_call_arguments.done",
+                    "item_id": fid, "name": tc["name"], "arguments": tc["arguments"]})
+        yield emit("response.output_item.done", {"type": "response.output_item.done", "item": item})
+
+    final_out = []
+    if clean_text:
+        final_out.append({"type": "message", "id": msg_id, "role": "assistant", "status": "completed",
+                          "content": [{"type": "output_text", "text": clean_text, "annotations": []}]})
+    final_out.extend(function_outputs)
+    yield emit("response.completed", {"type": "response.completed",
+        "response": {"id": resp_id, "object": "response", "model": model,
+                     "status": "completed", "created": int(time.time()), "output": final_out,
+                     "usage": total_usage}})
+
+# ═══════════════════════════════════════════════════════════════════
+# Auto-sensing provider adapter
+# ═══════════════════════════════════════════════════════════════════
+
+_SENTINEL = object()  # fresh unique object; presumably an identity-compared "no value" marker — confirm at its use sites (none visible in this section)
+
+@dataclasses.dataclass
+class ProviderSchema:
+    """Describes what message formats a provider supports.
+
+    Populated by probing the endpoint and/or analyzing error responses.
+    Cached in provider-caps.json so probing only happens once per provider.
+    """
+    supported_roles: tuple = ("user", "assistant")
+    content_type: str = "string"  # "string" | "array"
+    content_block_types: tuple = ()  # e.g. ("text", "tool_result", "tool-call")
+    tool_result_style: str = "inline"  # "inline" | "tool_result_block" | "anthropic"
+    tool_call_style: str = "openai_function"  # "openai_function" | "tool-call" | "anthropic_tool_use"
+    accepts_tool_role: bool = False
+    accepts_system_role: bool = True
+    cc_body_wrap: bool = False  # needs {config, params, threadId} wrapping
+    field_names: dict = dataclasses.field(default_factory=dict)
+    auth_type: str = ""  # "bearer" | "x-api-key" | "custom"
+    auth_header: str = "Authorization"  # header name for auth
+    auth_scheme: str = "Bearer "  # prefix for auth value
+    tool_decl_format: str = "openai"  # "openai" | "anthropic" | "command_code"
+    param_names: dict = dataclasses.field(default_factory=lambda: {
+        "max_tokens": "max_tokens",
+        "temperature": "temperature",
+        "top_p": "top_p",
+    })
+    response_format: str = "auto"  # "sse" | "raw_json" | "ndjson" | "auto"
+    stream_format: str = "auto"  # "sse_data" | "sse_event" | "raw_lines" | "json_lines"
+
+    def hints(self) -> dict:
+        """Return a dict for storing in provider-caps.json.
+
+        Empty containers, False, "" and "auto" are omitted only when they are
+        also the field's default. A learned non-default such as
+        accepts_system_role=False or auth_scheme="" must be stored, because
+        loading substitutes the default for every missing key.
+        """
+        defaults = dataclasses.asdict(type(self)())
+        d = {}
+        for k, v in dataclasses.asdict(self).items():
+            is_empty = isinstance(v, (list, tuple, dict)) and not v
+            if (is_empty or v is False or v in ("", "auto")) and v == defaults[k]:
+                continue
+            d[k] = v
+        return d
+
+
+class ErrorAnalyzer:
+    """Parse upstream error responses to infer provider schema.
+    Analyzes 400, 401, 422 errors for hints about auth, roles, content format,
+    parameter names, field names, tool format, and response format.
+    """
+
+    @staticmethod
+    def analyze(error_text: str, current: "ProviderSchema | None" = None) -> dict:
+        """Return a dict of schema hints inferred from one error body ({} if empty); `current` is unused."""
+        hints = {}
+        if not error_text:
+            return hints
+        err = error_text.lower()  # every pattern matched against err must therefore be lower-case
+
+        # ── Auth detection (401 errors) ──
+        if re.search(r"unauthorized|invalid.*api.?key|missing.*api.?key|x-api-key", err):
+            hints["auth_type"] = "x-api-key"
+            hints["auth_header"] = "x-api-key"
+            hints["auth_scheme"] = ""
+        elif re.search(r"invalid.*bearer|bearer.*token|authorization.*header|invalid.*token", err):
+            hints["auth_type"] = "bearer"
+            hints["auth_header"] = "Authorization"
+            hints["auth_scheme"] = "Bearer "
+
+        # ── Role validation ──
+        if re.search(r"role.*expected.*(?:user|assistant)", err):
+            hints["accepts_tool_role"] = False
+            hints["accepts_function_role"] = False
+        if re.search(r"role.*(?:tool|function).*(?:invalid|not.*(?:support|allow))", err):
+            hints["accepts_tool_role"] = False
+            hints["accepts_function_role"] = False
+        if re.search(r"role.*system.*(?:invalid|not.*(?:support|allow))", err):
+            hints["accepts_system_role"] = False
+
+        # ── Content format (top-level only, not content[i].xxx) ──
+        if re.search(r'params\.messages\[\d+\]\.content', err):
+            # Explicit path to content field in a messages array (e.g. /alpha/generate)
+            if re.search(r"expected string.*received array", err):
+                hints["content_type"] = "string"
+                hints["tool_result_style"] = "inline"  # no tool_result blocks allowed
+            elif re.search(r"expected array.*received string", err):
+                hints["content_type"] = "array"
+        elif re.search(r"(?<!\w)content(?!\[)\s*(?:of type|field|should be|expected|must be).*(?:string|array)", err) or \
+             re.search(r"expected (?:string|array).*content", err):
+            if re.search(r"expected string", err) and not re.search(r"expected array", err):
+                hints["content_type"] = "string"
+            elif re.search(r"expected array", err):
+                hints["content_type"] = "array"
+        elif re.search(r"content.*expected string.*received array", err) and not re.search(r"\[\d*\]", err):
+            hints["content_type"] = "string"
+        elif re.search(r"content.*expected array.*received string", err) and not re.search(r"\[\d*\]", err):
+            hints["content_type"] = "array"
+
+        # ── Content block types ──
+        types = set()
+        for m in re.finditer(
+            r'expected\s+"('
+            r'text|image|document|search_result|thinking|redacted_thinking|reasoning|'
+            r'tool_use|tool-call|tool_result|tool-result|'
+            r'server_tool_use|web_search_tool_result|web_fetch_tool_result|tool'
+            r')"', err
+        ):
+            types.add(m.group(1))
+        # Also detect from "expected string, received array at params.messages[i].content" pattern
+        # where the "or" clauses list valid block types
+        if not types and re.search(r'params\.messages\[\d+\]\.content', err):
+            for valid_type in ("text", "image", "document", "tool_use", "tool-call", "tool_result"):
+                if re.search(r'expected\s+"' + re.escape(valid_type) + r'"', err):
+                    types.add(valid_type)
+        if types:
+            hints["content_block_types"] = tuple(sorted(types))
+
+        # ── Tool result style ──
+        if re.search(r"tool_result", err):
+            hints["tool_result_style"] = "tool_result_block"
+        elif re.search(r"tool_use", err) and not re.search(r"tool.use", err):  # NOTE(review): "tool.use" also matches "tool_use", so this branch never runs
+            hints["tool_result_style"] = "anthropic"
+
+        # ── Tool call style ──
+        if re.search(r"tool-call", err) or re.search(r"tool_call", err):
+            hints["tool_call_style"] = "tool-call"
+        elif re.search(r"tool_use", err):
+            hints["tool_call_style"] = "anthropic_tool_use"
+
+        # ── CC body wrap detection ──
+        if re.search(r"(?:params\.|body\.)config", err) or re.search(r"threadid", err):
+            hints["cc_body_wrap"] = True
+
+        # ── Field name mappings (keys MUST match SchemaAdapter lookups) ──
+        fields = {}
+        if re.search(r"tool_use_id", err):
+            fields["tool_use_id"] = "tool_use_id"
+        if re.search(r"toolcallid", err):
+            fields["toolCallId"] = "toolCallId"
+            # SchemaAdapter._tool_result_block looks up "tool_use_id"
+            fields["tool_use_id"] = "toolCallId"
+        if re.search(r"tool_result", err) and not re.search(r"tool.result", err):  # NOTE(review): "tool.result" also matches "tool_result", so this never runs
+            fields["tool_result_type"] = "tool_result"
+        if re.search(r"tool-result", err):
+            fields["tool_result_type"] = "tool-result"
+        # Detect tool call field names from errors
+        if re.search(r"(?:id|call_id|callid|tool_use_id).*(?:invalid|unknown|expected|required)", err) or \
+           re.search(r"(?:expected|required).*(?:id|call_id|callid)", err):
+            for alt in ("id", "call_id", "callId", "tool_use_id"):  # NOTE(review): "id" is a substring of the others, so it always wins
+                if alt.lower() in err:
+                    fields["tool_call_id_field"] = alt
+                    break
+        if re.search(r"(?:name|tool_name|function).*(?:invalid|unknown|expected|required)", err) or \
+           re.search(r"(?:expected|required).*(?:name|tool_name)", err):
+            for alt in ("name", "tool_name", "function"):
+                if alt in err:
+                    fields["tool_call_name_field"] = alt
+                    break
+        if re.search(r"arguments.*(?:invalid|unknown|expect|required)", err) or \
+           re.search(r"input.*(?:invalid|unknown|expect|required)", err):
+            if re.search(r"input_schema|input\b", err) and not re.search(r"arguments", err):
+                fields["tool_call_args_field"] = "input"
+                fields["tool_args_field"] = "input"
+            else:
+                fields["tool_call_args_field"] = "arguments"
+                fields["tool_args_field"] = "arguments"
+
+        # ── Supported roles from error ──
+        if re.search(r"params\.messages\[\d+\]\.role", err):
+            roles = re.findall(r'expected one of\s+"([^"]+)"', err)
+            if roles:
+                hints["supported_roles"] = tuple(r.strip() for r in roles[0].split("|"))
+        if fields:
+            hints["field_names"] = fields
+
+        # ── Parameter name negotiation ──
+        param_hints = {}
+        if re.search(r"max_tokens.*(?:invalid|unknown|not.*(?:support|recognize))", err) or \
+           re.search(r"(?:unknown|invalid).*param.*max_tokens", err):
+            for alt in ("max_output_tokens", "max_tokens_to_sample", "max_new_tokens", "max_token"):
+                if alt.lower() in err:
+                    param_hints["max_tokens"] = alt
+                    break
+        if re.search(r"temperature.*(?:invalid|unknown)", err):
+            for alt in ("creation_temperature", "temp", "model_temperature"):
+                if alt.lower() in err:
+                    param_hints["temperature"] = alt
+                    break
+        if re.search(r"top_p.*(?:invalid|unknown)", err):
+            for alt in ("top_p", "nucleus_sampling"):
+                if alt.lower() in err:
+                    param_hints["top_p"] = alt
+                    break
+        if param_hints:
+            hints["param_names"] = param_hints
+
+        # ── Tool declaration format ──
+        if re.search(r"tools.*input_schema", err) or re.search(r"input_schema.*required", err):
+            hints["tool_decl_format"] = "anthropic"
+        elif re.search(r"tools.*function.*(?:required|expected)", err):
+            hints["tool_decl_format"] = "openai"
+        elif re.search(r"tool-call|tool_call.*format", err):
+            hints["tool_decl_format"] = "command_code"
+
+        # ── Response/Stream format hints from content-type or error ──
+        if re.search(r"content.type.*text/event.stream", err) or \
+           re.search(r"stream.*sse|sse.*expected", err):
+            hints["stream_format"] = "sse_data"
+        if re.search(r"ndjson|json.*lines", err):
+            hints["stream_format"] = "json_lines"
+
+        return hints
+
+    @staticmethod
+    def merge_into_schema(hints: dict, schema: "ProviderSchema") -> "ProviderSchema":
+        """Apply hints to schema in place and return it; keys that are not schema attributes are ignored."""
+        for k, v in hints.items():
+            if k == "field_names" and isinstance(v, dict):
+                schema.field_names.update(v)
+            elif k == "param_names" and isinstance(v, dict):
+                schema.param_names.update(v)
+            elif hasattr(schema, k):
+                setattr(schema, k, v)
+        return schema
+
+
+def _schema_cache_key(target_url=None, backend=None, model=None):  # -> "auto-schema|<backend>|<host[:port]>|<model or *>"
+    netloc = urllib.parse.urlsplit(target_url or TARGET_URL).netloc  # authority part only; scheme, path and query are ignored
+    return f"auto-schema|{backend or BACKEND}|{netloc.lower()}|{model or '*'}"
+
+
+def _load_schema(target_url=None, backend=None, model=None):
+    """Rebuild a ProviderSchema from the provider-caps cache.
+
+    The entry for the exact model is preferred over the "*" entry of the
+    same backend/host. No entry, or an entry whose "_updated" stamp is more
+    than 24h old, gives a default ProviderSchema so it is learned again.
+    """
+    caps = _load_provider_caps()
+
+    # Model-specific entry first, then the per-host wildcard.
+    exact = caps.get(_schema_cache_key(target_url, backend, model))
+    wildcard = caps.get(_schema_cache_key(target_url, backend, model="*"))
+    data = exact or wildcard or {}
+    if not data:
+        return ProviderSchema()
+
+    # Staleness check: re-learn after 24h (86400s)
+    updated = data.get("_updated", 0)
+    is_number = isinstance(updated, (int, float))
+    if is_number and time.time() - updated > 86400:
+        print(f"[auto-sense] cached schema stale ({int(time.time()-updated)}s old), re-learning", file=sys.stderr)
+        return ProviderSchema()
+
+    # Missing keys fall back to the dataclass defaults; sequences and
+    # mappings are copied into the container type each field declares.
+    defaults = ProviderSchema()
+    kwargs = {}
+    for f in dataclasses.fields(ProviderSchema):
+        value = data.get(f.name, getattr(defaults, f.name))
+        if f.name in ("supported_roles", "content_block_types"):
+            value = tuple(value)
+        elif f.name in ("field_names", "param_names"):
+            value = dict(value)
+        kwargs[f.name] = value
+    return ProviderSchema(**kwargs)
+
+
+def _save_schema(schema: ProviderSchema, target_url=None, backend=None, model=None):
+    """Store schema.hints() plus "_updated"/"_backend" stamps under the schema cache key, then persist the caps."""
+    caps = _load_provider_caps()
+    cache_key = _schema_cache_key(target_url, backend, model)
+    entry = caps[cache_key] = schema.hints()
+    entry.update(_updated=time.time(), _backend=backend or BACKEND)
+    _save_provider_caps()
+    print(f"[auto-sense] cached schema {cache_key}", file=sys.stderr)
+
+
+class SchemaAdapter:
+    """Convert Responses API messages based on a detected ProviderSchema."""
+
+    def __init__(self, schema: ProviderSchema):
+        self.s = schema
+
+    def convert(self, input_data, instructions=""):
+        if self.s.content_type == "string" and not self.s.content_block_types:
+            return self._to_plain_string(input_data, instructions)
+        return self._to_content_blocks(input_data, instructions)
+
+    def _to_plain_string(self, input_data, instructions=""):
+        """Fallback: user/assistant string content — no tool roles."""
+        msgs = []
+        if instructions and self.s.accepts_system_role:
+            msgs.append({"role": "system", "content": instructions})
+        elif instructions:
+            msgs.append({"role": "user", "content": instructions})
+        if isinstance(input_data, str):
+            msgs.append({"role": "user", "content": input_data})
+            return msgs
+        if not isinstance(input_data, list):
+            return msgs
+        last_flushed = []
+        pending = []
+        for item in input_data:
+            t = item.get("type")
+            if t == "function_call":
+                cid = item.get("call_id") or item.get("id") or uid("fc")
+                pending.append({"id": cid, "name": item.get("name", ""),
+                                "arguments": item.get("arguments", "{}")})
+                continue
+            if pending:
+                last_flushed = [p["id"] for p in pending]
+                msgs.append({"role": "assistant", "content": None,
+                             "tool_calls": [{"id": p["id"], "type": "function",
+                                             "function": {"name": p["name"],
+                                                          "arguments": p["arguments"]}}
+                                            for p in pending]})
+                pending = []
+            if t == "message":
+                role = "user" if item.get("role") in ("user", "developer") else "assistant"
+                text = _extract_text(item.get("content", []))
+                if text:
+                    msgs.append({"role": role, "content": text})
+            elif t == "function_call_output":
+                out = item.get("output", "")
+                if not isinstance(out, str):
+                    out = json.dumps(out, ensure_ascii=False)
+                msgs.append({"role": "user", "content": out[:8000]})
+        if pending:
+            last_flushed = [p["id"] for p in pending]
+            msgs.append({"role": "assistant", "content": None,
+                         "tool_calls": [{"id": p["id"], "type": "function",
+                                         "function": {"name": p["name"],
+                                                      "arguments": p["arguments"]}}
+                                        for p in pending]})
+        return msgs
+
+    def _to_content_blocks(self, input_data, instructions=""):
+        msgs = []
+        pending_tc = []
+        tool_name_by_id = {}
+        last_ids = []
+
+        def flush():
+            nonlocal last_ids
+            if not pending_tc:
+                return
+            last_ids = [t["id"] for t in pending_tc]
+            msgs.append({"role": "assistant", "content": pending_tc})
+            pending_tc.clear()
+
+        _str = self.s.content_type == "string"
+
+        if instructions:
+            msgs.append({"role": "user", "content": instructions if _str else [{"type": "text", "text": instructions}]})
+
+        if isinstance(input_data, str):
+            msgs.append({"role": "user", "content": input_data if _str else [{"type": "text", "text": input_data}]})
+            return msgs
+        if not isinstance(input_data, list):
+            return msgs
+
+        for item in input_data:
+            t = item.get("type")
+            if t == "function_call":
+                cid = item.get("call_id") or item.get("id") or uid("call")
+                nm = item.get("name") or "exec_command"
+                tool_name_by_id[cid] = nm
+                tc_block = self._tool_call_block(cid, nm, item.get("arguments", "{}"))
+                if tc_block:
+                    pending_tc.append(tc_block)
+                continue
+            flush()
+            if t == "message":
+                role = "user" if item.get("role") in ("user", "developer") else "assistant"
+                text = _extract_text(item.get("content", []))
+                if text:
+                    msgs.append({"role": role, "content": text if _str else [{"type": "text", "text": text}]})
+            elif t == "function_call_output":
+                cid = item.get("call_id") or item.get("id") or ""
+                if not cid and last_ids:
+                    idx = sum(1 for m in msgs for c in (m.get("content") or [])
+                              if isinstance(c, dict) and c.get("type") in
+                              ("tool_result", "tool-result"))
+                    if idx < len(last_ids):
+                        cid = last_ids[idx]
+                out = item.get("output", "")
+                if not isinstance(out, str):
+                    out = json.dumps(out, ensure_ascii=False)
+                tr = self._tool_result_block(cid, out)
+                if tr:
+                    msgs.append({"role": "user", "content": [tr]})
+        flush()
+        return msgs
+
+    def _tool_call_block(self, cid, name, args):
+        """Return a tool-call block for self.s.tool_call_style, or None for other styles."""
+        style = self.s.tool_call_style
+        fn = self.s.field_names  # .get() lookups below fall back to default key names
+        if style == "tool-call":
+            return {
+                "type": fn.get("tool_call_type", "tool-call"),
+                fn.get("tool_call_id_field", "id"): cid,
+                fn.get("tool_call_name_field", "name"): name,
+                fn.get("tool_call_args_field", "arguments"): args,  # passed through untouched
+            }
+        if style == "anthropic_tool_use":
+            try:
+                parsed = args if isinstance(args, dict) else json.loads(args)  # keep decoded dicts
+            except (TypeError, ValueError, RecursionError):
+                parsed = {}  # undecodable arguments degrade to an empty input
+            return {
+                "type": fn.get("tool_use_type", "tool_use"),
+                fn.get("tool_call_id_field", "id"): cid,
+                fn.get("tool_call_name_field", "name"): name,
+                fn.get("tool_call_args_field", "input"): parsed,
+            }
+        return None  # handled as OpenAI function call
+
+    def _tool_result_block(self, cid, output):
+        """Wrap tool output (cut to 8000 chars) in the block shape for self.s.tool_result_style."""
+        kind = self.s.tool_result_style
+        names = self.s.field_names
+        if kind not in ("tool_result_block", "anthropic"):
+            return None  # inline — handled by _to_plain_string
+        if kind == "tool_result_block":
+            payload = [{"type": "text", "text": output[:8000]}]  # list-of-parts form
+        else:
+            payload = output[:8000]  # bare string form
+        # Key names come from field_names when it overrides them, else the defaults below.
+        return {
+            "type": names.get("tool_result_type", "tool_result"),
+            names.get("tool_use_id", "tool_use_id"): cid or "",
+            "content": payload,
+        }
+
+
+def _sanitize_err_body(body):
+    """Return body without tags or control characters, trimmed and capped at 1000 chars."""
+    if body:
+        # Tags are removed first, then control bytes (tab, LF and CR are left in place).
+        no_tags = re.sub(r'<[^>]+>', '', body)
+        no_ctrl = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f]', '', no_tags)
+        return no_ctrl.strip()[:1000]
+    return ""
+
+
+def _extract_text(content):
+    """Join the text of a content value: a str as-is, a list part by part, anything else ''."""
+    if isinstance(content, str):
+        return content
+    if not isinstance(content, list):
+        return ""
+    kinds = ("input_text", "output_text", "text")  # dict parts whose "text" is collected
+    return "".join(
+        part if isinstance(part, str) else part.get("text", "")
+        for part in content
+        if isinstance(part, str) or (isinstance(part, dict) and part.get("type") in kinds)
+    )
+
+
+# ═══════════════════════════════════════════════════════════════════
+# HTTP Server
+# ═══════════════════════════════════════════════════════════════════
+
+def _log_resp(resp_id, status, output):
+    """Best-effort: append a response summary to requests.log, swallowing any Exception."""
+    try:
+        _lp = os.path.join(_LOG_DIR, "requests.log")
+        with open(_lp, "a") as _f:
+            _f.write(f"  RESPONSE id={resp_id} status={status}\n")
+            for o in output or ():
+                ot = o.get("type")
+                if ot == "message":
+                    # _extract_text copes with empty or string content, where content[0] raised
+                    _f.write(f"    -> message: {_extract_text(o.get('content'))[:200]}\n")
+                elif ot == "function_call":
+                    _f.write(f"    -> function_call: {o.get('name')}({str(o.get('arguments') or '')[:120]})\n")
+                else:
+                    _f.write(f"    -> {ot}\n")
+            _f.write(f"{'='*60}\n")
+            _f.flush()
+    except Exception:
+        pass  # logging must not disturb the request being served
+
+class ConnectionTracker:  # context manager counting connections currently in flight
+    def __enter__(self):
+        self._shift(1)
+    def __exit__(self, *a):
+        self._shift(-1)
+    def _shift(self, delta):
+        global _active_connections
+        with _active_connections_lock:  # serialise the read-modify-write of the counter
+            _active_connections += delta
+
+class RequestTracker:
+    """Registers a request in _active_requests for the duration of a with-block."""
+    def __init__(self, request_id):
+        self.request_id = request_id
+        self.cancelled = threading.Event()  # set when the request is cancelled
+    def __enter__(self):
+        if self.request_id:
+            with _active_requests_lock:
+                _active_requests[self.request_id] = self
+        return self
+    def __exit__(self, *a):
+        if self.request_id:
+            with _active_requests_lock:
+                if _active_requests.get(self.request_id) is self:  # never evict a newer tracker
+                    del _active_requests[self.request_id]
+
+def _cancel_request(request_id):
+    """Set the cancelled event of a registered request; return False when none is found."""
+    with _active_requests_lock:
+        tracker = _active_requests.get(request_id)
+    if tracker:
+        tracker.cancelled.set()  # signalled after the registry lock is released
+    return bool(tracker)
+
+def _handle_shutdown_signal(signum, frame):  # signal handler; both arguments are unused
+    global _shutdown_requested
+    _shutdown_requested = True
+    print("[proxy] shutdown requested; draining connections", file=sys.stderr)
+    def _busy():
+        with _active_connections_lock:
+            return _active_connections != 0
+    def _drain():  # polls for up to 5s until no connection is active, then stops SERVER
+        give_up_at = time.time() + 5
+        while time.time() < give_up_at and _busy():
+            time.sleep(0.1)
+        if SERVER is not None:
+            SERVER.shutdown()
+    threading.Thread(target=_drain, daemon=True).start()
+
+def _upstream_timeout(body, stream):
+    """Upstream timeout in seconds: a base plus 2s per input item, capped per mode."""
+    payload = body.get("input", "")
+    extra = 2 * (len(payload) if isinstance(payload, list) else 1)
+    if not stream:
+        return min(60 + extra, 120)
+    return min((180 if body.get("tools") else 120) + extra, 300)
+
+def _auto_continue_gemini(handler, flush_event, message_id, model, gen_config, gemini_tools, system_parts, project_id, headers, endpoints, url_suffix, accumulated_text, output_items, message_started):
+    """Re-ask Gemini (at most 5 times) to continue while it stops on MAX_TOKENS; return the grown text."""
+    max_continuations = 5
+    for _cont in range(max_continuations):
+        cont_contents = [
+            {"role": "model", "parts": [{"text": accumulated_text[-12000:]}]},
+            {"role": "user", "parts": [{"text": "Continue exactly where you left off. Do not repeat anything already written."}]},
+        ]
+        cont_request = {"contents": cont_contents, "generationConfig": dict(gen_config)}
+        if system_parts:
+            cont_request["systemInstruction"] = {"parts": system_parts}
+        if gemini_tools:
+            cont_request["tools"] = gemini_tools
+        cont_wrapped = {"project": project_id, "model": model, "request": cont_request}
+        if OAUTH_PROVIDER == "google-antigravity":
+            cont_wrapped["requestType"] = "agent"
+            cont_wrapped["userAgent"] = "antigravity"
+            cont_wrapped["requestId"] = f"agent-{uuid.uuid4().hex[:12]}"
+        cont_body = json.dumps(cont_wrapped).encode()
+        upstream = None
+        for ep in endpoints:
+            target = f"{ep}/{url_suffix}"
+            req = urllib.request.Request(target, data=cont_body, headers=headers)
+            try:
+                upstream = urllib.request.urlopen(req, timeout=180)
+                break
+            except Exception as e:
+                print(f"[auto-continue] {ep} failed: {e}", file=sys.stderr)
+                continue
+        if not upstream:
+            break
+        cont_text = cont_finish = cont_buf = ""
+        with upstream:  # close each continuation response; it used to be left open
+            for raw_line in _stream_with_idle_timeout(upstream):
+                line = raw_line.decode(errors="replace")
+                if line.startswith("data: "):
+                    cont_buf += line[6:]
+                    continue
+                if not line.strip() and cont_buf:
+                    try:
+                        chunk = json.loads(cont_buf)
+                    except Exception:
+                        cont_buf = ""
+                        continue
+                    cont_buf = ""
+                    candidates = chunk.get("response", chunk).get("candidates", [])
+                    if not candidates:
+                        continue
+                    cont_finish = candidates[0].get("finishReason", "")
+                    parts = candidates[0].get("content", {}).get("parts", [])
+                    for part in parts:
+                        if part.get("thought"):
+                            continue
+                        if "text" in part and not part.get("functionCall"):
+                            delta = part["text"]
+                            if delta:
+                                cont_text += delta
+                                flush_event("response.output_text.delta", {"type": "response.output_text.delta", "output_index": 0, "content_index": 0, "delta": delta})
+                        elif part.get("functionCall"):
+                            fc = part["functionCall"]
+                            call_id = f"call_{uuid.uuid4().hex[:24]}"
+                            args_str = json.dumps(fc.get("args", fc.get("arguments", {})))
+                            output_index = len(output_items)
+                            flush_event("response.output_item.added", {"type": "response.output_item.added", "output_index": output_index, "item": {"type": "function_call", "id": call_id, "call_id": call_id, "name": fc.get("name", ""), "arguments": ""}})
+                            flush_event("response.function_call_arguments.delta", {"type": "response.function_call_arguments.delta", "output_index": output_index, "item_id": call_id, "delta": args_str})
+                            flush_event("response.function_call_arguments.done", {"type": "response.function_call_arguments.done", "output_index": output_index, "item_id": call_id, "arguments": args_str})
+                            output_items.append({"tool": True, "fc": fc, "call_id": call_id})
+        accumulated_text += cont_text
+        print(f"[auto-continue] chunk {len(cont_text)} chars, finish={cont_finish}, total={len(accumulated_text)}", file=sys.stderr)
+        if cont_finish != "MAX_TOKENS":
+            break
+    return accumulated_text
+
+class Handler(http.server.BaseHTTPRequestHandler):
+    protocol_version = "HTTP/1.1"
+
+    def do_GET(self):
+        """Serve the model list and the health report; any other path gets a 404."""
+        if self.path in ("/v1/models", "/models"):
+            self.send_json(200, {"object": "list", "data": MODELS})
+        elif self.path in ("/health", "/v1/health"):
+            _mem_mb = 0
+            try:
+                import resource as _res  # inside the try: the module is not available everywhere
+                _mem_mb = _res.getrusage(_res.RUSAGE_SELF).ru_maxrss / 1024
+            except Exception:
+                pass  # memory_mb stays 0 when the figure cannot be read
+            # A bare dir() lists this method's locals, so it never saw a module-level _START_TIME.
+            _uptime = time.time() - globals()["_START_TIME"] if "_START_TIME" in globals() else 0
+            self.send_json(200, {"ok": True, "backend": BACKEND, "target_url": TARGET_URL,
+                                 "models": [m.get("id") for m in MODELS], "bgp_routes": len(BGP_ROUTES),
+                                 "uptime_s": round(_uptime, 1),
+                                 "memory_mb": round(_mem_mb, 1),
+                                 "requests_total": _STATS.get("requests", 0)})
+        else:
+            self.send_error(404)
+
+    def do_POST(self):
+        """Dispatch a POST: 503 while shutting down, the cancel endpoint, or the responses API."""
+        if _shutdown_requested:
+            refusal = {"error": {"type": "proxy_shutting_down", "message": "Proxy is shutting down"}}
+            return self.send_json(503, refusal)
+        if self.path.startswith("/admin/cancel/"):
+            target_id = self.path.rsplit("/", 1)[-1]  # last path segment is the request id
+            if not _cancel_request(target_id):
+                return self.send_json(404, {"ok": False, "error": "request_not_found"})
+            return self.send_json(200, {"ok": True, "cancelled": target_id})
+        if self.path not in ("/v1/responses", "/responses"):
+            return self.send_error(404)
+        with ConnectionTracker():
+            self._handle()
+
+    _logf = None
+
+    def _handle(self):
+        """Parse a Responses POST body, log it, and dispatch to the handler chosen by BACKEND."""
+        try:
+            clen = int(self.headers.get("Content-Length", 0))
+            body = json.loads(self.rfile.read(clen))
+        except Exception as e:
+            return self.send_json(400, {"error": {"message": f"Bad request: {e}"}})
+
+        self._session_id = uuid.uuid4().hex[:8]
+        _sid = self._session_id
+        import datetime as _dt
+        _log_path = os.path.join(_LOG_DIR, "requests.log")
+        _ts = _dt.datetime.now().isoformat()
+
+        prev_id = body.get("previous_response_id")
+        raw_input = body.get("input", "")
+        input_data = resolve_previous_response(body)
+        input_data = _compact_input(input_data)
+        body["input"] = input_data
+        raw_types = [i.get("type") for i in raw_input] if isinstance(raw_input, list) else "str"
+        resolved_types = [i.get("type") for i in input_data] if isinstance(input_data, list) else "str"
+        print(f"[{_sid}] prev_id={prev_id} raw={raw_types} resolved={resolved_types}", file=sys.stderr)
+        try:  # best-effort: a failed log write or odd item shape must not abort the request
+            with open(_log_path, "a") as _lf:
+                _lf.write(f"\n{'='*60}\n{_ts} [session={_sid}] REQUEST {self.path}\n")
+                _lf.write(f"  prev_id={prev_id}\n")
+                _lf.write(f"  raw_input_types={raw_types}\n")
+                _lf.write(f"  resolved_input_types={resolved_types}\n")
+                _lf.write(f"  stream={body.get('stream')} model={body.get('model')}\n")
+                _lf.write(f"  store_keys={list(_response_store.keys())}\n")
+                if isinstance(input_data, list):
+                    for i, item in enumerate(input_data):
+                        t = item.get("type")
+                        if t == "message":
+                            _lf.write(f"  [{i}] message role={item.get('role')} text={str(item.get('content',''))[:120]}\n")
+                        elif t == "function_call":
+                            _lf.write(f"  [{i}] function_call call_id={item.get('call_id')} id={item.get('id')} name={item.get('name')} args={item.get('arguments','')[:120]}\n")
+                        elif t == "function_call_output":
+                            _lf.write(f"  [{i}] function_call_output id={item.get('id')} output={str(item.get('output',''))[:120]}\n")
+                        else:
+                            _lf.write(f"  [{i}] {t}\n")
+        except Exception as _log_err:
+            print(f"[{_sid}] request log skipped: {_log_err}", file=sys.stderr)
+
+        model = body.get("model", MODELS[0]["id"] if MODELS else "unknown")
+        stream = body.get("stream", False)
+        request_id = body.get("request_id") or body.get("id") or uid("req")
+        if isinstance(input_data, list):
+            for item in input_data:
+                if isinstance(item, dict) and item.get("type") == "message" and item.get("role") == "user":
+                    content = str(item.get("content", ""))
+                    for url_m in re.finditer(r"https?://[^\s\]'\"<>]+", content):
+                        _last_user_urls.append(url_m.group(0))
+        save_request_snapshot(request_id, body)
+        _req_t0 = time.time()
+        try:
+            with RequestTracker(request_id) as tracker:
+                if BACKEND == "auto":
+                    self._handle_auto(body, model, stream, tracker)
+                elif BACKEND == "anthropic":
+                    self._handle_anthropic(body, model, stream, tracker)
+                elif BACKEND == "command-code":
+                    self._handle_command_code(body, model, stream, tracker)
+                elif BACKEND == "freebuff":
+                    self._handle_freebuff(body, model, stream, tracker)
+                elif (BACKEND or "").startswith("gemini-oauth"):
+                    self._handle_gemini_oauth(body, model, stream, tracker)
+                else:
+                    self._handle_openai_compat(body, model, stream, tracker)
+            update_snapshot_response(request_id, "completed", time.time() - _req_t0)
+        except Exception as _snap_err:
+            update_snapshot_response(request_id, "error", time.time() - _req_t0, _snap_err)
+            raise
+
+    def _handle_openai_compat(self, body, model, stream, tracker=None):
+        """Send a Responses request to a /chat/completions upstream (or to _handle_bgp), retrying 429/502/503."""
+        input_data = body.get("input", "")
+        policy = provider_policy()
+        pair_errors = validate_tool_pairs(input_data)
+        if pair_errors:
+            print(f"[tool-validator] repairing {len(pair_errors)} orphan tool outputs", file=sys.stderr)
+            input_data = repair_orphan_tool_outputs(input_data, pair_errors)
+            body = dict(body)
+            body["input"] = input_data
+
+        if (policy.get("synthetic_tool_results") or _provider_cap(model, "synthetic_tool_results", False)) and isinstance(input_data, list):
+            input_data, synthesized = synthesize_tool_results_for_chat(input_data)
+            if synthesized:
+                print("[provider-adapter] using synthetic tool-result continuation", file=sys.stderr)
+                body = dict(body)
+                body["input"] = input_data
+
+        compacted = False
+        if policy.get("compaction") and isinstance(input_data, list):
+            input_data, compacted = _adaptive_compact(input_data, model, policy)
+            if compacted:
+                body = dict(body)
+                body["input"] = input_data
+
+        crof_limit = _crof_item_limit(model)
+        if not compacted and isinstance(input_data, list) and len(input_data) > crof_limit:
+            print(f"[crof-adaptive] proactive compact: {len(input_data)} items > limit {crof_limit}", file=sys.stderr)
+            input_data = _crof_compact_for_retry(input_data, model)
+            body = dict(body)
+            body["input"] = input_data
+
+        messages = oa_input_to_messages(input_data)
+        instructions = (body.get("instructions") or "").strip()  # tolerate a JSON null
+        if instructions:
+            messages.insert(0, {"role": "system", "content": instructions})
+
+        if BGP_ROUTES:
+            self._handle_bgp(body, model, stream, messages, input_data)
+        else:
+            chat_body = self._build_chat_body(model, messages, body, stream)
+            target = upstream_target(TARGET_URL, "/chat/completions")
+            effective_key = _refresh_oauth_token()
+            fwd = forwarded_headers(self.headers, {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {effective_key}",
+            }, browser_ua=True)
+            print(f"[{self._session_id}] POST {target} model={model} stream={stream} items={len(input_data) if isinstance(input_data,list) else 1}", file=sys.stderr)
+            chat_body_b = json.dumps(chat_body).encode()
+            max_retries = 3
+            for attempt in range(max_retries + 1):
+                req = urllib.request.Request(target, data=chat_body_b, headers=fwd)
+                try:
+                    upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream))
+                except urllib.error.HTTPError as e:
+                    err_body = e.read().decode(errors="replace")  # error pages are not always UTF-8
+                    if e.code in (429, 502, 503) and attempt < max_retries:
+                        retry_after = e.headers.get("Retry-After")
+                        if retry_after:
+                            try:
+                                wait = max(0, min(int(retry_after), 60))  # sleep() rejects negatives
+                            except ValueError:
+                                wait = min(2 ** (attempt + 1), 15)
+                        else:
+                            wait = min(2 ** (attempt + 1), 15)
+                        print(f"[{self._session_id}] HTTP {e.code} (attempt {attempt+1}/{max_retries}), retrying in {wait}s: {err_body[:150]}", file=sys.stderr)
+                        time.sleep(wait)
+                        continue
+                    return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}})
+                except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError) as e:
+                    if attempt < max_retries:
+                        wait = min(2 ** (attempt + 1), 10)
+                        print(f"[{self._session_id}] connection error (attempt {attempt+1}/{max_retries}), retrying in {wait}s: {e}", file=sys.stderr)
+                        time.sleep(wait)
+                        continue
+                    return self.send_json(502, {"error": {"type": "proxy_error", "message": str(e)}})
+                except Exception as e:
+                    return self.send_json(500, {"error": {"type": "proxy_error", "message": str(e)}})
+                break
+            self._forward_oa_compat(upstream, stream, model, chat_body, body, input_data, fwd, target, tracker)
+
+    def _build_chat_body(self, model, messages, body, stream):
+        """Translate a Responses request body into a /chat/completions request dict."""
+        chat_body = {"model": model, "messages": messages}
+        chat_body.update({k: body[k] for k in ("temperature", "top_p") if k in body})
+        # `or 0` keeps a JSON null from reaching max(); the result is never below 64000.
+        chat_body["max_tokens"] = max(body.get("max_output_tokens") or 0, 64000)
+        tools = oa_convert_tools(body.get("tools"))
+        if tools:
+            chat_body["tools"] = tools
+        if body.get("tool_choice"):
+            chat_body["tool_choice"] = body["tool_choice"]
+        chat_body["stream"] = stream
+        if not REASONING_ENABLED or REASONING_EFFORT == "none":
+            chat_body["enable_thinking"] = False
+            chat_body["reasoning_effort"] = "none"
+        else:
+            chat_body["reasoning_effort"] = REASONING_EFFORT
+        return chat_body
+
+    def _handle_gemini_oauth(self, body, model, stream, tracker=None):
+        """Convert a Responses request to a wrapped Gemini request, POST it, and forward the reply."""
+        input_data = body.get("input", "")
+        policy = provider_policy()
+        if OAUTH_PROVIDER == "google-antigravity":
+            alias_map = {
+                "antigravity-gemini-3-flash": "gemini-3-flash",
+                "antigravity-gemini-3-pro": "gemini-3-pro-low",
+                "antigravity-gemini-3.1-pro": "gemini-3.1-pro-low",
+                "gemini-3-flash-preview": "gemini-3-flash",
+                "gemini-3-pro-preview": "gemini-3-pro-low",
+                "gemini-3.1-pro-preview": "gemini-3.1-pro-low",
+                "gemini-3-pro": "gemini-3-pro-low",
+                "gemini-3.1-pro": "gemini-3.1-pro-low",
+                "antigravity-claude-sonnet-4-6": "claude-sonnet-4-6",
+                "antigravity-claude-opus-4-6-thinking": "claude-opus-4-6-thinking",
+            }
+            model = alias_map.get(model, model)
+
+        pair_errors = validate_tool_pairs(input_data)
+        if pair_errors:
+            input_data = repair_orphan_tool_outputs(input_data, pair_errors)
+            body = dict(body)
+            body["input"] = input_data
+
+        compacted = False
+        if policy.get("compaction") and isinstance(input_data, list):
+            input_data, compacted = _adaptive_compact(input_data, model, policy)
+            if compacted:
+                body = dict(body)
+                body["input"] = input_data
+
+        access_token = _refresh_oauth_token()
+        token_name = "google-antigravity-oauth-token.json" if OAUTH_PROVIDER == "google-antigravity" else "google-cli-oauth-token.json"
+        token_path = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy", token_name)
+        project_id = ""
+        try:
+            with open(token_path) as f:
+                project_id = json.load(f).get("project_id", "")
+        except Exception:
+            pass
+
+        contents = []
+        system_parts = []
+        instructions = (body.get("instructions") or "").strip()  # tolerate a JSON null
+        tool_call_names = {}
+
+        if isinstance(input_data, list):
+            for item in input_data:
+                t = item.get("type")
+                if t == "message":
+                    role = "user" if item.get("role") in ("user", "developer") else "model"  # developer text is input, not model output
+                    content = item.get("content", "")
+                    if isinstance(content, list):
+                        parts = []
+                        for c in content:
+                            ct = c.get("type")
+                            # output_text parts (accepted by _extract_text too) were previously skipped here
+                            if ct in ("input_text", "output_text", "text"):
+                                parts.append({"text": c.get("text", "")})
+                            elif ct == "input_image" or ct == "image_url":
+                                iu = c.get("image_url") or c.get("url", {})
+                                url = iu.get("url", iu) if isinstance(iu, dict) else iu
+                                if isinstance(url, str) and url.startswith("data:"):
+                                    mime, _, b64 = url.partition(";base64,")
+                                    mime = mime.replace("data:", "") or "image/png"
+                                    parts.append({"inlineData": {"mimeType": mime, "data": b64}})
+                                else:
+                                    parts.append({"text": str(url)})
+                        if parts:
+                            contents.append({"role": role, "parts": parts})
+                    elif isinstance(content, str):
+                        contents.append({"role": role, "parts": [{"text": content}]})
+                elif t == "function_call":
+                    call_id = item.get("call_id") or item.get("id") or f"call_{uuid.uuid4().hex[:24]}"
+                    fname = item.get("name", "")
+                    if call_id and fname:
+                        tool_call_names[call_id] = fname
+                    args = item.get("arguments", "{}")
+                    if isinstance(args, str):
+                        try:
+                            args = json.loads(args)
+                        except Exception:
+                            args = {}
+                    contents.append({"role": "model", "parts": [{"functionCall": {"name": fname, "args": args, "id": call_id}, "thoughtSignature": "skip_thought_signature_validator"}]})
+                elif t == "function_call_output":
+                    call_id = item.get("call_id", item.get("id", ""))
+                    output = item.get("output", "")
+                    fname = item.get("name", "") or tool_call_names.get(call_id, "")
+                    try:
+                        output_parsed = json.loads(output) if isinstance(output, str) else output
+                    except Exception:
+                        output_parsed = output
+                    resp_part = {"functionResponse": {"name": fname or "unknown", "response": {"result": output_parsed if isinstance(output_parsed, (dict, list)) else output}}}
+                    if call_id:
+                        resp_part["functionResponse"]["id"] = call_id
+                    contents.append({"role": "user", "parts": [resp_part]})
+
+        if OAUTH_PROVIDER.startswith("google"):
+            sanitized = []
+            last_user_text = None
+            last_role = None
+            for content in contents:
+                role = content.get("role")
+                parts = [p for p in content.get("parts", []) if isinstance(p, dict)]
+                if not parts:
+                    continue
+                text_key = "\n".join([p.get("text", "") for p in parts if "text" in p]).strip()
+                if role == "user" and text_key and text_key == last_user_text:
+                    continue
+                if role == last_role and role in ("user", "model") and sanitized:
+                    sanitized[-1].setdefault("parts", []).extend(parts)
+                else:
+                    sanitized.append({"role": role, "parts": parts})
+                if role == "user" and text_key:
+                    last_user_text = text_key
+                last_role = role
+            while sanitized and sanitized[0].get("role") != "user":
+                sanitized.pop(0)
+            while sanitized and sanitized[-1].get("role") != "user":
+                sanitized.pop()
+            contents = sanitized
+
+        if instructions:
+            system_parts.append({"text": instructions})
+        if OAUTH_PROVIDER == "google-antigravity":
+            system_parts.append({"text": (
+                "You are connected through a Responses API translation proxy. "
+                "If tools are available and the user's request requires changing files, call the appropriate tool immediately. "
+                "Do not announce plans, do not say you will list files, browse, fetch, inspect, or start by exploring unless you are emitting the actual tool call in the same response. "
+                "For file creation requests, use tools to create or modify the file instead of only printing code in chat. "
+                "If no suitable tool is available, answer directly with the complete result. "
+                "Never answer only with a plan such as 'I will start by...' or 'I am going to...'."
+            )})
+
+        gen_config = {}
+        mot = body.get("max_output_tokens", 0)
+        if mot:
+            gen_config["maxOutputTokens"] = mot
+        if body.get("temperature") is not None:
+            gen_config["temperature"] = body["temperature"]
+        if body.get("top_p") is not None:
+            gen_config["topP"] = body["top_p"]
+
+        if REASONING_ENABLED and REASONING_EFFORT != "none":
+            budget = {"low": 2048, "medium": 8192, "high": 24576}.get(REASONING_EFFORT, 8192)
+            gen_config["thinkingConfig"] = {"includeThoughts": True, "thinkingBudget": budget}
+
+        oa_tools = body.get("tools", [])
+        gemini_tools = []
+        if oa_tools:
+            func_decls = []
+            for tool in oa_tools:
+                ttype = tool.get("type", "function")
+                fname = tool.get("name", "")
+                if ttype == "function":
+                    fn = tool.get("function", tool)
+                    name = fn.get("name", fname)
+                    desc = fn.get("description", "")
+                    params = fn.get("parameters", fn.get("input_schema", {}))
+                    func_decls.append({"name": name, "description": desc, "parameters": params})
+                elif fname:
+                    func_decls.append({"name": fname, "description": tool.get("description", ""), "parameters": tool.get("parameters", {"type": "object", "properties": {}})})
+            if func_decls:
+                gemini_tools = [{"functionDeclarations": func_decls}]
+
+        request_body = {"contents": contents}
+        if system_parts:
+            request_body["systemInstruction"] = {"parts": system_parts}
+        if gen_config:
+            request_body["generationConfig"] = gen_config
+        if gemini_tools:
+            request_body["tools"] = gemini_tools
+
+        wrapped = {
+            "project": project_id,
+            "model": model,
+            "request": request_body,
+        }
+        if OAUTH_PROVIDER == "google-antigravity":
+            wrapped["requestType"] = "agent"
+            wrapped["userAgent"] = "antigravity"
+            wrapped["requestId"] = f"agent-{uuid.uuid4().hex[:12]}"
+
+        endpoints = ([
+            "https://daily-cloudcode-pa.sandbox.googleapis.com",
+            "https://autopush-cloudcode-pa.sandbox.googleapis.com",
+            "https://cloudcode-pa.googleapis.com",
+        ] if OAUTH_PROVIDER == "google-antigravity" else [
+            "https://cloudcode-pa.googleapis.com",
+        ])
+        action = "streamGenerateContent" if stream else "generateContent"
+        url_suffix = f"v1internal:{action}?alt=sse" if stream else f"v1internal:{action}"
+
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {access_token}",
+        }
+        if OAUTH_PROVIDER == "google-antigravity":
+            version = _ensure_antigravity_version()
+            headers["User-Agent"] = f"antigravity/{version} darwin/arm64"
+        else:
+            headers["User-Agent"] = "google-api-nodejs-client/9.15.1"
+            headers["X-Goog-Api-Client"] = "gl-node/22.17.0"
+            headers["Client-Metadata"] = "ideType=IDE_UNSPECIFIED,platform=PLATFORM_UNSPECIFIED,pluginType=GEMINI"
+        body_b = json.dumps(wrapped).encode()
+        print(f"[{self._session_id}] model={model} stream={stream} items={len(input_data) if isinstance(input_data, list) else 1} project={project_id}", file=sys.stderr)
+
+        for ep in endpoints:
+            target = f"{ep}/{url_suffix}"
+            req = urllib.request.Request(target, data=body_b, headers=headers)
+            try:
+                upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream))
+                break
+            except urllib.error.HTTPError as e:
+                err_body = e.read().decode(errors="replace")  # error pages are not always UTF-8
+                if e.code == 400 and OAUTH_PROVIDER.startswith("google"):
+                    try:
+                        debug_path = os.path.join(_LOG_DIR, "gemini-last-400-request.json")
+                        with open(debug_path, "w") as dbg:
+                            json.dump({"endpoint": ep, "model": model, "wrapped": wrapped, "error": err_body}, dbg, indent=2)
+                        print(f"[{self._session_id}] saved 400 debug request to {debug_path}", file=sys.stderr)
+                    except Exception:
+                        pass
+                if e.code == 429 and ep != endpoints[-1]:
+                    print(f"[{self._session_id}] {ep} HTTP 429, trying next endpoint", file=sys.stderr)
+                    continue
+                return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}})
+            except Exception as e:
+                if ep == endpoints[-1]:
+                    return self.send_json(502, {"error": {"type": "proxy_error", "message": str(e)}})
+                print(f"[{self._session_id}] {ep} failed: {e}, trying next", file=sys.stderr)
+                continue
+
+        if stream:
+            self._forward_gemini_sse(upstream, model, body, input_data, tracker)
+        else:
+            self._forward_gemini_json(upstream, model, body, input_data)
+
+    def _forward_gemini_sse(self, upstream, model, body, input_data, tracker=None):
+        """Relay a Gemini SSE stream to the client as Responses-style SSE events.
+
+        Sends a 200 ``text/event-stream`` response, emits ``response.created``
+        and ``response.in_progress``, then converts the parts of the first
+        candidate of every upstream chunk into text deltas or function-call
+        events. After the read loop it emits the ``*.done`` events and
+        ``response.completed`` and stores the final response object in
+        ``_response_store`` under the generated response id.
+
+        Args:
+            upstream: Upstream response, read through ``_stream_with_idle_timeout``.
+            model: Model name echoed in the emitted response objects.
+            body: Original request body (not read in this method).
+            input_data: Original request input (not read in this method).
+            tracker: Optional object whose ``cancelled`` event stops the read loop.
+        """
+        resp_id = f"resp-{uuid.uuid4().hex[:24]}"
+        created = int(time.time())
+        self.send_response(200)
+        self.send_header("Content-Type", "text/event-stream")
+        self.send_header("Cache-Control", "no-cache")
+        self.send_header("Connection", "keep-alive")
+        self.end_headers()
+
+        full_text = ""
+        # One marker dict per emitted output item; its length is used as the
+        # output_index of the next function call.
+        output_items = []
+        current_tool_calls = {}
+        message_started = False
+        message_id = f"msg-{uuid.uuid4().hex[:24]}"
+
+        def flush_event(event_type, data):
+            # Write one SSE event (event line, data line, blank line) and flush.
+            self.wfile.write(f"event: {event_type}\ndata: {json.dumps(data)}\n\n".encode())
+            self.wfile.flush()
+
+        flush_event("response.created", {"type": "response.created", "response": {"id": resp_id, "object": "response", "model": model, "status": "in_progress", "created": created, "output": []}})
+        flush_event("response.in_progress", {"type": "response.in_progress", "response": {"id": resp_id}})
+
+        # `buf` accumulates the payload of consecutive "data: " lines; a blank
+        # line marks the end of the event and triggers parsing.
+        buf = ""
+        stream_finished = False
+        for raw_line in _stream_with_idle_timeout(upstream):
+            if tracker and tracker.cancelled.is_set():
+                print("[gemini-oauth] stream cancelled", file=sys.stderr)
+                break
+            if stream_finished:
+                break
+            line = raw_line.decode(errors="replace")
+            if line.startswith("data: "):
+                buf += line[6:]
+                continue
+            if not line.strip() and buf:
+                try:
+                    chunk = json.loads(buf)
+                except Exception:
+                    # Unparseable event payload: drop it and keep reading.
+                    buf = ""
+                    continue
+                buf = ""
+
+                # The candidates may sit under a "response" wrapper or at the top level.
+                candidates = chunk.get("response", chunk).get("candidates", [])
+                if not candidates:
+                    if chunk.get("error"):
+                        print(f"[{self._session_id}] stream error chunk: {str(chunk.get('error'))[:300]}", file=sys.stderr)
+                    continue
+                if candidates[0].get("finishReason") and not candidates[0].get("content", {}).get("parts"):
+                    print(f"[{self._session_id}] finish without parts: {candidates[0].get('finishReason')}", file=sys.stderr)
+                parts = candidates[0].get("content", {}).get("parts", [])
+                for part in parts:
+                    # Parts flagged as "thought" are not forwarded to the client.
+                    if part.get("thought"):
+                        continue
+                    if "text" in part and not part.get("functionCall"):
+                        text_delta = part["text"]
+                        if not text_delta:
+                            continue
+                        full_text += text_delta
+                        if not message_started:
+                            # First text delta: open the assistant message item (always output_index 0).
+                            flush_event("response.output_item.added", {"type": "response.output_item.added", "output_index": 0, "item": {"type": "message", "id": message_id, "role": "assistant", "content": []}})
+                            flush_event("response.content_part.added", {"type": "response.content_part.added", "output_index": 0, "content_index": 0, "part": {"type": "output_text", "text": ""}})
+                            output_items.append({"text": True})
+                            message_started = True
+                        flush_event("response.output_text.delta", {"type": "response.output_text.delta", "output_index": 0, "content_index": 0, "delta": text_delta})
+                    elif part.get("functionCall"):
+                        fc = part["functionCall"]
+                        # The call id is generated here; nothing is read from the upstream part for it.
+                        call_id = f"call_{uuid.uuid4().hex[:24]}"
+                        args_str = json.dumps(fc.get("args", fc.get("arguments", {})))
+                        output_index = len(output_items)
+                        # The complete argument string is sent as a single delta followed by done.
+                        flush_event("response.output_item.added", {"type": "response.output_item.added", "output_index": output_index, "item": {"type": "function_call", "id": call_id, "call_id": call_id, "name": fc.get("name", ""), "arguments": ""}})
+                        flush_event("response.function_call_arguments.delta", {"type": "response.function_call_arguments.delta", "output_index": output_index, "item_id": call_id, "delta": args_str})
+                        flush_event("response.function_call_arguments.done", {"type": "response.function_call_arguments.done", "output_index": output_index, "item_id": call_id, "arguments": args_str})
+                        current_tool_calls[call_id] = fc
+                        output_items.append({"tool": True})
+                last_finish = candidates[0].get("finishReason", "")
+                # For the antigravity provider, a finishReason seen after text ends the
+                # read loop. MAX_TOKENS without tool calls leaves stream_finished False
+                # so the continuation branch below can run.
+                if OAUTH_PROVIDER == "google-antigravity" and full_text and last_finish:
+                    if last_finish == "MAX_TOKENS" and not current_tool_calls:
+                        print(f"[{self._session_id}] MAX_TOKENS hit ({len(full_text)} chars), auto-continuing...", file=sys.stderr)
+                        break
+                    stream_finished = True
+                    break
+
+        # `last_finish` is only assigned inside the loop; the `full_text` test that
+        # precedes it keeps this expression from reading it when no text was processed.
+        # NOTE(review): gen_config, gemini_tools, system_parts, project_id, headers,
+        # endpoints and url_suffix are neither parameters nor locals of this method.
+        # Unless module-level names with these spellings exist, this call raises
+        # NameError when the branch is taken — TODO confirm and pass them in explicitly.
+        if OAUTH_PROVIDER.startswith("google") and full_text and not current_tool_calls and last_finish == "MAX_TOKENS" and not stream_finished:
+            result = _auto_continue_gemini(self, flush_event, message_id, model, gen_config, gemini_tools, system_parts, project_id, headers, endpoints, url_suffix, full_text, output_items, message_started)
+            if result:
+                full_text = result
+                # Markers appended in this method carry no "fc"/"call_id" keys, so only
+                # entries added by the continuation helper (presumably) match here.
+                for item in output_items:
+                    if isinstance(item, dict) and item.get("tool") and "fc" in item and "call_id" in item:
+                        current_tool_calls[item["call_id"]] = item["fc"]
+
+        # Build the final output list: the message (if any text) first, then tool calls.
+        out = []
+        if not full_text and not current_tool_calls:
+            print("[gemini-oauth] WARNING: completed with empty output", file=sys.stderr)
+        if full_text:
+            out.append({"type": "message", "id": message_id, "role": "assistant", "content": [{"type": "output_text", "text": full_text}]})
+        tool_outputs = []
+        for cid, fc in current_tool_calls.items():
+            tool_outputs.append({"type": "function_call", "id": cid, "call_id": cid, "name": fc.get("name", ""), "arguments": json.dumps(fc.get("args", fc.get("arguments", {})))})
+        out.extend(tool_outputs)
+
+        final_resp = {"id": resp_id, "object": "response", "model": model, "status": "completed", "created": created, "output": out}
+        if full_text:
+            flush_event("response.output_text.done", {"type": "response.output_text.done", "output_index": 0, "content_index": 0, "text": full_text})
+            flush_event("response.content_part.done", {"type": "response.content_part.done", "output_index": 0, "content_index": 0, "part": {"type": "output_text", "text": full_text}})
+            flush_event("response.output_item.done", {"type": "response.output_item.done", "output_index": 0, "item": out[0]})
+        # Tool items are numbered after the message (index 0) when text exists.
+        # NOTE(review): the indices used while streaming come from len(output_items)
+        # at the time of each call, so they can differ from these when a function
+        # call arrived before the first text delta — confirm whether clients care.
+        for idx, item in enumerate(tool_outputs, start=(1 if full_text else 0)):
+            flush_event("response.output_item.done", {"type": "response.output_item.done", "output_index": idx, "item": item})
+        flush_event("response.completed", {"type": "response.completed", "response": final_resp})
+        # No Content-Length was sent, so the connection is closed after the stream.
+        self.close_connection = True
+
+        # Remember the response by id and drop entries from the front while the
+        # store holds more than _MAX_STORED items.
+        with _response_store_lock:
+            _response_store[resp_id] = final_resp
+            while len(_response_store) > _MAX_STORED:
+                _response_store.popitem(last=False)
+
+    def _forward_gemini_json(self, upstream, model, body, input_data):
+        data = json.loads(upstream.read().decode())
+        resp_id = f"resp-{uuid.uuid4().hex[:24]}"
+        created = int(time.time())
+        out = []
+        full_text = ""
+        candidates = data.get("response", data).get("candidates", [])
+        if candidates:
+            parts = candidates[0].get("content", {}).get("parts", [])
+            text_parts = []
+            for part in parts:
+                if part.get("thought"):
+                    continue
+                if "text" in part and not part.get("functionCall"):
+                    text_parts.append(part["text"])
+                elif part.get("functionCall"):
+                    fc = part["functionCall"]
+                    call_id = f"call_{uuid.uuid4().hex[:24]}"
+                    out.append({"type": "function_call", "id": call_id, "call_id": call_id, "name": fc.get("name", ""), "arguments": json.dumps(fc.get("args", fc.get("arguments", {})))})
+            if text_parts:
+                full_text = "".join(text_parts)
+                out.insert(0, {"type": "message", "id": f"msg-{uuid.uuid4().hex[:24]}", "role": "assistant", "content": [{"type": "output_text", "text": full_text}]})
+        resp = {"id": resp_id, "object": "response", "model": model, "status": "completed", "created": created, "output": out}
+        with _response_store_lock:
+            _response_store[resp_id] = resp
+            while len(_response_store) > _MAX_STORED:
+                _response_store.popitem(last=False)
+        self.send_json(200, resp)
+
+    def _handle_bgp(self, body, model, stream, messages, input_data):
+        routes = _sorted_bgp_routes()
+        routes = [r for r in routes if _bucket_for_route(r).allow()]
+        if not routes:
+            return self.send_json(503, {"error": {"type": "bgp_rate_limited", "message": "All routes rate-limited"}})
+        errors = []
+        for route in routes:
+            r_model = route.get("model", model)
+            r_url = route["target_url"].rstrip("/")
+            r_key = route.get("api_key", "")
+            r_reasoning = route.get("reasoning_enabled", True)
+            r_effort = route.get("reasoning_effort", "medium")
+            r_oauth = route.get("oauth_provider", "")
+
+            chat_body = dict(messages=list(messages))
+            chat_body["model"] = r_model
+            for k in ("temperature", "top_p"):
+                if k in body:
+                    chat_body[k] = body[k]
+            chat_body["max_tokens"] = max(body.get("max_output_tokens", 0), 64000)
+            tools = oa_convert_tools(body.get("tools"))
+            if tools:
+                chat_body["tools"] = tools
+            if body.get("tool_choice"):
+                chat_body["tool_choice"] = body["tool_choice"]
+            chat_body["stream"] = stream
+            if not r_reasoning or r_effort == "none":
+                chat_body["enable_thinking"] = False
+                chat_body["reasoning_effort"] = "none"
+            else:
+                chat_body["reasoning_effort"] = r_effort
+
+            target = upstream_target(r_url, "/chat/completions")
+            if r_oauth == "google":
+                r_key = _refresh_oauth_token_for(r_key, r_oauth)
+            fwd = forwarded_headers(self.headers, {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {r_key}",
+            }, browser_ua=True)
+            print(f"[{self._session_id}] trying route '{route.get('name', r_url)}' model={r_model}", file=sys.stderr)
+            req = urllib.request.Request(target, data=json.dumps(chat_body).encode(), headers=fwd)
+            t0_route = time.time()
+            route_ok = False
+            for attempt in range(3):
+                try:
+                    upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream))
+                    print(f"[{self._session_id}] route '{route.get('name', r_url)}' connected OK", file=sys.stderr)
+                    _update_route_stats(route, True, time.time() - t0_route)
+                    self._forward_oa_compat(upstream, stream, r_model, chat_body, body, input_data, fwd, target)
+                    return
+                except urllib.error.HTTPError as e:
+                    err = e.read().decode()
+                    if e.code in (429, 502, 503) and attempt < 2:
+                        retry_after = e.headers.get("Retry-After")
+                        wait = min(int(retry_after), 60) if retry_after and retry_after.isdigit() else min(2 ** (attempt + 1), 10)
+                        print(f"[{self._session_id}] route '{route.get('name', r_url)}' HTTP {e.code}, retry {attempt+1}/2 in {wait}s", file=sys.stderr)
+                        time.sleep(wait)
+                        req = urllib.request.Request(target, data=json.dumps(chat_body).encode(), headers=fwd)
+                        continue
+                    print(f"[{self._session_id}] route '{route.get('name', r_url)}' FAILED: HTTP {e.code}: {err[:200]}", file=sys.stderr)
+                    _update_route_stats(route, False, time.time() - t0_route, http_code=e.code)
+                    errors.append(f"{route.get('name','?')}: HTTP {e.code}")
+                    break
+                except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError) as e:
+                    if attempt < 2:
+                        wait = min(2 ** (attempt + 1), 8)
+                        print(f"[{self._session_id}] route '{route.get('name', r_url)}' conn error, retry {attempt+1}/2 in {wait}s: {e}", file=sys.stderr)
+                        time.sleep(wait)
+                        req = urllib.request.Request(target, data=json.dumps(chat_body).encode(), headers=fwd)
+                        continue
+                    _update_route_stats(route, False, time.time() - t0_route, error_type=str(e))
+                    errors.append(f"{route.get('name','?')}: {e}")
+                    break
+                except Exception as e:
+                    print(f"[{self._session_id}] route '{route.get('name', r_url)}' FAILED: {e}", file=sys.stderr)
+                    _update_route_stats(route, False, time.time() - t0_route, error_type=str(e))
+                    errors.append(f"{route.get('name','?')}: {e}")
+                    break
+
+        print(f"[{self._session_id}] ALL ROUTES FAILED: {errors}", file=sys.stderr)
+        self.send_json(502, {"error": {"type": "bgp_all_routes_failed", "message": f"All BGP routes failed: {'; '.join(errors)}"}})
+
+    def _forward_oa_compat(self, upstream, stream, model, chat_body, body, input_data, fwd, target, tracker=None):
+        """Relay an OpenAI-compatible chat-completions reply in Responses format.
+
+        Streaming: the 200/SSE headers are sent first, then every event yielded
+        by ``oa_stream_to_sse`` is collected in memory. If the turn ended with
+        ``finish_reason == "length"`` and no message item, up to two retries
+        are attempted (synthetic tool results, then input compaction) and their
+        events replace the collected ones. The events are written to the client
+        only at the end via ``stream_buffered_events``.
+
+        Non-streaming: the upstream JSON is converted with
+        ``oa_resp_to_responses`` and sent as a single 200 response.
+
+        Args:
+            upstream: Open upstream response to read from.
+            stream: Whether to reply with SSE or a single JSON body.
+            model: Model name used for conversion and bookkeeping.
+            chat_body: Chat body that was sent upstream; copied for the compaction retry.
+            body: Original Responses-style request body.
+            input_data: Original request input (used for retries and response storage).
+            fwd: Headers to reuse for retry requests.
+            target: Upstream URL to reuse for retry requests.
+            tracker: Optional object whose ``cancelled`` event stops collection.
+        """
+        n_items = len(input_data) if isinstance(input_data, list) else 1
+        t0 = time.time()
+        # Provider label for usage records: the host part of TARGET_URL, or
+        # "bgp:<name of the first route>" when BGP routes are configured.
+        provider = TARGET_URL.split("//")[-1].split("/")[0]
+        if BGP_ROUTES:
+            # NOTE(review): the inner `if BGP_ROUTES else "unknown"` can never take
+            # its else branch here, and the first route is named even if another
+            # route served the request — confirm intent.
+            provider = "bgp:" + (BGP_ROUTES[0].get("name", "pool") if BGP_ROUTES else "unknown")
+
+        if stream:
+            self.send_response(200)
+            self.send_header("Content-Type", "text/event-stream")
+            self.send_header("Cache-Control", "no-cache")
+            self.send_header("Connection", "keep-alive")
+            self.end_headers()
+            if hasattr(self, 'connection') and self.connection:
+                try:
+                    # Best effort: disable Nagle so small SSE writes are not delayed.
+                    self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+                except Exception:
+                    pass
+
+            collected_events = []
+            last_resp_id = None
+            last_output = None
+            last_status = None
+            finish_reason = None
+            has_content = False
+
+            def _observe_event(event):
+                # Inspect each event's data lines and remember id/output/status
+                # from the "response.completed" event.
+                nonlocal last_resp_id, last_output, last_status, finish_reason, has_content
+                for line in event.strip().split("\n"):
+                    if line.startswith("data: "):
+                        try:
+                            d = json.loads(line[6:])
+                            if d.get("type") == "response.completed":
+                                last_resp_id = d.get("response", {}).get("id")
+                                last_output = d.get("response", {}).get("output", [])
+                                last_status = d.get("response", {}).get("status")
+                                # A completed event with status "incomplete" is treated as a length stop.
+                                finish_reason = "length" if last_status == "incomplete" else "stop"
+                                has_content = any(o.get("type") == "message" for o in (last_output or []))
+                        except Exception:
+                            pass
+
+            try:
+                for event in oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id")):
+                    if tracker and tracker.cancelled.is_set():
+                        print("[translate-proxy] stream cancelled", file=sys.stderr)
+                        break
+                    collected_events.append(event)
+                    _observe_event(event)
+            except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
+                print("[translate-proxy] client disconnected during stream", file=sys.stderr)
+                _crof_record(model, n_items, False)
+                _log_resp(last_resp_id, "client_disconnect", last_output)
+                return
+
+            # Record outcome
+            # NOTE(review): this bookkeeping reflects the first attempt only; the
+            # retries below replace the events but nothing is recorded or stored
+            # for their result.
+            success = (finish_reason != "length")
+            _crof_record(model, n_items, success)
+            _log_resp(last_resp_id, last_status, last_output)
+            if last_resp_id and input_data is not None:
+                store_response(last_resp_id, input_data, last_output)
+            _record_usage(provider, model, success, time.time() - t0, error_type="length" if not success else None)
+
+            # Auto-learn provider quirks before flushing the bad response to Codex.
+            if finish_reason == "length" and not has_content and has_function_call_output(input_data):
+                _set_provider_cap(model, "synthetic_tool_results", True, "incomplete empty response after tool output")
+                new_input, synthesized = synthesize_tool_results_for_chat(input_data)
+                if synthesized:
+                    print("[provider-sensor] retrying turn with synthetic tool results", file=sys.stderr)
+                    new_messages = oa_input_to_messages(new_input)
+                    instructions = body.get("instructions", "").strip()
+                    if instructions:
+                        new_messages.insert(0, {"role": "system", "content": instructions})
+                    new_chat_body = self._build_chat_body(model, new_messages, body, stream)
+                    new_req = urllib.request.Request(target, data=json.dumps(new_chat_body).encode(), headers=fwd)
+                    try:
+                        retry_upstream = urllib.request.urlopen(new_req, timeout=_upstream_timeout(body, True))
+                        # Discard the first attempt's events and observed state.
+                        collected_events = []
+                        last_resp_id = last_output = last_status = None
+                        finish_reason = None
+                        has_content = False
+                        for event in oa_stream_to_sse(retry_upstream, model, body.get("request_id") or body.get("id")):
+                            collected_events.append(event)
+                            _observe_event(event)
+                        input_data = new_input
+                    except Exception as e:
+                        print(f"[provider-sensor] synthetic retry failed: {e}", file=sys.stderr)
+
+            # Auto-retry on finish_reason=length with no content due to too much context.
+            if finish_reason == "length" and not has_content and isinstance(input_data, list) and len(input_data) > 5:
+                print(f"[crof-adaptive] RETRY: finish_reason=length with no content, compacting {n_items} items", file=sys.stderr)
+                new_input = _crof_compact_for_retry(input_data, model)
+                if len(new_input) < len(input_data):
+                    # NOTE(review): new_body is built but not used afterwards in this method.
+                    new_body = dict(body)
+                    new_body["input"] = new_input
+                    new_messages = oa_input_to_messages(new_input)
+                    instructions = body.get("instructions", "").strip()
+                    if instructions:
+                        new_messages.insert(0, {"role": "system", "content": instructions})
+                    # Reuse the original chat body and swap in the compacted messages.
+                    new_chat_body = dict(chat_body)
+                    new_chat_body["messages"] = new_messages
+                    new_req = urllib.request.Request(
+                        target,
+                        data=json.dumps(new_chat_body).encode(),
+                        headers=fwd,
+                    )
+                    try:
+                        retry_upstream = urllib.request.urlopen(new_req, timeout=_upstream_timeout(body, True))
+                        collected_events = []
+                        last_resp_id = last_output = last_status = None
+                        finish_reason = None
+                        has_content = False
+                        for event in oa_stream_to_sse(retry_upstream, model, body.get("request_id") or body.get("id")):
+                            collected_events.append(event)
+                            _observe_event(event)
+                        input_data = new_input
+                    except Exception as e:
+                        print(f"[crof-adaptive] retry failed: {e}", file=sys.stderr)
+
+            # Only now are the (possibly replaced) events written to the client.
+            self.stream_buffered_events(collected_events)
+        else:
+            result = oa_resp_to_responses(json.loads(upstream.read()), model)
+            success = result.get("status") != "incomplete"
+            _crof_record(model, n_items, success)
+            self.send_json(200, result)
+            rid = result.get("id")
+            _log_resp(rid, result.get("status"), result.get("output", []))
+            if rid and input_data is not None:
+                store_response(rid, input_data, result.get("output", []))
+            _record_usage(provider, model, success, time.time() - t0)
+
+    def _forward_oa_compat_retry(self, req, model, chat_body, body, input_data, tracker=None):
+        try:
+            upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, True))
+        except Exception as e:
+            print(f"[crof-adaptive] retry failed: {e}", file=sys.stderr)
+            return
+
+        self.send_response(200)
+        self.send_header("Content-Type", "text/event-stream")
+        self.send_header("Cache-Control", "no-cache")
+        self.send_header("Connection", "keep-alive")
+        self.end_headers()
+        if hasattr(self, 'connection') and self.connection:
+            try:
+                self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+            except Exception:
+                pass
+
+        last_resp_id = None
+        last_output = None
+        last_status = None
+        try:
+            def on_event(event):
+                nonlocal last_resp_id, last_output, last_status
+                if tracker and tracker.cancelled.is_set():
+                    print("[translate-proxy] retry stream cancelled", file=sys.stderr)
+                    return False
+                for line in event.strip().split("\n"):
+                    if line.startswith("data: "):
+                        try:
+                            d = json.loads(line[6:])
+                            if d.get("type") == "response.completed":
+                                 last_resp_id = d.get("response", {}).get("id")
+                                 last_output = d.get("response", {}).get("output", [])
+                                 last_status = d.get("response", {}).get("status")
+                        except: pass
+                return True
+            self.stream_buffered_events(oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id")), on_event=on_event)
+        except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
+            print("[translate-proxy] client disconnected during retry stream", file=sys.stderr)
+
+        n_items = len(input_data) if isinstance(input_data, list) else 1
+        _crof_record(model, n_items, last_status == "completed")
+        _log_resp(last_resp_id, last_status or "retry_disconnect", last_output)
+        if last_resp_id and input_data is not None:
+            store_response(last_resp_id, input_data, last_output)
+
+    def _handle_anthropic(self, body, model, stream, tracker=None):
+        input_data = body.get("input", "")
+        an_body = {"model": model, "messages": an_input_to_messages(input_data),
+                   "max_tokens": body.get("max_output_tokens", 8192)}
+        instructions = body.get("instructions", "").strip()
+        if instructions:
+            an_body["system"] = [{"type": "text", "text": instructions,
+                                   "cache_control": {"type": "ephemeral"}}]
+        for k in ("temperature", "top_p"):
+            if k in body:
+                an_body[k] = body[k]
+        tools = an_convert_tools(body.get("tools"))
+        if tools:
+            an_body["tools"] = tools
+        if body.get("tool_choice"):
+            tc = body["tool_choice"]
+            if isinstance(tc, str):
+                an_body["tool_choice"] = {"type": tc}
+            elif isinstance(tc, dict):
+                an_body["tool_choice"] = tc
+        an_body["stream"] = stream
+
+        target = upstream_target(TARGET_URL, "/messages")
+        req = urllib.request.Request(
+            target,
+            data=json.dumps(an_body).encode(),
+            headers=forwarded_headers(self.headers, {
+                "Content-Type": "application/json",
+                "x-api-key": API_KEY,
+                "anthropic-version": "2023-06-01",
+            }),
+        )
+        self._forward(req, stream, model,
+            lambda r: an_resp_to_responses(json.loads(r.read()), model),
+            lambda s: an_stream_to_sse(s, model, body.get("request_id") or body.get("id")),
+            input_data=body.get("input", ""), tracker=tracker)
+
+    def _handle_command_code(self, body, model, stream, tracker=None):
+        """[ALL FIXES IN ONE] CommandCode /alpha/generate adapter.
+
+        FIX 1: Uses cc_input_to_messages (string content only, no content blocks)
+        FIX 2: Always sends x-command-code-version header (fallback "0.26.8")
+        FIX 3: No stale schema cache — cleared, 24h TTL
+        FIX 4: Streaming path wrapped in try/except → sends response.completed(status="failed") on crash
+        FIX 5: Response parser (_parse_commandcode_text_tool_calls) now extracts raw JSON tool calls
+        FIX 6: Arguments no longer double-wrapped (three-tier parser in _extract_args)
+        FIX 7: _extract_field handles escaped values (\") correctly
+        FIX 8: sandbox_permissions normalized to valid variants only
+        REVERTED: Removed adaptive probing system (caused format mismatch).
+        Uses conservative cc_input_to_messages format exclusively.
+        ErrorAnalyzer learning on retries (not proactive probes).
+        """
+        input_data = body.get("input", "")
+        instructions = body.get("instructions", "").strip()
+
+        schema = _load_schema(model=model)
+
+        thread_id = body.get("request_id") or body.get("id") or ""
+        try:
+            uuid.UUID(thread_id)
+        except (ValueError, AttributeError):
+            thread_id = str(uuid.uuid4())
+
+        # Build auth headers
+        auth_val = f"{schema.auth_scheme}{API_KEY}" if schema.auth_scheme else API_KEY
+        headers_extra = {
+            "Content-Type": "application/json",
+            "Accept": "text/event-stream, application/json",
+        }
+        if schema.auth_header:
+            headers_extra[schema.auth_header] = auth_val
+        else:
+            headers_extra["Authorization"] = f"Bearer {API_KEY}"
+        headers_extra["x-command-code-version"] = CC_VERSION or "0.26.8"
+
+        pm = schema.param_names
+        tp = schema.field_names.get("tools_param", "tools")
+        target = upstream_target(TARGET_URL, "/alpha/generate")
+
+        # ── MAIN REQUEST WITH RETRY ──
+        max_retries = 2
+        for attempt in range(max_retries + 1):
+            cc_msgs = cc_input_to_messages(input_data, instructions, schema)
+            cc_body = {
+                "config": _cc_config(),
+                "memory": "", "taste": "", "skills": "",
+                "params": {
+                    "stream": True,
+                    pm.get("max_tokens", "max_tokens"): body.get("max_output_tokens", 64000),
+                    pm.get("temperature", "temperature"): body.get("temperature", 0.3),
+                    "messages": cc_msgs,
+                    "model": model,
+                    tp: [],
+                },
+                "threadId": thread_id,
+            }
+
+            fwd = forwarded_headers(self.headers, headers_extra, browser_ua=True)
+            print(f"[{self._session_id}] POST {target} model={model} stream={stream} attempt={attempt} [command-code]", file=sys.stderr)
+            req = urllib.request.Request(
+                target,
+                data=json.dumps(cc_body).encode(),
+                headers=fwd,
+            )
+
+            try:
+                upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, True))
+                break
+            except urllib.error.HTTPError as e:
+                err = e.read().decode()
+                if attempt < max_retries:
+                    hints = ErrorAnalyzer.analyze(err, schema)
+                    if hints:
+                        print(f"[{self._session_id}] error analysis: {hints}", file=sys.stderr)
+                        ErrorAnalyzer.merge_into_schema(hints, schema)
+                        _save_schema(schema, model=model)
+                        continue
+                    if e.code in (429, 502, 503):
+                        time.sleep(min(2 ** (attempt + 1), 10))
+                        continue
+                return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err)}})
+            except Exception as e:
+                if attempt < max_retries:
+                    time.sleep(1)
+                    continue
+                return self.send_json(500, {"error": {"type": "proxy_error", "message": str(e)}})
+
+        _save_schema(schema, model=model)
+
+        if stream:
+            self.send_response(200)
+            self.send_header("Content-Type", "text/event-stream")
+            self.send_header("Cache-Control", "no-cache")
+            self.send_header("Connection", "keep-alive")
+            self.end_headers()
+            if hasattr(self, 'connection') and self.connection:
+                try:
+                    self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+                except Exception:
+                    pass
+            last_resp_id = None
+            last_output = None
+            def on_event(event):
+                nonlocal last_resp_id, last_output
+                if tracker and tracker.cancelled.is_set():
+                    print("[command-code] stream cancelled", file=sys.stderr)
+                    return False
+                for line in event.strip().split("\n"):
+                    if line.startswith("data: "):
+                        try:
+                            d = json.loads(line[6:])
+                            if d.get("type") == "response.completed":
+                                last_resp_id = d.get("response", {}).get("id")
+                                last_output = d.get("response", {}).get("output", [])
+                        except: pass
+                return True
+            try:
+                self.stream_buffered_events(cc_stream_to_sse(upstream, model, body.get("request_id") or body.get("id")), on_event=on_event)
+            except Exception as e:
+                print(f"[{self._session_id}] stream error: {e}", file=sys.stderr)
+                try:
+                    err_event = 'data: ' + json.dumps({"type": "response.completed",
+                        "response": {"id": body.get("request_id") or body.get("id") or uid("resp"),
+                                     "object": "response", "model": model, "status": "failed",
+                                     "created": int(time.time()), "output": [],
+                                     "usage": {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0,
+                                               "input_tokens_details": {"cached_tokens": 0}}}})
+                    self.wfile.write(err_event.encode())
+                    self.wfile.flush()
+                except Exception:
+                    pass
+            if last_resp_id:
+                store_response(last_resp_id, body.get("input", ""), last_output)
+        else:
+            raw = upstream.read().decode()
+            result = cc_resp_to_responses(raw, model)
+            self.send_json(200, result)
+            rid = result.get("id")
+            if rid:
+                store_response(rid, body.get("input", ""), result.get("output", []))
+
+    def _handle_freebuff(self, body, model, stream, tracker=None):
+        """Serve a request through the freebuff chat-completions endpoint.
+
+        Resolves the freebuff agent for ``model`` (exact id, then a
+        punctuation-insensitive substring match, then the DeepSeek flash
+        fallback), starts an agent run and relays the upstream reply. When
+        the upstream rejects the request with a reasoning_content error the
+        request is replayed once via ``_fb_retry_thinking_disabled``.
+
+        Args:
+            body: Parsed request payload (dict).
+            model: Requested model id; replaced by the matched or fallback id.
+            stream: True to relay the reply as server-sent events.
+            tracker: Optional object whose ``cancelled`` event stops streaming.
+        """
+        token = _get_freebuff_token()
+        if not token:
+            return self.send_json(401, {"error": {"type": "auth_error",
+                "message": "No freebuff credentials found. Install freebuff (npm i -g freebuff) and login first."}})
+
+        agent_id = _FREEBUFF_AGENT_MAP.get(model)
+        if not agent_id:
+            # Compare ids with "/" and "-" removed so near-miss spellings still match.
+            wanted = model.lower().replace("/", "").replace("-", "")
+            matched = next((m for m in _FREEBUFF_AGENT_MAP
+                            if wanted in m.lower().replace("/", "").replace("-", "")), None)
+            if matched:
+                agent_id = _FREEBUFF_AGENT_MAP[matched]
+                model = matched
+            else:
+                fallback_model = "deepseek/deepseek-v4-flash"
+                agent_id = _FREEBUFF_AGENT_MAP.get(fallback_model, "base2-free-deepseek-flash")
+                print(f"[freebuff] unknown model '{model}', falling back to {fallback_model}", file=sys.stderr)
+                model = fallback_model
+
+        run_id = _freebuff_start_run(token, agent_id)
+        if not run_id:
+            return self.send_json(502, {"error": {"type": "upstream_error",
+                "message": "Failed to start freebuff agent run. Check credentials and network."}})
+
+        instance_id = _freebuff_get_session(token, model)
+
+        input_data = body.get("input", "")
+        # "instructions" may be an explicit JSON null; treat it like an empty string.
+        instructions = (body.get("instructions") or "").strip()
+        messages = _fb_input_to_messages(input_data, instructions)
+        messages = _ds_rebuild_tool_history(messages)
+        chat_body = self._fb_build_chat_body(body, model, stream, messages, run_id, instance_id)
+
+        target = f"{_FREEBUFF_API_URL}/api/v1/chat/completions"
+        print(f"[{self._session_id}] [freebuff] POST {target} model={model} stream={stream} run={run_id}", file=sys.stderr)
+
+        try:
+            upstream = self._fb_open_chat(target, token, chat_body, _upstream_timeout(body, stream))
+        except urllib.error.HTTPError as e:
+            err_body = e.read().decode()[:1000]
+            _freebuff_finish_run(token, run_id, "failed")
+            if _is_reasoning_content_error(err_body):
+                print(f"[freebuff] reasoning_content error, retrying with thinking disabled (DeepSeek native format): {err_body[:200]}", file=sys.stderr)
+                return self._fb_retry_thinking_disabled(body, model, token, agent_id, stream, tracker, input_data, instructions, err_body)
+            print(f"[freebuff] HTTP {e.code}: {err_body[:300]}", file=sys.stderr)
+            return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}})
+        except Exception as e:
+            _freebuff_finish_run(token, run_id, "failed")
+            return self.send_json(502, {"error": {"type": "proxy_error", "message": str(e)}})
+
+        self._fb_relay(upstream, token, run_id, body, model, stream, tracker, input_data)
+
+    def _fb_retry_thinking_disabled(self, body, model, token, agent_id, stream, tracker, input_data, instructions, original_error):
+        """Replay a freebuff request with DeepSeek thinking disabled.
+
+        Starts a fresh agent run, rebuilds the messages from ``input_data``,
+        passes them through ``_freebuff_hard_disable_reasoning`` and sends
+        ``{"thinking": {"type": "disabled"}}`` with the request.
+
+        Args:
+            body: Parsed request payload (dict).
+            model: Resolved model id.
+            token: freebuff credential used for the run and the request.
+            agent_id: Agent id used to start the new run.
+            stream: True to relay the reply as server-sent events.
+            tracker: Optional object whose ``cancelled`` event stops streaming.
+            input_data: Original request input.
+            instructions: Already-stripped instructions text.
+            original_error: Error body that triggered the retry; currently
+                unused and kept for interface compatibility.
+        """
+        run_id = _freebuff_start_run(token, agent_id)
+        if not run_id:
+            return self.send_json(502, {"error": {"type": "upstream_error",
+                "message": "Failed to start freebuff agent run for retry."}})
+        instance_id = _freebuff_get_session(token, model)
+        messages = _fb_input_to_messages(input_data, instructions)
+        _freebuff_hard_disable_reasoning(messages)
+        chat_body = self._fb_build_chat_body(body, model, stream, messages, run_id, instance_id,
+                                             extra={"thinking": {"type": "disabled"}})
+        target = f"{_FREEBUFF_API_URL}/api/v1/chat/completions"
+        print(f"[freebuff] retry POST {target} model={model} stream={stream} run={run_id} (thinking disabled via DeepSeek native)", file=sys.stderr)
+        try:
+            upstream = self._fb_open_chat(target, token, chat_body, _upstream_timeout(body, stream))
+        except urllib.error.HTTPError as e:
+            err_body = e.read().decode()[:500]
+            _freebuff_finish_run(token, run_id, "failed")
+            print(f"[freebuff] thinking-disabled retry failed: HTTP {e.code}: {err_body[:300]}", file=sys.stderr)
+            return self.send_json(e.code, {"error": {"type": "freebuff_deepseek_thinking_error",
+                "message": "FreeBuff/DeepSeek V4 requires reasoning_content round-trip for tool-call sessions. Use Command Code provider for this model instead.", "upstream_error": _sanitize_err_body(err_body)}})
+        except Exception as e:
+            _freebuff_finish_run(token, run_id, "failed")
+            return self.send_json(502, {"error": {"type": "proxy_error", "message": str(e)}})
+        self._fb_relay(upstream, token, run_id, body, model, stream, tracker, input_data, label="retry ")
+
+    def _fb_build_chat_body(self, body, model, stream, messages, run_id, instance_id, extra=None):
+        """Build the chat-completions payload shared by the freebuff request paths."""
+        metadata = {"run_id": run_id, "cost_mode": "free"}
+        if instance_id:
+            metadata["freebuff_instance_id"] = instance_id
+        chat_body = {
+            "model": model,
+            "messages": messages,
+            "stream": stream,
+            # Never request fewer than 64000 tokens; a null value counts as "not set".
+            "max_tokens": max(body.get("max_output_tokens") or 0, 64000),
+        }
+        if extra:
+            chat_body.update(extra)
+        chat_body["codebuff_metadata"] = metadata
+        for k in ("temperature", "top_p"):
+            if k in body:
+                chat_body[k] = body[k]
+        tools = oa_convert_tools(body.get("tools"))
+        if tools:
+            chat_body["tools"] = tools
+        if body.get("tool_choice"):
+            chat_body["tool_choice"] = body["tool_choice"]
+        return chat_body
+
+    def _fb_open_chat(self, target, token, chat_body, timeout):
+        """POST ``chat_body`` to ``target`` and return the open upstream response."""
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {token}",
+            "User-Agent": "codex-launcher/3.8.4",
+        }
+        req = urllib.request.Request(target, data=json.dumps(chat_body).encode(), headers=headers)
+        return urllib.request.urlopen(req, timeout=timeout)
+
+    def _fb_relay(self, upstream, token, run_id, body, model, stream, tracker, input_data, label=""):
+        """Relay an open freebuff response to the client, then finish the run.
+
+        The upstream response is always closed. The run is reported as
+        "completed" unless relaying raised, in which case it is reported as
+        "failed" and the exception propagates. ``label`` is inserted into the
+        log lines ("retry " for the thinking-disabled replay).
+        """
+        t0 = time.time()
+        run_status = "completed"
+        try:
+            if stream:
+                self.send_response(200)
+                self.send_header("Content-Type", "text/event-stream")
+                self.send_header("Cache-Control", "no-cache")
+                self.send_header("Connection", "keep-alive")
+                self.end_headers()
+                if hasattr(self, 'connection') and self.connection:
+                    try:
+                        self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+                    except Exception:
+                        pass
+
+                final = {"id": None, "output": None, "status": None}
+                reasoning_out = {}
+
+                def _on_event(event):
+                    # Returning False stops the relay; anything else continues it.
+                    if tracker and tracker.cancelled.is_set():
+                        return False
+                    for line in event.strip().split("\n"):
+                        if line.startswith("data: "):
+                            try:
+                                d = json.loads(line[6:])
+                                if d.get("type") == "response.completed":
+                                    resp = d.get("response", {})
+                                    final["id"] = resp.get("id")
+                                    final["output"] = resp.get("output", [])
+                                    final["status"] = resp.get("status")
+                            except Exception:
+                                pass
+                    return None
+
+                try:
+                    self.stream_buffered_events(
+                        oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id"),
+                                         _reasoning_out=reasoning_out),
+                        on_event=_on_event)
+                except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
+                    # A client hang-up is not an upstream failure; the run still completes.
+                    print(f"[{self._session_id}] [freebuff] {label}client disconnected", file=sys.stderr)
+                    return
+
+                # An "incomplete" response is recorded as unsuccessful.
+                success = final["status"] != "incomplete"
+                _record_usage("freebuff", model, success, time.time() - t0)
+                if final["id"] and input_data is not None:
+                    store_response(final["id"], input_data, final["output"])
+                # Parenthesised on purpose: without a response id nothing is stored.
+                if final["id"] and (reasoning_out.get("text") or reasoning_out.get("tool_calls")):
+                    asm = {"role": "assistant", "content": reasoning_out.get("text", "") or ""}
+                    if reasoning_out.get("tool_calls"):
+                        asm["tool_calls"] = reasoning_out["tool_calls"]
+                    if reasoning_out.get("text"):
+                        asm["reasoning_content"] = reasoning_out["text"]
+                    _ds_store_assistant(final["id"], asm)
+                print(f"[{self._session_id}] [freebuff] {label}stream done status={final['status']} in {time.time()-t0:.1f}s", file=sys.stderr)
+            else:
+                raw = upstream.read().decode()
+                chat_resp = json.loads(raw)
+                result = oa_resp_to_responses(chat_resp, model)
+                self.send_json(200, result)
+                rid = result.get("id")
+                if rid:
+                    store_response(rid, input_data, result.get("output", []))
+                print(f"[{self._session_id}] [freebuff] {label}non-stream done in {time.time()-t0:.1f}s", file=sys.stderr)
+        except Exception:
+            run_status = "failed"
+            raise
+        finally:
+            try:
+                upstream.close()
+            finally:
+                _freebuff_finish_run(token, run_id, run_status)
+
+    def _handle_auto(self, body, model, stream, tracker=None):
+        """Auto-sensing backend: probe schema, adapt, retry on errors.
+
+        Uses hostname heuristics as initial guess, then learns from errors
+        and caches the learned schema for subsequent requests.
+
+        Args:
+            body: Parsed request payload (dict).
+            model: Model id sent upstream and passed to the schema load/save helpers.
+            stream: True to relay the reply as server-sent events.
+            tracker: Not used by this handler; accepted so it can be called
+                like the other backend handlers.
+        """
+        input_data = body.get("input", "")
+        # "instructions" may be an explicit JSON null; treat it like an empty string.
+        instructions = (body.get("instructions") or "").strip()
+
+        schema = _load_schema(model=model)
+        fresh = not schema.hints().get("_updated")
+        host = urllib.parse.urlparse(TARGET_URL).netloc.lower()
+
+        def _detect_style():
+            cc = schema.cc_body_wrap or "commandcode" in host or "command-code" in host
+            anth = schema.tool_call_style == "anthropic_tool_use" or any(h in host for h in ("anthropic", "claude"))
+            return cc, anth
+
+        is_cc, is_anthropic = _detect_style()
+
+        def _endpoint():
+            # A learned endpoint wins; otherwise derive it from the detected style.
+            ep = schema.field_names.get("endpoint_path", "")
+            if ep:
+                return ep
+            if is_cc:
+                return "/alpha/generate"
+            if is_anthropic:
+                return "/messages"
+            return "/chat/completions"
+
+        _FALLBACK_ENDPOINTS = ["/v1/chat/completions", "/chat/completions",
+                                "/v1/messages", "/messages",
+                                "/alpha/generate", "/complete", "/v1/complete"]
+        target = upstream_target(TARGET_URL, _endpoint())
+        tried_endpoints = {target}  # track tried endpoints to avoid loops
+
+        max_retries = 3
+        prev_content_type = None  # for oscillation detection
+        for attempt in range(max_retries + 1):
+            adapter = SchemaAdapter(schema)
+            messages = adapter.convert(input_data, instructions)
+            use_cc_wrap = schema.cc_body_wrap or is_cc
+
+            # Build auth header from schema
+            auth_val = f"{schema.auth_scheme}{API_KEY}" if schema.auth_scheme else API_KEY
+            headers_extra = {"Content-Type": "application/json"}
+            if schema.auth_header:
+                headers_extra[schema.auth_header] = auth_val
+
+            pm = schema.param_names  # short alias
+
+            if use_cc_wrap:
+                # The thread id must parse as a UUID; otherwise a fresh one is generated.
+                thread_id = body.get("request_id") or body.get("id") or str(uuid.uuid4())
+                try:
+                    uuid.UUID(thread_id)
+                except (ValueError, AttributeError):
+                    thread_id = str(uuid.uuid4())
+                params_body = {
+                    "stream": True,
+                    pm.get("max_tokens", "max_tokens"): body.get("max_output_tokens", 64000),
+                    pm.get("temperature", "temperature"): body.get("temperature", 0.3),
+                    "messages": messages,
+                    "model": model,
+                }
+                tp = schema.field_names.get("tools_param", "tools")
+                params_body[tp] = []
+                req_body = {
+                    "config": _cc_config(),
+                    "memory": "", "taste": "", "skills": "",
+                    "params": params_body,
+                    "threadId": thread_id,
+                }
+                if CC_VERSION:
+                    headers_extra["x-command-code-version"] = CC_VERSION
+            elif is_anthropic:
+                req_body = {
+                    "model": model,
+                    "messages": messages,
+                    pm.get("max_tokens", "max_tokens"): body.get("max_output_tokens", 8192),
+                    "stream": stream,
+                }
+                if instructions:
+                    req_body["system"] = [{"type": "text", "text": instructions}]
+                tools = an_convert_tools(body.get("tools"))
+                if tools:
+                    req_body["tools"] = tools
+                headers_extra.setdefault("anthropic-version", "2023-06-01")
+            else:
+                req_body = {
+                    "model": model,
+                    "messages": messages,
+                    pm.get("max_tokens", "max_tokens"): max(body.get("max_output_tokens", 0), 64000),
+                    "stream": stream,
+                }
+                for k in ("temperature", "top_p"):
+                    pk = pm.get(k, k)
+                    if k in body:
+                        req_body[pk] = body[k]
+                if schema.tool_decl_format == "anthropic":
+                    tools = an_convert_tools(body.get("tools"))
+                else:
+                    tools = oa_convert_tools(body.get("tools"))
+                if tools:
+                    req_body["tools"] = tools
+                    req_body["tool_choice"] = body.get("tool_choice", "auto")
+                if not REASONING_ENABLED or REASONING_EFFORT == "none":
+                    req_body["enable_thinking"] = False
+                    req_body["reasoning_effort"] = "none"
+                else:
+                    req_body["reasoning_effort"] = REASONING_EFFORT
+
+            req_body_b = json.dumps(req_body).encode()
+            fwd = forwarded_headers(self.headers, headers_extra, browser_ua=True)
+            print(f"[auto-sense] POST {target} model={model} attempt={attempt} schema={schema.hints()}", file=sys.stderr)
+
+            req = urllib.request.Request(target, data=req_body_b, headers=fwd)
+            try:
+                upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream))
+            except urllib.error.HTTPError as e:
+                err_body = e.read().decode()
+                # ── 404 endpoint fallback ──
+                if e.code == 404 and attempt < max_retries:
+                    for ep in _FALLBACK_ENDPOINTS:
+                        ep_full = upstream_target(TARGET_URL, ep)
+                        if ep_full not in tried_endpoints:
+                            tried_endpoints.add(ep_full)
+                            target = ep_full
+                            # Try the new endpoint without schema change
+                            print(f"[auto-sense] 404 -> trying endpoint {ep_full}", file=sys.stderr)
+                            break
+                    else:
+                        # All endpoints tried -> real 404
+                        return self.send_json(404, {"error": {"type": "not_found", "message": f"No working endpoint found (tried {len(tried_endpoints)} paths)"}})
+                    continue
+                # ── Non-404 error handling ──
+                if attempt < max_retries:
+                    hints = ErrorAnalyzer.analyze(err_body, schema)
+                    oscillation_retry = False
+                    if hints:
+                        # Content-type oscillation detection
+                        if "content_type" in hints:
+                            if prev_content_type is not None and hints["content_type"] != prev_content_type:
+                                print(f"[auto-sense] content_type oscillation: {prev_content_type} -> {hints['content_type']}, freezing", file=sys.stderr)
+                                hints.pop("content_type")
+                                schema.content_type = "string"
+                                prev_content_type = None
+                                oscillation_retry = True  # hints may now be empty, still retry
+                            else:
+                                prev_content_type = hints["content_type"]
+                        else:
+                            prev_content_type = None
+                    if hints:
+                        print(f"[auto-sense] error analysis: {hints}", file=sys.stderr)
+                        ErrorAnalyzer.merge_into_schema(hints, schema)
+                        _save_schema(schema, model=model)
+                        # The learned schema may change both the style and the endpoint.
+                        is_cc, is_anthropic = _detect_style()
+                        target = upstream_target(TARGET_URL, _endpoint())
+                        continue
+                    if oscillation_retry:
+                        continue
+                    if e.code in (429, 502, 503):
+                        wait = min(2 ** (attempt + 1), 15)
+                        time.sleep(wait)
+                        continue
+                return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}})
+            except Exception as e:
+                if attempt < max_retries:
+                    continue
+                return self.send_json(500, {"error": {"type": "proxy_error", "message": str(e)}})
+
+            if fresh:
+                _save_schema(schema, model=model)
+
+            try:
+                self._auto_relay_response(upstream, schema, body, model, stream, use_cc_wrap, is_anthropic)
+            finally:
+                # Release the upstream connection even if relaying raised.
+                upstream.close()
+            return
+
+    def _auto_relay_response(self, upstream, schema, body, model, stream, use_cc_wrap, is_anthropic):
+        """Translate an open auto-sensed upstream response and send it to the client.
+
+        The wire format comes from the schema, or from the upstream
+        Content-Type header when the schema still says "auto".
+        """
+        ct = (upstream.headers.get("Content-Type", "") if hasattr(upstream, "headers") else "").lower()
+
+        if stream:
+            if schema.stream_format == "auto":
+                if "text/event-stream" in ct:
+                    sf = "sse_data"
+                elif "x-ndjson" in ct or "jsonlines" in ct or "json-seq" in ct:
+                    sf = "json_lines"
+                else:
+                    sf = "sse_data" if not use_cc_wrap else "json_lines"
+            else:
+                sf = schema.stream_format
+
+            self.send_response(200)
+            self.send_header("Content-Type", "text/event-stream")
+            self.send_header("Cache-Control", "no-cache")
+            self.send_header("Connection", "keep-alive")
+            self.end_headers()
+
+            resp_id = body.get("request_id") or body.get("id")
+            if sf == "json_lines" or use_cc_wrap:
+                events = cc_stream_to_sse(upstream, model, resp_id)
+            elif sf == "sse_event" or is_anthropic:
+                events = an_stream_to_sse(upstream, model, resp_id)
+            else:
+                events = oa_stream_to_sse(upstream, model, resp_id)
+            self.stream_buffered_events(events)
+            return
+
+        if schema.response_format == "auto":
+            if "application/json" in ct or not ct:
+                rf = "json"
+            elif "x-ndjson" in ct:
+                rf = "ndjson"
+            else:
+                rf = "json"
+        else:
+            rf = schema.response_format
+
+        raw = upstream.read().decode().strip()
+        if rf == "ndjson" or use_cc_wrap:
+            result = cc_resp_to_responses(raw, model)
+        elif rf == "json" and is_anthropic:
+            result = an_resp_to_responses(json.loads(raw), model)
+        else:
+            result = oa_resp_to_responses(json.loads(raw), model)
+        self.send_json(200, result)
+
+    def _forward(self, req, stream, model, nonstream_fn, stream_fn, input_data=None, tracker=None):
+        """Send a prepared upstream request and relay the translated reply.
+
+        Args:
+            req: Ready-to-send ``urllib.request.Request``.
+            stream: True to relay ``stream_fn(upstream)`` as server-sent
+                events; otherwise ``nonstream_fn(upstream)`` is sent as JSON.
+            model: Not used by this method; kept for interface compatibility.
+            nonstream_fn: Callable turning the upstream response into a result dict.
+            stream_fn: Callable turning the upstream response into an event iterable.
+            input_data: Original request input; when not None the final
+                response is recorded with ``store_response``.
+            tracker: Optional object whose ``cancelled`` event stops streaming.
+        """
+        try:
+            upstream = urllib.request.urlopen(req, timeout=_upstream_timeout({}, stream))
+        except urllib.error.HTTPError as e:
+            err = e.read().decode()
+            # Sanitize the upstream body like the other handlers do before echoing it.
+            return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err)}})
+        except Exception as e:
+            return self.send_json(500, {"error": {"type": "proxy_error", "message": str(e)}})
+
+        try:
+            if stream:
+                self.send_response(200)
+                self.send_header("Content-Type", "text/event-stream")
+                self.send_header("Cache-Control", "no-cache")
+                self.send_header("Connection", "keep-alive")
+                self.end_headers()
+                if hasattr(self, 'connection') and self.connection:
+                    try:
+                        self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+                    except Exception:
+                        pass
+                last_resp_id = None
+                last_output = None
+                last_status = None
+
+                def on_event(event):
+                    # Remember the final response so it can be logged and stored.
+                    nonlocal last_resp_id, last_output, last_status
+                    if tracker and tracker.cancelled.is_set():
+                        print("[translate-proxy] stream cancelled", file=sys.stderr)
+                        return False
+                    for line in event.strip().split("\n"):
+                        if line.startswith("data: "):
+                            try:
+                                d = json.loads(line[6:])
+                                if d.get("type") == "response.completed":
+                                    last_resp_id = d.get("response", {}).get("id")
+                                    last_output = d.get("response", {}).get("output", [])
+                                    last_status = d.get("response", {}).get("status")
+                            except Exception:
+                                # Malformed data lines are ignored; never mask interpreter exits.
+                                pass
+                    return True
+
+                try:
+                    self.stream_buffered_events(stream_fn(upstream), on_event=on_event)
+                except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
+                    print("[translate-proxy] client disconnected during stream", file=sys.stderr)
+                _log_resp(last_resp_id, last_status or "client_disconnect", last_output)
+                if last_resp_id and input_data is not None:
+                    store_response(last_resp_id, input_data, last_output)
+            else:
+                result = nonstream_fn(upstream)
+                self.send_json(200, result)
+                rid = result.get("id")
+                _log_resp(rid, result.get("status"), result.get("output", []))
+                if rid and input_data is not None:
+                    store_response(rid, input_data, result.get("output", []))
+        finally:
+            # Release the upstream connection on every exit path.
+            upstream.close()
+
+    def send_json(self, status, data):
+        """Serialize ``data`` to JSON and send it as a complete HTTP response.
+
+        Args:
+            status: HTTP status code for the response line.
+            data: JSON-serializable object used as the response body.
+        """
+        payload = json.dumps(data).encode()
+        self.send_response(status)
+        for header, value in (("Content-Type", "application/json"),
+                              ("Content-Length", str(len(payload)))):
+            self.send_header(header, value)
+        self.end_headers()
+        self.wfile.write(payload)
+
+    def stream_buffered_events(self, event_iter, flush_interval=0.03, max_bytes=4096, on_event=None):
+        """Write events to the client, coalescing small ones into fewer writes.
+
+        Events are appended to a buffer that is flushed when it reaches
+        ``max_bytes``, when ``flush_interval`` seconds have passed since the
+        last flush (checked as each event arrives), or immediately after an
+        event containing a terminal/urgent marker. Whatever is still buffered
+        is flushed when the iterator ends or the callback stops the loop.
+
+        Args:
+            event_iter: Iterable of events, each ``str`` or ``bytes``.
+            flush_interval: Maximum seconds between flushes while events arrive.
+            max_bytes: Buffered size that triggers a flush.
+            on_event: Optional callback invoked with each event before it is
+                buffered; returning exactly ``False`` stops the relay and
+                drops that event.
+        """
+        # Matched against the encoded event so str and bytes events behave alike.
+        # Both compact and json.dumps-default spellings of the error marker are covered.
+        urgent_markers = (
+            b"response.completed",
+            b"response.output_text.done",
+            b"response.output_item.done",
+            b"function_call_arguments.done",
+            b"response.failed",
+            b'"type":"error"',
+            b'"type": "error"',
+        )
+        hard_cap = 8 * 1024 * 1024
+        pending = bytearray()
+        last_flush = time.monotonic()
+
+        def _flush():
+            nonlocal last_flush
+            if pending:
+                self.wfile.write(pending)
+                self.wfile.flush()
+                pending.clear()
+                last_flush = time.monotonic()
+
+        for event in event_iter:
+            if on_event is not None and on_event(event) is False:
+                break
+            encoded = event.encode("utf-8") if isinstance(event, str) else event
+            # Never let one oversized event push the buffer past the hard cap.
+            if len(pending) + len(encoded) > hard_cap:
+                _flush()
+            pending.extend(encoded)
+            urgent = any(marker in encoded for marker in urgent_markers)
+            if urgent or len(pending) >= max_bytes or time.monotonic() - last_flush >= flush_interval:
+                _flush()
+        _flush()
+
+    def log_message(self, fmt, *args):
+        """Print one log line to stderr, tagged with the session id and backend.
+
+        ``fmt`` is %-formatted with ``args`` only when ``args`` is non-empty.
+        The tag falls back to "proxy" when no session id is set.
+        """
+        if args:
+            fmt = fmt % args
+        session = getattr(self, '_session_id', None)
+        if not session:
+            session = 'proxy'
+        print(f"[{session}] {BACKEND} {fmt}", file=sys.stderr)
+
+_SHUTDOWN_REQUESTED = False
+
+def _handle_shutdown_signal(sig, frame):
+    """Signal handler: flag the shutdown and stop the HTTP server.
+
+    ``shutdown()`` blocks until ``serve_forever()`` has returned, and signal
+    handlers run on the main thread, which is the thread running
+    ``serve_forever()`` in ``main``. Calling it inline would therefore wait
+    on itself forever, so the call is handed to a helper thread.
+
+    Args:
+        sig: Number of the received signal.
+        frame: Current stack frame (unused).
+    """
+    global _SHUTDOWN_REQUESTED
+    _SHUTDOWN_REQUESTED = True
+    print(f"[SELF-REVIVE] Signal {sig} received, shutting down cleanly", flush=True)
+    server = globals().get("SERVER")
+    if server:
+        import threading
+        threading.Thread(target=server.shutdown, name="proxy-shutdown", daemon=True).start()
+ 
+def main():
+    """Start the proxy HTTP server on 127.0.0.1:PORT and serve until shut down.
+
+    Installs the SIGTERM/SIGINT handlers, builds a threaded server with
+    address reuse, prints the startup banner and blocks in
+    ``serve_forever()``. On exit the stats are flushed and the listening
+    socket is closed.
+    """
+    global SERVER, _START_TIME
+    _START_TIME = time.time()
+    _init_runtime()
+    for signum in (signal.SIGTERM, signal.SIGINT):
+        signal.signal(signum, _handle_shutdown_signal)
+    try:
+        from http.server import ThreadingHTTPServer as _BaseSrv
+    except ImportError:
+        # Fallback for interpreters whose http.server lacks ThreadingHTTPServer.
+        class _BaseSrv(socketserver.ThreadingMixIn, http.server.HTTPServer):
+            daemon_threads = True
+    class ReusableHTTPServer(_BaseSrv):
+        allow_reuse_address = True
+        daemon_threads = True
+        request_queue_size = 64
+    SERVER = ReusableHTTPServer(("127.0.0.1", PORT), Handler)
+    print(f"translate-proxy ({BACKEND}) listening on http://127.0.0.1:{PORT}", flush=True)
+    print(f"Target: {TARGET_URL}", flush=True)
+    print(f"Models: {[m['id'] for m in MODELS]}", flush=True)
+    if BGP_ROUTES:
+        print(f"BGP routes: {len(BGP_ROUTES)} ({[r.get('name','?') for r in BGP_ROUTES]})", flush=True)
+    try:
+        SERVER.serve_forever()
+    finally:
+        try:
+            _flush_stats()
+        finally:
+            # Release the listening socket even if flushing stats failed.
+            SERVER.server_close()
+
+if __name__ == "__main__":
+    if "--self-test" in sys.argv:  # run the built-in parser checks and exit (both outcomes call sys.exit) instead of serving
+        _counts = [0, 0]  # [passed, failed]; a list so _check can update it in place
+        def _check(label, condition, detail=""):  # tally one check; a failure prints its label and detail to stderr
+            if condition:
+                _counts[0] += 1
+            else:
+                _counts[1] += 1
+                print(f"  FAIL: {label} {detail}", file=sys.stderr)
+        print("[CC-SELF-TEST] CommandCode Parsing Pipeline", file=sys.stderr)
+        
+        # Test _unwrap_cmd (these simulate what json.loads of args produces)
+        _check("unwrap: plain cmd", _unwrap_cmd("ls -la") == "ls -la")
+        _check("unwrap: single wrap", _unwrap_cmd('{"cmd": "cat /etc/passwd"}') == "cat /etc/passwd")
+        _dw = '{"cmd": "{\\"cmd\\": \\"curl -sL url\\"}"}'
+        _check("unwrap: double wrap", _unwrap_cmd(_dw) == "curl -sL url",
+               f"got {_unwrap_cmd(_dw)!r}")
+        _tw = '{"cmd": "{\\"cmd\\": \\"{\\"cmd\\": \\"echo hi\\"}\\"}"}'
+        _tw_result = _unwrap_cmd(_tw)
+        _check("unwrap: triple wrap", "echo hi" in _tw_result or "{" in _tw_result,
+               f"got {_tw_result!r}")  # triple-unwrap depends on proper JSON escaping; NOTE(review): lenient — also passes when the result merely still contains "{"
+        _check("unwrap: non-dict JSON", _unwrap_cmd('{"foo":"bar"}') == '{"foo":"bar"}')  # an object without a "cmd" key must come back unchanged
+        _check("unwrap: empty string", _unwrap_cmd("") == "")
+        _check("unwrap: None-like", _unwrap_cmd("null") == "null")
+        
+        # Pattern A: double-wrapped cmd (the production bug)
+        # Model text after _extract_args brace-counting produces this args_raw:
+        _args_a_raw = '{"cmd": "{\\"cmd\\": \\"mkdir -p /tmp/test\\"}"}'
+        _calls_a = _sanitize_tool_calls([{
+            "name": "exec_command",
+            "arguments": _args_a_raw,
+        }])
+        _check("double-wrap: sanitized call exists", len(_calls_a) == 1)
+        if _calls_a:
+            _args_a = json.loads(_calls_a[0]["arguments"])
+            _check("double-wrap: cmd unwrapped to real command",
+                   _args_a.get("cmd") == "mkdir -p /tmp/test",
+                   f"cmd={_args_a.get('cmd')!r}")
+        
+        # Pattern B: unescaped inner quotes (model outputs malformed JSON). NOTE(review): each \\\" below evaluates to \" so this fixture is validly escaped JSON — confirm the intended input
+        # The call below goes through _parse_commandcode_text_tool_calls; whether that reaches _extract_raw_json_tool_calls is not visible here
+        _calls_b = _parse_commandcode_text_tool_calls(
+            '{"type":"tool-call","name":"bash",'
+            '"arguments":"{\\\"cmd\\\": \\\"cat file.html\\\", \\\"sp\\\": \\\"allow_all\\\"}"}')
+        _check("unescaped quotes: extracted call", len(_calls_b) >= 1,
+               f"got {len(_calls_b)} calls")  # only the call count is checked, not the extracted arguments
+        
+        # Pattern C: XML format (fixed regex — was broken with unbalanced paren)
+        _calls_c = _parse_commandcode_text_tool_calls(
+            '<tool_call name="bash"><parameter name="command">curl -sL https://example.com</parameter></tool_call)>')  # NOTE(review): closing tag is written '</tool_call)>' — confirm the stray ')' is intentional
+        _check("XML format: extracted call", len(_calls_c) == 1,
+               f"got {len(_calls_c)} calls")
+        if _calls_c:
+            _args_c = json.loads(_calls_c[0]["arguments"])
+            _check("XML: correct cmd", "curl" in _args_c.get("cmd", ""),  # the fixture's "command" parameter is expected under the "cmd" key
+                   f"cmd={_args_c.get('cmd')!r}")
+        
+        # Pattern D: function= format
+        _calls_d = _parse_commandcode_text_tool_calls(
+            "<function=bash>echo hello world</function>")
+        _check("function= format: extracted call", len(_calls_d) == 1)
+        
+        # Pattern E: empty input
+        _check("empty input", len(_parse_commandcode_text_tool_calls("")) == 0)
+        _check("None input", len(_parse_commandcode_text_tool_calls(None)) == 0)
+        
+        # Pattern F: sanitizer catches empty cmd
+        _san_empty = _sanitize_tool_calls([{"name": "exec_command", "arguments": '{"cmd": ""}'}])
+        _san_f_args = json.loads(_san_empty[0]["arguments"]) if _san_empty else {}  # an empty result yields {}, which fails the check below instead of raising
+        _check("sanitizer: empty cmd flagged",
+               "# [CC-SANITIZER]" in _san_f_args.get("cmd", ""),
+               f"cmd={_san_f_args.get('cmd', '')!r}")
+        
+        # Pattern G: sanitizer catches still-JSON cmd (must produce valid JSON)
+        _g_args_raw = '{"cmd": "{\\"nested\\":true}"}'
+        _san_json = _sanitize_tool_calls([{"name": "exec_command", "arguments": _g_args_raw}])
+        _check("sanitizer: JSON call produced", len(_san_json) == 1)
+        if _san_json:
+            try:
+                _san_g_args = json.loads(_san_json[0]["arguments"])
+                _check("sanitizer: output is valid JSON", True)  # reaching this line means json.loads succeeded
+                _check("sanitizer: JSON cmd flagged",
+                       "# [CC-SANITIZER]" in _san_g_args.get("cmd", ""),
+                       f"cmd={_san_g_args.get('cmd', '')!r}")
+            except Exception as e:
+                _check(f"sanitizer: output valid JSON, got {e}", False)  # record the parse error as a failed check
+        
+        # Pattern H: Native <todo_write> XML block parsing and sanitization bypass (FIX 18)
+        _todo_xml = """Some preamble text.
+<todo_write>
+<todos>[{"id":"1","status":"in_progress","description":"Create landing page directory and HTML structure"},{"id":"2","status":"pending","description":"Write the full landing page"}]</todos>
+</todo_write>
+Postamble text."""
+        _calls_h = _parse_commandcode_text_tool_calls(_todo_xml)
+        _check("todo_write: extracted call exists", len(_calls_h) == 1, f"got {len(_calls_h)} calls")
+        if _calls_h:
+            _call_h = _calls_h[0]
+            _check("todo_write: name is TodoWrite", _call_h.get("name") == "TodoWrite")
+            try:
+                _args_h = json.loads(_call_h.get("arguments", "{}"))
+                _todos_h = _args_h.get("todos", [])
+                _check("todo_write: correct todos count", len(_todos_h) == 2, f"got {len(_todos_h)} todos")
+                if len(_todos_h) == 2:  # the fixture's "description" text is expected under both "content" and "activeForm"
+                    _check("todo_write: item 1 content", _todos_h[0].get("content") == "Create landing page directory and HTML structure")
+                    _check("todo_write: item 1 activeForm", _todos_h[0].get("activeForm") == "Create landing page directory and HTML structure")
+                    _check("todo_write: item 1 status", _todos_h[0].get("status") == "in_progress")
+                    _check("todo_write: item 2 status", _todos_h[1].get("status") == "pending")
+                # Confirm that the arguments contain no 'cmd' or sanitization comment
+                _check("todo_write: no cmd injected", "cmd" not in _args_h)  # tests only for a top-level "cmd" key
+            except Exception as e:
+                _check(f"todo_write: parsed JSON error: {e}", False)
+        
+        # Pattern I: Translate execute_request to exec_command (FIX 19)
+        _exec_req_raw = '<｜｜DSML｜｜tool_calls>\n<｜｜DSML｜｜invoke name="execute_request">\n<｜｜DSML｜｜parameter name="command" string="true">ls -la</｜｜DSML｜｜parameter>\n</｜｜DSML｜｜invoke>\n</｜｜DSML｜｜tool_calls>'
+        _calls_i = _parse_commandcode_text_tool_calls(_exec_req_raw)
+        _check("execute_request: mapped successfully", len(_calls_i) == 1, f"got {len(_calls_i)} calls")
+        if _calls_i:
+            _call_i = _calls_i[0]
+            _check("execute_request: name translated to exec_command", _call_i.get("name") == "exec_command", f"got {_call_i.get('name')}")
+            try:
+                _args_i = json.loads(_call_i.get("arguments", "{}"))
+                _check("execute_request: correct command extracted", _args_i.get("cmd") == "ls -la", f"got {_args_i.get('cmd')}")  # the "command" parameter is expected under the "cmd" key
+            except Exception as e:
+                _check(f"execute_request: arguments parsing error: {e}", False)
+
+        # Pattern J: Translate DSML-style explore/explore_agent block (FIX 20)
+        _explore_dsml = '<｜｜DSML｜｜tool_calls>\n  <｜｜DSML｜｜invoke name="explore">\n  <｜｜DSML｜｜parameter name="messages" string="true">[{"content": "Understand what the Z.AI-Chat-for-Android project is about... URL: https://github.rommark.dev/admin/Z.AI-Chat-for-Android", "role": "user"}]</｜｜DSML｜｜parameter>\n  </｜｜DSML｜｜invoke>\n  </｜｜DSML｜｜tool_calls>'
+        _calls_j = _parse_commandcode_text_tool_calls(_explore_dsml)
+        _check("explore DSML: mapped successfully", len(_calls_j) == 1, f"got {len(_calls_j)} calls")
+        if _calls_j:
+            _call_j = _calls_j[0]
+            _check("explore DSML: name translated to exec_command", _call_j.get("name") == "exec_command", f"got {_call_j.get('name')}")
+            try:
+                _args_j = json.loads(_call_j.get("arguments", "{}"))
+                _check("explore DSML: built a curl explore script targeting api base", "api/v1/repos/admin/Z.AI-Chat-for-Android" in _args_j.get("cmd", ""), f"got {_args_j.get('cmd')!r}")  # only the API path substring is asserted, not the presence of curl
+            except Exception as e:
+                _check(f"explore DSML: arguments parsing error: {e}", False)
+
+        # Pattern K: Translate raw JSON-style explore call (FIX 20)
+        _explore_json = '{"type":"tool-call","name":"explore_agent","id":"call_123","arguments":"{\\\"messages\\\": [{\\\"content\\\": \\\"https://github.rommark.dev/admin/Z.AI-Chat-for-Android\\\"}]}"}'
+        _calls_k = _parse_commandcode_text_tool_calls(_explore_json)
+        _check("explore JSON: mapped successfully", len(_calls_k) == 1, f"got {len(_calls_k)} calls")
+        if _calls_k:
+            _call_k = _calls_k[0]
+            _check("explore JSON: name translated to exec_command", _call_k.get("name") == "exec_command")
+            try:
+                _args_k = json.loads(_call_k.get("arguments", "{}"))
+                _check("explore JSON: built a curl explore script targeting api base", "api/v1/repos/admin/Z.AI-Chat-for-Android" in _args_k.get("cmd", ""), f"got {_args_k.get('cmd')!r}")  # same substring-only assertion as Pattern J
+            except Exception as e:
+                _check(f"explore JSON: arguments parsing error: {e}", False)
+
+        # Pattern L: DSML with parameter name="cmd" instead of name="command" (FIX 21)
+        # This is THE critical regression test — the model often uses name="cmd" (matching
+        # the actual tool schema) instead of name="command". Previously the DSML parser
+        # silently dropped these, causing Codex CLI to halt mid-task.
+        _cmd_dsml = '<｜｜DSML｜｜tool_calls>\n  <｜｜DSML｜｜invoke name="exec_command">\n  <｜｜DSML｜｜parameter name="cmd" string="true">curl -sL --max-time 15 \'https://github.rommark.dev/api/v1/repos/admin/Z.AI-Chat-for-Android/contents/README.md\' 2>/dev/null</｜｜DSML｜｜parameter>\n  <｜｜DSML｜｜parameter name="sandbox_permissions" string="true">require_escalated</｜｜DSML｜｜parameter>\n  <｜｜DSML｜｜parameter name="justification" string="true">I need to get the README from the private repo to understand the Android app before building the landing page mockup.</｜｜DSML｜｜parameter>\n  </｜｜DSML｜｜invoke>\n  </｜｜DSML｜｜tool_calls>'
+        _calls_l = _parse_commandcode_text_tool_calls(_cmd_dsml)
+        _check("DSML name=cmd: mapped successfully", len(_calls_l) == 1, f"got {len(_calls_l)} calls")
+        if _calls_l:
+            _call_l = _calls_l[0]
+            _check("DSML name=cmd: name is exec_command", _call_l.get("name") == "exec_command", f"got {_call_l.get('name')}")
+            try:
+                _args_l = json.loads(_call_l.get("arguments", "{}"))
+                _check("DSML name=cmd: cmd extracted correctly", "curl -sL --max-time 15" in _args_l.get("cmd", ""), f"got {_args_l.get('cmd')!r}")
+                _check("DSML name=cmd: sandbox_permissions extracted", _args_l.get("sandbox_permissions") == "require_escalated", f"got {_args_l.get('sandbox_permissions')!r}")  # the extra parameters must survive alongside cmd
+                _check("DSML name=cmd: justification extracted", "README" in _args_l.get("justification", ""), f"got {_args_l.get('justification')!r}")
+            except Exception as e:
+                _check(f"DSML name=cmd: arguments parsing error: {e}", False)
+
+        # Pattern M: explore_agent with nested JSON messages containing URL (FIX 23)
+        _explore_nested = '<explore_agent>\nmessages: [{"content": "Understand the Z.AI-Chat-for-Android repo at https://github.rommark.dev/admin/Z.AI-Chat-for-Android"}]\n</explore_agent>'
+        _calls_m = _parse_commandcode_text_tool_calls(_explore_nested)
+        _check("FIX23 explore nested JSON: parsed", len(_calls_m) == 1, f"got {len(_calls_m)} calls")
+        if _calls_m:
+            _args_m = json.loads(_calls_m[0].get("arguments", "{}"))  # NOTE(review): no try/except here, unlike Patterns H-L — malformed arguments would raise and abort the run before the summary
+            _check("FIX23 explore nested JSON: cmd has curl", "curl" in _args_m.get("cmd", ""), f"got {_args_m.get('cmd')!r}")
+            _check("FIX23 explore nested JSON: URL in cmd", "github.rommark.dev" in _args_m.get("cmd", ""), f"missing URL in cmd")
+
+        # Pattern N: require_escalation block (FIX 24)
+        _esc_text = '<require_escalation>I need to run a command with elevated permissions to access the repository at https://github.rommark.dev/admin/Z.AI-Chat-for-Android</require_escalation>'
+        _calls_n = _parse_commandcode_text_tool_calls(_esc_text)
+        _check("FIX24 require_escalation: parsed", len(_calls_n) == 1, f"got {len(_calls_n)} calls")
+        if _calls_n:
+            _args_n = json.loads(_calls_n[0].get("arguments", "{}"))  # NOTE(review): unguarded json.loads, same caveat as Pattern M
+            _check("FIX24 require_escalation: name is exec_command", _calls_n[0].get("name") == "exec_command", f"got {_calls_n[0].get('name')}")
+            _check("FIX24 require_escalation: cmd has curl or echo", "curl" in _args_n.get("cmd", "") or "echo" in _args_n.get("cmd", ""), f"got {_args_n.get('cmd')!r}")  # either command form is accepted
+
+        # Pattern N2: bare request_escalation_permission tag (FIX 24b)
+        _esc_bare = 'I want to proceed.\n<request_escalation_permission />\nPlease let me continue.'
+        _calls_n2 = _parse_commandcode_text_tool_calls(_esc_bare)
+        _check("FIX24b bare escalation: parsed", len(_calls_n2) == 1, f"got {len(_calls_n2)} calls")
+        if _calls_n2:
+            _check("FIX24b bare escalation: name is exec_command", _calls_n2[0].get("name") == "exec_command", f"got {_calls_n2[0].get('name')}")
+
+        # Pattern O: _build_explore_cmd module-level function (FIX 23/25)
+        _cmd_o, _just_o = _build_explore_cmd("https://github.rommark.dev/admin/Z.AI-Chat-for-Android")  # unpacked as a 2-tuple; only the first element is checked
+        _check("FIX23/25 _build_explore_cmd: returns cmd", _cmd_o is not None, "returned None")
+        _check("FIX23/25 _build_explore_cmd: has curl", _cmd_o and "curl" in _cmd_o, f"no curl in {_cmd_o!r}")  # `_cmd_o and ...` keeps a None result from raising on `in`
+        _check("FIX23/25 _build_explore_cmd: has api path", _cmd_o and "/api/v1/repos/" in _cmd_o, f"no api path in {_cmd_o!r}")
+
+        # Pattern O2: _build_explore_cmd with JSON array containing URL
+        _cmd_o2, _ = _build_explore_cmd('[{"content": "https://github.rommark.dev/admin/Z.AI-Chat-for-Android"}]')
+        _check("FIX23/25 _build_explore_cmd from JSON array: returns cmd", _cmd_o2 is not None, "returned None")
+        _check("FIX23/25 _build_explore_cmd from JSON array: has curl", _cmd_o2 and "curl" in _cmd_o2, f"no curl in {_cmd_o2!r}")
+
+        print(f"[CC-SELF-TEST] Results: {_counts[0]} passed, {_counts[1]} failed",
+              file=sys.stderr)
+        if _counts[1]:  # any failed check -> exit status 1
+            sys.exit(1)
+        else:
+            print("[CC-SELF-TEST] ALL PASSED — pipeline is healthy", file=sys.stderr)
+            sys.exit(0)
+    
+    # [FIX 12] SELF-REVIVE: re-run main() after a crash or an unexpected return; stop on a shutdown signal, KeyboardInterrupt, or after _MAX_RESTARTS restarts
+    _MAX_RESTARTS = 50  # crashes and unexpected returns both count; the counter is never reset in this loop
+    _restart_count = 0
+    _RESTART_BACKOFF = [1, 2, 3, 5, 10, 15, 30]  # seconds, progressive; the last value is reused once the list is exhausted
+    while not _SHUTDOWN_REQUESTED and _restart_count < _MAX_RESTARTS:
+        try:
+            main()
+        except KeyboardInterrupt:  # NOTE(review): main() installs a SIGINT handler, so this is presumably reachable only before that handler is in place
+            print("[SELF-REVIVE] Keyboard interrupt — exiting", flush=True)
+            break
+        except Exception as e:
+            _restart_count += 1
+            _backoff = _RESTART_BACKOFF[min(_restart_count - 1, len(_RESTART_BACKOFF) - 1)]  # clamp the index to the last backoff entry
+            import traceback as _tb
+            print(f"[SELF-REVIVE] CRASH #{_restart_count}/{_MAX_RESTARTS}: {e}", flush=True)
+            print(f"[SELF-REVIVE] Restarting in {_backoff}s... (Ctrl+C to exit)", flush=True)
+            _tb.print_exc()  # full traceback of the crash that triggered this restart
+            time.sleep(_backoff)
+        else:  # main() returned without raising
+            if not _SHUTDOWN_REQUESTED:  # a return without a shutdown request is treated like a crash and restarted
+                _restart_count += 1
+                _backoff = _RESTART_BACKOFF[min(_restart_count - 1, len(_RESTART_BACKOFF) - 1)]
+                print(f"[SELF-REVIVE] main() returned (unexpected), restart #{_restart_count} in {_backoff}s", flush=True)
+                time.sleep(_backoff)
+    
+    if _SHUTDOWN_REQUESTED or _restart_count >= _MAX_RESTARTS:
+        print(f"[SELF-REVIVE] Exiting (shutdown={_SHUTDOWN_REQUESTED}, restarts={_restart_count})", flush=True)