v3.10.8: Fix Re-OAuth buttons, block staging/sandbox for Antigravity, prefer production endpoint

- Fix Linux GUI Re-OAuth: load_oauth_secrets() was undefined, now loads inline
- Fix GLib.idle_add lambda returning truthy tuple (repeated callbacks)
- Proxy: production cloudcode-pa.googleapis.com tried first, sandbox as fallback
- Proxy: 403 SERVICE_DISABLED falls through to next endpoint
- Project discovery validates against production endpoint, not staging
- Antigravity preset base_url changed to production
- Windows GUI project discovery also uses production endpoint
2026-05-25 21:57:30 +04:00
parent 7bc737d8cc
commit f645e92908
11 changed files with 16485 additions and 2586 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,25 @@
 # Changelog
 ## v3.10.8 (2026-05-25)
 **OAuth & Antigravity Endpoint Fixes**
 ### Re-OAuth Buttons Fixed
 - Linux GUI: `load_oauth_secrets()` was undefined — buttons crashed silently on click
 - Now loads OAuth secrets inline from `~/.config/codex-launcher/oauth-secrets.json`
 - Both Linux and Windows Re-OAuth now use PKCE + a localhost callback (previously the deprecated OOB copy-paste flow)
 ### Antigravity Staging/Sandbox Blocked by Default
 - Proxy: production `cloudcode-pa.googleapis.com` tried FIRST, sandbox/daily/autopush as fallback only
 - Proxy: 403 SERVICE_DISABLED now falls through to next endpoint instead of returning error immediately
 - Project discovery: validates against production endpoint, not staging-cloudaicompanion.sandbox
 - Antigravity preset `base_url` changed to production (was `daily-cloudcode-pa.sandbox.googleapis.com`)
 - `[antigravity-endpoint]` log line shows which endpoints are being tried
 ### Other Fixes
 - GLib.idle_add lambda returning truthy tuple fixed (caused repeated callbacks)
 - Windows GUI project discovery also uses production endpoint
 ## v3.10.7 (2026-05-25)
 **Prompt Enhancer — Fix Lost Context After Compaction**
--- a/5726
+++ b/5726
--- a/codex-launcher-gui.py
+++ b/codex-launcher-gui.py
--- a/codex-launcher_3.10.7_all.deb
+++ b/codex-launcher_3.10.7_all.deb
--- a/codex-launcher_3.10.8_all.deb
+++ b/codex-launcher_3.10.8_all.deb
--- a/codex_launcher_lib.py
+++ b/codex_launcher_lib.py
--- a/install.sh
+++ b/install.sh
@@ -3,11 +3,11 @@ set -e
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
-if [ -f "$SCRIPT_DIR/codex-launcher_3.10.7_all.deb" ]; then
+if [ -f "$SCRIPT_DIR/codex-launcher_3.10.8_all.deb" ]; then
-    echo "Installing codex-launcher_3.10.7_all.deb ..."
+    echo "Installing codex-launcher_3.10.8_all.deb ..."
-    sudo dpkg -i "$SCRIPT_DIR/codex-launcher_3.10.7_all.deb"
+    sudo dpkg -i "$SCRIPT_DIR/codex-launcher_3.10.8_all.deb"
    echo ""
-    echo "Installed v3.10.7 via .deb package."
+    echo "Installed v3.10.8 via .deb package."
    echo "  translate-proxy.py   -> /usr/bin/translate-proxy.py"
    echo "  codex-launcher-gui   -> /usr/bin/codex-launcher-gui"
    echo "  cleanup-codex-stale  -> /usr/bin/cleanup-codex-stale.sh"
--- a/src/codex-launcher-gui.py
+++ b/src/codex-launcher-gui.py
--- a/src/codex_launcher_lib.py
+++ b/src/codex_launcher_lib.py
@@ -83,6 +83,16 @@ model_catalog_json = ""
 """
 CHANGELOG = [
    ("3.10.8", "2026-05-25", [
        "Fix Re-OAuth buttons: load_oauth_secrets() was undefined in Linux GUI",
        "Re-OAuth: replaced deprecated OOB flow with PKCE + localhost callback",
        "Proxy: prefer production cloudcode-pa over staging/sandbox endpoints",
        "Proxy: fallthrough 403 SERVICE_DISABLED to next endpoint",
        "Project discovery: validate against production endpoint, not staging",
        "Antigravity preset base_url changed to production (was daily-cloudcode-pa.sandbox)",
        "Fix GLib.idle_add lambda returning truthy tuple (caused repeated calls)",
        "Windows GUI: project discovery also uses production endpoint",
    ]),
    ("3.10.7", "2026-05-25", [
        "Prompt Enhancer: per-provider toggle to improve prompt clarity after compaction",
        "Two modes: offline (template injection) and ai-powered (external LLM rewrites)",
@@ -439,7 +449,7 @@ PROVIDER_PRESETS = {
    },
    "Google Antigravity (OAuth)": {
        "backend_type": "gemini-oauth-antigravity",
-        "base_url": "https://daily-cloudcode-pa.sandbox.googleapis.com",
+        "base_url": "https://cloudcode-pa.googleapis.com",
        "oauth_provider": "google-antigravity",
        "models": [
            "antigravity-gemini-3-flash",
--- a/src/translate-proxy.py
+++ b/src/translate-proxy.py
@@ -4812,9 +4812,9 @@ class Handler(http.server.BaseHTTPRequestHandler):
            wrapped["requestId"] = f"agent-{uuid.uuid4().hex[:12]}"
        endpoints = ([
            "https://cloudcode-pa.googleapis.com",
            "https://daily-cloudcode-pa.sandbox.googleapis.com",
            "https://autopush-cloudcode-pa.sandbox.googleapis.com",
            "https://cloudcode-pa.googleapis.com",
        ] if OAUTH_PROVIDER == "google-antigravity" else [
            "https://cloudcode-pa.googleapis.com",
        ])
@@ -4844,6 +4844,9 @@ class Handler(http.server.BaseHTTPRequestHandler):
            except Exception:
                pass
        if OAUTH_PROVIDER == "google-antigravity":
            print(f"[antigravity-endpoint] endpoints={[e.replace('https://','') for e in endpoints]} project={project_id}", file=sys.stderr)
        for ep in endpoints:
            target = f"{ep}/{url_suffix}"
            req = urllib.request.Request(target, data=body_b, headers=headers)
@@ -4860,6 +4863,9 @@ class Handler(http.server.BaseHTTPRequestHandler):
                        print(f"[{self._session_id}] saved 400 debug request to {debug_path}", file=sys.stderr)
                    except Exception:
                        pass
                if e.code == 403 and "SERVICE_DISABLED" in err_body[:500] and ep != endpoints[-1]:
                    print(f"[{self._session_id}] {ep} SERVICE_DISABLED, trying next endpoint", file=sys.stderr)
                    continue
                if e.code == 429 and ep != endpoints[-1]:
                    print(f"[{self._session_id}] {ep} HTTP 429, trying next endpoint", file=sys.stderr)
                    continue
--- a/translate-proxy.py
+++ b/translate-proxy.py
@@ -157,6 +157,7 @@ Architecture:
 import json, http.server, socketserver, urllib.request, urllib.parse, urllib.error, re
 import time, uuid, os, sys, argparse, threading, socket, collections, contextlib, signal
 import secrets, string
 import dataclasses
 import http.client
 import selectors
@@ -246,6 +247,11 @@ REASONING_ENABLED = True
 REASONING_EFFORT = "medium"
 FORCE_MODEL = ""
 BGP_ROUTES = []
 PROMPT_ENHANCER = False
 PROMPT_ENHANCER_MODE = "offline"
 PROMPT_ENHANCER_MODEL = ""
 PROMPT_ENHANCER_URL = ""
 PROMPT_ENHANCER_KEY = ""
 SERVER = None
 if _IS_WINDOWS:
@@ -310,7 +316,7 @@ _conn_pool = {}
 _STREAM_IDLE_TIMEOUT = 300
-_CODEBUFF_AUTH_URL = "https://codebuff.com"
+_CODEBUFF_AUTH_URL = "https://www.codebuff.com"
 _CODEBUFF_API_URL = "https://www.codebuff.com"
 _CODEBUFF_AGENT_MAP = {
    "deepseek/deepseek-v4-pro": "base2-free-deepseek",
@@ -350,11 +356,11 @@ def _codebuff_get_session(token, model):
            return sc["instance_id"]
    try:
        url = f"{_CODEBUFF_API_URL}/api/v1/freebuff/session"
-        body = json.dumps({"model": model}).encode()
+        body = json.dumps({}).encode()
        req = urllib.request.Request(url, data=body, headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {token}",
-            "User-Agent": "codex-launcher/3.10.4",
+            "User-Agent": "ai-sdk/openai-compatible/1.0.25/codebuff",
            "x-codebuff-model": model,
        })
        try:
@@ -402,7 +408,7 @@ def _codebuff_start_run(token, agent_id):
    req = urllib.request.Request(url, data=body, headers={
        "Content-Type": "application/json",
        "Authorization": f"Bearer {token}",
-        "User-Agent": "codex-launcher/3.10.4",
+        "User-Agent": "ai-sdk/openai-compatible/1.0.25/codebuff",
    })
    try:
        resp = urllib.request.urlopen(req, timeout=15)
@@ -435,7 +441,7 @@ def _codebuff_finish_run(token, run_id, status="completed"):
    req = urllib.request.Request(url, data=body, headers={
        "Content-Type": "application/json",
        "Authorization": f"Bearer {token}",
-        "User-Agent": "codex-launcher/3.10.4",
+        "User-Agent": "ai-sdk/openai-compatible/1.0.25/codebuff",
    })
    try:
        urllib.request.urlopen(req, timeout=10)
@@ -718,7 +724,6 @@ _GEMINI_AGENT_GUARDRAIL = (
    "Always emit the actual tool call in the same response."
 )
 _LOG_FILE = None
 _LOG_FILE_LOCK = threading.Lock()
 def _fetch_antigravity_version():
@@ -769,7 +774,7 @@ def _ensure_antigravity_version():
 def _init_runtime():
    global CONFIG, PORT, BACKEND, TARGET_URL, API_KEY, OAUTH_PROVIDER, _antigravity_version
    global MODELS, CC_VERSION, REASONING_ENABLED, REASONING_EFFORT, BGP_ROUTES
-    global _api_key_pool
+    global _api_key_pool, PROMPT_ENHANCER
    CONFIG = load_config()
    PORT = CONFIG["port"]
@@ -782,6 +787,11 @@ def _init_runtime():
    REASONING_ENABLED = CONFIG.get("reasoning_enabled", True)
    REASONING_EFFORT = CONFIG.get("reasoning_effort", "medium")
    FORCE_MODEL = (CONFIG.get("force_model") or "").strip()
    PROMPT_ENHANCER = CONFIG.get("prompt_enhancer", False)
    PROMPT_ENHANCER_MODE = CONFIG.get("prompt_enhancer_mode", "offline")
    PROMPT_ENHANCER_MODEL = CONFIG.get("prompt_enhancer_model", "")
    PROMPT_ENHANCER_URL = CONFIG.get("prompt_enhancer_url", "")
    PROMPT_ENHANCER_KEY = CONFIG.get("prompt_enhancer_key", "")
    BGP_ROUTES = CONFIG.get("bgp_routes", [])
    _api_key_pool = None
    if API_KEY and "," in API_KEY and not OAUTH_PROVIDER.startswith("google") and BACKEND not in ("codebuff", "freebuff"):
@@ -1297,8 +1307,8 @@ _COMPACT_KEEP_RECENT = 10
 _CROF_ADAPTIVE = {
    "fail_history": [],
    "model_limits": {},
-    "global_item_limit": 30,
+    "global_item_limit": 80,
-    "min_keep_recent": 4,
+    "min_keep_recent": 6,
 }
 _BGP_STATS_PATH = os.path.join(_LOG_DIR, "bgp-route-stats.json")
@@ -1366,6 +1376,8 @@ def _sorted_bgp_routes():
    return sorted(BGP_ROUTES, key=lambda r: _score_route(r, stats))
 def _crof_record(model, n_items, success):
    if TARGET_URL and "crof.ai" not in TARGET_URL:
        return
    if not isinstance(n_items, int) or n_items < 1:
        return
    entry = {"model": model, "items": n_items, "ok": success}
@@ -1391,7 +1403,8 @@ def _crof_record(model, n_items, success):
            global_limit = v["limit"]
    _CROF_ADAPTIVE["global_item_limit"] = global_limit
-    print(f"[crof-adaptive] model={model} items={n_items} {'OK' if success else 'FAIL'} -> limit={ml.get('limit',30)} global={global_limit}", file=sys.stderr)
+    if TARGET_URL and "crof.ai" in TARGET_URL:
        print(f"[crof-adaptive] model={model} items={n_items} {'OK' if success else 'FAIL'} -> limit={ml.get('limit',30)} global={global_limit}", file=sys.stderr)
 def _crof_item_limit(model):
    ml = _CROF_ADAPTIVE["model_limits"].get(model, {})
@@ -1436,7 +1449,8 @@ def _crof_compact_for_retry(input_data, model):
        summary_lines.append(_item_summary(item, max_len=120))
    summary_msg = {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "\n".join(summary_lines)}]}
-    print(f"[crof-adaptive] RETRY compact: {len(input_data)} -> {len(head)+1+len(tail)} (limit={limit}, keep={len(tail)})", file=sys.stderr)
+    if TARGET_URL and "crof.ai" in TARGET_URL:
        print(f"[crof-adaptive] RETRY compact: {len(input_data)} -> {len(head)+1+len(tail)} (limit={limit}, keep={len(tail)})", file=sys.stderr)
    return head + [summary_msg] + tail
 def _item_summary(item, max_len=200):
@@ -1590,6 +1604,10 @@ _PROVIDER_POLICIES = {
                   "tool_output_limit": 6000, "max_input_items": 35, "compaction": "balanced"},
    "openadapter": {"reasoning_mode": "off", "max_tokens": 32768, "strip_reasoning": True,
                    "tool_output_limit": 6000, "max_input_items": 30, "compaction": "balanced"},
    "cloudcode-pa": {"compaction": "aggressive", "context_size": 1000000,
                     "tool_output_limit": 6000, "max_input_items": 60},
    "googleapis": {"compaction": "balanced", "context_size": 1000000,
                   "tool_output_limit": 6000, "max_input_items": 80},
 }
 def provider_policy(target_url=None, backend=None):
@@ -1608,12 +1626,14 @@ _MODEL_CONTEXT = {
    "claude-sonnet": 200000, "claude-haiku": 200000,
    "glm-5.1": 128000, "glm-5": 128000, "glm-4": 128000,
    "deepseek": 64000, "gemini-2.5-flash": 1000000, "gemini-2.5-pro": 2000000,
    "gemini-3-flash": 1000000, "gemini-3.5-flash-low": 1000000,
    "gemini-3.1-pro-low": 2000000,
    "gemini-3.5-flash": 1000000, "gemini-3.1-pro": 2000000,
    "Gemini 3.5 Flash": 1000000, "Gemini 3.1 Pro": 2000000,
    "Claude Sonnet 4.6": 200000, "Claude Opus 4.6": 200000,
    "GPT-OSS 120B": 128000,
-    "claude-sonnet-4.6-thinking": 200000, "claude-opus-4.6-thinking": 200000,
+    "claude-sonnet-4-6": 200000, "claude-opus-4-6-thinking": 200000,
-    "gpt-oss-120b": 128000,
+    "gpt-oss-120b-medium": 128000,
    "mimo": 32768, "minimax": 32768, "kimi": 128000,
    "_default": 32768,
 }
@@ -1641,7 +1661,7 @@ def _estimate_tokens(obj):
 def _adaptive_compact(input_data, model, policy=None):
    policy = policy or {}
    context_size = int(policy.get("context_size", _context_limit_for_model(model)))
-    input_budget = int(context_size * 0.60)
+    input_budget = int(context_size * 0.80)
    estimated = _estimate_tokens(input_data)
    if estimated <= input_budget:
        return input_data, False
@@ -1684,6 +1704,120 @@ def _adaptive_compact(input_data, model, policy=None):
          f"items {len(input_data)}->{len(head)+1+len(tail)}", file=sys.stderr)
    return head + [summary_msg] + tail, True
 # ═══════════════════════════════════════════════════════════════════
 # Prompt Enhancer
 # ═══════════════════════════════════════════════════════════════════
 # System prompt for the external "ai-powered" rewrite: _enhance_prompt_llm sends
 # this as the system message of its chat-completions request.
 _PROMPT_ENHANCER_SYSTEM = """You are a prompt enhancement assistant for a coding agent (Codex CLI).
 Your job: rewrite the user's latest message to be clearer, more specific, and more actionable.
 Rules:
 - Preserve the user's EXACT intent — never change what they want done
 - Add explicit action verbs and step-by-step clarity
 - If the message is vague ("fix it", "make it better"), infer context from prior conversation summary and make it specific
 - Keep the enhanced prompt concise — no longer than 2x the original
 - If the original prompt is already clear and specific, return it unchanged
 - Output ONLY the enhanced prompt text, nothing else
 - Never add tasks the user didn't ask for"""
 # Instruction preamble that _apply_prompt_enhancer prepends to the last user
 # message. NOTE: despite the "OFFLINE" name it is prepended in BOTH modes (in
 # ai-powered mode it goes in front of the LLM-rewritten text). Its leading
 # "<prompt-enhancer>" tag is also the marker _apply_prompt_enhancer checks to
 # avoid enhancing the same message twice, so the text must keep starting with it.
 _PROMPT_ENHANCER_OFFLINE = """<prompt-enhancer>
 <instructions>
 You are a coding agent operating inside a context-compacted session. Follow these rules strictly:
 1. ACTION CLARITY: Re-read the user's latest message. Identify every explicit and implicit action request. Execute ALL of them — do not skip any.
 2. COMPACTED CONTEXT: Previous conversation was summarized. The summary preserves your task history but may lose details. If the user references earlier work ("fix that", "continue", "update it"), infer from the compacted summary what was done and what remains.
 3. NO CLARIFICATION ASKING: Never ask "which file?" or "what exactly?" — infer from context. If truly ambiguous, make a reasonable assumption and proceed. The user can correct you.
 4. DECISIVE EXECUTION: When the user says "fix", "update", "change", "add", "remove" — do it immediately in the relevant file(s). Do not describe what you would do — actually do it.
 5. COMPLETE EDITS: When editing files, make the FULL change requested. Do not partially apply edits or leave placeholders.
 6. PRESERVE WORKING STATE: Never break existing functionality. If changing code, keep all surrounding logic intact.
 7. MULTI-STEP REQUESTS: If the user asks for multiple things, do ALL of them in sequence. Do not stop after the first one.
 </instructions>
 </prompt-enhancer>
 """
 def _enhance_prompt_llm(text, compaction_summary=""):
    """Ask the configured external LLM to rewrite ``text`` more clearly.

    Posts an OpenAI-style request to ``{PROMPT_ENHANCER_URL}/chat/completions``
    using ``PROMPT_ENHANCER_MODEL``; ``PROMPT_ENHANCER_KEY`` is sent as a
    bearer token when it is set.

    Args:
        text: The user's latest prompt.
        compaction_summary: Optional summary of the earlier conversation;
            only its first 2000 characters are forwarded as extra context.

    Returns:
        The rewritten prompt, or ``text`` unchanged when the enhancer is not
        configured, the request or response parsing fails, or the reply is
        empty or shorter than half the original (treated as a bad rewrite).
    """
    if not PROMPT_ENHANCER_MODEL or not PROMPT_ENHANCER_URL:
        return text
    try:
        messages = [{"role": "system", "content": _PROMPT_ENHANCER_SYSTEM}]
        if compaction_summary:
            messages.append({"role": "user", "content": f"Context from earlier conversation (compacted):\n{compaction_summary[:2000]}"})
        messages.append({"role": "user", "content": f"Enhance this prompt:\n{text}"})
        body = json.dumps({"model": PROMPT_ENHANCER_MODEL, "messages": messages, "max_tokens": 2000, "temperature": 0.3}).encode()
        headers = {"Content-Type": "application/json"}
        if PROMPT_ENHANCER_KEY:
            headers["Authorization"] = f"Bearer {PROMPT_ENHANCER_KEY}"
        req = urllib.request.Request(f"{PROMPT_ENHANCER_URL.rstrip('/')}/chat/completions", data=body, headers=headers)
        # Close the HTTP response deterministically instead of leaking the
        # socket until garbage collection.
        with urllib.request.urlopen(req, timeout=15) as resp:
            data = json.loads(resp.read())
        # Providers may return an empty "choices" list or a null "content";
        # treat both as "no enhancement" rather than as an error.
        choices = data.get("choices") or [{}]
        message = choices[0].get("message") or {}
        enhanced = (message.get("content") or "").strip()
        if enhanced and len(enhanced) >= len(text) * 0.5:
            print(f"[prompt-enhancer] AI enhanced: {text[:80]}... -> {enhanced[:80]}...", file=sys.stderr)
            return enhanced
    except Exception as e:
        # Best effort: enhancement must never break the proxied request.
        print(f"[prompt-enhancer] AI enhancement failed: {e}", file=sys.stderr)
    return text
 def _prompt_enhancer_text_part(content):
    """Locate the text carried by a message ``content`` value.

    Returns:
        ``(None, content)`` for a plain string; ``(index, text)`` of the first
        dict part holding a non-empty string ``"text"`` for a list of parts;
        ``(None, "")`` when no text is found or the type is unsupported.
    """
    if isinstance(content, str):
        return None, content
    if isinstance(content, list):
        for idx, part in enumerate(content):
            if isinstance(part, dict):
                part_text = part.get("text")
                if isinstance(part_text, str) and part_text:
                    return idx, part_text
    return None, ""
 def _apply_prompt_enhancer(input_data):
    """Prepend the prompt-enhancer preamble to the last user message.

    In ``ai-powered`` mode (with a model and URL configured) the message text
    is first rewritten by ``_enhance_prompt_llm``, using the auto-compaction
    summary found in ``input_data`` as context. In every mode
    ``_PROMPT_ENHANCER_OFFLINE`` is then prepended to the text.

    Args:
        input_data: List of Responses-style input items; anything else is
            returned untouched.

    Returns:
        ``input_data`` itself when nothing is changed (no user message, text
        shorter than 5 characters, or already enhanced); otherwise a shallow
        copy of the list whose last user message is replaced by an enhanced
        copy. The caller's items are never mutated.
    """
    if not isinstance(input_data, list) or not input_data:
        return input_data
    last_user_idx = None
    for i in reversed(range(len(input_data))):
        candidate = input_data[i]
        if isinstance(candidate, dict) and candidate.get("type") == "message" and candidate.get("role") == "user":
            last_user_idx = i
            break
    if last_user_idx is None:
        return input_data
    item = input_data[last_user_idx]
    content = item.get("content", "")
    part_idx, text = _prompt_enhancer_text_part(content)
    if not text or len(text) < 5:
        return input_data
    # The preamble's opening tag marks a message that was already enhanced.
    if text.startswith("<prompt-enhancer>"):
        return input_data
    if PROMPT_ENHANCER_MODE == "ai-powered" and PROMPT_ENHANCER_MODEL and PROMPT_ENHANCER_URL:
        # The compaction summary is only useful as context for the LLM
        # rewrite, so it is only looked up on this path.
        compaction_summary = ""
        for it in input_data:
            if isinstance(it, dict) and it.get("type") == "message" and it.get("role") == "user":
                _, summary_text = _prompt_enhancer_text_part(it.get("content", ""))
                if "[Auto-compacted:" in summary_text:
                    compaction_summary = summary_text[:3000]
                    break
        enhanced = _enhance_prompt_llm(text, compaction_summary)
    else:
        enhanced = text
    enhanced = _PROMPT_ENHANCER_OFFLINE + enhanced
    new_item = dict(item)
    if part_idx is None:
        new_item["content"] = enhanced
    else:
        # Replace only the text-bearing part so images, files and any further
        # text parts of a multi-part message are preserved.
        new_parts = list(content)
        new_part = dict(new_parts[part_idx])
        new_part["text"] = enhanced
        new_part.setdefault("type", "input_text")
        new_parts[part_idx] = new_part
        new_item["content"] = new_parts
    result = list(input_data)
    result[last_user_idx] = new_item
    print(f"[prompt-enhancer] mode={PROMPT_ENHANCER_MODE} enhanced last user message ({len(text)}->{len(enhanced)} chars)", file=sys.stderr)
    return result
 # ═══════════════════════════════════════════════════════════════════
 # Tool-call pairing validator
 # ═══════════════════════════════════════════════════════════════════
@@ -4284,8 +4418,14 @@ class Handler(http.server.BaseHTTPRequestHandler):
                body = dict(body)
                body["input"] = input_data
        if PROMPT_ENHANCER and isinstance(input_data, list):
            input_data = _apply_prompt_enhancer(input_data)
            body = dict(body)
            body["input"] = input_data
        crof_limit = _crof_item_limit(model)
-        if not compacted and isinstance(input_data, list) and len(input_data) > crof_limit:
+        _crof_eligible = TARGET_URL and "crof.ai" in TARGET_URL
        if _crof_eligible and not compacted and isinstance(input_data, list) and len(input_data) > crof_limit:
            print(f"[crof-adaptive] proactive compact: {len(input_data)} items > limit {crof_limit}", file=sys.stderr)
            input_data = _crof_compact_for_retry(input_data, model)
            body = dict(body)
@@ -4456,6 +4596,11 @@ class Handler(http.server.BaseHTTPRequestHandler):
                body = dict(body)
                body["input"] = input_data
        if PROMPT_ENHANCER and isinstance(input_data, list):
            input_data = _apply_prompt_enhancer(input_data)
            body = dict(body)
            body["input"] = input_data
        access_token = _refresh_oauth_token()
        token_name = "google-antigravity-oauth-token.json" if OAUTH_PROVIDER == "google-antigravity" else "google-cli-oauth-token.json"
        token_path = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy", token_name)
@@ -4667,9 +4812,9 @@ class Handler(http.server.BaseHTTPRequestHandler):
            wrapped["requestId"] = f"agent-{uuid.uuid4().hex[:12]}"
        endpoints = ([
            "https://cloudcode-pa.googleapis.com",
            "https://daily-cloudcode-pa.sandbox.googleapis.com",
            "https://autopush-cloudcode-pa.sandbox.googleapis.com",
            "https://cloudcode-pa.googleapis.com",
        ] if OAUTH_PROVIDER == "google-antigravity" else [
            "https://cloudcode-pa.googleapis.com",
        ])
@@ -4699,6 +4844,9 @@ class Handler(http.server.BaseHTTPRequestHandler):
            except Exception:
                pass
        if OAUTH_PROVIDER == "google-antigravity":
            print(f"[antigravity-endpoint] endpoints={[e.replace('https://','') for e in endpoints]} project={project_id}", file=sys.stderr)
        for ep in endpoints:
            target = f"{ep}/{url_suffix}"
            req = urllib.request.Request(target, data=body_b, headers=headers)
@@ -4715,6 +4863,9 @@ class Handler(http.server.BaseHTTPRequestHandler):
                        print(f"[{self._session_id}] saved 400 debug request to {debug_path}", file=sys.stderr)
                    except Exception:
                        pass
                if e.code == 403 and "SERVICE_DISABLED" in err_body[:500] and ep != endpoints[-1]:
                    print(f"[{self._session_id}] {ep} SERVICE_DISABLED, trying next endpoint", file=sys.stderr)
                    continue
                if e.code == 429 and ep != endpoints[-1]:
                    print(f"[{self._session_id}] {ep} HTTP 429, trying next endpoint", file=sys.stderr)
                    continue
@@ -5079,7 +5230,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
                        print(f"[provider-sensor] synthetic retry failed: {e}", file=sys.stderr)
            # Auto-retry on finish_reason=length with no content due to too much context.
-            if finish_reason == "length" and not has_content and isinstance(input_data, list) and len(input_data) > 5:
+            if finish_reason == "length" and not has_content and isinstance(input_data, list) and len(input_data) > 5 and TARGET_URL and "crof.ai" in TARGET_URL:
                print(f"[crof-adaptive] RETRY: finish_reason=length with no content, compacting {n_items} items", file=sys.stderr)
                new_input = _crof_compact_for_retry(input_data, model)
                if len(new_input) < len(input_data):
@@ -5417,9 +5568,10 @@ class Handler(http.server.BaseHTTPRequestHandler):
             metadata = {
                 "run_id": run_id,
                 "cost_mode": "free",
                 "client_id": "".join(secrets.choice(string.digits + string.ascii_lowercase) for _ in range(13)),
             }
             if instance_id:
-                 metadata["codebuff_instance_id"] = instance_id
+                 metadata["freebuff_instance_id"] = instance_id
             chat_body = {
                 "model": model,
@@ -5441,7 +5593,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
             headers = {
                 "Content-Type": "application/json",
                 "Authorization": f"Bearer {token}",
-                 "User-Agent": "codex-launcher/3.10.4",
+                 "User-Agent": "ai-sdk/openai-compatible/1.0.25/codebuff",
                 "x-codebuff-model": model,
             }
             if instance_id:
@@ -5589,9 +5741,9 @@ class Handler(http.server.BaseHTTPRequestHandler):
        instance_id = _codebuff_get_session(token, model)
        messages = _cb_input_to_messages(input_data, instructions)
        _codebuff_hard_disable_reasoning(messages)
-        metadata = {"run_id": run_id, "cost_mode": "free"}
+        metadata = {"run_id": run_id, "cost_mode": "free", "client_id": secrets.token_hex(7)[:13]}
        if instance_id:
-            metadata["codebuff_instance_id"] = instance_id
+            metadata["freebuff_instance_id"] = instance_id
        chat_body = {
            "model": model, "messages": messages, "stream": stream,
            "max_tokens": max(body.get("max_output_tokens", 0), 64000),
@@ -5607,7 +5759,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
        if body.get("tool_choice"):
            chat_body["tool_choice"] = body["tool_choice"]
        target = f"{_CODEBUFF_API_URL}/api/v1/chat/completions"
-        headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}", "User-Agent": "codex-launcher/3.10.4", "x-codebuff-model": model}
+        headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}", "User-Agent": "ai-sdk/openai-compatible/1.0.25/codebuff", "x-codebuff-model": model}
        if instance_id:
            headers["x-codebuff-instance-id"] = instance_id
        print(f"[codebuff] retry POST {target} model={model} stream={stream} run={run_id} (thinking disabled via DeepSeek native)", file=sys.stderr)
@@ -6017,6 +6169,15 @@ def main():
    global SERVER, _START_TIME
    _START_TIME = time.time()
    _init_runtime()
    try:
        _current_cfg = os.path.basename(args.config) if args.config else ""
        for _f in os.listdir(_LOG_DIR):
            if _f.startswith("proxy-") and _f.endswith(".json") and _f != _current_cfg:
                os.remove(os.path.join(_LOG_DIR, _f))
            if _f.startswith("models-") and _f.endswith(".json"):
                os.remove(os.path.join(_LOG_DIR, _f))
    except Exception:
        pass
    signal.signal(signal.SIGINT, _handle_shutdown_signal)
    if _IS_WINDOWS:
        if hasattr(signal, "SIGBREAK"):