v3.3.0: fix auto-continue class breakage, add MAX_TOKENS auto-continue for Gemini/Antigravity, bump version label

2026-05-20 22:00:49 +04:00
parent e2f20810f0
commit f184fdf9b9
4 changed files with 88 additions and 16 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,7 @@
 - Added `response.function_call_arguments.delta` and `response.function_call_arguments.done` events so Codex can execute Antigravity tool calls and create files.
 - Fixed functionResponse name matching — uses the original functionCall name instead of falling back to call_id.
 - Strengthened Antigravity prompt policy: use tools immediately for file changes, avoid planning-only responses, and answer directly when no suitable tool exists.
+- **Auto-continue on MAX_TOKENS** — when Gemini/Antigravity truncates a text response, the proxy transparently sends a continuation request and concatenates the output so Codex receives the complete response without manual intervention.

 ### Reliability + Routing
 - Added BGP++ route scoring, route cooldowns, token buckets, and persisted route stats.
--- a/codex-launcher_3.3.0_all.deb
+++ b/codex-launcher_3.3.0_all.deb
--- a/src/codex-launcher-gui
+++ b/src/codex-launcher-gui
@@ -31,23 +31,9 @@ CHANGELOG = [
        "Added Gemini CLI OAuth backend using public Gemini CLI OAuth client",
        "Antigravity now creates files via tool calls — full Codex agent loop with Gemini-style history hardening",
        "Fixed tool-call streaming: function_call_arguments delta/done events, thought signatures, functionResponse name matching",
+        "Auto-continue on MAX_TOKENS — proxy transparently requests continuation for truncated Gemini/Antigravity responses",
        "Added Endpoint Doctor, adaptive BGP scoring, provider policies, adaptive compaction, log redaction",
    ]),
-    ("3.1.0", "2026-05-20", [
-        "Initial Antigravity/Gemini CLI OAuth split, history hardening, SSE fixes",
-    ]),
-    ("3.0.0", "2026-05-20", [
-        "ThreadingHTTPServer with dynamic proxy ports and health-gated Codex launch",
-        "Atomic config writes, safe cleanup registry, graceful shutdown, and buffered SSE streaming",
-        "Usage Dashboard v2, TCP_NODELAY streaming, Anthropic prompt caching, and batched usage stats",
-    ]),
-    ("2.6.1", "2026-05-20", [
-        "Google OAuth rebuilt to emulate Gemini CLI — no client_secret.json needed",
-        "Uses Google's public OAuth client_id (same as gemini-cli)",
-        "PKCE + CSRF state protection for secure auth",
-        "Just click OAuth Login → browser opens → authorize → done",
-        "Includes cloud-platform scope for Gemini Code Assist compatibility",
-    ]),
    ("2.6.0", "2026-05-20", [
        "Usage Dashboard — per-provider request/token/latency tracking",
        "Visual cards with success rate bars, model breakdown, error tracking",
--- a/src/translate-proxy.py
+++ b/src/translate-proxy.py
@@ -1402,6 +1402,79 @@ def _upstream_timeout(body, stream):
        return min((180 if has_tools else 120) + n_items * 2, 300)
    return min(60 + n_items * 2, 120)

+def _auto_continue_gemini(handler, flush_event, message_id, model, gen_config, gemini_tools, system_parts, project_id, headers, endpoints, url_suffix, accumulated_text, output_items, message_started):
+    """Request continuations for a text response that stopped with MAX_TOKENS.
+
+    Up to ``max_continuations`` follow-up requests are sent. Each one carries
+    the last 12000 characters of the text so far as a model turn, followed by
+    a user turn asking the model to continue, and is tried against every entry
+    of ``endpoints`` in order until one connects.
+
+    Text parts of the reply are forwarded through ``flush_event`` as
+    ``response.output_text.delta`` events and appended to the running text.
+    ``functionCall`` parts are forwarded as function-call events and recorded
+    in ``output_items`` (mutated in place) together with their ``fc`` payload
+    and generated ``call_id``.
+
+    ``handler``, ``message_id`` and ``message_started`` are part of the
+    signature but are not read by this function.
+
+    Returns ``accumulated_text`` extended with all continuation text. Errors
+    raised while reading an already-open continuation stream propagate to the
+    caller; the upstream response is closed first.
+    """
+    max_continuations = 5
+    for _cont in range(max_continuations):
+        cont_request = {
+            "contents": [
+                # Only the tail of the text produced so far is replayed.
+                {"role": "model", "parts": [{"text": accumulated_text[-12000:]}]},
+                {"role": "user", "parts": [{"text": "Continue exactly where you left off. Do not repeat anything already written."}]},
+            ],
+            "generationConfig": dict(gen_config),
+        }
+        if system_parts:
+            cont_request["systemInstruction"] = {"parts": system_parts}
+        if gemini_tools:
+            cont_request["tools"] = gemini_tools
+        cont_wrapped = {"project": project_id, "model": model, "request": cont_request}
+        if OAUTH_PROVIDER == "google-antigravity":
+            cont_wrapped["requestType"] = "agent"
+            cont_wrapped["userAgent"] = "antigravity"
+            cont_wrapped["requestId"] = f"agent-{uuid.uuid4().hex[:12]}"
+        cont_body = json.dumps(cont_wrapped).encode()
+
+        upstream = None
+        for ep in endpoints:
+            req = urllib.request.Request(f"{ep}/{url_suffix}", data=cont_body, headers=headers)
+            try:
+                upstream = urllib.request.urlopen(req, timeout=180)
+                break
+            except Exception as e:
+                # Best effort: any failure on one endpoint falls through to the next.
+                print(f"[auto-continue] {ep} failed: {e}", file=sys.stderr)
+        if upstream is None:
+            # No endpoint accepted the continuation; keep what we already have.
+            break
+
+        cont_text = ""
+        cont_finish = ""
+        try:
+            for chunk in _iter_gemini_sse_chunks(upstream):
+                # The candidates may sit at the top level or under "response".
+                payload = chunk.get("response", chunk)
+                candidates = payload.get("candidates", []) if isinstance(payload, dict) else []
+                if not candidates:
+                    continue
+                cont_finish = candidates[0].get("finishReason", "")
+                for part in candidates[0].get("content", {}).get("parts", []):
+                    if part.get("thought"):
+                        continue
+                    if "text" in part and not part.get("functionCall"):
+                        delta = part["text"]
+                        if delta:
+                            cont_text += delta
+                            flush_event("response.output_text.delta", {"type": "response.output_text.delta", "output_index": 0, "content_index": 0, "delta": delta})
+                    elif part.get("functionCall"):
+                        fc = part["functionCall"]
+                        call_id = f"call_{uuid.uuid4().hex[:24]}"
+                        args_str = json.dumps(fc.get("args", fc.get("arguments", {})))
+                        output_index = len(output_items)
+                        flush_event("response.output_item.added", {"type": "response.output_item.added", "output_index": output_index, "item": {"type": "function_call", "id": call_id, "call_id": call_id, "name": fc.get("name", ""), "arguments": ""}})
+                        flush_event("response.function_call_arguments.delta", {"type": "response.function_call_arguments.delta", "output_index": output_index, "item_id": call_id, "delta": args_str})
+                        flush_event("response.function_call_arguments.done", {"type": "response.function_call_arguments.done", "output_index": output_index, "item_id": call_id, "arguments": args_str})
+                        output_items.append({"tool": True, "fc": fc, "call_id": call_id})
+        finally:
+            # Release the connection even if reading or forwarding an event raises.
+            upstream.close()
+
+        accumulated_text += cont_text
+        print(f"[auto-continue] chunk {len(cont_text)} chars, finish={cont_finish}, total={len(accumulated_text)}", file=sys.stderr)
+        if cont_finish != "MAX_TOKENS":
+            break
+    return accumulated_text
+
+def _iter_gemini_sse_chunks(stream):
+    """Yield the JSON object carried by each blank-line-terminated SSE event.
+
+    ``stream`` is iterated line by line as bytes. The text after each
+    ``data: `` prefix is buffered, and the buffer is parsed when a blank line
+    arrives. Events that are not valid JSON, or whose payload is not a JSON
+    object, are skipped.
+    """
+    buf = ""
+    for raw_line in stream:
+        line = raw_line.decode(errors="replace")
+        if line.startswith("data: "):
+            buf += line[6:]
+            continue
+        if line.strip() or not buf:
+            # Not an event terminator, or nothing buffered to dispatch.
+            continue
+        event_data, buf = buf, ""
+        try:
+            chunk = json.loads(event_data)
+        except ValueError:
+            continue
+        if isinstance(chunk, dict):
+            yield chunk
+
 class Handler(http.server.BaseHTTPRequestHandler):
    protocol_version = "HTTP/1.1"

@@ -1877,10 +1950,22 @@ class Handler(http.server.BaseHTTPRequestHandler):
                        flush_event("response.function_call_arguments.done", {"type": "response.function_call_arguments.done", "output_index": output_index, "item_id": call_id, "arguments": args_str})
                        current_tool_calls[call_id] = fc
                        output_items.append({"tool": True})
-                if OAUTH_PROVIDER == "google-antigravity" and full_text and candidates[0].get("finishReason"):
+                last_finish = candidates[0].get("finishReason", "")
+                if OAUTH_PROVIDER == "google-antigravity" and full_text and last_finish:
+                    if last_finish == "MAX_TOKENS" and not current_tool_calls:
+                        print(f"[gemini-oauth] MAX_TOKENS hit ({len(full_text)} chars), auto-continuing...", file=sys.stderr)
+                        break
                    stream_finished = True
                    break

+        if OAUTH_PROVIDER.startswith("google") and full_text and not current_tool_calls and last_finish == "MAX_TOKENS" and not stream_finished:
+            result = _auto_continue_gemini(self, flush_event, message_id, model, gen_config, gemini_tools, system_parts, project_id, headers, endpoints, url_suffix, full_text, output_items, message_started)
+            if result:
+                full_text = result
+                for item in output_items:
+                    if isinstance(item, dict) and item.get("tool") and "fc" in item and "call_id" in item:
+                        current_tool_calls[item["call_id"]] = item["fc"]
+
        out = []
        if not full_text and not current_tool_calls:
            print("[gemini-oauth] WARNING: completed with empty output", file=sys.stderr)