diff --git a/CHANGELOG.md b/CHANGELOG.md index 7547332..06ed0d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ - Added `response.function_call_arguments.delta` and `response.function_call_arguments.done` events so Codex can execute Antigravity tool calls and create files. - Fixed functionResponse name matching — uses the original functionCall name instead of falling back to call_id. - Strengthened Antigravity prompt policy: use tools immediately for file changes, avoid planning-only responses, and answer directly when no suitable tool exists. +- **Auto-continue on MAX_TOKENS** — when Gemini/Antigravity truncates a text response, the proxy transparently sends a continuation request and concatenates the output so Codex receives the complete response without manual intervention. ### Reliability + Routing - Added BGP++ route scoring, route cooldowns, token buckets, and persisted route stats. diff --git a/codex-launcher_3.3.0_all.deb b/codex-launcher_3.3.0_all.deb index 67c8d99..54a5a1e 100644 Binary files a/codex-launcher_3.3.0_all.deb and b/codex-launcher_3.3.0_all.deb differ diff --git a/src/codex-launcher-gui b/src/codex-launcher-gui index c4de26a..c225daa 100755 --- a/src/codex-launcher-gui +++ b/src/codex-launcher-gui @@ -31,23 +31,9 @@ CHANGELOG = [ "Added Gemini CLI OAuth backend using public Gemini CLI OAuth client", "Antigravity now creates files via tool calls — full Codex agent loop with Gemini-style history hardening", "Fixed tool-call streaming: function_call_arguments delta/done events, thought signatures, functionResponse name matching", + "Auto-continue on MAX_TOKENS — proxy transparently requests continuation for truncated Gemini/Antigravity responses", "Added Endpoint Doctor, adaptive BGP scoring, provider policies, adaptive compaction, log redaction", ]), - ("3.1.0", "2026-05-20", [ - "Initial Antigravity/Gemini CLI OAuth split, history hardening, SSE fixes", - ]), - ("3.0.0", "2026-05-20", [ - "ThreadingHTTPServer with dynamic proxy ports and health-gated Codex launch", - "Atomic config writes, safe cleanup registry, graceful shutdown, and buffered SSE streaming", - "Usage Dashboard v2, TCP_NODELAY streaming, Anthropic prompt caching, and batched usage stats", - ]), - ("2.6.1", "2026-05-20", [ - "Google OAuth rebuilt to emulate Gemini CLI — no client_secret.json needed", - "Uses Google's public OAuth client_id (same as gemini-cli)", - "PKCE + CSRF state protection for secure auth", - "Just click OAuth Login → browser opens → authorize → done", - "Includes cloud-platform scope for Gemini Code Assist compatibility", - ]), ("2.6.0", "2026-05-20", [ "Usage Dashboard — per-provider request/token/latency tracking", "Visual cards with success rate bars, model breakdown, error tracking", diff --git a/src/translate-proxy.py b/src/translate-proxy.py index d86e14d..f45a2b1 100755 --- a/src/translate-proxy.py +++ b/src/translate-proxy.py @@ -1402,6 +1402,79 @@ def _upstream_timeout(body, stream): return min((180 if has_tools else 120) + n_items * 2, 300) return min(60 + n_items * 2, 120) +def _auto_continue_gemini(handler, flush_event, message_id, model, gen_config, gemini_tools, system_parts, project_id, headers, endpoints, url_suffix, accumulated_text, output_items, message_started): + max_continuations = 5 + for _cont in range(max_continuations): + cont_contents = [ + {"role": "model", "parts": [{"text": accumulated_text[-12000:]}]}, + {"role": "user", "parts": [{"text": "Continue exactly where you left off. Do not repeat anything already written."}]}, + ] + cont_request = {"contents": cont_contents, "generationConfig": dict(gen_config)} + if system_parts: + cont_request["systemInstruction"] = {"parts": system_parts} + if gemini_tools: + cont_request["tools"] = gemini_tools + cont_wrapped = {"project": project_id, "model": model, "request": cont_request} + if OAUTH_PROVIDER == "google-antigravity": + cont_wrapped["requestType"] = "agent" + cont_wrapped["userAgent"] = "antigravity" + cont_wrapped["requestId"] = f"agent-{uuid.uuid4().hex[:12]}" + cont_body = json.dumps(cont_wrapped).encode() + upstream = None + for ep in endpoints: + target = f"{ep}/{url_suffix}" + req = urllib.request.Request(target, data=cont_body, headers=headers) + try: + upstream = urllib.request.urlopen(req, timeout=180) + break + except Exception as e: + print(f"[auto-continue] {ep} failed: {e}", file=sys.stderr) + continue + if not upstream: + break + cont_text = "" + cont_finish = "" + cont_buf = "" + for raw_line in upstream: + line = raw_line.decode(errors="replace") + if line.startswith("data: "): + cont_buf += line[6:] + continue + if not line.strip() and cont_buf: + try: + chunk = json.loads(cont_buf) + except Exception: + cont_buf = "" + continue + cont_buf = "" + candidates = chunk.get("response", chunk).get("candidates", []) + if not candidates: + continue + cont_finish = candidates[0].get("finishReason", "") + parts = candidates[0].get("content", {}).get("parts", []) + for part in parts: + if part.get("thought"): + continue + if "text" in part and not part.get("functionCall"): + delta = part["text"] + if delta: + cont_text += delta + flush_event("response.output_text.delta", {"type": "response.output_text.delta", "output_index": 0, "content_index": 0, "delta": delta}) + elif part.get("functionCall"): + fc = part["functionCall"] + call_id = f"call_{uuid.uuid4().hex[:24]}" + args_str = json.dumps(fc.get("args", fc.get("arguments", {}))) + output_index = len(output_items) + flush_event("response.output_item.added", {"type": "response.output_item.added", "output_index": output_index, "item": {"type": "function_call", "id": call_id, "call_id": call_id, "name": fc.get("name", ""), "arguments": ""}}) + flush_event("response.function_call_arguments.delta", {"type": "response.function_call_arguments.delta", "output_index": output_index, "item_id": call_id, "delta": args_str}) + flush_event("response.function_call_arguments.done", {"type": "response.function_call_arguments.done", "output_index": output_index, "item_id": call_id, "arguments": args_str}) + output_items.append({"tool": True, "fc": fc, "call_id": call_id}) + accumulated_text += cont_text + print(f"[auto-continue] chunk {len(cont_text)} chars, finish={cont_finish}, total={len(accumulated_text)}", file=sys.stderr) + if cont_finish != "MAX_TOKENS": + break + return accumulated_text + class Handler(http.server.BaseHTTPRequestHandler): protocol_version = "HTTP/1.1" @@ -1877,10 +1950,22 @@ class Handler(http.server.BaseHTTPRequestHandler): flush_event("response.function_call_arguments.done", {"type": "response.function_call_arguments.done", "output_index": output_index, "item_id": call_id, "arguments": args_str}) current_tool_calls[call_id] = fc output_items.append({"tool": True}) - if OAUTH_PROVIDER == "google-antigravity" and full_text and candidates[0].get("finishReason"): + last_finish = candidates[0].get("finishReason", "") + if OAUTH_PROVIDER == "google-antigravity" and full_text and last_finish: + if last_finish == "MAX_TOKENS" and not current_tool_calls: + print(f"[gemini-oauth] MAX_TOKENS hit ({len(full_text)} chars), auto-continuing...", file=sys.stderr) + break stream_finished = True break + if OAUTH_PROVIDER.startswith("google") and full_text and not current_tool_calls and last_finish == "MAX_TOKENS" and not stream_finished: + result = _auto_continue_gemini(self, flush_event, message_id, model, gen_config, gemini_tools, system_parts, project_id, headers, endpoints, url_suffix, full_text, output_items, message_started) + if result: + full_text = result + for item in output_items: + if isinstance(item, dict) and item.get("tool") and "fc" in item and "call_id" in item: + current_tool_calls[item["call_id"]] = item["fc"] + out = [] if not full_text and not current_tool_calls: print("[gemini-oauth] WARNING: completed with empty output", file=sys.stderr)