v3.3.0: fix auto-continue class breakage, add MAX_TOKENS auto-continue for Gemini/Antigravity, bump version label

This commit is contained in:
Roman
2026-05-20 22:00:49 +04:00
Unverified
parent e2f20810f0
commit f184fdf9b9
4 changed files with 88 additions and 16 deletions

View File

@@ -18,6 +18,7 @@
- Added `response.function_call_arguments.delta` and `response.function_call_arguments.done` events so Codex can execute Antigravity tool calls and create files.
- Fixed functionResponse name matching — uses the original functionCall name instead of falling back to call_id.
- Strengthened Antigravity prompt policy: use tools immediately for file changes, avoid planning-only responses, and answer directly when no suitable tool exists.
- **Auto-continue on MAX_TOKENS** — when Gemini/Antigravity truncates a text response, the proxy transparently sends a continuation request and concatenates the output so Codex receives the complete response without manual intervention.
### Reliability + Routing
- Added BGP++ route scoring, route cooldowns, token buckets, and persisted route stats.

Binary file not shown.

View File

@@ -31,23 +31,9 @@ CHANGELOG = [
"Added Gemini CLI OAuth backend using public Gemini CLI OAuth client",
"Antigravity now creates files via tool calls — full Codex agent loop with Gemini-style history hardening",
"Fixed tool-call streaming: function_call_arguments delta/done events, thought signatures, functionResponse name matching",
"Auto-continue on MAX_TOKENS — proxy transparently requests continuation for truncated Gemini/Antigravity responses",
"Added Endpoint Doctor, adaptive BGP scoring, provider policies, adaptive compaction, log redaction",
]),
("3.1.0", "2026-05-20", [
"Initial Antigravity/Gemini CLI OAuth split, history hardening, SSE fixes",
]),
("3.0.0", "2026-05-20", [
"ThreadingHTTPServer with dynamic proxy ports and health-gated Codex launch",
"Atomic config writes, safe cleanup registry, graceful shutdown, and buffered SSE streaming",
"Usage Dashboard v2, TCP_NODELAY streaming, Anthropic prompt caching, and batched usage stats",
]),
("2.6.1", "2026-05-20", [
"Google OAuth rebuilt to emulate Gemini CLI — no client_secret.json needed",
"Uses Google's public OAuth client_id (same as gemini-cli)",
"PKCE + CSRF state protection for secure auth",
"Just click OAuth Login → browser opens → authorize → done",
"Includes cloud-platform scope for Gemini Code Assist compatibility",
]),
("2.6.0", "2026-05-20", [
"Usage Dashboard — per-provider request/token/latency tracking",
"Visual cards with success rate bars, model breakdown, error tracking",

View File

@@ -1402,6 +1402,79 @@ def _upstream_timeout(body, stream):
return min((180 if has_tools else 120) + n_items * 2, 300)
return min(60 + n_items * 2, 120)
def _iter_sse_json_chunks(stream):
    """Yield the decoded JSON payload of each ``data: `` event read from *stream*.

    *stream* is iterated line by line and must yield ``bytes``. Consecutive
    ``data: `` lines are concatenated; a blank line terminates the event.
    Lines that are neither ``data: `` lines nor blank are ignored, and events
    whose payload is not valid JSON are dropped silently.
    """
    pending = ""
    for raw_line in stream:
        line = raw_line.decode(errors="replace")
        if line.startswith("data: "):
            pending += line[6:]
            continue
        if line.strip() or not pending:
            continue
        payload, pending = pending, ""
        try:
            chunk = json.loads(payload)
        except ValueError:
            # json.JSONDecodeError subclasses ValueError; skip the bad event.
            continue
        yield chunk


def _auto_continue_gemini(handler, flush_event, message_id, model, gen_config, gemini_tools, system_parts, project_id, headers, endpoints, url_suffix, accumulated_text, output_items, message_started):
    """Request continuations for a response that stopped with ``MAX_TOKENS``.

    For up to five rounds, a new request is built from the tail of the text
    produced so far plus a "continue" instruction, and POSTed to the first
    endpoint that accepts it. Text parts of the streamed reply are forwarded
    through ``flush_event`` as ``response.output_text.delta`` events and
    appended to the accumulated text. Function-call parts are forwarded as
    ``response.output_item.added`` / ``response.function_call_arguments.*``
    events and recorded in ``output_items``. The loop stops as soon as a
    round ends with a finish reason other than ``MAX_TOKENS`` or no endpoint
    can be reached.

    Args:
        handler: Unused here; accepted for call-site compatibility.
        flush_event: Callable ``(event_name, payload_dict)`` that emits one
            event to the client.
        message_id: Unused here; accepted for call-site compatibility.
        model: Model identifier placed in the wrapped request.
        gen_config: Mapping copied into ``generationConfig``.
        gemini_tools: Tool declarations; sent as ``tools`` when truthy.
        system_parts: System instruction parts; sent when truthy.
        project_id: Value placed in the wrapped request's ``project`` field.
        headers: HTTP headers for the upstream request.
        endpoints: Base URLs tried in order until one opens successfully.
        url_suffix: Path appended to each endpoint as ``{ep}/{url_suffix}``.
        accumulated_text: Text already produced before this call.
        output_items: List mutated in place; one
            ``{"tool": True, "fc": ..., "call_id": ...}`` entry is appended
            per function call received.
        message_started: Unused here; accepted for call-site compatibility.

    Returns:
        ``accumulated_text`` with all continuation text appended.
    """
    max_continuations = 5
    for _cont in range(max_continuations):
        cont_request = {
            "contents": [
                # Only the last 12000 characters are replayed to the model.
                {"role": "model", "parts": [{"text": accumulated_text[-12000:]}]},
                {"role": "user", "parts": [{"text": "Continue exactly where you left off. Do not repeat anything already written."}]},
            ],
            "generationConfig": dict(gen_config),
        }
        if system_parts:
            cont_request["systemInstruction"] = {"parts": system_parts}
        if gemini_tools:
            cont_request["tools"] = gemini_tools
        cont_wrapped = {"project": project_id, "model": model, "request": cont_request}
        if OAUTH_PROVIDER == "google-antigravity":
            cont_wrapped["requestType"] = "agent"
            cont_wrapped["userAgent"] = "antigravity"
            cont_wrapped["requestId"] = f"agent-{uuid.uuid4().hex[:12]}"
        cont_body = json.dumps(cont_wrapped).encode()

        upstream = None
        for ep in endpoints:
            target = f"{ep}/{url_suffix}"
            req = urllib.request.Request(target, data=cont_body, headers=headers)
            try:
                upstream = urllib.request.urlopen(req, timeout=180)
                break
            except Exception as e:
                # Deliberately broad: any failure just means "try the next endpoint".
                print(f"[auto-continue] {ep} failed: {e}", file=sys.stderr)
                continue
        if upstream is None:
            break

        text_pieces = []
        cont_finish = ""
        # Close the response when done so each round does not leak a socket.
        with upstream:
            for chunk in _iter_sse_json_chunks(upstream):
                # The payload may or may not be wrapped in a "response" envelope.
                envelope = chunk.get("response", chunk) if isinstance(chunk, dict) else None
                candidates = envelope.get("candidates", []) if isinstance(envelope, dict) else None
                if not candidates:
                    continue
                cont_finish = candidates[0].get("finishReason", "")
                parts = candidates[0].get("content", {}).get("parts", [])
                for part in parts:
                    if part.get("thought"):
                        # Thought parts are never forwarded to the client.
                        continue
                    if "text" in part and not part.get("functionCall"):
                        delta = part["text"]
                        if delta:
                            text_pieces.append(delta)
                            flush_event("response.output_text.delta", {"type": "response.output_text.delta", "output_index": 0, "content_index": 0, "delta": delta})
                    elif part.get("functionCall"):
                        fc = part["functionCall"]
                        call_id = f"call_{uuid.uuid4().hex[:24]}"
                        args_str = json.dumps(fc.get("args", fc.get("arguments", {})))
                        output_index = len(output_items)
                        flush_event("response.output_item.added", {"type": "response.output_item.added", "output_index": output_index, "item": {"type": "function_call", "id": call_id, "call_id": call_id, "name": fc.get("name", ""), "arguments": ""}})
                        flush_event("response.function_call_arguments.delta", {"type": "response.function_call_arguments.delta", "output_index": output_index, "item_id": call_id, "delta": args_str})
                        flush_event("response.function_call_arguments.done", {"type": "response.function_call_arguments.done", "output_index": output_index, "item_id": call_id, "arguments": args_str})
                        output_items.append({"tool": True, "fc": fc, "call_id": call_id})

        cont_text = "".join(text_pieces)
        accumulated_text += cont_text
        print(f"[auto-continue] chunk {len(cont_text)} chars, finish={cont_finish}, total={len(accumulated_text)}", file=sys.stderr)
        if cont_finish != "MAX_TOKENS":
            break
    return accumulated_text
class Handler(http.server.BaseHTTPRequestHandler):
protocol_version = "HTTP/1.1"
@@ -1877,10 +1950,22 @@ class Handler(http.server.BaseHTTPRequestHandler):
flush_event("response.function_call_arguments.done", {"type": "response.function_call_arguments.done", "output_index": output_index, "item_id": call_id, "arguments": args_str})
current_tool_calls[call_id] = fc
output_items.append({"tool": True})
if OAUTH_PROVIDER == "google-antigravity" and full_text and candidates[0].get("finishReason"):
last_finish = candidates[0].get("finishReason", "")
if OAUTH_PROVIDER == "google-antigravity" and full_text and last_finish:
if last_finish == "MAX_TOKENS" and not current_tool_calls:
print(f"[gemini-oauth] MAX_TOKENS hit ({len(full_text)} chars), auto-continuing...", file=sys.stderr)
break
stream_finished = True
break
if OAUTH_PROVIDER.startswith("google") and full_text and not current_tool_calls and last_finish == "MAX_TOKENS" and not stream_finished:
result = _auto_continue_gemini(self, flush_event, message_id, model, gen_config, gemini_tools, system_parts, project_id, headers, endpoints, url_suffix, full_text, output_items, message_started)
if result:
full_text = result
for item in output_items:
if isinstance(item, dict) and item.get("tool") and "fc" in item and "call_id" in item:
current_tool_calls[item["call_id"]] = item["fc"]
out = []
if not full_text and not current_tool_calls:
print("[gemini-oauth] WARNING: completed with empty output", file=sys.stderr)