v3.3.0: fix auto-continue class breakage, add MAX_TOKENS auto-continue for Gemini/Antigravity, bump version label
This commit is contained in:
@@ -18,6 +18,7 @@
|
||||
- Added `response.function_call_arguments.delta` and `response.function_call_arguments.done` events so Codex can execute Antigravity tool calls and create files.
|
||||
- Fixed functionResponse name matching — uses the original functionCall name instead of falling back to call_id.
|
||||
- Strengthened Antigravity prompt policy: use tools immediately for file changes, avoid planning-only responses, and answer directly when no suitable tool exists.
|
||||
- **Auto-continue on MAX_TOKENS** — when Gemini/Antigravity truncates a text response, the proxy transparently sends a continuation request and concatenates the output so Codex receives the complete response without manual intervention.
|
||||
|
||||
### Reliability + Routing
|
||||
- Added BGP++ route scoring, route cooldowns, token buckets, and persisted route stats.
|
||||
|
||||
Binary file not shown.
@@ -31,23 +31,9 @@ CHANGELOG = [
|
||||
"Added Gemini CLI OAuth backend using public Gemini CLI OAuth client",
|
||||
"Antigravity now creates files via tool calls — full Codex agent loop with Gemini-style history hardening",
|
||||
"Fixed tool-call streaming: function_call_arguments delta/done events, thought signatures, functionResponse name matching",
|
||||
"Auto-continue on MAX_TOKENS — proxy transparently requests continuation for truncated Gemini/Antigravity responses",
|
||||
"Added Endpoint Doctor, adaptive BGP scoring, provider policies, adaptive compaction, log redaction",
|
||||
]),
|
||||
("3.1.0", "2026-05-20", [
|
||||
"Initial Antigravity/Gemini CLI OAuth split, history hardening, SSE fixes",
|
||||
]),
|
||||
("3.0.0", "2026-05-20", [
|
||||
"ThreadingHTTPServer with dynamic proxy ports and health-gated Codex launch",
|
||||
"Atomic config writes, safe cleanup registry, graceful shutdown, and buffered SSE streaming",
|
||||
"Usage Dashboard v2, TCP_NODELAY streaming, Anthropic prompt caching, and batched usage stats",
|
||||
]),
|
||||
("2.6.1", "2026-05-20", [
|
||||
"Google OAuth rebuilt to emulate Gemini CLI — no client_secret.json needed",
|
||||
"Uses Google's public OAuth client_id (same as gemini-cli)",
|
||||
"PKCE + CSRF state protection for secure auth",
|
||||
"Just click OAuth Login → browser opens → authorize → done",
|
||||
"Includes cloud-platform scope for Gemini Code Assist compatibility",
|
||||
]),
|
||||
("2.6.0", "2026-05-20", [
|
||||
"Usage Dashboard — per-provider request/token/latency tracking",
|
||||
"Visual cards with success rate bars, model breakdown, error tracking",
|
||||
|
||||
@@ -1402,6 +1402,79 @@ def _upstream_timeout(body, stream):
|
||||
return min((180 if has_tools else 120) + n_items * 2, 300)
|
||||
return min(60 + n_items * 2, 120)
|
||||
|
||||
def _iter_sse_data_payloads(lines):
    """Yield the joined ``data: `` payload of each event in a byte-line stream.

    Lines starting with ``data: `` are accumulated; a blank line ends the
    event. Other non-blank lines are ignored. A payload still pending when the
    stream ends is yielded as well, so a final event that lacks its
    terminating blank line is not silently lost (the consumer drops it anyway
    if it is not complete JSON).
    """
    pending = ""
    for raw_line in lines:
        line = raw_line.decode(errors="replace")
        if line.startswith("data: "):
            pending += line[6:]
        elif not line.strip() and pending:
            yield pending
            pending = ""
    if pending:
        yield pending


def _forward_continuation_payload(payload, flush_event, output_items):
    """Decode one streamed payload and forward its parts through ``flush_event``.

    Returns ``(text, finish_reason)``. ``finish_reason`` is ``None`` when the
    payload was ignored (invalid JSON, not an object, or no candidates), which
    tells the caller to keep the previously seen finish reason; otherwise it
    is the first candidate's ``finishReason`` or ``""`` when absent.
    Function calls are announced to the client and appended to
    ``output_items`` in place.
    """
    try:
        chunk = json.loads(payload)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return "", None
    if not isinstance(chunk, dict):
        return "", None
    candidates = chunk.get("response", chunk).get("candidates", [])
    if not candidates:
        return "", None

    first = candidates[0]
    finish = first.get("finishReason", "")
    pieces = []
    for part in first.get("content", {}).get("parts", []):
        if part.get("thought"):
            # Thought parts are never forwarded to the client.
            continue
        if "text" in part and not part.get("functionCall"):
            delta = part["text"]
            if delta:
                pieces.append(delta)
                flush_event("response.output_text.delta", {"type": "response.output_text.delta", "output_index": 0, "content_index": 0, "delta": delta})
        elif part.get("functionCall"):
            fc = part["functionCall"]
            call_id = f"call_{uuid.uuid4().hex[:24]}"
            args_str = json.dumps(fc.get("args", fc.get("arguments", {})))
            output_index = len(output_items)
            flush_event("response.output_item.added", {"type": "response.output_item.added", "output_index": output_index, "item": {"type": "function_call", "id": call_id, "call_id": call_id, "name": fc.get("name", ""), "arguments": ""}})
            flush_event("response.function_call_arguments.delta", {"type": "response.function_call_arguments.delta", "output_index": output_index, "item_id": call_id, "delta": args_str})
            flush_event("response.function_call_arguments.done", {"type": "response.function_call_arguments.done", "output_index": output_index, "item_id": call_id, "arguments": args_str})
            output_items.append({"tool": True, "fc": fc, "call_id": call_id})
    return "".join(pieces), finish


def _auto_continue_gemini(handler, flush_event, message_id, model, gen_config, gemini_tools, system_parts, project_id, headers, endpoints, url_suffix, accumulated_text, output_items, message_started):
    """Request continuations while the upstream keeps stopping on ``MAX_TOKENS``.

    Each round sends the last 12000 characters of the text produced so far as
    a ``model`` turn, followed by a ``user`` turn asking the model to continue,
    then streams the reply and forwards new text and function calls through
    ``flush_event``. At most five rounds are attempted.

    Args:
        handler: Unused; accepted to keep the call signature stable.
        flush_event: Callable ``(event_name, payload_dict)`` used to emit
            events to the client.
        message_id: Unused; accepted to keep the call signature stable.
        model: Model name placed in the wrapped request.
        gen_config: Mapping copied into ``generationConfig``.
        gemini_tools: Tool declarations; sent as ``tools`` when truthy.
        system_parts: System instruction parts; sent when truthy.
        project_id: Value of the wrapped request's ``project`` field.
        headers: HTTP headers for the upstream request.
        endpoints: Base URLs tried in order until one accepts the request.
        url_suffix: Path appended to each endpoint as ``{ep}/{url_suffix}``.
        accumulated_text: Text already produced before the truncation.
        output_items: List mutated in place; one
            ``{"tool": True, "fc": ..., "call_id": ...}`` entry is appended
            per function call received.
        message_started: Unused; accepted to keep the call signature stable.

    Returns:
        ``accumulated_text`` with the text of every continuation appended.
    """
    max_continuations = 5
    for _cont in range(max_continuations):
        cont_contents = [
            {"role": "model", "parts": [{"text": accumulated_text[-12000:]}]},
            {"role": "user", "parts": [{"text": "Continue exactly where you left off. Do not repeat anything already written."}]},
        ]
        cont_request = {"contents": cont_contents, "generationConfig": dict(gen_config)}
        if system_parts:
            cont_request["systemInstruction"] = {"parts": system_parts}
        if gemini_tools:
            cont_request["tools"] = gemini_tools
        cont_wrapped = {"project": project_id, "model": model, "request": cont_request}
        if OAUTH_PROVIDER == "google-antigravity":
            cont_wrapped["requestType"] = "agent"
            cont_wrapped["userAgent"] = "antigravity"
            cont_wrapped["requestId"] = f"agent-{uuid.uuid4().hex[:12]}"
        cont_body = json.dumps(cont_wrapped).encode()

        # Try each endpoint in order; the first one that answers wins.
        upstream = None
        for ep in endpoints:
            req = urllib.request.Request(f"{ep}/{url_suffix}", data=cont_body, headers=headers)
            try:
                upstream = urllib.request.urlopen(req, timeout=180)
                break
            except Exception as e:
                print(f"[auto-continue] {ep} failed: {e}", file=sys.stderr)
        if upstream is None:
            break

        text_pieces = []
        cont_finish = ""
        items_before = len(output_items)
        # The context manager closes the response (and its socket) even when
        # reading or flush_event raises part-way through the stream.
        with upstream:
            for payload in _iter_sse_data_payloads(upstream):
                text, finish = _forward_continuation_payload(payload, flush_event, output_items)
                if finish is not None:
                    cont_finish = finish
                if text:
                    text_pieces.append(text)

        cont_text = "".join(text_pieces)
        accumulated_text += cont_text
        print(f"[auto-continue] chunk {len(cont_text)} chars, finish={cont_finish}, total={len(accumulated_text)}", file=sys.stderr)
        if cont_finish != "MAX_TOKENS":
            break
        if not cont_text and len(output_items) == items_before:
            # Nothing new arrived, so the next request would be identical to
            # this one; stop instead of burning the remaining rounds.
            break
    return accumulated_text
|
||||
|
||||
class Handler(http.server.BaseHTTPRequestHandler):
|
||||
protocol_version = "HTTP/1.1"
|
||||
|
||||
@@ -1877,10 +1950,22 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
flush_event("response.function_call_arguments.done", {"type": "response.function_call_arguments.done", "output_index": output_index, "item_id": call_id, "arguments": args_str})
|
||||
current_tool_calls[call_id] = fc
|
||||
output_items.append({"tool": True})
|
||||
if OAUTH_PROVIDER == "google-antigravity" and full_text and candidates[0].get("finishReason"):
|
||||
last_finish = candidates[0].get("finishReason", "")
|
||||
if OAUTH_PROVIDER == "google-antigravity" and full_text and last_finish:
|
||||
if last_finish == "MAX_TOKENS" and not current_tool_calls:
|
||||
print(f"[gemini-oauth] MAX_TOKENS hit ({len(full_text)} chars), auto-continuing...", file=sys.stderr)
|
||||
break
|
||||
stream_finished = True
|
||||
break
|
||||
|
||||
if OAUTH_PROVIDER.startswith("google") and full_text and not current_tool_calls and last_finish == "MAX_TOKENS" and not stream_finished:
|
||||
result = _auto_continue_gemini(self, flush_event, message_id, model, gen_config, gemini_tools, system_parts, project_id, headers, endpoints, url_suffix, full_text, output_items, message_started)
|
||||
if result:
|
||||
full_text = result
|
||||
for item in output_items:
|
||||
if isinstance(item, dict) and item.get("tool") and "fc" in item and "call_id" in item:
|
||||
current_tool_calls[item["call_id"]] = item["fc"]
|
||||
|
||||
out = []
|
||||
if not full_text and not current_tool_calls:
|
||||
print("[gemini-oauth] WARNING: completed with empty output", file=sys.stderr)
|
||||
|
||||
Reference in New Issue
Block a user