v3.3.0: fix auto-continue class breakage, add MAX_TOKENS auto-continue for Gemini/Antigravity, bump version label
This commit is contained in:
@@ -18,6 +18,7 @@
|
|||||||
- Added `response.function_call_arguments.delta` and `response.function_call_arguments.done` events so Codex can execute Antigravity tool calls and create files.
|
- Added `response.function_call_arguments.delta` and `response.function_call_arguments.done` events so Codex can execute Antigravity tool calls and create files.
|
||||||
- Fixed functionResponse name matching — uses the original functionCall name instead of falling back to call_id.
|
- Fixed functionResponse name matching — uses the original functionCall name instead of falling back to call_id.
|
||||||
- Strengthened Antigravity prompt policy: use tools immediately for file changes, avoid planning-only responses, and answer directly when no suitable tool exists.
|
- Strengthened Antigravity prompt policy: use tools immediately for file changes, avoid planning-only responses, and answer directly when no suitable tool exists.
|
||||||
|
- **Auto-continue on MAX_TOKENS** — when Gemini/Antigravity truncates a text response, the proxy transparently sends a continuation request and concatenates the output so Codex receives the complete response without manual intervention.
|
||||||
|
|
||||||
### Reliability + Routing
|
### Reliability + Routing
|
||||||
- Added BGP++ route scoring, route cooldowns, token buckets, and persisted route stats.
|
- Added BGP++ route scoring, route cooldowns, token buckets, and persisted route stats.
|
||||||
|
|||||||
Binary file not shown.
@@ -31,23 +31,9 @@ CHANGELOG = [
|
|||||||
"Added Gemini CLI OAuth backend using public Gemini CLI OAuth client",
|
"Added Gemini CLI OAuth backend using public Gemini CLI OAuth client",
|
||||||
"Antigravity now creates files via tool calls — full Codex agent loop with Gemini-style history hardening",
|
"Antigravity now creates files via tool calls — full Codex agent loop with Gemini-style history hardening",
|
||||||
"Fixed tool-call streaming: function_call_arguments delta/done events, thought signatures, functionResponse name matching",
|
"Fixed tool-call streaming: function_call_arguments delta/done events, thought signatures, functionResponse name matching",
|
||||||
|
"Auto-continue on MAX_TOKENS — proxy transparently requests continuation for truncated Gemini/Antigravity responses",
|
||||||
"Added Endpoint Doctor, adaptive BGP scoring, provider policies, adaptive compaction, log redaction",
|
"Added Endpoint Doctor, adaptive BGP scoring, provider policies, adaptive compaction, log redaction",
|
||||||
]),
|
]),
|
||||||
("3.1.0", "2026-05-20", [
|
|
||||||
"Initial Antigravity/Gemini CLI OAuth split, history hardening, SSE fixes",
|
|
||||||
]),
|
|
||||||
("3.0.0", "2026-05-20", [
|
|
||||||
"ThreadingHTTPServer with dynamic proxy ports and health-gated Codex launch",
|
|
||||||
"Atomic config writes, safe cleanup registry, graceful shutdown, and buffered SSE streaming",
|
|
||||||
"Usage Dashboard v2, TCP_NODELAY streaming, Anthropic prompt caching, and batched usage stats",
|
|
||||||
]),
|
|
||||||
("2.6.1", "2026-05-20", [
|
|
||||||
"Google OAuth rebuilt to emulate Gemini CLI — no client_secret.json needed",
|
|
||||||
"Uses Google's public OAuth client_id (same as gemini-cli)",
|
|
||||||
"PKCE + CSRF state protection for secure auth",
|
|
||||||
"Just click OAuth Login → browser opens → authorize → done",
|
|
||||||
"Includes cloud-platform scope for Gemini Code Assist compatibility",
|
|
||||||
]),
|
|
||||||
("2.6.0", "2026-05-20", [
|
("2.6.0", "2026-05-20", [
|
||||||
"Usage Dashboard — per-provider request/token/latency tracking",
|
"Usage Dashboard — per-provider request/token/latency tracking",
|
||||||
"Visual cards with success rate bars, model breakdown, error tracking",
|
"Visual cards with success rate bars, model breakdown, error tracking",
|
||||||
|
|||||||
@@ -1402,6 +1402,79 @@ def _upstream_timeout(body, stream):
|
|||||||
return min((180 if has_tools else 120) + n_items * 2, 300)
|
return min((180 if has_tools else 120) + n_items * 2, 300)
|
||||||
return min(60 + n_items * 2, 120)
|
return min(60 + n_items * 2, 120)
|
||||||
|
|
||||||
|
def _auto_continue_gemini(
    handler,
    flush_event,
    message_id,
    model,
    gen_config,
    gemini_tools,
    system_parts,
    project_id,
    headers,
    endpoints,
    url_suffix,
    accumulated_text,
    output_items,
    message_started,
):
    """Request continuations for a response that stopped with MAX_TOKENS.

    Up to five follow-up requests are sent.  Each one replays the last
    12000 characters of the text produced so far as a ``model`` turn,
    followed by a ``user`` turn asking the model to continue.  Text parts
    from the streamed reply are forwarded through ``flush_event`` as
    ``response.output_text.delta`` events and appended to the running text.
    Function-call parts are forwarded as ``response.output_item.added`` and
    ``response.function_call_arguments.delta`` / ``.done`` events and
    recorded in ``output_items``.

    The loop stops when a round finishes with anything other than
    ``MAX_TOKENS``, when no endpoint accepts the request, or after the
    fifth round.

    Args:
        handler: Unused by this function; kept for call-site compatibility.
        flush_event: Callable ``(event_name, payload_dict)`` that emits one
            event to the client.
        message_id: Unused by this function.
        model: Model identifier placed in the wrapped request.
        gen_config: Generation config mapping; shallow-copied per request.
        gemini_tools: Tool declarations; attached as ``tools`` when truthy.
        system_parts: System-instruction parts; attached when truthy.
        project_id: Value for the wrapper's ``project`` field.
        headers: HTTP headers sent with every continuation request.
        endpoints: Base URLs tried in order until one opens successfully.
        url_suffix: Path appended to each endpoint as ``{ep}/{url_suffix}``.
        accumulated_text: Text already produced before continuation began.
        output_items: List mutated in place; one
            ``{"tool": True, "fc": ..., "call_id": ...}`` entry is appended
            per function call seen.
        message_started: Unused by this function.

    Returns:
        ``accumulated_text`` extended with every continuation chunk received.
    """
    max_continuations = 5
    for _ in range(max_continuations):
        # Only the tail of the text is replayed, which bounds request size.
        cont_contents = [
            {"role": "model", "parts": [{"text": accumulated_text[-12000:]}]},
            {"role": "user", "parts": [{"text": "Continue exactly where you left off. Do not repeat anything already written."}]},
        ]
        cont_request = {"contents": cont_contents, "generationConfig": dict(gen_config)}
        if system_parts:
            cont_request["systemInstruction"] = {"parts": system_parts}
        if gemini_tools:
            cont_request["tools"] = gemini_tools

        cont_wrapped = {"project": project_id, "model": model, "request": cont_request}
        if OAUTH_PROVIDER == "google-antigravity":
            # NOTE(review): the source view lost its indentation; all three
            # fields are assumed to be Antigravity-only -- confirm.
            cont_wrapped["requestType"] = "agent"
            cont_wrapped["userAgent"] = "antigravity"
            cont_wrapped["requestId"] = f"agent-{uuid.uuid4().hex[:12]}"
        cont_body = json.dumps(cont_wrapped).encode()

        # Fail over across endpoints: the first one that opens wins.
        upstream = None
        for ep in endpoints:
            target = f"{ep}/{url_suffix}"
            req = urllib.request.Request(target, data=cont_body, headers=headers)
            try:
                upstream = urllib.request.urlopen(req, timeout=180)
            except Exception as e:
                # Failover boundary: log the failure and try the next endpoint.
                print(f"[auto-continue] {ep} failed: {e}", file=sys.stderr)
                continue
            break
        if upstream is None:
            break

        text_pieces = []
        cont_finish = ""
        cont_buf = ""
        # The context manager closes the HTTP response even if reading or
        # event emission raises; previously the connection was never closed.
        with upstream:
            for raw_line in upstream:
                line = raw_line.decode(errors="replace")
                if line.startswith("data: "):
                    # Payload lines are buffered until the blank separator.
                    cont_buf += line[6:]
                    continue
                if line.strip() or not cont_buf:
                    continue

                # A blank line terminates the buffered event.
                payload, cont_buf = cont_buf, ""
                try:
                    chunk = json.loads(payload)
                except ValueError:
                    # Undecodable event: drop it and keep reading.
                    continue

                # The candidates may sit under a "response" envelope or at
                # the top level of the chunk.
                candidates = chunk.get("response", chunk).get("candidates", [])
                if not candidates:
                    continue
                first = candidates[0]
                cont_finish = first.get("finishReason", "")
                for part in first.get("content", {}).get("parts", []):
                    if part.get("thought"):
                        # Thought parts are not forwarded to the client.
                        continue
                    if "text" in part and not part.get("functionCall"):
                        delta = part["text"]
                        if delta:
                            text_pieces.append(delta)
                            # NOTE(review): output_index/content_index are
                            # fixed at 0 -- presumably the text message is
                            # always the first output item; confirm.
                            flush_event("response.output_text.delta", {"type": "response.output_text.delta", "output_index": 0, "content_index": 0, "delta": delta})
                    elif part.get("functionCall"):
                        fc = part["functionCall"]
                        call_id = f"call_{uuid.uuid4().hex[:24]}"
                        args_str = json.dumps(fc.get("args", fc.get("arguments", {})))
                        output_index = len(output_items)
                        flush_event("response.output_item.added", {"type": "response.output_item.added", "output_index": output_index, "item": {"type": "function_call", "id": call_id, "call_id": call_id, "name": fc.get("name", ""), "arguments": ""}})
                        flush_event("response.function_call_arguments.delta", {"type": "response.function_call_arguments.delta", "output_index": output_index, "item_id": call_id, "delta": args_str})
                        flush_event("response.function_call_arguments.done", {"type": "response.function_call_arguments.done", "output_index": output_index, "item_id": call_id, "arguments": args_str})
                        output_items.append({"tool": True, "fc": fc, "call_id": call_id})

        cont_text = "".join(text_pieces)
        accumulated_text += cont_text
        print(f"[auto-continue] chunk {len(cont_text)} chars, finish={cont_finish}, total={len(accumulated_text)}", file=sys.stderr)
        if cont_finish != "MAX_TOKENS":
            break
    return accumulated_text
|
||||||
|
|
||||||
class Handler(http.server.BaseHTTPRequestHandler):
|
class Handler(http.server.BaseHTTPRequestHandler):
|
||||||
protocol_version = "HTTP/1.1"
|
protocol_version = "HTTP/1.1"
|
||||||
|
|
||||||
@@ -1877,10 +1950,22 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
flush_event("response.function_call_arguments.done", {"type": "response.function_call_arguments.done", "output_index": output_index, "item_id": call_id, "arguments": args_str})
|
flush_event("response.function_call_arguments.done", {"type": "response.function_call_arguments.done", "output_index": output_index, "item_id": call_id, "arguments": args_str})
|
||||||
current_tool_calls[call_id] = fc
|
current_tool_calls[call_id] = fc
|
||||||
output_items.append({"tool": True})
|
output_items.append({"tool": True})
|
||||||
if OAUTH_PROVIDER == "google-antigravity" and full_text and candidates[0].get("finishReason"):
|
last_finish = candidates[0].get("finishReason", "")
|
||||||
|
if OAUTH_PROVIDER == "google-antigravity" and full_text and last_finish:
|
||||||
|
if last_finish == "MAX_TOKENS" and not current_tool_calls:
|
||||||
|
print(f"[gemini-oauth] MAX_TOKENS hit ({len(full_text)} chars), auto-continuing...", file=sys.stderr)
|
||||||
|
break
|
||||||
stream_finished = True
|
stream_finished = True
|
||||||
break
|
break
|
||||||
|
|
||||||
|
if OAUTH_PROVIDER.startswith("google") and full_text and not current_tool_calls and last_finish == "MAX_TOKENS" and not stream_finished:
|
||||||
|
result = _auto_continue_gemini(self, flush_event, message_id, model, gen_config, gemini_tools, system_parts, project_id, headers, endpoints, url_suffix, full_text, output_items, message_started)
|
||||||
|
if result:
|
||||||
|
full_text = result
|
||||||
|
for item in output_items:
|
||||||
|
if isinstance(item, dict) and item.get("tool") and "fc" in item and "call_id" in item:
|
||||||
|
current_tool_calls[item["call_id"]] = item["fc"]
|
||||||
|
|
||||||
out = []
|
out = []
|
||||||
if not full_text and not current_tool_calls:
|
if not full_text and not current_tool_calls:
|
||||||
print("[gemini-oauth] WARNING: completed with empty output", file=sys.stderr)
|
print("[gemini-oauth] WARNING: completed with empty output", file=sys.stderr)
|
||||||
|
|||||||
Reference in New Issue
Block a user