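v10.13.8: FIX D force_finalize skips the Gemini call, FIX A _send_ag_finalize returns status=failed, FIX B stream timeout handling, FIX C file tracker mutations inside lock scope, read-loop threshold raised to 8/40 (was 5/30)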

This commit is contained in:
Roman | RyzenAdvanced
2026-05-27 17:52:17 +04:00
Unverified
parent 6861700c0d
commit 5055ff894d
4 changed files with 199 additions and 159 deletions

View File

@@ -28,13 +28,15 @@ model_catalog_json = ""
CHANGELOG = [ CHANGELOG = [
("10.13.8", "2026-05-27", [ ("10.13.8", "2026-05-27", [
"Fix: force_finalize skips Gemini call entirely (was hallucinating tool calls without tools)",
"Fix: _send_ag_finalize returns status=failed (was stored as valid history causing loops)",
"Fix: _forward_gemini_sse wrapped in try/except for TimeoutError/BrokenPipe",
"Fix: file tracker mutations inside lock scope (was racing in ThreadingHTTPServer)",
"Fix: compaction summary strips raw tool outputs (was re-triggering read loops)", "Fix: compaction summary strips raw tool outputs (was re-triggering read loops)",
"Fix: budget cap now strips tools from request (model literally cannot call tools)",
"Fix: detect get_goal/completion_budget null-tool loops (3+ consecutive → force finalize)",
"Fix: post-compaction write directive when 10+ reads with 0 writes", "Fix: post-compaction write directive when 10+ reads with 0 writes",
"Fix: strip timestamps from loop hash (<current_date> broke cross-session tracker)", "Fix: detect get_goal/completion_budget null-tool loops (3+ → force finalize)",
"Fix: strip base64 image data from tool outputs in normalizer", "Fix: read-loop threshold raised to 8 same-file / 40 total (was too aggressive at 5/30)",
"Fix: thread-safe file tracker, response logging for finalize/budget paths", "Fix: strip timestamps from loop hash, base64 image data from normalizer",
]), ]),
("3.12.1", "2026-05-27", [ ("3.12.1", "2026-05-27", [
"Fix Antigravity adapter (PR #15): simplified model resolution", "Fix Antigravity adapter (PR #15): simplified model resolution",

View File

@@ -84,13 +84,15 @@ model_catalog_json = ""
CHANGELOG = [ CHANGELOG = [
("10.13.8", "2026-05-27", [ ("10.13.8", "2026-05-27", [
"Fix: force_finalize skips Gemini call entirely (was hallucinating tool calls without tools)",
"Fix: _send_ag_finalize returns status=failed (was stored as valid history causing loops)",
"Fix: _forward_gemini_sse wrapped in try/except for TimeoutError/BrokenPipe",
"Fix: file tracker mutations inside lock scope (was racing in ThreadingHTTPServer)",
"Fix: compaction summary strips raw tool outputs (was re-triggering read loops)", "Fix: compaction summary strips raw tool outputs (was re-triggering read loops)",
"Fix: budget cap now strips tools from request (model literally cannot call tools)",
"Fix: detect get_goal/completion_budget null-tool loops (3+ consecutive → force finalize)",
"Fix: post-compaction write directive when 10+ reads with 0 writes", "Fix: post-compaction write directive when 10+ reads with 0 writes",
"Fix: strip timestamps from loop hash (<current_date> broke cross-session tracker)", "Fix: detect get_goal/completion_budget null-tool loops (3+ → force finalize)",
"Fix: strip base64 image data from tool outputs in normalizer", "Fix: read-loop threshold raised to 8 same-file / 40 total (was too aggressive at 5/30)",
"Fix: thread-safe file tracker, response logging for finalize/budget paths", "Fix: strip timestamps from loop hash, base64 image data from normalizer",
]), ]),
("3.12.1", "2026-05-27", [ ("3.12.1", "2026-05-27", [
"Fix Antigravity adapter (PR #15): simplify model resolution", "Fix Antigravity adapter (PR #15): simplify model resolution",

View File

@@ -198,9 +198,10 @@ if [ "$RUN_TASK" = "1" ]; then
CLI_VERSION=$(codex --version 2>/dev/null || echo "unknown") CLI_VERSION=$(codex --version 2>/dev/null || echo "unknown")
log_info "Codex CLI: $CLI_VERSION" log_info "Codex CLI: $CLI_VERSION"
TASK_PROMPT='Redesign the <div class="vdb-universe" id="vectordb"> section in site/index.html. Create a bold, innovative Steve Jobs-style design: boxy approach with contrasting boxes (one side white, one black), custom art seamless background that blends the two halves, think out of the box. Use pure CSS + HTML only, no external images. Make it visually stunning with geometric precision. The section is inside the existing page so keep the outer wrapper class vdb-universe with id=vectordb. Do NOT touch anything outside that section.' TASK_PROMPT='Create a file /tmp/e2e-test-output.txt with the text "Hello from Codex CLI E2E test" followed by the current date. Then read it back and confirm the content is correct. This is a simple smoke test.'
TASK_WORKSPACE="/home/roman/Codex-Launcher-Any-AI-Provider" TASK_WORKSPACE="/tmp/e2e-test-workspace"
mkdir -p "$TASK_WORKSPACE"
mkdir -p /tmp/antigravity-task-logs mkdir -p /tmp/antigravity-task-logs
TASK_PROXY_LOG="/tmp/antigravity-task-logs/proxy-$(date +%s).log" TASK_PROXY_LOG="/tmp/antigravity-task-logs/proxy-$(date +%s).log"
@@ -218,26 +219,16 @@ if [ "$RUN_TASK" = "1" ]; then
# Generate model catalog # Generate model catalog
CATALOG_PATH="$HOME/.cache/codex-proxy/models-Antigravity-Test.json" CATALOG_PATH="$HOME/.cache/codex-proxy/models-Antigravity-Test.json"
python3 -c " python3 -c "
import json import json, os
models = ['gemini-3.5-flash-high', 'gemini-3.5-flash-medium', 'gemini-3.5-flash-low', models = ['gemini-3.5-flash-high', 'gemini-3.5-flash-medium', 'gemini-3.5-flash-low',
'gemini-3.1-pro-high', 'gemini-3.1-pro-low', 'gemini-3.1-pro-high', 'gemini-3.1-pro-low',
'claude-sonnet-4-6', 'claude-opus-4-6-thinking', 'gpt-oss-120b-medium'] 'claude-sonnet-4-6', 'claude-opus-4-6-thinking', 'gpt-oss-120b-medium']
catalog = [] catalog = []
for m in models: for m in models:
catalog.append({ catalog.append({'slug':m,'model':m,'display_name':m,'description':'Antigravity '+m,'hidden':False,'isDefault':m=='gemini-3.5-flash-high','shell_type':'shell_command','visibility':'list','default_reasoning_level':'medium','supported_reasoning_levels':[{'effort':'low','description':'Fast'},{'effort':'medium','description':'Balanced'},{'effort':'high','description':'Deep'}]})
'slug': m, 'model': m, 'display_name': m, os.makedirs(os.path.dirname('$CATALOG_PATH'), exist_ok=True)
'description': f'Antigravity {m}', 'hidden': False, json.dump(catalog, open('$CATALOG_PATH','w'), indent=2)
'isDefault': m == 'gemini-3.5-flash-high', " || log_fail "Failed to create model catalog"
'shell_type': 'shell_command', 'visibility': 'list',
'default_reasoning_level': 'medium',
'supported_reasoning_levels': [
{'effort': 'low', 'description': 'Fast'},
{'effort': 'medium', 'description': 'Balanced'},
{'effort': 'high', 'description': 'Deep'},
],
})
json.dump(catalog, open('$CATALOG_PATH', 'w'), indent=2)
"
# Write main config # Write main config
cat > "$CONFIG_FILE" <<CONFEOF cat > "$CONFIG_FILE" <<CONFEOF
@@ -351,16 +342,15 @@ PROFEOF
# ── Launch Codex CLI with the task ── # ── Launch Codex CLI with the task ──
log_info "Launching Codex CLI with real task..." log_info "Launching Codex CLI with real task..."
log_info "Task: Redesign vectordb section (boxy black/white approach)" log_info "Task: Create and verify a simple test file"
log_info "Monitor log: $TASK_MONITOR_LOG" log_info "Monitor log: $TASK_MONITOR_LOG"
cd "$TASK_WORKSPACE" cd "$TASK_WORKSPACE"
# Run codex non-interactively with --quiet flag
set +e set +e
codex --profile Antigravity-Test -c "model=gemini-3.5-flash-high" \ codex exec --profile Antigravity-Test -c "model=gemini-3.5-flash-high" \
-s danger-full-access -a never \ -c 'sandbox_permissions=["disk-full-read-access","disk-full-write-access"]' \
-q "$TASK_PROMPT" \ "$TASK_PROMPT" \
> "$TASK_CLI_LOG" 2>&1 > "$TASK_CLI_LOG" 2>&1
CLI_EXIT=$? CLI_EXIT=$?
set -e set -e
@@ -429,21 +419,41 @@ PROFEOF
fi fi
fi fi
# Check if the file was actually modified # Check if the file was actually created
echo ""; echo "─── Test 4d: Task Output Quality ───" echo ""; echo "─── Test 4d: Task Output Quality ───"
if [ -f "$TASK_WORKSPACE/site/index.html" ]; then if [ -f "/tmp/e2e-test-output.txt" ]; then
VDB_LINES=$(grep -c "vectordb\|vdb-" "$TASK_WORKSPACE/site/index.html" || echo 0) CONTENT=$(cat /tmp/e2e-test-output.txt 2>/dev/null)
log_info "vectordb section has $VDB_LINES vdb-related lines" if echo "$CONTENT" | grep -q "Hello from Codex CLI E2E test"; then
log_pass "Task output file created with correct content"
# Check for common issues in the output
MALFORMED=$(grep -c "&lt;\|&gt;\|&amp;" "$TASK_WORKSPACE/site/index.html" || echo 0)
[ "$MALFORMED" -gt 100 ] && log_fail "Possible HTML encoding issue: $MALFORMED escaped entities"
# Check section is still intact
if grep -q 'id="vectordb"' "$TASK_WORKSPACE/site/index.html"; then
log_pass "vectordb section preserved"
else else
log_fail "vectordb section missing or corrupted" log_fail "Task output file exists but content is wrong: $CONTENT"
fi
else
log_fail "Task output file /tmp/e2e-test-output.txt was NOT created"
fi
# Check proxy log for tool-strip events (budget cap defense)
echo ""; echo "─── Test 4e: Anti-Loop Defense Verification ───"
if [ -f "/tmp/antigravity-test-proxy.log" ]; then
# Count anti-loop marker lines in the proxy log.
# grep -c always prints the count itself (including "0") and exits with
# status 1 when nothing matched, so `|| echo 0` would append a second "0"
# and the variable would hold "0<newline>0", which breaks the numeric
# `[ ... -gt 0 ]` tests below. `|| true` only neutralizes the non-zero exit
# status (the script runs under `set -e`) without adding any output.
NULL_TOOL_LOOPS=$(grep -c "NULL-TOOL LOOP" /tmp/antigravity-test-proxy.log || true)
TOOL_STRIPPED=$(grep -c "TOOLS STRIPPED" /tmp/antigravity-test-proxy.log || true)
BUDGET_HIT=$(grep -c "HARD CAP" /tmp/antigravity-test-proxy.log || true)
READ_LOOP=$(grep -c "FILE READ LOOP" /tmp/antigravity-test-proxy.log || true)
FORCE_FINALIZE=$(grep -c "force_finalize" /tmp/antigravity-test-proxy.log || true)
# If grep printed nothing at all (e.g. the log became unreadable between the
# existence check and the read), fall back to 0 so the comparisons stay numeric.
NULL_TOOL_LOOPS=${NULL_TOOL_LOOPS:-0}
TOOL_STRIPPED=${TOOL_STRIPPED:-0}
BUDGET_HIT=${BUDGET_HIT:-0}
READ_LOOP=${READ_LOOP:-0}
FORCE_FINALIZE=${FORCE_FINALIZE:-0}
log_info "Anti-loop events: null-tool=$NULL_TOOL_LOOPS stripped=$TOOL_STRIPPED budget=$BUDGET_HIT read-loop=$READ_LOOP finalize=$FORCE_FINALIZE"
# For a simple task, none of these should fire
if [ "$BUDGET_HIT" -gt 0 ]; then
log_fail "Budget cap hit on simple task — model looping"
else
log_pass "No budget cap triggered (task completed cleanly)"
fi
if [ "$TOOL_STRIPPED" -gt 0 ]; then
log_fail "Tools were stripped — model hit hard limit"
else
log_pass "No tool stripping needed (model behaved)"
fi fi
fi fi

View File

@@ -5910,7 +5910,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
ft["total_reads"] += 1 ft["total_reads"] += 1
ft["path_counts"][detected_path] = ft["path_counts"].get(detected_path, 0) + 1 ft["path_counts"][detected_path] = ft["path_counts"].get(detected_path, 0) + 1
ft["last_path"] = detected_path ft["last_path"] = detected_path
if ft["path_counts"][detected_path] >= 5 or ft["total_reads"] > 30: if ft["path_counts"][detected_path] >= 8 or ft["total_reads"] > 40:
ag_state["force_finalize"] = True ag_state["force_finalize"] = True
print(f"[antigravity-loop] FILE READ LOOP: {detected_path} read " print(f"[antigravity-loop] FILE READ LOOP: {detected_path} read "
f"{ft['path_counts'][detected_path]}x, total={ft['total_reads']}", f"{ft['path_counts'][detected_path]}x, total={ft['total_reads']}",
@@ -5947,7 +5947,10 @@ class Handler(http.server.BaseHTTPRequestHandler):
ag_state["last_tool_count"] = 1 ag_state["last_tool_count"] = 1
if ag_state.get("force_finalize"): if ag_state.get("force_finalize"):
contents.append({"role": "user", "parts": [{"text": "STOP CALLING TOOLS. APPLY THE FINAL EDIT OR SUMMARIZE WHAT BLOCKED YOU. DO NOT CALL ANY MORE TOOLS."}]}) return self._send_ag_finalize(
"Loop detected. The proxy is forcing a stop because the model repeatedly "
"called tools without making progress. Try a more specific or smaller request.",
stream=body.get("stream", False))
if not _antigravity_is_simple_user(latest_user): if not _antigravity_is_simple_user(latest_user):
contents.insert(0, {"role": "user", "parts": [{"text": _GEMINI_AGENT_GUARDRAIL}]}) contents.insert(0, {"role": "user", "parts": [{"text": _GEMINI_AGENT_GUARDRAIL}]})
@@ -6739,7 +6742,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
ft["total_reads"] += 1 ft["total_reads"] += 1
ft["path_counts"][dp] = ft["path_counts"].get(dp, 0) + 1 ft["path_counts"][dp] = ft["path_counts"].get(dp, 0) + 1
ft["last_path"] = dp ft["last_path"] = dp
if ft["path_counts"][dp] >= 5 or ft["total_reads"] > 30: if ft["path_counts"][dp] >= 8 or ft["total_reads"] > 40:
ag_state["force_finalize"] = True ag_state["force_finalize"] = True
print(f"[antigravity-loop] FILE READ LOOP: {dp} read " print(f"[antigravity-loop] FILE READ LOOP: {dp} read "
f"{ft['path_counts'][dp]}x, total={ft['total_reads']}", file=sys.stderr) f"{ft['path_counts'][dp]}x, total={ft['total_reads']}", file=sys.stderr)
@@ -6785,8 +6788,11 @@ class Handler(http.server.BaseHTTPRequestHandler):
break break
if ag_state["force_finalize"]: if ag_state["force_finalize"]:
contents.append({"role": "user", "parts": [{"text": "STOP CALLING TOOLS. APPLY THE FINAL EDIT OR SUMMARIZE WHAT BLOCKED YOU. DO NOT CALL ANY MORE TOOLS. DO NOT PRODUCE ANY MORE PLANNING TEXT. DO NOT PRODUCE ANY MORE EXPLORATORY TOOL CALLS. PRODUCE A FINAL ANSWER OR A CLEAR STATEMENT OF WHAT IS PREVENTING YOU FROM COMPLETING THE TASK."}]}) return self._send_ag_finalize(
elif latest_lower and any(w in latest_lower for w in _EDIT_WORDS) and not ag_state["nudge_injected"] and not ag_state["force_finalize"]: "Loop detected. The proxy is forcing a stop because the model repeatedly "
"called tools without making progress. Try a more specific or smaller request.",
stream=body.get("stream", False) if isinstance(body, dict) else False)
elif latest_lower and any(w in latest_lower for w in _EDIT_WORDS) and not ag_state["nudge_injected"]:
contents.append({"role": "user", "parts": [{"text": "!!! ABSOLUTELY NO PLANNING - EMIT THE TOOL CALL NOW !!! IMPORTANT: The user is requesting a modification to existing files. You MUST use tools (exec_command, read_files, write, etc.) to make the changes RIGHT NOW. Do NOT just describe what to do — actually CALL THE TOOLS IN THIS RESPONSE. IMMEDIATELY INSPECT THE FILE OR LIST FILES USING exec_command TOOL CALL."}]}) contents.append({"role": "user", "parts": [{"text": "!!! ABSOLUTELY NO PLANNING - EMIT THE TOOL CALL NOW !!! IMPORTANT: The user is requesting a modification to existing files. You MUST use tools (exec_command, read_files, write, etc.) to make the changes RIGHT NOW. Do NOT just describe what to do — actually CALL THE TOOLS IN THIS RESPONSE. IMMEDIATELY INSPECT THE FILE OR LIST FILES USING exec_command TOOL CALL."}]})
ag_state["nudge_injected"] = True ag_state["nudge_injected"] = True
print(f"[antigravity] edit-intent detected; injected tool-use nudge (first time for this request)", file=sys.stderr) print(f"[antigravity] edit-intent detected; injected tool-use nudge (first time for this request)", file=sys.stderr)
@@ -7014,6 +7020,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
buf = "" buf = ""
stream_finished = False stream_finished = False
last_finish = ""
try:
for raw_line in _stream_with_idle_timeout(upstream, _idle_timeout_for_model(model)): for raw_line in _stream_with_idle_timeout(upstream, _idle_timeout_for_model(model)):
if tracker and tracker.cancelled.is_set(): if tracker and tracker.cancelled.is_set():
print("[gemini-oauth] stream cancelled", file=sys.stderr) print("[gemini-oauth] stream cancelled", file=sys.stderr)
@@ -7090,6 +7098,22 @@ class Handler(http.server.BaseHTTPRequestHandler):
break break
stream_finished = True stream_finished = True
break break
else:
if line.strip():
buf += line
except TimeoutError as te:
print(f"[{self._session_id}] [antigravity-v2] STREAM TIMEOUT: {te}", file=sys.stderr)
_log_resp(resp_id, "stream_timeout", [{"type": "error", "code": "stream_timeout", "message": str(te)}])
try:
flush_event("response.failed", {"type": "response.failed", "response": {"id": resp_id, "object": "response", "status": "failed", "error": {"type": "stream_timeout", "message": str(te)[:200]}}})
except Exception:
pass
self.close_connection = True
return
except (BrokenPipeError, ConnectionResetError, ConnectionAbortedError):
print(f"[{self._session_id}] [antigravity-v2] client disconnected during stream", file=sys.stderr)
_log_resp(resp_id, "client_disconnect", [])
return
if OAUTH_PROVIDER.startswith("google") and full_text and not current_tool_calls and last_finish == "MAX_TOKENS" and not stream_finished: if OAUTH_PROVIDER.startswith("google") and full_text and not current_tool_calls and last_finish == "MAX_TOKENS" and not stream_finished:
result = _auto_continue_gemini(self, flush_event, message_id, model, gen_config, gemini_tools, system_parts, project_id, headers, endpoints, url_suffix, full_text, output_items, message_started) result = _auto_continue_gemini(self, flush_event, message_id, model, gen_config, gemini_tools, system_parts, project_id, headers, endpoints, url_suffix, full_text, output_items, message_started)
@@ -8430,11 +8454,12 @@ class Handler(http.server.BaseHTTPRequestHandler):
def _send_ag_finalize(self, text, stream=False, is_responses_api=True): def _send_ag_finalize(self, text, stream=False, is_responses_api=True):
sid = getattr(self, '_session_id', 'fin') sid = getattr(self, '_session_id', 'fin')
print(f"[{sid}] [antigravity-finalize] Sending finalize response: {text[:80]}...", file=sys.stderr) print(f"[{sid}] [antigravity-finalize] Sending finalize-as-failed: {text[:80]}...", file=sys.stderr)
_log_resp(f"finalize-{sid}", "finalized", [{"type": "message", "content": [{"text": text}]}]) _log_resp(f"finalize-{sid}", "failed", [{"type": "error", "code": "rate_limit_error", "message": text}])
resp_id = f"resp_{uuid.uuid4().hex[:12]}" resp_id = f"resp_{uuid.uuid4().hex[:12]}"
msg_id = f"msg_{uuid.uuid4().hex[:12]}" msg_id = f"msg_{uuid.uuid4().hex[:12]}"
output_obj = [{"type": "message", "id": msg_id, "role": "assistant", error_output = [{"type": "error", "code": "rate_limit_error", "message": text}]
text_output = [{"type": "message", "id": msg_id, "role": "assistant",
"content": [{"type": "output_text", "text": text}]}] "content": [{"type": "output_text", "text": text}]}]
if stream: if stream:
events = [ events = [
@@ -8445,7 +8470,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
f"event: response.output_text.done\ndata: {json.dumps({'type':'response.output_text.done','output_index':0,'content_index':0,'text':text})}\n\n", f"event: response.output_text.done\ndata: {json.dumps({'type':'response.output_text.done','output_index':0,'content_index':0,'text':text})}\n\n",
f"event: response.content_part.done\ndata: {json.dumps({'type':'response.content_part.done','output_index':0,'content_index':0,'part':{'type':'output_text','text':text}})}\n\n", f"event: response.content_part.done\ndata: {json.dumps({'type':'response.content_part.done','output_index':0,'content_index':0,'part':{'type':'output_text','text':text}})}\n\n",
f"event: response.output_item.done\ndata: {json.dumps({'type':'response.output_item.done','output_index':0,'item':{'type':'message','id':msg_id,'role':'assistant','content':[{'type':'output_text','text':text}]}})}\n\n", f"event: response.output_item.done\ndata: {json.dumps({'type':'response.output_item.done','output_index':0,'item':{'type':'message','id':msg_id,'role':'assistant','content':[{'type':'output_text','text':text}]}})}\n\n",
f"event: response.completed\ndata: {json.dumps({'type':'response.completed','response':{'id':resp_id,'object':'response','status':'completed','output':output_obj}})}\n\n", f"event: response.failed\ndata: {json.dumps({'type':'response.failed','response':{'id':resp_id,'object':'response','status':'failed','output':error_output}})}\n\n",
] ]
self.send_response(200) self.send_response(200)
self.send_header("Content-Type", "text/event-stream") self.send_header("Content-Type", "text/event-stream")
@@ -8456,8 +8481,9 @@ class Handler(http.server.BaseHTTPRequestHandler):
self.wfile.write(evt.encode()) self.wfile.write(evt.encode())
self.wfile.flush() self.wfile.flush()
else: else:
self.send_json(200, {"id": resp_id, "object": "response", "status": "completed", self.send_json(200, {"id": resp_id, "object": "response", "status": "failed",
"output": output_obj, "model": "gemini-3-flash"}) "output": error_output + text_output, "model": "gemini-3-flash",
"error": {"type": "rate_limit_error", "message": text}})
return None return None
def stream_buffered_events(self, event_iter, flush_interval=0.03, max_bytes=4096, on_event=None): def stream_buffered_events(self, event_iter, flush_interval=0.03, max_bytes=4096, on_event=None):