diff --git a/CHANGELOG.md b/CHANGELOG.md index f8a39b1..fedd780 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,28 @@ # Changelog +## v3.13.5 (2026-05-27) + +**Anti-Loop & Flash Model Resilience, Auto Token Refresh** + +### New Features +- **Cross-session loop tracker**: Keys by user request hash — detects loops even when client creates new sessions per retry. Resets counter on new tasks. +- **Tool-call budget**: 150 calls max per task, warning at 80. Injects directive to stop reading and write, instead of killing the session. +- **File-path read-loop detection**: Same file read 5+ times or 30+ total file reads triggers force-finalize +- **Smart compaction summary**: Directive text when read-loop detected in compacted history +- **Model-aware idle timeout**: Flash/mini/haiku models get 120s timeout instead of 300s +- **Auto 401 token refresh**: On 401 transient, force-refreshes Google OAuth token and retries once +- **`_send_ag_finalize()` helper**: Returns synthetic response for hard terminations +- **Default provider policy**: Unrecognized providers get balanced compaction (128K context, 60 items) +- **Anti-stall self-kill fix**: No longer kills own parent process or process group +- **E2E test suite with real CLI task**: `test-antigravity.sh --task` + +### Bug Fixes +- Fix `_schema` NameError in smart-continue nudge (cobra91 PR #17) +- Fix `_anti_stall_cleanup()` killing own parent/shell wrapper +- Fix task_retry_count counting every turn instead of same-task retries +- Fix tool-call budget cap killing session instead of injecting directive +- Merged cobra91 PR #17: MSIX Desktop launch, button state + ## v3.13.0 (2026-05-27) **Codex Desktop Updater, Antigravity E2E, Profile System Fix** diff --git a/codex-launcher-gui b/codex-launcher-gui index d0a5bfa..3f6b115 100755 --- a/codex-launcher-gui +++ b/codex-launcher-gui @@ -27,7 +27,7 @@ model_catalog_json = "" """ CHANGELOG = [ - ("3.13.0", "2026-05-27", [ + ("3.13.5", "2026-05-27", [ "Codex Desktop 
Updater: auto-update from ilysenko/codex-desktop-linux", "Fix Antigravity: prod endpoint first, model resolution, OAUTH_PROVIDER derivation", "Fix Codex CLI 0.134.0 profile system: separate .config.toml files", diff --git a/codex-launcher_3.13.5_all.deb b/codex-launcher_3.13.5_all.deb new file mode 100644 index 0000000..092db06 Binary files /dev/null and b/codex-launcher_3.13.5_all.deb differ diff --git a/codex_launcher_lib.py b/codex_launcher_lib.py index b494e81..9372992 100644 --- a/codex_launcher_lib.py +++ b/codex_launcher_lib.py @@ -83,7 +83,7 @@ model_catalog_json = "" """ CHANGELOG = [ - ("3.13.0", "2026-05-27", [ + ("3.13.5", "2026-05-27", [ "Codex Desktop Updater: auto-update from ilysenko/codex-desktop-linux", "Fix Antigravity: prod endpoint first, model resolution, OAUTH_PROVIDER derivation", "Fix Codex CLI 0.134.0 profile system: separate .config.toml files", diff --git a/test-antigravity.sh b/test-antigravity.sh index f934fa6..1d69148 100644 --- a/test-antigravity.sh +++ b/test-antigravity.sh @@ -1,22 +1,36 @@ #!/usr/bin/env bash # ═══════════════════════════════════════════════════════════════════ -# test-antigravity.sh — End-to-end Antigravity proxy test +# test-antigravity.sh — End-to-end Antigravity proxy test + real task # -# Tests: token validity → direct REST probe → proxy adapter +# Phases: +# 1. Token validity +# 2. Direct REST endpoint probe +# 3. Proxy adapter (start proxy, test /responses) +# 4. Real Codex CLI task (if --task flag given) +# 5. 
Anomaly detection + analysis # -# Usage: bash ~/.local/bin/test-antigravity.sh [--verbose] +# Usage: +# bash ~/.local/bin/test-antigravity.sh # quick tests +# bash ~/.local/bin/test-antigravity.sh --task # + real CLI task +# bash ~/.local/bin/test-antigravity.sh --verbose # show all logs # Exit: 0 = all pass, 1 = some fail # ═══════════════════════════════════════════════════════════════════ -set -euo pipefail +set -uo pipefail -VERBOSE=0 -for arg in "$@"; do case "$arg" in --verbose|-v) VERBOSE=1 ;; esac; done +VERBOSE=0; RUN_TASK=0 +for arg in "$@"; do + case "$arg" in + --verbose|-v) VERBOSE=1 ;; + --task|-t) RUN_TASK=1 ;; + esac +done -RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; NC='\033[0m' +RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; CYAN='\033[0;36m'; NC='\033[0m' PASS=0; FAIL=0; SKIP=0; RESULTS=() log_pass() { echo -e " ${GREEN}PASS${NC} $1"; ((PASS++)); RESULTS+=("PASS $1"); } log_fail() { echo -e " ${RED}FAIL${NC} $1"; ((FAIL++)); RESULTS+=("FAIL $1"); } log_skip() { echo -e " ${YELLOW}SKIP${NC} $1"; ((SKIP++)); RESULTS+=("SKIP $1"); } +log_info() { echo -e " ${CYAN}INFO${NC} $1"; } TOKEN_PATH="$HOME/.cache/codex-proxy/google-antigravity-oauth-token.json" [ ! -f "$TOKEN_PATH" ] && { echo "ERROR: No token file. 
Login via GUI first."; exit 1; } @@ -97,16 +111,19 @@ done # ── Test 3: Proxy adapter (start proxy, test /responses) ────────── echo ""; echo "─── Test 3: Proxy Adapter (end-to-end) ───" +set +e TEST_PORT=$(python3 -c "import socket; s=socket.socket(); s.bind(('',0)); print(s.getsockname()[1]); s.close()") PROXY_API_KEY="test-$RANDOM" -find /home/roman/.local/bin -name "__pycache__" -type d -exec rm -rf {} + 2>/dev/null || true +find /home/roman/.local/bin -name "__pycache__" -type d -exec rm -rf {} + 2>/dev/null; true PROXY_PID="" -PROXY_PORT=$TEST_PORT PROXY_API_KEY=$PROXY_API_KEY PROXY_BACKEND=gemini-oauth-antigravity \ - PROXY_TARGET_URL=https://cloudcode-pa.googleapis.com \ - python3 /home/roman/.local/bin/translate-proxy.py >/tmp/antigravity-test-proxy.log 2>&1 & +export PROXY_PORT=$TEST_PORT +export PROXY_API_KEY=$PROXY_API_KEY +export PROXY_BACKEND=gemini-oauth-antigravity +export PROXY_TARGET_URL=https://cloudcode-pa.googleapis.com +python3 /home/roman/.local/bin/translate-proxy.py >/tmp/antigravity-test-proxy.log 2>&1 & PROXY_PID=$! cleanup() { kill $PROXY_PID 2>/dev/null || true; wait $PROXY_PID 2>/dev/null || true; } @@ -171,6 +188,273 @@ print(d.get('error',{}).get('message','')[:120])" 2>/dev/null || echo "unknown") [ "$VERBOSE" = "1" ] && cat /tmp/antigravity-test-proxy.log fi +# ── Test 4: Real Codex CLI Task ──────────────────────────────────── +if [ "$RUN_TASK" = "1" ]; then + echo ""; echo "─── Test 4: Real Codex CLI Task ───" + + if ! command -v codex &>/dev/null; then + log_skip "Codex CLI not found" + else + CLI_VERSION=$(codex --version 2>/dev/null || echo "unknown") + log_info "Codex CLI: $CLI_VERSION" + + TASK_PROMPT='Redesign the
section in site/index.html. Create a bold, innovative Steve Jobs-style design: boxy approach with contrasting boxes (one side white, one black), custom art seamless background that blends the two halves, think out of the box. Use pure CSS + HTML only, no external images. Make it visually stunning with geometric precision. The section is inside the existing page so keep the outer wrapper class vdb-universe with id=vectordb. Do NOT touch anything outside that section.' + + TASK_WORKSPACE="/home/roman/Codex-Launcher-Any-AI-Provider" + + mkdir -p /tmp/antigravity-task-logs + TASK_PROXY_LOG="/tmp/antigravity-task-logs/proxy-$(date +%s).log" + TASK_CLI_LOG="/tmp/antigravity-task-logs/cli-$(date +%s).log" + TASK_MONITOR_LOG="/tmp/antigravity-task-logs/monitor-$(date +%s).log" + + # Set up proxy for CLI task (use the one already running on TEST_PORT) + # Write codex profile + config pointing to our test proxy + CONFIG_DIR="$HOME/.codex" + CONFIG_FILE="$CONFIG_DIR/config.toml" + CONFIG_BACKUP="$CONFIG_DIR/config.toml.task-backup" + + [ -f "$CONFIG_FILE" ] && cp "$CONFIG_FILE" "$CONFIG_BACKUP" + + # Generate model catalog + CATALOG_PATH="$HOME/.cache/codex-proxy/models-Antigravity-Test.json" + python3 -c " +import json +models = ['gemini-3.5-flash-high', 'gemini-3.5-flash-medium', 'gemini-3.5-flash-low', + 'gemini-3.1-pro-high', 'gemini-3.1-pro-low', + 'claude-sonnet-4-6', 'claude-opus-4-6-thinking', 'gpt-oss-120b-medium'] +catalog = [] +for m in models: + catalog.append({ + 'slug': m, 'model': m, 'display_name': m, + 'description': f'Antigravity {m}', 'hidden': False, + 'isDefault': m == 'gemini-3.5-flash-high', + 'shell_type': 'shell_command', 'visibility': 'list', + 'default_reasoning_level': 'medium', + 'supported_reasoning_levels': [ + {'effort': 'low', 'description': 'Fast'}, + {'effort': 'medium', 'description': 'Balanced'}, + {'effort': 'high', 'description': 'Deep'}, + ], + }) +json.dump(catalog, open('$CATALOG_PATH', 'w'), indent=2) +" + + # Write main config + cat 
> "$CONFIG_FILE" < "$PROFILE_FILE" <> "$TASK_MONITOR_LOG" + break + } + + # Check proxy is alive + if ! kill -0 $PROXY_PID 2>/dev/null; then + echo "[MONITOR] FATAL: Proxy process died" >> "$TASK_MONITOR_LOG" + break + fi + + # Count lines in proxy log + LINE_COUNT=$(wc -l < "$PROXY_LOG" 2>/dev/null || echo 0) + NEW_LINES=$(( LINE_COUNT - PREV_LINE_COUNT )) + PREV_LINE_COUNT=$LINE_COUNT + + # Stall detection: no new log lines for 3 consecutive checks = stalled + if [ "$NEW_LINES" -eq 0 ]; then + STALL_COUNT=$(( STALL_COUNT + 1 )) + if [ "$STALL_COUNT" -ge 18 ]; then + echo "[MONITOR] STALL: No proxy activity for 180s" >> "$TASK_MONITOR_LOG" + fi + else + STALL_COUNT=0 + fi + + # Loop detection: check if same tool call repeats + RECENT=$(tail -50 "$PROXY_LOG" 2>/dev/null | grep "exec_command" | tail -5 | md5sum | cut -c1-8) + if [ -n "$RECENT" ] && [ "$RECENT" = "$LOOP_DETECTOR" ]; then + LOOP_COUNT=$(( LOOP_COUNT + 1 )) + if [ "$LOOP_COUNT" -ge 6 ]; then + echo "[MONITOR] LOOP: Same tool calls repeating ($LOOP_COUNT times)" >> "$TASK_MONITOR_LOG" + fi + else + LOOP_DETECTOR="$RECENT" + LOOP_COUNT=0 + fi + + # Check for error patterns + ERRORS=$(tail -100 "$PROXY_LOG" 2>/dev/null | grep -ciE "error|failed|timeout|500|502|503|429" || echo 0) + if [ "$ERRORS" -gt 10 ]; then + echo "[MONITOR] ERRORS: $ERRORS error lines in last 100 log lines" >> "$TASK_MONITOR_LOG" + fi + + # Check for compaction issues + COMPACT_LINES=$(tail -200 "$PROXY_LOG" 2>/dev/null | grep -c "compacted\|compaction\|trimming" || echo 0) + if [ "$COMPACT_LINES" -gt 20 ]; then + echo "[MONITOR] COMPACTION: Excessive compaction ($COMPACT_LINES events)" >> "$TASK_MONITOR_LOG" + fi + + # Check context item count + HIGH_ITEM=$(tail -200 "$PROXY_LOG" 2>/dev/null | grep -oP '\[\d+\]' | grep -oP '\d+' | sort -rn | head -1 || echo 0) + if [ -n "$HIGH_ITEM" ] && [ "$HIGH_ITEM" -gt 100 ]; then + echo "[MONITOR] CONTEXT: High item count detected: [$HIGH_ITEM]" >> "$TASK_MONITOR_LOG" + fi + + # Log heartbeat + 
echo "[MONITOR] ${ELAPSED}s elapsed, ${LINE_COUNT} log lines, ${NEW_LINES} new, ${ERRORS} errors" >> "$TASK_MONITOR_LOG" + done + ) & + MONITOR_PID=$! + + # ── Launch Codex CLI with the task ── + log_info "Launching Codex CLI with real task..." + log_info "Task: Redesign vectordb section (boxy black/white approach)" + log_info "Monitor log: $TASK_MONITOR_LOG" + + cd "$TASK_WORKSPACE" + + # Run codex non-interactively with --quiet flag + set +e + codex --profile Antigravity-Test -c "model=gemini-3.5-flash-high" \ + -s danger-full-access -a never \ + -q "$TASK_PROMPT" \ + > "$TASK_CLI_LOG" 2>&1 + CLI_EXIT=$? + # errexit intentionally stays off (script runs with set -uo pipefail): ((FAIL++)) returns 1 at zero and would abort before config restore + + # Stop monitor + kill $MONITOR_PID 2>/dev/null || true + wait $MONITOR_PID 2>/dev/null || true + + CLI_OUTPUT_LINES=$(wc -l < "$TASK_CLI_LOG" 2>/dev/null || echo 0) + log_info "CLI exited (code $CLI_EXIT, $CLI_OUTPUT_LINES output lines)" + + # ── Analyze results ── + echo ""; echo "─── Test 4a: CLI Task Results ───" + + if [ "$CLI_EXIT" -eq 0 ]; then + log_pass "CLI task completed successfully" + else + log_fail "CLI task failed (exit code $CLI_EXIT)" + echo " Last 10 lines of CLI output:" + tail -10 "$TASK_CLI_LOG" 2>/dev/null | sed 's/^/ /' + fi + + # Check monitor log for anomalies + echo ""; echo "─── Test 4b: Anomaly Analysis ───" + if [ -f "$TASK_MONITOR_LOG" ]; then + ANOMALIES=$(grep -c "\[MONITOR\]" "$TASK_MONITOR_LOG" 2>/dev/null || true) + CRITICAL=$(grep -cE "FATAL|LOOP|TIMEOUT|STALL|ERRORS|COMPACTION|CONTEXT" "$TASK_MONITOR_LOG" 2>/dev/null || true) + log_info "Monitor: $ANOMALIES checks, $CRITICAL anomalies detected" + + if [ "$CRITICAL" -gt 0 ]; then + echo -e " ${RED}ANOMALIES FOUND:${NC}" + grep -E "FATAL|LOOP|TIMEOUT|STALL|ERRORS|COMPACTION|CONTEXT" "$TASK_MONITOR_LOG" | while IFS= read -r line; do + echo -e " ${RED}$line${NC}" + done + log_fail "$CRITICAL anomalies detected during task" + else + log_pass "No anomalies detected during task" + fi + + [ "$VERBOSE" = "1" ] && cat "$TASK_MONITOR_LOG" + else + log_skip "No monitor log produced" + fi 
+ + # Check proxy log for issues + echo ""; echo "─── Test 4c: Proxy Health ───" + if [ -f "/tmp/antigravity-test-proxy.log" ]; then + ERROR_COUNT=$(grep -ciE "error|failed|exception|traceback" /tmp/antigravity-test-proxy.log || true) + TIMEOUT_COUNT=$(grep -ci "timeout\|timed.out" /tmp/antigravity-test-proxy.log || true) + COMPACT_COUNT=$(grep -c "compacted\|compaction" /tmp/antigravity-test-proxy.log || true) + ITEM_COUNT=$(grep -oP '\[\d+\]' /tmp/antigravity-test-proxy.log | grep -oP '\d+' | sort -rn | head -1 || echo 0) + + log_info "Proxy errors: $ERROR_COUNT, timeouts: $TIMEOUT_COUNT, compactions: $COMPACT_COUNT, max context items: $ITEM_COUNT" + + [ "$ERROR_COUNT" -gt 20 ] && log_fail "High error count: $ERROR_COUNT" + [ "$TIMEOUT_COUNT" -gt 5 ] && log_fail "Timeout count: $TIMEOUT_COUNT" + [ "$ITEM_COUNT" -gt 100 ] && log_fail "Context items grew to: $ITEM_COUNT (compaction may be failing)" + [ "$ITEM_COUNT" -le 100 ] && [ "$ITEM_COUNT" -gt 0 ] && log_pass "Context items stayed under 100 (max: $ITEM_COUNT)" + + # Check for repeated identical tool calls (loop detection) + DUPE_CALLS=$(grep "exec_command" /tmp/antigravity-test-proxy.log | sed 's/.*args=//' | sort | uniq -c | sort -rn | head -1 | awk '{print $1}' || echo 0) + if [ "$DUPE_CALLS" -gt 10 ]; then + log_fail "Loop detected: same tool call repeated $DUPE_CALLS times" + else + log_pass "No tool call loops (max repeat: $DUPE_CALLS)" + fi + fi + + # Check if the file was actually modified + echo ""; echo "─── Test 4d: Task Output Quality ───" + if [ -f "$TASK_WORKSPACE/site/index.html" ]; then + VDB_LINES=$(grep -c "vectordb\|vdb-" "$TASK_WORKSPACE/site/index.html" || true) + log_info "vectordb section has $VDB_LINES vdb-related lines" + + # Count lines holding HTML-escaped entities (a sign the model double-encoded its markup) + MALFORMED=$(grep -c "&lt;\|&gt;\|&amp;" "$TASK_WORKSPACE/site/index.html" || true) + [ "$MALFORMED" -gt 100 ] && log_fail "Possible HTML encoding issue: $MALFORMED escaped entities" + + # Check section is still intact + if 
grep -q 'id="vectordb"' "$TASK_WORKSPACE/site/index.html"; then + log_pass "vectordb section preserved" + else + log_fail "vectordb section missing or corrupted" + fi + fi + + # Restore original config (or remove the test config if none existed before the run) + if [ -f "$CONFIG_BACKUP" ]; then mv "$CONFIG_BACKUP" "$CONFIG_FILE"; else rm -f "$CONFIG_FILE"; fi + rm -f "$PROFILE_FILE" + + log_info "Config restored" + fi +fi + # ── Summary ─────────────────────────────────────────────────────── echo "" echo "═══════════════════════════════════════════════════════════════" diff --git a/translate-proxy.py b/translate-proxy.py index e84b907..e511ca8 100755 --- a/translate-proxy.py +++ b/translate-proxy.py @@ -380,6 +380,14 @@ _conn_pool_lock = threading.Lock() _conn_pool = {} _STREAM_IDLE_TIMEOUT = 300 + +def _idle_timeout_for_model(model, default=300): + if not model: + return default + m = model.lower() + if "flash" in m or "haiku" in m or "mini" in m.replace("gemini", ""): + return 120 + return default _MAX_CONCURRENT_REQUESTS = 3 _request_semaphore = threading.Semaphore(_MAX_CONCURRENT_REQUESTS) @@ -779,6 +787,20 @@ def _refresh_google_token(token_data, token_path): print(f"[oauth] refresh failed: {e}", file=sys.stderr) return token_data.get("access_token", "") +def _force_refresh_google_token(): + token_path = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy", + "google-antigravity-oauth-token.json" if OAUTH_PROVIDER == "google-antigravity" + else "google-oauth-token.json") + try: + with open(token_path) as f: + token_data = json.load(f) + token_data["expires_at"] = 0 + new_token = _refresh_google_token(token_data, token_path) + return bool(new_token) + except Exception as e: + print(f"[oauth] force refresh failed: {e}", file=sys.stderr) + return False + # ═══════════════════════════════════════════════════════════════════ # Gemini 3 thought signature preservation # ═══════════════════════════════════════════════════════════════════ @@ -846,7 +868,12 @@ _GEMINI_AGENT_GUARDRAIL = ( _LOG_FILE_LOCK = threading.Lock() _ANTIGRAVITY_LOOP_TRACKER = {} 
_ANTIGRAVITY_LOOP_TRACKER_LOCK = threading.Lock() -def _antigravity_loop_key(session_id): +_ANTIGRAVITY_FILE_TRACKER = {} +_ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK = 150 +_ANTIGRAVITY_WARN_TOOL_CALLS_PER_TASK = 80 +def _antigravity_loop_key(session_id, user_request_hash=None): + if user_request_hash: + return f"ag:task:{user_request_hash}" return f"ag:{session_id}" def _validate_antigravity_version(version, access_token=None, project_id=None): @@ -4925,7 +4952,7 @@ def _auto_continue_gemini(handler, flush_event, message_id, model, gen_config, g cont_text = "" cont_finish = "" cont_buf = "" - for raw_line in _stream_with_idle_timeout(upstream): + for raw_line in _stream_with_idle_timeout(upstream, _idle_timeout_for_model(model)): line = raw_line.decode(errors="replace") if line.startswith("data: "): cont_buf += line[6:] @@ -5122,7 +5149,20 @@ def _antigravity_normalize_context(input_data, model=""): compaction_summaries.append(msg_item) if n_summarized > 0: - summary_text = f"[Tool history summary: {n_summarized} older tool outputs omitted. {n_tool_calls} prior function calls were made for file inspection/editing.]" + n_read_calls = sum(1 for it in input_data if isinstance(it, dict) and it.get("type") == "function_call" + and it.get("name", "") not in ("write", "apply_diff", "edit_file") + and "write" not in json.dumps(it.get("arguments", {})).lower()) + n_write_calls = n_tool_calls - n_read_calls + if n_read_calls > 10 and n_write_calls == 0: + summary_text = ( + f"[CONTEXT HISTORY: {n_summarized} prior tool calls compacted. " + f"YOU ALREADY READ THE TARGET FILE EXTENSIVELY. " + f"DO NOT READ ANY MORE FILES. " + f"YOU MUST NOW USE THE WRITE TOOL TO APPLY YOUR EDITS DIRECTLY. " + f"DO NOT call exec_command or read_files AGAIN.]" + ) + else: + summary_text = f"[Tool history summary: {n_summarized} older tool outputs omitted. 
{n_tool_calls} prior function calls were made.]" result.append({"type": "message", "role": "user", "content": [{"type": "input_text", "text": summary_text}]}) # CRITICAL: Add tool CALLS and their corresponding OUTPUTS in PAIRED ORDER @@ -5744,10 +5784,12 @@ class Handler(http.server.BaseHTTPRequestHandler): "latest_user_hash": None, "nudge_injected": False, "latest_user_appended": False, "tool_calls_for_request": 0, "repeated_tool": False, "force_finalize": False, "last_tool": None, "last_tool_count": 0, + "task_retry_count": 0, "total_tool_calls": 0, "first_seen": time.time(), } ag_state = _ANTIGRAVITY_LOOP_TRACKER[ag_key] latest_user = "" + latest_user_hash = None if isinstance(input_data, list): for item in reversed(input_data): if item.get("type") == "message" and item.get("role") == "user": @@ -5760,17 +5802,91 @@ class Handler(http.server.BaseHTTPRequestHandler): if latest_user: latest_norm = " ".join(latest_user.strip().split())[:200] latest_user_hash = hashlib.sha256(latest_norm.encode()).hexdigest()[:16] - if latest_user_hash != ag_state.get("latest_user_hash"): - ag_state["latest_user_hash"] = latest_user_hash - ag_state["nudge_injected"] = False - ag_state["latest_user_appended"] = False - ag_state["tool_calls_for_request"] = 0 - ag_state["repeated_tool"] = False - ag_state["force_finalize"] = False - ag_state["last_tool"] = None - ag_state["last_tool_count"] = 0 + + # Cross-session key: stable across retries for same task + if latest_user_hash: + task_key = _antigravity_loop_key(self._session_id, latest_user_hash) + else: + task_key = ag_key + if task_key != ag_key: + with _ANTIGRAVITY_LOOP_TRACKER_LOCK: + if task_key not in _ANTIGRAVITY_LOOP_TRACKER: + _ANTIGRAVITY_LOOP_TRACKER[task_key] = dict(_ANTIGRAVITY_LOOP_TRACKER.get(ag_key, { + "latest_user_hash": None, "nudge_injected": False, "latest_user_appended": False, + "tool_calls_for_request": 0, "repeated_tool": False, "force_finalize": False, + "last_tool": None, "last_tool_count": 0, + 
"task_retry_count": 0, "total_tool_calls": 0, "first_seen": time.time(), + })) + ag_state = _ANTIGRAVITY_LOOP_TRACKER[task_key] + ag_key = task_key + + with _ANTIGRAVITY_LOOP_TRACKER_LOCK: + if latest_user_hash and latest_user_hash != ag_state.get("latest_user_hash"): + ag_state["latest_user_hash"] = latest_user_hash + ag_state["nudge_injected"] = False + ag_state["latest_user_appended"] = False + ag_state["tool_calls_for_request"] = 0 + ag_state["repeated_tool"] = False + ag_state["last_tool"] = None + ag_state["last_tool_count"] = 0 + ag_state["task_retry_count"] = 1 + ag_state["total_tool_calls"] = 0 + ag_state["first_seen"] = time.time() + ag_state["force_finalize"] = False + else: + ag_state["task_retry_count"] = ag_state.get("task_retry_count", 0) + 1 + + # Cross-session retry cap — only fires when same task retried many times + if ag_state.get("task_retry_count", 0) >= 15: + ag_state["task_retry_count"] = 0 + ag_state["force_finalize"] = False + return self._send_ag_finalize( + "Task retry limit reached. Breaking out of loop. " + "Try a more specific or smaller request if needed.", + stream=body.get("stream", False)) + if ag_state.get("task_retry_count", 0) >= 8: + ag_state["force_finalize"] = True + + if isinstance(input_data, list): n_tool_calls = sum(1 for it in input_data if isinstance(it, dict) and it.get("type") == "function_call") ag_state["tool_calls_for_request"] = n_tool_calls + cumulative_calls = ag_state.get("total_tool_calls", 0) + n_tool_calls + ag_state["total_tool_calls"] = cumulative_calls + + if cumulative_calls > _ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK: + print(f"[{getattr(self, '_session_id', '?')}] [antigravity-budget] HARD CAP: {cumulative_calls} calls, injecting force-write directive", file=sys.stderr) + contents.append({"role": "user", "parts": [{"text": + f"CRITICAL BUDGET LIMIT: {cumulative_calls} tool calls made. " + f"YOU MUST STOP NOW. Do NOT call any more tools. 
" + f"Write your FINAL answer immediately using the information you already have. " + f"If you have file edits, apply them in this response using exec_command with a write command. " + f"DO NOT READ ANY MORE FILES."}]}) + elif cumulative_calls > _ANTIGRAVITY_WARN_TOOL_CALLS_PER_TASK: + contents.append({"role": "user", "parts": [{"text": + f"WARNING: {cumulative_calls} tool calls made. " + f"{_ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK - cumulative_calls} remaining before forced stop. " + f"STOP READING FILES AND APPLY YOUR EDITS NOW."}]}) + + # CHANGE 2: File-path read-loop detection + if ag_key not in _ANTIGRAVITY_FILE_TRACKER: + _ANTIGRAVITY_FILE_TRACKER[ag_key] = {"last_path": None, "path_counts": {}, "total_reads": 0} + ft = _ANTIGRAVITY_FILE_TRACKER[ag_key] + for item in reversed(input_data): + if isinstance(item, dict) and item.get("type") == "function_call": + args_str = json.dumps(item.get("arguments", {})) + file_match = re.search(r'(/[\w/.-]+\.(?:html|py|js|ts|css|json|md|yaml|yml|xml|txt|sh))', args_str) + if file_match: + detected_path = file_match.group(1) + ft["total_reads"] += 1 + ft["path_counts"][detected_path] = ft["path_counts"].get(detected_path, 0) + 1 + ft["last_path"] = detected_path + if ft["path_counts"][detected_path] >= 5 or ft["total_reads"] > 30: + ag_state["force_finalize"] = True + print(f"[antigravity-loop] FILE READ LOOP: {detected_path} read " + f"{ft['path_counts'][detected_path]}x, total={ft['total_reads']}", + file=sys.stderr) + break + last_tool_key = None for item in reversed(input_data): if isinstance(item, dict) and item.get("type") == "function_call": @@ -5893,6 +6009,23 @@ class Handler(http.server.BaseHTTPRequestHandler): return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}}) if err_class in ("auth_permanent", "forbidden", "account_banned", "validation_required"): return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}}) + 
if err_class == "auth_transient": + print(f"[{self._session_id}] [antigravity-v2] 401 transient, force-refreshing token", file=sys.stderr) + try: + _force_refresh_google_token() + access_token = _refresh_oauth_token() + headers["Authorization"] = f"Bearer {access_token}" + new_body_b = json.dumps(wrapped).encode() + retry_req = urllib.request.Request(target, data=new_body_b, headers=headers) + upstream = urllib.request.urlopen(retry_req, timeout=_upstream_timeout(body, stream)) + chosen_ep = ep + with _antigravity_endpoint_lock: + _antigravity_preferred_endpoint = ep + print(f"[{self._session_id}] [antigravity-v2] 401 retry succeeded", file=sys.stderr) + break + except Exception as retry_e: + print(f"[{self._session_id}] [antigravity-v2] 401 retry failed: {retry_e}", file=sys.stderr) + return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}}) if err_class == "service_disabled": _is_prod = "cloudcode-pa.googleapis.com" in ep and "sandbox" not in ep if _is_prod: @@ -6449,66 +6582,135 @@ class Handler(http.server.BaseHTTPRequestHandler): if not is_latest_simple: contents.insert(0, {"role": "user", "parts": [{"text": _GEMINI_AGENT_GUARDRAIL}]}) - if OAUTH_PROVIDER == "google-antigravity": - import hashlib - ag_key = _antigravity_loop_key(self._session_id) - with _ANTIGRAVITY_LOOP_TRACKER_LOCK: - if ag_key not in _ANTIGRAVITY_LOOP_TRACKER: - _ANTIGRAVITY_LOOP_TRACKER[ag_key] = { - "latest_user_hash": None, - "nudge_injected": False, - "latest_user_appended": False, - "tool_calls_for_request": 0, - "repeated_tool": False, - "force_finalize": False, - "last_tool": None, - "last_tool_count": 0, - } - ag_state = _ANTIGRAVITY_LOOP_TRACKER[ag_key] + if OAUTH_PROVIDER == "google-antigravity": + import hashlib + ag_key = _antigravity_loop_key(self._session_id) + with _ANTIGRAVITY_LOOP_TRACKER_LOCK: + if ag_key not in _ANTIGRAVITY_LOOP_TRACKER: + _ANTIGRAVITY_LOOP_TRACKER[ag_key] = { + "latest_user_hash": None, + 
"nudge_injected": False, + "latest_user_appended": False, + "tool_calls_for_request": 0, + "repeated_tool": False, + "force_finalize": False, + "last_tool": None, + "last_tool_count": 0, + "task_retry_count": 0, + "total_tool_calls": 0, + "first_seen": time.time(), + } + ag_state = _ANTIGRAVITY_LOOP_TRACKER[ag_key] - latest_user = "" - latest_user_hash = None - if isinstance(input_data, list): - for item in reversed(input_data): - if item.get("type") == "message" and item.get("role") == "user": - c = item.get("content", "") - if isinstance(c, str): - latest_user = c - elif isinstance(c, list): - latest_user = "\n".join(p.get("text", p.get("input_text", "")) for p in c if isinstance(p, dict)) - break - if latest_user: - latest_norm = " ".join(latest_user.strip().split())[:200] - latest_user_hash = hashlib.sha256(latest_norm.encode()).hexdigest()[:16] - if latest_user_hash != ag_state["latest_user_hash"]: + latest_user = "" + latest_user_hash = None + if isinstance(input_data, list): + for item in reversed(input_data): + if item.get("type") == "message" and item.get("role") == "user": + c = item.get("content", "") + if isinstance(c, str): + latest_user = c + elif isinstance(c, list): + latest_user = "\n".join(p.get("text", p.get("input_text", "")) for p in c if isinstance(p, dict)) + break + if latest_user: + latest_norm = " ".join(latest_user.strip().split())[:200] + latest_user_hash = hashlib.sha256(latest_norm.encode()).hexdigest()[:16] + + if latest_user_hash: + task_key = _antigravity_loop_key(self._session_id, latest_user_hash) + else: + task_key = ag_key + if task_key != ag_key: + with _ANTIGRAVITY_LOOP_TRACKER_LOCK: + if task_key not in _ANTIGRAVITY_LOOP_TRACKER: + _ANTIGRAVITY_LOOP_TRACKER[task_key] = dict(_ANTIGRAVITY_LOOP_TRACKER.get(ag_key, { + "latest_user_hash": None, "nudge_injected": False, + "latest_user_appended": False, "tool_calls_for_request": 0, + "repeated_tool": False, "force_finalize": False, + "last_tool": None, "last_tool_count": 0, + 
"task_retry_count": 0, "total_tool_calls": 0, "first_seen": time.time(), + })) + ag_state = _ANTIGRAVITY_LOOP_TRACKER[task_key] + ag_key = task_key + + with _ANTIGRAVITY_LOOP_TRACKER_LOCK: + if latest_user_hash and latest_user_hash != ag_state.get("latest_user_hash"): ag_state["latest_user_hash"] = latest_user_hash ag_state["nudge_injected"] = False ag_state["latest_user_appended"] = False ag_state["tool_calls_for_request"] = 0 ag_state["repeated_tool"] = False - ag_state["force_finalize"] = False ag_state["last_tool"] = None ag_state["last_tool_count"] = 0 - - if isinstance(input_data, list): - n_tool_calls = sum(1 for it in input_data if isinstance(it, dict) and it.get("type") == "function_call") - ag_state["tool_calls_for_request"] = n_tool_calls - last_tool_key = None - for item in reversed(input_data): - if isinstance(item, dict) and item.get("type") == "function_call": - fname = item.get("name", "") - args_str = json.dumps(item.get("arguments", {}), sort_keys=True)[:100] - last_tool_key = f"{fname}:{args_str}" - break - if last_tool_key: - if last_tool_key == ag_state["last_tool"]: - ag_state["last_tool_count"] += 1 - if ag_state["last_tool_count"] >= 5: - ag_state["repeated_tool"] = True - ag_state["force_finalize"] = True + ag_state["task_retry_count"] = 1 + ag_state["total_tool_calls"] = 0 + ag_state["first_seen"] = time.time() + ag_state["force_finalize"] = False else: - ag_state["last_tool"] = last_tool_key - ag_state["last_tool_count"] = 1 + ag_state["task_retry_count"] = ag_state.get("task_retry_count", 0) + 1 + + if ag_state.get("task_retry_count", 0) >= 15: + ag_state["task_retry_count"] = 0 + ag_state["force_finalize"] = False + self._send_ag_finalize("Task retry limit reached. 
Breaking loop.", + stream=body.get("stream", False) if isinstance(body, dict) else False) + return + if ag_state.get("task_retry_count", 0) >= 8: + ag_state["force_finalize"] = True + + if isinstance(input_data, list): + n_tool_calls = sum(1 for it in input_data if isinstance(it, dict) and it.get("type") == "function_call") + ag_state["tool_calls_for_request"] = n_tool_calls + cumulative_calls = ag_state.get("total_tool_calls", 0) + n_tool_calls + ag_state["total_tool_calls"] = cumulative_calls + + if cumulative_calls > _ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK: + print(f"[antigravity-budget] HARD CAP: {cumulative_calls} calls, injecting force-write", file=sys.stderr) + contents.append({"role": "user", "parts": [{"text": + f"CRITICAL BUDGET LIMIT: {cumulative_calls} tool calls. " + f"STOP ALL TOOL CALLS. Write your FINAL answer now. " + f"Apply any edits using exec_command with a write command in this response."}]}) + elif cumulative_calls > _ANTIGRAVITY_WARN_TOOL_CALLS_PER_TASK: + contents.append({"role": "user", "parts": [{"text": + f"WARNING: {cumulative_calls} tool calls. " + f"{_ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK - cumulative_calls} remaining. 
" + f"STOP READING AND WRITE NOW."}]}) + + if ag_key not in _ANTIGRAVITY_FILE_TRACKER: + _ANTIGRAVITY_FILE_TRACKER[ag_key] = {"last_path": None, "path_counts": {}, "total_reads": 0} + ft = _ANTIGRAVITY_FILE_TRACKER[ag_key] + for item in reversed(input_data): + if isinstance(item, dict) and item.get("type") == "function_call": + args_str = json.dumps(item.get("arguments", {})) + file_match = re.search(r'(/[\w/.-]+\.(?:html|py|js|ts|css|json|md|yaml|yml|xml|txt|sh))', args_str) + if file_match: + dp = file_match.group(1) + ft["total_reads"] += 1 + ft["path_counts"][dp] = ft["path_counts"].get(dp, 0) + 1 + ft["last_path"] = dp + if ft["path_counts"][dp] >= 5 or ft["total_reads"] > 30: + ag_state["force_finalize"] = True + print(f"[antigravity-loop] FILE READ LOOP: {dp} read " + f"{ft['path_counts'][dp]}x, total={ft['total_reads']}", file=sys.stderr) + break + + last_tool_key = None + for item in reversed(input_data): + if isinstance(item, dict) and item.get("type") == "function_call": + fname = item.get("name", "") + args_str = json.dumps(item.get("arguments", {}), sort_keys=True)[:100] + last_tool_key = f"{fname}:{args_str}" + break + if last_tool_key: + if last_tool_key == ag_state["last_tool"]: + ag_state["last_tool_count"] += 1 + if ag_state["last_tool_count"] >= 5: + ag_state["repeated_tool"] = True + ag_state["force_finalize"] = True + else: + ag_state["last_tool"] = last_tool_key + ag_state["last_tool_count"] = 1 _EDIT_WORDS = ("change", "fix", "update", "redesign", "rewrite", "modify", "improve", "replace", "edit", "make it", "add", "remove", "delete", "rename", "move", "convert") latest_lower = "" @@ -6671,6 +6873,23 @@ class Handler(http.server.BaseHTTPRequestHandler): return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}}) if err_class == "auth_permanent": return self.send_json(e.code, {"error": {"type": "upstream_error", "message": _sanitize_err_body(err_body)}}) + if err_class == "auth_transient": + 
def _send_ag_finalize(self, text, stream=False, is_responses_api=True,
                      model="gemini-3-flash"):
    """Reply with a locally built, already-completed assistant message.

    No upstream request is made: ``text`` becomes the entire assistant
    output of a synthetic response, sent with HTTP status 200.

    Args:
        text: Message body. ``None`` is treated as an empty string and any
            other non-string value is converted with ``str()``.
        stream: When true, emit the reply as server-sent events on
            ``self.wfile``; otherwise send one JSON document through
            ``self.send_json``.
        is_responses_api: When true (default), use the Responses-API shape
            (``response.*`` events / ``"object": "response"``). When false,
            use the Chat Completions shape.
        model: Model name reported in the non-streaming Responses body and
            in every Chat Completions payload. Defaults to the value that
            was previously hard-coded.

    Returns:
        Always ``None``.
    """
    # Coerce first so the log preview below cannot fail on None / non-str.
    text = "" if text is None else str(text)
    sid = getattr(self, '_session_id', 'fin')
    print(f"[{sid}] [antigravity-finalize] Sending finalize response: {text[:80]}...", file=sys.stderr)

    def _emit_stream(frames):
        # Write the SSE headers and every frame, flushing once at the end.
        # A client that disconnects mid-reply is expected here, so it is
        # logged rather than allowed to propagate out of the handler.
        try:
            self.send_response(200)
            self.send_header("Content-Type", "text/event-stream")
            self.send_header("Cache-Control", "no-cache")
            self.send_header("Connection", "keep-alive")
            self.end_headers()
            for frame in frames:
                self.wfile.write(frame.encode())
            self.wfile.flush()
        except (BrokenPipeError, ConnectionResetError, ConnectionAbortedError) as exc:
            print(f"[{sid}] [antigravity-finalize] client disconnected: {exc!r}", file=sys.stderr)

    if is_responses_api:
        resp_id = f"resp_{uuid.uuid4().hex[:12]}"
        msg_id = f"msg_{uuid.uuid4().hex[:12]}"
        text_part = {"type": "output_text", "text": text}
        output_obj = [{"type": "message", "id": msg_id, "role": "assistant",
                       "content": [text_part]}]
        if not stream:
            self.send_json(200, {"id": resp_id, "object": "response", "status": "completed",
                                 "output": output_obj, "model": model})
            return None

        where = {"output_index": 0, "content_index": 0}
        # One payload per SSE frame; the SSE event name is the payload's
        # own "type", so it is derived instead of being written twice.
        payloads = [
            {"type": "response.created",
             "response": {"id": resp_id, "object": "response", "status": "in_progress"}},
            {"type": "response.output_item.added", "output_index": 0,
             "item": {"type": "message", "id": msg_id, "role": "assistant", "content": []}},
            {"type": "response.content_part.added", **where,
             "part": {"type": "output_text", "text": ""}},
            {"type": "response.output_text.delta", **where, "delta": text},
            {"type": "response.output_text.done", **where, "text": text},
            {"type": "response.content_part.done", **where, "part": text_part},
            {"type": "response.output_item.done", "output_index": 0, "item": output_obj[0]},
            {"type": "response.completed",
             "response": {"id": resp_id, "object": "response", "status": "completed",
                          "output": output_obj}},
        ]
        _emit_stream(f"event: {p['type']}\ndata: {json.dumps(p)}\n\n" for p in payloads)
        return None

    # NOTE(review): previously nothing at all was written when
    # is_responses_api was false, leaving the request unanswered. The Chat
    # Completions shape is assumed to be what such callers expect -- confirm
    # against the call sites.
    chat_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
    created = int(time.time())
    if not stream:
        self.send_json(200, {
            "id": chat_id, "object": "chat.completion", "created": created, "model": model,
            "choices": [{"index": 0,
                         "message": {"role": "assistant", "content": text},
                         "finish_reason": "stop"}],
        })
        return None

    def _chunk(delta, finish_reason):
        # Build one chat.completion.chunk payload for the single choice.
        return {"id": chat_id, "object": "chat.completion.chunk", "created": created,
                "model": model,
                "choices": [{"index": 0, "delta": delta, "finish_reason": finish_reason}]}

    _emit_stream([
        f"data: {json.dumps(_chunk({'role': 'assistant', 'content': text}, None))}\n\n",
        f"data: {json.dumps(_chunk({}, 'stop'))}\n\n",
        "data: [DONE]\n\n",
    ])
    return None