v3.11.5: token-aware compaction, vision filter, universal adaptive compaction, smart-continue text detection

2026-05-26 16:14:05 +04:00
parent 028185652d
commit b029e7cb5e
9 changed files with 684 additions and 127 deletions
--- a/src/translate-proxy.py
+++ b/src/translate-proxy.py
@@ -787,6 +787,10 @@ _GEMINI_AGENT_GUARDRAIL = (
 )

 _LOG_FILE_LOCK = threading.Lock()
+_ANTIGRAVITY_LOOP_TRACKER = {}
+_ANTIGRAVITY_LOOP_TRACKER_LOCK = threading.Lock()
+def _antigravity_loop_key(session_id):
+    """Return the loop-tracker dictionary key for *session_id* ("ag:" plus its formatted value)."""
+    return "ag:{}".format(session_id)

 def _fetch_antigravity_version():
    cache_path = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy", "antigravity-version.json")
@@ -1469,6 +1473,53 @@ _CROF_ADAPTIVE = {
    "min_keep_recent": 6,
 }

+_model_max_tokens = {}
+_model_max_tokens_lock = threading.Lock()
+
+def _estimate_tokens(item):
+    """Return a rough token estimate for a single input item.
+
+    The estimate is a character-count heuristic (about 4 characters per token
+    for text and tool output, about 2 for function-call arguments) with a
+    small floor per item or part, and a flat 800 per image part.
+
+    Args:
+        item: One element of the request ``input`` list. Anything that is not
+            a dict is charged the minimum of 4.
+
+    Returns:
+        int: The estimated token count (always >= 4).
+    """
+    def _size(value):
+        # None counts as empty; non-string payloads (e.g. dict arguments or a
+        # list-valued output) are measured by their str() form instead of
+        # crashing or returning a misleading container length.
+        if value is None:
+            return 0
+        return len(value) if isinstance(value, str) else len(str(value))
+    if not isinstance(item, dict):
+        return 4
+    kind = item.get("type", "")
+    if kind == "function_call_output":
+        return max(8, _size(item.get("output", "")) // 4)
+    if kind == "function_call":
+        return max(20, _size(item.get("arguments", "{}")) // 2)
+    if kind != "message":
+        return 4
+    content = item.get("content", "")
+    if isinstance(content, str):
+        return max(4, len(content) // 4)
+    if not isinstance(content, list):
+        return 4
+    total = 4
+    for part in content:
+        if isinstance(part, str):
+            # Bare strings inside a content list are treated as text.
+            total += max(4, len(part) // 4)
+            continue
+        if not isinstance(part, dict):
+            continue
+        pt = part.get("type", "")
+        if pt in ("input_text", "output_text"):
+            total += max(4, _size(part.get("text", "")) // 4)
+        elif pt in ("input_image", "image_url"):
+            # Both image part spellings handled by the vision filter cost the same.
+            total += 800
+        elif pt == "function_call":
+            total += max(20, _size(part.get("arguments", "{}")) // 2)
+        elif pt == "function_call_output":
+            total += max(8, _size(part.get("output", "")) // 4)
+    return total
+
+def _estimate_input_tokens(input_data):
+    """Sum the per-item token estimates of *input_data*; return 0 when it is not a list."""
+    if isinstance(input_data, list):
+        return sum(map(_estimate_tokens, input_data))
+    return 0
+
+def _get_model_max_tokens(model):
+    """Return the learned context limit stored for *model*, or None when nothing is stored."""
+    with _model_max_tokens_lock:
+        learned = _model_max_tokens.get(model)
+    return learned
+
+def _set_model_max_tokens(model, tokens):
+    """Record *tokens* as the context limit for *model* if it is the first or a lower value.
+
+    Falsy *model* or *tokens* are ignored. A stored limit is only ever
+    replaced by a smaller one; each update is logged to stderr.
+    """
+    if not (model and tokens):
+        return
+    with _model_max_tokens_lock:
+        known = _model_max_tokens.get(model)
+        if known is not None and not tokens < known:
+            return
+        _model_max_tokens[model] = tokens
+        print(f"[ctx-limit] learned {model} max ~{tokens} tokens", file=sys.stderr)
+
 _BGP_STATS_PATH = os.path.join(_LOG_DIR, "bgp-route-stats.json")
 _bgp_stats_lock = threading.Lock()

@@ -1534,8 +1585,6 @@ def _sorted_bgp_routes():
    return sorted(BGP_ROUTES, key=lambda r: _score_route(r, stats))

 def _crof_record(model, n_items, success):
-    if TARGET_URL and "crof.ai" not in TARGET_URL:
-        return
    if not isinstance(n_items, int) or n_items < 1:
        return
    entry = {"model": model, "items": n_items, "ok": success}
@@ -1561,20 +1610,36 @@ def _crof_record(model, n_items, success):
            global_limit = v["limit"]
    _CROF_ADAPTIVE["global_item_limit"] = global_limit

-    if TARGET_URL and "crof.ai" in TARGET_URL:
-        print(f"[crof-adaptive] model={model} items={n_items} {'OK' if success else 'FAIL'} -> limit={ml.get('limit',30)} global={global_limit}", file=sys.stderr)
+    print(f"[crof-adaptive] model={model} items={n_items} {'OK' if success else 'FAIL'} -> limit={ml.get('limit',30)} global={global_limit}", file=sys.stderr)

 def _crof_item_limit(model):
    # Effective item cap: the per-model limit (default 30) bounded by the global limit.
    model_cap = _CROF_ADAPTIVE["model_limits"].get(model, {}).get("limit", 30)
    global_cap = _CROF_ADAPTIVE["global_item_limit"]
    return min(model_cap, global_cap)

-def _crof_compact_for_retry(input_data, model):
+def _crof_compact_for_retry(input_data, model, aggression=0):
    limit = _crof_item_limit(model)
-    if not isinstance(input_data, list) or len(input_data) <= limit:
+    if not isinstance(input_data, list) or len(input_data) < 2:
+        return input_data
+
+    max_tok = _get_model_max_tokens(model)
+    est = _estimate_input_tokens(input_data)
+    over_item_limit = len(input_data) > limit
+    over_token_limit = max_tok and est >= max_tok * 0.9
+
+    if not over_item_limit and not over_token_limit:
        return input_data

    keep = max(_CROF_ADAPTIVE["min_keep_recent"], limit // 3)
+    if over_token_limit:
+        ratio = est / max_tok
+        if aggression >= 1 or ratio > 1.5:
+            keep = max(2, _CROF_ADAPTIVE["min_keep_recent"] // 2)
+        elif ratio > 1.2:
+            keep = max(3, keep // 2)
+        print(f"[ctx-limit] model={model} est={est}tok max={max_tok}tok ratio={ratio:.2f} -> keep={keep}", file=sys.stderr)
+    elif over_item_limit:
+        keep = max(keep, 6)
    head_end = 0
    for i, item in enumerate(input_data):
        t = item.get("type")
@@ -1607,8 +1672,7 @@ def _crof_compact_for_retry(input_data, model):
        summary_lines.append(_item_summary(item, max_len=120))

    summary_msg = {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "\n".join(summary_lines)}]}
-    if TARGET_URL and "crof.ai" in TARGET_URL:
-        print(f"[crof-adaptive] RETRY compact: {len(input_data)} -> {len(head)+1+len(tail)} (limit={limit}, keep={len(tail)})", file=sys.stderr)
+    print(f"[crof-adaptive] RETRY compact: {len(input_data)} -> {len(head)+1+len(tail)} (limit={limit}, keep={len(tail)}, agg={aggression})", file=sys.stderr)
    return head + [summary_msg] + tail

 def _item_summary(item, max_len=200):
@@ -2051,6 +2115,18 @@ def synthesize_tool_results_for_chat(input_items):
 def has_function_call_output(input_items):
    # True when input_items is a list containing at least one dict item of type
    # "function_call_output". Non-dict entries (strings, None) are skipped
    # instead of raising AttributeError on .get().
    if not isinstance(input_items, list):
        return False
    return any(isinstance(i, dict) and i.get("type") == "function_call_output" for i in input_items)

+# Matches a known tool name at the start of a line (optionally after bullet
+# characters and an opening parenthesis) followed by whitespace or a colon.
+_TOOL_CALL_TEXT_PATTERNS = re.compile(
+    r'(?:^|\n)[\s•\-\*]*\(?'
+    r'(?:exec_command|write_to_file|exec_bash|bash|run_command|shell|edit_file|read_file|search_files|list_files)'
+    r'[\s:]',
+    re.I | re.MULTILINE
+)
+
+def _text_looks_like_tool_calls(text):
+    """Return True when *text* (at least 6 characters) contains a line that starts like a tool call."""
+    if text and len(text) >= 6:
+        return _TOOL_CALL_TEXT_PATTERNS.search(text) is not None
+    return False
+
 # ═══════════════════════════════════════════════════════════════════
 # Log redaction
 # ═══════════════════════════════════════════════════════════════════
@@ -2233,9 +2309,14 @@ def _normalize_tool_args(raw_args):
    except json.JSONDecodeError:
        return raw_args

-_XML_TC_RE = re.compile(r'<tool_call>(\w+)(.*?)</tool_call>', re.DOTALL)
+_XML_TC_RE = re.compile(r'exec_command(.*?)</invoke>', re.DOTALL)
 _XML_ARG_VALUE_RE = re.compile(r'</?arg_value>\s*')

+_PAREN_TC_RE = re.compile(
+    r'(?:^|[\n•\-\*]\s*)\(\s*(exec_command|write_to_file|exec_bash|bash|run_command|shell|edit_file|read_file|search_files|list_files)\b\s*(.*?)\)',
+    re.DOTALL | re.I
+)
+
 def _extract_xml_tool_calls(text):
    if not text:
        return []
@@ -2262,6 +2343,68 @@ def _extract_xml_tool_calls(text):
        results.append({"name": name, "args": args_str, "call_id": f"xml_{len(results)}"})
    return results

+_NON_VISION_MODEL_PATTERNS = re.compile(
+    r'\b(deepseek|glm|mixtral|llama\b(?!.*vision)|command|dbrx|qwen\b(?!.*vl)|phi-?3(?!.*vision))',
+    re.I
+)
+
+_vision_fail_cache = set()
+_vision_fail_lock = threading.Lock()
+
+def _model_supports_vision(model):
+    """Return False for models recorded as vision failures or matching the non-vision name pattern.
+
+    A falsy *model* is treated as vision-capable.
+    """
+    if not model:
+        return True
+    with _vision_fail_lock:
+        known_failure = model in _vision_fail_cache
+    if known_failure:
+        return False
+    return _NON_VISION_MODEL_PATTERNS.search(model) is None
+
+def _mark_vision_fail(model):
+    """Add *model* to the vision-failure cache; a falsy *model* is ignored."""
+    if not model:
+        return
+    with _vision_fail_lock:
+        _vision_fail_cache.add(model)
+
+def _image_display_name(part):
+    """Pick a short label for an image part; data URLs become "screenshot.png"."""
+    ref = part.get("image_url")
+    if isinstance(ref, dict):
+        # Nested form: {"image_url": {"url": ...}}.
+        ref = ref.get("url")
+    # Otherwise "image_url" may already be a plain string (URL or data URL);
+    # calling .get() on it would raise AttributeError.
+    if not isinstance(ref, str) or not ref:
+        ref = part.get("url")
+    if not isinstance(ref, str) or not ref:
+        return "image.png"
+    return "screenshot.png" if ref.startswith("data:") else ref
+
+def _strip_images_from_input(input_data, model):
+    """Replace image parts with a text placeholder for models without vision support.
+
+    Args:
+        input_data: The request ``input``. Anything that is not a list is
+            returned unchanged.
+        model: Model name, checked with ``_model_supports_vision``.
+
+    Returns:
+        ``input_data`` itself when nothing was removed; otherwise a new list in
+        which each affected message is a shallow copy with new ``content``.
+        The first image of a message becomes one placeholder text part, and
+        further images in the same message are dropped. Items that are not
+        message dicts, and messages whose content is not a list, pass through
+        untouched.
+    """
+    if not isinstance(input_data, list) or _model_supports_vision(model):
+        return input_data
+    result = []
+    stripped = 0
+    for item in input_data:
+        is_message = isinstance(item, dict) and item.get("type") == "message"
+        content = item.get("content", []) if is_message else None
+        if not isinstance(content, list):
+            result.append(item)
+            continue
+        new_content = []
+        removed = 0
+        for part in content:
+            if isinstance(part, dict) and part.get("type", "") in ("input_image", "image_url"):
+                if removed == 0:
+                    fname = _image_display_name(part)
+                    new_content.append({"type": "output_text", "text": f"[User attached image: {fname} — this model does not support vision]"})
+                removed += 1
+            else:
+                new_content.append(part)
+        if removed:
+            # Only messages that actually lost an image are rebuilt.
+            stripped += removed
+            result.append({**item, "content": new_content})
+        else:
+            result.append(item)
+    if not stripped:
+        return input_data
+    print(f"[vision-filter] stripped {stripped} images for model={model}", file=sys.stderr)
+    return result
+
 def oa_input_to_messages(input_data):
    msgs = []
    tool_name_by_id = {}
@@ -4889,12 +5032,25 @@ class Handler(http.server.BaseHTTPRequestHandler):
            body["input"] = input_data

        crof_limit = _crof_item_limit(model)
-        _crof_eligible = TARGET_URL and "crof.ai" in TARGET_URL
-        if _crof_eligible and not compacted and isinstance(input_data, list) and len(input_data) > crof_limit:
-            print(f"[crof-adaptive] proactive compact: {len(input_data)} items > limit {crof_limit}", file=sys.stderr)
-            input_data = _crof_compact_for_retry(input_data, model)
-            body = dict(body)
-            body["input"] = input_data
+        _crof_eligible = True
+        if _crof_eligible and not compacted and isinstance(input_data, list):
+            _needs_compact = len(input_data) > crof_limit
+            max_tok = _get_model_max_tokens(model)
+            est_tok = _estimate_input_tokens(input_data) if max_tok else 0
+            if not _needs_compact and max_tok and est_tok > max_tok * 0.8:
+                _needs_compact = True
+            if _needs_compact:
+                _agg = 0
+                if max_tok and est_tok > max_tok:
+                    _agg = 1
+                print(f"[crof-adaptive] proactive compact: {len(input_data)} items, est={est_tok}tok max={max_tok}tok agg={_agg}", file=sys.stderr)
+                input_data = _crof_compact_for_retry(input_data, model, aggression=_agg)
+                body = dict(body)
+                body["input"] = input_data
+
+        # Strip images for non-vision models
+        input_data = _strip_images_from_input(input_data, model)
+        body["input"] = input_data

        messages = oa_input_to_messages(input_data)
        messages = _inject_stored_reasoning(messages)
@@ -4927,14 +5083,19 @@ class Handler(http.server.BaseHTTPRequestHandler):
                except urllib.error.HTTPError as e:
                    err_body = e.read().decode()
                    if "context_length_exceeded" in err_body and attempt < max_retries:
-                        print(f"[{self._session_id}] context_length_exceeded (attempt {attempt+1}/{max_retries}), retrying with extreme compaction!", file=sys.stderr)
+                        import re as _re
+                        _tok_m = _re.search(r'~?(\d+)\s*tokens', err_body)
+                        if _tok_m:
+                            _set_model_max_tokens(model, int(_tok_m.group(1)))
+                        print(f"[{self._session_id}] context_length_exceeded (attempt {attempt+1}/{max_retries}), retrying with compaction (agg={attempt})!", file=sys.stderr)
                        policy = provider_policy()
                        if isinstance(input_data, list):
-                            print(f"[{self._session_id}] applying extreme compaction to {len(input_data)} items", file=sys.stderr)
-                            input_data = _crof_compact_for_retry(input_data, model)
+                            est = _estimate_input_tokens(input_data)
+                            print(f"[{self._session_id}] applying compaction to {len(input_data)} items ~{est}tok", file=sys.stderr)
+                            input_data = _crof_compact_for_retry(input_data, model, aggression=attempt)
                            body = dict(body)
                            body["input"] = input_data
-                            messages = oa_input_to_messages(input_data)
+                            messages = oa_input_to_messages(_strip_images_from_input(input_data, model))
                            messages = _inject_stored_reasoning(messages)
                            instructions = body.get("instructions", "").strip()
                            if instructions:
@@ -5267,31 +5428,88 @@ class Handler(http.server.BaseHTTPRequestHandler):
            if not is_latest_simple:
                contents.insert(0, {"role": "user", "parts": [{"text": _GEMINI_AGENT_GUARDRAIL}]})

-        if OAUTH_PROVIDER == "google-antigravity" and isinstance(input_data, list):
+        if OAUTH_PROVIDER == "google-antigravity":
+            import hashlib
+            ag_key = _antigravity_loop_key(self._session_id)
+            with _ANTIGRAVITY_LOOP_TRACKER_LOCK:
+                if ag_key not in _ANTIGRAVITY_LOOP_TRACKER:
+                    _ANTIGRAVITY_LOOP_TRACKER[ag_key] = {
+                        "latest_user_hash": None,
+                        "nudge_injected": False,
+                        "latest_user_appended": False,
+                        "tool_calls_for_request": 0,
+                        "repeated_tool": False,
+                        "force_finalize": False,
+                        "last_tool": None,
+                        "last_tool_count": 0,
+                    }
+                ag_state = _ANTIGRAVITY_LOOP_TRACKER[ag_key]
+
+            latest_user = ""
+            latest_user_hash = None
+            if isinstance(input_data, list):
+                for item in reversed(input_data):
+                    if item.get("type") == "message" and item.get("role") == "user":
+                        c = item.get("content", "")
+                        if isinstance(c, str):
+                            latest_user = c
+                        elif isinstance(c, list):
+                            latest_user = "\n".join(p.get("text", p.get("input_text", "")) for p in c if isinstance(p, dict))
+                        break
+                if latest_user:
+                    latest_norm = " ".join(latest_user.strip().split())[:200]
+                    latest_user_hash = hashlib.sha256(latest_norm.encode()).hexdigest()[:16]
+                    if latest_user_hash != ag_state["latest_user_hash"]:
+                        ag_state["latest_user_hash"] = latest_user_hash
+                        ag_state["nudge_injected"] = False
+                        ag_state["latest_user_appended"] = False
+                        ag_state["tool_calls_for_request"] = 0
+                        ag_state["repeated_tool"] = False
+                        ag_state["force_finalize"] = False
+                        ag_state["last_tool"] = None
+                        ag_state["last_tool_count"] = 0
+
+            if isinstance(input_data, list):
+                n_tool_calls = sum(1 for it in input_data if isinstance(it, dict) and it.get("type") == "function_call")
+                ag_state["tool_calls_for_request"] = n_tool_calls
+                last_tool_key = None
+                for item in reversed(input_data):
+                    if isinstance(item, dict) and item.get("type") == "function_call":
+                        fname = item.get("name", "")
+                        args_str = json.dumps(item.get("arguments", {}), sort_keys=True)[:100]
+                        last_tool_key = f"{fname}:{args_str}"
+                        break
+                if last_tool_key:
+                    if last_tool_key == ag_state["last_tool"]:
+                        ag_state["last_tool_count"] += 1
+                        if ag_state["last_tool_count"] >= 5:
+                            ag_state["repeated_tool"] = True
+                            ag_state["force_finalize"] = True
+                    else:
+                        ag_state["last_tool"] = last_tool_key
+                        ag_state["last_tool_count"] = 1
+
            _EDIT_WORDS = ("change", "fix", "update", "redesign", "rewrite", "modify", "improve", "replace", "edit", "make it", "add", "remove", "delete", "rename", "move", "convert")
            latest_lower = ""
-            for item in reversed(input_data):
-                if item.get("type") == "message" and item.get("role") == "user":
-                    c = item.get("content", "")
-                    if isinstance(c, str): latest_lower = c.lower()
-                    elif isinstance(c, list): latest_lower = " ".join(p.get("text", p.get("input_text", "")) for p in c if isinstance(p, dict)).lower()
-                    break
-            if latest_lower and any(w in latest_lower for w in _EDIT_WORDS):
-                n_tool_calls = sum(1 for it in input_data if isinstance(it, dict) and it.get("type") == "function_call")
-                contents.append({"role": "user", "parts": [{"text": "!!! ABSOLUTELY NO PLANNING - EMIT THE TOOL CALL NOW !!! IMPORTANT: The user is requesting a modification to existing files. You MUST use tools (exec_command, read_files, write, etc.) to make the changes RIGHT NOW. Do NOT just describe what to do — actually CALL THE TOOLS IN THIS RESPONSE. IMMEDIATELY INSPECT THE FILE OR LIST FILES USING exec_command TOOL CALL."}]})
-                print(f"[antigravity] edit-intent detected; injected tool-use nudge", file=sys.stderr)
+            if isinstance(input_data, list):
+                for item in reversed(input_data):
+                    if item.get("type") == "message" and item.get("role") == "user":
+                        c = item.get("content", "")
+                        if isinstance(c, str): latest_lower = c.lower()
+                        elif isinstance(c, list): latest_lower = " ".join(p.get("text", p.get("input_text", "")) for p in c if isinstance(p, dict)).lower()
+                        break

-        if OAUTH_PROVIDER == "google-antigravity" and isinstance(input_data, list):
-            latest_user = ""
-            for item in reversed(input_data):
-                if item.get("type") == "message" and item.get("role") == "user":
-                    c = item.get("content", "")
-                    if isinstance(c, str):
-                        latest_user = c
-                    elif isinstance(c, list):
-                        latest_user = "\n".join(p.get("text", p.get("input_text", "")) for p in c if isinstance(p, dict))
-                    break
-            if latest_user:
+            if ag_state["force_finalize"]:
+                contents.append({"role": "user", "parts": [{"text": "STOP CALLING TOOLS. APPLY THE FINAL EDIT OR SUMMARIZE WHAT BLOCKED YOU. DO NOT CALL ANY MORE TOOLS. DO NOT PRODUCE ANY MORE PLANNING TEXT. DO NOT PRODUCE ANY MORE EXPLORATORY TOOL CALLS. PRODUCE A FINAL ANSWER OR A CLEAR STATEMENT OF WHAT IS PREVENTING YOU FROM COMPLETING THE TASK."}]})
+            elif latest_lower and any(w in latest_lower for w in _EDIT_WORDS) and not ag_state["nudge_injected"] and not ag_state["force_finalize"]:
+                contents.append({"role": "user", "parts": [{"text": "!!! ABSOLUTELY NO PLANNING - EMIT THE TOOL CALL NOW !!! IMPORTANT: The user is requesting a modification to existing files. You MUST use tools (exec_command, read_files, write, etc.) to make the changes RIGHT NOW. Do NOT just describe what to do — actually CALL THE TOOLS IN THIS RESPONSE. IMMEDIATELY INSPECT THE FILE OR LIST FILES USING exec_command TOOL CALL."}]})
+                ag_state["nudge_injected"] = True
+                print(f"[antigravity] edit-intent detected; injected tool-use nudge (first time for this request)", file=sys.stderr)
+            else:
+                if ag_state["nudge_injected"]:
+                    print(f"[antigravity] edit-intent nudge already injected, skipping", file=sys.stderr)
+
+            if latest_user and not ag_state["latest_user_appended"] and not ag_state["force_finalize"]:
                latest_norm = " ".join(latest_user.strip().split())[:160]
                final_text = ""
                if contents:
@@ -5299,14 +5517,24 @@ class Handler(http.server.BaseHTTPRequestHandler):
                    if last.get("role") == "user":
                        final_text = " ".join(json.dumps(last.get("parts", []), ensure_ascii=False).split())
                if latest_norm[:120] not in final_text:
-                    print(f"[antigravity] latest user instruction was not final turn; appending", file=sys.stderr)
+                    print(f"[antigravity] latest user instruction was not final turn; appending (first time for this request)", file=sys.stderr)
                    contents.append({"role": "user", "parts": [{"text": latest_user}]})
+                    ag_state["latest_user_appended"] = True
                else:
                    print(f"[antigravity] latest user instruction is final turn", file=sys.stderr)
-                print(f"[{self._session_id}] [antigravity-debug] input_items={len(input_data) if isinstance(input_data, list) else 1} contents={len(contents)} latest={latest_user[:80]!r}", file=sys.stderr)
-                if contents:
-                    last_c = contents[-1]
-                    print(f"[{self._session_id}] [antigravity-debug] final_role={last_c.get('role')} preview={json.dumps(last_c.get('parts', []), ensure_ascii=False)[:200]}", file=sys.stderr)
+            else:
+                if ag_state["latest_user_appended"]:
+                    print(f"[antigravity] latest user instruction already appended, skipping", file=sys.stderr)
+
+            print(f"[antigravity-loop] latest_user_hash={latest_user_hash}", file=sys.stderr)
+            print(f"[antigravity-loop] tool_calls_for_request={ag_state['tool_calls_for_request']}", file=sys.stderr)
+            print(f"[antigravity-loop] repeated_tool={ag_state['repeated_tool']}", file=sys.stderr)
+            print(f"[antigravity-loop] nudge_injected={ag_state['nudge_injected']}", file=sys.stderr)
+            print(f"[antigravity-loop] force_finalize={ag_state['force_finalize']}", file=sys.stderr)
+            print(f"[{self._session_id}] [antigravity-debug] input_items={len(input_data) if isinstance(input_data, list) else 1} contents={len(contents)} latest={latest_user[:80]!r}", file=sys.stderr)
+            if contents:
+                last_c = contents[-1]
+                print(f"[{self._session_id}] [antigravity-debug] final_role={last_c.get('role')} preview={json.dumps(last_c.get('parts', []), ensure_ascii=False)[:200]}", file=sys.stderr)

        request_body = {"contents": contents}
        if system_parts:
@@ -5725,9 +5953,11 @@ class Handler(http.server.BaseHTTPRequestHandler):
            last_status = None
            finish_reason = None
            has_content = False
+            has_message = False
+            has_tool_call = False

            def _observe_event(event):
-                nonlocal last_resp_id, last_output, last_status, finish_reason, has_content
+                nonlocal last_resp_id, last_output, last_status, finish_reason, has_content, has_message, has_tool_call
                for line in event.strip().split("\n"):
                    if line.startswith("data: "):
                        try:
@@ -5737,7 +5967,9 @@ class Handler(http.server.BaseHTTPRequestHandler):
                                last_output = d.get("response", {}).get("output", [])
                                last_status = d.get("response", {}).get("status")
                                finish_reason = "length" if last_status == "incomplete" else "stop"
-                                has_content = any(o.get("type") == "message" for o in (last_output or []))
+                                has_tool_call = any(o.get("type") == "function_call" for o in (last_output or []))
+                                has_message = any(o.get("type") == "message" for o in (last_output or []))
+                                has_content = has_message or has_tool_call
                        except Exception:
                            pass

@@ -5749,7 +5981,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
                        break
                    collected_events.append(event)
                    _observe_event(event)
-                print(f"[{self._session_id}] stream ended: events={len(collected_events)} finish={finish_reason} has_content={has_content} elapsed={time.time()-t0:.1f}s", file=sys.stderr)
+                print(f"[{self._session_id}] stream ended: events={len(collected_events)} finish={finish_reason} has_content={has_content} has_message={has_message} has_tool_call={has_tool_call} elapsed={time.time()-t0:.1f}s", file=sys.stderr)
            except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
                print("[translate-proxy] client disconnected during stream", file=sys.stderr)
                _crof_record(model, n_items, False)
@@ -5805,6 +6037,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
                        last_resp_id = last_output = last_status = None
                        finish_reason = None
                        has_content = False
+                        has_message = False
+                        has_tool_call = False
                        for event in oa_stream_to_sse(retry_upstream, model, body.get("request_id") or body.get("id")):
                            collected_events.append(event)
                            _observe_event(event)
@@ -5813,7 +6047,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
                        print(f"[provider-sensor] synthetic retry failed: {e}", file=sys.stderr)

            # Auto-retry on finish_reason=length with no content due to too much context.
-            if finish_reason == "length" and not has_content and isinstance(input_data, list) and len(input_data) > 5 and TARGET_URL and "crof.ai" in TARGET_URL:
+            if finish_reason == "length" and not has_content and isinstance(input_data, list) and len(input_data) > 5:
                print(f"[crof-adaptive] RETRY: finish_reason=length with no content, compacting {n_items} items", file=sys.stderr)
                new_input = _crof_compact_for_retry(input_data, model)
                if len(new_input) < len(input_data):
@@ -5836,6 +6070,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
                        last_resp_id = last_output = last_status = None
                        finish_reason = None
                        has_content = False
+                        has_message = False
+                        has_tool_call = False
                        for event in oa_stream_to_sse(retry_upstream, model, body.get("request_id") or body.get("id")):
                            collected_events.append(event)
                            _observe_event(event)
@@ -5943,9 +6179,17 @@ class Handler(http.server.BaseHTTPRequestHandler):
                _smart_attempt = 0
                while _smart_attempt < _smart_max:
                    _has_tool_calls_in_output = any(o.get("type") == "function_call" for o in (last_output or []))
+                    last_text = ""
+                    for o in (last_output or []):
+                        if o.get("type") == "message":
+                            for c in (o.get("content") or []):
+                                if isinstance(c, dict) and c.get("type") == "output_text":
+                                    last_text += c.get("text", "")
+                    _looks_like_tools = _text_looks_like_tool_calls(last_text)
+                    _has_prior_tool_ctx = has_function_call_output(input_data)
                    if not (finish_reason == "stop" and has_content and not _has_tool_calls_in_output
                            and isinstance(input_data, list) and len(input_data) >= 3
-                            and has_function_call_output(input_data)):
+                            and (_has_prior_tool_ctx or _looks_like_tools)):
                        break
                    _smart_attempt += 1
                    _nudges = [
@@ -5954,12 +6198,6 @@ class Handler(http.server.BaseHTTPRequestHandler):
                    ]
                    nudge_text = _nudges[min(_smart_attempt - 1, len(_nudges) - 1)]
                    # Try extracting XML tool calls from text as fallback before nudging
-                    last_text = ""
-                    for o in (last_output or []):
-                        if o.get("type") == "message":
-                            for c in (o.get("content") or []):
-                                if isinstance(c, dict) and c.get("type") == "output_text":
-                                    last_text += c.get("text", "")
                    xml_fc = _extract_xml_tool_calls(last_text)
                    if xml_fc:
                        print(f"[{self._session_id}] [smart-continue] extracted {len(xml_fc)} XML tool calls from text, injecting and retrying", file=sys.stderr)
@@ -5979,6 +6217,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
                            last_resp_id = last_output = last_status = None
                            finish_reason = None
                            has_content = False
+                            has_message = False
+                            has_tool_call = False
                            for event in oa_stream_to_sse(retry_upstream, model, body.get("request_id") or body.get("id")):
                                collected_events.append(event)
                                _observe_event(event)
@@ -5988,19 +6228,21 @@ class Handler(http.server.BaseHTTPRequestHandler):
                            print(f"[{self._session_id}] [smart-continue] XML injection retry failed: {e}", file=sys.stderr)
                            break
                    _nudge_msg = {"role": "user", "content": nudge_text}
-                    nudge_messages = oa_input_to_messages(input_data) + [_nudge_msg]
+                    nudge_messages = oa_input_to_messages(_strip_images_from_input(input_data, model)) + [_nudge_msg]
                    instructions = body.get("instructions", "").strip()
                    if instructions:
                        nudge_messages.insert(0, {"role": "system", "content": instructions})
                    nudge_chat_body = self._build_chat_body(model, nudge_messages, body, stream)
                    nudge_req = urllib.request.Request(target, data=json.dumps(nudge_chat_body).encode(), headers=fwd)
-                    print(f"[{self._session_id}] [smart-continue] attempt {_smart_attempt}/{_smart_max}: model stopped mid-task, nudging", file=sys.stderr)
+                    print(f"[{self._session_id}] [smart-continue] attempt {_smart_attempt}/{_smart_max}: model stopped mid-task (prior_ctx={_has_prior_tool_ctx} text_tools={_looks_like_tools}), nudging", file=sys.stderr)
                    try:
                        retry_upstream = urllib.request.urlopen(nudge_req, timeout=_upstream_timeout(body, True))
                        collected_events = []
                        last_resp_id = last_output = last_status = None
                        finish_reason = None
                        has_content = False
+                        has_message = False
+                        has_tool_call = False
                        for event in oa_stream_to_sse(retry_upstream, model, body.get("request_id") or body.get("id")):
                            collected_events.append(event)
                            _observe_event(event)