v3.11.5: token-aware compaction, vision filter, universal adaptive compaction, smart-continue text detection
This commit is contained in:
@@ -787,6 +787,10 @@ _GEMINI_AGENT_GUARDRAIL = (
|
||||
)
|
||||
|
||||
_LOG_FILE_LOCK = threading.Lock()
|
||||
_ANTIGRAVITY_LOOP_TRACKER = {}
|
||||
_ANTIGRAVITY_LOOP_TRACKER_LOCK = threading.Lock()
|
||||
def _antigravity_loop_key(session_id):
|
||||
return f"ag:{session_id}"
|
||||
|
||||
def _fetch_antigravity_version():
|
||||
cache_path = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy", "antigravity-version.json")
|
||||
@@ -1469,6 +1473,53 @@ _CROF_ADAPTIVE = {
|
||||
"min_keep_recent": 6,
|
||||
}
|
||||
|
||||
_model_max_tokens = {}
|
||||
_model_max_tokens_lock = threading.Lock()
|
||||
|
||||
def _estimate_tokens(item):
|
||||
if not isinstance(item, dict):
|
||||
return 4
|
||||
t = item.get("type", "")
|
||||
if t == "message":
|
||||
content = item.get("content", "")
|
||||
if isinstance(content, str):
|
||||
return max(4, len(content) // 4)
|
||||
elif isinstance(content, list):
|
||||
total = 4
|
||||
for part in content:
|
||||
pt = part.get("type", "")
|
||||
if pt in ("input_text", "output_text"):
|
||||
total += max(4, len(part.get("text", "")) // 4)
|
||||
elif pt == "input_image":
|
||||
total += 800
|
||||
elif pt in ("function_call",):
|
||||
total += max(20, len(part.get("arguments", "{}")) // 2)
|
||||
elif pt == "function_call_output":
|
||||
total += max(8, len(part.get("output", "")) // 4)
|
||||
return total
|
||||
elif t in ("function_call_output",):
|
||||
return max(8, len(item.get("output", "")) // 4)
|
||||
elif t == "function_call":
|
||||
return max(20, len(item.get("arguments", "{}")) // 2)
|
||||
return 4
|
||||
|
||||
def _estimate_input_tokens(input_data):
|
||||
if not isinstance(input_data, list):
|
||||
return 0
|
||||
return sum(_estimate_tokens(i) for i in input_data)
|
||||
|
||||
def _get_model_max_tokens(model):
    """Return the learned token limit for *model*, or ``None`` if none is stored."""
    with _model_max_tokens_lock:
        learned = _model_max_tokens.get(model)
    return learned
|
||||
|
||||
def _set_model_max_tokens(model, tokens):
|
||||
if model and tokens:
|
||||
with _model_max_tokens_lock:
|
||||
existing = _model_max_tokens.get(model)
|
||||
if existing is None or tokens < existing:
|
||||
_model_max_tokens[model] = tokens
|
||||
print(f"[ctx-limit] learned {model} max ~{tokens} tokens", file=sys.stderr)
|
||||
|
||||
_BGP_STATS_PATH = os.path.join(_LOG_DIR, "bgp-route-stats.json")
|
||||
_bgp_stats_lock = threading.Lock()
|
||||
|
||||
@@ -1534,8 +1585,6 @@ def _sorted_bgp_routes():
|
||||
return sorted(BGP_ROUTES, key=lambda r: _score_route(r, stats))
|
||||
|
||||
def _crof_record(model, n_items, success):
|
||||
if TARGET_URL and "crof.ai" not in TARGET_URL:
|
||||
return
|
||||
if not isinstance(n_items, int) or n_items < 1:
|
||||
return
|
||||
entry = {"model": model, "items": n_items, "ok": success}
|
||||
@@ -1561,20 +1610,36 @@ def _crof_record(model, n_items, success):
|
||||
global_limit = v["limit"]
|
||||
_CROF_ADAPTIVE["global_item_limit"] = global_limit
|
||||
|
||||
if TARGET_URL and "crof.ai" in TARGET_URL:
|
||||
print(f"[crof-adaptive] model={model} items={n_items} {'OK' if success else 'FAIL'} -> limit={ml.get('limit',30)} global={global_limit}", file=sys.stderr)
|
||||
print(f"[crof-adaptive] model={model} items={n_items} {'OK' if success else 'FAIL'} -> limit={ml.get('limit',30)} global={global_limit}", file=sys.stderr)
|
||||
|
||||
def _crof_item_limit(model):
    """Return the item limit for *model*.

    The result is the smaller of the model's own adaptive limit (30 when the
    model has no entry or no ``limit`` key) and the global item limit.
    """
    model_entry = _CROF_ADAPTIVE["model_limits"].get(model, {})
    return min(model_entry.get("limit", 30), _CROF_ADAPTIVE["global_item_limit"])
|
||||
|
||||
def _crof_compact_for_retry(input_data, model):
|
||||
def _crof_compact_for_retry(input_data, model, aggression=0):
|
||||
limit = _crof_item_limit(model)
|
||||
if not isinstance(input_data, list) or len(input_data) <= limit:
|
||||
if not isinstance(input_data, list) or len(input_data) < 2:
|
||||
return input_data
|
||||
|
||||
max_tok = _get_model_max_tokens(model)
|
||||
est = _estimate_input_tokens(input_data)
|
||||
over_item_limit = len(input_data) > limit
|
||||
over_token_limit = max_tok and est >= max_tok * 0.9
|
||||
|
||||
if not over_item_limit and not over_token_limit:
|
||||
return input_data
|
||||
|
||||
keep = max(_CROF_ADAPTIVE["min_keep_recent"], limit // 3)
|
||||
if over_token_limit:
|
||||
ratio = est / max_tok
|
||||
if aggression >= 1 or ratio > 1.5:
|
||||
keep = max(2, _CROF_ADAPTIVE["min_keep_recent"] // 2)
|
||||
elif ratio > 1.2:
|
||||
keep = max(3, keep // 2)
|
||||
print(f"[ctx-limit] model={model} est={est}tok max={max_tok}tok ratio={ratio:.2f} -> keep={keep}", file=sys.stderr)
|
||||
elif over_item_limit:
|
||||
keep = max(keep, 6)
|
||||
head_end = 0
|
||||
for i, item in enumerate(input_data):
|
||||
t = item.get("type")
|
||||
@@ -1607,8 +1672,7 @@ def _crof_compact_for_retry(input_data, model):
|
||||
summary_lines.append(_item_summary(item, max_len=120))
|
||||
|
||||
summary_msg = {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "\n".join(summary_lines)}]}
|
||||
if TARGET_URL and "crof.ai" in TARGET_URL:
|
||||
print(f"[crof-adaptive] RETRY compact: {len(input_data)} -> {len(head)+1+len(tail)} (limit={limit}, keep={len(tail)})", file=sys.stderr)
|
||||
print(f"[crof-adaptive] RETRY compact: {len(input_data)} -> {len(head)+1+len(tail)} (limit={limit}, keep={len(tail)}, agg={aggression})", file=sys.stderr)
|
||||
return head + [summary_msg] + tail
|
||||
|
||||
def _item_summary(item, max_len=200):
|
||||
@@ -2051,6 +2115,18 @@ def synthesize_tool_results_for_chat(input_items):
|
||||
def has_function_call_output(input_items):
    """Return True if *input_items* is a list containing a tool-result item.

    An item counts when it is a dict whose ``type`` is
    ``"function_call_output"``. Non-list input yields False, and non-dict
    elements are skipped instead of raising ``AttributeError``.
    """
    if not isinstance(input_items, list):
        return False
    return any(
        isinstance(item, dict) and item.get("type") == "function_call_output"
        for item in input_items
    )
|
||||
|
||||
_TOOL_CALL_TEXT_PATTERNS = re.compile(
|
||||
r'(?:^|\n)[\s•\-\*]*\(?'
|
||||
r'(?:exec_command|write_to_file|exec_bash|bash|run_command|shell|edit_file|read_file|search_files|list_files)'
|
||||
r'[\s:]',
|
||||
re.I | re.MULTILINE
|
||||
)
|
||||
|
||||
def _text_looks_like_tool_calls(text):
|
||||
if not text or len(text) < 6:
|
||||
return False
|
||||
return bool(_TOOL_CALL_TEXT_PATTERNS.search(text))
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
# Log redaction
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
@@ -2233,9 +2309,14 @@ def _normalize_tool_args(raw_args):
|
||||
except json.JSONDecodeError:
|
||||
return raw_args
|
||||
|
||||
_XML_TC_RE = re.compile(r'<tool_call>(\w+)(.*?)</tool_call>', re.DOTALL)
|
||||
_XML_TC_RE = re.compile(r'exec_command(.*?)</invoke>', re.DOTALL)
|
||||
_XML_ARG_VALUE_RE = re.compile(r'</?arg_value>\s*')
|
||||
|
||||
_PAREN_TC_RE = re.compile(
|
||||
r'(?:^|[\n•\-\*]\s*)\(\s*(exec_command|write_to_file|exec_bash|bash|run_command|shell|edit_file|read_file|search_files|list_files)\b\s*(.*?)\)',
|
||||
re.DOTALL | re.I
|
||||
)
|
||||
|
||||
def _extract_xml_tool_calls(text):
|
||||
if not text:
|
||||
return []
|
||||
@@ -2262,6 +2343,68 @@ def _extract_xml_tool_calls(text):
|
||||
results.append({"name": name, "args": args_str, "call_id": f"xml_{len(results)}"})
|
||||
return results
|
||||
|
||||
_NON_VISION_MODEL_PATTERNS = re.compile(
|
||||
r'\b(deepseek|glm|mixtral|llama\b(?!.*vision)|command|dbrx|qwen\b(?!.*vl)|phi-?3(?!.*vision))',
|
||||
re.I
|
||||
)
|
||||
|
||||
_vision_fail_cache = set()
|
||||
_vision_fail_lock = threading.Lock()
|
||||
|
||||
def _model_supports_vision(model):
|
||||
if not model:
|
||||
return True
|
||||
with _vision_fail_lock:
|
||||
if model in _vision_fail_cache:
|
||||
return False
|
||||
if _NON_VISION_MODEL_PATTERNS.search(model):
|
||||
return False
|
||||
return True
|
||||
|
||||
def _mark_vision_fail(model):
|
||||
if model:
|
||||
with _vision_fail_lock:
|
||||
_vision_fail_cache.add(model)
|
||||
|
||||
def _strip_images_from_input(input_data, model):
|
||||
if not isinstance(input_data, list) or _model_supports_vision(model):
|
||||
return input_data
|
||||
modified = False
|
||||
result = []
|
||||
for item in input_data:
|
||||
if item.get("type") != "message":
|
||||
result.append(item)
|
||||
continue
|
||||
content = item.get("content", [])
|
||||
if isinstance(content, str):
|
||||
result.append(item)
|
||||
continue
|
||||
new_content = []
|
||||
has_img = False
|
||||
for part in content:
|
||||
if isinstance(part, str):
|
||||
new_content.append(part)
|
||||
continue
|
||||
pt = part.get("type", "")
|
||||
if pt in ("input_image", "image_url"):
|
||||
if not has_img:
|
||||
fname = part.get("image_url", {}).get("url", part.get("url", "image.png"))
|
||||
if fname.startswith("data:"):
|
||||
fname = "screenshot.png"
|
||||
new_content.append({"type": "output_text", "text": f"[User attached image: {fname} — this model does not support vision]"})
|
||||
has_img = True
|
||||
modified = True
|
||||
else:
|
||||
new_content.append(part)
|
||||
if modified:
|
||||
result.append({**item, "content": new_content})
|
||||
else:
|
||||
result.append(item)
|
||||
if modified:
|
||||
print(f"[vision-filter] stripped {sum(1 for i in input_data if i.get('type')=='message' and any(c.get('type') in ('input_image','image_url') for c in (i.get('content') or []) if isinstance(c,dict)))} images for model={model}", file=sys.stderr)
|
||||
return result
|
||||
return input_data
|
||||
|
||||
def oa_input_to_messages(input_data):
|
||||
msgs = []
|
||||
tool_name_by_id = {}
|
||||
@@ -4889,12 +5032,25 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
body["input"] = input_data
|
||||
|
||||
crof_limit = _crof_item_limit(model)
|
||||
_crof_eligible = TARGET_URL and "crof.ai" in TARGET_URL
|
||||
if _crof_eligible and not compacted and isinstance(input_data, list) and len(input_data) > crof_limit:
|
||||
print(f"[crof-adaptive] proactive compact: {len(input_data)} items > limit {crof_limit}", file=sys.stderr)
|
||||
input_data = _crof_compact_for_retry(input_data, model)
|
||||
body = dict(body)
|
||||
body["input"] = input_data
|
||||
_crof_eligible = True
|
||||
if _crof_eligible and not compacted and isinstance(input_data, list):
|
||||
_needs_compact = len(input_data) > crof_limit
|
||||
max_tok = _get_model_max_tokens(model)
|
||||
est_tok = _estimate_input_tokens(input_data) if max_tok else 0
|
||||
if not _needs_compact and max_tok and est_tok > max_tok * 0.8:
|
||||
_needs_compact = True
|
||||
if _needs_compact:
|
||||
_agg = 0
|
||||
if max_tok and est_tok > max_tok:
|
||||
_agg = 1
|
||||
print(f"[crof-adaptive] proactive compact: {len(input_data)} items, est={est_tok}tok max={max_tok}tok agg={_agg}", file=sys.stderr)
|
||||
input_data = _crof_compact_for_retry(input_data, model, aggression=_agg)
|
||||
body = dict(body)
|
||||
body["input"] = input_data
|
||||
|
||||
# Strip images for non-vision models
|
||||
input_data = _strip_images_from_input(input_data, model)
|
||||
body["input"] = input_data
|
||||
|
||||
messages = oa_input_to_messages(input_data)
|
||||
messages = _inject_stored_reasoning(messages)
|
||||
@@ -4927,14 +5083,19 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
except urllib.error.HTTPError as e:
|
||||
err_body = e.read().decode()
|
||||
if "context_length_exceeded" in err_body and attempt < max_retries:
|
||||
print(f"[{self._session_id}] context_length_exceeded (attempt {attempt+1}/{max_retries}), retrying with extreme compaction!", file=sys.stderr)
|
||||
import re as _re
|
||||
_tok_m = _re.search(r'~?(\d+)\s*tokens', err_body)
|
||||
if _tok_m:
|
||||
_set_model_max_tokens(model, int(_tok_m.group(1)))
|
||||
print(f"[{self._session_id}] context_length_exceeded (attempt {attempt+1}/{max_retries}), retrying with compaction (agg={attempt})!", file=sys.stderr)
|
||||
policy = provider_policy()
|
||||
if isinstance(input_data, list):
|
||||
print(f"[{self._session_id}] applying extreme compaction to {len(input_data)} items", file=sys.stderr)
|
||||
input_data = _crof_compact_for_retry(input_data, model)
|
||||
est = _estimate_input_tokens(input_data)
|
||||
print(f"[{self._session_id}] applying compaction to {len(input_data)} items ~{est}tok", file=sys.stderr)
|
||||
input_data = _crof_compact_for_retry(input_data, model, aggression=attempt)
|
||||
body = dict(body)
|
||||
body["input"] = input_data
|
||||
messages = oa_input_to_messages(input_data)
|
||||
messages = oa_input_to_messages(_strip_images_from_input(input_data, model))
|
||||
messages = _inject_stored_reasoning(messages)
|
||||
instructions = body.get("instructions", "").strip()
|
||||
if instructions:
|
||||
@@ -5267,31 +5428,88 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
if not is_latest_simple:
|
||||
contents.insert(0, {"role": "user", "parts": [{"text": _GEMINI_AGENT_GUARDRAIL}]})
|
||||
|
||||
if OAUTH_PROVIDER == "google-antigravity" and isinstance(input_data, list):
|
||||
if OAUTH_PROVIDER == "google-antigravity":
|
||||
import hashlib
|
||||
ag_key = _antigravity_loop_key(self._session_id)
|
||||
with _ANTIGRAVITY_LOOP_TRACKER_LOCK:
|
||||
if ag_key not in _ANTIGRAVITY_LOOP_TRACKER:
|
||||
_ANTIGRAVITY_LOOP_TRACKER[ag_key] = {
|
||||
"latest_user_hash": None,
|
||||
"nudge_injected": False,
|
||||
"latest_user_appended": False,
|
||||
"tool_calls_for_request": 0,
|
||||
"repeated_tool": False,
|
||||
"force_finalize": False,
|
||||
"last_tool": None,
|
||||
"last_tool_count": 0,
|
||||
}
|
||||
ag_state = _ANTIGRAVITY_LOOP_TRACKER[ag_key]
|
||||
|
||||
latest_user = ""
|
||||
latest_user_hash = None
|
||||
if isinstance(input_data, list):
|
||||
for item in reversed(input_data):
|
||||
if item.get("type") == "message" and item.get("role") == "user":
|
||||
c = item.get("content", "")
|
||||
if isinstance(c, str):
|
||||
latest_user = c
|
||||
elif isinstance(c, list):
|
||||
latest_user = "\n".join(p.get("text", p.get("input_text", "")) for p in c if isinstance(p, dict))
|
||||
break
|
||||
if latest_user:
|
||||
latest_norm = " ".join(latest_user.strip().split())[:200]
|
||||
latest_user_hash = hashlib.sha256(latest_norm.encode()).hexdigest()[:16]
|
||||
if latest_user_hash != ag_state["latest_user_hash"]:
|
||||
ag_state["latest_user_hash"] = latest_user_hash
|
||||
ag_state["nudge_injected"] = False
|
||||
ag_state["latest_user_appended"] = False
|
||||
ag_state["tool_calls_for_request"] = 0
|
||||
ag_state["repeated_tool"] = False
|
||||
ag_state["force_finalize"] = False
|
||||
ag_state["last_tool"] = None
|
||||
ag_state["last_tool_count"] = 0
|
||||
|
||||
if isinstance(input_data, list):
|
||||
n_tool_calls = sum(1 for it in input_data if isinstance(it, dict) and it.get("type") == "function_call")
|
||||
ag_state["tool_calls_for_request"] = n_tool_calls
|
||||
last_tool_key = None
|
||||
for item in reversed(input_data):
|
||||
if isinstance(item, dict) and item.get("type") == "function_call":
|
||||
fname = item.get("name", "")
|
||||
args_str = json.dumps(item.get("arguments", {}), sort_keys=True)[:100]
|
||||
last_tool_key = f"{fname}:{args_str}"
|
||||
break
|
||||
if last_tool_key:
|
||||
if last_tool_key == ag_state["last_tool"]:
|
||||
ag_state["last_tool_count"] += 1
|
||||
if ag_state["last_tool_count"] >= 5:
|
||||
ag_state["repeated_tool"] = True
|
||||
ag_state["force_finalize"] = True
|
||||
else:
|
||||
ag_state["last_tool"] = last_tool_key
|
||||
ag_state["last_tool_count"] = 1
|
||||
|
||||
_EDIT_WORDS = ("change", "fix", "update", "redesign", "rewrite", "modify", "improve", "replace", "edit", "make it", "add", "remove", "delete", "rename", "move", "convert")
|
||||
latest_lower = ""
|
||||
for item in reversed(input_data):
|
||||
if item.get("type") == "message" and item.get("role") == "user":
|
||||
c = item.get("content", "")
|
||||
if isinstance(c, str): latest_lower = c.lower()
|
||||
elif isinstance(c, list): latest_lower = " ".join(p.get("text", p.get("input_text", "")) for p in c if isinstance(p, dict)).lower()
|
||||
break
|
||||
if latest_lower and any(w in latest_lower for w in _EDIT_WORDS):
|
||||
n_tool_calls = sum(1 for it in input_data if isinstance(it, dict) and it.get("type") == "function_call")
|
||||
contents.append({"role": "user", "parts": [{"text": "!!! ABSOLUTELY NO PLANNING - EMIT THE TOOL CALL NOW !!! IMPORTANT: The user is requesting a modification to existing files. You MUST use tools (exec_command, read_files, write, etc.) to make the changes RIGHT NOW. Do NOT just describe what to do — actually CALL THE TOOLS IN THIS RESPONSE. IMMEDIATELY INSPECT THE FILE OR LIST FILES USING exec_command TOOL CALL."}]})
|
||||
print(f"[antigravity] edit-intent detected; injected tool-use nudge", file=sys.stderr)
|
||||
if isinstance(input_data, list):
|
||||
for item in reversed(input_data):
|
||||
if item.get("type") == "message" and item.get("role") == "user":
|
||||
c = item.get("content", "")
|
||||
if isinstance(c, str): latest_lower = c.lower()
|
||||
elif isinstance(c, list): latest_lower = " ".join(p.get("text", p.get("input_text", "")) for p in c if isinstance(p, dict)).lower()
|
||||
break
|
||||
|
||||
if OAUTH_PROVIDER == "google-antigravity" and isinstance(input_data, list):
|
||||
latest_user = ""
|
||||
for item in reversed(input_data):
|
||||
if item.get("type") == "message" and item.get("role") == "user":
|
||||
c = item.get("content", "")
|
||||
if isinstance(c, str):
|
||||
latest_user = c
|
||||
elif isinstance(c, list):
|
||||
latest_user = "\n".join(p.get("text", p.get("input_text", "")) for p in c if isinstance(p, dict))
|
||||
break
|
||||
if latest_user:
|
||||
if ag_state["force_finalize"]:
|
||||
contents.append({"role": "user", "parts": [{"text": "STOP CALLING TOOLS. APPLY THE FINAL EDIT OR SUMMARIZE WHAT BLOCKED YOU. DO NOT CALL ANY MORE TOOLS. DO NOT PRODUCE ANY MORE PLANNING TEXT. DO NOT PRODUCE ANY MORE EXPLORATORY TOOL CALLS. PRODUCE A FINAL ANSWER OR A CLEAR STATEMENT OF WHAT IS PREVENTING YOU FROM COMPLETING THE TASK."}]})
|
||||
elif latest_lower and any(w in latest_lower for w in _EDIT_WORDS) and not ag_state["nudge_injected"] and not ag_state["force_finalize"]:
|
||||
contents.append({"role": "user", "parts": [{"text": "!!! ABSOLUTELY NO PLANNING - EMIT THE TOOL CALL NOW !!! IMPORTANT: The user is requesting a modification to existing files. You MUST use tools (exec_command, read_files, write, etc.) to make the changes RIGHT NOW. Do NOT just describe what to do — actually CALL THE TOOLS IN THIS RESPONSE. IMMEDIATELY INSPECT THE FILE OR LIST FILES USING exec_command TOOL CALL."}]})
|
||||
ag_state["nudge_injected"] = True
|
||||
print(f"[antigravity] edit-intent detected; injected tool-use nudge (first time for this request)", file=sys.stderr)
|
||||
else:
|
||||
if ag_state["nudge_injected"]:
|
||||
print(f"[antigravity] edit-intent nudge already injected, skipping", file=sys.stderr)
|
||||
|
||||
if latest_user and not ag_state["latest_user_appended"] and not ag_state["force_finalize"]:
|
||||
latest_norm = " ".join(latest_user.strip().split())[:160]
|
||||
final_text = ""
|
||||
if contents:
|
||||
@@ -5299,14 +5517,24 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
if last.get("role") == "user":
|
||||
final_text = " ".join(json.dumps(last.get("parts", []), ensure_ascii=False).split())
|
||||
if latest_norm[:120] not in final_text:
|
||||
print(f"[antigravity] latest user instruction was not final turn; appending", file=sys.stderr)
|
||||
print(f"[antigravity] latest user instruction was not final turn; appending (first time for this request)", file=sys.stderr)
|
||||
contents.append({"role": "user", "parts": [{"text": latest_user}]})
|
||||
ag_state["latest_user_appended"] = True
|
||||
else:
|
||||
print(f"[antigravity] latest user instruction is final turn", file=sys.stderr)
|
||||
print(f"[{self._session_id}] [antigravity-debug] input_items={len(input_data) if isinstance(input_data, list) else 1} contents={len(contents)} latest={latest_user[:80]!r}", file=sys.stderr)
|
||||
if contents:
|
||||
last_c = contents[-1]
|
||||
print(f"[{self._session_id}] [antigravity-debug] final_role={last_c.get('role')} preview={json.dumps(last_c.get('parts', []), ensure_ascii=False)[:200]}", file=sys.stderr)
|
||||
else:
|
||||
if ag_state["latest_user_appended"]:
|
||||
print(f"[antigravity] latest user instruction already appended, skipping", file=sys.stderr)
|
||||
|
||||
print(f"[antigravity-loop] latest_user_hash={latest_user_hash}", file=sys.stderr)
|
||||
print(f"[antigravity-loop] tool_calls_for_request={ag_state['tool_calls_for_request']}", file=sys.stderr)
|
||||
print(f"[antigravity-loop] repeated_tool={ag_state['repeated_tool']}", file=sys.stderr)
|
||||
print(f"[antigravity-loop] nudge_injected={ag_state['nudge_injected']}", file=sys.stderr)
|
||||
print(f"[antigravity-loop] force_finalize={ag_state['force_finalize']}", file=sys.stderr)
|
||||
print(f"[{self._session_id}] [antigravity-debug] input_items={len(input_data) if isinstance(input_data, list) else 1} contents={len(contents)} latest={latest_user[:80]!r}", file=sys.stderr)
|
||||
if contents:
|
||||
last_c = contents[-1]
|
||||
print(f"[{self._session_id}] [antigravity-debug] final_role={last_c.get('role')} preview={json.dumps(last_c.get('parts', []), ensure_ascii=False)[:200]}", file=sys.stderr)
|
||||
|
||||
request_body = {"contents": contents}
|
||||
if system_parts:
|
||||
@@ -5725,9 +5953,11 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
last_status = None
|
||||
finish_reason = None
|
||||
has_content = False
|
||||
has_message = False
|
||||
has_tool_call = False
|
||||
|
||||
def _observe_event(event):
|
||||
nonlocal last_resp_id, last_output, last_status, finish_reason, has_content
|
||||
nonlocal last_resp_id, last_output, last_status, finish_reason, has_content, has_message, has_tool_call
|
||||
for line in event.strip().split("\n"):
|
||||
if line.startswith("data: "):
|
||||
try:
|
||||
@@ -5737,7 +5967,9 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
last_output = d.get("response", {}).get("output", [])
|
||||
last_status = d.get("response", {}).get("status")
|
||||
finish_reason = "length" if last_status == "incomplete" else "stop"
|
||||
has_content = any(o.get("type") == "message" for o in (last_output or []))
|
||||
has_tool_call = any(o.get("type") == "function_call" for o in (last_output or []))
|
||||
has_message = any(o.get("type") == "message" for o in (last_output or []))
|
||||
has_content = has_message or has_tool_call
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -5749,7 +5981,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
break
|
||||
collected_events.append(event)
|
||||
_observe_event(event)
|
||||
print(f"[{self._session_id}] stream ended: events={len(collected_events)} finish={finish_reason} has_content={has_content} elapsed={time.time()-t0:.1f}s", file=sys.stderr)
|
||||
print(f"[{self._session_id}] stream ended: events={len(collected_events)} finish={finish_reason} has_content={has_content} has_message={has_message} has_tool_call={has_tool_call} elapsed={time.time()-t0:.1f}s", file=sys.stderr)
|
||||
except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
|
||||
print("[translate-proxy] client disconnected during stream", file=sys.stderr)
|
||||
_crof_record(model, n_items, False)
|
||||
@@ -5805,6 +6037,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
last_resp_id = last_output = last_status = None
|
||||
finish_reason = None
|
||||
has_content = False
|
||||
has_message = False
|
||||
has_tool_call = False
|
||||
for event in oa_stream_to_sse(retry_upstream, model, body.get("request_id") or body.get("id")):
|
||||
collected_events.append(event)
|
||||
_observe_event(event)
|
||||
@@ -5813,7 +6047,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
print(f"[provider-sensor] synthetic retry failed: {e}", file=sys.stderr)
|
||||
|
||||
# Auto-retry on finish_reason=length with no content due to too much context.
|
||||
if finish_reason == "length" and not has_content and isinstance(input_data, list) and len(input_data) > 5 and TARGET_URL and "crof.ai" in TARGET_URL:
|
||||
if finish_reason == "length" and not has_content and isinstance(input_data, list) and len(input_data) > 5:
|
||||
print(f"[crof-adaptive] RETRY: finish_reason=length with no content, compacting {n_items} items", file=sys.stderr)
|
||||
new_input = _crof_compact_for_retry(input_data, model)
|
||||
if len(new_input) < len(input_data):
|
||||
@@ -5836,6 +6070,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
last_resp_id = last_output = last_status = None
|
||||
finish_reason = None
|
||||
has_content = False
|
||||
has_message = False
|
||||
has_tool_call = False
|
||||
for event in oa_stream_to_sse(retry_upstream, model, body.get("request_id") or body.get("id")):
|
||||
collected_events.append(event)
|
||||
_observe_event(event)
|
||||
@@ -5943,9 +6179,17 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
_smart_attempt = 0
|
||||
while _smart_attempt < _smart_max:
|
||||
_has_tool_calls_in_output = any(o.get("type") == "function_call" for o in (last_output or []))
|
||||
last_text = ""
|
||||
for o in (last_output or []):
|
||||
if o.get("type") == "message":
|
||||
for c in (o.get("content") or []):
|
||||
if isinstance(c, dict) and c.get("type") == "output_text":
|
||||
last_text += c.get("text", "")
|
||||
_looks_like_tools = _text_looks_like_tool_calls(last_text)
|
||||
_has_prior_tool_ctx = has_function_call_output(input_data)
|
||||
if not (finish_reason == "stop" and has_content and not _has_tool_calls_in_output
|
||||
and isinstance(input_data, list) and len(input_data) >= 3
|
||||
and has_function_call_output(input_data)):
|
||||
and (_has_prior_tool_ctx or _looks_like_tools)):
|
||||
break
|
||||
_smart_attempt += 1
|
||||
_nudges = [
|
||||
@@ -5954,12 +6198,6 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
]
|
||||
nudge_text = _nudges[min(_smart_attempt - 1, len(_nudges) - 1)]
|
||||
# Try extracting XML tool calls from text as fallback before nudging
|
||||
last_text = ""
|
||||
for o in (last_output or []):
|
||||
if o.get("type") == "message":
|
||||
for c in (o.get("content") or []):
|
||||
if isinstance(c, dict) and c.get("type") == "output_text":
|
||||
last_text += c.get("text", "")
|
||||
xml_fc = _extract_xml_tool_calls(last_text)
|
||||
if xml_fc:
|
||||
print(f"[{self._session_id}] [smart-continue] extracted {len(xml_fc)} XML tool calls from text, injecting and retrying", file=sys.stderr)
|
||||
@@ -5979,6 +6217,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
last_resp_id = last_output = last_status = None
|
||||
finish_reason = None
|
||||
has_content = False
|
||||
has_message = False
|
||||
has_tool_call = False
|
||||
for event in oa_stream_to_sse(retry_upstream, model, body.get("request_id") or body.get("id")):
|
||||
collected_events.append(event)
|
||||
_observe_event(event)
|
||||
@@ -5988,19 +6228,21 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
print(f"[{self._session_id}] [smart-continue] XML injection retry failed: {e}", file=sys.stderr)
|
||||
break
|
||||
_nudge_msg = {"role": "user", "content": nudge_text}
|
||||
nudge_messages = oa_input_to_messages(input_data) + [_nudge_msg]
|
||||
nudge_messages = oa_input_to_messages(_strip_images_from_input(input_data, model)) + [_nudge_msg]
|
||||
instructions = body.get("instructions", "").strip()
|
||||
if instructions:
|
||||
nudge_messages.insert(0, {"role": "system", "content": instructions})
|
||||
nudge_chat_body = self._build_chat_body(model, nudge_messages, body, stream)
|
||||
nudge_req = urllib.request.Request(target, data=json.dumps(nudge_chat_body).encode(), headers=fwd)
|
||||
print(f"[{self._session_id}] [smart-continue] attempt {_smart_attempt}/{_smart_max}: model stopped mid-task, nudging", file=sys.stderr)
|
||||
print(f"[{self._session_id}] [smart-continue] attempt {_smart_attempt}/{_smart_max}: model stopped mid-task (prior_ctx={_has_prior_tool_ctx} text_tools={_looks_like_tools}), nudging", file=sys.stderr)
|
||||
try:
|
||||
retry_upstream = urllib.request.urlopen(nudge_req, timeout=_upstream_timeout(body, True))
|
||||
collected_events = []
|
||||
last_resp_id = last_output = last_status = None
|
||||
finish_reason = None
|
||||
has_content = False
|
||||
has_message = False
|
||||
has_tool_call = False
|
||||
for event in oa_stream_to_sse(retry_upstream, model, body.get("request_id") or body.get("id")):
|
||||
collected_events.append(event)
|
||||
_observe_event(event)
|
||||
|
||||
Reference in New Issue
Block a user