v10.13.8: intelligent model profiles — dynamic limits per model capability
This commit is contained in:
@@ -382,12 +382,89 @@ _conn_pool = {}
|
|||||||
_STREAM_IDLE_TIMEOUT = 300
|
_STREAM_IDLE_TIMEOUT = 300
|
||||||
|
|
||||||
def _idle_timeout_for_model(model, default=300):
    """Return the idle timeout configured for *model*.

    The value is the ``"idle_timeout"`` entry of the profile that
    ``_model_profile`` resolves for ``model``.

    Args:
        model: Model name passed straight through to ``_model_profile``.
        default: Value returned only when the resolved profile has no
            ``"idle_timeout"`` entry.

    Returns:
        The profile's ``"idle_timeout"`` value, or ``default`` if absent.
    """
    profile = _model_profile(model)
    if "idle_timeout" in profile:
        return profile["idle_timeout"]
    return default
|
||||||
|
|
||||||
|
# Per-model limits, keyed by a fragment of the model name.
#
# `_model_profile` scans this table top to bottom and returns the first entry
# whose key occurs inside the (normalized) model name, so ORDER IS PRECEDENCE:
# a specific key such as "gemini-3.5-flash" must be listed before the generic
# key it contains ("flash"), otherwise it can never be selected.
#
# NOTE(review): short keys such as "pro" match any name that merely contains
# those letters; keep more specific keys above them.
_MODEL_PROFILES = {
    "gemini-3.5-flash": {
        "idle_timeout": 120, "max_tool_calls": 100, "warn_tool_calls": 60,
        "max_reads_no_write": 10, "warn_reads_no_write": 6,
        "max_input_items": 120, "tool_output_limit": 8000, "compaction": "balanced",
        "reasoning_budget": 8192, "max_tokens": 65536,
    },
    "flash": {
        "idle_timeout": 120, "max_tool_calls": 100, "warn_tool_calls": 60,
        "max_reads_no_write": 10, "warn_reads_no_write": 6,
        "max_input_items": 120, "tool_output_limit": 8000, "compaction": "balanced",
        "reasoning_budget": 8192, "max_tokens": 65536,
    },
    "gemini-3.1-pro": {
        "idle_timeout": 300, "max_tool_calls": 150, "warn_tool_calls": 80,
        "max_reads_no_write": 12, "warn_reads_no_write": 8,
        "max_input_items": 200, "tool_output_limit": 8000, "compaction": "conservative",
        "reasoning_budget": 24576, "max_tokens": 65536,
    },
    "pro": {
        "idle_timeout": 300, "max_tool_calls": 150, "warn_tool_calls": 80,
        "max_reads_no_write": 12, "warn_reads_no_write": 8,
        "max_input_items": 200, "tool_output_limit": 8000, "compaction": "conservative",
        "reasoning_budget": 24576, "max_tokens": 65536,
    },
    "sonnet": {
        "idle_timeout": 300, "max_tool_calls": 150, "warn_tool_calls": 80,
        "max_reads_no_write": 10, "warn_reads_no_write": 7,
        "max_input_items": 180, "tool_output_limit": 8000, "compaction": "balanced",
        "reasoning_budget": 16384, "max_tokens": 65536,
    },
    "opus": {
        "idle_timeout": 600, "max_tool_calls": 200, "warn_tool_calls": 100,
        "max_reads_no_write": 8, "warn_reads_no_write": 5,
        "max_input_items": 250, "tool_output_limit": 10000, "compaction": "conservative",
        "reasoning_budget": 32768, "max_tokens": 131072,
    },
    "deepseek": {
        "idle_timeout": 300, "max_tool_calls": 120, "warn_tool_calls": 70,
        "max_reads_no_write": 10, "warn_reads_no_write": 7,
        "max_input_items": 150, "tool_output_limit": 6000, "compaction": "balanced",
        "reasoning_budget": 16384, "max_tokens": 65536,
    },
    "qwen": {
        "idle_timeout": 300, "max_tool_calls": 120, "warn_tool_calls": 70,
        "max_reads_no_write": 10, "warn_reads_no_write": 7,
        "max_input_items": 150, "tool_output_limit": 6000, "compaction": "balanced",
        "reasoning_budget": 16384, "max_tokens": 65536,
    },
    "gpt-oss": {
        "idle_timeout": 300, "max_tool_calls": 100, "warn_tool_calls": 60,
        "max_reads_no_write": 10, "warn_reads_no_write": 6,
        "max_input_items": 120, "tool_output_limit": 6000, "compaction": "balanced",
        "reasoning_budget": 8192, "max_tokens": 32768,
    },
}

# Profile used when the model name is empty or matches nothing above.
_DEFAULT_MODEL_PROFILE = {
    "idle_timeout": 300, "max_tool_calls": 150, "warn_tool_calls": 80,
    "max_reads_no_write": 12, "warn_reads_no_write": 8,
    "max_input_items": 150, "tool_output_limit": 6000, "compaction": "balanced",
    "reasoning_budget": 16384, "max_tokens": 65536,
}

# Family words that have no table entry of their own, paired with the
# `_MODEL_PROFILES` key whose profile they borrow. Checked in this order, and
# only after every table key has failed to match.
_MODEL_PROFILE_ALIASES = (
    ("mini", "flash"),
    ("haiku", "flash"),
    ("tiny", "flash"),
    ("ultra", "opus"),
)

# Characters dropped from model names and table keys before comparing them.
_MODEL_NAME_SEPARATORS = str.maketrans("", "", "-_ .")


def _normalize_model_name(name):
    """Lower-case *name* and drop '-', '_', ' ' and '.' so spellings compare equal."""
    return str(name).lower().translate(_MODEL_NAME_SEPARATORS)


def _model_profile(model):
    """Return a fresh copy of the limits profile that applies to *model*.

    Resolution order:
      1. Falsy ``model`` -> the default profile.
      2. First ``_MODEL_PROFILES`` key (in table order) contained in the
         normalized model name.
      3. First ``_MODEL_PROFILE_ALIASES`` word contained in the name.
      4. The default profile.

    Args:
        model: Model name; compared case-insensitively with '-', '_', ' '
            and '.' ignored. Non-string values are converted with ``str()``.

    Returns:
        A new ``dict`` each call, so callers may mutate the result without
        affecting the shared tables.
    """
    if not model:
        return dict(_DEFAULT_MODEL_PROFILE)

    name = _normalize_model_name(model)
    for key, profile in _MODEL_PROFILES.items():
        if _normalize_model_name(key) in name:
            return dict(profile)

    # "gemini" itself contains "mini"; blank it out (with a character that no
    # alias contains) so Gemini models are not all classified as small models.
    alias_haystack = name.replace("gemini", " ")
    for word, target in _MODEL_PROFILE_ALIASES:
        if word in alias_haystack:
            return dict(_MODEL_PROFILES[target])

    return dict(_DEFAULT_MODEL_PROFILE)
|
||||||
_MAX_CONCURRENT_REQUESTS = 3
|
_MAX_CONCURRENT_REQUESTS = 3
|
||||||
_request_semaphore = threading.Semaphore(_MAX_CONCURRENT_REQUESTS)
|
_request_semaphore = threading.Semaphore(_MAX_CONCURRENT_REQUESTS)
|
||||||
|
|
||||||
@@ -5882,18 +5959,24 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
cumulative_calls = ag_state.get("total_tool_calls", 0) + n_tool_calls
|
cumulative_calls = ag_state.get("total_tool_calls", 0) + n_tool_calls
|
||||||
ag_state["total_tool_calls"] = cumulative_calls
|
ag_state["total_tool_calls"] = cumulative_calls
|
||||||
|
|
||||||
if cumulative_calls > _ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK:
|
_mp = _model_profile(model)
|
||||||
print(f"[{getattr(self, '_session_id', '?')}] [antigravity-budget] HARD CAP: {cumulative_calls} calls, injecting force-write directive", file=sys.stderr)
|
_mp_max_calls = _mp["max_tool_calls"]
|
||||||
|
_mp_warn_calls = _mp["warn_tool_calls"]
|
||||||
|
_mp_max_reads = _mp["max_reads_no_write"]
|
||||||
|
_mp_warn_reads = _mp["warn_reads_no_write"]
|
||||||
|
|
||||||
|
if cumulative_calls > _mp_max_calls:
|
||||||
|
print(f"[{getattr(self, '_session_id', '?')}] [antigravity-budget] HARD CAP: {cumulative_calls}/{_mp_max_calls} calls (model={model}), injecting force-write directive", file=sys.stderr)
|
||||||
contents.append({"role": "user", "parts": [{"text":
|
contents.append({"role": "user", "parts": [{"text":
|
||||||
f"CRITICAL BUDGET LIMIT: {cumulative_calls} tool calls made. "
|
f"CRITICAL BUDGET LIMIT: {cumulative_calls} tool calls made. "
|
||||||
f"YOU MUST STOP NOW. Do NOT call any more tools. "
|
f"YOU MUST STOP NOW. Do NOT call any more tools. "
|
||||||
f"Write your FINAL answer immediately using the information you already have. "
|
f"Write your FINAL answer immediately using the information you already have. "
|
||||||
f"If you have file edits, apply them in this response using exec_command with a write command. "
|
f"If you have file edits, apply them in this response using exec_command with a write command. "
|
||||||
f"DO NOT READ ANY MORE FILES."}]})
|
f"DO NOT READ ANY MORE FILES."}]})
|
||||||
elif cumulative_calls > _ANTIGRAVITY_WARN_TOOL_CALLS_PER_TASK:
|
elif cumulative_calls > _mp_warn_calls:
|
||||||
contents.append({"role": "user", "parts": [{"text":
|
contents.append({"role": "user", "parts": [{"text":
|
||||||
f"WARNING: {cumulative_calls} tool calls made. "
|
f"WARNING: {cumulative_calls} tool calls made. "
|
||||||
f"{_ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK - cumulative_calls} remaining before forced stop. "
|
f"{_mp_max_calls - cumulative_calls} remaining before forced stop. "
|
||||||
f"STOP READING FILES AND APPLY YOUR EDITS NOW."}]})
|
f"STOP READING FILES AND APPLY YOUR EDITS NOW."}]})
|
||||||
|
|
||||||
# CHANGE 2: Read-vs-write loop detection
|
# CHANGE 2: Read-vs-write loop detection
|
||||||
@@ -5918,10 +6001,10 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
ft["reads"] += 1
|
ft["reads"] += 1
|
||||||
n_reads = ft["reads"]
|
n_reads = ft["reads"]
|
||||||
n_writes = ft["writes"]
|
n_writes = ft["writes"]
|
||||||
if n_reads >= 12 and n_writes == 0:
|
if n_reads >= _mp_max_reads and n_writes == 0:
|
||||||
ag_state["force_finalize"] = True
|
ag_state["force_finalize"] = True
|
||||||
print(f"[antigravity-loop] READ-WRITE IMBALANCE: {n_reads} reads, {n_writes} writes — model never writes", file=sys.stderr)
|
print(f"[antigravity-loop] READ-WRITE IMBALANCE: {n_reads} reads, {n_writes} writes (model={model}, limit={_mp_max_reads})", file=sys.stderr)
|
||||||
elif n_reads >= 8 and n_writes == 0 and not ag_state.get("force_finalize"):
|
elif n_reads >= _mp_warn_reads and n_writes == 0 and not ag_state.get("force_finalize"):
|
||||||
contents.append({"role": "user", "parts": [{"text":
|
contents.append({"role": "user", "parts": [{"text":
|
||||||
f"WARNING: You have made {n_reads} tool calls and ZERO writes. "
|
f"WARNING: You have made {n_reads} tool calls and ZERO writes. "
|
||||||
f"You MUST apply your edit NOW using exec_command with a python write. "
|
f"You MUST apply your edit NOW using exec_command with a python write. "
|
||||||
@@ -5975,7 +6058,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
request_body["systemInstruction"] = {"role": "user", "parts": system_parts}
|
request_body["systemInstruction"] = {"role": "user", "parts": system_parts}
|
||||||
if gen_config:
|
if gen_config:
|
||||||
request_body["generationConfig"] = gen_config
|
request_body["generationConfig"] = gen_config
|
||||||
_budget_exceeded = ag_state.get("total_tool_calls", 0) > _ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK
|
_budget_exceeded = ag_state.get("total_tool_calls", 0) > _mp.get("max_tool_calls", 150)
|
||||||
if gemini_tools and not _budget_exceeded and not ag_state.get("force_finalize"):
|
if gemini_tools and not _budget_exceeded and not ag_state.get("force_finalize"):
|
||||||
request_body["tools"] = gemini_tools
|
request_body["tools"] = gemini_tools
|
||||||
elif _budget_exceeded or ag_state.get("force_finalize"):
|
elif _budget_exceeded or ag_state.get("force_finalize"):
|
||||||
@@ -6727,16 +6810,22 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
cumulative_calls = ag_state.get("total_tool_calls", 0) + n_tool_calls
|
cumulative_calls = ag_state.get("total_tool_calls", 0) + n_tool_calls
|
||||||
ag_state["total_tool_calls"] = cumulative_calls
|
ag_state["total_tool_calls"] = cumulative_calls
|
||||||
|
|
||||||
if cumulative_calls > _ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK:
|
_mp_oa = _model_profile(model)
|
||||||
print(f"[antigravity-budget] HARD CAP: {cumulative_calls} calls, injecting force-write", file=sys.stderr)
|
_mp_max = _mp_oa["max_tool_calls"]
|
||||||
|
_mp_warn = _mp_oa["warn_tool_calls"]
|
||||||
|
_mp_maxr = _mp_oa["max_reads_no_write"]
|
||||||
|
_mp_warnr = _mp_oa["warn_reads_no_write"]
|
||||||
|
|
||||||
|
if cumulative_calls > _mp_max:
|
||||||
|
print(f"[antigravity-budget] HARD CAP: {cumulative_calls}/{_mp_max} calls (model={model}), injecting force-write", file=sys.stderr)
|
||||||
contents.append({"role": "user", "parts": [{"text":
|
contents.append({"role": "user", "parts": [{"text":
|
||||||
f"CRITICAL BUDGET LIMIT: {cumulative_calls} tool calls. "
|
f"CRITICAL BUDGET LIMIT: {cumulative_calls} tool calls. "
|
||||||
f"STOP ALL TOOL CALLS. Write your FINAL answer now. "
|
f"STOP ALL TOOL CALLS. Write your FINAL answer now. "
|
||||||
f"Apply any edits using exec_command with a write command in this response."}]})
|
f"Apply any edits using exec_command with a write command in this response."}]})
|
||||||
elif cumulative_calls > _ANTIGRAVITY_WARN_TOOL_CALLS_PER_TASK:
|
elif cumulative_calls > _mp_warn:
|
||||||
contents.append({"role": "user", "parts": [{"text":
|
contents.append({"role": "user", "parts": [{"text":
|
||||||
f"WARNING: {cumulative_calls} tool calls. "
|
f"WARNING: {cumulative_calls} tool calls. "
|
||||||
f"{_ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK - cumulative_calls} remaining. "
|
f"{_mp_max - cumulative_calls} remaining. "
|
||||||
f"STOP READING AND WRITE NOW."}]})
|
f"STOP READING AND WRITE NOW."}]})
|
||||||
|
|
||||||
with _ANTIGRAVITY_LOOP_TRACKER_LOCK:
|
with _ANTIGRAVITY_LOOP_TRACKER_LOCK:
|
||||||
@@ -6758,10 +6847,10 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
ft["reads"] += 1
|
ft["reads"] += 1
|
||||||
n_reads = ft["reads"]
|
n_reads = ft["reads"]
|
||||||
n_writes = ft["writes"]
|
n_writes = ft["writes"]
|
||||||
if n_reads >= 12 and n_writes == 0:
|
if n_reads >= _mp_maxr and n_writes == 0:
|
||||||
ag_state["force_finalize"] = True
|
ag_state["force_finalize"] = True
|
||||||
print(f"[antigravity-loop] READ-WRITE IMBALANCE: {n_reads} reads, {n_writes} writes — model never writes", file=sys.stderr)
|
print(f"[antigravity-loop] READ-WRITE IMBALANCE: {n_reads} reads, {n_writes} writes (model={model}, limit={_mp_maxr})", file=sys.stderr)
|
||||||
elif n_reads >= 8 and n_writes == 0 and not ag_state.get("force_finalize"):
|
elif n_reads >= _mp_warnr and n_writes == 0 and not ag_state.get("force_finalize"):
|
||||||
contents.append({"role": "user", "parts": [{"text":
|
contents.append({"role": "user", "parts": [{"text":
|
||||||
f"WARNING: You have made {n_reads} tool calls and ZERO writes. "
|
f"WARNING: You have made {n_reads} tool calls and ZERO writes. "
|
||||||
f"You MUST apply your edit NOW using exec_command with a python write. "
|
f"You MUST apply your edit NOW using exec_command with a python write. "
|
||||||
@@ -6851,7 +6940,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
request_body["systemInstruction"] = {"parts": system_parts}
|
request_body["systemInstruction"] = {"parts": system_parts}
|
||||||
if gen_config:
|
if gen_config:
|
||||||
request_body["generationConfig"] = gen_config
|
request_body["generationConfig"] = gen_config
|
||||||
_budget_exceeded_oa = ag_state.get("total_tool_calls", 0) > _ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK
|
_budget_exceeded_oa = ag_state.get("total_tool_calls", 0) > _mp_oa.get("max_tool_calls", 150)
|
||||||
if gemini_tools and not _budget_exceeded_oa and not ag_state.get("force_finalize"):
|
if gemini_tools and not _budget_exceeded_oa and not ag_state.get("force_finalize"):
|
||||||
request_body["tools"] = gemini_tools
|
request_body["tools"] = gemini_tools
|
||||||
elif _budget_exceeded_oa or ag_state.get("force_finalize"):
|
elif _budget_exceeded_oa or ag_state.get("force_finalize"):
|
||||||
|
|||||||
Reference in New Issue
Block a user