v10.13.8: intelligent model profiles — dynamic limits per model capability

This commit is contained in:
Roman | RyzenAdvanced
2026-05-27 18:04:01 +04:00
Unverified
parent eebffaab4b
commit d273bf2518

View File

@@ -382,12 +382,89 @@ _conn_pool = {}
_STREAM_IDLE_TIMEOUT = 300 _STREAM_IDLE_TIMEOUT = 300
def _idle_timeout_for_model(model, default=300):
    """Return the stream idle timeout configured for ``model``.

    Args:
        model: Model name as sent by the client; may be ``None`` or empty.
        default: Value returned when ``model`` is missing or matches no
            known profile, so a caller-supplied fallback is honoured.

    Returns:
        The ``idle_timeout`` of the matching profile, otherwise ``default``.
        (Presumably seconds, like ``_STREAM_IDLE_TIMEOUT`` -- confirm.)
    """
    profile = _lookup_model_profile(model)
    if profile is None:
        return default
    return profile.get("idle_timeout", default)


# Per-model limits. Keys are matched as substrings of the normalised model
# name (see _lookup_model_profile). The *_tool_calls and *_reads_no_write
# entries feed the handlers' tool-call budget and read-without-write checks;
# the remaining entries are not read by this block (usage lives elsewhere).
_MODEL_PROFILES = {
    "flash": {
        "idle_timeout": 120, "max_tool_calls": 100, "warn_tool_calls": 60,
        "max_reads_no_write": 10, "warn_reads_no_write": 6,
        "max_input_items": 120, "tool_output_limit": 8000, "compaction": "balanced",
        "reasoning_budget": 8192, "max_tokens": 65536,
    },
    "gemini-3.5-flash": {
        "idle_timeout": 120, "max_tool_calls": 100, "warn_tool_calls": 60,
        "max_reads_no_write": 10, "warn_reads_no_write": 6,
        "max_input_items": 120, "tool_output_limit": 8000, "compaction": "balanced",
        "reasoning_budget": 8192, "max_tokens": 65536,
    },
    "gemini-3.1-pro": {
        "idle_timeout": 300, "max_tool_calls": 150, "warn_tool_calls": 80,
        "max_reads_no_write": 12, "warn_reads_no_write": 8,
        "max_input_items": 200, "tool_output_limit": 8000, "compaction": "conservative",
        "reasoning_budget": 24576, "max_tokens": 65536,
    },
    "pro": {
        "idle_timeout": 300, "max_tool_calls": 150, "warn_tool_calls": 80,
        "max_reads_no_write": 12, "warn_reads_no_write": 8,
        "max_input_items": 200, "tool_output_limit": 8000, "compaction": "conservative",
        "reasoning_budget": 24576, "max_tokens": 65536,
    },
    "sonnet": {
        "idle_timeout": 300, "max_tool_calls": 150, "warn_tool_calls": 80,
        "max_reads_no_write": 10, "warn_reads_no_write": 7,
        "max_input_items": 180, "tool_output_limit": 8000, "compaction": "balanced",
        "reasoning_budget": 16384, "max_tokens": 65536,
    },
    "opus": {
        "idle_timeout": 600, "max_tool_calls": 200, "warn_tool_calls": 100,
        "max_reads_no_write": 8, "warn_reads_no_write": 5,
        "max_input_items": 250, "tool_output_limit": 10000, "compaction": "conservative",
        "reasoning_budget": 32768, "max_tokens": 131072,
    },
    "deepseek": {
        "idle_timeout": 300, "max_tool_calls": 120, "warn_tool_calls": 70,
        "max_reads_no_write": 10, "warn_reads_no_write": 7,
        "max_input_items": 150, "tool_output_limit": 6000, "compaction": "balanced",
        "reasoning_budget": 16384, "max_tokens": 65536,
    },
    "qwen": {
        "idle_timeout": 300, "max_tool_calls": 120, "warn_tool_calls": 70,
        "max_reads_no_write": 10, "warn_reads_no_write": 7,
        "max_input_items": 150, "tool_output_limit": 6000, "compaction": "balanced",
        "reasoning_budget": 16384, "max_tokens": 65536,
    },
    "gpt-oss": {
        "idle_timeout": 300, "max_tool_calls": 100, "warn_tool_calls": 60,
        "max_reads_no_write": 10, "warn_reads_no_write": 6,
        "max_input_items": 120, "tool_output_limit": 6000, "compaction": "balanced",
        "reasoning_budget": 8192, "max_tokens": 32768,
    },
}

# Profile used when a model name is missing or matches nothing above.
_DEFAULT_MODEL_PROFILE = {
    "idle_timeout": 300, "max_tool_calls": 150, "warn_tool_calls": 80,
    "max_reads_no_write": 12, "warn_reads_no_write": 8,
    "max_input_items": 150, "tool_output_limit": 6000, "compaction": "balanced",
    "reasoning_budget": 16384, "max_tokens": 65536,
}

# "-", "_" and " " are dropped before comparing names, so "GPT_OSS 120b"
# and "gpt-oss-120b" resolve to the same profile.
_MODEL_NAME_SEPARATORS = str.maketrans("", "", "-_ ")

# "pro" is too short to be a safe substring key (it occurs inside e.g.
# "deepseek-prover"), so it is only consulted as the last-resort fallback.
_GENERIC_PROFILE_KEYS = frozenset({"pro"})


def _normalize_model_name(name):
    """Lower-case ``name`` and strip the separator characters."""
    return name.lower().translate(_MODEL_NAME_SEPARATORS)


def _lookup_model_profile(model):
    """Return the shared profile dict matching ``model``, or ``None``.

    The returned dict is the table entry itself (not a copy); callers that
    hand it out must copy it first.
    """
    if not model:
        return None
    m = _normalize_model_name(model)

    # Pass 1: family keys from the table, in table order.
    for key, profile in _MODEL_PROFILES.items():
        if key in _GENERIC_PROFILE_KEYS:
            continue
        if _normalize_model_name(key) in m:
            return profile

    # Pass 2: capability keywords with no table entry of their own.
    # "gemini" itself contains "mini", so strip it before testing for the
    # small-model keyword; otherwise every Gemini model would look small.
    without_gemini = m.replace("gemini", "")
    if "mini" in without_gemini or "haiku" in m or "tiny" in m:
        return _MODEL_PROFILES["flash"]
    if "ultra" in m:
        return _MODEL_PROFILES["opus"]
    if "pro" in m:
        return _MODEL_PROFILES["pro"]
    return None


def _model_profile(model):
    """Return a fresh copy of the limits profile for ``model``.

    Args:
        model: Model name; ``None``/empty selects the default profile.

    Returns:
        A new ``dict`` (safe for the caller to mutate) holding the matching
        entry of ``_MODEL_PROFILES``, or ``_DEFAULT_MODEL_PROFILE`` when
        nothing matches.
    """
    profile = _lookup_model_profile(model)
    if profile is None:
        profile = _DEFAULT_MODEL_PROFILE
    return dict(profile)
_MAX_CONCURRENT_REQUESTS = 3 _MAX_CONCURRENT_REQUESTS = 3
_request_semaphore = threading.Semaphore(_MAX_CONCURRENT_REQUESTS) _request_semaphore = threading.Semaphore(_MAX_CONCURRENT_REQUESTS)
@@ -5882,18 +5959,24 @@ class Handler(http.server.BaseHTTPRequestHandler):
cumulative_calls = ag_state.get("total_tool_calls", 0) + n_tool_calls cumulative_calls = ag_state.get("total_tool_calls", 0) + n_tool_calls
ag_state["total_tool_calls"] = cumulative_calls ag_state["total_tool_calls"] = cumulative_calls
if cumulative_calls > _ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK: _mp = _model_profile(model)
print(f"[{getattr(self, '_session_id', '?')}] [antigravity-budget] HARD CAP: {cumulative_calls} calls, injecting force-write directive", file=sys.stderr) _mp_max_calls = _mp["max_tool_calls"]
_mp_warn_calls = _mp["warn_tool_calls"]
_mp_max_reads = _mp["max_reads_no_write"]
_mp_warn_reads = _mp["warn_reads_no_write"]
if cumulative_calls > _mp_max_calls:
print(f"[{getattr(self, '_session_id', '?')}] [antigravity-budget] HARD CAP: {cumulative_calls}/{_mp_max_calls} calls (model={model}), injecting force-write directive", file=sys.stderr)
contents.append({"role": "user", "parts": [{"text": contents.append({"role": "user", "parts": [{"text":
f"CRITICAL BUDGET LIMIT: {cumulative_calls} tool calls made. " f"CRITICAL BUDGET LIMIT: {cumulative_calls} tool calls made. "
f"YOU MUST STOP NOW. Do NOT call any more tools. " f"YOU MUST STOP NOW. Do NOT call any more tools. "
f"Write your FINAL answer immediately using the information you already have. " f"Write your FINAL answer immediately using the information you already have. "
f"If you have file edits, apply them in this response using exec_command with a write command. " f"If you have file edits, apply them in this response using exec_command with a write command. "
f"DO NOT READ ANY MORE FILES."}]}) f"DO NOT READ ANY MORE FILES."}]})
elif cumulative_calls > _ANTIGRAVITY_WARN_TOOL_CALLS_PER_TASK: elif cumulative_calls > _mp_warn_calls:
contents.append({"role": "user", "parts": [{"text": contents.append({"role": "user", "parts": [{"text":
f"WARNING: {cumulative_calls} tool calls made. " f"WARNING: {cumulative_calls} tool calls made. "
f"{_ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK - cumulative_calls} remaining before forced stop. " f"{_mp_max_calls - cumulative_calls} remaining before forced stop. "
f"STOP READING FILES AND APPLY YOUR EDITS NOW."}]}) f"STOP READING FILES AND APPLY YOUR EDITS NOW."}]})
# CHANGE 2: Read-vs-write loop detection # CHANGE 2: Read-vs-write loop detection
@@ -5918,10 +6001,10 @@ class Handler(http.server.BaseHTTPRequestHandler):
ft["reads"] += 1 ft["reads"] += 1
n_reads = ft["reads"] n_reads = ft["reads"]
n_writes = ft["writes"] n_writes = ft["writes"]
if n_reads >= 12 and n_writes == 0: if n_reads >= _mp_max_reads and n_writes == 0:
ag_state["force_finalize"] = True ag_state["force_finalize"] = True
print(f"[antigravity-loop] READ-WRITE IMBALANCE: {n_reads} reads, {n_writes} writes model never writes", file=sys.stderr) print(f"[antigravity-loop] READ-WRITE IMBALANCE: {n_reads} reads, {n_writes} writes (model={model}, limit={_mp_max_reads})", file=sys.stderr)
elif n_reads >= 8 and n_writes == 0 and not ag_state.get("force_finalize"): elif n_reads >= _mp_warn_reads and n_writes == 0 and not ag_state.get("force_finalize"):
contents.append({"role": "user", "parts": [{"text": contents.append({"role": "user", "parts": [{"text":
f"WARNING: You have made {n_reads} tool calls and ZERO writes. " f"WARNING: You have made {n_reads} tool calls and ZERO writes. "
f"You MUST apply your edit NOW using exec_command with a python write. " f"You MUST apply your edit NOW using exec_command with a python write. "
@@ -5975,7 +6058,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
request_body["systemInstruction"] = {"role": "user", "parts": system_parts} request_body["systemInstruction"] = {"role": "user", "parts": system_parts}
if gen_config: if gen_config:
request_body["generationConfig"] = gen_config request_body["generationConfig"] = gen_config
_budget_exceeded = ag_state.get("total_tool_calls", 0) > _ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK _budget_exceeded = ag_state.get("total_tool_calls", 0) > _mp.get("max_tool_calls", 150)
if gemini_tools and not _budget_exceeded and not ag_state.get("force_finalize"): if gemini_tools and not _budget_exceeded and not ag_state.get("force_finalize"):
request_body["tools"] = gemini_tools request_body["tools"] = gemini_tools
elif _budget_exceeded or ag_state.get("force_finalize"): elif _budget_exceeded or ag_state.get("force_finalize"):
@@ -6727,16 +6810,22 @@ class Handler(http.server.BaseHTTPRequestHandler):
cumulative_calls = ag_state.get("total_tool_calls", 0) + n_tool_calls cumulative_calls = ag_state.get("total_tool_calls", 0) + n_tool_calls
ag_state["total_tool_calls"] = cumulative_calls ag_state["total_tool_calls"] = cumulative_calls
if cumulative_calls > _ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK: _mp_oa = _model_profile(model)
print(f"[antigravity-budget] HARD CAP: {cumulative_calls} calls, injecting force-write", file=sys.stderr) _mp_max = _mp_oa["max_tool_calls"]
_mp_warn = _mp_oa["warn_tool_calls"]
_mp_maxr = _mp_oa["max_reads_no_write"]
_mp_warnr = _mp_oa["warn_reads_no_write"]
if cumulative_calls > _mp_max:
print(f"[antigravity-budget] HARD CAP: {cumulative_calls}/{_mp_max} calls (model={model}), injecting force-write", file=sys.stderr)
contents.append({"role": "user", "parts": [{"text": contents.append({"role": "user", "parts": [{"text":
f"CRITICAL BUDGET LIMIT: {cumulative_calls} tool calls. " f"CRITICAL BUDGET LIMIT: {cumulative_calls} tool calls. "
f"STOP ALL TOOL CALLS. Write your FINAL answer now. " f"STOP ALL TOOL CALLS. Write your FINAL answer now. "
f"Apply any edits using exec_command with a write command in this response."}]}) f"Apply any edits using exec_command with a write command in this response."}]})
elif cumulative_calls > _ANTIGRAVITY_WARN_TOOL_CALLS_PER_TASK: elif cumulative_calls > _mp_warn:
contents.append({"role": "user", "parts": [{"text": contents.append({"role": "user", "parts": [{"text":
f"WARNING: {cumulative_calls} tool calls. " f"WARNING: {cumulative_calls} tool calls. "
f"{_ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK - cumulative_calls} remaining. " f"{_mp_max - cumulative_calls} remaining. "
f"STOP READING AND WRITE NOW."}]}) f"STOP READING AND WRITE NOW."}]})
with _ANTIGRAVITY_LOOP_TRACKER_LOCK: with _ANTIGRAVITY_LOOP_TRACKER_LOCK:
@@ -6758,10 +6847,10 @@ class Handler(http.server.BaseHTTPRequestHandler):
ft["reads"] += 1 ft["reads"] += 1
n_reads = ft["reads"] n_reads = ft["reads"]
n_writes = ft["writes"] n_writes = ft["writes"]
if n_reads >= 12 and n_writes == 0: if n_reads >= _mp_maxr and n_writes == 0:
ag_state["force_finalize"] = True ag_state["force_finalize"] = True
print(f"[antigravity-loop] READ-WRITE IMBALANCE: {n_reads} reads, {n_writes} writes model never writes", file=sys.stderr) print(f"[antigravity-loop] READ-WRITE IMBALANCE: {n_reads} reads, {n_writes} writes (model={model}, limit={_mp_maxr})", file=sys.stderr)
elif n_reads >= 8 and n_writes == 0 and not ag_state.get("force_finalize"): elif n_reads >= _mp_warnr and n_writes == 0 and not ag_state.get("force_finalize"):
contents.append({"role": "user", "parts": [{"text": contents.append({"role": "user", "parts": [{"text":
f"WARNING: You have made {n_reads} tool calls and ZERO writes. " f"WARNING: You have made {n_reads} tool calls and ZERO writes. "
f"You MUST apply your edit NOW using exec_command with a python write. " f"You MUST apply your edit NOW using exec_command with a python write. "
@@ -6851,7 +6940,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
request_body["systemInstruction"] = {"parts": system_parts} request_body["systemInstruction"] = {"parts": system_parts}
if gen_config: if gen_config:
request_body["generationConfig"] = gen_config request_body["generationConfig"] = gen_config
_budget_exceeded_oa = ag_state.get("total_tool_calls", 0) > _ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK _budget_exceeded_oa = ag_state.get("total_tool_calls", 0) > _mp_oa.get("max_tool_calls", 150)
if gemini_tools and not _budget_exceeded_oa and not ag_state.get("force_finalize"): if gemini_tools and not _budget_exceeded_oa and not ag_state.get("force_finalize"):
request_body["tools"] = gemini_tools request_body["tools"] = gemini_tools
elif _budget_exceeded_oa or ag_state.get("force_finalize"): elif _budget_exceeded_oa or ag_state.get("force_finalize"):