From d273bf25185492f96aae9caa39c9741c16a7bd2c Mon Sep 17 00:00:00 2001 From: Roman | RyzenAdvanced Date: Wed, 27 May 2026 18:04:01 +0400 Subject: [PATCH] =?UTF-8?q?v10.13.8:=20intelligent=20model=20profiles=20?= =?UTF-8?q?=E2=80=94=20dynamic=20limits=20per=20model=20capability?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- translate-proxy.py | 131 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 110 insertions(+), 21 deletions(-) diff --git a/translate-proxy.py b/translate-proxy.py index 9c180a5..c40e92e 100755 --- a/translate-proxy.py +++ b/translate-proxy.py @@ -382,12 +382,89 @@ _conn_pool = {} _STREAM_IDLE_TIMEOUT = 300 def _idle_timeout_for_model(model, default=300): + return _model_profile(model).get("idle_timeout", default) + +_MODEL_PROFILES = { + "flash": { + "idle_timeout": 120, "max_tool_calls": 100, "warn_tool_calls": 60, + "max_reads_no_write": 10, "warn_reads_no_write": 6, + "max_input_items": 120, "tool_output_limit": 8000, "compaction": "balanced", + "reasoning_budget": 8192, "max_tokens": 65536, + }, + "gemini-3.5-flash": { + "idle_timeout": 120, "max_tool_calls": 100, "warn_tool_calls": 60, + "max_reads_no_write": 10, "warn_reads_no_write": 6, + "max_input_items": 120, "tool_output_limit": 8000, "compaction": "balanced", + "reasoning_budget": 8192, "max_tokens": 65536, + }, + "gemini-3.1-pro": { + "idle_timeout": 300, "max_tool_calls": 150, "warn_tool_calls": 80, + "max_reads_no_write": 12, "warn_reads_no_write": 8, + "max_input_items": 200, "tool_output_limit": 8000, "compaction": "conservative", + "reasoning_budget": 24576, "max_tokens": 65536, + }, + "pro": { + "idle_timeout": 300, "max_tool_calls": 150, "warn_tool_calls": 80, + "max_reads_no_write": 12, "warn_reads_no_write": 8, + "max_input_items": 200, "tool_output_limit": 8000, "compaction": "conservative", + "reasoning_budget": 24576, "max_tokens": 65536, + }, + "sonnet": { + "idle_timeout": 300, "max_tool_calls": 150, "warn_tool_calls": 80, + "max_reads_no_write": 10, "warn_reads_no_write": 7, + "max_input_items": 180, "tool_output_limit": 8000, "compaction": "balanced", + "reasoning_budget": 16384, "max_tokens": 65536, + }, + "opus": { + "idle_timeout": 600, "max_tool_calls": 200, "warn_tool_calls": 100, + "max_reads_no_write": 8, "warn_reads_no_write": 5, + "max_input_items": 250, "tool_output_limit": 10000, "compaction": "conservative", + "reasoning_budget": 32768, "max_tokens": 131072, + }, + "deepseek": { + "idle_timeout": 300, "max_tool_calls": 120, "warn_tool_calls": 70, + "max_reads_no_write": 10, "warn_reads_no_write": 7, + "max_input_items": 150, "tool_output_limit": 6000, "compaction": "balanced", + "reasoning_budget": 16384, "max_tokens": 65536, + }, + "qwen": { + "idle_timeout": 300, "max_tool_calls": 120, "warn_tool_calls": 70, + "max_reads_no_write": 10, "warn_reads_no_write": 7, + "max_input_items": 150, "tool_output_limit": 6000, "compaction": "balanced", + "reasoning_budget": 16384, "max_tokens": 65536, + }, + "gpt-oss": { + "idle_timeout": 300, "max_tool_calls": 100, "warn_tool_calls": 60, + "max_reads_no_write": 10, "warn_reads_no_write": 6, + "max_input_items": 120, "tool_output_limit": 6000, "compaction": "balanced", + "reasoning_budget": 8192, "max_tokens": 32768, + }, +} + +_DEFAULT_MODEL_PROFILE = { + "idle_timeout": 300, "max_tool_calls": 150, "warn_tool_calls": 80, + "max_reads_no_write": 12, "warn_reads_no_write": 8, + "max_input_items": 150, "tool_output_limit": 6000, "compaction": "balanced", + "reasoning_budget": 16384, "max_tokens": 65536, +} + +def _model_profile(model): if not model: - return default - m = model.lower() - if "flash" in m or "mini" in m or "haiku" in m: - return 120 - return default + return dict(_DEFAULT_MODEL_PROFILE) + m = model.lower().replace("-", "").replace("_", "").replace(" ", "") + for key, profile in _MODEL_PROFILES.items(): + key_norm = key.replace("-", "").replace("_", "").replace(" ", "") + if key_norm in m: + return dict(profile) + if "flash" in m or "mini" in m or "haiku" in m or "tiny" in m: + return dict(_MODEL_PROFILES["flash"]) + if "opus" in m or "ultra" in m: + return dict(_MODEL_PROFILES["opus"]) + if "sonnet" in m: + return dict(_MODEL_PROFILES["sonnet"]) + if "pro" in m and "flash" not in m: + return dict(_MODEL_PROFILES["pro"]) + return dict(_DEFAULT_MODEL_PROFILE) _MAX_CONCURRENT_REQUESTS = 3 _request_semaphore = threading.Semaphore(_MAX_CONCURRENT_REQUESTS) @@ -5882,18 +5959,24 @@ class Handler(http.server.BaseHTTPRequestHandler): cumulative_calls = ag_state.get("total_tool_calls", 0) + n_tool_calls ag_state["total_tool_calls"] = cumulative_calls - if cumulative_calls > _ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK: - print(f"[{getattr(self, '_session_id', '?')}] [antigravity-budget] HARD CAP: {cumulative_calls} calls, injecting force-write directive", file=sys.stderr) + _mp = _model_profile(model) + _mp_max_calls = _mp["max_tool_calls"] + _mp_warn_calls = _mp["warn_tool_calls"] + _mp_max_reads = _mp["max_reads_no_write"] + _mp_warn_reads = _mp["warn_reads_no_write"] + + if cumulative_calls > _mp_max_calls: + print(f"[{getattr(self, '_session_id', '?')}] [antigravity-budget] HARD CAP: {cumulative_calls}/{_mp_max_calls} calls (model={model}), injecting force-write directive", file=sys.stderr) contents.append({"role": "user", "parts": [{"text": f"CRITICAL BUDGET LIMIT: {cumulative_calls} tool calls made. " f"YOU MUST STOP NOW. Do NOT call any more tools. " f"Write your FINAL answer immediately using the information you already have. " f"If you have file edits, apply them in this response using exec_command with a write command. " f"DO NOT READ ANY MORE FILES."}]}) - elif cumulative_calls > _ANTIGRAVITY_WARN_TOOL_CALLS_PER_TASK: + elif cumulative_calls > _mp_warn_calls: contents.append({"role": "user", "parts": [{"text": f"WARNING: {cumulative_calls} tool calls made. " - f"{_ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK - cumulative_calls} remaining before forced stop. " + f"{_mp_max_calls - cumulative_calls} remaining before forced stop. " f"STOP READING FILES AND APPLY YOUR EDITS NOW."}]}) # CHANGE 2: Read-vs-write loop detection @@ -5918,10 +6001,10 @@ class Handler(http.server.BaseHTTPRequestHandler): ft["reads"] += 1 n_reads = ft["reads"] n_writes = ft["writes"] - if n_reads >= 12 and n_writes == 0: + if n_reads >= _mp_max_reads and n_writes == 0: ag_state["force_finalize"] = True - print(f"[antigravity-loop] READ-WRITE IMBALANCE: {n_reads} reads, {n_writes} writes — model never writes", file=sys.stderr) - elif n_reads >= 8 and n_writes == 0 and not ag_state.get("force_finalize"): + print(f"[antigravity-loop] READ-WRITE IMBALANCE: {n_reads} reads, {n_writes} writes (model={model}, limit={_mp_max_reads})", file=sys.stderr) + elif n_reads >= _mp_warn_reads and n_writes == 0 and not ag_state.get("force_finalize"): contents.append({"role": "user", "parts": [{"text": f"WARNING: You have made {n_reads} tool calls and ZERO writes. " f"You MUST apply your edit NOW using exec_command with a python write. " @@ -5975,7 +6058,7 @@ class Handler(http.server.BaseHTTPRequestHandler): request_body["systemInstruction"] = {"role": "user", "parts": system_parts} if gen_config: request_body["generationConfig"] = gen_config - _budget_exceeded = ag_state.get("total_tool_calls", 0) > _ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK + _budget_exceeded = ag_state.get("total_tool_calls", 0) > _mp.get("max_tool_calls", 150) if gemini_tools and not _budget_exceeded and not ag_state.get("force_finalize"): request_body["tools"] = gemini_tools elif _budget_exceeded or ag_state.get("force_finalize"): @@ -6727,16 +6810,22 @@ class Handler(http.server.BaseHTTPRequestHandler): cumulative_calls = ag_state.get("total_tool_calls", 0) + n_tool_calls ag_state["total_tool_calls"] = cumulative_calls - if cumulative_calls > _ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK: - print(f"[antigravity-budget] HARD CAP: {cumulative_calls} calls, injecting force-write", file=sys.stderr) + _mp_oa = _model_profile(model) + _mp_max = _mp_oa["max_tool_calls"] + _mp_warn = _mp_oa["warn_tool_calls"] + _mp_maxr = _mp_oa["max_reads_no_write"] + _mp_warnr = _mp_oa["warn_reads_no_write"] + + if cumulative_calls > _mp_max: + print(f"[antigravity-budget] HARD CAP: {cumulative_calls}/{_mp_max} calls (model={model}), injecting force-write", file=sys.stderr) contents.append({"role": "user", "parts": [{"text": f"CRITICAL BUDGET LIMIT: {cumulative_calls} tool calls. " f"STOP ALL TOOL CALLS. Write your FINAL answer now. " f"Apply any edits using exec_command with a write command in this response."}]}) - elif cumulative_calls > _ANTIGRAVITY_WARN_TOOL_CALLS_PER_TASK: + elif cumulative_calls > _mp_warn: contents.append({"role": "user", "parts": [{"text": f"WARNING: {cumulative_calls} tool calls. " - f"{_ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK - cumulative_calls} remaining. " + f"{_mp_max - cumulative_calls} remaining. " f"STOP READING AND WRITE NOW."}]}) with _ANTIGRAVITY_LOOP_TRACKER_LOCK: @@ -6758,10 +6847,10 @@ class Handler(http.server.BaseHTTPRequestHandler): ft["reads"] += 1 n_reads = ft["reads"] n_writes = ft["writes"] - if n_reads >= 12 and n_writes == 0: + if n_reads >= _mp_maxr and n_writes == 0: ag_state["force_finalize"] = True - print(f"[antigravity-loop] READ-WRITE IMBALANCE: {n_reads} reads, {n_writes} writes — model never writes", file=sys.stderr) - elif n_reads >= 8 and n_writes == 0 and not ag_state.get("force_finalize"): + print(f"[antigravity-loop] READ-WRITE IMBALANCE: {n_reads} reads, {n_writes} writes (model={model}, limit={_mp_maxr})", file=sys.stderr) + elif n_reads >= _mp_warnr and n_writes == 0 and not ag_state.get("force_finalize"): contents.append({"role": "user", "parts": [{"text": f"WARNING: You have made {n_reads} tool calls and ZERO writes. " f"You MUST apply your edit NOW using exec_command with a python write. " @@ -6851,7 +6940,7 @@ class Handler(http.server.BaseHTTPRequestHandler): request_body["systemInstruction"] = {"parts": system_parts} if gen_config: request_body["generationConfig"] = gen_config - _budget_exceeded_oa = ag_state.get("total_tool_calls", 0) > _ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK + _budget_exceeded_oa = ag_state.get("total_tool_calls", 0) > _mp_oa.get("max_tool_calls", 150) if gemini_tools and not _budget_exceeded_oa and not ag_state.get("force_finalize"): request_body["tools"] = gemini_tools elif _budget_exceeded_oa or ag_state.get("force_finalize"):