From d273bf25185492f96aae9caa39c9741c16a7bd2c Mon Sep 17 00:00:00 2001
From: Roman | RyzenAdvanced <admin@rommark.dev>
Date: Wed, 27 May 2026 18:04:01 +0400
Subject: [PATCH] =?UTF-8?q?v10.13.8:=20intelligent=20model=20profiles=20?=
 =?UTF-8?q?=E2=80=94=20dynamic=20limits=20per=20model=20capability?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 translate-proxy.py | 131 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 110 insertions(+), 21 deletions(-)

diff --git a/translate-proxy.py b/translate-proxy.py
index 9c180a5..c40e92e 100755
--- a/translate-proxy.py
+++ b/translate-proxy.py
@@ -382,12 +382,89 @@ _conn_pool = {}
 _STREAM_IDLE_TIMEOUT = 300
 
 def _idle_timeout_for_model(model, default=300):
+    return _model_profile(model).get("idle_timeout", default)  # NOTE(review): every profile in this patch defines "idle_timeout", so `default` only applies if a profile ever omits that key
+
+_MODEL_PROFILES = {  # per-model limits; _model_profile() tries these keys in insertion order as substrings of the normalized model name
+    "flash": {
+        "idle_timeout": 120, "max_tool_calls": 100, "warn_tool_calls": 60,
+        "max_reads_no_write": 10, "warn_reads_no_write": 6,
+        "max_input_items": 120, "tool_output_limit": 8000, "compaction": "balanced",
+        "reasoning_budget": 8192, "max_tokens": 65536,
+    },
+    "gemini-3.5-flash": {  # same values as "flash"; never selected itself because "flash" is tried first and matches the same names
+        "idle_timeout": 120, "max_tool_calls": 100, "warn_tool_calls": 60,
+        "max_reads_no_write": 10, "warn_reads_no_write": 6,
+        "max_input_items": 120, "tool_output_limit": 8000, "compaction": "balanced",
+        "reasoning_budget": 8192, "max_tokens": 65536,
+    },
+    "gemini-3.1-pro": {
+        "idle_timeout": 300, "max_tool_calls": 150, "warn_tool_calls": 80,
+        "max_reads_no_write": 12, "warn_reads_no_write": 8,
+        "max_input_items": 200, "tool_output_limit": 8000, "compaction": "conservative",
+        "reasoning_budget": 24576, "max_tokens": 65536,
+    },
+    "pro": {  # same values as "gemini-3.1-pro"; the bare key also matches any other name containing the letters "pro"
+        "idle_timeout": 300, "max_tool_calls": 150, "warn_tool_calls": 80,
+        "max_reads_no_write": 12, "warn_reads_no_write": 8,
+        "max_input_items": 200, "tool_output_limit": 8000, "compaction": "conservative",
+        "reasoning_budget": 24576, "max_tokens": 65536,
+    },
+    "sonnet": {
+        "idle_timeout": 300, "max_tool_calls": 150, "warn_tool_calls": 80,
+        "max_reads_no_write": 10, "warn_reads_no_write": 7,
+        "max_input_items": 180, "tool_output_limit": 8000, "compaction": "balanced",
+        "reasoning_budget": 16384, "max_tokens": 65536,
+    },
+    "opus": {
+        "idle_timeout": 600, "max_tool_calls": 200, "warn_tool_calls": 100,
+        "max_reads_no_write": 8, "warn_reads_no_write": 5,
+        "max_input_items": 250, "tool_output_limit": 10000, "compaction": "conservative",
+        "reasoning_budget": 32768, "max_tokens": 131072,
+    },
+    "deepseek": {
+        "idle_timeout": 300, "max_tool_calls": 120, "warn_tool_calls": 70,
+        "max_reads_no_write": 10, "warn_reads_no_write": 7,
+        "max_input_items": 150, "tool_output_limit": 6000, "compaction": "balanced",
+        "reasoning_budget": 16384, "max_tokens": 65536,
+    },
+    "qwen": {
+        "idle_timeout": 300, "max_tool_calls": 120, "warn_tool_calls": 70,
+        "max_reads_no_write": 10, "warn_reads_no_write": 7,
+        "max_input_items": 150, "tool_output_limit": 6000, "compaction": "balanced",
+        "reasoning_budget": 16384, "max_tokens": 65536,
+    },
+    "gpt-oss": {
+        "idle_timeout": 300, "max_tool_calls": 100, "warn_tool_calls": 60,
+        "max_reads_no_write": 10, "warn_reads_no_write": 6,
+        "max_input_items": 120, "tool_output_limit": 6000, "compaction": "balanced",
+        "reasoning_budget": 8192, "max_tokens": 32768,
+    },
+}
+
+_DEFAULT_MODEL_PROFILE = {  # limits returned by _model_profile() for an empty model name or one that matches no profile key or alias
+    "idle_timeout": 300, "max_tool_calls": 150, "warn_tool_calls": 80,
+    "max_reads_no_write": 12, "warn_reads_no_write": 8,
+    "max_input_items": 150, "tool_output_limit": 6000, "compaction": "balanced",
+    "reasoning_budget": 16384, "max_tokens": 65536,
+}
+
+def _model_profile(model):
+    """Return a fresh copy of the limits profile for *model* (the default profile when nothing matches)."""
     if not model:
-        return default
-    m = model.lower()
-    if "flash" in m or "mini" in m or "haiku" in m:
-        return 120
-    return default
+        return dict(_DEFAULT_MODEL_PROFILE)
+    strip = str.maketrans("", "", "-_ ")  # separators are ignored when comparing names
+    m = model.lower().translate(strip)
+    # Keys are tried in dict insertion order; the first one found inside the name wins.
+    for key, profile in _MODEL_PROFILES.items():
+        if key.translate(strip) in m:
+            return dict(profile)
+    # Aliases for names no key matched. "gemini" itself contains "mini", so drop it first.
+    if "mini" in m.replace("gemini", "") or "haiku" in m or "tiny" in m:
+        return dict(_MODEL_PROFILES["flash"])
+    if "ultra" in m:
+        return dict(_MODEL_PROFILES["opus"])
+    # "flash"/"opus"/"sonnet"/"pro" are profile keys, so the loop above already handled them.
+    return dict(_DEFAULT_MODEL_PROFILE)
 _MAX_CONCURRENT_REQUESTS = 3
 _request_semaphore = threading.Semaphore(_MAX_CONCURRENT_REQUESTS)
 
@@ -5882,18 +5959,24 @@ class Handler(http.server.BaseHTTPRequestHandler):
             cumulative_calls = ag_state.get("total_tool_calls", 0) + n_tool_calls
             ag_state["total_tool_calls"] = cumulative_calls
 
-            if cumulative_calls > _ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK:
-                print(f"[{getattr(self, '_session_id', '?')}] [antigravity-budget] HARD CAP: {cumulative_calls} calls, injecting force-write directive", file=sys.stderr)
+            _mp = _model_profile(model)
+            _mp_max_calls = _mp["max_tool_calls"]
+            _mp_warn_calls = _mp["warn_tool_calls"]
+            _mp_max_reads = _mp["max_reads_no_write"]
+            _mp_warn_reads = _mp["warn_reads_no_write"]
+
+            if cumulative_calls > _mp_max_calls:
+                print(f"[{getattr(self, '_session_id', '?')}] [antigravity-budget] HARD CAP: {cumulative_calls}/{_mp_max_calls} calls (model={model}), injecting force-write directive", file=sys.stderr)
                 contents.append({"role": "user", "parts": [{"text":
                     f"CRITICAL BUDGET LIMIT: {cumulative_calls} tool calls made. "
                     f"YOU MUST STOP NOW. Do NOT call any more tools. "
                     f"Write your FINAL answer immediately using the information you already have. "
                     f"If you have file edits, apply them in this response using exec_command with a write command. "
                     f"DO NOT READ ANY MORE FILES."}]})
-            elif cumulative_calls > _ANTIGRAVITY_WARN_TOOL_CALLS_PER_TASK:
+            elif cumulative_calls > _mp_warn_calls:
                 contents.append({"role": "user", "parts": [{"text":
                     f"WARNING: {cumulative_calls} tool calls made. "
-                    f"{_ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK - cumulative_calls} remaining before forced stop. "
+                    f"{_mp_max_calls - cumulative_calls} remaining before forced stop. "
                     f"STOP READING FILES AND APPLY YOUR EDITS NOW."}]})
 
             # CHANGE 2: Read-vs-write loop detection
@@ -5918,10 +6001,10 @@ class Handler(http.server.BaseHTTPRequestHandler):
                             ft["reads"] += 1
                 n_reads = ft["reads"]
                 n_writes = ft["writes"]
-                if n_reads >= 12 and n_writes == 0:
+                if n_reads >= _mp_max_reads and n_writes == 0:
                     ag_state["force_finalize"] = True
-                    print(f"[antigravity-loop] READ-WRITE IMBALANCE: {n_reads} reads, {n_writes} writes — model never writes", file=sys.stderr)
-                elif n_reads >= 8 and n_writes == 0 and not ag_state.get("force_finalize"):
+                    print(f"[antigravity-loop] READ-WRITE IMBALANCE: {n_reads} reads, {n_writes} writes (model={model}, limit={_mp_max_reads})", file=sys.stderr)
+                elif n_reads >= _mp_warn_reads and n_writes == 0 and not ag_state.get("force_finalize"):
                     contents.append({"role": "user", "parts": [{"text":
                         f"WARNING: You have made {n_reads} tool calls and ZERO writes. "
                         f"You MUST apply your edit NOW using exec_command with a python write. "
@@ -5975,7 +6058,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
         request_body["systemInstruction"] = {"role": "user", "parts": system_parts}
         if gen_config:
             request_body["generationConfig"] = gen_config
-        _budget_exceeded = ag_state.get("total_tool_calls", 0) > _ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK
+        _budget_exceeded = ag_state.get("total_tool_calls", 0) > _model_profile(model).get("max_tool_calls", 150)  # looked up here: _mp is assigned only inside the deeper-indented budget block and may be unbound on this path
         if gemini_tools and not _budget_exceeded and not ag_state.get("force_finalize"):
             request_body["tools"] = gemini_tools
         elif _budget_exceeded or ag_state.get("force_finalize"):
@@ -6727,16 +6810,22 @@ class Handler(http.server.BaseHTTPRequestHandler):
                     cumulative_calls = ag_state.get("total_tool_calls", 0) + n_tool_calls
                     ag_state["total_tool_calls"] = cumulative_calls
 
-                    if cumulative_calls > _ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK:
-                        print(f"[antigravity-budget] HARD CAP: {cumulative_calls} calls, injecting force-write", file=sys.stderr)
+                    _mp_oa = _model_profile(model)
+                    _mp_max = _mp_oa["max_tool_calls"]
+                    _mp_warn = _mp_oa["warn_tool_calls"]
+                    _mp_maxr = _mp_oa["max_reads_no_write"]
+                    _mp_warnr = _mp_oa["warn_reads_no_write"]
+
+                    if cumulative_calls > _mp_max:
+                        print(f"[antigravity-budget] HARD CAP: {cumulative_calls}/{_mp_max} calls (model={model}), injecting force-write", file=sys.stderr)
                         contents.append({"role": "user", "parts": [{"text":
                             f"CRITICAL BUDGET LIMIT: {cumulative_calls} tool calls. "
                             f"STOP ALL TOOL CALLS. Write your FINAL answer now. "
                             f"Apply any edits using exec_command with a write command in this response."}]})
-                    elif cumulative_calls > _ANTIGRAVITY_WARN_TOOL_CALLS_PER_TASK:
+                    elif cumulative_calls > _mp_warn:
                         contents.append({"role": "user", "parts": [{"text":
                             f"WARNING: {cumulative_calls} tool calls. "
-                            f"{_ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK - cumulative_calls} remaining. "
+                            f"{_mp_max - cumulative_calls} remaining. "
                             f"STOP READING AND WRITE NOW."}]})
 
                     with _ANTIGRAVITY_LOOP_TRACKER_LOCK:
@@ -6758,10 +6847,10 @@ class Handler(http.server.BaseHTTPRequestHandler):
                                     ft["reads"] += 1
                         n_reads = ft["reads"]
                         n_writes = ft["writes"]
-                        if n_reads >= 12 and n_writes == 0:
+                        if n_reads >= _mp_maxr and n_writes == 0:
                             ag_state["force_finalize"] = True
-                            print(f"[antigravity-loop] READ-WRITE IMBALANCE: {n_reads} reads, {n_writes} writes — model never writes", file=sys.stderr)
-                        elif n_reads >= 8 and n_writes == 0 and not ag_state.get("force_finalize"):
+                            print(f"[antigravity-loop] READ-WRITE IMBALANCE: {n_reads} reads, {n_writes} writes (model={model}, limit={_mp_maxr})", file=sys.stderr)
+                        elif n_reads >= _mp_warnr and n_writes == 0 and not ag_state.get("force_finalize"):
                             contents.append({"role": "user", "parts": [{"text":
                                 f"WARNING: You have made {n_reads} tool calls and ZERO writes. "
                                 f"You MUST apply your edit NOW using exec_command with a python write. "
@@ -6851,7 +6940,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
             request_body["systemInstruction"] = {"parts": system_parts}
         if gen_config:
             request_body["generationConfig"] = gen_config
-        _budget_exceeded_oa = ag_state.get("total_tool_calls", 0) > _ANTIGRAVITY_MAX_TOOL_CALLS_PER_TASK
+        _budget_exceeded_oa = ag_state.get("total_tool_calls", 0) > _model_profile(model).get("max_tool_calls", 150)  # looked up here: _mp_oa is assigned only inside the deeper-indented budget block and may be unbound on this path
         if gemini_tools and not _budget_exceeded_oa and not ag_state.get("force_finalize"):
             request_body["tools"] = gemini_tools
         elif _budget_exceeded_oa or ag_state.get("force_finalize"):