v2.2.0: per-provider reasoning controls (on/off + effort level)

- Add a Reasoning On/Off toggle and an Effort selector to the endpoint editor
- Proxy sends `enable_thinking=false` and `reasoning_effort="none"` when reasoning is OFF
- Proxy sends the configured `reasoning_effort` level (default `medium`) when reasoning is ON
- Strip `reasoning_content` from output and force `max_tokens` to a minimum of 64000
- Fixes token exhaustion with Crof mimo-v2.5-pro and similar reasoning models
2026-05-20 12:20:33 +04:00
parent 77423c5c35
commit 9532ba40f3
5 changed files with 50 additions and 2 deletions
--- a/src/translate-proxy.py
+++ b/src/translate-proxy.py
@@ -81,6 +81,8 @@ TARGET_URL = CONFIG["target_url"].rstrip("/")
 API_KEY = CONFIG["api_key"]
 MODELS = CONFIG["models"]
 CC_VERSION = CONFIG.get("cc_version", "")
+REASONING_ENABLED = CONFIG.get("reasoning_enabled", True)
+REASONING_EFFORT = CONFIG.get("reasoning_effort", "medium")

 # ═══════════════════════════════════════════════════════════════════
 # Shared helpers
@@ -889,6 +891,9 @@ class Handler(http.server.BaseHTTPRequestHandler):
        if body.get("tool_choice"):
            chat_body["tool_choice"] = body["tool_choice"]
        chat_body["stream"] = stream
+        if not REASONING_ENABLED:
+            chat_body["enable_thinking"] = False
+        chat_body["reasoning_effort"] = REASONING_EFFORT if REASONING_ENABLED else "none"

        target = upstream_target(TARGET_URL, "/chat/completions")
        fwd = forwarded_headers(self.headers, {