v2.2.0: per-provider reasoning controls (on/off + effort level)

- Add Reasoning On/Off toggle and Effort selector in endpoint editor
- Proxy sends enable_thinking=false when reasoning is OFF
- Proxy sends the configured reasoning_effort level when reasoning is ON, and reasoning_effort="none" when it is OFF
- Strip reasoning_content from output; enforce a minimum max_tokens of 64000
- Fixes token exhaustion with Crof mimo-v2.5-pro and similar reasoning models
This commit is contained in:
Roman
2026-05-20 12:20:33 +04:00
Unverified
parent 77423c5c35
commit 9532ba40f3
5 changed files with 50 additions and 2 deletions

View File

@@ -81,6 +81,8 @@ TARGET_URL = CONFIG["target_url"].rstrip("/")
API_KEY = CONFIG["api_key"]
MODELS = CONFIG["models"]
CC_VERSION = CONFIG.get("cc_version", "")
REASONING_ENABLED = CONFIG.get("reasoning_enabled", True)
REASONING_EFFORT = CONFIG.get("reasoning_effort", "medium")
# ═══════════════════════════════════════════════════════════════════
# Shared helpers
@@ -889,6 +891,9 @@ class Handler(http.server.BaseHTTPRequestHandler):
if body.get("tool_choice"):
chat_body["tool_choice"] = body["tool_choice"]
chat_body["stream"] = stream
if not REASONING_ENABLED:
chat_body["enable_thinking"] = False
chat_body["reasoning_effort"] = REASONING_EFFORT if REASONING_ENABLED else "none"
target = upstream_target(TARGET_URL, "/chat/completions")
fwd = forwarded_headers(self.headers, {