v2.2.0: per-provider reasoning controls (on/off + effort level)
- Add a Reasoning On/Off toggle and an Effort selector to the endpoint editor
- Proxy sends enable_thinking=false when reasoning is OFF
- Proxy sends the configured reasoning_effort level when reasoning is ON (and reasoning_effort="none" when it is OFF)
- Strip reasoning_content from output; force max_tokens to a minimum of 64000
- Fixes token exhaustion with Crof mimo-v2.5-pro and similar reasoning models
This commit is contained in:
@@ -81,6 +81,8 @@ TARGET_URL = CONFIG["target_url"].rstrip("/")
|
||||
API_KEY = CONFIG["api_key"]
|
||||
MODELS = CONFIG["models"]
|
||||
CC_VERSION = CONFIG.get("cc_version", "")
|
||||
REASONING_ENABLED = CONFIG.get("reasoning_enabled", True)
|
||||
REASONING_EFFORT = CONFIG.get("reasoning_effort", "medium")
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
# Shared helpers
|
||||
@@ -889,6 +891,9 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
if body.get("tool_choice"):
|
||||
chat_body["tool_choice"] = body["tool_choice"]
|
||||
chat_body["stream"] = stream
|
||||
if not REASONING_ENABLED:
|
||||
chat_body["enable_thinking"] = False
|
||||
chat_body["reasoning_effort"] = REASONING_EFFORT if REASONING_ENABLED else "none"
|
||||
|
||||
target = upstream_target(TARGET_URL, "/chat/completions")
|
||||
fwd = forwarded_headers(self.headers, {
|
||||
|
||||
Reference in New Issue
Block a user