v2.2.0: per-provider reasoning controls (on/off + effort level)

- Add Reasoning On/Off toggle and Effort selector in endpoint editor
- Proxy sends enable_thinking=false and reasoning_effort=none when reasoning is OFF
- Proxy sends reasoning_effort level when reasoning is ON
- Strip reasoning_content from proxy output; enforce a minimum max_tokens of 64000
- Fixes output-token exhaustion on Crof mimo-v2.5-pro and similar reasoning models
This commit is contained in:
Roman
2026-05-20 12:20:33 +04:00
Unverified
parent 77423c5c35
commit 9532ba40f3
5 changed files with 50 additions and 2 deletions

View File

@@ -1,5 +1,17 @@
# Changelog # Changelog
## v2.2.0 (2026-05-20)
- **Added per-provider Reasoning controls in endpoint editor**
- Reasoning On/Off toggle — disable reasoning for models that exhaust output tokens (e.g., Crof mimo-v2.5-pro)
- Reasoning Effort selector: None, Minimal, Low, Medium, High, Max
- When reasoning is OFF: sends `enable_thinking=false` + `reasoning_effort=none` to upstream API
- When reasoning is ON: sends user-selected effort level (default: Medium)
- Settings stored per-endpoint, passed through proxy config to upstream requests
- Strip `reasoning_content` from proxy output — Codex doesn't use it, avoids token waste
- Enforce a minimum `max_tokens` of 64000 for openai-compat providers, leaving room for both reasoning and content
- Inspired by unsloth's reasoning control patterns for Qwen/GPT-OSS models
## v2.1.3 (2026-05-19) ## v2.1.3 (2026-05-19)
- **Fixed Crof mimo-v2.5-pro stopping mid-response (finish_reason=length)** - **Fixed Crof mimo-v2.5-pro stopping mid-response (finish_reason=length)**

Binary file not shown.

Binary file not shown.

View File

@@ -24,6 +24,15 @@ model_catalog_json = ""
""" """
CHANGELOG = [ CHANGELOG = [
("2.2.0", "2026-05-20", [
"Added per-provider Reasoning On/Off toggle in endpoint editor",
"Added Reasoning Effort level per provider: None, Minimal, Low, Medium, High, Max",
"When reasoning is OFF: sends enable_thinking=false + reasoning_effort=none to upstream API",
"When reasoning is ON: sends user-selected effort level (default: Medium)",
"Fixes Crof mimo-v2.5-pro and similar reasoning models exhausting output tokens",
"Strip reasoning_content from proxy output — Codex doesn't use it",
"Force max_tokens=64000 minimum for openai-compat providers",
]),
("2.1.3", "2026-05-19", [ ("2.1.3", "2026-05-19", [
"Fixed Crof mimo-v2.5-pro stopping: reasoning_content exhausted all output tokens", "Fixed Crof mimo-v2.5-pro stopping: reasoning_content exhausted all output tokens",
"Strip reasoning_content from proxy output — Codex doesn't use it, avoids token waste", "Strip reasoning_content from proxy output — Codex doesn't use it, avoids token waste",
@@ -434,6 +443,8 @@ def _start_proxy_for(endpoint, logfn):
"target_url": normalize_base_url(endpoint["base_url"]), "target_url": normalize_base_url(endpoint["base_url"]),
"api_key": endpoint["api_key"], "api_key": endpoint["api_key"],
"cc_version": endpoint.get("cc_version", ""), "cc_version": endpoint.get("cc_version", ""),
"reasoning_enabled": endpoint.get("reasoning_enabled", True),
"reasoning_effort": endpoint.get("reasoning_effort", "medium"),
"models": [{"id": m, "object": "model", "created": 1700000000, "owned_by": endpoint["name"]} "models": [{"id": m, "object": "model", "created": 1700000000, "owned_by": endpoint["name"]}
for m in endpoint.get("models", [])], for m in endpoint.get("models", [])],
} }
@@ -532,7 +543,7 @@ class LauncherWin(Gtk.Window):
# header row # header row
hdr = Gtk.Box(spacing=8) hdr = Gtk.Box(spacing=8)
vbox.pack_start(hdr, False, False, 0) vbox.pack_start(hdr, False, False, 0)
lbl = Gtk.Label(label="<b>Codex Launcher v2.1.3</b>") lbl = Gtk.Label(label="<b>Codex Launcher v2.2.0</b>")
lbl.set_use_markup(True) lbl.set_use_markup(True)
hdr.pack_start(lbl, False, False, 0) hdr.pack_start(lbl, False, False, 0)
changelog_btn = Gtk.Button(label="Changelog") changelog_btn = Gtk.Button(label="Changelog")
@@ -1371,7 +1382,7 @@ class EditEndpointDialog(Gtk.Dialog):
"base_url": "", "api_key": "", "default_model": "", "models": [], "base_url": "", "api_key": "", "default_model": "", "models": [],
"provider_preset": "Custom", "provider_preset": "Custom",
} }
self.set_default_size(480, 420) self.set_default_size(480, 520)
area = self.get_content_area() area = self.get_content_area()
area.set_spacing(6) area.set_spacing(6)
@@ -1419,6 +1430,20 @@ class EditEndpointDialog(Gtk.Dialog):
self._entry_cc_ver.set_placeholder_text("e.g. 0.26.8 (Command Code only)") self._entry_cc_ver.set_placeholder_text("e.g. 0.26.8 (Command Code only)")
add_row(5, "CC Version:", self._entry_cc_ver) add_row(5, "CC Version:", self._entry_cc_ver)
self._switch_reasoning = Gtk.Switch()
self._switch_reasoning.set_active(self._data.get("reasoning_enabled", True))
self._switch_reasoning.connect("notify::active", lambda *a: self._on_reasoning_toggled())
add_row(6, "Reasoning:", self._switch_reasoning)
self._combo_effort = Gtk.ComboBoxText()
for ev, el in [("none", "None"), ("minimal", "Minimal"), ("low", "Low"),
("medium", "Medium"), ("high", "High"), ("max", "Max")]:
self._combo_effort.append(ev, el)
saved_effort = self._data.get("reasoning_effort", "medium")
self._combo_effort.set_active_id(saved_effort if saved_effort in ("none","minimal","low","medium","high","max") else "medium")
add_row(7, "Effort:", self._combo_effort)
self._on_reasoning_toggled()
# Models # Models
mlbl = Gtk.Label(label="Models:", xalign=0) mlbl = Gtk.Label(label="Models:", xalign=0)
area.pack_start(mlbl, False, False, 4) area.pack_start(mlbl, False, False, 4)
@@ -1522,6 +1547,10 @@ class EditEndpointDialog(Gtk.Dialog):
if initial and self._data.get("models"): if initial and self._data.get("models"):
self._refresh_default_combo(self._data.get("default_model", "")) self._refresh_default_combo(self._data.get("default_model", ""))
def _on_reasoning_toggled(self, *_):
    """Make the effort selector sensitive only while the reasoning switch is active.

    Any extra positional arguments are accepted and ignored.
    """
    self._combo_effort.set_sensitive(self._switch_reasoning.get_active())
def _remove_model(self, path): def _remove_model(self, path):
current = self._combo_default.get_active_text() current = self._combo_default.get_active_text()
self._model_store.remove(self._model_store.get_iter(path)) self._model_store.remove(self._model_store.get_iter(path))
@@ -1620,6 +1649,8 @@ class EditEndpointDialog(Gtk.Dialog):
cc_ver = self._entry_cc_ver.get_text().strip() cc_ver = self._entry_cc_ver.get_text().strip()
if cc_ver: if cc_ver:
new_ep["cc_version"] = cc_ver new_ep["cc_version"] = cc_ver
new_ep["reasoning_enabled"] = self._switch_reasoning.get_active()
new_ep["reasoning_effort"] = self._combo_effort.get_active_id() or "medium"
new_ep["base_url"] = normalize_base_url(new_ep["base_url"]) new_ep["base_url"] = normalize_base_url(new_ep["base_url"])
# Update or append # Update or append

View File

@@ -81,6 +81,8 @@ TARGET_URL = CONFIG["target_url"].rstrip("/")
API_KEY = CONFIG["api_key"] API_KEY = CONFIG["api_key"]
MODELS = CONFIG["models"] MODELS = CONFIG["models"]
CC_VERSION = CONFIG.get("cc_version", "") CC_VERSION = CONFIG.get("cc_version", "")
REASONING_ENABLED = CONFIG.get("reasoning_enabled", True)
REASONING_EFFORT = CONFIG.get("reasoning_effort", "medium")
# ═══════════════════════════════════════════════════════════════════ # ═══════════════════════════════════════════════════════════════════
# Shared helpers # Shared helpers
@@ -889,6 +891,9 @@ class Handler(http.server.BaseHTTPRequestHandler):
if body.get("tool_choice"): if body.get("tool_choice"):
chat_body["tool_choice"] = body["tool_choice"] chat_body["tool_choice"] = body["tool_choice"]
chat_body["stream"] = stream chat_body["stream"] = stream
if not REASONING_ENABLED:
chat_body["enable_thinking"] = False
chat_body["reasoning_effort"] = REASONING_EFFORT if REASONING_ENABLED else "none"
target = upstream_target(TARGET_URL, "/chat/completions") target = upstream_target(TARGET_URL, "/chat/completions")
fwd = forwarded_headers(self.headers, { fwd = forwarded_headers(self.headers, {