v2.2.0: per-provider reasoning controls (on/off + effort level)
- Add Reasoning On/Off toggle and Effort selector in endpoint editor
- Proxy sends enable_thinking=false when reasoning is OFF
- Proxy sends reasoning_effort level when reasoning is ON
- Strip reasoning_content from output, force max_tokens=64000 minimum
- Fixes Crof mimo-v2.5-pro and similar reasoning model token exhaustion
This commit is contained in:
12
CHANGELOG.md
12
CHANGELOG.md
@@ -1,5 +1,17 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## v2.2.0 (2026-05-20)
|
||||||
|
|
||||||
|
- **Added per-provider Reasoning controls in endpoint editor**
|
||||||
|
- Reasoning On/Off toggle — disable reasoning for models that exhaust output tokens (e.g., Crof mimo-v2.5-pro)
|
||||||
|
- Reasoning Effort selector: None, Minimal, Low, Medium, High, Max
|
||||||
|
- When reasoning is OFF: sends `enable_thinking=false` + `reasoning_effort=none` to upstream API
|
||||||
|
- When reasoning is ON: sends user-selected effort level (default: Medium)
|
||||||
|
- Settings stored per-endpoint, passed through proxy config to upstream requests
|
||||||
|
- Strip `reasoning_content` from proxy output — Codex doesn't use it, avoids token waste
|
||||||
|
- Force `max_tokens=64000` minimum for openai-compat providers — room for both reasoning and content
|
||||||
|
- Inspired by unsloth's reasoning control patterns for Qwen/GPT-OSS models
|
||||||
|
|
||||||
## v2.1.3 (2026-05-19)
|
## v2.1.3 (2026-05-19)
|
||||||
|
|
||||||
- **Fixed Crof mimo-v2.5-pro stopping mid-response (finish_reason=length)**
|
- **Fixed Crof mimo-v2.5-pro stopping mid-response (finish_reason=length)**
|
||||||
|
|||||||
Binary file not shown.
BIN
codex-launcher_2.2.0_all.deb
Normal file
BIN
codex-launcher_2.2.0_all.deb
Normal file
Binary file not shown.
@@ -24,6 +24,15 @@ model_catalog_json = ""
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
CHANGELOG = [
|
CHANGELOG = [
|
||||||
|
("2.2.0", "2026-05-20", [
|
||||||
|
"Added per-provider Reasoning On/Off toggle in endpoint editor",
|
||||||
|
"Added Reasoning Effort level per provider: None, Minimal, Low, Medium, High, Max",
|
||||||
|
"When reasoning is OFF: sends enable_thinking=false + reasoning_effort=none to upstream API",
|
||||||
|
"When reasoning is ON: sends user-selected effort level (default: Medium)",
|
||||||
|
"Fixes Crof mimo-v2.5-pro and similar reasoning models exhausting output tokens",
|
||||||
|
"Strip reasoning_content from proxy output — Codex doesn't use it",
|
||||||
|
"Force max_tokens=64000 minimum for openai-compat providers",
|
||||||
|
]),
|
||||||
("2.1.3", "2026-05-19", [
|
("2.1.3", "2026-05-19", [
|
||||||
"Fixed Crof mimo-v2.5-pro stopping: reasoning_content exhausted all output tokens",
|
"Fixed Crof mimo-v2.5-pro stopping: reasoning_content exhausted all output tokens",
|
||||||
"Strip reasoning_content from proxy output — Codex doesn't use it, avoids token waste",
|
"Strip reasoning_content from proxy output — Codex doesn't use it, avoids token waste",
|
||||||
@@ -434,6 +443,8 @@ def _start_proxy_for(endpoint, logfn):
|
|||||||
"target_url": normalize_base_url(endpoint["base_url"]),
|
"target_url": normalize_base_url(endpoint["base_url"]),
|
||||||
"api_key": endpoint["api_key"],
|
"api_key": endpoint["api_key"],
|
||||||
"cc_version": endpoint.get("cc_version", ""),
|
"cc_version": endpoint.get("cc_version", ""),
|
||||||
|
"reasoning_enabled": endpoint.get("reasoning_enabled", True),
|
||||||
|
"reasoning_effort": endpoint.get("reasoning_effort", "medium"),
|
||||||
"models": [{"id": m, "object": "model", "created": 1700000000, "owned_by": endpoint["name"]}
|
"models": [{"id": m, "object": "model", "created": 1700000000, "owned_by": endpoint["name"]}
|
||||||
for m in endpoint.get("models", [])],
|
for m in endpoint.get("models", [])],
|
||||||
}
|
}
|
||||||
@@ -532,7 +543,7 @@ class LauncherWin(Gtk.Window):
|
|||||||
# header row
|
# header row
|
||||||
hdr = Gtk.Box(spacing=8)
|
hdr = Gtk.Box(spacing=8)
|
||||||
vbox.pack_start(hdr, False, False, 0)
|
vbox.pack_start(hdr, False, False, 0)
|
||||||
lbl = Gtk.Label(label="<b>Codex Launcher v2.1.3</b>")
|
lbl = Gtk.Label(label="<b>Codex Launcher v2.2.0</b>")
|
||||||
lbl.set_use_markup(True)
|
lbl.set_use_markup(True)
|
||||||
hdr.pack_start(lbl, False, False, 0)
|
hdr.pack_start(lbl, False, False, 0)
|
||||||
changelog_btn = Gtk.Button(label="Changelog")
|
changelog_btn = Gtk.Button(label="Changelog")
|
||||||
@@ -1371,7 +1382,7 @@ class EditEndpointDialog(Gtk.Dialog):
|
|||||||
"base_url": "", "api_key": "", "default_model": "", "models": [],
|
"base_url": "", "api_key": "", "default_model": "", "models": [],
|
||||||
"provider_preset": "Custom",
|
"provider_preset": "Custom",
|
||||||
}
|
}
|
||||||
self.set_default_size(480, 420)
|
self.set_default_size(480, 520)
|
||||||
|
|
||||||
area = self.get_content_area()
|
area = self.get_content_area()
|
||||||
area.set_spacing(6)
|
area.set_spacing(6)
|
||||||
@@ -1419,6 +1430,20 @@ class EditEndpointDialog(Gtk.Dialog):
|
|||||||
self._entry_cc_ver.set_placeholder_text("e.g. 0.26.8 (Command Code only)")
|
self._entry_cc_ver.set_placeholder_text("e.g. 0.26.8 (Command Code only)")
|
||||||
add_row(5, "CC Version:", self._entry_cc_ver)
|
add_row(5, "CC Version:", self._entry_cc_ver)
|
||||||
|
|
||||||
|
self._switch_reasoning = Gtk.Switch()
|
||||||
|
self._switch_reasoning.set_active(self._data.get("reasoning_enabled", True))
|
||||||
|
self._switch_reasoning.connect("notify::active", lambda *a: self._on_reasoning_toggled())
|
||||||
|
add_row(6, "Reasoning:", self._switch_reasoning)
|
||||||
|
|
||||||
|
self._combo_effort = Gtk.ComboBoxText()
|
||||||
|
for ev, el in [("none", "None"), ("minimal", "Minimal"), ("low", "Low"),
|
||||||
|
("medium", "Medium"), ("high", "High"), ("max", "Max")]:
|
||||||
|
self._combo_effort.append(ev, el)
|
||||||
|
saved_effort = self._data.get("reasoning_effort", "medium")
|
||||||
|
self._combo_effort.set_active_id(saved_effort if saved_effort in ("none","minimal","low","medium","high","max") else "medium")
|
||||||
|
add_row(7, "Effort:", self._combo_effort)
|
||||||
|
self._on_reasoning_toggled()
|
||||||
|
|
||||||
# Models
|
# Models
|
||||||
mlbl = Gtk.Label(label="Models:", xalign=0)
|
mlbl = Gtk.Label(label="Models:", xalign=0)
|
||||||
area.pack_start(mlbl, False, False, 4)
|
area.pack_start(mlbl, False, False, 4)
|
||||||
@@ -1522,6 +1547,10 @@ class EditEndpointDialog(Gtk.Dialog):
|
|||||||
if initial and self._data.get("models"):
|
if initial and self._data.get("models"):
|
||||||
self._refresh_default_combo(self._data.get("default_model", ""))
|
self._refresh_default_combo(self._data.get("default_model", ""))
|
||||||
|
|
||||||
|
def _on_reasoning_toggled(self, *_):
|
||||||
|
active = self._switch_reasoning.get_active()
|
||||||
|
self._combo_effort.set_sensitive(active)
|
||||||
|
|
||||||
def _remove_model(self, path):
|
def _remove_model(self, path):
|
||||||
current = self._combo_default.get_active_text()
|
current = self._combo_default.get_active_text()
|
||||||
self._model_store.remove(self._model_store.get_iter(path))
|
self._model_store.remove(self._model_store.get_iter(path))
|
||||||
@@ -1620,6 +1649,8 @@ class EditEndpointDialog(Gtk.Dialog):
|
|||||||
cc_ver = self._entry_cc_ver.get_text().strip()
|
cc_ver = self._entry_cc_ver.get_text().strip()
|
||||||
if cc_ver:
|
if cc_ver:
|
||||||
new_ep["cc_version"] = cc_ver
|
new_ep["cc_version"] = cc_ver
|
||||||
|
new_ep["reasoning_enabled"] = self._switch_reasoning.get_active()
|
||||||
|
new_ep["reasoning_effort"] = self._combo_effort.get_active_id() or "medium"
|
||||||
new_ep["base_url"] = normalize_base_url(new_ep["base_url"])
|
new_ep["base_url"] = normalize_base_url(new_ep["base_url"])
|
||||||
|
|
||||||
# Update or append
|
# Update or append
|
||||||
|
|||||||
@@ -81,6 +81,8 @@ TARGET_URL = CONFIG["target_url"].rstrip("/")
|
|||||||
API_KEY = CONFIG["api_key"]
|
API_KEY = CONFIG["api_key"]
|
||||||
MODELS = CONFIG["models"]
|
MODELS = CONFIG["models"]
|
||||||
CC_VERSION = CONFIG.get("cc_version", "")
|
CC_VERSION = CONFIG.get("cc_version", "")
|
||||||
|
REASONING_ENABLED = CONFIG.get("reasoning_enabled", True)
|
||||||
|
REASONING_EFFORT = CONFIG.get("reasoning_effort", "medium")
|
||||||
|
|
||||||
# ═══════════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════════
|
||||||
# Shared helpers
|
# Shared helpers
|
||||||
@@ -889,6 +891,9 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
if body.get("tool_choice"):
|
if body.get("tool_choice"):
|
||||||
chat_body["tool_choice"] = body["tool_choice"]
|
chat_body["tool_choice"] = body["tool_choice"]
|
||||||
chat_body["stream"] = stream
|
chat_body["stream"] = stream
|
||||||
|
if not REASONING_ENABLED:
|
||||||
|
chat_body["enable_thinking"] = False
|
||||||
|
chat_body["reasoning_effort"] = REASONING_EFFORT if REASONING_ENABLED else "none"
|
||||||
|
|
||||||
target = upstream_target(TARGET_URL, "/chat/completions")
|
target = upstream_target(TARGET_URL, "/chat/completions")
|
||||||
fwd = forwarded_headers(self.headers, {
|
fwd = forwarded_headers(self.headers, {
|
||||||
|
|||||||
Reference in New Issue
Block a user