v2.2.0: per-provider reasoning controls (on/off + effort level)

- Add Reasoning On/Off toggle and Effort selector in endpoint editor
- Proxy sends enable_thinking=false and reasoning_effort=none when reasoning is OFF
- Proxy sends the selected reasoning_effort level when reasoning is ON
- Strip reasoning_content from output, force max_tokens=64000 minimum
- Fixes Crof mimo-v2.5-pro and similar reasoning model token exhaustion
2026-05-20 12:20:33 +04:00
parent 77423c5c35
commit 9532ba40f3
5 changed files with 50 additions and 2 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,17 @@
 # Changelog
 ## v2.2.0 (2026-05-20)
 - **Added per-provider Reasoning controls in endpoint editor**
  - Reasoning On/Off toggle — disable reasoning for models that exhaust output tokens (e.g., Crof mimo-v2.5-pro)
  - Reasoning Effort selector: None, Minimal, Low, Medium, High, Max
  - When reasoning is OFF: sends `enable_thinking=false` + `reasoning_effort=none` to upstream API
  - When reasoning is ON: sends user-selected effort level (default: Medium)
  - Settings stored per-endpoint, passed through proxy config to upstream requests
 - Strip `reasoning_content` from proxy output — Codex doesn't use it, avoids token waste
 - Force `max_tokens=64000` minimum for openai-compat providers — room for both reasoning and content
 - Inspired by unsloth's reasoning control patterns for Qwen/GPT-OSS models
 ## v2.1.3 (2026-05-19)
 - **Fixed Crof mimo-v2.5-pro stopping mid-response (finish_reason=length)**
--- a/codex-launcher_2.1.3_all.deb
+++ b/codex-launcher_2.1.3_all.deb
--- a/codex-launcher_2.2.0_all.deb
+++ b/codex-launcher_2.2.0_all.deb
--- a/src/codex-launcher-gui
+++ b/src/codex-launcher-gui
@@ -24,6 +24,15 @@ model_catalog_json = ""
 """
 CHANGELOG = [
    ("2.2.0", "2026-05-20", [
        "Added per-provider Reasoning On/Off toggle in endpoint editor",
        "Added Reasoning Effort level per provider: None, Minimal, Low, Medium, High, Max",
        "When reasoning is OFF: sends enable_thinking=false + reasoning_effort=none to upstream API",
        "When reasoning is ON: sends user-selected effort level (default: Medium)",
        "Fixes Crof mimo-v2.5-pro and similar reasoning models exhausting output tokens",
        "Strip reasoning_content from proxy output — Codex doesn't use it",
        "Force max_tokens=64000 minimum for openai-compat providers",
    ]),
    ("2.1.3", "2026-05-19", [
        "Fixed Crof mimo-v2.5-pro stopping: reasoning_content exhausted all output tokens",
        "Strip reasoning_content from proxy output — Codex doesn't use it, avoids token waste",
@@ -434,6 +443,8 @@ def _start_proxy_for(endpoint, logfn):
        "target_url": normalize_base_url(endpoint["base_url"]),
        "api_key": endpoint["api_key"],
        "cc_version": endpoint.get("cc_version", ""),
        "reasoning_enabled": endpoint.get("reasoning_enabled", True),
        "reasoning_effort": endpoint.get("reasoning_effort", "medium"),
        "models": [{"id": m, "object": "model", "created": 1700000000, "owned_by": endpoint["name"]}
                   for m in endpoint.get("models", [])],
    }
@@ -532,7 +543,7 @@ class LauncherWin(Gtk.Window):
        # header row
        hdr = Gtk.Box(spacing=8)
        vbox.pack_start(hdr, False, False, 0)
-        lbl = Gtk.Label(label="<b>Codex Launcher v2.1.3</b>")
+        lbl = Gtk.Label(label="<b>Codex Launcher v2.2.0</b>")
        lbl.set_use_markup(True)
        hdr.pack_start(lbl, False, False, 0)
        changelog_btn = Gtk.Button(label="Changelog")
@@ -1371,7 +1382,7 @@ class EditEndpointDialog(Gtk.Dialog):
            "base_url": "", "api_key": "", "default_model": "", "models": [],
            "provider_preset": "Custom",
        }
-        self.set_default_size(480, 420)
+        self.set_default_size(480, 520)
        area = self.get_content_area()
        area.set_spacing(6)
@@ -1419,6 +1430,20 @@ class EditEndpointDialog(Gtk.Dialog):
        self._entry_cc_ver.set_placeholder_text("e.g. 0.26.8 (Command Code only)")
        add_row(5, "CC Version:", self._entry_cc_ver)
        self._switch_reasoning = Gtk.Switch()
        self._switch_reasoning.set_active(self._data.get("reasoning_enabled", True))
        self._switch_reasoning.connect("notify::active", lambda *a: self._on_reasoning_toggled())
        add_row(6, "Reasoning:", self._switch_reasoning)
        self._combo_effort = Gtk.ComboBoxText()
        for ev, el in [("none", "None"), ("minimal", "Minimal"), ("low", "Low"),
                       ("medium", "Medium"), ("high", "High"), ("max", "Max")]:
            self._combo_effort.append(ev, el)
        saved_effort = self._data.get("reasoning_effort", "medium")
        self._combo_effort.set_active_id(saved_effort if saved_effort in ("none","minimal","low","medium","high","max") else "medium")
        add_row(7, "Effort:", self._combo_effort)
        self._on_reasoning_toggled()
        # Models
        mlbl = Gtk.Label(label="Models:", xalign=0)
        area.pack_start(mlbl, False, False, 4)
@@ -1522,6 +1547,10 @@ class EditEndpointDialog(Gtk.Dialog):
        if initial and self._data.get("models"):
            self._refresh_default_combo(self._data.get("default_model", ""))
    def _on_reasoning_toggled(self, *_):
        """Sync the Effort selector's sensitivity with the Reasoning switch.

        The effort combo box is made sensitive only while the reasoning
        switch is active. Any positional arguments supplied by a signal
        emission are accepted and ignored.
        """
        self._combo_effort.set_sensitive(self._switch_reasoning.get_active())
    def _remove_model(self, path):
        current = self._combo_default.get_active_text()
        self._model_store.remove(self._model_store.get_iter(path))
@@ -1620,6 +1649,8 @@ class EditEndpointDialog(Gtk.Dialog):
        cc_ver = self._entry_cc_ver.get_text().strip()
        if cc_ver:
            new_ep["cc_version"] = cc_ver
        new_ep["reasoning_enabled"] = self._switch_reasoning.get_active()
        new_ep["reasoning_effort"] = self._combo_effort.get_active_id() or "medium"
        new_ep["base_url"] = normalize_base_url(new_ep["base_url"])
        # Update or append
--- a/src/translate-proxy.py
+++ b/src/translate-proxy.py
@@ -81,6 +81,8 @@ TARGET_URL = CONFIG["target_url"].rstrip("/")
 API_KEY = CONFIG["api_key"]
 MODELS = CONFIG["models"]
 CC_VERSION = CONFIG.get("cc_version", "")
 REASONING_ENABLED = CONFIG.get("reasoning_enabled", True)
 REASONING_EFFORT = CONFIG.get("reasoning_effort", "medium")
 # ═══════════════════════════════════════════════════════════════════
 # Shared helpers
@@ -889,6 +891,9 @@ class Handler(http.server.BaseHTTPRequestHandler):
        if body.get("tool_choice"):
            chat_body["tool_choice"] = body["tool_choice"]
        chat_body["stream"] = stream
        if not REASONING_ENABLED:
            chat_body["enable_thinking"] = False
        chat_body["reasoning_effort"] = REASONING_EFFORT if REASONING_ENABLED else "none"
        target = upstream_target(TARGET_URL, "/chat/completions")
        fwd = forwarded_headers(self.headers, {