v2.3.0: adaptive Crof self-healing system

- Per-model success/failure tracking with dynamic item limits
- Proactive compaction when above learned limit
- Auto-retry on finish_reason=length with aggressive re-compaction
- Tested: kimi-k2.6 (27 items) and mimo-v2.5-pro both completed
- All previous fixes included: _ts crash, connection reset, timeout, orphaned function_call_output (fco)
2026-05-20 14:32:36 +04:00
parent 60106955ab
commit 27b22f4fd8
5 changed files with 238 additions and 15 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,20 @@
 # Changelog
 ## v2.3.0 (2026-05-20)
 - **Adaptive Crof self-healing system**
  - Tracks per-model success/failure history with item counts
  - Dynamically learns max item limit per model (starts at 30, adjusts down on failures)
  - Proactively compacts input when above learned limit before sending to upstream
  - Auto-retry on `finish_reason=length` with aggressive re-compaction and resend
  - Prevents `stream disconnected` and `incomplete` errors on long conversations
  - All tracking logged to stderr: `[crof-adaptive] model=X items=N OK/FAIL -> limit=N global=N`
 - Fixed `NameError: _ts` crash in debug logging
 - Fixed `ConnectionResetError` crash on client disconnect during streaming
 - Added 180s upstream timeout to prevent hanging connections
 - Compaction now preserves function_call/function_call_output pairs (no orphaned tool outputs)
 - Fixed reasoning control: `reasoning_effort=none` always sends both params
 ## v2.2.1 (2026-05-20)
 - **Fixed compaction orphaning function_call_output items** — root cause of Crof `incomplete` responses
--- a/codex-launcher_2.2.1_all.deb
+++ b/codex-launcher_2.2.1_all.deb
--- a/codex-launcher_2.3.0_all.deb
+++ b/codex-launcher_2.3.0_all.deb
--- a/src/codex-launcher-gui
+++ b/src/codex-launcher-gui
@@ -24,6 +24,13 @@ model_catalog_json = ""
 """
 CHANGELOG = [
    ("2.3.0", "2026-05-20", [
        "Adaptive Crof self-healing system — auto-adjusts to Crof model limits",
        "Tracks per-model success/failure history, learns item count limits dynamically",
        "Proactively compacts input when above learned limit before sending to Crof",
        "Auto-retries on finish_reason=length — aggressively compacts and resends",
        "Prevents 'stream disconnected' and 'incomplete' errors on long conversations",
    ]),
    ("2.2.1", "2026-05-20", [
        "Fixed compaction orphaning function_call_output items — root cause of Crof incomplete responses",
        "Compaction now respects function_call/function_call_output pairs — no more dangling tool results",
@@ -548,7 +555,7 @@ class LauncherWin(Gtk.Window):
        # header row
        hdr = Gtk.Box(spacing=8)
        vbox.pack_start(hdr, False, False, 0)
-        lbl = Gtk.Label(label="<b>Codex Launcher v2.2.1</b>")
+        lbl = Gtk.Label(label="<b>Codex Launcher v2.3.0</b>")
        lbl.set_use_markup(True)
        hdr.pack_start(lbl, False, False, 0)
        changelog_btn = Gtk.Button(label="Changelog")
--- a/src/translate-proxy.py
+++ b/src/translate-proxy.py
@@ -171,6 +171,87 @@ _MAX_INPUT_ITEMS = 30
 _MAX_TOOL_OUTPUT_CHARS = 8000
 _COMPACT_KEEP_RECENT = 10
 # Mutable module-level state shared by the adaptive Crof helpers
 # (_crof_record, _crof_item_limit, _crof_compact_for_retry).
 _CROF_ADAPTIVE = {
    # Rolling log of recorded outcomes as {"model", "items", "ok"} dicts.
    # Despite the name it holds successes as well as failures; _crof_record
    # trims it to the newest 100 entries once it grows past 200.
    "fail_history": [],
    # Per-model learned state: model -> {"ok_max", "fail_min", "limit"}.
    "model_limits": {},
    # Smallest "limit" across all models (capped at 30); recomputed by
    # _crof_record on every recorded outcome.
    "global_item_limit": 30,
    # Lower bound on the number of trailing items compaction keeps, and the
    # base for the floor applied to learned limits.
    "min_keep_recent": 4,
 }
 def _crof_record(model, n_items, success):
    """Record one upstream outcome and refresh the learned item limits.

    Args:
        model: Model identifier; key into ``_CROF_ADAPTIVE["model_limits"]``.
        n_items: Number of input items that were sent. Calls where this is
            not an ``int`` or is less than 1 are ignored.
        success: Truthy when the upstream response counted as complete.

    Side effects:
        * Appends ``{"model", "items", "ok"}`` to the history list, trimming
          it to the newest 100 entries once it exceeds 200.
        * Updates the model's ``ok_max`` (largest successful size, seeded at
          30) and ``fail_min`` (smallest failing size, 0 = none seen).
        * Recomputes the model's ``limit`` and the global limit.
        * Logs the result to stderr.

    The learned limit is ``fail_min - 1`` when a success at or above the
    failing size is on record, otherwise ``fail_min - 2``. Both cases are
    floored at ``min_keep_recent + 2``: compaction never keeps fewer than
    ``min_keep_recent`` trailing items, so a lower limit could never be met
    and a single failure on a tiny request would otherwise pin the limit
    (and the global limit) near zero.
    """
    if not isinstance(n_items, int) or n_items < 1:
        return

    hist = _CROF_ADAPTIVE["fail_history"]
    hist.append({"model": model, "items": n_items, "ok": success})
    if len(hist) > 200:
        _CROF_ADAPTIVE["fail_history"] = hist[-100:]

    ml = _CROF_ADAPTIVE["model_limits"].setdefault(
        model, {"ok_max": 30, "fail_min": 0, "limit": 30}
    )
    if success and n_items > ml["ok_max"]:
        ml["ok_max"] = n_items
    if not success and (ml["fail_min"] == 0 or n_items < ml["fail_min"]):
        ml["fail_min"] = n_items

    if ml["fail_min"] > 0:
        # Back off by one item when a success at this size is known, by two
        # otherwise; never drop below what compaction can actually achieve.
        margin = 1 if ml["ok_max"] >= ml["fail_min"] else 2
        floor = _CROF_ADAPTIVE["min_keep_recent"] + 2
        ml["limit"] = max(ml["fail_min"] - margin, floor)

    # The global limit is the tightest per-model limit, never above 30.
    global_limit = min(
        [30] + [v.get("limit", 30) for v in _CROF_ADAPTIVE["model_limits"].values()]
    )
    _CROF_ADAPTIVE["global_item_limit"] = global_limit
    print(f"[crof-adaptive] model={model} items={n_items} {'OK' if success else 'FAIL'} -> limit={ml.get('limit',30)} global={global_limit}", file=sys.stderr)
 def _crof_item_limit(model):
    """Return the effective item limit for *model*.

    This is the smaller of the model's learned ``limit`` (30 when the model
    has no entry or no ``limit`` key) and the current global item limit.
    """
    learned = _CROF_ADAPTIVE["model_limits"].get(model, {}).get("limit", 30)
    global_cap = _CROF_ADAPTIVE["global_item_limit"]
    return min(learned, global_cap)
 def _crof_compact_for_retry(input_data, model):
    """Shrink *input_data* towards the adaptive item limit for *model*.

    Non-list input, or a list already within the limit, is returned as is.
    Otherwise the list is split into three parts:

    * head - the leading run of ``message`` items whose role is developer,
      system or user;
    * tail - the most recent items, extended backwards while its first item
      is a function_call, function_call_output or assistant message;
    * body - everything in between, replaced by a single user message that
      summarises its last five items.

    Returns ``head + tail`` when the body is empty, else
    ``head + [summary] + tail``. The input list itself is never mutated.
    """
    limit = _crof_item_limit(model)
    if not isinstance(input_data, list) or len(input_data) <= limit:
        return input_data

    keep = max(_CROF_ADAPTIVE["min_keep_recent"], limit // 3)

    # Count the leading instruction/user messages that must be preserved.
    head_roles = ("developer", "system", "user")
    head_end = 0
    for entry in input_data:
        if entry.get("type") != "message" or entry.get("role") not in head_roles:
            break
        head_end += 1
    head = input_data[:head_end]

    # Pull the tail boundary back while it sits on a tool item or an
    # assistant message.
    tail_start = max(head_end, len(input_data) - keep)
    while tail_start > head_end:
        first = input_data[tail_start]
        kind = first.get("type")
        role = first.get("role", "")
        is_tool_item = kind in ("function_call_output", "function_call")
        is_assistant_msg = kind == "message" and role == "assistant"
        if not (is_tool_item or is_assistant_msg):
            break
        tail_start -= 1

    tail = input_data[tail_start:]
    body = input_data[head_end:tail_start]
    if not body:
        return head + tail

    summary_lines = [f"[Auto-compacted: {len(body)} turns removed (adaptive limit={limit})]"]
    summary_lines.extend(_item_summary(dropped, max_len=120) for dropped in body[-5:])
    summary_text = "\n".join(summary_lines)
    summary_msg = {
        "type": "message",
        "role": "user",
        "content": [{"type": "input_text", "text": summary_text}],
    }
    print(f"[crof-adaptive] RETRY compact: {len(input_data)} -> {len(head)+1+len(tail)} (limit={limit}, keep={len(tail)})", file=sys.stderr)
    return head + [summary_msg] + tail
 def _item_summary(item, max_len=200):
    t = item.get("type")
    if t == "message":
@@ -888,6 +969,15 @@ class Handler(http.server.BaseHTTPRequestHandler):
    def _handle_openai_compat(self, body, model, stream):
        input_data = body.get("input", "")
        # Adaptive: proactively compact if above learned Crof limit
        crof_limit = _crof_item_limit(model)
        if isinstance(input_data, list) and len(input_data) > crof_limit:
            print(f"[crof-adaptive] proactive compact: {len(input_data)} items > limit {crof_limit}", file=sys.stderr)
            input_data = _crof_compact_for_retry(input_data, model)
            body = dict(body)
            body["input"] = input_data
        messages = oa_input_to_messages(input_data)
        instructions = body.get("instructions", "").strip()
        if instructions:
@@ -914,25 +1004,136 @@ class Handler(http.server.BaseHTTPRequestHandler):
            "Content-Type": "application/json",
            "Authorization": f"Bearer {API_KEY}",
        }, browser_ua=True)
-        print(f"[translate-proxy] POST {target} model={model} stream={stream} ua={fwd.get('User-Agent','')[:50]}", file=sys.stderr)
+        print(f"[translate-proxy] POST {target} model={model} stream={stream} items={len(input_data) if isinstance(input_data,list) else 1} ua={fwd.get('User-Agent','')[:50]}", file=sys.stderr)
-        _crof_debug_path = os.path.join(_LOG_DIR, "crof-upstream.jsonl")
+
        with open(_crof_debug_path, "a") as _cdf:
            _cdf.write(json.dumps({
                "model": model, "max_tokens": chat_body.get("max_tokens"),
                "reasoning_effort": chat_body.get("reasoning_effort"),
                "enable_thinking": chat_body.get("enable_thinking", "NOT_SENT"),
                "n_messages": len(chat_body.get("messages", [])),
                "has_tools": bool(chat_body.get("tools")),
            }) + "\n")
        req = urllib.request.Request(
            target,
            data=json.dumps(chat_body).encode(),
            headers=fwd,
        )
-        self._forward(req, stream, model,
+        self._forward_oa_compat(req, stream, model, chat_body, body, input_data, fwd, target, tools)
-            lambda r: oa_resp_to_responses(json.loads(r.read()), model),
+
-            lambda s: oa_stream_to_sse(s, model, body.get("request_id") or body.get("id")),
+    def _forward_oa_compat(self, req, stream, model, chat_body, body, input_data, fwd, target, tools):
-            input_data=body.get("input", ""))
+        try:
            upstream = urllib.request.urlopen(req, timeout=180)
        except urllib.error.HTTPError as e:
            err = e.read().decode()
            return self.send_json(e.code, {"error": {"type": "upstream_error", "message": err}})
        except Exception as e:
            return self.send_json(500, {"error": {"type": "proxy_error", "message": str(e)}})
        n_items = len(input_data) if isinstance(input_data, list) else 1
        if stream:
            self.send_response(200)
            self.send_header("Content-Type", "text/event-stream")
            self.send_header("Cache-Control", "no-cache")
            self.send_header("Connection", "keep-alive")
            self.end_headers()
            collected_events = []
            last_resp_id = None
            last_output = None
            last_status = None
            finish_reason = None
            has_content = False
            try:
                for event in oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id")):
                    self.wfile.write(event.encode("utf-8"))
                    self.wfile.flush()
                    collected_events.append(event)
                    for line in event.strip().split("\n"):
                        if line.startswith("data: "):
                            try:
                                d = json.loads(line[6:])
                                if d.get("type") == "response.completed":
                                    last_resp_id = d.get("response", {}).get("id")
                                    last_output = d.get("response", {}).get("output", [])
                                    last_status = d.get("response", {}).get("status")
                                    fr_map = {"completed": "stop", "incomplete": "length"}
                                    finish_reason = "length" if last_status == "incomplete" else "stop"
                                    has_content = any(o.get("type") == "message" for o in (last_output or []))
                            except: pass
            except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
                print("[translate-proxy] client disconnected during stream", file=sys.stderr)
                _crof_record(model, n_items, False)
                _log_resp(last_resp_id, "client_disconnect", last_output)
                return
            # Record outcome
            success = (finish_reason != "length")
            _crof_record(model, n_items, success)
            _log_resp(last_resp_id, last_status, last_output)
            if last_resp_id and input_data is not None:
                store_response(last_resp_id, input_data, last_output)
            # Auto-retry on finish_reason=length with no content
            if finish_reason == "length" and not has_content and isinstance(input_data, list) and len(input_data) > 5:
                print(f"[crof-adaptive] RETRY: finish_reason=length with no content, compacting {n_items} items", file=sys.stderr)
                new_input = _crof_compact_for_retry(input_data, model)
                if len(new_input) < len(input_data):
                    new_body = dict(body)
                    new_body["input"] = new_input
                    new_messages = oa_input_to_messages(new_input)
                    instructions = body.get("instructions", "").strip()
                    if instructions:
                        new_messages.insert(0, {"role": "system", "content": instructions})
                    new_chat_body = dict(chat_body)
                    new_chat_body["messages"] = new_messages
                    new_req = urllib.request.Request(
                        target,
                        data=json.dumps(new_chat_body).encode(),
                        headers=fwd,
                    )
                    self._forward_oa_compat_retry(new_req, model, new_chat_body, body, new_input)
        else:
            result = oa_resp_to_responses(json.loads(upstream.read()), model)
            success = result.get("status") != "incomplete"
            _crof_record(model, n_items, success)
            self.send_json(200, result)
            rid = result.get("id")
            _log_resp(rid, result.get("status"), result.get("output", []))
            if rid and input_data is not None:
                store_response(rid, input_data, result.get("output", []))
    def _forward_oa_compat_retry(self, req, model, chat_body, body, input_data):
        """Send a compacted retry request upstream and relay its SSE events.

        Args:
            req: Prepared ``urllib.request.Request`` for the retry.
            model: Model name, passed to the stream translator and recorder.
            chat_body: Chat payload that was encoded into *req* (unused here;
                kept for signature compatibility).
            body: Original client request body; supplies the request id.
            input_data: Compacted input items that were sent in *req*.

        The caller invokes this only after it has already sent the 200
        status line and the event-stream headers for this connection, so no
        status line or headers are written here: emitting them again would
        inject raw HTTP header text into the middle of the SSE body.

        Once the stream ends (or the client disconnects) the outcome is
        recorded via ``_crof_record`` - success only when a
        ``response.completed`` event reported status ``"completed"`` - then
        logged and, if a response id was seen, stored.
        """
        try:
            upstream = urllib.request.urlopen(req, timeout=180)
        except Exception as e:
            print(f"[crof-adaptive] retry failed: {e}", file=sys.stderr)
            return
        last_resp_id = None
        last_output = None
        last_status = None
        request_id = body.get("request_id") or body.get("id")
        try:
            for event in oa_stream_to_sse(upstream, model, request_id):
                self.wfile.write(event.encode("utf-8"))
                self.wfile.flush()
                for line in event.strip().split("\n"):
                    if not line.startswith("data: "):
                        continue
                    try:
                        d = json.loads(line[6:])
                        if d.get("type") != "response.completed":
                            continue
                        resp = d.get("response", {})
                        last_resp_id = resp.get("id")
                        last_output = resp.get("output", [])
                        last_status = resp.get("status")
                    except (ValueError, AttributeError, TypeError):
                        # Best-effort sniffing: non-JSON payloads or
                        # unexpected shapes are simply not tracked.
                        continue
        except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
            print("[translate-proxy] client disconnected during retry stream", file=sys.stderr)
        finally:
            # Release the upstream connection whether or not the stream
            # was fully consumed.
            upstream.close()
        n_items = len(input_data) if isinstance(input_data, list) else 1
        _crof_record(model, n_items, last_status == "completed")
        _log_resp(last_resp_id, last_status or "retry_disconnect", last_output)
        if last_resp_id and input_data is not None:
            store_response(last_resp_id, input_data, last_output)
    def _handle_anthropic(self, body, model, stream):
        input_data = body.get("input", "")