v2.3.0: adaptive Crof self-healing system
- Per-model success/failure tracking with dynamic item limits
- Proactive compaction when above learned limit
- Auto-retry on finish_reason=length with aggressive re-compaction
- Tested: kimi-k2.6 (27 items) and mimo-v2.5-pro both completed
- All previous fixes included: _ts crash, connection reset, timeout, orphaned function_call_output (fco)
This commit is contained in:
15
CHANGELOG.md
15
CHANGELOG.md
@@ -1,5 +1,20 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## v2.3.0 (2026-05-20)
|
||||||
|
|
||||||
|
- **Adaptive Crof self-healing system**
|
||||||
|
- Tracks per-model success/failure history with item counts
|
||||||
|
- Dynamically learns max item limit per model (starts at 30, adjusts down on failures)
|
||||||
|
- Proactively compacts input when above learned limit before sending to upstream
|
||||||
|
- Auto-retry on `finish_reason=length` with aggressive re-compaction and resend
|
||||||
|
- Prevents `stream disconnected` and `incomplete` errors on long conversations
|
||||||
|
- All tracking logged to stderr: `[crof-adaptive] model=X items=N OK/FAIL -> limit=N global=N`
|
||||||
|
- Fixed `NameError: _ts` crash in debug logging
|
||||||
|
- Fixed `ConnectionResetError` crash on client disconnect during streaming
|
||||||
|
- Added 180s upstream timeout to prevent hanging connections
|
||||||
|
- Compaction now preserves function_call/function_call_output pairs (no orphaned tool outputs)
|
||||||
|
- Fixed reasoning control: `reasoning_effort=none` always sends both params
|
||||||
|
|
||||||
## v2.2.1 (2026-05-20)
|
## v2.2.1 (2026-05-20)
|
||||||
|
|
||||||
- **Fixed compaction orphaning function_call_output items** — root cause of Crof `incomplete` responses
|
- **Fixed compaction orphaning function_call_output items** — root cause of Crof `incomplete` responses
|
||||||
|
|||||||
Binary file not shown.
BIN
codex-launcher_2.3.0_all.deb
Normal file
BIN
codex-launcher_2.3.0_all.deb
Normal file
Binary file not shown.
@@ -24,6 +24,13 @@ model_catalog_json = ""
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
CHANGELOG = [
|
CHANGELOG = [
|
||||||
|
("2.3.0", "2026-05-20", [
|
||||||
|
"Adaptive Crof self-healing system — auto-adjusts to Crof model limits",
|
||||||
|
"Tracks per-model success/failure history, learns item count limits dynamically",
|
||||||
|
"Proactively compacts input when above learned limit before sending to Crof",
|
||||||
|
"Auto-retries on finish_reason=length — aggressively compacts and resends",
|
||||||
|
"Prevents 'stream disconnected' and 'incomplete' errors on long conversations",
|
||||||
|
]),
|
||||||
("2.2.1", "2026-05-20", [
|
("2.2.1", "2026-05-20", [
|
||||||
"Fixed compaction orphaning function_call_output items — root cause of Crof incomplete responses",
|
"Fixed compaction orphaning function_call_output items — root cause of Crof incomplete responses",
|
||||||
"Compaction now respects function_call/function_call_output pairs — no more dangling tool results",
|
"Compaction now respects function_call/function_call_output pairs — no more dangling tool results",
|
||||||
@@ -548,7 +555,7 @@ class LauncherWin(Gtk.Window):
|
|||||||
# header row
|
# header row
|
||||||
hdr = Gtk.Box(spacing=8)
|
hdr = Gtk.Box(spacing=8)
|
||||||
vbox.pack_start(hdr, False, False, 0)
|
vbox.pack_start(hdr, False, False, 0)
|
||||||
lbl = Gtk.Label(label="<b>Codex Launcher v2.2.1</b>")
|
lbl = Gtk.Label(label="<b>Codex Launcher v2.3.0</b>")
|
||||||
lbl.set_use_markup(True)
|
lbl.set_use_markup(True)
|
||||||
hdr.pack_start(lbl, False, False, 0)
|
hdr.pack_start(lbl, False, False, 0)
|
||||||
changelog_btn = Gtk.Button(label="Changelog")
|
changelog_btn = Gtk.Button(label="Changelog")
|
||||||
|
|||||||
@@ -171,6 +171,87 @@ _MAX_INPUT_ITEMS = 30
|
|||||||
_MAX_TOOL_OUTPUT_CHARS = 8000
|
_MAX_TOOL_OUTPUT_CHARS = 8000
|
||||||
_COMPACT_KEEP_RECENT = 10
|
_COMPACT_KEEP_RECENT = 10
|
||||||
|
|
||||||
|
# Mutable module-level state shared by the _crof_* adaptive helpers.
_CROF_ADAPTIVE = {
    # Rolling log of recent outcomes; despite the name it stores successes too.
    "fail_history": [],
    # model name -> {"ok_max": int, "fail_min": int, "limit": int}
    "model_limits": {},
    # Smallest learned per-model limit, never above the default of 30.
    "global_item_limit": 30,
    # Minimum number of trailing items compaction keeps; also anchors the
    # lower bound of any learned limit (min_keep_recent + 2).
    "min_keep_recent": 4,
}


def _crof_record(model, n_items, success):
    """Record one upstream outcome and update the learned item limits.

    Args:
        model: Model name used as the key in ``model_limits``.
        n_items: Number of input items that were sent. Calls where this is
            not an ``int`` or is below 1 are ignored.
        success: Truthy when the request completed, falsy when it failed.

    Side effects:
        Appends to ``_CROF_ADAPTIVE["fail_history"]`` (trimmed to the last
        100 entries once it exceeds 200), updates the per-model entry,
        recomputes ``global_item_limit`` and prints a summary to stderr.
    """
    if not isinstance(n_items, int) or n_items < 1:
        return

    hist = _CROF_ADAPTIVE["fail_history"]
    hist.append({"model": model, "items": n_items, "ok": success})
    if len(hist) > 200:
        # Trim in place so every reference to the list sees the same data.
        del hist[:-100]

    model_limits = _CROF_ADAPTIVE["model_limits"]
    ml = model_limits.setdefault(model, {"ok_max": 30, "fail_min": 0, "limit": 30})
    if success:
        if n_items > ml["ok_max"]:
            ml["ok_max"] = n_items
    elif ml["fail_min"] == 0 or n_items < ml["fail_min"]:
        # fail_min == 0 means "no failure seen yet".
        ml["fail_min"] = n_items

    if ml["fail_min"] > 0:
        # Stay one item under the smallest failing size when a success at or
        # above it has been seen (or assumed via the ok_max default), two
        # items under otherwise. The floor applies in both cases: compaction
        # always keeps at least min_keep_recent items plus a summary, so a
        # lower limit could never be satisfied.
        floor = _CROF_ADAPTIVE["min_keep_recent"] + 2
        margin = 1 if ml["ok_max"] >= ml["fail_min"] else 2
        ml["limit"] = max(ml["fail_min"] - margin, floor)

    # The global limit is the strictest per-model limit, capped at 30.
    global_limit = min([30] + [v.get("limit", 30) for v in model_limits.values()])
    _CROF_ADAPTIVE["global_item_limit"] = global_limit

    print(f"[crof-adaptive] model={model} items={n_items} {'OK' if success else 'FAIL'} -> limit={ml.get('limit',30)} global={global_limit}", file=sys.stderr)
|
||||||
|
|
||||||
|
def _crof_item_limit(model):
    """Return the item limit currently in force for ``model``.

    The result is the smaller of the model's own learned limit (30 when the
    model has no entry, or its entry has no "limit" key) and the global
    limit stored in ``_CROF_ADAPTIVE["global_item_limit"]``.
    """
    learned = _CROF_ADAPTIVE["model_limits"].get(model)
    model_cap = 30 if learned is None else learned.get("limit", 30)
    global_cap = _CROF_ADAPTIVE["global_item_limit"]
    return model_cap if model_cap < global_cap else global_cap
|
||||||
|
|
||||||
|
def _crof_compact_for_retry(input_data, model):
    """Shrink an over-limit input list to head + summary + recent tail.

    Args:
        input_data: The request's input. Anything that is not a list, or a
            list no longer than the current limit, is returned unchanged.
        model: Model name used to look up the limit via ``_crof_item_limit``.

    Returns:
        A new list made of the leading developer/system/user messages, one
        synthetic user message summarising the dropped items, and the
        retained tail. When nothing falls between head and tail, the result
        is just head + tail.
    """
    limit = _crof_item_limit(model)
    if not isinstance(input_data, list):
        return input_data
    total = len(input_data)
    if total <= limit:
        return input_data

    keep = max(_CROF_ADAPTIVE["min_keep_recent"], limit // 3)

    # Head: the unbroken leading run of developer/system/user messages.
    head_end = 0
    while head_end < total:
        entry = input_data[head_end]
        if entry.get("type") != "message" or entry.get("role") not in ("developer", "system", "user"):
            break
        head_end += 1

    # Tail: start `keep` items from the end, then move the boundary earlier
    # while it sits on a function_call, a function_call_output or an
    # assistant message, so the tail never opens in the middle of a tool
    # exchange.
    tail_start = max(head_end, total - keep)
    while tail_start > head_end:
        boundary = input_data[tail_start]
        kind = boundary.get("type")
        is_tool_item = kind in ("function_call_output", "function_call")
        is_assistant_msg = kind == "message" and boundary.get("role", "") == "assistant"
        if not (is_tool_item or is_assistant_msg):
            break
        tail_start -= 1

    head = input_data[:head_end]
    tail = input_data[tail_start:]
    dropped = input_data[head_end:tail_start]
    if not dropped:
        return head + tail

    # Only the last five dropped items are described in the summary.
    summary_lines = [f"[Auto-compacted: {len(dropped)} turns removed (adaptive limit={limit})]"]
    summary_lines.extend(_item_summary(entry, max_len=120) for entry in dropped[-5:])
    summary_text = "\n".join(summary_lines)

    summary_msg = {"type": "message", "role": "user", "content": [{"type": "input_text", "text": summary_text}]}
    print(f"[crof-adaptive] RETRY compact: {total} -> {len(head)+1+len(tail)} (limit={limit}, keep={len(tail)})", file=sys.stderr)
    return head + [summary_msg] + tail
|
||||||
|
|
||||||
def _item_summary(item, max_len=200):
|
def _item_summary(item, max_len=200):
|
||||||
t = item.get("type")
|
t = item.get("type")
|
||||||
if t == "message":
|
if t == "message":
|
||||||
@@ -888,6 +969,15 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
|
|
||||||
def _handle_openai_compat(self, body, model, stream):
|
def _handle_openai_compat(self, body, model, stream):
|
||||||
input_data = body.get("input", "")
|
input_data = body.get("input", "")
|
||||||
|
|
||||||
|
# Adaptive: proactively compact if above learned Crof limit
|
||||||
|
crof_limit = _crof_item_limit(model)
|
||||||
|
if isinstance(input_data, list) and len(input_data) > crof_limit:
|
||||||
|
print(f"[crof-adaptive] proactive compact: {len(input_data)} items > limit {crof_limit}", file=sys.stderr)
|
||||||
|
input_data = _crof_compact_for_retry(input_data, model)
|
||||||
|
body = dict(body)
|
||||||
|
body["input"] = input_data
|
||||||
|
|
||||||
messages = oa_input_to_messages(input_data)
|
messages = oa_input_to_messages(input_data)
|
||||||
instructions = body.get("instructions", "").strip()
|
instructions = body.get("instructions", "").strip()
|
||||||
if instructions:
|
if instructions:
|
||||||
@@ -914,25 +1004,136 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
|||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
"Authorization": f"Bearer {API_KEY}",
|
"Authorization": f"Bearer {API_KEY}",
|
||||||
}, browser_ua=True)
|
}, browser_ua=True)
|
||||||
print(f"[translate-proxy] POST {target} model={model} stream={stream} ua={fwd.get('User-Agent','')[:50]}", file=sys.stderr)
|
print(f"[translate-proxy] POST {target} model={model} stream={stream} items={len(input_data) if isinstance(input_data,list) else 1} ua={fwd.get('User-Agent','')[:50]}", file=sys.stderr)
|
||||||
_crof_debug_path = os.path.join(_LOG_DIR, "crof-upstream.jsonl")
|
|
||||||
with open(_crof_debug_path, "a") as _cdf:
|
|
||||||
_cdf.write(json.dumps({
|
|
||||||
"model": model, "max_tokens": chat_body.get("max_tokens"),
|
|
||||||
"reasoning_effort": chat_body.get("reasoning_effort"),
|
|
||||||
"enable_thinking": chat_body.get("enable_thinking", "NOT_SENT"),
|
|
||||||
"n_messages": len(chat_body.get("messages", [])),
|
|
||||||
"has_tools": bool(chat_body.get("tools")),
|
|
||||||
}) + "\n")
|
|
||||||
req = urllib.request.Request(
|
req = urllib.request.Request(
|
||||||
target,
|
target,
|
||||||
data=json.dumps(chat_body).encode(),
|
data=json.dumps(chat_body).encode(),
|
||||||
headers=fwd,
|
headers=fwd,
|
||||||
)
|
)
|
||||||
self._forward(req, stream, model,
|
self._forward_oa_compat(req, stream, model, chat_body, body, input_data, fwd, target, tools)
|
||||||
lambda r: oa_resp_to_responses(json.loads(r.read()), model),
|
|
||||||
lambda s: oa_stream_to_sse(s, model, body.get("request_id") or body.get("id")),
|
def _forward_oa_compat(self, req, stream, model, chat_body, body, input_data, fwd, target, tools):
    """Send the translated chat request upstream and relay the reply.

    Args:
        req: Prepared ``urllib.request.Request`` for the upstream call.
        stream: When truthy the reply is relayed as server-sent events,
            otherwise it is converted and sent as one JSON document.
        model: Model name, used for translation and adaptive bookkeeping.
        chat_body: The chat-completions payload that ``req`` carries; reused
            as the template for a retry.
        body: The original client request body.
        input_data: The (possibly compacted) input that was sent.
        fwd: Header dict to reuse for a retry request.
        target: Upstream URL to reuse for a retry request.
        tools: Unused here; kept for call-site compatibility.

    Upstream errors are reported to the client through ``send_json``. Each
    outcome is recorded with ``_crof_record``. A streamed reply that ends
    with status "incomplete" and no message output triggers one retry with
    a compacted input.
    """
    try:
        upstream = urllib.request.urlopen(req, timeout=180)
    except urllib.error.HTTPError as e:
        # Never let undecodable error bytes turn into a second exception.
        err = e.read().decode("utf-8", errors="replace")
        return self.send_json(e.code, {"error": {"type": "upstream_error", "message": err}})
    except Exception as e:
        return self.send_json(500, {"error": {"type": "proxy_error", "message": str(e)}})

    n_items = len(input_data) if isinstance(input_data, list) else 1

    if not stream:
        with upstream:  # close the upstream socket once the body is read
            result = oa_resp_to_responses(json.loads(upstream.read()), model)
        _crof_record(model, n_items, result.get("status") != "incomplete")
        self.send_json(200, result)
        rid = result.get("id")
        _log_resp(rid, result.get("status"), result.get("output", []))
        if rid and input_data is not None:
            store_response(rid, input_data, result.get("output", []))
        return

    self._oa_compat_sse_headers()
    with upstream:
        last_resp_id, last_output, last_status, client_gone = self._oa_compat_relay_stream(upstream, model, body)

    if client_gone:
        print("[translate-proxy] client disconnected during stream", file=sys.stderr)
        # NOTE(review): a client hang-up is counted as a model failure and
        # lowers the learned limit; confirm that this is intended.
        _crof_record(model, n_items, False)
        _log_resp(last_resp_id, "client_disconnect", last_output)
        return

    # An "incomplete" terminal status is the only outcome counted as failure.
    hit_length = last_status == "incomplete"
    has_content = isinstance(last_output, list) and any(
        isinstance(o, dict) and o.get("type") == "message" for o in last_output
    )

    _crof_record(model, n_items, not hit_length)
    _log_resp(last_resp_id, last_status, last_output)
    if last_resp_id and input_data is not None:
        store_response(last_resp_id, input_data, last_output)

    # Auto-retry on finish_reason=length with no content
    if hit_length and not has_content and isinstance(input_data, list) and len(input_data) > 5:
        print(f"[crof-adaptive] RETRY: finish_reason=length with no content, compacting {n_items} items", file=sys.stderr)
        new_input = _crof_compact_for_retry(input_data, model)
        if len(new_input) < len(input_data):
            new_messages = oa_input_to_messages(new_input)
            instructions = (body.get("instructions") or "").strip()
            if instructions:
                new_messages.insert(0, {"role": "system", "content": instructions})
            new_chat_body = dict(chat_body)
            new_chat_body["messages"] = new_messages
            new_req = urllib.request.Request(
                target,
                data=json.dumps(new_chat_body).encode(),
                headers=fwd,
            )
            # The SSE response is already open, so the retry must not emit a
            # second status line and header block into the event stream.
            # NOTE(review): the client has already received the first
            # attempt's terminal event; verify it keeps reading afterwards.
            self._forward_oa_compat_retry(new_req, model, new_chat_body, body, new_input, headers_sent=True)


def _oa_compat_sse_headers(self):
    """Open a 200 text/event-stream response on the client connection."""
    self.send_response(200)
    self.send_header("Content-Type", "text/event-stream")
    self.send_header("Cache-Control", "no-cache")
    self.send_header("Connection", "keep-alive")
    self.end_headers()


def _oa_compat_relay_stream(self, upstream, model, body):
    """Relay translated SSE events to the client and capture the final response.

    Returns:
        ``(resp_id, output, status, client_gone)``. The first three come
        from the last ``response.completed`` event seen (``None`` when there
        was none). ``client_gone`` is True when writing to the client failed
        with a connection error.
    """
    resp_id = output = status = None
    try:
        for event in oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id")):
            self.wfile.write(event.encode("utf-8"))
            self.wfile.flush()
            for line in event.strip().split("\n"):
                if not line.startswith("data: "):
                    continue
                try:
                    d = json.loads(line[6:])
                    if d.get("type") == "response.completed":
                        resp = d.get("response", {})
                        resp_id = resp.get("id")
                        output = resp.get("output", [])
                        status = resp.get("status")
                except (ValueError, AttributeError, TypeError):
                    # Best effort: a data line that is not the expected JSON
                    # object is relayed but not inspected.
                    continue
    except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
        return resp_id, output, status, True
    return resp_id, output, status, False


def _forward_oa_compat_retry(self, req, model, chat_body, body, input_data, headers_sent=False):
    """Send a compacted retry request upstream and stream its reply.

    Args:
        req: Prepared retry ``urllib.request.Request``.
        model: Model name, used for translation and adaptive bookkeeping.
        chat_body: Payload carried by ``req`` (not read here).
        body: The original client request body.
        input_data: The compacted input that the retry sends.
        headers_sent: Pass True when the SSE response headers have already
            been written on this connection, so they are not sent again.

    A failure to reach upstream is logged to stderr and otherwise ignored.
    The outcome counts as a success only when the stream ends with status
    "completed".
    """
    try:
        upstream = urllib.request.urlopen(req, timeout=180)
    except Exception as e:
        print(f"[crof-adaptive] retry failed: {e}", file=sys.stderr)
        return

    if not headers_sent:
        self._oa_compat_sse_headers()

    with upstream:
        last_resp_id, last_output, last_status, client_gone = self._oa_compat_relay_stream(upstream, model, body)
    if client_gone:
        print("[translate-proxy] client disconnected during retry stream", file=sys.stderr)

    n_items = len(input_data) if isinstance(input_data, list) else 1
    _crof_record(model, n_items, last_status == "completed")
    _log_resp(last_resp_id, last_status or "retry_disconnect", last_output)
    if last_resp_id and input_data is not None:
        store_response(last_resp_id, input_data, last_output)
|
||||||
|
|
||||||
def _handle_anthropic(self, body, model, stream):
|
def _handle_anthropic(self, body, model, stream):
|
||||||
input_data = body.get("input", "")
|
input_data = body.get("input", "")
|
||||||
|
|||||||
Reference in New Issue
Block a user