v2.3.0: adaptive Crof self-healing system

- Per-model success/failure tracking with dynamic item limits
- Proactive compaction when above learned limit
- Auto-retry on finish_reason=length with aggressive re-compaction
- Tested: kimi-k2.6 (27 items) and mimo-v2.5-pro both completed
- All previous fixes included: `_ts` NameError crash, connection reset on client disconnect, upstream timeout, orphaned function_call_output items
This commit is contained in:
Roman
2026-05-20 14:32:36 +04:00
Unverified
parent 60106955ab
commit 27b22f4fd8
5 changed files with 238 additions and 15 deletions

View File

@@ -1,5 +1,20 @@
# Changelog # Changelog
## v2.3.0 (2026-05-20)
- **Adaptive Crof self-healing system**
- Tracks per-model success/failure history with item counts
- Dynamically learns max item limit per model (starts at 30, adjusts down on failures)
- Proactively compacts input when above learned limit before sending to upstream
- Auto-retry on `finish_reason=length` with aggressive re-compaction and resend
- Prevents `stream disconnected` and `incomplete` errors on long conversations
- All tracking logged to stderr: `[crof-adaptive] model=X items=N OK/FAIL -> limit=N global=N`
- Fixed `NameError: _ts` crash in debug logging
- Fixed `ConnectionResetError` crash on client disconnect during streaming
- Added 180s upstream timeout to prevent hanging connections
- Compaction now preserves function_call/function_call_output pairs (no orphaned tool outputs)
- Fixed reasoning control: `reasoning_effort=none` always sends both params
## v2.2.1 (2026-05-20) ## v2.2.1 (2026-05-20)
- **Fixed compaction orphaning function_call_output items** — root cause of Crof `incomplete` responses - **Fixed compaction orphaning function_call_output items** — root cause of Crof `incomplete` responses

Binary file not shown.

Binary file not shown.

View File

@@ -24,6 +24,13 @@ model_catalog_json = ""
""" """
CHANGELOG = [ CHANGELOG = [
("2.3.0", "2026-05-20", [
"Adaptive Crof self-healing system — auto-adjusts to Crof model limits",
"Tracks per-model success/failure history, learns item count limits dynamically",
"Proactively compacts input when above learned limit before sending to Crof",
"Auto-retries on finish_reason=length — aggressively compacts and resends",
"Prevents 'stream disconnected' and 'incomplete' errors on long conversations",
]),
("2.2.1", "2026-05-20", [ ("2.2.1", "2026-05-20", [
"Fixed compaction orphaning function_call_output items — root cause of Crof incomplete responses", "Fixed compaction orphaning function_call_output items — root cause of Crof incomplete responses",
"Compaction now respects function_call/function_call_output pairs — no more dangling tool results", "Compaction now respects function_call/function_call_output pairs — no more dangling tool results",
@@ -548,7 +555,7 @@ class LauncherWin(Gtk.Window):
# header row # header row
hdr = Gtk.Box(spacing=8) hdr = Gtk.Box(spacing=8)
vbox.pack_start(hdr, False, False, 0) vbox.pack_start(hdr, False, False, 0)
lbl = Gtk.Label(label="<b>Codex Launcher v2.2.1</b>") lbl = Gtk.Label(label="<b>Codex Launcher v2.3.0</b>")
lbl.set_use_markup(True) lbl.set_use_markup(True)
hdr.pack_start(lbl, False, False, 0) hdr.pack_start(lbl, False, False, 0)
changelog_btn = Gtk.Button(label="Changelog") changelog_btn = Gtk.Button(label="Changelog")

View File

@@ -171,6 +171,87 @@ _MAX_INPUT_ITEMS = 30
_MAX_TOOL_OUTPUT_CHARS = 8000 _MAX_TOOL_OUTPUT_CHARS = 8000
_COMPACT_KEEP_RECENT = 10 _COMPACT_KEEP_RECENT = 10
# Process-wide, in-memory state for the adaptive Crof item-limit logic.
# Written by _crof_record(); read by _crof_item_limit() and
# _crof_compact_for_retry().
_CROF_ADAPTIVE = {
# One {"model", "items", "ok"} entry per recorded outcome. Despite the name,
# successes are stored here too. Cut back to the newest 100 entries once the
# list grows past 200.
"fail_history": [],
# model name -> {"ok_max", "fail_min", "limit"}; entries are created on the
# first recorded outcome for a model.
"model_limits": {},
# Smallest "limit" across all entries in model_limits, never above 30.
"global_item_limit": 30,
# Minimum number of trailing input items kept verbatim when compacting; also
# feeds the lower bound _crof_record() applies when deriving a limit.
"min_keep_recent": 4,
}
def _crof_record(model, n_items, success):
    """Record one upstream outcome and refresh the learned item limits.

    Args:
        model: Model identifier; key into ``_CROF_ADAPTIVE["model_limits"]``.
        n_items: Number of input items that were sent. Values that are not an
            ``int`` >= 1 are ignored and nothing is recorded.
        success: ``True`` when the request completed, ``False`` when it failed.

    Side effects:
        Appends to ``_CROF_ADAPTIVE["fail_history"]`` (cut back to the newest
        100 entries once it exceeds 200), updates the model's ``ok_max`` /
        ``fail_min`` / ``limit``, recomputes
        ``_CROF_ADAPTIVE["global_item_limit"]`` and writes one log line to
        stderr.
    """
    if not isinstance(n_items, int) or n_items < 1:
        return

    hist = _CROF_ADAPTIVE["fail_history"]
    hist.append({"model": model, "items": n_items, "ok": success})
    if len(hist) > 200:
        _CROF_ADAPTIVE["fail_history"] = hist[-100:]

    ml = _CROF_ADAPTIVE["model_limits"].setdefault(
        model, {"ok_max": 30, "fail_min": 0, "limit": 30}
    )
    if success and n_items > ml["ok_max"]:
        ml["ok_max"] = n_items
    if not success and (ml["fail_min"] == 0 or n_items < ml["fail_min"]):
        ml["fail_min"] = n_items

    if ml["fail_min"] > 0:
        # Sit one item below the smallest failure when a success at or above
        # that size has been seen, two items below otherwise.
        margin = 1 if ml["ok_max"] >= ml["fail_min"] else 2
        # Apply the floor in BOTH cases. Without it a single small failure
        # (e.g. a client disconnect on a 1-item request) pushed the limit to 0,
        # and through the global minimum below forced compaction of every
        # request for every model.
        floor = _CROF_ADAPTIVE["min_keep_recent"] + 2
        ml["limit"] = max(ml["fail_min"] - margin, floor)

    # The global limit is the most conservative per-model limit, capped at 30.
    global_limit = min(
        [30] + [v.get("limit", 30) for v in _CROF_ADAPTIVE["model_limits"].values()]
    )
    _CROF_ADAPTIVE["global_item_limit"] = global_limit
    print(f"[crof-adaptive] model={model} items={n_items} {'OK' if success else 'FAIL'} -> limit={ml.get('limit',30)} global={global_limit}", file=sys.stderr)
def _crof_item_limit(model):
    """Return the item-count cap to apply to a request for ``model``.

    The cap is the smaller of the model's own learned ``limit`` (30 when the
    model has no recorded entry, or its entry has no ``limit``) and
    ``_CROF_ADAPTIVE["global_item_limit"]``.
    """
    model_state = _CROF_ADAPTIVE["model_limits"].get(model, {})
    candidates = (
        model_state.get("limit", 30),
        _CROF_ADAPTIVE["global_item_limit"],
    )
    return min(candidates)
def _crof_compact_for_retry(input_data, model):
    """Shrink ``input_data`` by replacing its middle section with a summary.

    Args:
        input_data: The request's ``input`` value. Returned untouched unless it
            is a list longer than the limit from ``_crof_item_limit(model)``.
        model: Model name used to look up the current item limit.

    Returns:
        Either ``input_data`` itself, or a new list made of:
        the leading run of developer/system/user messages, one synthetic user
        message summarising what was dropped (only its last five items are
        described), and the retained tail. When nothing falls between head and
        tail, the result is just head + tail with no summary message.

    Note:
        The result is not guaranteed to be at or below the limit; callers that
        need a strictly shorter list must compare lengths themselves.
    """
    limit = _crof_item_limit(model)
    if not (isinstance(input_data, list) and len(input_data) > limit):
        return input_data

    recent_quota = max(_CROF_ADAPTIVE["min_keep_recent"], limit // 3)

    # Head: every leading developer/system/user message, up to the first item
    # that is anything else.
    prefix_len = 0
    for entry in input_data:
        if entry.get("type") != "message":
            break
        if entry.get("role") not in ("developer", "system", "user"):
            break
        prefix_len += 1
    head = input_data[:prefix_len]

    # Tail: start from the last `recent_quota` items, then move the cut point
    # backwards while it sits on a tool call, a tool output or an assistant
    # message, so the tail never opens in the middle of a tool exchange.
    cut = max(prefix_len, len(input_data) - recent_quota)
    while cut > prefix_len:
        boundary = input_data[cut]
        kind = boundary.get("type")
        is_tool_item = kind in ("function_call_output", "function_call")
        is_assistant_msg = kind == "message" and boundary.get("role", "") == "assistant"
        if not (is_tool_item or is_assistant_msg):
            break
        cut -= 1
    tail = input_data[cut:]

    body = input_data[prefix_len:cut]
    if not body:
        return head + tail

    digest = [f"[Auto-compacted: {len(body)} turns removed (adaptive limit={limit})]"]
    digest.extend(_item_summary(dropped, max_len=120) for dropped in body[-5:])
    placeholder = {
        "type": "message",
        "role": "user",
        "content": [{"type": "input_text", "text": "\n".join(digest)}],
    }
    print(f"[crof-adaptive] RETRY compact: {len(input_data)} -> {len(head)+1+len(tail)} (limit={limit}, keep={len(tail)})", file=sys.stderr)
    return head + [placeholder] + tail
def _item_summary(item, max_len=200): def _item_summary(item, max_len=200):
t = item.get("type") t = item.get("type")
if t == "message": if t == "message":
@@ -888,6 +969,15 @@ class Handler(http.server.BaseHTTPRequestHandler):
def _handle_openai_compat(self, body, model, stream): def _handle_openai_compat(self, body, model, stream):
input_data = body.get("input", "") input_data = body.get("input", "")
# Adaptive: proactively compact if above learned Crof limit
crof_limit = _crof_item_limit(model)
if isinstance(input_data, list) and len(input_data) > crof_limit:
print(f"[crof-adaptive] proactive compact: {len(input_data)} items > limit {crof_limit}", file=sys.stderr)
input_data = _crof_compact_for_retry(input_data, model)
body = dict(body)
body["input"] = input_data
messages = oa_input_to_messages(input_data) messages = oa_input_to_messages(input_data)
instructions = body.get("instructions", "").strip() instructions = body.get("instructions", "").strip()
if instructions: if instructions:
@@ -914,25 +1004,136 @@ class Handler(http.server.BaseHTTPRequestHandler):
"Content-Type": "application/json", "Content-Type": "application/json",
"Authorization": f"Bearer {API_KEY}", "Authorization": f"Bearer {API_KEY}",
}, browser_ua=True) }, browser_ua=True)
print(f"[translate-proxy] POST {target} model={model} stream={stream} ua={fwd.get('User-Agent','')[:50]}", file=sys.stderr) print(f"[translate-proxy] POST {target} model={model} stream={stream} items={len(input_data) if isinstance(input_data,list) else 1} ua={fwd.get('User-Agent','')[:50]}", file=sys.stderr)
_crof_debug_path = os.path.join(_LOG_DIR, "crof-upstream.jsonl")
with open(_crof_debug_path, "a") as _cdf:
_cdf.write(json.dumps({
"model": model, "max_tokens": chat_body.get("max_tokens"),
"reasoning_effort": chat_body.get("reasoning_effort"),
"enable_thinking": chat_body.get("enable_thinking", "NOT_SENT"),
"n_messages": len(chat_body.get("messages", [])),
"has_tools": bool(chat_body.get("tools")),
}) + "\n")
req = urllib.request.Request( req = urllib.request.Request(
target, target,
data=json.dumps(chat_body).encode(), data=json.dumps(chat_body).encode(),
headers=fwd, headers=fwd,
) )
self._forward(req, stream, model, self._forward_oa_compat(req, stream, model, chat_body, body, input_data, fwd, target, tools)
lambda r: oa_resp_to_responses(json.loads(r.read()), model),
def _forward_oa_compat(self, req, stream, model, chat_body, body, input_data, fwd, target, tools):
    """Send ``req`` upstream, relay the answer, and feed the adaptive tracker.

    Args:
        req: Prepared ``urllib.request.Request`` for the upstream endpoint.
        stream: When true, relay the upstream response as server-sent events;
            otherwise translate it and return a single JSON document.
        model: Model name, used for translation and for ``_crof_record``.
        chat_body: Chat-completions payload that was serialised into ``req``;
            copied (with new messages) if a retry is issued.
        body: Original request body (``request_id`` / ``id`` / ``instructions``
            are read from it).
        input_data: The (possibly already compacted) ``input`` value; its
            length is what gets recorded as the item count.
        fwd: Header dict reused for a retry request.
        target: Upstream URL reused for a retry request.
        tools: Unused here; kept so the call signature stays unchanged.

    Behaviour:
        * Upstream connection errors are answered with a JSON error document.
        * Streaming: events are written to the client as they arrive. The
          ``response.completed`` event is inspected to learn id, output and
          status. A status of ``incomplete`` is treated as
          ``finish_reason=length``; if that happens with no message in the
          output and more than five input items, the input is compacted and,
          when that made it shorter, re-sent via ``_forward_oa_compat_retry``.
        * Every outcome is passed to ``_crof_record`` and ``_log_resp``;
          responses with an id are stored with ``store_response``.
    """
    try:
        upstream = urllib.request.urlopen(req, timeout=180)
    except urllib.error.HTTPError as e:
        # Error bodies are not guaranteed to be valid UTF-8; never let the
        # decode itself raise while reporting the upstream failure.
        err = e.read().decode("utf-8", "replace")
        return self.send_json(e.code, {"error": {"type": "upstream_error", "message": err}})
    except Exception as e:
        return self.send_json(500, {"error": {"type": "proxy_error", "message": str(e)}})

    n_items = len(input_data) if isinstance(input_data, list) else 1

    if stream:
        self.send_response(200)
        self.send_header("Content-Type", "text/event-stream")
        self.send_header("Cache-Control", "no-cache")
        self.send_header("Connection", "keep-alive")
        self.end_headers()
        last_resp_id = None
        last_output = None
        last_status = None
        finish_reason = None
        has_content = False
        try:
            for event in oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id")):
                self.wfile.write(event.encode("utf-8"))
                self.wfile.flush()
                for line in event.strip().split("\n"):
                    if not line.startswith("data: "):
                        continue
                    try:
                        d = json.loads(line[6:])
                        if d.get("type") == "response.completed":
                            last_resp_id = d.get("response", {}).get("id")
                            last_output = d.get("response", {}).get("output", [])
                            last_status = d.get("response", {}).get("status")
                            finish_reason = "length" if last_status == "incomplete" else "stop"
                            has_content = any(o.get("type") == "message" for o in (last_output or []))
                    except (ValueError, AttributeError, TypeError):
                        # Best effort: a payload that is not JSON, or not
                        # shaped like a response event, is relayed but not
                        # inspected.
                        pass
        except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
            print("[translate-proxy] client disconnected during stream", file=sys.stderr)
            _crof_record(model, n_items, False)
            _log_resp(last_resp_id, "client_disconnect", last_output)
            return
        finally:
            # Release the upstream socket on every exit path.
            upstream.close()

        # Record outcome
        success = (finish_reason != "length")
        _crof_record(model, n_items, success)
        _log_resp(last_resp_id, last_status, last_output)
        if last_resp_id and input_data is not None:
            store_response(last_resp_id, input_data, last_output)

        # Auto-retry on finish_reason=length with no content
        if finish_reason == "length" and not has_content and isinstance(input_data, list) and len(input_data) > 5:
            print(f"[crof-adaptive] RETRY: finish_reason=length with no content, compacting {n_items} items", file=sys.stderr)
            new_input = _crof_compact_for_retry(input_data, model)
            # Only resend when compaction actually removed something.
            if len(new_input) < len(input_data):
                new_messages = oa_input_to_messages(new_input)
                instructions = body.get("instructions", "").strip()
                if instructions:
                    new_messages.insert(0, {"role": "system", "content": instructions})
                new_chat_body = dict(chat_body)
                new_chat_body["messages"] = new_messages
                new_req = urllib.request.Request(
                    target,
                    data=json.dumps(new_chat_body).encode(),
                    headers=fwd,
                )
                self._forward_oa_compat_retry(new_req, model, new_chat_body, body, new_input)
    else:
        try:
            result = oa_resp_to_responses(json.loads(upstream.read()), model)
        finally:
            upstream.close()
        success = result.get("status") != "incomplete"
        _crof_record(model, n_items, success)
        self.send_json(200, result)
        rid = result.get("id")
        _log_resp(rid, result.get("status"), result.get("output", []))
        if rid and input_data is not None:
            store_response(rid, input_data, result.get("output", []))
def _forward_oa_compat_retry(self, req, model, chat_body, body, input_data):
    """Stream the answer to a compacted retry request into the open response.

    Precondition: the caller has already sent the 200 status line and the SSE
    headers for this client response and has streamed the first attempt. This
    method therefore only appends further events to the body. Sending a status
    line and headers again would inject raw HTTP header text into the
    event stream.

    Args:
        req: Prepared ``urllib.request.Request`` carrying the compacted input.
        model: Model name, used for translation and for ``_crof_record``.
        chat_body: Payload serialised into ``req`` (not read here).
        body: Original request body (``request_id`` / ``id`` are read).
        input_data: The compacted input; its length is recorded as the item
            count and it is what gets stored with the response.

    Behaviour:
        A failure to open the upstream connection is logged and the method
        returns. Otherwise events are relayed until the stream ends or the
        client disconnects. The outcome is then recorded as a success only if
        a ``response.completed`` event reported status ``completed``.
    """
    try:
        upstream = urllib.request.urlopen(req, timeout=180)
    except Exception as e:
        print(f"[crof-adaptive] retry failed: {e}", file=sys.stderr)
        return

    last_resp_id = None
    last_output = None
    last_status = None
    try:
        for event in oa_stream_to_sse(upstream, model, body.get("request_id") or body.get("id")):
            self.wfile.write(event.encode("utf-8"))
            self.wfile.flush()
            for line in event.strip().split("\n"):
                if not line.startswith("data: "):
                    continue
                try:
                    d = json.loads(line[6:])
                    if d.get("type") == "response.completed":
                        last_resp_id = d.get("response", {}).get("id")
                        last_output = d.get("response", {}).get("output", [])
                        last_status = d.get("response", {}).get("status")
                except (ValueError, AttributeError, TypeError):
                    # Best effort: relay payloads that cannot be inspected.
                    pass
    except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
        print("[translate-proxy] client disconnected during retry stream", file=sys.stderr)
    finally:
        # Release the upstream socket on every exit path.
        upstream.close()

    n_items = len(input_data) if isinstance(input_data, list) else 1
    _crof_record(model, n_items, last_status == "completed")
    _log_resp(last_resp_id, last_status or "retry_disconnect", last_output)
    if last_resp_id and input_data is not None:
        store_response(last_resp_id, input_data, last_output)
def _handle_anthropic(self, body, model, stream): def _handle_anthropic(self, body, model, stream):
input_data = body.get("input", "") input_data = body.get("input", "")