v3.11.6: Antigravity loop breakers, vision/OCR preprocessing, has_content fix, auth config error fix, install.ps1

This commit is contained in:
Roman | RyzenAdvanced
2026-05-26 18:07:42 +04:00
Unverified
parent b029e7cb5e
commit e59ef6f28a
8 changed files with 340 additions and 10 deletions

View File

@@ -27,6 +27,12 @@ model_catalog_json = ""
"""
CHANGELOG = [
("3.11.6", "2026-05-26", [
"Antigravity loop breakers: per-session tracking, repeated tool detection",
"has_content fix: function_call counts as valid output",
"Latest user instruction appended once per request for Antigravity",
"Antigravity-only changes, no touch to other providers",
]),
("3.11.5", "2026-05-26", [
"Token-aware compaction: fixes context_length_exceeded on small-context models",
"Proactive compaction triggers on token count, not just item count",
@@ -2140,6 +2146,8 @@ class LauncherWin(Gtk.Window):
self._relogin_btn.set_sensitive("cli" not in self._missing)
elif status == "not_installed":
self._auth_label.set_markup("<span foreground='#888'>Auth: N/A (CLI not installed)</span>")
elif status == "not_configured":
self._auth_label.set_markup("<span foreground='#d29922'>⚠ Config missing — launch once to create</span>")
else:
self._auth_label.set_markup(f"<span foreground='#d29922'>⚠ Auth: {msg}</span>")
self._relogin_btn.set_sensitive("cli" not in self._missing)

View File

@@ -83,13 +83,21 @@ model_catalog_json = ""
"""
CHANGELOG = [
("3.11.6", "2026-05-26", [
"Antigravity loop breakers: per-session tracking, edit-intent nudge (first turn only)",
"Loop breaker: same tool+args repeated 5+ times triggers force finalization",
"Latest user instruction appended exactly once per request",
"Detailed [antigravity-loop] logging for all tracking fields",
"has_content fix: function_call now counts as valid output (no more infinite loops)",
"Antigravity-only changes, no touch to other providers",
]),
("3.11.5", "2026-05-26", [
"Token-aware compaction: fixes context_length_exceeded on small-context models (25 items × 1600 tokens)",
"Token-aware compaction: fixes context_length_exceeded on small-context models (25 items x 1600 tokens)",
"Proactive compaction triggers on token count (>80% model limit), not just item count",
"Universal adaptive compaction: removed crof.ai-only gates, all providers get compaction",
"Vision model detection: strips images for non-vision models, keeps for vision-capable ones",
"Per-model token limit learning from context_length_exceeded error messages",
"Compaction aggression levels: normal vs extreme when tokens > 1.5× model limit",
"Compaction aggression levels: normal vs extreme when tokens > 1.5x model limit",
"Smart-continue text-tool detection: triggers on tool-call text patterns, not just function_call_output",
"Active endpoint sync: GUI auto-removes stale endpoint references on startup",
]),
@@ -1713,6 +1721,10 @@ def check_codex_auth():
return ("unknown", "No output from codex login status")
except FileNotFoundError:
return ("not_installed", "codex not found")
except OSError as e:
if e.errno == 2:
return ("not_configured", "Config not found — launch Codex once to create it")
return ("error", str(e))
except Exception as e:
return ("error", str(e))

View File

@@ -157,7 +157,7 @@ Architecture:
import json, http.server, socketserver, urllib.request, urllib.parse, urllib.error, re
import time, uuid, os, sys, argparse, threading, socket, collections, contextlib, signal
import secrets, string
import secrets, string, hashlib
import dataclasses
import http.client
import selectors
@@ -219,6 +219,9 @@ def load_config():
"backend_type": ("PROXY_BACKEND", None, str),
"target_url": ("PROXY_TARGET_URL", "ZAI_BASE_URL", str),
"api_key": ("PROXY_API_KEY", "ZAI_API_KEY", str),
"vision_fallback_url": ("VISION_FALLBACK_URL", None, str),
"vision_fallback_model": ("VISION_FALLBACK_MODEL", None, str),
"vision_fallback_key": ("VISION_FALLBACK_KEY", None, str),
}
for ck, (ev1, ev2, conv) in env_map.items():
if ck not in cfg:
@@ -260,6 +263,9 @@ PROMPT_ENHANCER_MODE = "offline"
PROMPT_ENHANCER_MODEL = ""
PROMPT_ENHANCER_URL = ""
PROMPT_ENHANCER_KEY = ""
VISION_FALLBACK_URL = ""
VISION_FALLBACK_MODEL = ""
VISION_FALLBACK_KEY = ""
SERVER = None
if _IS_WINDOWS:
@@ -855,6 +861,7 @@ def _init_runtime():
global CONFIG, PORT, BACKEND, TARGET_URL, API_KEY, OAUTH_PROVIDER, _antigravity_version
global MODELS, CC_VERSION, REASONING_ENABLED, REASONING_EFFORT, BGP_ROUTES
global _api_key_pool, PROMPT_ENHANCER
global VISION_FALLBACK_URL, VISION_FALLBACK_MODEL, VISION_FALLBACK_KEY
CONFIG = load_config()
PORT = CONFIG["port"]
@@ -872,6 +879,9 @@ def _init_runtime():
PROMPT_ENHANCER_MODEL = CONFIG.get("prompt_enhancer_model", "")
PROMPT_ENHANCER_URL = CONFIG.get("prompt_enhancer_url", "")
PROMPT_ENHANCER_KEY = CONFIG.get("prompt_enhancer_key", "")
VISION_FALLBACK_URL = CONFIG.get("vision_fallback_url") or "https://api.kilo.ai/api/gateway/chat/completions"
VISION_FALLBACK_MODEL = CONFIG.get("vision_fallback_model") or "kilo-auto/small"
VISION_FALLBACK_KEY = CONFIG.get("vision_fallback_key") or ""
BGP_ROUTES = CONFIG.get("bgp_routes", [])
_api_key_pool = None
if API_KEY and "," in API_KEY and not OAUTH_PROVIDER.startswith("google") and BACKEND not in ("codebuff", "freebuff"):
@@ -2366,6 +2376,113 @@ def _mark_vision_fail(model):
with _vision_fail_lock:
_vision_fail_cache.add(model)
def _vision_describe_image(img_data, cache):
    """Ask the vision fallback endpoint for a text description of one image.

    Args:
        img_data: Either a dict carrying the image URL under "url" or under
            "image_url" (itself a string or a {"url": ...} dict), or any other
            value, which is converted with ``str()`` and used as the URL.
        cache: Mutable dict keyed by the MD5 hex digest of the image URL.
            It is consulted before the request and filled after a successful
            one, so the same image is only described once per cache.

    Returns:
        The description text, or None when no fallback URL is configured, no
        image URL could be extracted, the request failed, or the reply did
        not contain any content.
    """
    if not VISION_FALLBACK_URL:
        return None
    if isinstance(img_data, dict):
        img_url = img_data.get("url", "")
        if not img_url:
            inner = img_data.get("image_url", img_data)
            img_url = inner.get("url", "") if isinstance(inner, dict) else str(inner)
    else:
        img_url = str(img_data)
    if not img_url:
        return None
    # The digest is only a compact cache key, not a security measure.
    img_hash = hashlib.md5(img_url.encode("utf-8", errors="replace")).hexdigest()
    if img_hash in cache:
        return cache[img_hash]
    try:
        payload = json.dumps({
            "model": VISION_FALLBACK_MODEL,
            "messages": [{"role": "user", "content": [
                {"type": "text", "text": "Describe the content of this image in detail. If it contains text, transcribe it fully."},
                {"type": "image_url", "image_url": {"url": img_url}},
            ]}],
            "max_tokens": 1024,
            "stream": False,
        }).encode()
        headers = {"Content-Type": "application/json"}
        if VISION_FALLBACK_KEY:
            headers["Authorization"] = f"Bearer {VISION_FALLBACK_KEY}"
        req = urllib.request.Request(VISION_FALLBACK_URL, data=payload, headers=headers)
        # The context manager closes the HTTP response even when reading or
        # JSON decoding raises, so failed calls do not leak connections.
        with urllib.request.urlopen(req, timeout=30) as resp:
            body = json.loads(resp.read().decode())
        choices = body.get("choices", [])
        if choices:
            msg = choices[0].get("message", {})
            desc = msg.get("content", "")
            if desc:
                cache[img_hash] = desc
                return desc
    except Exception as e:
        # Best effort: any failure is logged and reported to the caller as None.
        print(f"[vision-fallback] error describing image: {e}", file=sys.stderr)
    return None
def _preprocess_vision(messages, schema):
    """Replace image parts with text descriptions when the provider lacks vision.

    Args:
        messages: List of chat messages. For every dict message whose
            "content" is a list, parts of type "image_url" or "input_image"
            are replaced by a {"type": "text"} part. The message dicts are
            updated in place; entries that are not dicts are left untouched.
        schema: Object exposing a boolean ``supports_vision`` attribute.

    Returns:
        The same ``messages`` list that was passed in.
    """
    if schema.supports_vision:
        return messages
    # One cache for the whole call, handed to the describer for every image.
    cache = {}
    for msg in messages:
        # Guard like the per-part check below: skip anything that is not a dict.
        if not isinstance(msg, dict):
            continue
        content = msg.get("content")
        if not isinstance(content, list):
            continue
        new_parts = []
        changed = False
        for part in content:
            if isinstance(part, dict) and part.get("type") in ("image_url", "input_image"):
                changed = True
                img_data = part.get("image_url", part)
                description = _vision_describe_image(img_data, cache)
                if description:
                    text = f"[Image: {description}]"
                else:
                    text = "[Image: description unavailable - text-only model]"
                new_parts.append({"type": "text", "text": text})
            else:
                new_parts.append(part)
        # Only rewrite the message when at least one image part was replaced.
        if changed:
            msg["content"] = new_parts
    return messages
def _preprocess_vision_input(input_data, schema):
    """Replace image parts in Responses API input items with text descriptions.

    Args:
        input_data: Responses API ``input`` value. Anything that is not a
            list is returned unchanged. Within a list, only dict items of
            type "message" whose "content" is a list are inspected; their
            "input_image" / "image_url" parts are replaced by
            {"type": "input_text"} parts. Items are updated in place.
        schema: Object exposing a boolean ``supports_vision`` attribute.

    Returns:
        The same ``input_data`` object that was passed in.
    """
    if schema.supports_vision:
        return input_data
    if not isinstance(input_data, list):
        return input_data
    cache = {}
    for item in input_data:
        # Skip entries that are not dicts instead of failing on ``.get``.
        if not isinstance(item, dict) or item.get("type") != "message":
            continue
        content = item.get("content")
        if not isinstance(content, list):
            continue
        new_parts = []
        changed = False
        for part in content:
            if isinstance(part, dict) and part.get("type") in ("input_image", "image_url"):
                changed = True
                # "image_url" may be a plain URL string (Responses API
                # input_image) or a {"url": ...} dict (chat-style image_url).
                ref = part.get("image_url")
                if isinstance(ref, dict):
                    img_url = ref.get("url", "")
                elif isinstance(ref, str):
                    img_url = ref
                else:
                    img_url = ""
                # Chat-style parts may also carry the URL directly on the part.
                if not img_url and part.get("type") == "image_url":
                    img_url = part.get("url", "")
                desc = _vision_describe_image({"url": img_url}, cache)
                if desc:
                    text = f"[Image: {desc}]"
                else:
                    text = "[Image: description unavailable - text-only model]"
                new_parts.append({"type": "input_text", "text": text})
            else:
                new_parts.append(part)
        # Only rewrite the item when at least one image part was replaced.
        if changed:
            item["content"] = new_parts
    return input_data
def _strip_images_from_input(input_data, model):
if not isinstance(input_data, list) or _model_supports_vision(model):
return input_data
@@ -4014,6 +4131,7 @@ class ProviderSchema:
})
response_format: str = "auto" # "sse" | "raw_json" | "ndjson" | "auto"
stream_format: str = "auto" # "sse_data" | "sse_event" | "raw_lines" | "json_lines"
supports_vision: bool = True
def hints(self) -> dict:
"""Return a dict for storing in provider-caps.json."""
@@ -4023,7 +4141,10 @@ class ProviderSchema:
continue
if isinstance(v, dict) and not v:
continue
if v is False:
if k == "supports_vision":
if v is not False:
continue
elif v is False:
continue
if v == "":
continue
@@ -4193,6 +4314,15 @@ class ErrorAnalyzer:
elif re.search(r"tool-call|tool_call.*format", err):
hints["tool_decl_format"] = "command_code"
# ── Response/Stream format hints from content-type or error ──
# ── Vision support detection ──
if re.search(r"unknown variant\b.*image_url", err) or \
re.search(r"unexpected.*image_url", err) or \
re.search(r"does not support.*image", err) or \
re.search(r"image.*not.*support", err) or \
re.search(r"unsupported.*content.*type.*image", err):
hints["supports_vision"] = False
# ── Response/Stream format hints from content-type or error ──
if re.search(r"content.type.*text/event.stream", err) or \
re.search(r"stream.*sse|sse.*expected", err):
@@ -4253,6 +4383,7 @@ def _load_schema(target_url=None, backend=None, model=None):
})),
response_format=data.get("response_format", "auto"),
stream_format=data.get("stream_format", "auto"),
supports_vision=data.get("supports_vision", True),
)
@@ -5053,6 +5184,9 @@ class Handler(http.server.BaseHTTPRequestHandler):
body["input"] = input_data
messages = oa_input_to_messages(input_data)
_schema = _load_schema(model=model)
if _schema and not _schema.supports_vision:
messages = _preprocess_vision(messages, _schema)
messages = _inject_stored_reasoning(messages)
instructions = body.get("instructions", "").strip()
if instructions:
@@ -5082,6 +5216,18 @@ class Handler(http.server.BaseHTTPRequestHandler):
upstream = urllib.request.urlopen(req, timeout=_upstream_timeout(body, stream))
except urllib.error.HTTPError as e:
err_body = e.read().decode()
if re.search(r"unknown variant\b.*image_url", err_body.lower()) or \
re.search(r"unexpected.*image_url", err_body.lower()) or \
re.search(r"does not support.*image", err_body.lower()):
_schema = _load_schema(model=model)
if _schema:
_schema.supports_vision = False
if attempt < max_retries:
print(f"[{self._session_id}] vision not supported, retrying with image preprocessing", file=sys.stderr)
messages = _preprocess_vision(messages, _schema) if _schema else messages
chat_body = self._build_chat_body(model, messages, body, stream)
chat_body_b = json.dumps(chat_body).encode()
continue
if "context_length_exceeded" in err_body and attempt < max_retries:
import re as _re
_tok_m = _re.search(r'~?(\d+)\s*tokens', err_body)
@@ -6869,7 +7015,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
prev_content_type = None # for oscillation detection
for attempt in range(max_retries + 1):
adapter = SchemaAdapter(schema)
messages = adapter.convert(input_data, instructions)
processed_input = _preprocess_vision_input(input_data, schema) if not schema.supports_vision else input_data
messages = adapter.convert(processed_input, instructions)
use_cc_wrap = schema.cc_body_wrap or is_cc
# Build auth header from schema