v3.11.0: merge cobra PR, smart-continue, hot-reload, XML extraction
- Merge PR #5 from cobra91: concurrency semaphore, auto-continue, SO_REUSEADDR, proxy-stderr.log, stream diagnostics, timeout handler, restart proxy fix
- Tool call argument normalizer, smart-continue loop, XML extraction
- API key hot-reload with mtime tracking + /admin/ endpoints
- GUI hot-reload on endpoint edit with upstream verification
- Synthetic tool-results disabled (caused deepseek-v4-pro truncation)
- Version bump 3.10.12 -> 3.11.0, rebuild .deb
This commit is contained in:
@@ -26,6 +26,19 @@ model_catalog_json = ""
|
||||
"""
|
||||
|
||||
CHANGELOG = [
|
||||
("3.11.0", "2026-05-26", [
|
||||
"Merge cobra PR: concurrency semaphore (max 3), auto-continue for truncated text",
|
||||
"SO_REUSEADDR on sticky port, proxy-stderr.log, stream diagnostics logging",
|
||||
"Timeout/OSError handler sends response.failed SSE instead of silent drop",
|
||||
"Restart Proxy button: only restarts proxy without killing Codex Desktop",
|
||||
"Tool call argument normalizer: fixes Arguments→arguments, strips markdown wrapping",
|
||||
"Smart-continue loop (2× retries): escalating nudges when model stops text-only mid-task",
|
||||
"XML tool call extraction: parses <tool_call> patterns from text, injects as real calls",
|
||||
"Auto-continue + smart-continue ordered with skip guard to avoid double-firing",
|
||||
"API key hot-reload with mtime tracking + /admin/reload + /admin/verify-key endpoints",
|
||||
"GUI hot-reload: auto-refreshes proxy key on endpoint edit, verifies with upstream",
|
||||
"Synthetic tool-results disabled: was causing deepseek-v4-pro truncation on opencode.ai",
|
||||
]),
|
||||
("3.10.4", "2026-05-25", [
|
||||
"OAuth Secrets editor in GUI — update client ID/secret without editing files",
|
||||
"Secrets stored in ~/.config/codex-launcher/oauth-secrets.json (not in repo)",
|
||||
@@ -361,7 +374,7 @@ PROVIDER_PRESETS = {
|
||||
},
|
||||
"Google Antigravity (OAuth)": {
|
||||
"backend_type": "gemini-oauth-antigravity",
|
||||
"base_url": "https://daily-cloudcode-pa.sandbox.googleapis.com",
|
||||
"base_url": "https://cloudcode-pa.googleapis.com",
|
||||
"oauth_provider": "google-antigravity",
|
||||
"models": [
|
||||
"Gemini 3.5 Flash (High)", "Gemini 3.5 Flash (Medium)", "Gemini 3.5 Flash (Low)",
|
||||
@@ -1782,6 +1795,64 @@ class AIMonitoringWindow(Gtk.Window):
|
||||
# Main window
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
def _oauth_discover_project(access_token, token_path, tokens):
    """Discover the Code Assist project for a token and persist it.

    Calls ``loadCodeAssist`` to read ``cloudaicompanionProject``, probes the
    project with ``listModels``, and, when that probe answers
    403 ``SERVICE_DISABLED``, walks the account's ACTIVE projects looking for
    one whose ``listModels`` call succeeds.

    Args:
        access_token: OAuth bearer token sent on every request.
        token_path: File that receives ``tokens`` (rewritten, then chmod 0600).
        tokens: Token dict; ``tokens["project_id"]`` is set in place.

    Returns:
        The selected project id, or ``""`` when ``loadCodeAssist`` yields no
        project (in that case nothing is written to ``token_path``).

    Raises:
        OSError: if ``token_path`` cannot be written.
    """
    api_base = "https://cloudcode-pa.googleapis.com/v1internal"
    auth_headers = {
        "Authorization": f"Bearer {access_token}",
        "User-Agent": "google-api-nodejs-client/9.15.1",
    }

    def _probe(pid):
        # Raises on any failure; the response is closed as soon as it arrives.
        url = f"{api_base}:listModels?project={urllib.parse.quote(str(pid), safe='')}"
        req = urllib.request.Request(url, headers=auth_headers)
        with urllib.request.urlopen(req, timeout=10):
            pass

    project_id = ""
    try:
        load_req = urllib.request.Request(
            f"{api_base}:loadCodeAssist",
            data=json.dumps({}).encode(),
            headers={"Content-Type": "application/json", **auth_headers})
        with urllib.request.urlopen(load_req, timeout=15) as load_resp:
            load_data = json.loads(load_resp.read())
        proj = load_data.get("cloudaicompanionProject", "")
        if isinstance(proj, dict):
            project_id = proj.get("id", "")
        elif isinstance(proj, str):
            project_id = proj
    except Exception as exc:
        # Discovery is best-effort, but say why it produced nothing.
        print(f"[oauth] loadCodeAssist failed (non-fatal): {exc}", file=sys.stderr)
    if not project_id:
        return ""

    try:
        _probe(project_id)
    except urllib.error.HTTPError as e:
        if e.code == 403 and "SERVICE_DISABLED" in e.read().decode("utf-8", errors="replace")[:500]:
            print(f"[oauth] project {project_id} has API disabled, searching for valid project...", file=sys.stderr)
            try:
                list_req = urllib.request.Request(
                    "https://cloudresourcemanager.googleapis.com/v1/projects?filter=lifecycleState:ACTIVE",
                    headers={"Authorization": f"Bearer {access_token}"})
                with urllib.request.urlopen(list_req, timeout=15) as list_resp:
                    projects = json.loads(list_resp.read()).get("projects", [])
                for candidate in projects:
                    pid = candidate.get("projectId", "")
                    if not pid or pid == project_id:
                        continue
                    try:
                        _probe(pid)
                    except Exception:
                        continue
                    project_id = pid
                    print(f"[oauth] found working project: {pid}", file=sys.stderr)
                    break
            except Exception as exc:
                print(f"[oauth] project search failed (non-fatal): {exc}", file=sys.stderr)
    except OSError as exc:
        # URLError and socket timeouts are OSError subclasses. A transient
        # network failure on the probe must not discard the project id that
        # loadCodeAssist already returned.
        print(f"[oauth] listModels probe failed (non-fatal): {exc}", file=sys.stderr)

    tokens["project_id"] = project_id
    with open(token_path, "w") as f:
        json.dump(tokens, f, indent=2)
    os.chmod(token_path, 0o600)
    return project_id
|
||||
|
||||
class LauncherWin(Gtk.Window):
|
||||
def __init__(self):
|
||||
super().__init__(title="Codex Launcher")
|
||||
@@ -1798,7 +1869,7 @@ class LauncherWin(Gtk.Window):
|
||||
# header row
|
||||
hdr = Gtk.Box(spacing=8)
|
||||
vbox.pack_start(hdr, False, False, 0)
|
||||
lbl = Gtk.Label(label="<b>Codex Launcher v3.10.7</b>")
|
||||
lbl = Gtk.Label(label="<b>Codex Launcher v3.10.9</b>")
|
||||
lbl.set_use_markup(True)
|
||||
hdr.pack_start(lbl, False, False, 0)
|
||||
changelog_btn = Gtk.Button(label="Changelog")
|
||||
@@ -2832,63 +2903,163 @@ class LauncherWin(Gtk.Window):
|
||||
_stop_proxy()
|
||||
Gtk.main_quit()
|
||||
|
||||
def _google_reoauth(self, provider, parent_dlg=None):
    """Re-run the Google OAuth flow (PKCE + loopback redirect) for a provider.

    Reads client_id/client_secret from
    ``~/.config/codex-launcher/oauth-secrets.json``, opens the browser on the
    Google consent page, waits up to 120 s on a local HTTP server for the
    redirect, exchanges the code for tokens, stores them under
    ``~/.cache/codex-proxy/`` and runs project discovery.

    Args:
        provider: ``"google-antigravity"`` selects the Antigravity secrets,
            scopes and fixed port 51121; any other value selects Gemini CLI.
        parent_dlg: Optional dialog to parent the progress dialog to;
            defaults to this window.
    """
    import html
    import http.server

    is_antigravity = provider == "google-antigravity"
    sec_key = "antigravity" if is_antigravity else "gemini_cli"
    display_name = "Antigravity" if is_antigravity else "Gemini CLI"

    secrets_file = os.path.expanduser("~/.config/codex-launcher/oauth-secrets.json")
    try:
        with open(secrets_file) as fh:
            secrets_data = json.load(fh)
    except Exception:
        secrets_data = {}
    if not isinstance(secrets_data, dict):
        secrets_data = {}
    sec = secrets_data.get(sec_key) or {}
    client_id = sec.get("client_id", "")
    client_secret = sec.get("client_secret", "")
    if not client_id or not client_secret:
        self._show_error_dialog("Missing OAuth secrets",
            f"No client_id/client_secret for {sec_key}.\nSet them in OAuth Secrets first.")
        return

    token_file = "google-antigravity-oauth-token.json" if is_antigravity else "google-cli-oauth-token.json"
    token_path = os.path.expanduser(f"~/.cache/codex-proxy/{token_file}")
    provider_kind = "antigravity" if is_antigravity else "cli"

    scopes = [
        "https://www.googleapis.com/auth/cloud-platform",
        "https://www.googleapis.com/auth/userinfo.email",
        "https://www.googleapis.com/auth/userinfo.profile",
    ]
    if is_antigravity:
        scopes += [
            "https://www.googleapis.com/auth/cclog",
            "https://www.googleapis.com/auth/experimentsandconfigs",
        ]
        port = 51121
        redirect_uri = f"http://localhost:{port}/oauth-callback"
        callback_path = "/oauth-callback"
    else:
        # Ask the OS for a free port; it is re-bound by the HTTP server below.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.bind(("127.0.0.1", 0))
            port = s.getsockname()[1]
        redirect_uri = f"http://127.0.0.1:{port}/oauth2callback"
        callback_path = "/oauth2callback"

    # CSRF state plus PKCE verifier/challenge (S256).
    state = secrets.token_hex(32)
    verifier = secrets.token_urlsafe(64)
    challenge = base64.urlsafe_b64encode(hashlib.sha256(verifier.encode()).digest()).rstrip(b"=").decode()

    auth_url = (
        f"https://accounts.google.com/o/oauth2/v2/auth?"
        f"client_id={urllib.parse.quote(client_id, safe='')}"
        f"&redirect_uri={urllib.parse.quote(redirect_uri)}"
        f"&response_type=code"
        f"&scope={urllib.parse.quote(' '.join(scopes))}"
        f"&access_type=offline"
        f"&prompt=select_account%20consent"
        f"&state={state}"
        f"&code_challenge={challenge}"
        f"&code_challenge_method=S256"
    )

    oauth_dlg = Gtk.Dialog(title=f"Re-OAuth: {display_name}", parent=parent_dlg or self, modal=True)
    oauth_dlg.add_button("Cancel", Gtk.ResponseType.CANCEL)
    oauth_dlg.set_default_size(520, 200)
    ca = oauth_dlg.get_content_area()
    ca.set_margin_start(12)
    ca.set_margin_end(12)
    ca.set_spacing(6)
    ca.pack_start(Gtk.Label(label=f"<b>Re-authenticating {display_name}</b>", use_markup=True, xalign=0), False, False, 0)
    link_lbl = Gtk.Label(xalign=0)
    # The URL contains '&', which must be escaped to be valid label markup.
    link_lbl.set_markup(f'<a href="{html.escape(auth_url, quote=True)}">Click here to open Google authorization</a>')
    ca.pack_start(link_lbl, False, False, 4)
    status_lbl = Gtk.Label(label="Waiting for browser callback...", xalign=0)
    ca.pack_start(status_lbl, False, False, 4)
    ca.show_all()

    code_holder = [None]
    error_holder = [None]
    # Set once the dialog has been dismissed; stops the worker and UI updates.
    cancelled = threading.Event()

    class OAuthHandler(http.server.BaseHTTPRequestHandler):
        def do_GET(handler):
            parsed = urllib.parse.urlparse(handler.path)
            if parsed.path != callback_path:
                # Stray requests (e.g. /favicon.ico) must not end the flow.
                handler.send_response(404)
                handler.end_headers()
                return
            params = urllib.parse.parse_qs(parsed.query)
            if "code" in params:
                if params.get("state", [None])[0] != state:
                    handler.send_response(400)
                    handler.end_headers()
                    handler.wfile.write(b"CSRF state mismatch")
                    error_holder[0] = "CSRF state mismatch"
                    return
                code_holder[0] = params["code"][0]
                handler.send_response(302)
                handler.send_header("Location", "https://developers.google.com/gemini-code-assist/auth_success_gemini")
                handler.end_headers()
            else:
                error_holder[0] = params.get("error", ["unknown"])[0]
                handler.send_response(302)
                handler.send_header("Location", "https://developers.google.com/gemini-code-assist/auth_failure_gemini")
                handler.end_headers()

        def log_message(handler, fmt, *args):
            pass

    try:
        bind_host = "localhost" if is_antigravity else "127.0.0.1"
        server = http.server.HTTPServer((bind_host, port), OAuthHandler)
    except OSError:
        status_lbl.set_text(f"Port {port} in use — close other apps and retry.")
        oauth_dlg.run()
        oauth_dlg.destroy()
        return
    # Without a timeout handle_request() blocks until a request arrives, so
    # the deadline below would never be checked and the port would stay bound.
    server.timeout = 1

    def _set_status(text):
        # Runs on the GTK main loop; skip once the dialog is gone.
        if not cancelled.is_set():
            status_lbl.set_text(text)
        return False

    def _close_dialog():
        # Ends oauth_dlg.run(); the single destroy() happens after run().
        if not cancelled.is_set():
            oauth_dlg.response(Gtk.ResponseType.OK)
        return False

    def _wait():
        deadline = time.time() + 120
        try:
            while (code_holder[0] is None and error_holder[0] is None
                   and not cancelled.is_set() and time.time() < deadline):
                server.handle_request()
        finally:
            server.server_close()
        if cancelled.is_set():
            return
        if not code_holder[0]:
            GLib.idle_add(_set_status, f"Failed: {error_holder[0] or 'No code received'}")
            return
        try:
            tok_data = urllib.parse.urlencode({
                "code": code_holder[0], "client_id": client_id, "client_secret": client_secret,
                "redirect_uri": redirect_uri, "grant_type": "authorization_code",
                "code_verifier": verifier,
            }).encode()
            req = urllib.request.Request("https://oauth2.googleapis.com/token", data=tok_data,
                headers={"Content-Type": "application/x-www-form-urlencoded"})
            with urllib.request.urlopen(req, timeout=30) as resp:
                tokens = json.loads(resp.read())
            tokens["client_id"] = client_id
            tokens["client_secret"] = client_secret
            tokens["provider_kind"] = provider_kind
            tokens["expires_at"] = time.time() + tokens.get("expires_in", 3600)
            os.makedirs(os.path.dirname(token_path), exist_ok=True)
            with open(token_path, "w") as f:
                json.dump(tokens, f, indent=2)
            os.chmod(token_path, 0o600)
            access_token = tokens["access_token"]
        except Exception as e:
            GLib.idle_add(_set_status, f"Token exchange failed: {str(e)[:200]}")
            return
        # The token is already saved; a discovery failure is not an exchange failure.
        try:
            project_id = _oauth_discover_project(access_token, token_path, tokens)
        except Exception:
            project_id = ""
        GLib.idle_add(_set_status, f"Authorization successful! Project: {project_id or 'none'}")
        GLib.timeout_add_seconds(2, _close_dialog)

    webbrowser.open(auth_url)
    threading.Thread(target=_wait, daemon=True).start()
    oauth_dlg.run()
    cancelled.set()
    oauth_dlg.destroy()
|
||||
|
||||
def _codebuff_reoauth(self):
|
||||
self._codebuff_oauth_standalone()
|
||||
@@ -3019,7 +3190,7 @@ class LauncherWin(Gtk.Window):
|
||||
hdr_row.pack_start(Gtk.Label(label=f"\n<b>{section_label}</b>", use_markup=True, xalign=0), True, True, 0)
|
||||
reauth_btn = Gtk.Button(label="Re-OAuth")
|
||||
reauth_btn.set_size_request(80, -1)
|
||||
reauth_btn.connect("clicked", lambda b, p=oauth_prov: self._google_reoauth(p))
|
||||
reauth_btn.connect("clicked", lambda b, p=oauth_prov: self._google_reoauth(p, dlg))
|
||||
hdr_row.pack_end(reauth_btn, False, False, 0)
|
||||
import_btn = Gtk.Button(label="Import JSON")
|
||||
import_btn.set_size_request(100, -1)
|
||||
@@ -3868,32 +4039,8 @@ class EditEndpointDialog(Gtk.Dialog):
|
||||
json.dump(tokens, f, indent=2)
|
||||
os.chmod(token_path, 0o600)
|
||||
_oauth_log(f"Token saved to {token_path}")
|
||||
project_id = ""
|
||||
try:
|
||||
_oauth_log("Discovering project ID via loadCodeAssist...")
|
||||
lr = urllib.request.Request(
|
||||
"https://cloudcode-pa.googleapis.com/v1internal:loadCodeAssist",
|
||||
data=json.dumps({}).encode(),
|
||||
headers={
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {tokens['access_token']}",
|
||||
"User-Agent": "google-api-nodejs-client/9.15.1",
|
||||
})
|
||||
lresp = urllib.request.urlopen(lr, timeout=15)
|
||||
ldata = json.loads(lresp.read())
|
||||
p = ldata.get("cloudaicompanionProject", "")
|
||||
if isinstance(p, dict):
|
||||
project_id = p.get("id", "")
|
||||
elif isinstance(p, str):
|
||||
project_id = p
|
||||
_oauth_log(f"Project ID: {project_id or '(none)'}")
|
||||
if project_id:
|
||||
tokens["project_id"] = project_id
|
||||
with open(token_path, "w") as f2:
|
||||
json.dump(tokens, f2, indent=2)
|
||||
os.chmod(token_path, 0o600)
|
||||
except Exception as pe:
|
||||
_oauth_log(f"loadCodeAssist failed (non-fatal): {pe}")
|
||||
project_id = _oauth_discover_project(tokens["access_token"], token_path, tokens)
|
||||
_oauth_log(f"Project ID: {project_id or '(none)'}")
|
||||
if is_antigravity:
|
||||
found_models = [
|
||||
"gemini-2.5-flash", "gemini-2.5-pro",
|
||||
@@ -3915,7 +4062,7 @@ class EditEndpointDialog(Gtk.Dialog):
|
||||
for mc in probe_candidates:
|
||||
try:
|
||||
pr = urllib.request.Request(
|
||||
"https://daily-cloudcode-pa.sandbox.googleapis.com/v1internal:generateContent",
|
||||
"https://cloudcode-pa.googleapis.com/v1internal:generateContent",
|
||||
data=json.dumps({
|
||||
"project": project_id,
|
||||
"model": mc,
|
||||
@@ -4264,10 +4411,54 @@ class EditEndpointDialog(Gtk.Dialog):
|
||||
data["default"] = name
|
||||
|
||||
save_endpoints(data)
|
||||
self._hot_reload_proxy_key(new_ep)
|
||||
self._parent_mgr._rebuild()
|
||||
self._parent_mgr._parent._on_endpoints_updated()
|
||||
self.destroy()
|
||||
|
||||
def _hot_reload_proxy_key(self, ep):
    """Push an edited endpoint's API key to its proxy config and live proxy.

    Finds the ``proxy-*.json`` file in ``PROXY_CONFIG_DIR`` whose file name
    contains the endpoint's slug (lower-cased name, spaces replaced by ``-``),
    rewrites its ``api_key``, then asks the proxy on the configured port to
    reload via ``/admin/reload`` and, if it reloaded, checks the key via
    ``/admin/verify-key``. Results are written to the parent window's log.

    Best-effort: no exception leaves this method, so a failure here never
    blocks saving the endpoint.

    Args:
        ep: Endpoint dict; ``name`` and ``api_key`` are read.
    """
    def _log(msg):
        self._parent_mgr._parent.log(msg)

    try:
        slug = ep.get("name", "").lower().replace(" ", "-")
        if not slug:
            # An empty slug is a substring of every path and would overwrite
            # the key of whichever proxy config happens to be listed first.
            return

        import glob as _glob
        proxy_port = None
        for cfg_file in _glob.glob(str(PROXY_CONFIG_DIR / "proxy-*.json")):
            # Match the file name only, never the directory part of the path.
            if slug not in os.path.basename(cfg_file).lower():
                continue
            try:
                with open(cfg_file) as f:
                    pcfg = json.load(f)
                pcfg["api_key"] = ep.get("api_key", "")
                with open(cfg_file, "w") as f:
                    json.dump(pcfg, f, indent=2)
            except Exception:
                continue
            proxy_port = pcfg.get("port")
            break
        if not proxy_port:
            return

        # NOTE(review): these requests are synchronous (3 s + 10 s timeouts);
        # if this runs on the GTK main thread the UI waits for them.
        import urllib.request as _ur
        try:
            with _ur.urlopen(f"http://127.0.0.1:{proxy_port}/admin/reload", timeout=3) as resp:
                result = json.loads(resp.read())
            reloaded = result.get("reloaded", False)
            preview = result.get("api_key_preview", "?")
            _log(f"[hot-reload] key {'updated' if reloaded else 'unchanged'}: {preview}")
            if not reloaded:
                return
            with _ur.urlopen(f"http://127.0.0.1:{proxy_port}/admin/verify-key", timeout=10) as vresp:
                vresult = json.loads(vresp.read())
            if vresult.get("valid", False):
                _log(f"[hot-reload] key verified OK ({vresult.get('models', '?')} models)")
            else:
                _log(f"[hot-reload] WARNING: key verification failed: {vresult.get('error', 'unknown')}")
        except Exception as exc:
            _log(f"[hot-reload] request to proxy on port {proxy_port} failed: {exc}")
    except Exception:
        # Deliberate best-effort: hot reload must never break the save flow.
        pass
|
||||
|
||||
def _show_error(self, msg):
|
||||
d = Gtk.MessageDialog(self, 0, Gtk.MessageType.ERROR, Gtk.ButtonsType.OK, msg)
|
||||
d.run(); d.destroy()
|
||||
|
||||
@@ -83,6 +83,19 @@ model_catalog_json = ""
|
||||
"""
|
||||
|
||||
CHANGELOG = [
|
||||
("3.11.0", "2026-05-26", [
|
||||
"Merge cobra PR: concurrency semaphore (max 3), auto-continue for truncated text",
|
||||
"SO_REUSEADDR on sticky port, proxy-stderr.log, stream diagnostics logging",
|
||||
"Timeout/OSError handler sends response.failed SSE instead of silent drop",
|
||||
"Restart Proxy button: only restarts proxy without killing Codex Desktop",
|
||||
"Tool call argument normalizer: fixes Arguments→arguments, strips markdown wrapping",
|
||||
"Smart-continue loop (2× retries): escalating nudges when model stops text-only mid-task",
|
||||
"XML tool call extraction: parses <tool_call> patterns from text, injects as real calls",
|
||||
"Auto-continue + smart-continue ordered with skip guard to avoid double-firing",
|
||||
"API key hot-reload with mtime tracking + /admin/reload + /admin/verify-key endpoints",
|
||||
"GUI hot-reload: auto-refreshes proxy key on endpoint edit, verifies with upstream",
|
||||
"Synthetic tool-results disabled: was causing deepseek-v4-pro truncation on opencode.ai",
|
||||
]),
|
||||
("3.10.12", "2026-05-26", [
|
||||
"Sticky endpoint: caches last working endpoint, sequential fallback on failure",
|
||||
"Endpoint order: cloudcode-pa first (matches agy CLI), daily-cloudcode-pa fallback",
|
||||
@@ -1468,6 +1481,7 @@ def _pick_free_port():
|
||||
try:
|
||||
saved = int(_PROXY_PORT_FILE.read_text().strip())
|
||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||
s.bind(("127.0.0.1", saved))
|
||||
return saved
|
||||
except (ValueError, OSError, FileNotFoundError):
|
||||
@@ -1559,11 +1573,19 @@ def _start_proxy_with_config(pcfg_path, port, logfn):
|
||||
)
|
||||
_register_pgid_entry("proxy", _proxy_proc.pid)
|
||||
|
||||
_proxy_log_path = PROXY_CONFIG_DIR / "proxy-stderr.log"
|
||||
_proxy_log_file = open(_proxy_log_path, "a", encoding="utf-8")
|
||||
|
||||
def _pipe_stderr():
    """Forward each proxy stderr line to ``logfn`` and append it to the log file.

    Runs until the proxy's stderr stream ends, then closes the log file so
    that each proxy start does not leave an open handle behind.
    """
    # NOTE(review): assumes _proxy_log_file is used only by this reader
    # thread; nothing else in the visible hunk touches it. Confirm.
    try:
        if not _proxy_proc.stderr:
            return
        for line in _proxy_proc.stderr:
            logfn(f"[proxy] {line.rstrip()}")
            try:
                _proxy_log_file.write(line)
                _proxy_log_file.flush()
            except Exception:
                # File logging is secondary; keep forwarding to logfn.
                pass
    finally:
        try:
            _proxy_log_file.close()
        except Exception:
            pass
|
||||
|
||||
threading.Thread(target=_pipe_stderr, daemon=True).start()
|
||||
|
||||
|
||||
@@ -323,6 +323,8 @@ _conn_pool_lock = threading.Lock()
|
||||
_conn_pool = {}
|
||||
|
||||
_STREAM_IDLE_TIMEOUT = 300
|
||||
_MAX_CONCURRENT_REQUESTS = 3
|
||||
_request_semaphore = threading.Semaphore(_MAX_CONCURRENT_REQUESTS)
|
||||
|
||||
_CODEBUFF_AUTH_URL = "https://www.codebuff.com"
|
||||
_CODEBUFF_API_URL = "https://www.codebuff.com"
|
||||
@@ -4829,6 +4831,11 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
_last_user_urls.append(url_m.group(0))
|
||||
save_request_snapshot(request_id, body)
|
||||
_req_t0 = time.time()
|
||||
wait_start = time.monotonic()
|
||||
_request_semaphore.acquire()
|
||||
wait_ms = (time.monotonic() - wait_start) * 1000
|
||||
if wait_ms > 100:
|
||||
print(f"[{_sid}] waited {wait_ms:.0f}ms for upstream slot (concurrency gate)", file=sys.stderr)
|
||||
try:
|
||||
with RequestTracker(request_id) as tracker:
|
||||
if BACKEND == "auto":
|
||||
@@ -4847,6 +4854,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
except Exception as _snap_err:
|
||||
update_snapshot_response(request_id, "error", time.time() - _req_t0, _snap_err)
|
||||
raise
|
||||
finally:
|
||||
_request_semaphore.release()
|
||||
|
||||
def _handle_openai_compat(self, body, model, stream, tracker=None):
|
||||
input_data = body.get("input", "")
|
||||
@@ -4859,7 +4868,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
body = dict(body)
|
||||
body["input"] = input_data
|
||||
|
||||
if (policy.get("synthetic_tool_results") or _provider_cap(model, "synthetic_tool_results", False)) and isinstance(input_data, list):
|
||||
# synthetic tool-results disabled: causes deepseek-v4-pro truncation on opencode.ai
|
||||
if False and (policy.get("synthetic_tool_results") or _provider_cap(model, "synthetic_tool_results", False)) and isinstance(input_data, list):
|
||||
input_data, synthesized = synthesize_tool_results_for_chat(input_data)
|
||||
if synthesized:
|
||||
print("[provider-adapter] using synthetic tool-result continuation", file=sys.stderr)
|
||||
@@ -5739,11 +5749,25 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
break
|
||||
collected_events.append(event)
|
||||
_observe_event(event)
|
||||
print(f"[{self._session_id}] stream ended: events={len(collected_events)} finish={finish_reason} has_content={has_content} elapsed={time.time()-t0:.1f}s", file=sys.stderr)
|
||||
except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
|
||||
print("[translate-proxy] client disconnected during stream", file=sys.stderr)
|
||||
_crof_record(model, n_items, False)
|
||||
_log_resp(last_resp_id, "client_disconnect", last_output)
|
||||
return
|
||||
except (TimeoutError, OSError, urllib.error.URLError) as e:
|
||||
print(f"[translate-proxy] upstream error during stream: {type(e).__name__}: {e}", file=sys.stderr)
|
||||
err_resp_id = body.get("request_id") or body.get("id") or uid("resp")
|
||||
try:
|
||||
self.wfile.write(emit("response.failed", {"type": "response.failed",
|
||||
"response": {"id": err_resp_id, "error": {"type": "upstream_error",
|
||||
"code": "stream_interrupted", "message": str(e)[:200]}}}).encode())
|
||||
self.wfile.flush()
|
||||
except Exception:
|
||||
pass
|
||||
_crof_record(model, n_items, False)
|
||||
_log_resp(last_resp_id, "upstream_error", last_output)
|
||||
return
|
||||
|
||||
# Record outcome
|
||||
success = (finish_reason != "length")
|
||||
@@ -5819,43 +5843,160 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
except Exception as e:
|
||||
print(f"[crof-adaptive] retry failed: {e}", file=sys.stderr)
|
||||
|
||||
# ── Auto-continue for truncated responses ── (cobra PR)
|
||||
_ac_did_run = False
|
||||
if stream and collected_events:
|
||||
_ac_text = ""
|
||||
_ac_msg_id = _ac_resp_id = None
|
||||
for _ev in collected_events:
|
||||
for _ln in _ev.strip().split("\n"):
|
||||
if not _ln.startswith("data: "):
|
||||
continue
|
||||
try:
|
||||
_d = json.loads(_ln[6:])
|
||||
_t = _d.get("type")
|
||||
if _t == "response.output_text.done":
|
||||
_ac_text = _d.get("text", "")
|
||||
elif _t == "response.output_item.added" and _d.get("item",{}).get("type") == "message":
|
||||
_ac_msg_id = _d.get("item",{}).get("id")
|
||||
elif _t == "response.completed":
|
||||
_ac_resp_id = _d.get("response",{}).get("id")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_ac_tc = reasoning_out.get("tool_calls", [])
|
||||
_ac_truncated = False
|
||||
if not _ac_tc and _ac_text:
|
||||
_ac_stripped = _ac_text.rstrip()
|
||||
if finish_reason == "length":
|
||||
_ac_truncated = True
|
||||
elif len(_ac_stripped) > 10 and _ac_stripped[-1] in "(:,;…":
|
||||
_ac_truncated = True
|
||||
|
||||
if _ac_truncated and _ac_text:
|
||||
print(f"[{self._session_id}] auto-continue: truncated (finish={finish_reason}, ends '{_ac_text.rstrip()[-10:]}')", file=sys.stderr)
|
||||
_ac_did_run = True
|
||||
_ac_cut = len(collected_events)
|
||||
for _i, _ev2 in enumerate(collected_events):
|
||||
if "response.output_text.done" in _ev2:
|
||||
_ac_cut = _i
|
||||
break
|
||||
collected_events = collected_events[:_ac_cut]
|
||||
|
||||
_ac_accumulated = _ac_text
|
||||
_ac_max = 3
|
||||
for _ac_attempt in range(_ac_max):
|
||||
try:
|
||||
_ac_cont_msgs = list(chat_body.get("messages", []))
|
||||
_ac_cont_msgs.append({"role": "assistant", "content": _ac_accumulated})
|
||||
_ac_cont_msgs.append({"role": "user", "content": "Continue exactly where you left off. Do not repeat anything already written."})
|
||||
_ac_cont_body = dict(chat_body)
|
||||
_ac_cont_body["messages"] = _ac_cont_msgs
|
||||
_ac_cont_body["stream"] = False
|
||||
_ac_cont_req = urllib.request.Request(target, data=json.dumps(_ac_cont_body).encode(), headers=fwd)
|
||||
_ac_cont_resp = json.loads(urllib.request.urlopen(_ac_cont_req, timeout=120).read())
|
||||
_ac_choices = _ac_cont_resp.get("choices", [])
|
||||
if _ac_choices:
|
||||
_ac_chunk = _ac_choices[0].get("message",{}).get("content","")
|
||||
if not _ac_chunk:
|
||||
_ac_chunk = _ac_choices[0].get("delta",{}).get("content","")
|
||||
_ac_finish = _ac_choices[0].get("finish_reason")
|
||||
if _ac_chunk:
|
||||
_ac_accumulated += _ac_chunk
|
||||
collected_events.append(emit("response.output_text.delta", {
|
||||
"type": "response.output_text.delta",
|
||||
"delta": _ac_chunk, "item_id": _ac_msg_id, "content_index": 0}))
|
||||
if _ac_finish != "length":
|
||||
break
|
||||
_ac_text = _ac_accumulated
|
||||
except Exception as _ac_e:
|
||||
print(f"[{self._session_id}] auto-continue attempt {_ac_attempt+1} failed: {_ac_e}", file=sys.stderr)
|
||||
break
|
||||
|
||||
if _ac_msg_id:
|
||||
collected_events.append(emit("response.output_text.done", {
|
||||
"type": "response.output_text.done",
|
||||
"text": _ac_accumulated, "item_id": _ac_msg_id, "content_index": 0}))
|
||||
collected_events.append(emit("response.content_part.done", {
|
||||
"type": "response.content_part.done",
|
||||
"part": {"type": "output_text", "text": _ac_accumulated, "annotations": []}, "item_id": _ac_msg_id}))
|
||||
collected_events.append(emit("response.output_item.done", {
|
||||
"type": "response.output_item.done",
|
||||
"item": {"type": "message", "id": _ac_msg_id, "role": "assistant", "status": "completed",
|
||||
"content": [{"type": "output_text", "text": _ac_accumulated, "annotations": []}]}}))
|
||||
if _ac_resp_id:
|
||||
collected_events.append(emit("response.completed", {
|
||||
"type": "response.completed",
|
||||
"response": {"id": _ac_resp_id, "object": "response", "model": model,
|
||||
"status": "completed", "created": int(time.time()),
|
||||
"output": [{"type": "message", "id": _ac_msg_id, "role": "assistant",
|
||||
"status": "completed",
|
||||
"content": [{"type": "output_text", "text": _ac_accumulated, "annotations": []}]}]}}))
|
||||
has_content = True
|
||||
finish_reason = "stop"
|
||||
print(f"[{self._session_id}] auto-continue done: {len(_ac_text)} -> {len(_ac_accumulated)} chars", file=sys.stderr)
|
||||
|
||||
# Smart continuation: loop with escalating nudges when model stops text-only mid-task.
|
||||
_smart_max = 2
|
||||
_smart_attempt = 0
|
||||
while _smart_attempt < _smart_max:
|
||||
_has_tool_calls_in_output = any(o.get("type") == "function_call" for o in (last_output or []))
|
||||
if not (finish_reason == "stop" and has_content and not _has_tool_calls_in_output
|
||||
and isinstance(input_data, list) and len(input_data) >= 3
|
||||
and has_function_call_output(input_data)):
|
||||
break
|
||||
_smart_attempt += 1
|
||||
_nudges = [
|
||||
"Continue with the task using tool calls. Do NOT describe what to do — call the appropriate functions.",
|
||||
"You MUST use tool calls to complete the task. Read files, run commands, and make changes using tools. Do NOT output XML tool calls as text.",
|
||||
]
|
||||
nudge_text = _nudges[min(_smart_attempt - 1, len(_nudges) - 1)]
|
||||
# Try extracting XML tool calls from text as fallback before nudging
|
||||
last_text = ""
|
||||
for o in (last_output or []):
|
||||
if o.get("type") == "message":
|
||||
for c in (o.get("content") or []):
|
||||
if isinstance(c, dict) and c.get("type") == "output_text":
|
||||
last_text += c.get("text", "")
|
||||
xml_fc = _extract_xml_tool_calls(last_text)
|
||||
if xml_fc:
|
||||
print(f"[{self._session_id}] [smart-continue] extracted {len(xml_fc)} XML tool calls from text, injecting and retrying", file=sys.stderr)
|
||||
fake_input = list(input_data)
|
||||
for xfc in xml_fc:
|
||||
fake_input.append({"type": "function_call", "id": uid("fcx"), "call_id": uid("fcx"),
|
||||
"name": xfc["name"], "arguments": xfc["args"], "status": "completed"})
|
||||
fake_messages = oa_input_to_messages(fake_input)
|
||||
# Skip if auto-continue already handled the response.
|
||||
if not _ac_did_run:
|
||||
_smart_max = 2
|
||||
_smart_attempt = 0
|
||||
while _smart_attempt < _smart_max:
|
||||
_has_tool_calls_in_output = any(o.get("type") == "function_call" for o in (last_output or []))
|
||||
if not (finish_reason == "stop" and has_content and not _has_tool_calls_in_output
|
||||
and isinstance(input_data, list) and len(input_data) >= 3
|
||||
and has_function_call_output(input_data)):
|
||||
break
|
||||
_smart_attempt += 1
|
||||
_nudges = [
|
||||
"Continue with the task using tool calls. Do NOT describe what to do — call the appropriate functions.",
|
||||
"You MUST use tool calls to complete the task. Read files, run commands, and make changes using tools. Do NOT output XML tool calls as text.",
|
||||
]
|
||||
nudge_text = _nudges[min(_smart_attempt - 1, len(_nudges) - 1)]
|
||||
# Try extracting XML tool calls from text as fallback before nudging
|
||||
last_text = ""
|
||||
for o in (last_output or []):
|
||||
if o.get("type") == "message":
|
||||
for c in (o.get("content") or []):
|
||||
if isinstance(c, dict) and c.get("type") == "output_text":
|
||||
last_text += c.get("text", "")
|
||||
xml_fc = _extract_xml_tool_calls(last_text)
|
||||
if xml_fc:
|
||||
print(f"[{self._session_id}] [smart-continue] extracted {len(xml_fc)} XML tool calls from text, injecting and retrying", file=sys.stderr)
|
||||
fake_input = list(input_data)
|
||||
for xfc in xml_fc:
|
||||
fake_input.append({"type": "function_call", "id": uid("fcx"), "call_id": uid("fcx"),
|
||||
"name": xfc["name"], "arguments": xfc["args"], "status": "completed"})
|
||||
fake_messages = oa_input_to_messages(fake_input)
|
||||
instructions = body.get("instructions", "").strip()
|
||||
if instructions:
|
||||
fake_messages.insert(0, {"role": "system", "content": instructions})
|
||||
fake_chat_body = self._build_chat_body(model, fake_messages, body, stream)
|
||||
fake_req = urllib.request.Request(target, data=json.dumps(fake_chat_body).encode(), headers=fwd)
|
||||
try:
|
||||
retry_upstream = urllib.request.urlopen(fake_req, timeout=_upstream_timeout(body, True))
|
||||
collected_events = []
|
||||
last_resp_id = last_output = last_status = None
|
||||
finish_reason = None
|
||||
has_content = False
|
||||
for event in oa_stream_to_sse(retry_upstream, model, body.get("request_id") or body.get("id")):
|
||||
collected_events.append(event)
|
||||
_observe_event(event)
|
||||
input_data = fake_input
|
||||
continue
|
||||
except Exception as e:
|
||||
print(f"[{self._session_id}] [smart-continue] XML injection retry failed: {e}", file=sys.stderr)
|
||||
break
|
||||
_nudge_msg = {"role": "user", "content": nudge_text}
|
||||
nudge_messages = oa_input_to_messages(input_data) + [_nudge_msg]
|
||||
instructions = body.get("instructions", "").strip()
|
||||
if instructions:
|
||||
fake_messages.insert(0, {"role": "system", "content": instructions})
|
||||
fake_chat_body = self._build_chat_body(model, fake_messages, body, stream)
|
||||
fake_req = urllib.request.Request(target, data=json.dumps(fake_chat_body).encode(), headers=fwd)
|
||||
nudge_messages.insert(0, {"role": "system", "content": instructions})
|
||||
nudge_chat_body = self._build_chat_body(model, nudge_messages, body, stream)
|
||||
nudge_req = urllib.request.Request(target, data=json.dumps(nudge_chat_body).encode(), headers=fwd)
|
||||
print(f"[{self._session_id}] [smart-continue] attempt {_smart_attempt}/{_smart_max}: model stopped mid-task, nudging", file=sys.stderr)
|
||||
try:
|
||||
retry_upstream = urllib.request.urlopen(fake_req, timeout=_upstream_timeout(body, True))
|
||||
retry_upstream = urllib.request.urlopen(nudge_req, timeout=_upstream_timeout(body, True))
|
||||
collected_events = []
|
||||
last_resp_id = last_output = last_status = None
|
||||
finish_reason = None
|
||||
@@ -5863,31 +6004,9 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
for event in oa_stream_to_sse(retry_upstream, model, body.get("request_id") or body.get("id")):
|
||||
collected_events.append(event)
|
||||
_observe_event(event)
|
||||
input_data = fake_input
|
||||
continue
|
||||
except Exception as e:
|
||||
print(f"[{self._session_id}] [smart-continue] XML injection retry failed: {e}", file=sys.stderr)
|
||||
print(f"[{self._session_id}] [smart-continue] nudge attempt {_smart_attempt} failed: {e}", file=sys.stderr)
|
||||
break
|
||||
_nudge_msg = {"role": "user", "content": nudge_text}
|
||||
nudge_messages = oa_input_to_messages(input_data) + [_nudge_msg]
|
||||
instructions = body.get("instructions", "").strip()
|
||||
if instructions:
|
||||
nudge_messages.insert(0, {"role": "system", "content": instructions})
|
||||
nudge_chat_body = self._build_chat_body(model, nudge_messages, body, stream)
|
||||
nudge_req = urllib.request.Request(target, data=json.dumps(nudge_chat_body).encode(), headers=fwd)
|
||||
print(f"[{self._session_id}] [smart-continue] attempt {_smart_attempt}/{_smart_max}: model stopped mid-task, nudging", file=sys.stderr)
|
||||
try:
|
||||
retry_upstream = urllib.request.urlopen(nudge_req, timeout=_upstream_timeout(body, True))
|
||||
collected_events = []
|
||||
last_resp_id = last_output = last_status = None
|
||||
finish_reason = None
|
||||
has_content = False
|
||||
for event in oa_stream_to_sse(retry_upstream, model, body.get("request_id") or body.get("id")):
|
||||
collected_events.append(event)
|
||||
_observe_event(event)
|
||||
except Exception as e:
|
||||
print(f"[{self._session_id}] [smart-continue] nudge attempt {_smart_attempt} failed: {e}", file=sys.stderr)
|
||||
break
|
||||
|
||||
self.stream_buffered_events(collected_events)
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user