NOTE(review): line breaks and indentation were reconstructed from a whitespace-collapsed copy of this patch.
NOTE(review): context-line whitespace (notably the realigned send_json arguments in the -4585 hunk) is approximate; verify against the base file before applying.
diff --git a/src/translate-proxy.py b/src/translate-proxy.py
index 9701a28..5fd20a5 100755
--- a/src/translate-proxy.py
+++ b/src/translate-proxy.py
@@ -188,6 +188,7 @@ DEFAULT_MODELS = {
 }
 
 def load_config():
+    global _CONFIG_PATH, _CONFIG_MTIME
     p = argparse.ArgumentParser(description="Responses API translation proxy")
     p.add_argument("--config", help="JSON config file path")
     p.add_argument("--port", type=int, default=None)
@@ -195,16 +196,22 @@ def load_config():
     p.add_argument("--target-url", default=None)
     p.add_argument("--api-key", default=None)
     p.add_argument("--models-file", default=None, help="JSON file with model list array")
-    args = p.parse_args()
+    _args = p.parse_args()
 
     cfg = {}
-    if args.config:
-        with open(args.config) as f:
+    if _args.config:
+        _CONFIG_PATH = os.path.abspath(_args.config)
+        # Stat before reading so an edit that races with this load still looks changed later.
+        try:
+            _CONFIG_MTIME = os.path.getmtime(_CONFIG_PATH)
+        except OSError:
+            pass
+        with open(_args.config) as f:
             cfg = json.load(f)
 
     for ck, ak in [("port", "port"), ("backend_type", "backend"),
                    ("target_url", "target_url"), ("api_key", "api_key")]:
-        v = getattr(args, ak, None)
+        v = getattr(_args, ak, None)
         if v is not None:
             cfg[ck] = v
 
@@ -226,8 +233,8 @@ def load_config():
     cfg.setdefault("api_key", "")
 
     models = cfg.get("models", [])
-    if not models and args.models_file:
-        with open(args.models_file) as f:
+    if not models and _args.models_file:
+        with open(_args.models_file) as f:
             models = json.load(f)
     if not models:
         models = DEFAULT_MODELS.get(cfg["backend_type"], [])
@@ -236,6 +243,8 @@ def load_config():
     return cfg
 
 CONFIG = None
+_CONFIG_PATH = None
+_CONFIG_MTIME = 0
 PORT = 8080
 BACKEND = "openai-compat"
 TARGET_URL = ""
@@ -867,6 +876,97 @@ def _init_runtime():
         _antigravity_version = _ensure_antigravity_version()
         print(f"[antigravity] version={_antigravity_version}", file=sys.stderr)
 
+def _key_preview(key):
+    """Return a log-safe preview of *key* that never exposes the whole secret."""
+    if not key:
+        return "(empty)"
+    if len(key) > 8:
+        return key[:8] + "..."
+    # Short keys are masked entirely: echoing them would leak the full secret.
+    return "(short)"
+
+def _verify_api_key(key, target_url):
+    """Probe the upstream ``/models`` endpoint to check that *key* is accepted.
+
+    Returns a dict: ``valid`` plus ``status``/``models`` on success, or
+    ``status``/``error`` when the upstream rejects the key or is unreachable.
+    """
+    if not key or not target_url:
+        return {"valid": False, "error": "missing key or url"}
+    test_url = upstream_target(target_url, "/models")
+    if not test_url:
+        return {"valid": False, "error": "invalid target url"}
+    try:
+        req = urllib.request.Request(test_url, headers={
+            "Authorization": f"Bearer {key}",
+            "Content-Type": "application/json",
+        })
+        # The context manager closes the upstream connection on every path.
+        with urllib.request.urlopen(req, timeout=10) as resp:
+            status = resp.status
+            body = resp.read().decode(errors="replace")
+    except urllib.error.HTTPError as e:
+        try:
+            err = e.read().decode(errors="replace")[:200]
+        finally:
+            e.close()
+        return {"valid": False, "status": e.code, "error": err}
+    except Exception as e:
+        return {"valid": False, "error": str(e)[:200]}
+    model_count = 0
+    try:
+        data = json.loads(body)
+    except ValueError:
+        data = None  # non-JSON body: the key was still accepted, report 0 models
+    if isinstance(data, dict) and isinstance(data.get("data"), list):
+        model_count = len(data["data"])
+    return {"valid": True, "status": status, "models": model_count}
+
+_HOT_RELOAD_LOCK = threading.Lock()
+
+def _hot_reload_api_key():
+    """Reload ``api_key`` from the config file when the file's mtime has changed.
+
+    Returns True only when a new, non-empty key replaced ``API_KEY``; any
+    read/parse error is logged to stderr and reported as False.
+    """
+    global API_KEY, _api_key_pool, _CONFIG_MTIME
+    if not _CONFIG_PATH:
+        return False
+    try:
+        cur_mtime = os.path.getmtime(_CONFIG_PATH)
+    except OSError:
+        return False
+    # Any mtime change counts, so a file restored with an older timestamp is picked up too.
+    if cur_mtime == _CONFIG_MTIME:
+        return False
+    with _HOT_RELOAD_LOCK:
+        try:
+            # Re-check under the lock: another thread may have reloaded already.
+            cur_mtime2 = os.path.getmtime(_CONFIG_PATH)
+            if cur_mtime2 == _CONFIG_MTIME:
+                return False
+            with open(_CONFIG_PATH) as f:
+                new_cfg = json.load(f)
+            new_key = (new_cfg.get("api_key") or "").strip()
+            if not new_key or new_key == API_KEY:
+                _CONFIG_MTIME = cur_mtime2
+                return False
+            old_preview = _key_preview(API_KEY)
+            new_preview = _key_preview(new_key)
+            API_KEY = new_key
+            _CONFIG_MTIME = cur_mtime2
+            # NOTE(review): when the new key is not a comma list, the previous
+            # _api_key_pool is left untouched — confirm that is intended.
+            if API_KEY and "," in API_KEY and not OAUTH_PROVIDER.startswith("google") and BACKEND not in ("codebuff", "freebuff"):
+                _api_key_pool = APIKeyPool(BACKEND, API_KEY)
+                print(f"[hot-reload] API key pool refreshed: {len(_api_key_pool._accounts)} keys", file=sys.stderr)
+            print(f"[hot-reload] API key updated: {old_preview} -> {new_preview}", file=sys.stderr)
+            return True
+        except Exception as e:
+            print(f"[hot-reload] error: {e}", file=sys.stderr)
+            return False
+
 bgp_models = []
 for _r in BGP_ROUTES:
     for _m in _r.get("models", [{"id": _r.get("model", "unknown")}]):
@@ -1689,7 +1789,8 @@ _PROVIDER_POLICIES = {
     "openrouter": {"reasoning_mode": "provider_default", "max_tokens": 32768, "strip_reasoning": True,
                    "tool_output_limit": 6000, "max_input_items": 35, "compaction": "balanced"},
     "openadapter": {"reasoning_mode": "off", "max_tokens": 32768, "strip_reasoning": True,
-                    "tool_output_limit": 2000, "max_input_items": 15, "compaction": "aggressive"},
+                    "tool_output_limit": 1000, "max_input_items": 10, "compaction": "aggressive",
+                    "synthetic_tool_results": True},
     "cloudcode-pa": {"compaction": "aggressive", "context_size": 1000000,
                      "tool_output_limit": 6000, "max_input_items": 60},
     "googleapis": {"compaction": "balanced", "context_size": 1000000,
@@ -4585,12 +4686,23 @@ class Handler(http.server.BaseHTTPRequestHandler):
                 pass
             _uptime = time.time() - _START_TIME if '_START_TIME' in dir() else 0
             self.send_json(200, {"ok": True, "backend": BACKEND,
-                                "target_url": TARGET_URL,
-                                "models": [m.get("id") for m in MODELS],
-                                "bgp_routes": len(BGP_ROUTES),
-                                "uptime_s": round(_uptime, 1),
-                                "memory_mb": round(_mem_mb, 1),
-                                "requests_total": _STATS.get("requests", 0)})
+                                 "target_url": TARGET_URL,
+                                 "models": [m.get("id") for m in MODELS],
+                                 "bgp_routes": len(BGP_ROUTES),
+                                 "uptime_s": round(_uptime, 1),
+                                 "memory_mb": round(_mem_mb, 1),
+                                 "requests_total": _STATS.get("requests", 0)})
+        elif self.path == "/admin/reload":
+            # NOTE(review): no auth check is visible for the /admin/* routes in
+            # this hunk — confirm access to them is restricted elsewhere.
+            reloaded = _hot_reload_api_key()
+            self.send_json(200, {"ok": True, "reloaded": reloaded,
+                                 "api_key_preview": _key_preview(API_KEY),
+                                 "config_path": _CONFIG_PATH or "none"})
+        elif self.path == "/admin/verify-key":
+            result = _verify_api_key(API_KEY, TARGET_URL)
+            result["api_key_preview"] = _key_preview(API_KEY)
+            self.send_json(200, result)
         else:
             self.send_error(404)
 
@@ -4612,6 +4724,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
     _logf = None
 
     def _handle(self):
+        _hot_reload_api_key()
         try:
             clen = int(self.headers.get("Content-Length", 0))
             body = json.loads(self.rfile.read(clen))
@@ -5665,6 +5778,40 @@ class Handler(http.server.BaseHTTPRequestHandler):
                     except Exception as e:
                         print(f"[crof-adaptive] retry failed: {e}", file=sys.stderr)
 
+                # Smart continuation: if model returned finish_reason=stop with only text (no tool calls)
+                # during an active tool-using session, nudge it to continue working.
+                _has_tool_calls_in_output = any(o.get("type") == "function_call" for o in (last_output or []))
+                if (finish_reason == "stop" and has_content and not _has_tool_calls_in_output
+                        and isinstance(input_data, list) and len(input_data) >= 3
+                        and has_function_call_output(input_data)):
+                    _nudge_msg = {
+                        "role": "user",
+                        "content": "Continue with the task. If you need to make changes or gather more information, use the appropriate tools. Do NOT just describe what to do — take action using tool calls."
+                    }
+                    nudge_messages = oa_input_to_messages(input_data) + [_nudge_msg]
+                    # "instructions" may be present but null, so fall back to "" before stripping.
+                    instructions = (body.get("instructions") or "").strip()
+                    if instructions:
+                        nudge_messages.insert(0, {"role": "system", "content": instructions})
+                    nudge_chat_body = self._build_chat_body(model, nudge_messages, body, stream)
+                    nudge_req = urllib.request.Request(target, data=json.dumps(nudge_chat_body).encode(), headers=fwd)
+                    print(f"[{self._session_id}] [smart-continue] model stopped mid-task without tool calls, nudging continuation", file=sys.stderr)
+                    # Keep the first response so a failed nudge cannot replace it with a partial stream.
+                    _pre_nudge = (collected_events, last_resp_id, last_output, last_status, finish_reason, has_content)
+                    try:
+                        retry_upstream = urllib.request.urlopen(nudge_req, timeout=_upstream_timeout(body, True))
+                        collected_events = []
+                        last_resp_id = last_output = last_status = None
+                        finish_reason = None
+                        has_content = False
+                        for event in oa_stream_to_sse(retry_upstream, model, body.get("request_id") or body.get("id")):
+                            collected_events.append(event)
+                            _observe_event(event)
+                    except Exception as e:
+                        (collected_events, last_resp_id, last_output, last_status,
+                         finish_reason, has_content) = _pre_nudge
+                        print(f"[{self._session_id}] [smart-continue] nudge retry failed: {e}", file=sys.stderr)
+
                 self.stream_buffered_events(collected_events)
             else:
                 result = oa_resp_to_responses(json.loads(upstream.read()), model)
@@ -6609,7 +6756,7 @@ def main():
     _anti_stall_cleanup()
     _init_runtime()
     try:
-        _current_cfg = os.path.basename(args.config) if args.config else ""
+        _current_cfg = os.path.basename(_CONFIG_PATH) if _CONFIG_PATH else ""
         for _f in os.listdir(_LOG_DIR):
             if _f.startswith("proxy-") and _f.endswith(".json") and _f != _current_cfg:
                 os.remove(os.path.join(_LOG_DIR, _f))