sync: PR #21 - MiMo compat fix, endpoint edit dedup, anti-stall Windows compat, AGENTS.md/CLAUDE.md

This commit is contained in:
Roman | RyzenAdvanced
2026-05-27 22:00:12 +04:00
Unverified
parent 825ec43580
commit 745d3f9eb1
3 changed files with 194 additions and 36 deletions

View File

@@ -5677,23 +5677,40 @@ class Handler(http.server.BaseHTTPRequestHandler):
break
self._forward_oa_compat(upstream, stream, model, chat_body, body, input_data, fwd, target, tracker)
@staticmethod
def _is_mimo_provider():
    """Return True when the configured upstream URL points at Xiaomi MiMo.

    The check is a plain substring match of the MiMo domain against the
    module-level ``TARGET_URL`` (defined outside this method).
    """
    mimo_domain = "xiaomimimo.com"
    return mimo_domain in TARGET_URL
def _build_chat_body(self, model, messages, body, stream):
    """Build the chat-completions request body sent to the upstream provider.

    Args:
        model: Model identifier placed in the ``model`` field.
        messages: Already-converted chat messages list.
        body: Incoming request body (a dict); ``temperature``, ``top_p``,
            ``max_output_tokens``, ``tools`` and ``tool_choice`` are read
            from it when present.
        stream: Value stored in the ``stream`` field.

    Returns:
        dict: The request body. MiMo upstreams get ``max_completion_tokens``
        and a ``thinking`` object; every other upstream gets ``max_tokens``
        and the ``enable_thinking`` / ``reasoning_effort`` fields.
    """
    chat_body = {"model": model, "messages": messages}
    is_mimo = self._is_mimo_provider()

    # Sampling parameters are forwarded only when the caller supplied them.
    for k in ("temperature", "top_p"):
        if k in body:
            chat_body[k] = body[k]

    # 64000 acts as a floor: a smaller (or missing) request value is raised
    # to it. `or 0` also covers an explicit null, which would otherwise make
    # max() raise TypeError.
    max_tok = max(body.get("max_output_tokens") or 0, 64000)
    if is_mimo:
        chat_body["max_completion_tokens"] = max_tok
    else:
        chat_body["max_tokens"] = max_tok

    tools = oa_convert_tools(body.get("tools"))
    if tools:
        chat_body["tools"] = tools
    if body.get("tool_choice"):
        chat_body["tool_choice"] = body["tool_choice"]
    chat_body["stream"] = stream

    reasoning_off = not REASONING_ENABLED or REASONING_EFFORT == "none"
    if is_mimo:
        if reasoning_off:
            chat_body["thinking"] = {"type": "disabled"}
        else:
            # "minimal" and "max" are remapped to "low" / "high" for MiMo;
            # any other effort value is passed through unchanged.
            mimo_effort = {"minimal": "low", "max": "high"}.get(REASONING_EFFORT, REASONING_EFFORT)
            chat_body["thinking"] = {"type": "enabled"}
            chat_body["reasoning_effort"] = mimo_effort
    else:
        if reasoning_off:
            chat_body["enable_thinking"] = False
            chat_body["reasoning_effort"] = "none"
        else:
            chat_body["reasoning_effort"] = REASONING_EFFORT
    return chat_body
def _handle_antigravity_v2(self, body, model, stream, tracker=None):
@@ -8572,42 +8589,67 @@ def _handle_shutdown_signal(sig, frame):
SERVER.shutdown()
def _anti_stall_cleanup():
import subprocess as _sp
my_pid = os.getpid()
my_ppid = os.getppid()
my_pgid = os.getpgid(0)
killed = []
try:
import subprocess as _sp
out = _sp.run(["pgrep", "-f", "translate-proxy"], capture_output=True, text=True, timeout=5).stdout.strip()
for pid_str in out.splitlines():
pid_str = pid_str.strip()
if not pid_str or not pid_str.isdigit():
continue
pid = int(pid_str)
if pid == my_pid or pid == my_ppid:
continue
try:
pgid = os.getpgid(pid)
if pgid == my_pgid:
if sys.platform == "win32":
out = _sp.run(
["tasklist", "/FI", "IMAGENAME eq python.exe", "/FO", "CSV", "/NH"],
capture_output=True, text=True, timeout=5,
).stdout.strip()
for line in out.splitlines():
parts = line.split(",")
if len(parts) >= 2:
pid_str = parts[1].strip('"')
if not pid_str.isdigit():
continue
pid = int(pid_str)
if pid == my_pid:
continue
cmd = _sp.run(
["wmic", "process", "where", f"ProcessId={pid}", "/FORMAT:CommandLine", "/VALUE"],
capture_output=True, text=True, timeout=5,
).stdout.strip()
if "translate-proxy" not in cmd:
continue
try:
_sp.run(["taskkill", "/PID", str(pid), "/F"], capture_output=True, timeout=5)
killed.append(pid)
except Exception:
pass
else:
my_ppid = os.getppid()
my_pgid = os.getpgid(0)
out = _sp.run(["pgrep", "-f", "translate-proxy"], capture_output=True, text=True, timeout=5).stdout.strip()
for pid_str in out.splitlines():
pid_str = pid_str.strip()
if not pid_str or not pid_str.isdigit():
continue
except OSError:
pass
try:
stat = open(f"/proc/{pid}/stat").read().split()
start_ticks = int(stat[21])
import time as _t
ticks_per_sec = os.sysconf('SC_CLK_TCK')
start_time = start_ticks / ticks_per_sec
age = _t.time() - start_time
if age < 60:
pid = int(pid_str)
if pid == my_pid or pid == my_ppid:
continue
except Exception:
continue
try:
os.kill(pid, signal.SIGTERM)
killed.append(pid)
except (ProcessLookupError, PermissionError):
pass
try:
pgid = os.getpgid(pid)
if pgid == my_pgid:
continue
except OSError:
pass
try:
stat = open(f"/proc/{pid}/stat").read().split()
start_ticks = int(stat[21])
ticks_per_sec = os.sysconf('SC_CLK_TCK')
start_time = start_ticks / ticks_per_sec
age = time.time() - start_time
if age < 60:
continue
except Exception:
continue
try:
os.kill(pid, signal.SIGTERM)
killed.append(pid)
except (ProcessLookupError, PermissionError):
pass
except Exception:
pass
try:
@@ -8621,6 +8663,7 @@ def _anti_stall_cleanup():
print(f"[anti-stall] killed {len(killed)} stale proxy process(es): {killed}", flush=True)
time.sleep(1)
def main():
global SERVER, _START_TIME
_START_TIME = time.time()