diff --git a/CHANGELOG.md b/CHANGELOG.md
index b2be983..8b70c8e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,30 @@
# Changelog
+## v2.7.0 (2026-05-20)
+
+- **Usage Dashboard redesigned** (inspired by OpenUsage design patterns)
+ - Deep Space dark theme with Catppuccin-inspired color palette
+ - Header with animated status dots (OK/WARN/ERR provider health)
+ - KPI summary strip: total providers, requests, token volume, avg latency
+ - Provider cards with colored borders matching health status
+ - Status pills: OK (green), WARN (yellow), ERR (red)
+ - Colored section separators per metric type (Usage=yellow, Models=lavender)
+ - Model composition bar: stacked horizontal segments per model share
+ - Per-model breakdown with mini progress bars, percentage, request counts
+ - Per-model token breakdown (in/out) when available
+ - Token formatting: 1.2M, 45.3K instead of raw numbers
+ - Duration formatting: 1.5h, 3.2m instead of raw seconds
+ - Error section with warning icon
+
+- **TCP_NODELAY streaming optimization**
+ - Disables Nagle's algorithm on streaming connections
+ - Reduces per-packet latency by up to 40ms on small SSE events
+ - Applied to all 4 streaming code paths (openai-compat, retry, command-code, generic)
+
+- **Anthropic prompt caching**
+ - System prompts are now sent as a structured text block marked `cache_control: {"type": "ephemeral"}` instead of a plain string
+ - Opts the system prompt into Anthropic prompt caching (saves tokens + cost on repeated prompts)
+
## v2.6.1 (2026-05-20)
- **Google OAuth rebuilt to emulate Gemini CLI**
diff --git a/codex-launcher_2.6.1_all.deb b/codex-launcher_2.6.1_all.deb
deleted file mode 100644
index 0a72c25..0000000
Binary files a/codex-launcher_2.6.1_all.deb and /dev/null differ
diff --git a/codex-launcher_2.7.0_all.deb b/codex-launcher_2.7.0_all.deb
new file mode 100644
index 0000000..1ba20a6
Binary files /dev/null and b/codex-launcher_2.7.0_all.deb differ
diff --git a/src/codex-launcher-gui b/src/codex-launcher-gui
index 18e54da..26a9a1d 100755
--- a/src/codex-launcher-gui
+++ b/src/codex-launcher-gui
@@ -647,7 +647,7 @@ class LauncherWin(Gtk.Window):
# header row
hdr = Gtk.Box(spacing=8)
vbox.pack_start(hdr, False, False, 0)
- lbl = Gtk.Label(label="Codex Launcher v2.6.1")
+ lbl = Gtk.Label(label="Codex Launcher v2.7.0")
lbl.set_use_markup(True)
hdr.pack_start(lbl, False, False, 0)
changelog_btn = Gtk.Button(label="Changelog")
@@ -2481,10 +2481,17 @@ class BGPRouteDialog(Gtk.Dialog):
self._combo_model.set_active(0)
-_USAGE_COLORS = {
- "green": "#27ae60", "yellow": "#f39c12", "orange": "#e67e22",
- "red": "#e74c3c", "blue": "#3498db", "purple": "#9b59b6",
- "dark": "#2c3e50", "light": "#ecf0f1", "mid": "#bdc3c7",
+# Colour table for the usage dashboard. Scalar entries are named hex colours;
+# "model_palette" is an ordered list that callers index modulo its length.
+_U = {
+    "base": "#0C0E16",
+    "surface0": "#161928",
+    "surface1": "#1E2235",
+    "surface2": "#2A2F47",
+    "text": "#E4E6F0",
+    "subtext": "#B0B4C8",
+    "dim": "#5C6180",
+    "accent": "#7EB8F7",
+    "blue": "#5DA4E8",
+    "sapphire": "#4EC5C1",
+    "green": "#59D4A0",
+    "yellow": "#F0C75E",
+    "red": "#F06A77",
+    "peach": "#F09860",
+    "teal": "#4EC5C1",  # same value as "sapphire"
+    "lavender": "#A899F0",
+    "sky": "#70C8E8",
+    "maroon": "#C44B5C",
+    "flamingo": "#E878B0",
+    "rosewater": "#F0D0C0",
+    "model_palette": [
+        "#F09860", "#4EC5C1", "#5DA4E8", "#59D4A0",
+        "#F0C75E", "#A899F0", "#70C8E8", "#E878B0",
+        "#C44B5C", "#F0D0C0", "#7EB8F7", "#F06A77",
+    ],
}
_USAGE_STATS_FILE = HOME / ".cache/codex-proxy/usage-stats.json"
@@ -2497,44 +2504,60 @@ def _load_usage_stats():
pass
return {"providers": {}, "updated": None}
-def _bar_color(pct):
- if pct < 0.5:
- return _USAGE_COLORS["green"]
- if pct < 0.8:
- return _USAGE_COLORS["yellow"]
- return _USAGE_COLORS["red"]
+def _fmt_tok(n):
+    """Format a token count compactly for display.
+
+    Counts of one million or more render with an ``M`` suffix (``1.2M``),
+    counts of one thousand or more with a ``K`` suffix (``45.3K``), and
+    anything smaller as the plain number.
+
+    Args:
+        n: Token count; expected to be a non-negative number.
+
+    Returns:
+        The formatted string.
+    """
+    if n >= 1_000:
+        thousands = f"{n/1_000:.1f}"
+        # Values just under one million round up to "1000.0" at one decimal;
+        # promote them to the M unit so the label never reads "1000.0K".
+        if n >= 1_000_000 or thousands == "1000.0":
+            return f"{n/1_000_000:.1f}M"
+        return f"{thousands}K"
+    return str(n)
+
+def _fmt_dur(s):
+    """Format a duration given in seconds with one decimal and a unit suffix.
+
+    Durations of an hour or more render as hours (``1.5h``), a minute or
+    more as minutes (``3.2m``), anything shorter as seconds (``2.5s``).
+
+    Args:
+        s: Duration in seconds; expected to be a non-negative number.
+
+    Returns:
+        The formatted string.
+    """
+    # A value that would round up to "60.0" in one unit is promoted to the
+    # next unit, so the label never reads "60.0s" or "60.0m".
+    if s >= 3600 or f"{s/60:.1f}" == "60.0":
+        return f"{s/3600:.1f}h"
+    if s >= 60 or f"{s:.1f}" == "60.0":
+        return f"{s/60:.1f}m"
+    return f"{s:.1f}s"
+
+def _status_pill(success_rate, fail_pct):
+    """Map a failure ratio to a ``(label, colour)`` pair for a status pill.
+
+    Args:
+        success_rate: Accepted for interface compatibility; not consulted.
+        fail_pct: Failure ratio as a fraction (0.05 means 5%).
+
+    Returns:
+        ``("ERR", red)`` above 0.15, ``("WARN", yellow)`` above 0.05,
+        otherwise ``("OK", green)``; colours are taken from ``_U``.
+    """
+    # Checked from the most severe threshold down; the first match wins.
+    for floor, text, colour_key in ((0.15, "ERR", "red"), (0.05, "WARN", "yellow")):
+        if fail_pct > floor:
+            return (text, _U[colour_key])
+    return ("OK", _U["green"])
+
+def _make_css_widget(css_str):
+    """Return a new ``Gtk.CssProvider`` loaded with *css_str*.
+
+    Args:
+        css_str: CSS source text; it is encoded to bytes before loading.
+    """
+    provider = Gtk.CssProvider()
+    provider.load_from_data(css_str.encode())
+    return provider
+
+def _apply_css(widget, css_str):
+    """Attach *css_str* to *widget*'s style context at USER priority.
+
+    Each call builds a fresh provider via ``_make_css_widget`` and adds it
+    to the widget's own style context.
+    """
+    widget.get_style_context().add_provider(
+        _make_css_widget(css_str), Gtk.STYLE_PROVIDER_PRIORITY_USER)
+
class UsageWindow(Gtk.Window):
def __init__(self, parent):
- super().__init__(title="Usage Stats")
+ super().__init__(title="Usage Dashboard")
self.set_transient_for(parent)
- self.set_default_size(640, 560)
+ self.set_default_size(720, 640)
self.set_position(Gtk.WindowPosition.CENTER)
self._parent = parent
+ _apply_css(self, f"""
+ window {{ background-color: {_U["base"]}; }}
+ separator {{ background-color: {_U["surface1"]}; }}
+ """)
+
vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=0)
self.add(vbox)
- header = Gtk.Box(spacing=8)
- header.set_margin_start(16)
- header.set_margin_end(16)
- header.set_margin_top(12)
- header.set_margin_bottom(8)
- vbox.pack_start(header, False, False, 0)
- title = Gtk.Label()
- title.set_markup('Usage Dashboard')
- header.pack_start(title, False, False, 0)
- refresh_btn = Gtk.Button(label="Refresh")
- refresh_btn.connect("clicked", lambda b: self._refresh())
- header.pack_end(refresh_btn, False, False, 0)
- self._updated_lbl = Gtk.Label()
- self._updated_lbl.set_markup('Never')
- header.pack_end(self._updated_lbl, False, False, 8)
-
+ self._build_header(vbox)
+ self._build_summary_strip(vbox)
sep = Gtk.Separator()
vbox.pack_start(sep, False, False, 0)
- self._cards_box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=8)
+ self._cards_box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=6)
+ self._cards_box.set_margin_top(8)
sw = Gtk.ScrolledWindow()
sw.set_policy(Gtk.PolicyType.NEVER, Gtk.PolicyType.AUTOMATIC)
sw.add(self._cards_box)
@@ -2543,133 +2566,337 @@ class UsageWindow(Gtk.Window):
self._refresh()
self.show_all()
+    def _build_header(self, parent):
+        """Create the dashboard header row and pack it into *parent*.
+
+        Packed from the start: bolt icon, title, provider status dots.
+        Packed from the end: the last-updated label, then the Refresh button.
+        The status-dot and last-updated labels are stored on ``self`` so
+        ``_refresh`` can update them.
+        """
+        row = Gtk.Box(spacing=8)
+        for set_margin, px in (
+            (row.set_margin_start, 16),
+            (row.set_margin_end, 16),
+            (row.set_margin_top, 12),
+            (row.set_margin_bottom, 6),
+        ):
+            set_margin(px)
+        parent.pack_start(row, False, False, 0)
+
+        bolt_lbl = Gtk.Label()
+        bolt_lbl.set_markup(f'\u26A1')
+        title_lbl = Gtk.Label()
+        title_lbl.set_markup(f'Usage Dashboard')
+        self._status_dots = Gtk.Label()
+        for child, padding in ((bolt_lbl, 0), (title_lbl, 0), (self._status_dots, 8)):
+            row.pack_start(child, False, False, padding)
+
+        self._updated_lbl = Gtk.Label()
+        self._updated_lbl.set_markup(f'Never')
+        row.pack_end(self._updated_lbl, False, False, 4)
+
+        button_css = f"""
+            button {{ color: {_U["text"]}; background-color: {_U["surface0"]};
+            border: 1px solid {_U["surface1"]}; border-radius: 6px; padding: 4px 12px; }}
+            button:hover {{ background-color: {_U["surface1"]}; }}
+        """
+        refresh_btn = Gtk.Button(label="Refresh")
+        _apply_css(refresh_btn, button_css)
+        refresh_btn.connect("clicked", lambda _btn: self._refresh())
+        row.pack_end(refresh_btn, False, False, 0)
+
+    def _build_summary_strip(self, parent):
+        """Build the KPI summary strip and pack it into *parent*.
+
+        Creates one caption/value column per KPI (providers, requests,
+        tokens, latency) and stores each value label in ``self._kpi_boxes``
+        under its key so ``_refresh`` can update the figures.
+        """
+        strip = Gtk.Box(spacing=0)
+        strip.set_margin_start(16)
+        strip.set_margin_end(16)
+        strip.set_margin_bottom(6)
+        # Single quotes for the subscript: reusing the enclosing double quote
+        # inside an f-string expression is a SyntaxError before Python 3.12.
+        _apply_css(strip, f"box {{ background-color: {_U['surface0']}; border-radius: 8px; padding: 8px 12px; }}")
+        parent.pack_start(strip, False, False, 0)
+
+        self._kpi_boxes = {}
+        for key, label, icon in [
+            ("providers", "Providers", "\U0001F4CA"),
+            ("requests", "Requests", "\u26A1"),
+            ("tokens", "Tokens", "\U0001F9E0"),
+            ("latency", "Avg Latency", "\u23F1"),
+        ]:
+            box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=1)
+            lbl = Gtk.Label()
+            lbl.set_markup(f'{icon} {label}')
+            lbl.set_xalign(0)
+            box.pack_start(lbl, False, False, 0)
+            # Placeholder value until the first refresh fills it in.
+            val = Gtk.Label()
+            val.set_markup(f'-')
+            val.set_xalign(0)
+            box.pack_start(val, False, False, 0)
+            box.set_margin_end(20)
+            strip.pack_start(box, False, False, 0)
+            self._kpi_boxes[key] = val
+
def _refresh(self):
for c in self._cards_box.get_children():
self._cards_box.remove(c)
stats = _load_usage_stats()
updated = stats.get("updated")
if updated:
- self._updated_lbl.set_markup(f'Updated: {updated}')
+ self._updated_lbl.set_markup(f'{updated}')
providers = stats.get("providers", {})
if not providers:
empty = Gtk.Label()
- empty.set_markup('No usage data yet.\nLaunch a session to start tracking.')
+ empty.set_markup(f'No usage data yet.\nLaunch a session to start tracking.')
empty.set_margin_top(60)
self._cards_box.pack_start(empty, False, False, 0)
self._cards_box.show_all()
return
+ total_req = 0
+ total_tok_in = 0
+ total_tok_out = 0
+ total_dur = 0.0
+ n_ok = 0
+ n_warn = 0
+ n_err = 0
+
sorted_providers = sorted(providers.items(), key=lambda x: x[1].get("total_requests", 0), reverse=True)
+        # Aggregate totals across providers and count how many fall into
+        # each health bucket for the header dots.
+        for prov_name, prov_data in sorted_providers:
+            t = prov_data.get("total_requests", 0)
+            total_req += t
+            total_tok_in += prov_data.get("total_tokens_in", 0)
+            total_tok_out += prov_data.get("total_tokens_out", 0)
+            total_dur += prov_data.get("total_duration_s", 0.0)
+            fail = prov_data.get("failures", 0)
+            fail_pct = fail / t if t > 0 else 0
+            # Bucket by the label _status_pill returns so the header counts
+            # always use the same thresholds as the per-card pills.
+            status_text, _colour = _status_pill(0, fail_pct)
+            if status_text == "ERR":
+                n_err += 1
+            elif status_text == "WARN":
+                n_warn += 1
+            else:
+                n_ok += 1
+
+        self._kpi_boxes["providers"].set_markup(
+            f'{len(providers)}')
+        self._kpi_boxes["requests"].set_markup(
+            f'{total_req:,}')
+        tok_sum = total_tok_in + total_tok_out
+        tok_str = f"{_fmt_tok(tok_sum)} in:{_fmt_tok(total_tok_in)} out:{_fmt_tok(total_tok_out)}" if tok_sum else "N/A"
+        self._kpi_boxes["tokens"].set_markup(
+            f'{tok_str}')
+        # Average latency is total duration divided by total request count.
+        avg_lat = total_dur / total_req if total_req > 0 else 0
+        self._kpi_boxes["latency"].set_markup(
+            f'{_fmt_dur(avg_lat)}')
+
+        # Only buckets with at least one provider get a dot in the header.
+        dots_parts = []
+        if n_ok:
+            dots_parts.append(f'\u25CF{n_ok}')
+        if n_warn:
+            dots_parts.append(f'\u25D0{n_warn}')
+        if n_err:
+            dots_parts.append(f'\u2717{n_err}')
+        if dots_parts:
+            self._status_dots.set_markup(" ".join(dots_parts))
+
for prov_name, prov_data in sorted_providers:
card = self._build_card(prov_name, prov_data)
self._cards_box.pack_start(card, False, False, 0)
self._cards_box.show_all()
def _build_card(self, name, data):
- frame = Gtk.Frame()
- frame.set_margin_start(12)
- frame.set_margin_end(12)
- frame.set_margin_top(4)
- frame.set_margin_bottom(4)
- style = frame.get_style_context()
- style.add_class("card")
-
- outer = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=4)
- outer.set_margin_start(12)
- outer.set_margin_end(12)
- outer.set_margin_top(8)
- outer.set_margin_bottom(8)
- frame.add(outer)
-
- top_row = Gtk.Box(spacing=8)
- outer.pack_start(top_row, False, False, 0)
+        # Card container. Its background/border CSS is attached once, below,
+        # after the health status colour is known; styling it here as well
+        # would add a second provider with the same selector and priority
+        # to the same widget.
+        card = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=0)
+        card.set_margin_start(12)
+        card.set_margin_end(12)
total = data.get("total_requests", 0)
ok = data.get("successes", 0)
fail = data.get("failures", 0)
success_rate = ok / total if total > 0 else 1.0
+ fail_pct = fail / total if total > 0 else 0
+ status_text, status_color = _status_pill(success_rate, fail_pct)
+
+ border_color = status_color
+ _apply_css(card, f"""
+ box {{ background-color: {_U["surface0"]}; border-radius: 10px;
+ border: 1px solid {border_color}; }}
+ """)
+
+ inner = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=3)
+ inner.set_margin_start(14)
+ inner.set_margin_end(14)
+ inner.set_margin_top(10)
+ inner.set_margin_bottom(10)
+ card.pack_start(inner, False, False, 0)
+
+ top = Gtk.Box(spacing=6)
+ inner.pack_start(top, False, False, 0)
+
+ dot = Gtk.Label()
+ dot.set_markup(f'\u25CF')
+ top.pack_start(dot, False, False, 0)
name_lbl = Gtk.Label()
short = name.replace("https://", "").replace("http://", "").split("/")[0]
- name_lbl.set_markup(f'{short}')
- top_row.pack_start(name_lbl, False, False, 0)
+ name_lbl.set_markup(f'{short}')
+ top.pack_start(name_lbl, False, False, 0)
+
+ pill = Gtk.Label()
+ pill.set_markup(f' {status_text} ')
+ top.pack_start(pill, False, False, 4)
req_lbl = Gtk.Label()
- req_lbl.set_markup(f'{total} requests')
- top_row.pack_start(req_lbl, False, False, 8)
-
- if fail > 0:
- err_lbl = Gtk.Label()
- err_lbl.set_markup(f'{fail} failed')
- top_row.pack_start(err_lbl, False, False, 4)
+ req_lbl.set_markup(f'{total} req')
+ top.pack_start(req_lbl, False, False, 6)
last_used = data.get("last_used", "")
if last_used:
lu_lbl = Gtk.Label()
- lu_lbl.set_markup(f'{last_used}')
- top_row.pack_end(lu_lbl, False, False, 0)
+ lu_lbl.set_markup(f'{last_used}')
+ top.pack_end(lu_lbl, False, False, 0)
+
+ sep1 = Gtk.Separator()
+ _apply_css(sep1, f"separator {{ background-color: {status_color}; margin-top: 4px; }}")
+ inner.pack_start(sep1, False, False, 0)
+
+ gauge_box = Gtk.Box(spacing=4)
+ gauge_box.set_margin_top(4)
+ inner.pack_start(gauge_box, False, False, 0)
+
+ gauge_label = Gtk.Label()
+ gauge_label.set_markup(f'\u26A1')
+ gauge_box.pack_start(gauge_label, False, False, 0)
- # Progress bar for success rate
bar = Gtk.ProgressBar()
bar.set_fraction(success_rate)
bar_pct = int(success_rate * 100)
- bar.set_text(f"{bar_pct}% success")
+ bar.set_text(f"{bar_pct}%")
bar.set_show_text(True)
- bar.set_margin_top(2)
- bar.set_margin_bottom(2)
- color = _bar_color(1.0 - success_rate)
- bar_css = f'progress {{ background-color: {color}; border-radius: 4px; }} trough {{ border-radius: 4px; min-height: 10px; }}'
- provider = Gtk.CssProvider()
- provider.load_from_data(bar_css.encode())
- bar.get_style_context().add_provider(provider, Gtk.STYLE_PROVIDER_PRIORITY_USER)
- outer.pack_start(bar, False, False, 0)
+ bar_css = f"""
+ progress {{ background-color: {status_color}; border-radius: 6px; }}
+ trough {{ background-color: {_U["surface1"]}; border-radius: 6px; min-height: 12px; }}
+ """
+ _apply_css(bar, bar_css)
+ bar.set_hexpand(True)
+ gauge_box.pack_start(bar, True, True, 0)
- # Stats row
- stats_row = Gtk.Box(spacing=16)
- outer.pack_start(stats_row, False, False, 0)
+ if fail > 0:
+ fail_lbl = Gtk.Label()
+ fail_lbl.set_markup(f'{fail} fail')
+ gauge_box.pack_end(fail_lbl, False, False, 0)
+
+ metrics_box = Gtk.Box(spacing=0)
+ metrics_box.set_margin_top(4)
+ inner.pack_start(metrics_box, False, False, 0)
t_in = data.get("total_tokens_in", 0)
t_out = data.get("total_tokens_out", 0)
dur = data.get("total_duration_s", 0.0)
avg_dur = dur / total if total > 0 else 0
- for label, value in [
- ("Tokens In", f"{t_in:,}"),
- ("Tokens Out", f"{t_out:,}"),
- ("Avg Latency", f"{avg_dur:.1f}s"),
+ for label, value, color in [
+ ("Tokens In", f"{_fmt_tok(t_in)}", _U["sapphire"]),
+ ("Tokens Out", f"{_fmt_tok(t_out)}", _U["peach"]),
+ ("Avg Latency", _fmt_dur(avg_dur), _U["sky"]),
+ ("Duration", _fmt_dur(dur), _U["lavender"]),
]:
- box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=1)
+ box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=0)
l = Gtk.Label()
- l.set_markup(f'{label}')
+ l.set_markup(f'{label}')
+ l.set_xalign(0)
box.pack_start(l, False, False, 0)
v = Gtk.Label()
- v.set_markup(f'{value}')
+ v.set_markup(f'{value}')
+ v.set_xalign(0)
box.pack_start(v, False, False, 0)
- stats_row.pack_start(box, False, False, 0)
+ box.set_margin_end(16)
+ metrics_box.pack_start(box, False, False, 0)
- # Models breakdown
models = data.get("models", {})
- if len(models) > 0:
- model_str = " ".join(
- f'{m} '
- f'({md.get("requests",0)})'
- for m, md in sorted(models.items(), key=lambda x: x[1].get("requests", 0), reverse=True)[:4]
- )
- m_lbl = Gtk.Label()
- m_lbl.set_markup(f'Models: {model_str}')
- m_lbl.set_line_wrap(True)
- m_lbl.set_xalign(0)
- outer.pack_start(m_lbl, False, False, 2)
+ if models:
+ self._build_models_section(inner, models, total)
- # Error info
last_err = data.get("last_error")
if last_err:
+ err_box = Gtk.Box(spacing=4)
+ err_box.set_margin_top(4)
+ inner.pack_start(err_box, False, False, 0)
+ icon = Gtk.Label()
+ icon.set_markup(f'\u26A0')
+ err_box.pack_start(icon, False, False, 0)
err_lbl = Gtk.Label()
- err_lbl.set_markup(f'Last error: {last_err}')
+ err_lbl.set_markup(f'{last_err}')
err_lbl.set_xalign(0)
- outer.pack_start(err_lbl, False, False, 0)
+ err_lbl.set_line_wrap(True)
+ err_box.pack_start(err_lbl, False, False, 0)
- return frame
+ return card
+
+    def _build_models_section(self, parent, models, total_req):
+        """Append the per-model breakdown of one provider card to *parent*.
+
+        Adds a separator, a "Models" header, a stacked composition bar (only
+        when ``total_req`` is positive) and one detail row for each of the
+        six most-requested models.
+
+        Args:
+            parent: Container the section widgets are packed into.
+            models: Mapping of model name to a stats dict; the keys read are
+                ``requests``, ``tokens_in`` and ``tokens_out``.
+            total_req: Provider-wide request count used as the denominator
+                for each model's share.
+        """
+        palette = _U["model_palette"]
+
+        sep_m = Gtk.Separator()
+        # Single quotes for the subscript: reusing the enclosing double quote
+        # inside an f-string expression is a SyntaxError before Python 3.12.
+        _apply_css(sep_m, f"separator {{ background-color: {_U['lavender']}; margin-top: 4px; margin-bottom: 2px; }}")
+        parent.pack_start(sep_m, False, False, 0)
+
+        header = Gtk.Box(spacing=4)
+        header.set_margin_top(2)
+        parent.pack_start(header, False, False, 0)
+        icon = Gtk.Label()
+        icon.set_markup(f'\U0001F916')
+        header.pack_start(icon, False, False, 0)
+        lbl = Gtk.Label()
+        lbl.set_markup(f'Models')
+        header.pack_start(lbl, False, False, 0)
+
+        # Most-requested model first; a model's index in this order selects
+        # its palette colour in both the bar and the detail rows.
+        sorted_models = sorted(models.items(), key=lambda x: x[1].get("requests", 0), reverse=True)
+
+        if total_req > 0:
+            comp_bar = Gtk.Box(spacing=0)
+            _apply_css(comp_bar, f"box {{ background-color: {_U['surface1']}; border-radius: 4px; min-height: 8px; margin-top: 2px; }}")
+            parent.pack_start(comp_bar, False, False, 0)
+            for i, (mname, mdata) in enumerate(sorted_models):
+                m_req = mdata.get("requests", 0)
+                pct = m_req / total_req
+                # Shares below 1% get no segment in the bar.
+                if pct < 0.01:
+                    continue
+                seg = Gtk.Box()
+                color = palette[i % len(palette)]
+                _apply_css(seg, f"box {{ background-color: {color}; min-height: 8px; }}")
+                # Width scales with the share, with a 4px floor.
+                seg.set_size_request(max(int(pct * 400), 4), 8)
+                comp_bar.pack_start(seg, False, False, 0)
+
+        models_box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=1)
+        models_box.set_margin_top(2)
+        parent.pack_start(models_box, False, False, 0)
+
+        for i, (mname, mdata) in enumerate(sorted_models[:6]):
+            row = Gtk.Box(spacing=6)
+            models_box.pack_start(row, False, False, 0)
+            color = palette[i % len(palette)]
+            dot = Gtk.Label()
+            dot.set_markup(f'\u25CF')
+            row.pack_start(dot, False, False, 0)
+            m_lbl = Gtk.Label()
+            m_lbl.set_markup(f'{mname}')
+            m_lbl.set_xalign(0)
+            m_lbl.set_size_request(120, -1)
+            row.pack_start(m_lbl, False, False, 0)
+
+            m_req = mdata.get("requests", 0)
+            # Share of the provider's requests; zero when nothing was recorded.
+            share = m_req / total_req if total_req > 0 else 0
+            pct = share * 100
+
+            m_bar = Gtk.ProgressBar()
+            m_bar.set_fraction(share)
+            _apply_css(m_bar, f"""
+                progress {{ background-color: {color}; border-radius: 3px; }}
+                trough {{ background-color: {_U["surface1"]}; border-radius: 3px; min-height: 6px; }}
+            """)
+            m_bar.set_size_request(80, -1)
+            row.pack_start(m_bar, False, False, 0)
+
+            pct_lbl = Gtk.Label()
+            pct_lbl.set_markup(f'{pct:.0f}% ({m_req})')
+            row.pack_start(pct_lbl, False, False, 0)
+
+            # Token figures are shown only when at least one is non-zero.
+            m_in = mdata.get("tokens_in", 0)
+            m_out = mdata.get("tokens_out", 0)
+            if m_in or m_out:
+                tok_lbl = Gtk.Label()
+                tok_lbl.set_markup(f'in:{_fmt_tok(m_in)} out:{_fmt_tok(m_out)}')
+                row.pack_end(tok_lbl, False, False, 0)
def main():
diff --git a/src/translate-proxy.py b/src/translate-proxy.py
index 1d2442e..aa087f4 100755
--- a/src/translate-proxy.py
+++ b/src/translate-proxy.py
@@ -11,7 +11,7 @@ Usage:
python3 translate-proxy.py --backend openai-compat --target-url https://... --api-key sk-...
"""
-import json, http.server, urllib.request, time, uuid, os, sys, argparse
+import json, http.server, urllib.request, time, uuid, os, sys, argparse, threading, socket
# ═══════════════════════════════════════════════════════════════════
# Config
@@ -141,6 +141,8 @@ _pool = uuid.uuid4().hex[:8]
_response_store = {}
_MAX_STORED = 50
+_LOG_DIR = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy")
+os.makedirs(_LOG_DIR, exist_ok=True)
_stats_path = os.path.join(_LOG_DIR, "usage-stats.json")
_stats_lock = threading.Lock()
@@ -961,9 +963,6 @@ def cc_stream_to_sse(cc_stream, model, req_id):
# HTTP Server
# ═══════════════════════════════════════════════════════════════════
-_LOG_DIR = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy")
-os.makedirs(_LOG_DIR, exist_ok=True)
-
def _log_resp(resp_id, status, output):
try:
import datetime as _dt
@@ -1209,6 +1208,11 @@ class Handler(http.server.BaseHTTPRequestHandler):
self.send_header("Cache-Control", "no-cache")
self.send_header("Connection", "keep-alive")
self.end_headers()
+ if hasattr(self, 'connection') and self.connection:
+ try:
+ self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+ except Exception:
+ pass
collected_events = []
last_resp_id = None
@@ -1290,6 +1294,11 @@ class Handler(http.server.BaseHTTPRequestHandler):
self.send_header("Cache-Control", "no-cache")
self.send_header("Connection", "keep-alive")
self.end_headers()
+ if hasattr(self, 'connection') and self.connection:
+ try:
+ self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+ except Exception:
+ pass
last_resp_id = None
last_output = None
@@ -1322,7 +1331,8 @@ class Handler(http.server.BaseHTTPRequestHandler):
"max_tokens": body.get("max_output_tokens", 8192)}
instructions = body.get("instructions", "").strip()
if instructions:
- an_body["system"] = instructions
+ an_body["system"] = [{"type": "text", "text": instructions,
+ "cache_control": {"type": "ephemeral"}}]
for k in ("temperature", "top_p"):
if k in body:
an_body[k] = body[k]
@@ -1429,6 +1439,11 @@ class Handler(http.server.BaseHTTPRequestHandler):
self.send_header("Cache-Control", "no-cache")
self.send_header("Connection", "keep-alive")
self.end_headers()
+ if hasattr(self, 'connection') and self.connection:
+ try:
+ self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+ except Exception:
+ pass
last_resp_id = None
last_output = None
for event in cc_stream_to_sse(upstream, model, body.get("request_id") or body.get("id")):
@@ -1476,6 +1491,11 @@ class Handler(http.server.BaseHTTPRequestHandler):
self.send_header("Cache-Control", "no-cache")
self.send_header("Connection", "keep-alive")
self.end_headers()
+ if hasattr(self, 'connection') and self.connection:
+ try:
+ self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+ except Exception:
+ pass
last_resp_id = None
last_output = None
last_status = None