diff --git a/CHANGELOG.md b/CHANGELOG.md index b2be983..8b70c8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,30 @@ # Changelog +## v2.7.0 (2026-05-20) + +- **Usage Dashboard redesigned** (inspired by OpenUsage design patterns) + - Deep Space dark theme with Catppuccin-inspired color palette + - Header with animated status dots (OK/WARN/ERR provider health) + - KPI summary strip: total providers, requests, token volume, avg latency + - Provider cards with colored borders matching health status + - Status pills: OK (green), WARN (yellow), ERR (red) + - Colored section separators per metric type (Usage=yellow, Models=lavender) + - Model composition bar: stacked horizontal segments per model share + - Per-model breakdown with mini progress bars, percentage, request counts + - Per-model token breakdown (in/out) when available + - Token formatting: 1.2M, 45.3K instead of raw numbers + - Duration formatting: 1.5h, 3.2m instead of raw seconds + - Error section with warning icon + +- **TCP_NODELAY streaming optimization** + - Disables Nagle's algorithm on streaming connections + - Reduces per-packet latency by up to 40ms on small SSE events + - Applied to all 4 streaming code paths (openai-compat, retry, command-code, generic) + +- **Anthropic prompt caching** + - System prompts now sent as `cache_control: ephemeral` structured format + - Enables Anthropic's automatic prompt caching (saves tokens + cost on repeated prompts) + ## v2.6.1 (2026-05-20) - **Google OAuth rebuilt to emulate Gemini CLI** diff --git a/codex-launcher_2.6.1_all.deb b/codex-launcher_2.6.1_all.deb deleted file mode 100644 index 0a72c25..0000000 Binary files a/codex-launcher_2.6.1_all.deb and /dev/null differ diff --git a/codex-launcher_2.7.0_all.deb b/codex-launcher_2.7.0_all.deb new file mode 100644 index 0000000..1ba20a6 Binary files /dev/null and b/codex-launcher_2.7.0_all.deb differ diff --git a/src/codex-launcher-gui b/src/codex-launcher-gui index 18e54da..26a9a1d 100755 --- a/src/codex-launcher-gui +++ b/src/codex-launcher-gui @@ -647,7 +647,7 @@ class LauncherWin(Gtk.Window): # header row hdr = Gtk.Box(spacing=8) vbox.pack_start(hdr, False, False, 0) - lbl = Gtk.Label(label="Codex Launcher v2.6.1") + lbl = Gtk.Label(label="Codex Launcher v2.7.0") lbl.set_use_markup(True) hdr.pack_start(lbl, False, False, 0) changelog_btn = Gtk.Button(label="Changelog") @@ -2481,10 +2481,17 @@ class BGPRouteDialog(Gtk.Dialog): self._combo_model.set_active(0) -_USAGE_COLORS = { - "green": "#27ae60", "yellow": "#f39c12", "orange": "#e67e22", - "red": "#e74c3c", "blue": "#3498db", "purple": "#9b59b6", - "dark": "#2c3e50", "light": "#ecf0f1", "mid": "#bdc3c7", +_U = { + "base": "#0C0E16", "surface0": "#161928", "surface1": "#1E2235", + "surface2": "#2A2F47", "text": "#E4E6F0", "subtext": "#B0B4C8", + "dim": "#5C6180", "accent": "#7EB8F7", "blue": "#5DA4E8", + "sapphire": "#4EC5C1", "green": "#59D4A0", "yellow": "#F0C75E", + "red": "#F06A77", "peach": "#F09860", "teal": "#4EC5C1", + "lavender": "#A899F0", "sky": "#70C8E8", "maroon": "#C44B5C", + "flamingo": "#E878B0", "rosewater": "#F0D0C0", + "model_palette": ["#F09860", "#4EC5C1", "#5DA4E8", "#59D4A0", + "#F0C75E", "#A899F0", "#70C8E8", "#E878B0", + "#C44B5C", "#F0D0C0", "#7EB8F7", "#F06A77"], } _USAGE_STATS_FILE = HOME / ".cache/codex-proxy/usage-stats.json" @@ -2497,44 +2504,60 @@ def _load_usage_stats(): pass return {"providers": {}, "updated": None} -def _bar_color(pct): - if pct < 0.5: - return _USAGE_COLORS["green"] - if pct < 0.8: - return _USAGE_COLORS["yellow"] - return _USAGE_COLORS["red"] +def _fmt_tok(n): + if n >= 1_000_000: + return f"{n/1_000_000:.1f}M" + if n >= 1_000: + return f"{n/1_000:.1f}K" + return str(n) + +def _fmt_dur(s): + if s >= 3600: + return f"{s/3600:.1f}h" + if s >= 60: + return f"{s/60:.1f}m" + return f"{s:.1f}s" + +def _status_pill(success_rate, fail_pct): + if fail_pct > 0.15: + return ("ERR", _U["red"]) + if fail_pct > 0.05: + return ("WARN", _U["yellow"]) + return ("OK", _U["green"]) + +def _make_css_widget(css_str): + p = Gtk.CssProvider() + p.load_from_data(css_str.encode()) + return p + +def _apply_css(widget, css_str): + ctx = widget.get_style_context() + ctx.add_provider(_make_css_widget(css_str), Gtk.STYLE_PROVIDER_PRIORITY_USER) + class UsageWindow(Gtk.Window): def __init__(self, parent): - super().__init__(title="Usage Stats") + super().__init__(title="Usage Dashboard") self.set_transient_for(parent) - self.set_default_size(640, 560) + self.set_default_size(720, 640) self.set_position(Gtk.WindowPosition.CENTER) self._parent = parent + _apply_css(self, f""" + window {{ background-color: {_U["base"]}; }} + separator {{ background-color: {_U["surface1"]}; }} + """) + vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=0) self.add(vbox) - header = Gtk.Box(spacing=8) - header.set_margin_start(16) - header.set_margin_end(16) - header.set_margin_top(12) - header.set_margin_bottom(8) - vbox.pack_start(header, False, False, 0) - title = Gtk.Label() - title.set_markup('Usage Dashboard') - header.pack_start(title, False, False, 0) - refresh_btn = Gtk.Button(label="Refresh") - refresh_btn.connect("clicked", lambda b: self._refresh()) - header.pack_end(refresh_btn, False, False, 0) - self._updated_lbl = Gtk.Label() - self._updated_lbl.set_markup('Never') - header.pack_end(self._updated_lbl, False, False, 8) - + self._build_header(vbox) + self._build_summary_strip(vbox) sep = Gtk.Separator() vbox.pack_start(sep, False, False, 0) - self._cards_box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=8) + self._cards_box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=6) + self._cards_box.set_margin_top(8) sw = Gtk.ScrolledWindow() sw.set_policy(Gtk.PolicyType.NEVER, Gtk.PolicyType.AUTOMATIC) sw.add(self._cards_box) @@ -2543,133 +2566,337 @@ class UsageWindow(Gtk.Window): self._refresh() self.show_all() + def _build_header(self, parent): + hdr = Gtk.Box(spacing=8) + hdr.set_margin_start(16) + hdr.set_margin_end(16) + hdr.set_margin_top(12) + hdr.set_margin_bottom(6) + parent.pack_start(hdr, False, False, 0) + + bolt = Gtk.Label() + bolt.set_markup(f'\u26A1') + hdr.pack_start(bolt, False, False, 0) + + title = Gtk.Label() + title.set_markup(f'Usage Dashboard') + hdr.pack_start(title, False, False, 0) + + self._status_dots = Gtk.Label() + hdr.pack_start(self._status_dots, False, False, 8) + + self._updated_lbl = Gtk.Label() + self._updated_lbl.set_markup(f'Never') + hdr.pack_end(self._updated_lbl, False, False, 4) + + refresh_btn = Gtk.Button(label="Refresh") + _apply_css(refresh_btn, f""" + button {{ color: {_U["text"]}; background-color: {_U["surface0"]}; + border: 1px solid {_U["surface1"]}; border-radius: 6px; padding: 4px 12px; }} + button:hover {{ background-color: {_U["surface1"]}; }} + """) + refresh_btn.connect("clicked", lambda b: self._refresh()) + hdr.pack_end(refresh_btn, False, False, 0) + + def _build_summary_strip(self, parent): + strip = Gtk.Box(spacing=0) + strip.set_margin_start(16) + strip.set_margin_end(16) + strip.set_margin_bottom(6) + _apply_css(strip, f"box {{ background-color: {_U["surface0"]}; border-radius: 8px; padding: 8px 12px; }}") + parent.pack_start(strip, False, False, 0) + + self._kpi_boxes = {} + for key, label, icon in [ + ("providers", "Providers", "\U0001F4CA"), + ("requests", "Requests", "\u26A1"), + ("tokens", "Tokens", "\U0001F9E0"), + ("latency", "Avg Latency", "\u23F1"), + ]: + box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=1) + lbl = Gtk.Label() + lbl.set_markup(f'{icon} {label}') + lbl.set_xalign(0) + box.pack_start(lbl, False, False, 0) + val = Gtk.Label() + val.set_markup(f'-') + val.set_xalign(0) + box.pack_start(val, False, False, 0) + box.set_margin_end(20) + strip.pack_start(box, False, False, 0) + self._kpi_boxes[key] = val + def _refresh(self): for c in self._cards_box.get_children(): self._cards_box.remove(c) stats = _load_usage_stats() updated = stats.get("updated") if updated: - self._updated_lbl.set_markup(f'Updated: {updated}') + self._updated_lbl.set_markup(f'{updated}') providers = stats.get("providers", {}) if not providers: empty = Gtk.Label() - empty.set_markup('No usage data yet.\nLaunch a session to start tracking.') + empty.set_markup(f'No usage data yet.\nLaunch a session to start tracking.') empty.set_margin_top(60) self._cards_box.pack_start(empty, False, False, 0) self._cards_box.show_all() return + total_req = 0 + total_tok_in = 0 + total_tok_out = 0 + total_dur = 0.0 + n_ok = 0 + n_warn = 0 + n_err = 0 + sorted_providers = sorted(providers.items(), key=lambda x: x[1].get("total_requests", 0), reverse=True) + for prov_name, prov_data in sorted_providers: + t = prov_data.get("total_requests", 0) + total_req += t + total_tok_in += prov_data.get("total_tokens_in", 0) + total_tok_out += prov_data.get("total_tokens_out", 0) + total_dur += prov_data.get("total_duration_s", 0.0) + fail = prov_data.get("failures", 0) + fail_pct = fail / t if t > 0 else 0 + _, sc = _status_pill(0, fail_pct) + if fail_pct > 0.15: + n_err += 1 + elif fail_pct > 0.05: + n_warn += 1 + else: + n_ok += 1 + + self._kpi_boxes["providers"].set_markup( + f'{len(providers)}') + self._kpi_boxes["requests"].set_markup( + f'{total_req:,}') + tok_sum = total_tok_in + total_tok_out + tok_str = f"{_fmt_tok(tok_sum)} in:{_fmt_tok(total_tok_in)} out:{_fmt_tok(total_tok_out)}" if tok_sum else "N/A" + self._kpi_boxes["tokens"].set_markup( + f'{tok_str}') + avg_lat = total_dur / total_req if total_req > 0 else 0 + self._kpi_boxes["latency"].set_markup( + f'{_fmt_dur(avg_lat)}') + + dots_parts = [] + if n_ok: + dots_parts.append(f'\u25CF{n_ok}') + if n_warn: + dots_parts.append(f'\u25D0{n_warn}') + if n_err: + dots_parts.append(f'\u2717{n_err}') + if dots_parts: + self._status_dots.set_markup(" ".join(dots_parts)) + for prov_name, prov_data in sorted_providers: card = self._build_card(prov_name, prov_data) self._cards_box.pack_start(card, False, False, 0) self._cards_box.show_all() def _build_card(self, name, data): - frame = Gtk.Frame() - frame.set_margin_start(12) - frame.set_margin_end(12) - frame.set_margin_top(4) - frame.set_margin_bottom(4) - style = frame.get_style_context() - style.add_class("card") - - outer = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=4) - outer.set_margin_start(12) - outer.set_margin_end(12) - outer.set_margin_top(8) - outer.set_margin_bottom(8) - frame.add(outer) - - top_row = Gtk.Box(spacing=8) - outer.pack_start(top_row, False, False, 0) + card = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=0) + card.set_margin_start(12) + card.set_margin_end(12) + _apply_css(card, f""" + box {{ background-color: {_U["surface0"]}; border-radius: 10px; + border: 1px solid {_U["surface1"]}; }} + """) total = data.get("total_requests", 0) ok = data.get("successes", 0) fail = data.get("failures", 0) success_rate = ok / total if total > 0 else 1.0 + fail_pct = fail / total if total > 0 else 0 + status_text, status_color = _status_pill(success_rate, fail_pct) + + border_color = status_color + _apply_css(card, f""" + box {{ background-color: {_U["surface0"]}; border-radius: 10px; + border: 1px solid {border_color}; }} + """) + + inner = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=3) + inner.set_margin_start(14) + inner.set_margin_end(14) + inner.set_margin_top(10) + inner.set_margin_bottom(10) + card.pack_start(inner, False, False, 0) + + top = Gtk.Box(spacing=6) + inner.pack_start(top, False, False, 0) + + dot = Gtk.Label() + dot.set_markup(f'\u25CF') + top.pack_start(dot, False, False, 0) name_lbl = Gtk.Label() short = name.replace("https://", "").replace("http://", "").split("/")[0] - name_lbl.set_markup(f'{short}') - top_row.pack_start(name_lbl, False, False, 0) + name_lbl.set_markup(f'{short}') + top.pack_start(name_lbl, False, False, 0) + + pill = Gtk.Label() + pill.set_markup(f' {status_text} ') + top.pack_start(pill, False, False, 4) req_lbl = Gtk.Label() - req_lbl.set_markup(f'{total} requests') - top_row.pack_start(req_lbl, False, False, 8) - - if fail > 0: - err_lbl = Gtk.Label() - err_lbl.set_markup(f'{fail} failed') - top_row.pack_start(err_lbl, False, False, 4) + req_lbl.set_markup(f'{total} req') + top.pack_start(req_lbl, False, False, 6) last_used = data.get("last_used", "") if last_used: lu_lbl = Gtk.Label() - lu_lbl.set_markup(f'{last_used}') - top_row.pack_end(lu_lbl, False, False, 0) + lu_lbl.set_markup(f'{last_used}') + top.pack_end(lu_lbl, False, False, 0) + + sep1 = Gtk.Separator() + _apply_css(sep1, f"separator {{ background-color: {status_color}; margin-top: 4px; }}") + inner.pack_start(sep1, False, False, 0) + + gauge_box = Gtk.Box(spacing=4) + gauge_box.set_margin_top(4) + inner.pack_start(gauge_box, False, False, 0) + + gauge_label = Gtk.Label() + gauge_label.set_markup(f'\u26A1') + gauge_box.pack_start(gauge_label, False, False, 0) - # Progress bar for success rate bar = Gtk.ProgressBar() bar.set_fraction(success_rate) bar_pct = int(success_rate * 100) - bar.set_text(f"{bar_pct}% success") + bar.set_text(f"{bar_pct}%") bar.set_show_text(True) - bar.set_margin_top(2) - bar.set_margin_bottom(2) - color = _bar_color(1.0 - success_rate) - bar_css = f'progress {{ background-color: {color}; border-radius: 4px; }} trough {{ border-radius: 4px; min-height: 10px; }}' - provider = Gtk.CssProvider() - provider.load_from_data(bar_css.encode()) - bar.get_style_context().add_provider(provider, Gtk.STYLE_PROVIDER_PRIORITY_USER) - outer.pack_start(bar, False, False, 0) + bar_css = f""" + progress {{ background-color: {status_color}; border-radius: 6px; }} + trough {{ background-color: {_U["surface1"]}; border-radius: 6px; min-height: 12px; }} + """ + _apply_css(bar, bar_css) + bar.set_hexpand(True) + gauge_box.pack_start(bar, True, True, 0) - # Stats row - stats_row = Gtk.Box(spacing=16) - outer.pack_start(stats_row, False, False, 0) + if fail > 0: + fail_lbl = Gtk.Label() + fail_lbl.set_markup(f'{fail} fail') + gauge_box.pack_end(fail_lbl, False, False, 0) + + metrics_box = Gtk.Box(spacing=0) + metrics_box.set_margin_top(4) + inner.pack_start(metrics_box, False, False, 0) t_in = data.get("total_tokens_in", 0) t_out = data.get("total_tokens_out", 0) dur = data.get("total_duration_s", 0.0) avg_dur = dur / total if total > 0 else 0 - for label, value in [ - ("Tokens In", f"{t_in:,}"), - ("Tokens Out", f"{t_out:,}"), - ("Avg Latency", f"{avg_dur:.1f}s"), + for label, value, color in [ + ("Tokens In", f"{_fmt_tok(t_in)}", _U["sapphire"]), + ("Tokens Out", f"{_fmt_tok(t_out)}", _U["peach"]), + ("Avg Latency", _fmt_dur(avg_dur), _U["sky"]), + ("Duration", _fmt_dur(dur), _U["lavender"]), ]: - box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=1) + box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=0) l = Gtk.Label() - l.set_markup(f'{label}') + l.set_markup(f'{label}') + l.set_xalign(0) box.pack_start(l, False, False, 0) v = Gtk.Label() - v.set_markup(f'{value}') + v.set_markup(f'{value}') + v.set_xalign(0) box.pack_start(v, False, False, 0) - stats_row.pack_start(box, False, False, 0) + box.set_margin_end(16) + metrics_box.pack_start(box, False, False, 0) - # Models breakdown models = data.get("models", {}) - if len(models) > 0: - model_str = " ".join( - f'{m} ' - f'({md.get("requests",0)})' - for m, md in sorted(models.items(), key=lambda x: x[1].get("requests", 0), reverse=True)[:4] - ) - m_lbl = Gtk.Label() - m_lbl.set_markup(f'Models: {model_str}') - m_lbl.set_line_wrap(True) - m_lbl.set_xalign(0) - outer.pack_start(m_lbl, False, False, 2) + if models: + self._build_models_section(inner, models, total) - # Error info last_err = data.get("last_error") if last_err: + err_box = Gtk.Box(spacing=4) + err_box.set_margin_top(4) + inner.pack_start(err_box, False, False, 0) + icon = Gtk.Label() + icon.set_markup(f'\u26A0') + err_box.pack_start(icon, False, False, 0) err_lbl = Gtk.Label() - err_lbl.set_markup(f'Last error: {last_err}') + err_lbl.set_markup(f'{last_err}') err_lbl.set_xalign(0) - outer.pack_start(err_lbl, False, False, 0) + err_lbl.set_line_wrap(True) + err_box.pack_start(err_lbl, False, False, 0) - return frame + return card + + def _build_models_section(self, parent, models, total_req): + sep_m = Gtk.Separator() + _apply_css(sep_m, f"separator {{ background-color: {_U["lavender"]}; margin-top: 4px; margin-bottom: 2px; }}") + parent.pack_start(sep_m, False, False, 0) + + header = Gtk.Box(spacing=4) + header.set_margin_top(2) + parent.pack_start(header, False, False, 0) + icon = Gtk.Label() + icon.set_markup(f'\U0001F916') + header.pack_start(icon, False, False, 0) + lbl = Gtk.Label() + lbl.set_markup(f'Models') + header.pack_start(lbl, False, False, 0) + + sorted_models = sorted(models.items(), key=lambda x: x[1].get("requests", 0), reverse=True) + + if total_req > 0: + comp_bar = Gtk.Box(spacing=0) + _apply_css(comp_bar, f"box {{ background-color: {_U["surface1"]}; border-radius: 4px; min-height: 8px; margin-top: 2px; }}") + parent.pack_start(comp_bar, False, False, 0) + for i, (mname, mdata) in enumerate(sorted_models): + m_req = mdata.get("requests", 0) + pct = m_req / total_req + if pct < 0.01: + continue + seg = Gtk.Box() + color = _U["model_palette"][i % len(_U["model_palette"])] + _apply_css(seg, f"box {{ background-color: {color}; min-height: 8px; }}") + seg.set_size_request(max(int(pct * 400), 4), 8) + comp_bar.pack_start(seg, False, False, 0) + + models_box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=1) + models_box.set_margin_top(2) + parent.pack_start(models_box, False, False, 0) + + for i, (mname, mdata) in enumerate(sorted_models[:6]): + row = Gtk.Box(spacing=6) + models_box.pack_start(row, False, False, 0) + color = _U["model_palette"][i % len(_U["model_palette"])] + dot = Gtk.Label() + dot.set_markup(f'\u25CF') + row.pack_start(dot, False, False, 0) + m_lbl = Gtk.Label() + m_lbl.set_markup(f'{mname}') + m_lbl.set_xalign(0) + m_lbl.set_size_request(120, -1) + row.pack_start(m_lbl, False, False, 0) + + m_req = mdata.get("requests", 0) + pct = m_req / total_req * 100 if total_req > 0 else 0 + + m_bar = Gtk.ProgressBar() + m_bar.set_fraction(m_req / total_req if total_req > 0 else 0) + _apply_css(m_bar, f""" + progress {{ background-color: {color}; border-radius: 3px; }} + trough {{ background-color: {_U["surface1"]}; border-radius: 3px; min-height: 6px; }} + """) + m_bar.set_size_request(80, -1) + row.pack_start(m_bar, False, False, 0) + + pct_lbl = Gtk.Label() + pct_lbl.set_markup(f'{pct:.0f}% ({m_req})') + row.pack_start(pct_lbl, False, False, 0) + + m_in = mdata.get("tokens_in", 0) + m_out = mdata.get("tokens_out", 0) + if m_in or m_out: + tok_lbl = Gtk.Label() + tok_lbl.set_markup(f'in:{_fmt_tok(m_in)} out:{_fmt_tok(m_out)}') + row.pack_end(tok_lbl, False, False, 0) def main(): diff --git a/src/translate-proxy.py b/src/translate-proxy.py index 1d2442e..aa087f4 100755 --- a/src/translate-proxy.py +++ b/src/translate-proxy.py @@ -11,7 +11,7 @@ Usage: python3 translate-proxy.py --backend openai-compat --target-url https://... --api-key sk-... """ -import json, http.server, urllib.request, time, uuid, os, sys, argparse +import json, http.server, urllib.request, time, uuid, os, sys, argparse, threading, socket # ═══════════════════════════════════════════════════════════════════ # Config @@ -141,6 +141,8 @@ _pool = uuid.uuid4().hex[:8] _response_store = {} _MAX_STORED = 50 +_LOG_DIR = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy") +os.makedirs(_LOG_DIR, exist_ok=True) _stats_path = os.path.join(_LOG_DIR, "usage-stats.json") _stats_lock = threading.Lock() @@ -961,9 +963,6 @@ def cc_stream_to_sse(cc_stream, model, req_id): # HTTP Server # ═══════════════════════════════════════════════════════════════════ -_LOG_DIR = os.path.join(os.path.expanduser("~"), ".cache", "codex-proxy") -os.makedirs(_LOG_DIR, exist_ok=True) - def _log_resp(resp_id, status, output): try: import datetime as _dt @@ -1209,6 +1208,11 @@ class Handler(http.server.BaseHTTPRequestHandler): self.send_header("Cache-Control", "no-cache") self.send_header("Connection", "keep-alive") self.end_headers() + if hasattr(self, 'connection') and self.connection: + try: + self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + except Exception: + pass collected_events = [] last_resp_id = None @@ -1290,6 +1294,11 @@ class Handler(http.server.BaseHTTPRequestHandler): self.send_header("Cache-Control", "no-cache") self.send_header("Connection", "keep-alive") self.end_headers() + if hasattr(self, 'connection') and self.connection: + try: + self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + except Exception: + pass last_resp_id = None last_output = None @@ -1322,7 +1331,8 @@ class Handler(http.server.BaseHTTPRequestHandler): "max_tokens": body.get("max_output_tokens", 8192)} instructions = body.get("instructions", "").strip() if instructions: - an_body["system"] = instructions + an_body["system"] = [{"type": "text", "text": instructions, + "cache_control": {"type": "ephemeral"}}] for k in ("temperature", "top_p"): if k in body: an_body[k] = body[k] @@ -1429,6 +1439,11 @@ class Handler(http.server.BaseHTTPRequestHandler): self.send_header("Cache-Control", "no-cache") self.send_header("Connection", "keep-alive") self.end_headers() + if hasattr(self, 'connection') and self.connection: + try: + self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + except Exception: + pass last_resp_id = None last_output = None for event in cc_stream_to_sse(upstream, model, body.get("request_id") or body.get("id")): @@ -1476,6 +1491,11 @@ class Handler(http.server.BaseHTTPRequestHandler): self.send_header("Cache-Control", "no-cache") self.send_header("Connection", "keep-alive") self.end_headers() + if hasattr(self, 'connection') and self.connection: + try: + self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + except Exception: + pass last_resp_id = None last_output = None last_status = None