Optimize gateway comms reload behavior and strengthen regression coverage (#496)

This commit is contained in:
Lingxuan Zuo
2026-03-15 20:36:48 +08:00
committed by GitHub
Unverified
parent 08960d700f
commit 1dbe4a8466
36 changed files with 1511 additions and 197 deletions

View File

@@ -0,0 +1,22 @@
import { mkdir, readFile, writeFile } from 'node:fs/promises';
import path from 'node:path';
import process from 'node:process';
// Absolute filesystem path of this script. `import.meta.filename` (when the runtime provides it)
// is the already-decoded native path of the module. The fallback decodes the percent-escapes that
// `URL#pathname` keeps (for example `%20` for a space); without decoding, a checkout located in a
// directory whose name contains such characters would resolve to a path that does not exist.
const SCRIPT_PATH = import.meta.filename ?? decodeURIComponent(new URL(import.meta.url).pathname);
// Repository root: two directory levels above the directory that contains this script.
const ROOT = path.resolve(path.dirname(SCRIPT_PATH), '..', '..');
// Metrics file that is promoted to become the baseline.
const CURRENT_FILE = path.join(ROOT, 'artifacts/comms/current-metrics.json');
// Directory and file that hold the stored baseline metrics.
const BASELINE_DIR = path.join(ROOT, 'scripts/comms/baseline');
const BASELINE_FILE = path.join(BASELINE_DIR, 'metrics.baseline.json');
/**
 * Promotes the current metrics file to the stored baseline.
 *
 * Reads CURRENT_FILE, parses it as JSON (so a malformed file is rejected before anything is
 * written), creates BASELINE_DIR if needed and writes the data to BASELINE_FILE re-serialized
 * with a two-space indent.
 *
 * @returns {Promise<void>} Rejects when the current file cannot be read or parsed, or when the
 *   baseline cannot be written.
 */
async function main() {
  const currentMetrics = JSON.parse(await readFile(CURRENT_FILE, 'utf8'));
  const serialized = JSON.stringify(currentMetrics, null, 2);

  await mkdir(BASELINE_DIR, { recursive: true });
  await writeFile(BASELINE_FILE, serialized);

  console.log(`Updated comms baseline: ${BASELINE_FILE}`);
}
// Logs a failed baseline update and marks the process as failed through process.exitCode
// (process.exit() is not called).
function reportBaselineFailure(failure) {
  console.error('[comms:baseline] failed:', failure);
  process.exitCode = 1;
}

// The script runs as soon as the module is evaluated.
main().catch(reportBaselineFailure);

View File

@@ -0,0 +1,126 @@
{
"generated_at": "2026-03-14T15:02:39.817Z",
"scenario": "all",
"scenarios": {
"gateway-restart-during-run": {
"duplicate_event_rate": 0,
"event_fanout_ratio": 1,
"history_inflight_max": 1,
"history_load_qps": 0.4166666666666667,
"rpc_p50_ms": 240,
"rpc_p95_ms": 240,
"rpc_timeout_rate": 0,
"gateway_reconnect_count": 1,
"message_loss_count": 0,
"message_order_violation_count": 0,
"_meta": {
"duration_sec": 2.4,
"total_gateway_events": 4,
"unique_gateway_events": 4,
"total_rpc_calls": 1
}
},
"happy-path-chat": {
"duplicate_event_rate": 0,
"event_fanout_ratio": 1,
"history_inflight_max": 1,
"history_load_qps": 0.2857142857142857,
"rpc_p50_ms": 180,
"rpc_p95_ms": 180,
"rpc_timeout_rate": 0,
"gateway_reconnect_count": 0,
"message_loss_count": 0,
"message_order_violation_count": 0,
"_meta": {
"duration_sec": 3.5,
"total_gateway_events": 3,
"unique_gateway_events": 3,
"total_rpc_calls": 1
}
},
"history-overlap-guard": {
"duplicate_event_rate": 0,
"event_fanout_ratio": 1,
"history_inflight_max": 1,
"history_load_qps": 1,
"rpc_p50_ms": 95,
"rpc_p95_ms": 95,
"rpc_timeout_rate": 0,
"gateway_reconnect_count": 0,
"message_loss_count": 0,
"message_order_violation_count": 0,
"_meta": {
"duration_sec": 2,
"total_gateway_events": 2,
"unique_gateway_events": 2,
"total_rpc_calls": 1
}
},
"invalid-config-patch-recovered": {
"duplicate_event_rate": 0,
"event_fanout_ratio": 1,
"history_inflight_max": 1,
"history_load_qps": 0.4166666666666667,
"rpc_p50_ms": 110,
"rpc_p95_ms": 130,
"rpc_timeout_rate": 0,
"gateway_reconnect_count": 1,
"message_loss_count": 0,
"message_order_violation_count": 0,
"_meta": {
"duration_sec": 2.4,
"total_gateway_events": 3,
"unique_gateway_events": 3,
"total_rpc_calls": 2
}
},
"multi-agent-channel-switch": {
"duplicate_event_rate": 0,
"event_fanout_ratio": 1,
"history_inflight_max": 0,
"history_load_qps": 0,
"rpc_p50_ms": 210,
"rpc_p95_ms": 240,
"rpc_timeout_rate": 0,
"gateway_reconnect_count": 0,
"message_loss_count": 0,
"message_order_violation_count": 0,
"_meta": {
"duration_sec": 2.1,
"total_gateway_events": 5,
"unique_gateway_events": 5,
"total_rpc_calls": 2
}
},
"network-degraded": {
"duplicate_event_rate": 0,
"event_fanout_ratio": 1,
"history_inflight_max": 1,
"history_load_qps": 0.35714285714285715,
"rpc_p50_ms": 420,
"rpc_p95_ms": 820,
"rpc_timeout_rate": 0,
"gateway_reconnect_count": 1,
"message_loss_count": 0,
"message_order_violation_count": 0,
"_meta": {
"duration_sec": 2.8,
"total_gateway_events": 3,
"unique_gateway_events": 3,
"total_rpc_calls": 2
}
}
},
"aggregate": {
"duplicate_event_rate": 0,
"event_fanout_ratio": 1,
"history_inflight_max": 1,
"history_load_qps": 0.41269841269841273,
"rpc_p50_ms": 209.16666666666666,
"rpc_p95_ms": 284.1666666666667,
"rpc_timeout_rate": 0,
"gateway_reconnect_count": 3,
"message_loss_count": 0,
"message_order_violation_count": 0
}
}

122
scripts/comms/compare.mjs Normal file
View File

@@ -0,0 +1,122 @@
import { mkdir, readFile, writeFile } from 'node:fs/promises';
import path from 'node:path';
import process from 'node:process';
// Absolute filesystem path of this script. `import.meta.filename` (when the runtime provides it)
// is the already-decoded native path of the module. The fallback decodes the percent-escapes that
// `URL#pathname` keeps (for example `%20` for a space); without decoding, a checkout located in a
// directory whose name contains such characters would resolve to a path that does not exist.
const SCRIPT_PATH = import.meta.filename ?? decodeURIComponent(new URL(import.meta.url).pathname);
// Repository root: two directory levels above the directory that contains this script.
const ROOT = path.resolve(path.dirname(SCRIPT_PATH), '..', '..');
// Inputs of the comparison: the current metrics and the stored baseline.
const CURRENT_FILE = path.join(ROOT, 'artifacts/comms/current-metrics.json');
const BASELINE_FILE = path.join(ROOT, 'scripts/comms/baseline/metrics.baseline.json');
// Output location of the generated markdown report.
const OUTPUT_DIR = path.join(ROOT, 'artifacts/comms');
const REPORT_FILE = path.join(OUTPUT_DIR, 'compare-report.md');
// Absolute upper bounds. evaluateReport checks each of these metrics with `value <= bound`, both
// for every required scenario and for the aggregate; a value above its bound is a failure.
const HARD_THRESHOLDS = {
duplicate_event_rate: 0.005,
event_fanout_ratio: 1.2,
history_inflight_max: 1,
rpc_timeout_rate: 0.01,
message_loss_count: 0,
message_order_violation_count: 0,
};
// Maximum allowed relative increase of an aggregate metric over its baseline value, expressed as
// a fraction (0.10 is rendered as 10.00% in the report).
const RELATIVE_THRESHOLDS = {
history_load_qps: 0.10,
rpc_p95_ms: 0.15,
};
// Scenario names that must be present under `scenarios` in the current metrics; a missing one is
// reported as a failure.
const REQUIRED_SCENARIOS = [
'gateway-restart-during-run',
'happy-path-chat',
'history-overlap-guard',
'invalid-config-patch-recovered',
'multi-agent-channel-switch',
'network-degraded',
];
/**
 * Relative change of `current` with respect to `baseline`.
 *
 * @param {number} current - Value being evaluated.
 * @param {number} baseline - Reference value.
 * @returns {number} `(current - baseline) / baseline` when the baseline is a finite, non-zero
 *   number. Otherwise 0 when `current` is exactly 0 and `Infinity` in every other case.
 */
function ratioDelta(current, baseline) {
  const hasUsableBaseline = Number.isFinite(baseline) && baseline !== 0;
  if (hasUsableBaseline) {
    return (current - baseline) / baseline;
  }
  // No meaningful reference: only "still zero" counts as unchanged.
  if (current === 0) {
    return 0;
  }
  return Infinity;
}
/**
 * Formats a fraction as a percentage with two decimals (0.15 becomes "15.00%").
 *
 * @param {number} value - Fraction to format.
 * @returns {string} The value multiplied by 100, fixed to two decimals, followed by "%".
 */
function fmtPercent(value) {
  const scaled = value * 100;
  const digits = scaled.toFixed(2);
  return `${digits}%`;
}
/**
 * Formats a metric value for the report table.
 *
 * @param {*} value - Value to format.
 * @returns {string} Four fixed decimals for finite numbers; the plain string conversion for
 *   anything else (Infinity, NaN, non-number values).
 */
function fmtNumber(value) {
  if (!Number.isFinite(value)) {
    return String(value);
  }
  return Number(value).toFixed(4);
}
/**
 * Evaluates the current metrics against the hard limits and against the baseline.
 *
 * Checks, in this order (which is also the order of the returned rows):
 *   1. every required scenario: it must exist and each hard-limited metric must be `<=` its bound;
 *   2. the aggregate metrics against the same hard limits;
 *   3. the aggregate metrics with a relative limit against the baseline aggregate.
 *
 * NOTE(review): a metric that is absent from the input is read as 0 and therefore passes the
 * hard limits; confirm this leniency is intended for a regression gate.
 *
 * @param {object} current - Current metrics (`scenarios` and `aggregate` are read).
 * @param {object} baseline - Baseline metrics (`aggregate` is read).
 * @returns {{failures: string[], rows: string[]}} Failure messages and markdown table rows.
 */
export function evaluateReport(current, baseline) {
  const currentAggregate = current.aggregate ?? {};
  const baselineAggregate = baseline.aggregate ?? {};
  const scenarioTable = current.scenarios ?? {};
  const failures = [];
  const rows = [];

  // Applies every hard limit to one metrics object. The prefixes distinguish per-scenario
  // entries from aggregate entries in the row label and the failure message.
  const applyHardLimits = (metrics, rowPrefix, failurePrefix) => {
    for (const [metric, threshold] of Object.entries(HARD_THRESHOLDS)) {
      const observed = Number(metrics[metric] ?? 0);
      const verdict = observed <= threshold ? 'PASS' : 'FAIL';
      if (verdict === 'FAIL') {
        failures.push(`${failurePrefix}${metric}=${observed} > ${threshold}`);
      }
      rows.push(`| ${rowPrefix}${metric} | ${fmtNumber(observed)} | <= ${threshold} | ${verdict} |`);
    }
  };

  REQUIRED_SCENARIOS.forEach((scenario) => {
    const scenarioMetrics = scenarioTable[scenario];
    if (scenarioMetrics) {
      applyHardLimits(scenarioMetrics, `${scenario}.`, `scenario:${scenario} `);
      return;
    }
    failures.push(`missing scenario: ${scenario}`);
    rows.push(`| scenario:${scenario} | missing | required | FAIL |`);
  });

  applyHardLimits(currentAggregate, '', '');

  Object.entries(RELATIVE_THRESHOLDS).forEach(([metric, maxIncrease]) => {
    const observed = Number(currentAggregate[metric] ?? 0);
    const reference = Number(baselineAggregate[metric] ?? 0);
    const delta = ratioDelta(observed, reference);
    const verdict = delta <= maxIncrease ? 'PASS' : 'FAIL';
    if (verdict === 'FAIL') {
      failures.push(`${metric} delta=${delta} > ${maxIncrease}`);
    }
    rows.push(`| ${metric} | ${fmtNumber(observed)} (baseline ${fmtNumber(reference)}) | delta <= ${fmtPercent(maxIncrease)} | ${verdict} (${fmtPercent(delta)}) |`);
  });

  return { failures, rows };
}
/**
 * Compares the current metrics with the baseline and writes a markdown report.
 *
 * Reads and parses CURRENT_FILE and BASELINE_FILE, evaluates them, writes the report to
 * REPORT_FILE (creating OUTPUT_DIR if needed) and prints it. When any check failed, the failures
 * are printed to stderr and `process.exitCode` is set to 1.
 *
 * @returns {Promise<void>} Rejects when an input cannot be read or parsed, or the report cannot
 *   be written.
 */
export async function main() {
  const currentRaw = await readFile(CURRENT_FILE, 'utf8');
  const current = JSON.parse(currentRaw);
  const baselineRaw = await readFile(BASELINE_FILE, 'utf8');
  const baseline = JSON.parse(baselineRaw);

  const { failures, rows } = evaluateReport(current, baseline);
  const passed = failures.length === 0;

  const headerLines = [
    '# Comms Regression Report',
    '',
    `- Generated at: ${new Date().toISOString()}`,
    `- Result: ${passed ? 'PASS' : 'FAIL'}`,
    '',
    '| Metric | Current | Threshold | Status |',
    '|---|---:|---:|---|',
  ];
  // The trailing empty element makes the report end with a newline.
  const report = [...headerLines, ...rows, ''].join('\n');

  await mkdir(OUTPUT_DIR, { recursive: true });
  await writeFile(REPORT_FILE, report);
  console.log(report);
  console.log(`\nWrote comparison report to ${REPORT_FILE}`);

  if (passed) {
    return;
  }
  console.error('\nThreshold failures:\n- ' + failures.join('\n- '));
  process.exitCode = 1;
}
// Run main() only when this file is the script passed to node, not when it is imported.
// The module path must be a decoded filesystem path to be comparable with process.argv[1]:
// `URL#pathname` keeps percent-escapes (a space stays `%20`), so comparing it directly never
// matches for such checkouts and the script would silently do nothing. `import.meta.filename`
// is used when the runtime provides it; otherwise the pathname is decoded.
const entryScriptPath = import.meta.filename ?? decodeURIComponent(new URL(import.meta.url).pathname);
const isEntrypoint = Boolean(process.argv[1])
  && path.resolve(process.argv[1]) === path.resolve(entryScriptPath);
if (isEntrypoint) {
  main().catch((error) => {
    console.error('[comms:compare] failed:', error);
    process.exitCode = 1;
  });
}

View File

@@ -0,0 +1,9 @@
{"ts":0,"type":"gateway_event","runId":"run-restart-1","sessionKey":"agent:main:session-restart","seq":1,"state":"started","fanout":1}
{"ts":0.3,"type":"rpc","method":"chat.send","latencyMs":240,"timeout":false}
{"ts":0.6,"type":"gateway_reconnect"}
{"ts":0.8,"type":"gateway_event","runId":"run-restart-1","sessionKey":"agent:main:session-restart","seq":2,"state":"delta","fanout":1}
{"ts":1.1,"type":"gateway_event","runId":"run-restart-1","sessionKey":"agent:main:session-restart","seq":3,"state":"delta","fanout":1}
{"ts":1.4,"type":"history_load","sessionKey":"agent:main:session-restart","action":"start"}
{"ts":1.6,"type":"history_load","sessionKey":"agent:main:session-restart","action":"end"}
{"ts":2.0,"type":"gateway_event","runId":"run-restart-1","sessionKey":"agent:main:session-restart","seq":4,"state":"final","fanout":1}
{"ts":2.4,"type":"message","lost":false,"orderViolation":false}

View File

@@ -0,0 +1,7 @@
{"ts":0,"type":"gateway_event","runId":"run-happy-1","sessionKey":"agent:main:session-happy","seq":1,"state":"started","fanout":1}
{"ts":1,"type":"gateway_event","runId":"run-happy-1","sessionKey":"agent:main:session-happy","seq":2,"state":"delta","fanout":1}
{"ts":2,"type":"history_load","sessionKey":"agent:main:session-happy","action":"start"}
{"ts":2.2,"type":"history_load","sessionKey":"agent:main:session-happy","action":"end"}
{"ts":2.5,"type":"rpc","method":"chat.send","latencyMs":180,"timeout":false}
{"ts":3.0,"type":"gateway_event","runId":"run-happy-1","sessionKey":"agent:main:session-happy","seq":3,"state":"final","fanout":1}
{"ts":3.5,"type":"message","lost":false,"orderViolation":false}

View File

@@ -0,0 +1,8 @@
{"ts":0,"type":"gateway_event","runId":"run-history-1","sessionKey":"agent:main:session-history","seq":1,"state":"started","fanout":1}
{"ts":0.2,"type":"history_load","sessionKey":"agent:main:session-history","action":"start"}
{"ts":0.4,"type":"history_load","sessionKey":"agent:main:session-history","action":"end"}
{"ts":0.8,"type":"history_load","sessionKey":"agent:main:session-history","action":"start"}
{"ts":1.0,"type":"history_load","sessionKey":"agent:main:session-history","action":"end"}
{"ts":1.4,"type":"rpc","method":"chat.history","latencyMs":95,"timeout":false}
{"ts":1.8,"type":"gateway_event","runId":"run-history-1","sessionKey":"agent:main:session-history","seq":2,"state":"final","fanout":1}
{"ts":2.0,"type":"message","lost":false,"orderViolation":false}

View File

@@ -0,0 +1,9 @@
{"ts":0,"type":"gateway_event","runId":"run-invalid-1","sessionKey":"agent:main:session-invalid","seq":1,"state":"started","fanout":1}
{"ts":0.2,"type":"rpc","method":"config.patch","latencyMs":110,"timeout":false}
{"ts":0.4,"type":"rpc","method":"config.patch","latencyMs":130,"timeout":false}
{"ts":0.7,"type":"gateway_reconnect"}
{"ts":1.0,"type":"history_load","sessionKey":"agent:main:session-invalid","action":"start"}
{"ts":1.3,"type":"history_load","sessionKey":"agent:main:session-invalid","action":"end"}
{"ts":1.7,"type":"gateway_event","runId":"run-invalid-1","sessionKey":"agent:main:session-invalid","seq":2,"state":"delta","fanout":1}
{"ts":2.1,"type":"gateway_event","runId":"run-invalid-1","sessionKey":"agent:main:session-invalid","seq":3,"state":"final","fanout":1}
{"ts":2.4,"type":"message","lost":false,"orderViolation":false}

View File

@@ -0,0 +1,8 @@
{"ts":0,"type":"gateway_event","runId":"run-a-main","sessionKey":"agent:main:session-1","seq":1,"state":"started","fanout":1}
{"ts":0.2,"type":"rpc","method":"chat.send","latencyMs":210,"timeout":false}
{"ts":0.5,"type":"gateway_event","runId":"run-a-main","sessionKey":"agent:main:session-1","seq":2,"state":"delta","fanout":1}
{"ts":0.8,"type":"gateway_event","runId":"run-a-team","sessionKey":"agent:team-a:session-2","seq":1,"state":"started","fanout":1}
{"ts":1.1,"type":"rpc","method":"chat.send","latencyMs":240,"timeout":false}
{"ts":1.4,"type":"gateway_event","runId":"run-a-main","sessionKey":"agent:main:session-1","seq":3,"state":"final","fanout":1}
{"ts":1.8,"type":"gateway_event","runId":"run-a-team","sessionKey":"agent:team-a:session-2","seq":2,"state":"final","fanout":1}
{"ts":2.1,"type":"message","lost":false,"orderViolation":false}

View File

@@ -0,0 +1,9 @@
{"ts":0,"type":"gateway_event","runId":"run-net-1","sessionKey":"agent:main:session-net","seq":1,"state":"started","fanout":1}
{"ts":0.2,"type":"rpc","method":"chat.send","latencyMs":420,"timeout":false}
{"ts":0.8,"type":"rpc","method":"chat.history","latencyMs":820,"timeout":false}
{"ts":1.0,"type":"history_load","sessionKey":"agent:main:session-net","action":"start"}
{"ts":1.5,"type":"history_load","sessionKey":"agent:main:session-net","action":"end"}
{"ts":1.6,"type":"gateway_reconnect"}
{"ts":2.1,"type":"gateway_event","runId":"run-net-1","sessionKey":"agent:main:session-net","seq":2,"state":"delta","fanout":1}
{"ts":2.3,"type":"gateway_event","runId":"run-net-1","sessionKey":"agent:main:session-net","seq":3,"state":"final","fanout":1}
{"ts":2.8,"type":"message","lost":false,"orderViolation":false}

176
scripts/comms/replay.mjs Normal file
View File

@@ -0,0 +1,176 @@
import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises';
import path from 'node:path';
import process from 'node:process';
// Absolute filesystem path of this script. `import.meta.filename` (when the runtime provides it)
// is the already-decoded native path of the module. The fallback decodes the percent-escapes that
// `URL#pathname` keeps (for example `%20` for a space); without decoding, a checkout located in a
// directory whose name contains such characters would resolve to a path that does not exist.
const SCRIPT_PATH = import.meta.filename ?? decodeURIComponent(new URL(import.meta.url).pathname);
// Repository root: two directory levels above the directory that contains this script.
const ROOT = path.resolve(path.dirname(SCRIPT_PATH), '..', '..');
// Directory scanned for `*.jsonl` scenario datasets.
const DATASET_DIR = path.join(ROOT, 'scripts/comms/datasets');
// Output location of the computed metrics.
const OUTPUT_DIR = path.join(ROOT, 'artifacts/comms');
const OUTPUT_FILE = path.join(OUTPUT_DIR, 'current-metrics.json');
/**
 * Nearest-rank percentile of a list of numbers.
 *
 * @param {number[]} values - Samples; the array is not modified.
 * @param {number} p - Percentile in percent. Values at or below 0 select the smallest sample and
 *   values at or above 100 select the largest.
 * @returns {number} The selected sample, or 0 when `values` is empty.
 */
export function percentile(values, p) {
  if (values.length === 0) return 0;
  const sorted = [...values].sort((a, b) => a - b);
  const rank = Math.ceil((p / 100) * sorted.length) - 1;
  // Clamp on both ends: without the lower bound, p = 0 yields index -1 and returns undefined.
  const idx = Math.min(sorted.length - 1, Math.max(0, rank));
  return sorted[idx];
}
/**
 * Builds the key used to detect duplicate gateway events.
 *
 * @param {object} event - Replay event.
 * @returns {string|null} `runId|sessionKey|seq|state` (missing parts are empty), or null when
 *   the event is not a `gateway_event` or carries none of the four identifying fields.
 */
export function dedupeKey(event) {
  if (event.type !== 'gateway_event') return null;
  const runId = event.runId ?? '';
  const sessionKey = event.sessionKey ?? '';
  const seq = event.seq ?? '';
  const state = event.state ?? '';
  // 0 is a real sequence number, so it must count as "present" even though it is falsy;
  // otherwise an event identified only by seq 0 gets no key while seq 1 does.
  const hasSeq = seq === 0 || Boolean(seq);
  if (!runId && !sessionKey && !hasSeq && !state) return null;
  return `${runId}|${sessionKey}|${seq}|${state}`;
}
/**
 * Derives the communication metrics of one scenario from its event log.
 *
 * Events are processed in ascending `ts` order (a missing `ts` counts as 0; the input array is
 * not modified). Recognized event types are `gateway_event`, `history_load`, `rpc`,
 * `gateway_reconnect` and `message`; other types are ignored.
 *
 * @param {object[]} events - Replay events of a single scenario.
 * @returns {object} Metrics object:
 *   - duplicate_event_rate: duplicated gateway events / all gateway events (0 without events);
 *   - event_fanout_ratio: summed `fanout` (default 1) / unique gateway events (0 without events);
 *   - history_inflight_max: highest number of concurrently open history loads for one session;
 *   - history_load_qps: history load starts / scenario duration;
 *   - rpc_p50_ms, rpc_p95_ms: percentiles over the positive RPC latencies;
 *   - rpc_timeout_rate: timed-out RPC calls / all RPC calls (0 without calls);
 *   - gateway_reconnect_count, message_loss_count, message_order_violation_count: plain counts;
 *   - _meta: duration and the raw totals behind the ratios.
 */
export function calculateScenarioMetrics(events) {
  let totalGatewayEvents = 0;
  let uniqueGatewayEvents = 0;
  let duplicateGatewayEvents = 0;
  let fanoutTotal = 0;
  let gatewayReconnectCount = 0;
  let messageLossCount = 0;
  let messageOrderViolationCount = 0;
  let totalRpcCalls = 0;
  let rpcTimeoutCount = 0;
  let historyInflightMax = 0;
  let historyLoadCount = 0;
  const rpcLatencies = [];
  const seenEventKeys = new Set();
  const historyInFlight = new Map();

  const sorted = [...events].sort((a, b) => (a.ts ?? 0) - (b.ts ?? 0));
  const startTs = sorted.length > 0 ? (sorted[0].ts ?? 0) : 0;
  const endTs = sorted.length > 0 ? (sorted[sorted.length - 1].ts ?? 0) : 0;
  // Clamped to at least 1, which also keeps the qps division below away from zero.
  const durationSec = Math.max(1, endTs - startTs);

  for (const event of sorted) {
    switch (event.type) {
      case 'gateway_event': {
        totalGatewayEvents += 1;
        fanoutTotal += Number(event.fanout ?? 1);
        const key = dedupeKey(event);
        // Events without a key cannot be compared and are always counted as unique.
        if (key && seenEventKeys.has(key)) {
          duplicateGatewayEvents += 1;
        } else {
          uniqueGatewayEvents += 1;
          if (key) seenEventKeys.add(key);
        }
        break;
      }
      case 'history_load': {
        const sessionKey = String(event.sessionKey ?? 'unknown');
        const inFlight = historyInFlight.get(sessionKey) ?? 0;
        if (event.action === 'start') {
          const next = inFlight + 1;
          historyInFlight.set(sessionKey, next);
          historyInflightMax = Math.max(historyInflightMax, next);
          historyLoadCount += 1;
        } else if (event.action === 'end') {
          // An unmatched "end" must not push the counter below zero.
          historyInFlight.set(sessionKey, Math.max(0, inFlight - 1));
        }
        break;
      }
      case 'rpc': {
        // Count every call, not only those with a latency sample: a timed-out call may carry no
        // positive latency, and leaving it out of the denominator reports an all-timeout
        // scenario as a timeout rate of 0.
        totalRpcCalls += 1;
        const latency = Number(event.latencyMs ?? 0);
        if (latency > 0) rpcLatencies.push(latency);
        if (event.timeout === true) rpcTimeoutCount += 1;
        break;
      }
      case 'gateway_reconnect':
        gatewayReconnectCount += 1;
        break;
      case 'message':
        if (event.lost === true) messageLossCount += 1;
        if (event.orderViolation === true) messageOrderViolationCount += 1;
        break;
      default:
        break;
    }
  }

  return {
    duplicate_event_rate: totalGatewayEvents > 0 ? duplicateGatewayEvents / totalGatewayEvents : 0,
    event_fanout_ratio: uniqueGatewayEvents > 0 ? fanoutTotal / uniqueGatewayEvents : 0,
    history_inflight_max: historyInflightMax,
    history_load_qps: historyLoadCount / durationSec,
    rpc_p50_ms: percentile(rpcLatencies, 50),
    rpc_p95_ms: percentile(rpcLatencies, 95),
    rpc_timeout_rate: totalRpcCalls > 0 ? rpcTimeoutCount / totalRpcCalls : 0,
    gateway_reconnect_count: gatewayReconnectCount,
    message_loss_count: messageLossCount,
    message_order_violation_count: messageOrderViolationCount,
    _meta: {
      duration_sec: durationSec,
      total_gateway_events: totalGatewayEvents,
      unique_gateway_events: uniqueGatewayEvents,
      total_rpc_calls: totalRpcCalls,
    },
  };
}
/**
 * Combines per-scenario metrics into one aggregate object.
 *
 * Rates, ratios, qps and latency percentiles are arithmetic means over the scenarios,
 * `history_inflight_max` is the maximum, and the count metrics are rounded sums. A missing
 * metric counts as 0. For an empty list the metrics of an empty event log are returned.
 *
 * @param {object[]} metricsList - Metrics objects, one per scenario.
 * @returns {object} Aggregate metrics.
 */
export function aggregateMetrics(metricsList) {
  const scenarioCount = metricsList.length;
  if (scenarioCount === 0) {
    return calculateScenarioMetrics([]);
  }

  const total = (key) => {
    let running = 0;
    for (const entry of metricsList) {
      running += Number(entry[key] ?? 0);
    }
    return running;
  };
  const mean = (key) => total(key) / scenarioCount;
  const roundedTotal = (key) => Math.round(total(key));
  const inflightPeaks = metricsList.map((entry) => Number(entry.history_inflight_max ?? 0));

  return {
    duplicate_event_rate: mean('duplicate_event_rate'),
    event_fanout_ratio: mean('event_fanout_ratio'),
    history_inflight_max: Math.max(...inflightPeaks),
    history_load_qps: mean('history_load_qps'),
    rpc_p50_ms: mean('rpc_p50_ms'),
    rpc_p95_ms: mean('rpc_p95_ms'),
    rpc_timeout_rate: mean('rpc_timeout_rate'),
    gateway_reconnect_count: roundedTotal('gateway_reconnect_count'),
    message_loss_count: roundedTotal('message_loss_count'),
    message_order_violation_count: roundedTotal('message_order_violation_count'),
  };
}
/**
 * Loads one scenario dataset from DATASET_DIR.
 *
 * The file is read as UTF-8 and split on newlines; each line is trimmed, blank lines are
 * skipped and every remaining line is parsed as one JSON value.
 *
 * @param {string} fileName - Dataset file name inside DATASET_DIR.
 * @returns {Promise<object[]>} Parsed events in file order. Rejects when the file cannot be
 *   read or a line is not valid JSON.
 */
export async function loadScenario(fileName) {
  const contents = await readFile(path.join(DATASET_DIR, fileName), 'utf8');
  const events = [];
  for (const rawLine of contents.split('\n')) {
    const line = rawLine.trim();
    if (line) {
      events.push(JSON.parse(line));
    }
  }
  return events;
}
/**
 * Replays the scenario datasets and writes the resulting metrics to OUTPUT_FILE.
 *
 * The scenario is chosen with a `--scenario=<name>` command-line argument and defaults to `all`,
 * which selects every `*.jsonl` file in DATASET_DIR (processed in sorted order). The output
 * holds the per-scenario metrics plus their aggregate.
 *
 * @returns {Promise<void>} Rejects when no dataset matches the requested scenario, or when
 *   reading, parsing or writing fails.
 */
export async function main() {
  const scenarioFlag = process.argv.find((arg) => arg.startsWith('--scenario='));
  const argScenario = scenarioFlag?.split('=')[1] ?? 'all';

  const datasetFiles = (await readdir(DATASET_DIR))
    .filter((name) => name.endsWith('.jsonl'))
    .sort();
  const wantsAll = argScenario === 'all';
  const selectedFiles = datasetFiles.filter((name) => wantsAll || name === `${argScenario}.jsonl`);
  if (selectedFiles.length === 0) {
    throw new Error(`No dataset found for scenario "${argScenario}"`);
  }

  // Datasets are loaded one after another so the keys of `scenarios` follow the sorted order.
  const scenarios = {};
  for (const fileName of selectedFiles) {
    const events = await loadScenario(fileName);
    scenarios[fileName.replace(/\.jsonl$/, '')] = calculateScenarioMetrics(events);
  }

  const output = {
    generated_at: new Date().toISOString(),
    scenario: argScenario,
    scenarios,
    aggregate: aggregateMetrics(Object.values(scenarios)),
  };

  await mkdir(OUTPUT_DIR, { recursive: true });
  await writeFile(OUTPUT_FILE, JSON.stringify(output, null, 2));
  console.log(`Wrote comms replay metrics to ${OUTPUT_FILE}`);
}
// Run main() only when this file is the script passed to node, not when it is imported.
// The module path must be a decoded filesystem path to be comparable with process.argv[1]:
// `URL#pathname` keeps percent-escapes (a space stays `%20`), so comparing it directly never
// matches for such checkouts and the script would silently do nothing. `import.meta.filename`
// is used when the runtime provides it; otherwise the pathname is decoded.
const entryScriptPath = import.meta.filename ?? decodeURIComponent(new URL(import.meta.url).pathname);
const isEntrypoint = Boolean(process.argv[1])
  && path.resolve(process.argv[1]) === path.resolve(entryScriptPath);
if (isEntrypoint) {
  main().catch((error) => {
    console.error('[comms:replay] failed:', error);
    process.exitCode = 1;
  });
}