Optimize gateway comms reload behavior and strengthen regression coverage (#496)
This commit is contained in:
committed by
GitHub
Unverified
parent
08960d700f
commit
1dbe4a8466
22
scripts/comms/baseline.mjs
Normal file
22
scripts/comms/baseline.mjs
Normal file
@@ -0,0 +1,22 @@
|
||||
import { mkdir, readFile, writeFile } from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import process from 'node:process';
|
||||
|
||||
// Repository root: two directories above the directory holding this script.
// URL#pathname is percent-encoded (a space becomes "%20"), so it is decoded
// before being used as a filesystem path.
// NOTE(review): still POSIX-only; a Windows file URL yields "/C:/...". node:url's
// fileURLToPath covers that case but is not imported in this module.
const ROOT = path.resolve(
  path.dirname(decodeURIComponent(new URL(import.meta.url).pathname)),
  '..',
  '..',
);
// Metrics file that main() promotes to the baseline.
const CURRENT_FILE = path.join(ROOT, 'artifacts/comms/current-metrics.json');
// Directory and file holding the committed baseline.
const BASELINE_DIR = path.join(ROOT, 'scripts/comms/baseline');
const BASELINE_FILE = path.join(BASELINE_DIR, 'metrics.baseline.json');
|
||||
|
||||
/**
 * Promote the latest replay metrics to the regression baseline.
 *
 * Reads CURRENT_FILE, verifies that it contains an `aggregate` object, and
 * writes the parsed content pretty-printed to BASELINE_FILE, creating
 * BASELINE_DIR first if it does not exist.
 *
 * @returns {Promise<void>}
 * @throws {Error} When CURRENT_FILE cannot be read or parsed, or when it has
 *   no `aggregate` object.
 */
async function main() {
  const raw = await readFile(CURRENT_FILE, 'utf8');
  const current = JSON.parse(raw);

  // A baseline without `aggregate` gives the comparison nothing to measure
  // against, so fail before overwriting the existing baseline file.
  const aggregate = current?.aggregate;
  if (aggregate === null || typeof aggregate !== 'object' || Array.isArray(aggregate)) {
    throw new Error(`Cannot update baseline: ${CURRENT_FILE} has no "aggregate" object`);
  }
  if (typeof current.scenario === 'string' && current.scenario !== 'all') {
    console.warn(`[comms:baseline] warning: metrics cover only scenario "${current.scenario}", not "all"`);
  }

  await mkdir(BASELINE_DIR, { recursive: true });
  // End with a newline so the committed file is a well-formed text file.
  await writeFile(BASELINE_FILE, `${JSON.stringify(current, null, 2)}\n`);
  console.log(`Updated comms baseline: ${BASELINE_FILE}`);
}
|
||||
|
||||
/**
 * Log a failed baseline update and flag the process as failed through its
 * exit code, without terminating it abruptly.
 *
 * @param {unknown} error - The rejection reason from main().
 */
function reportFailure(error) {
  console.error('[comms:baseline] failed:', error);
  process.exitCode = 1;
}

// The script runs main() as soon as it is loaded.
main().catch(reportFailure);
|
||||
126
scripts/comms/baseline/metrics.baseline.json
Normal file
126
scripts/comms/baseline/metrics.baseline.json
Normal file
@@ -0,0 +1,126 @@
|
||||
{
|
||||
"generated_at": "2026-03-14T15:02:39.817Z",
|
||||
"scenario": "all",
|
||||
"scenarios": {
|
||||
"gateway-restart-during-run": {
|
||||
"duplicate_event_rate": 0,
|
||||
"event_fanout_ratio": 1,
|
||||
"history_inflight_max": 1,
|
||||
"history_load_qps": 0.4166666666666667,
|
||||
"rpc_p50_ms": 240,
|
||||
"rpc_p95_ms": 240,
|
||||
"rpc_timeout_rate": 0,
|
||||
"gateway_reconnect_count": 1,
|
||||
"message_loss_count": 0,
|
||||
"message_order_violation_count": 0,
|
||||
"_meta": {
|
||||
"duration_sec": 2.4,
|
||||
"total_gateway_events": 4,
|
||||
"unique_gateway_events": 4,
|
||||
"total_rpc_calls": 1
|
||||
}
|
||||
},
|
||||
"happy-path-chat": {
|
||||
"duplicate_event_rate": 0,
|
||||
"event_fanout_ratio": 1,
|
||||
"history_inflight_max": 1,
|
||||
"history_load_qps": 0.2857142857142857,
|
||||
"rpc_p50_ms": 180,
|
||||
"rpc_p95_ms": 180,
|
||||
"rpc_timeout_rate": 0,
|
||||
"gateway_reconnect_count": 0,
|
||||
"message_loss_count": 0,
|
||||
"message_order_violation_count": 0,
|
||||
"_meta": {
|
||||
"duration_sec": 3.5,
|
||||
"total_gateway_events": 3,
|
||||
"unique_gateway_events": 3,
|
||||
"total_rpc_calls": 1
|
||||
}
|
||||
},
|
||||
"history-overlap-guard": {
|
||||
"duplicate_event_rate": 0,
|
||||
"event_fanout_ratio": 1,
|
||||
"history_inflight_max": 1,
|
||||
"history_load_qps": 1,
|
||||
"rpc_p50_ms": 95,
|
||||
"rpc_p95_ms": 95,
|
||||
"rpc_timeout_rate": 0,
|
||||
"gateway_reconnect_count": 0,
|
||||
"message_loss_count": 0,
|
||||
"message_order_violation_count": 0,
|
||||
"_meta": {
|
||||
"duration_sec": 2,
|
||||
"total_gateway_events": 2,
|
||||
"unique_gateway_events": 2,
|
||||
"total_rpc_calls": 1
|
||||
}
|
||||
},
|
||||
"invalid-config-patch-recovered": {
|
||||
"duplicate_event_rate": 0,
|
||||
"event_fanout_ratio": 1,
|
||||
"history_inflight_max": 1,
|
||||
"history_load_qps": 0.4166666666666667,
|
||||
"rpc_p50_ms": 110,
|
||||
"rpc_p95_ms": 130,
|
||||
"rpc_timeout_rate": 0,
|
||||
"gateway_reconnect_count": 1,
|
||||
"message_loss_count": 0,
|
||||
"message_order_violation_count": 0,
|
||||
"_meta": {
|
||||
"duration_sec": 2.4,
|
||||
"total_gateway_events": 3,
|
||||
"unique_gateway_events": 3,
|
||||
"total_rpc_calls": 2
|
||||
}
|
||||
},
|
||||
"multi-agent-channel-switch": {
|
||||
"duplicate_event_rate": 0,
|
||||
"event_fanout_ratio": 1,
|
||||
"history_inflight_max": 0,
|
||||
"history_load_qps": 0,
|
||||
"rpc_p50_ms": 210,
|
||||
"rpc_p95_ms": 240,
|
||||
"rpc_timeout_rate": 0,
|
||||
"gateway_reconnect_count": 0,
|
||||
"message_loss_count": 0,
|
||||
"message_order_violation_count": 0,
|
||||
"_meta": {
|
||||
"duration_sec": 2.1,
|
||||
"total_gateway_events": 5,
|
||||
"unique_gateway_events": 5,
|
||||
"total_rpc_calls": 2
|
||||
}
|
||||
},
|
||||
"network-degraded": {
|
||||
"duplicate_event_rate": 0,
|
||||
"event_fanout_ratio": 1,
|
||||
"history_inflight_max": 1,
|
||||
"history_load_qps": 0.35714285714285715,
|
||||
"rpc_p50_ms": 420,
|
||||
"rpc_p95_ms": 820,
|
||||
"rpc_timeout_rate": 0,
|
||||
"gateway_reconnect_count": 1,
|
||||
"message_loss_count": 0,
|
||||
"message_order_violation_count": 0,
|
||||
"_meta": {
|
||||
"duration_sec": 2.8,
|
||||
"total_gateway_events": 3,
|
||||
"unique_gateway_events": 3,
|
||||
"total_rpc_calls": 2
|
||||
}
|
||||
}
|
||||
},
|
||||
"aggregate": {
|
||||
"duplicate_event_rate": 0,
|
||||
"event_fanout_ratio": 1,
|
||||
"history_inflight_max": 1,
|
||||
"history_load_qps": 0.41269841269841273,
|
||||
"rpc_p50_ms": 209.16666666666666,
|
||||
"rpc_p95_ms": 284.1666666666667,
|
||||
"rpc_timeout_rate": 0,
|
||||
"gateway_reconnect_count": 3,
|
||||
"message_loss_count": 0,
|
||||
"message_order_violation_count": 0
|
||||
}
|
||||
}
|
||||
122
scripts/comms/compare.mjs
Normal file
122
scripts/comms/compare.mjs
Normal file
@@ -0,0 +1,122 @@
|
||||
import { mkdir, readFile, writeFile } from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import process from 'node:process';
|
||||
|
||||
// Repository root: two directories above the directory holding this script.
// URL#pathname is percent-encoded (a space becomes "%20"), so it is decoded
// before being used as a filesystem path.
// NOTE(review): still POSIX-only; a Windows file URL yields "/C:/...". node:url's
// fileURLToPath covers that case but is not imported in this module.
const ROOT = path.resolve(
  path.dirname(decodeURIComponent(new URL(import.meta.url).pathname)),
  '..',
  '..',
);
// Inputs: the metrics of the current run and the committed baseline.
const CURRENT_FILE = path.join(ROOT, 'artifacts/comms/current-metrics.json');
const BASELINE_FILE = path.join(ROOT, 'scripts/comms/baseline/metrics.baseline.json');
// Output: the Markdown comparison report.
const OUTPUT_DIR = path.join(ROOT, 'artifacts/comms');
const REPORT_FILE = path.join(OUTPUT_DIR, 'compare-report.md');
|
||||
|
||||
// Absolute upper bounds. evaluateReport() checks `value <= bound` for every
// required scenario and for the aggregate. Frozen because the table is shared
// module state that nothing is meant to modify at runtime.
const HARD_THRESHOLDS = Object.freeze({
  duplicate_event_rate: 0.005,
  event_fanout_ratio: 1.2,
  history_inflight_max: 1,
  rpc_timeout_rate: 0.01,
  message_loss_count: 0,
  message_order_violation_count: 0,
});

// Maximum allowed fractional increase of an aggregate metric over its
// baseline value (0.10 means "at most 10% higher").
const RELATIVE_THRESHOLDS = Object.freeze({
  history_load_qps: 0.10,
  rpc_p95_ms: 0.15,
});

// Scenarios that must be present in the current metrics; a missing one is
// reported as a failure.
const REQUIRED_SCENARIOS = Object.freeze([
  'gateway-restart-during-run',
  'happy-path-chat',
  'history-overlap-guard',
  'invalid-config-patch-recovered',
  'multi-agent-channel-switch',
  'network-degraded',
]);
|
||||
|
||||
/**
 * Fractional change of `current` relative to `baseline`.
 *
 * @param {number} current - Value measured in the current run.
 * @param {number} baseline - Reference value.
 * @returns {number} `(current - baseline) / baseline`. When the baseline is
 *   zero or not finite the ratio is undefined, so the result is 0 if
 *   `current` is exactly 0 and Infinity otherwise.
 */
function ratioDelta(current, baseline) {
  const baselineUsable = Number.isFinite(baseline) && baseline !== 0;
  if (baselineUsable) {
    return (current - baseline) / baseline;
  }
  if (current === 0) {
    return 0;
  }
  return Infinity;
}
|
||||
|
||||
/**
 * Render a fraction as a percentage with two decimals.
 *
 * @param {number} value - Fraction to format (0.1 means 10%).
 * @returns {string} For example "10.00%"; non-finite input renders as
 *   "Infinity%" or "NaN%".
 */
function fmtPercent(value) {
  const scaled = value * 100;
  return scaled.toFixed(2) + '%';
}
|
||||
|
||||
/**
 * Render a metric value for the report table.
 *
 * @param {*} value - Value to format.
 * @returns {string} Four-decimal fixed notation for finite numbers; the plain
 *   string form for anything else (Infinity, NaN, non-numbers).
 */
function fmtNumber(value) {
  if (!Number.isFinite(value)) {
    return String(value);
  }
  return Number(value).toFixed(4);
}
|
||||
|
||||
/**
 * Check the current metrics against the hard limits and the baseline.
 *
 * Three passes run in order, each appending to the same two lists:
 *   1. every required scenario must exist and respect HARD_THRESHOLDS;
 *   2. the aggregate must respect HARD_THRESHOLDS;
 *   3. aggregate metrics in RELATIVE_THRESHOLDS must not grow beyond the
 *      allowed fraction over the baseline aggregate.
 * A metric that is absent is evaluated as 0.
 *
 * @param {object} current - Current metrics (`scenarios`, `aggregate`).
 * @param {object} baseline - Baseline metrics (`aggregate`).
 * @returns {{failures: string[], rows: string[]}} Failure messages and the
 *   Markdown table rows, one row per check.
 */
export function evaluateReport(current, baseline) {
  const currentAggregate = current.aggregate ?? {};
  const baselineAggregate = baseline.aggregate ?? {};
  const scenarioTable = current.scenarios ?? {};
  const failures = [];
  const rows = [];

  const verdict = (ok) => (ok ? 'PASS' : 'FAIL');
  const hardLimits = Object.entries(HARD_THRESHOLDS);

  // Pass 1: per-scenario hard limits.
  REQUIRED_SCENARIOS.forEach((scenario) => {
    const metrics = scenarioTable[scenario];
    if (!metrics) {
      failures.push(`missing scenario: ${scenario}`);
      rows.push(`| scenario:${scenario} | missing | required | FAIL |`);
      return;
    }
    hardLimits.forEach(([metric, threshold]) => {
      const value = Number(metrics[metric] ?? 0);
      const withinLimit = value <= threshold;
      if (!withinLimit) {
        failures.push(`scenario:${scenario} ${metric}=${value} > ${threshold}`);
      }
      rows.push(`| ${scenario}.${metric} | ${fmtNumber(value)} | <= ${threshold} | ${verdict(withinLimit)} |`);
    });
  });

  // Pass 2: aggregate hard limits.
  hardLimits.forEach(([metric, threshold]) => {
    const value = Number(currentAggregate[metric] ?? 0);
    const withinLimit = value <= threshold;
    if (!withinLimit) {
      failures.push(`${metric}=${value} > ${threshold}`);
    }
    rows.push(`| ${metric} | ${fmtNumber(value)} | <= ${threshold} | ${verdict(withinLimit)} |`);
  });

  // Pass 3: aggregate growth relative to the baseline.
  Object.entries(RELATIVE_THRESHOLDS).forEach(([metric, maxIncrease]) => {
    const value = Number(currentAggregate[metric] ?? 0);
    const reference = Number(baselineAggregate[metric] ?? 0);
    const delta = ratioDelta(value, reference);
    const withinLimit = delta <= maxIncrease;
    if (!withinLimit) {
      failures.push(`${metric} delta=${delta} > ${maxIncrease}`);
    }
    rows.push(`| ${metric} | ${fmtNumber(value)} (baseline ${fmtNumber(reference)}) | delta <= ${fmtPercent(maxIncrease)} | ${verdict(withinLimit)} (${fmtPercent(delta)}) |`);
  });

  return { failures, rows };
}
|
||||
|
||||
/**
 * Compare the current metrics with the baseline and publish the result.
 *
 * Loads both JSON files, evaluates them, writes the Markdown report to
 * REPORT_FILE, echoes it to stdout, and sets a failing exit code when any
 * check failed.
 *
 * @returns {Promise<void>}
 */
export async function main() {
  const readJson = async (file) => JSON.parse(await readFile(file, 'utf8'));

  const current = await readJson(CURRENT_FILE);
  const baseline = await readJson(BASELINE_FILE);
  const { failures, rows } = evaluateReport(current, baseline);
  const passed = failures.length === 0;

  const lines = [
    '# Comms Regression Report',
    '',
    `- Generated at: ${new Date().toISOString()}`,
    `- Result: ${passed ? 'PASS' : 'FAIL'}`,
    '',
    '| Metric | Current | Threshold | Status |',
    '|---|---:|---:|---|',
  ];
  lines.push(...rows);
  lines.push('');
  const report = lines.join('\n');

  await mkdir(OUTPUT_DIR, { recursive: true });
  await writeFile(REPORT_FILE, report);
  console.log(report);
  console.log(`\nWrote comparison report to ${REPORT_FILE}`);

  if (passed) {
    return;
  }
  console.error(`\nThreshold failures:\n- ${failures.join('\n- ')}`);
  process.exitCode = 1;
}
|
||||
|
||||
// Truthy only when Node was started with this file as its main script, so
// importing the module (for example to call evaluateReport) does not run main().
// URL#pathname is percent-encoded, so it is decoded before being compared with
// the plain filesystem path in process.argv[1]; without decoding, a checkout
// path containing a space or non-ASCII character never matches and the script
// silently does nothing.
const isEntrypoint = process.argv[1]
  && path.resolve(process.argv[1]) === path.resolve(decodeURIComponent(new URL(import.meta.url).pathname));
if (isEntrypoint) {
  main().catch((error) => {
    console.error('[comms:compare] failed:', error);
    process.exitCode = 1;
  });
}
|
||||
9
scripts/comms/datasets/gateway-restart-during-run.jsonl
Normal file
9
scripts/comms/datasets/gateway-restart-during-run.jsonl
Normal file
@@ -0,0 +1,9 @@
|
||||
{"ts":0,"type":"gateway_event","runId":"run-restart-1","sessionKey":"agent:main:session-restart","seq":1,"state":"started","fanout":1}
|
||||
{"ts":0.3,"type":"rpc","method":"chat.send","latencyMs":240,"timeout":false}
|
||||
{"ts":0.6,"type":"gateway_reconnect"}
|
||||
{"ts":0.8,"type":"gateway_event","runId":"run-restart-1","sessionKey":"agent:main:session-restart","seq":2,"state":"delta","fanout":1}
|
||||
{"ts":1.1,"type":"gateway_event","runId":"run-restart-1","sessionKey":"agent:main:session-restart","seq":3,"state":"delta","fanout":1}
|
||||
{"ts":1.4,"type":"history_load","sessionKey":"agent:main:session-restart","action":"start"}
|
||||
{"ts":1.6,"type":"history_load","sessionKey":"agent:main:session-restart","action":"end"}
|
||||
{"ts":2.0,"type":"gateway_event","runId":"run-restart-1","sessionKey":"agent:main:session-restart","seq":4,"state":"final","fanout":1}
|
||||
{"ts":2.4,"type":"message","lost":false,"orderViolation":false}
|
||||
7
scripts/comms/datasets/happy-path-chat.jsonl
Normal file
7
scripts/comms/datasets/happy-path-chat.jsonl
Normal file
@@ -0,0 +1,7 @@
|
||||
{"ts":0,"type":"gateway_event","runId":"run-happy-1","sessionKey":"agent:main:session-happy","seq":1,"state":"started","fanout":1}
|
||||
{"ts":1,"type":"gateway_event","runId":"run-happy-1","sessionKey":"agent:main:session-happy","seq":2,"state":"delta","fanout":1}
|
||||
{"ts":2,"type":"history_load","sessionKey":"agent:main:session-happy","action":"start"}
|
||||
{"ts":2.2,"type":"history_load","sessionKey":"agent:main:session-happy","action":"end"}
|
||||
{"ts":2.5,"type":"rpc","method":"chat.send","latencyMs":180,"timeout":false}
|
||||
{"ts":3.0,"type":"gateway_event","runId":"run-happy-1","sessionKey":"agent:main:session-happy","seq":3,"state":"final","fanout":1}
|
||||
{"ts":3.5,"type":"message","lost":false,"orderViolation":false}
|
||||
8
scripts/comms/datasets/history-overlap-guard.jsonl
Normal file
8
scripts/comms/datasets/history-overlap-guard.jsonl
Normal file
@@ -0,0 +1,8 @@
|
||||
{"ts":0,"type":"gateway_event","runId":"run-history-1","sessionKey":"agent:main:session-history","seq":1,"state":"started","fanout":1}
|
||||
{"ts":0.2,"type":"history_load","sessionKey":"agent:main:session-history","action":"start"}
|
||||
{"ts":0.4,"type":"history_load","sessionKey":"agent:main:session-history","action":"end"}
|
||||
{"ts":0.8,"type":"history_load","sessionKey":"agent:main:session-history","action":"start"}
|
||||
{"ts":1.0,"type":"history_load","sessionKey":"agent:main:session-history","action":"end"}
|
||||
{"ts":1.4,"type":"rpc","method":"chat.history","latencyMs":95,"timeout":false}
|
||||
{"ts":1.8,"type":"gateway_event","runId":"run-history-1","sessionKey":"agent:main:session-history","seq":2,"state":"final","fanout":1}
|
||||
{"ts":2.0,"type":"message","lost":false,"orderViolation":false}
|
||||
@@ -0,0 +1,9 @@
|
||||
{"ts":0,"type":"gateway_event","runId":"run-invalid-1","sessionKey":"agent:main:session-invalid","seq":1,"state":"started","fanout":1}
|
||||
{"ts":0.2,"type":"rpc","method":"config.patch","latencyMs":110,"timeout":false}
|
||||
{"ts":0.4,"type":"rpc","method":"config.patch","latencyMs":130,"timeout":false}
|
||||
{"ts":0.7,"type":"gateway_reconnect"}
|
||||
{"ts":1.0,"type":"history_load","sessionKey":"agent:main:session-invalid","action":"start"}
|
||||
{"ts":1.3,"type":"history_load","sessionKey":"agent:main:session-invalid","action":"end"}
|
||||
{"ts":1.7,"type":"gateway_event","runId":"run-invalid-1","sessionKey":"agent:main:session-invalid","seq":2,"state":"delta","fanout":1}
|
||||
{"ts":2.1,"type":"gateway_event","runId":"run-invalid-1","sessionKey":"agent:main:session-invalid","seq":3,"state":"final","fanout":1}
|
||||
{"ts":2.4,"type":"message","lost":false,"orderViolation":false}
|
||||
8
scripts/comms/datasets/multi-agent-channel-switch.jsonl
Normal file
8
scripts/comms/datasets/multi-agent-channel-switch.jsonl
Normal file
@@ -0,0 +1,8 @@
|
||||
{"ts":0,"type":"gateway_event","runId":"run-a-main","sessionKey":"agent:main:session-1","seq":1,"state":"started","fanout":1}
|
||||
{"ts":0.2,"type":"rpc","method":"chat.send","latencyMs":210,"timeout":false}
|
||||
{"ts":0.5,"type":"gateway_event","runId":"run-a-main","sessionKey":"agent:main:session-1","seq":2,"state":"delta","fanout":1}
|
||||
{"ts":0.8,"type":"gateway_event","runId":"run-a-team","sessionKey":"agent:team-a:session-2","seq":1,"state":"started","fanout":1}
|
||||
{"ts":1.1,"type":"rpc","method":"chat.send","latencyMs":240,"timeout":false}
|
||||
{"ts":1.4,"type":"gateway_event","runId":"run-a-main","sessionKey":"agent:main:session-1","seq":3,"state":"final","fanout":1}
|
||||
{"ts":1.8,"type":"gateway_event","runId":"run-a-team","sessionKey":"agent:team-a:session-2","seq":2,"state":"final","fanout":1}
|
||||
{"ts":2.1,"type":"message","lost":false,"orderViolation":false}
|
||||
9
scripts/comms/datasets/network-degraded.jsonl
Normal file
9
scripts/comms/datasets/network-degraded.jsonl
Normal file
@@ -0,0 +1,9 @@
|
||||
{"ts":0,"type":"gateway_event","runId":"run-net-1","sessionKey":"agent:main:session-net","seq":1,"state":"started","fanout":1}
|
||||
{"ts":0.2,"type":"rpc","method":"chat.send","latencyMs":420,"timeout":false}
|
||||
{"ts":0.8,"type":"rpc","method":"chat.history","latencyMs":820,"timeout":false}
|
||||
{"ts":1.0,"type":"history_load","sessionKey":"agent:main:session-net","action":"start"}
|
||||
{"ts":1.5,"type":"history_load","sessionKey":"agent:main:session-net","action":"end"}
|
||||
{"ts":1.6,"type":"gateway_reconnect"}
|
||||
{"ts":2.1,"type":"gateway_event","runId":"run-net-1","sessionKey":"agent:main:session-net","seq":2,"state":"delta","fanout":1}
|
||||
{"ts":2.3,"type":"gateway_event","runId":"run-net-1","sessionKey":"agent:main:session-net","seq":3,"state":"final","fanout":1}
|
||||
{"ts":2.8,"type":"message","lost":false,"orderViolation":false}
|
||||
176
scripts/comms/replay.mjs
Normal file
176
scripts/comms/replay.mjs
Normal file
@@ -0,0 +1,176 @@
|
||||
import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import process from 'node:process';
|
||||
|
||||
// Repository root: two directories above the directory holding this script.
// URL#pathname is percent-encoded (a space becomes "%20"), so it is decoded
// before being used as a filesystem path.
// NOTE(review): still POSIX-only; a Windows file URL yields "/C:/...". node:url's
// fileURLToPath covers that case but is not imported in this module.
const ROOT = path.resolve(
  path.dirname(decodeURIComponent(new URL(import.meta.url).pathname)),
  '..',
  '..',
);
// Input: directory of recorded *.jsonl scenario datasets.
const DATASET_DIR = path.join(ROOT, 'scripts/comms/datasets');
// Output: metrics computed from the datasets.
const OUTPUT_DIR = path.join(ROOT, 'artifacts/comms');
const OUTPUT_FILE = path.join(OUTPUT_DIR, 'current-metrics.json');
|
||||
|
||||
/**
 * Nearest-rank percentile of a list of numbers.
 *
 * @param {number[]} values - Samples; the array is not modified.
 * @param {number} p - Percentile in the range 0-100. Values outside the range
 *   are clamped, so `p <= 0` yields the minimum and `p >= 100` the maximum.
 * @returns {number} The selected sample, or 0 when `values` is empty.
 */
export function percentile(values, p) {
  if (values.length === 0) return 0;
  const sorted = [...values].sort((a, b) => a - b);
  const rank = Math.ceil((p / 100) * sorted.length) - 1;
  // Clamp on both sides: without the lower bound, p = 0 produces index -1 and
  // the function returns undefined instead of the smallest sample.
  const idx = Math.min(sorted.length - 1, Math.max(0, rank));
  return sorted[idx];
}
|
||||
|
||||
/**
 * Build the identity string used to detect duplicate gateway events.
 *
 * @param {object} event - A recorded event.
 * @returns {string|null} `runId|sessionKey|seq|state` for a `gateway_event`
 *   (absent fields contribute an empty segment), or null when the event is of
 *   another type or carries none of the four identifying fields.
 */
export function dedupeKey(event) {
  if (event.type !== 'gateway_event') return null;
  const parts = [event.runId, event.sessionKey, event.seq, event.state]
    .map((part) => part ?? '');
  // Compare against the empty string instead of testing truthiness, so a
  // legitimate sequence number of 0 still counts as an identifying field.
  if (parts.every((part) => part === '')) return null;
  return parts.join('|');
}
|
||||
|
||||
/**
 * Reduce one scenario's recorded events to its comms metrics.
 *
 * Events are processed in ascending `ts` order (a missing `ts` counts as 0);
 * the input array is not modified. Event types other than `gateway_event`,
 * `history_load`, `rpc`, `gateway_reconnect` and `message` are ignored.
 *
 * @param {object[]} events - Events of a single scenario.
 * @returns {object} The metrics:
 *   - duplicate_event_rate: repeated gateway events / all gateway events
 *   - event_fanout_ratio: summed `fanout` / unique gateway events
 *   - history_inflight_max: peak number of concurrent history loads for any
 *     single session
 *   - history_load_qps: history load starts per second of scenario duration
 *   - rpc_p50_ms, rpc_p95_ms: percentiles over RPC latencies greater than 0
 *   - rpc_timeout_rate: timed-out RPCs / all RPCs
 *   - gateway_reconnect_count, message_loss_count,
 *     message_order_violation_count: plain counters
 *   - _meta: duration and raw totals behind the figures above
 */
export function calculateScenarioMetrics(events) {
  let totalGatewayEvents = 0;
  let uniqueGatewayEvents = 0;
  let fanoutTotal = 0;
  let duplicateGatewayEvents = 0;
  let gatewayReconnectCount = 0;
  let messageLossCount = 0;
  let messageOrderViolationCount = 0;
  let totalRpcCalls = 0;
  let rpcTimeoutCount = 0;
  const rpcLatencies = [];
  const dedupeSet = new Set();
  const historyInFlight = new Map();
  let historyInflightMax = 0;
  let historyLoadCount = 0;

  const sorted = [...events].sort((a, b) => (a.ts ?? 0) - (b.ts ?? 0));
  const startTs = sorted.length > 0 ? (sorted[0].ts ?? 0) : 0;
  const endTs = sorted.length > 0 ? (sorted[sorted.length - 1].ts ?? 0) : 0;
  // Floor the duration at one second so very short or empty recordings
  // neither divide by zero nor inflate the per-second rate.
  const durationSec = Math.max(1, endTs - startTs);

  for (const event of sorted) {
    switch (event.type) {
      case 'gateway_event': {
        totalGatewayEvents += 1;
        fanoutTotal += Number(event.fanout ?? 1);
        const key = dedupeKey(event);
        // An event without a key cannot be matched against others, so it is
        // always counted as unique.
        if (key && dedupeSet.has(key)) {
          duplicateGatewayEvents += 1;
        } else {
          uniqueGatewayEvents += 1;
          if (key) dedupeSet.add(key);
        }
        break;
      }
      case 'history_load': {
        const sessionKey = String(event.sessionKey ?? 'unknown');
        const inFlight = historyInFlight.get(sessionKey) ?? 0;
        if (event.action === 'start') {
          const next = inFlight + 1;
          historyInFlight.set(sessionKey, next);
          historyInflightMax = Math.max(historyInflightMax, next);
          historyLoadCount += 1;
        } else if (event.action === 'end') {
          // Never drop below zero if an "end" arrives without a "start".
          historyInFlight.set(sessionKey, Math.max(0, inFlight - 1));
        }
        break;
      }
      case 'rpc': {
        // Count every call, including those without a usable latency, so that
        // timed-out calls that report no latency still enter the timeout
        // rate's denominator.
        totalRpcCalls += 1;
        const latency = Number(event.latencyMs ?? 0);
        if (latency > 0) rpcLatencies.push(latency);
        if (event.timeout === true) rpcTimeoutCount += 1;
        break;
      }
      case 'gateway_reconnect':
        gatewayReconnectCount += 1;
        break;
      case 'message':
        if (event.lost === true) messageLossCount += 1;
        if (event.orderViolation === true) messageOrderViolationCount += 1;
        break;
      default:
        break;
    }
  }

  return {
    duplicate_event_rate: totalGatewayEvents > 0 ? duplicateGatewayEvents / totalGatewayEvents : 0,
    event_fanout_ratio: uniqueGatewayEvents > 0 ? fanoutTotal / uniqueGatewayEvents : 0,
    history_inflight_max: historyInflightMax,
    history_load_qps: historyLoadCount / durationSec,
    rpc_p50_ms: percentile(rpcLatencies, 50),
    rpc_p95_ms: percentile(rpcLatencies, 95),
    rpc_timeout_rate: totalRpcCalls > 0 ? rpcTimeoutCount / totalRpcCalls : 0,
    gateway_reconnect_count: gatewayReconnectCount,
    message_loss_count: messageLossCount,
    message_order_violation_count: messageOrderViolationCount,
    _meta: {
      duration_sec: durationSec,
      total_gateway_events: totalGatewayEvents,
      unique_gateway_events: uniqueGatewayEvents,
      total_rpc_calls: totalRpcCalls,
    },
  };
}
|
||||
|
||||
/**
 * Combine per-scenario metrics into a single aggregate record.
 *
 * Rates, ratios and latency percentiles are averaged across scenarios,
 * `history_inflight_max` takes the largest value, and the count metrics are
 * summed and rounded. A missing metric contributes 0.
 *
 * @param {object[]} metricsList - One metrics object per scenario.
 * @returns {object} The aggregate; for an empty list, the metrics of an empty
 *   scenario.
 */
export function aggregateMetrics(metricsList) {
  const scenarioCount = metricsList.length;
  if (scenarioCount === 0) {
    return calculateScenarioMetrics([]);
  }

  const total = (key) => {
    let acc = 0;
    for (const item of metricsList) {
      acc += Number(item[key] ?? 0);
    }
    return acc;
  };
  const mean = (key) => total(key) / scenarioCount;
  const roundedTotal = (key) => Math.round(total(key));
  const inflightPeaks = metricsList.map((m) => Number(m.history_inflight_max ?? 0));

  return {
    duplicate_event_rate: mean('duplicate_event_rate'),
    event_fanout_ratio: mean('event_fanout_ratio'),
    history_inflight_max: Math.max(...inflightPeaks),
    history_load_qps: mean('history_load_qps'),
    rpc_p50_ms: mean('rpc_p50_ms'),
    rpc_p95_ms: mean('rpc_p95_ms'),
    rpc_timeout_rate: mean('rpc_timeout_rate'),
    gateway_reconnect_count: roundedTotal('gateway_reconnect_count'),
    message_loss_count: roundedTotal('message_loss_count'),
    message_order_violation_count: roundedTotal('message_order_violation_count'),
  };
}
|
||||
|
||||
/**
 * Read one JSONL dataset from DATASET_DIR and parse it into events.
 *
 * Lines are trimmed and blank lines skipped; every remaining line must be a
 * JSON document.
 *
 * @param {string} fileName - Dataset file name inside DATASET_DIR.
 * @returns {Promise<object[]>} Parsed events in file order.
 * @throws {SyntaxError} When a line is not valid JSON. The message names the
 *   file and the 1-based line number; the parser's error is kept as `cause`.
 */
export async function loadScenario(fileName) {
  const fullPath = path.join(DATASET_DIR, fileName);
  const raw = await readFile(fullPath, 'utf8');
  const events = [];
  // Number lines before blank ones are dropped so the reported line matches
  // what an editor shows.
  for (const [index, line] of raw.split('\n').entries()) {
    const text = line.trim();
    if (text === '') continue;
    try {
      events.push(JSON.parse(text));
    } catch (error) {
      throw new SyntaxError(
        `Invalid JSON in ${fullPath} at line ${index + 1}: ${error.message}`,
        { cause: error },
      );
    }
  }
  return events;
}
|
||||
|
||||
/**
 * Replay the recorded datasets and write their metrics to OUTPUT_FILE.
 *
 * With `--scenario=<name>` on the command line only `<name>.jsonl` is
 * replayed; otherwise every `*.jsonl` file in DATASET_DIR is, in sorted
 * order. The output holds the per-scenario metrics and their aggregate.
 *
 * @returns {Promise<void>}
 * @throws {Error} When no dataset matches the requested scenario.
 */
export async function main() {
  const scenarioFlag = '--scenario=';
  const flagArg = process.argv.find((arg) => arg.startsWith(scenarioFlag));
  // Take everything after the prefix so a name containing "=" is not cut short.
  const argScenario = flagArg === undefined ? 'all' : flagArg.slice(scenarioFlag.length);

  const files = (await readdir(DATASET_DIR)).filter((name) => name.endsWith('.jsonl')).sort();
  const selectedFiles = argScenario === 'all'
    ? files
    : files.filter((name) => name === `${argScenario}.jsonl`);

  if (selectedFiles.length === 0) {
    throw new Error(`No dataset found for scenario "${argScenario}"`);
  }

  // The datasets are independent, so load them concurrently. Promise.all
  // keeps the input order, which preserves the sorted scenario key order.
  const entries = await Promise.all(selectedFiles.map(async (fileName) => {
    const scenarioName = fileName.replace(/\.jsonl$/, '');
    const events = await loadScenario(fileName);
    return [scenarioName, calculateScenarioMetrics(events)];
  }));
  const scenarios = Object.fromEntries(entries);

  const aggregate = aggregateMetrics(Object.values(scenarios));
  const output = {
    generated_at: new Date().toISOString(),
    scenario: argScenario,
    scenarios,
    aggregate,
  };

  await mkdir(OUTPUT_DIR, { recursive: true });
  await writeFile(OUTPUT_FILE, JSON.stringify(output, null, 2));
  console.log(`Wrote comms replay metrics to ${OUTPUT_FILE}`);
}
|
||||
|
||||
// Truthy only when Node was started with this file as its main script, so
// importing the module for its exported helpers does not run main().
// URL#pathname is percent-encoded, so it is decoded before being compared with
// the plain filesystem path in process.argv[1]; without decoding, a checkout
// path containing a space or non-ASCII character never matches and the script
// silently does nothing.
const isEntrypoint = process.argv[1]
  && path.resolve(process.argv[1]) === path.resolve(decodeURIComponent(new URL(import.meta.url).pathname));
if (isEntrypoint) {
  main().catch((error) => {
    console.error('[comms:replay] failed:', error);
    process.exitCode = 1;
  });
}
|
||||
Reference in New Issue
Block a user