Fix telemetry shutdown noise and improve token usage diagnostics (#444)

Co-authored-by: zuolingxuan <zuolingxuan@bytedance.com>
This commit is contained in:
Lingxuan Zuo
2026-03-13 13:57:49 +08:00
committed by GitHub
Unverified
parent 01adc828b5
commit 995a7f070d
21 changed files with 923 additions and 116 deletions

View File

@@ -0,0 +1,158 @@
import { app, utilityProcess } from 'electron';
import { existsSync } from 'node:fs';
import path from 'node:path';
import { getOpenClawDir, getOpenClawEntryPath } from './paths';
import { logger } from './logger';
import { getUvMirrorEnv } from './uv-env';
// Upper bound, in milliseconds, on how long a single doctor run may take before it is killed.
const OPENCLAW_DOCTOR_TIMEOUT_MS = 60_000;
// CLI arguments for a diagnose-only run.
const OPENCLAW_DOCTOR_ARGS = ['doctor', '--json'];
// CLI arguments for a repair run; the flags are passed so the CLI does not wait for prompts
// (presumably — confirm against the OpenClaw CLI's own flag documentation).
const OPENCLAW_DOCTOR_FIX_ARGS = ['doctor', '--fix', '--yes', '--non-interactive'];
/** Which flavour of the doctor command to run: report problems only, or attempt to repair them. */
export type OpenClawDoctorMode = 'diagnose' | 'fix';
/** Outcome of one doctor run, including failures that happened before or instead of a normal exit. */
export interface OpenClawDoctorResult {
/** The mode that was requested for this run. */
mode: OpenClawDoctorMode;
/** True only when the child process exited with code 0. */
success: boolean;
/** Exit code reported by the child; null when the run failed before an exit code was produced. */
exitCode: number | null;
/** Everything the child wrote to stdout before the result was produced. */
stdout: string;
/** Everything the child wrote to stderr before the result was produced. */
stderr: string;
/** Human-readable command line (`openclaw <args>`), for display and logging. */
command: string;
/** Working directory the child was (or would have been) started in. */
cwd: string;
/** Wall-clock time from the start of the run until the result was produced, in milliseconds. */
durationMs: number;
/** Set to true when the run was aborted because it exceeded the timeout. */
timedOut?: boolean;
/** Failure description for missing-entry, timeout and process-error outcomes. */
error?: string;
}
/**
 * Resolves the directory that holds the bundled helper binaries.
 *
 * Packaged builds read from `<resourcesPath>/bin`; otherwise the binaries are looked up
 * under `resources/bin/<platform>-<arch>` relative to the current working directory.
 */
function getBundledBinPath(): string {
  const platformArch = `${process.platform}-${process.arch}`;
  if (app.isPackaged) {
    return path.join(process.resourcesPath, 'bin');
  }
  return path.join(process.cwd(), 'resources', 'bin', platformArch);
}
/**
 * Runs the OpenClaw `doctor` command in an Electron utility process and collects its output.
 *
 * The returned promise always resolves with a structured result once the run has started:
 * a missing entry script, a fork failure, a process error, a timeout and a normal exit are
 * all reported through the result's `success` / `error` / `timedOut` fields.
 *
 * @param mode - `'diagnose'` runs `doctor --json`; `'fix'` runs the non-interactive repair variant.
 * @returns The captured stdout/stderr, exit code and timing for the run.
 */
async function runDoctorCommand(mode: OpenClawDoctorMode): Promise<OpenClawDoctorResult> {
  const openclawDir = getOpenClawDir();
  const entryScript = getOpenClawEntryPath();
  const args = mode === 'fix' ? OPENCLAW_DOCTOR_FIX_ARGS : OPENCLAW_DOCTOR_ARGS;
  const command = `openclaw ${args.join(' ')}`;
  const startedAt = Date.now();

  if (!existsSync(entryScript)) {
    const error = `OpenClaw entry script not found at ${entryScript}`;
    logger.error(`Cannot run OpenClaw doctor: ${error}`);
    return {
      mode,
      success: false,
      exitCode: null,
      stdout: '',
      stderr: '',
      command,
      cwd: openclawDir,
      durationMs: Date.now() - startedAt,
      error,
    };
  }

  const binPath = getBundledBinPath();
  const binPathExists = existsSync(binPath);
  const inheritedPath = process.env.PATH || '';
  // Join only non-empty parts: a trailing delimiter would create an empty PATH entry,
  // which POSIX command lookup treats as the current working directory.
  const finalPath = binPathExists
    ? [binPath, inheritedPath].filter((part) => part.length > 0).join(path.delimiter)
    : inheritedPath;
  const uvEnv = await getUvMirrorEnv();

  logger.info(
    `Running OpenClaw doctor (mode=${mode}, entry="${entryScript}", args="${args.join(' ')}", cwd="${openclawDir}", bundledBin=${binPathExists ? 'yes' : 'no'})`,
  );

  return await new Promise<OpenClawDoctorResult>((resolve) => {
    let stdout = '';
    let stderr = '';
    let settled = false;

    // Resolve at most once; timeout, error and exit can all fire for the same child.
    const finish = (result: Omit<OpenClawDoctorResult, 'durationMs'>) => {
      if (settled) return;
      settled = true;
      resolve({
        ...result,
        durationMs: Date.now() - startedAt,
      });
    };

    let child: ReturnType<typeof utilityProcess.fork>;
    try {
      child = utilityProcess.fork(entryScript, args, {
        cwd: openclawDir,
        stdio: 'pipe',
        env: {
          ...process.env,
          ...uvEnv,
          PATH: finalPath,
          OPENCLAW_NO_RESPAWN: '1',
        } as NodeJS.ProcessEnv,
      });
    } catch (error) {
      // A synchronous throw from fork would otherwise reject this promise; report it
      // through the same structured result as every other failure path.
      logger.error('Failed to spawn OpenClaw doctor process:', error);
      finish({
        mode,
        success: false,
        exitCode: null,
        stdout,
        stderr,
        command,
        cwd: openclawDir,
        error: error instanceof Error ? error.message : String(error),
      });
      return;
    }

    const timeout = setTimeout(() => {
      logger.error(`OpenClaw doctor timed out after ${OPENCLAW_DOCTOR_TIMEOUT_MS}ms`);
      try {
        child.kill();
      } catch {
        // ignore: the child may already be gone
      }
      finish({
        mode,
        success: false,
        exitCode: null,
        stdout,
        stderr,
        command,
        cwd: openclawDir,
        timedOut: true,
        error: `Timed out after ${OPENCLAW_DOCTOR_TIMEOUT_MS}ms`,
      });
    }, OPENCLAW_DOCTOR_TIMEOUT_MS);

    child.stdout?.on('data', (data) => {
      stdout += data.toString();
    });
    child.stderr?.on('data', (data) => {
      stderr += data.toString();
    });

    child.on('error', (error) => {
      clearTimeout(timeout);
      logger.error('Failed to spawn OpenClaw doctor process:', error);
      finish({
        mode,
        success: false,
        exitCode: null,
        stdout,
        stderr,
        command,
        cwd: openclawDir,
        error: error instanceof Error ? error.message : String(error),
      });
    });

    child.on('exit', (code) => {
      clearTimeout(timeout);
      logger.info(`OpenClaw doctor exited with code ${code ?? 'null'}`);
      finish({
        mode,
        success: code === 0,
        exitCode: code,
        stdout,
        stderr,
        command,
        cwd: openclawDir,
      });
    });
  });
}
/**
 * Runs the OpenClaw doctor in diagnose mode (`doctor --json`).
 *
 * @returns The structured outcome of the run.
 */
export async function runOpenClawDoctor(): Promise<OpenClawDoctorResult> {
  const result = await runDoctorCommand('diagnose');
  return result;
}
/**
 * Runs the OpenClaw doctor in fix mode (`doctor --fix --yes --non-interactive`).
 *
 * @returns The structured outcome of the run.
 */
export async function runOpenClawDoctorFix(): Promise<OpenClawDoctorResult> {
  const result = await runDoctorCommand('fix');
  return result;
}

View File

@@ -6,10 +6,32 @@ import { logger } from './logger';
// PostHog project key and ingestion host used for telemetry.
// NOTE(review): presumably a public, write-only project key — confirm before treating it as a secret.
const POSTHOG_API_KEY = 'phc_aGNegeJQP5FzNiF2rEoKqQbkuCpiiETMttplibXpB0n';
const POSTHOG_HOST = 'https://us.i.posthog.com';
// Maximum time, in milliseconds, that shutdownTelemetry waits for the client to finish shutting down.
const TELEMETRY_SHUTDOWN_TIMEOUT_MS = 1500;
// Module-level telemetry state; shutdownTelemetry resets both to null / '' before shutting the client down.
let posthogClient: PostHog | null = null;
let distinctId: string = '';
/**
 * Decides whether an error raised during telemetry shutdown is plain network noise.
 *
 * Walks the error and its `cause` chain and returns true as soon as one link's
 * `name` + `message` contains a known network/timeout marker (case-insensitive).
 * Values that are not `Error` instances end the walk, and a visited set stops
 * cyclic `cause` chains of any length from looping forever.
 *
 * @param error - The value caught from the shutdown call.
 * @returns True when the error, or one of its causes, matches a known marker.
 */
function isIgnorablePostHogShutdownError(error: unknown): boolean {
  const ignorableMarkers = [
    'posthogfetchnetworkerror',
    'network error while fetching posthog',
    'timeouterror',
    'aborted due to timeout',
    'fetch failed',
  ];
  const visited = new Set<unknown>();
  let current: unknown = error;

  while (current instanceof Error && !visited.has(current)) {
    visited.add(current);
    const message = `${current.name} ${current.message}`.toLowerCase();
    if (ignorableMarkers.some((marker) => message.includes(marker))) {
      return true;
    }
    current = 'cause' in current ? current.cause : undefined;
  }

  return false;
}
/**
* Initialize PostHog telemetry
*/
@@ -65,15 +87,51 @@ export async function initTelemetry(): Promise<void> {
}
/**
 * Best-effort telemetry shutdown that never blocks app exit on network issues.
 *
 * The module-level client and distinct id are cleared first, then the client's
 * `shutdown()` is awaited for at most TELEMETRY_SHUTDOWN_TIMEOUT_MS. Network and
 * timeout errors recognised by isIgnorablePostHogShutdownError are logged at debug
 * level; any other shutdown error is logged as an error. This function does not throw
 * for shutdown failures.
 */
export async function shutdownTelemetry(): Promise<void> {
  const client = posthogClient;
  posthogClient = null;
  distinctId = '';
  if (!client) {
    return;
  }

  let didTimeout = false;
  let timeoutHandle: ReturnType<typeof setTimeout> | null = null;

  const shutdownPromise = client.shutdown().catch((error: unknown) => {
    if (isIgnorablePostHogShutdownError(error)) {
      logger.debug('Ignored telemetry shutdown network error:', error);
      return;
    }
    throw error;
  });

  try {
    // Whichever settles first wins; the timer resolves (never rejects) so a slow
    // flush is simply abandoned instead of delaying exit.
    await Promise.race([
      shutdownPromise,
      new Promise<void>((resolve) => {
        timeoutHandle = setTimeout(() => {
          didTimeout = true;
          resolve();
        }, TELEMETRY_SHUTDOWN_TIMEOUT_MS);
      }),
    ]);

    if (didTimeout) {
      logger.debug(`Skipped waiting for telemetry shutdown after ${TELEMETRY_SHUTDOWN_TIMEOUT_MS}ms`);
      return;
    }
    logger.debug('Flushed telemetry events on shutdown');
  } catch (error) {
    logger.error('Error shutting down telemetry:', error);
  } finally {
    // Always release the timer so it cannot keep the event loop alive.
    if (timeoutHandle) {
      clearTimeout(timeoutHandle);
    }
  }
}

View File

@@ -16,7 +16,7 @@ export interface TokenUsageHistoryEntry {
/**
 * Derives the session id from a transcript file name.
 *
 * Recognised shapes are `<id>.jsonl`, `<id>.deleted.jsonl`, and either of those
 * followed by `.reset.<suffix>`. The `.reset.<suffix>` tail is stripped first so that
 * the `.deleted.jsonl` / `.jsonl` endings are still visible to the later replacements.
 *
 * @param fileName - Base name of the transcript file (no directory part).
 * @returns The session id, or undefined when the name is not a transcript file name.
 */
export function extractSessionIdFromTranscriptFileName(fileName: string): string | undefined {
  if (!fileName.endsWith('.jsonl') && !fileName.includes('.jsonl.reset.')) return undefined;
  return fileName
    .replace(/\.reset\..+$/, '')
    .replace(/\.deleted\.jsonl$/, '')
    .replace(/\.jsonl$/, '');
}

View File

@@ -15,12 +15,45 @@ export {
type TokenUsageHistoryEntry,
} from './token-usage-core';
/**
 * Collects agent ids from two sources: the configured agent list and the directory
 * names found under `<config dir>/agents`.
 *
 * Ids are trimmed, blanks are dropped, and duplicates are merged. Each source is
 * best-effort: a failure in one is ignored and whatever was gathered so far is kept.
 *
 * @returns The distinct agent ids, configured ids first, in discovery order.
 */
async function listAgentIdsWithSessionDirs(): Promise<string[]> {
  const agentsRoot = join(getOpenClawConfigDir(), 'agents');
  const discovered = new Set<string>();

  const remember = (rawId: string): void => {
    const id = rawId.trim();
    if (id) {
      discovered.add(id);
    }
  };

  try {
    const configuredIds = await listConfiguredAgentIds();
    for (const configuredId of configuredIds) {
      remember(configuredId);
    }
  } catch {
    // Ignore config discovery failures and fall back to disk scan.
  }

  try {
    const dirEntries = await readdir(agentsRoot, { withFileTypes: true });
    for (const dirEntry of dirEntries) {
      if (!dirEntry.isDirectory()) continue;
      remember(dirEntry.name);
    }
  } catch {
    // Ignore disk discovery failures and return whatever we already found.
  }

  return Array.from(discovered);
}
async function listRecentSessionFiles(): Promise<Array<{ filePath: string; sessionId: string; agentId: string; mtimeMs: number }>> {
const openclawDir = getOpenClawConfigDir();
const agentsDir = join(openclawDir, 'agents');
try {
const agentEntries = await listConfiguredAgentIds();
const agentEntries = await listAgentIdsWithSessionDirs();
const files: Array<{ filePath: string; sessionId: string; agentId: string; mtimeMs: number }> = [];
for (const agentId of agentEntries) {