Fix telemetry shutdown noise and improve token usage diagnostics (#444)
Co-authored-by: zuolingxuan <zuolingxuan@bytedance.com>
This commit is contained in:
committed by
GitHub
Unverified
parent
01adc828b5
commit
995a7f070d
158
electron/utils/openclaw-doctor.ts
Normal file
158
electron/utils/openclaw-doctor.ts
Normal file
@@ -0,0 +1,158 @@
|
||||
import { app, utilityProcess } from 'electron';
|
||||
import { existsSync } from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { getOpenClawDir, getOpenClawEntryPath } from './paths';
|
||||
import { logger } from './logger';
|
||||
import { getUvMirrorEnv } from './uv-env';
|
||||
|
||||
/** Maximum time a doctor run may take before the child process is killed and the run is reported as timed out. */
const OPENCLAW_DOCTOR_TIMEOUT_MS = 60_000;

/** CLI arguments passed to the OpenClaw entry script in `diagnose` mode. */
const OPENCLAW_DOCTOR_ARGS = ['doctor', '--json'];

/** CLI arguments passed to the OpenClaw entry script in `fix` mode. */
const OPENCLAW_DOCTOR_FIX_ARGS = ['doctor', '--fix', '--yes', '--non-interactive'];
|
||||
|
||||
/** Which flavour of the OpenClaw `doctor` command to run. */
export type OpenClawDoctorMode = 'diagnose' | 'fix';

/** Outcome of a single OpenClaw doctor run, as produced by `runDoctorCommand`. */
export interface OpenClawDoctorResult {
  /** Mode the run was started in. */
  mode: OpenClawDoctorMode;
  /** `true` only when the child process exited with code 0. */
  success: boolean;
  /** Exit code reported by the child; `null` when it never ran, timed out, or emitted an error. */
  exitCode: number | null;
  /** Everything the child wrote to stdout up to the moment the result was produced. */
  stdout: string;
  /** Everything the child wrote to stderr up to the moment the result was produced. */
  stderr: string;
  /** Display form of the invocation (`openclaw <args>`); not the literal process command line. */
  command: string;
  /** Working directory the child was (or would have been) started in. */
  cwd: string;
  /** Wall-clock milliseconds from the start of the run until the result was produced. */
  durationMs: number;
  /** Present and `true` only when the run was aborted by the timeout. */
  timedOut?: boolean;
  /** Failure description for a missing entry script, a timeout, or a process error. */
  error?: string;
}
|
||||
|
||||
/**
 * Resolves the directory that holds the bundled helper binaries.
 *
 * Packaged builds use `<resourcesPath>/bin`; otherwise the directory is
 * `<cwd>/resources/bin/<platform>-<arch>`.
 */
function getBundledBinPath(): string {
  if (app.isPackaged) {
    return path.join(process.resourcesPath, 'bin');
  }

  const platformArch = `${process.platform}-${process.arch}`;
  return path.join(process.cwd(), 'resources', 'bin', platformArch);
}
|
||||
|
||||
/**
 * Runs the OpenClaw entry script with the `doctor` arguments for the given
 * mode inside an Electron utility process and collects its output.
 *
 * A missing entry script, a timeout, a process `error` event, and a non-zero
 * exit are all reported through the returned result object instead of being
 * thrown. Whichever of timeout / error / exit happens first decides the
 * result; later events are still logged but do not change it.
 *
 * @param mode Selects the diagnose or fix argument list.
 * @returns The captured output and outcome of the run.
 */
async function runDoctorCommand(mode: OpenClawDoctorMode): Promise<OpenClawDoctorResult> {
  const openclawDir = getOpenClawDir();
  const entryScript = getOpenClawEntryPath();
  const args = mode === 'fix' ? OPENCLAW_DOCTOR_FIX_ARGS : OPENCLAW_DOCTOR_ARGS;
  const command = `openclaw ${args.join(' ')}`;
  const startedAt = Date.now();
  const elapsedMs = (): number => Date.now() - startedAt;

  if (!existsSync(entryScript)) {
    const error = `OpenClaw entry script not found at ${entryScript}`;
    logger.error(`Cannot run OpenClaw doctor: ${error}`);
    return {
      mode,
      success: false,
      exitCode: null,
      stdout: '',
      stderr: '',
      command,
      cwd: openclawDir,
      durationMs: elapsedMs(),
      error,
    };
  }

  // Put the bundled binaries first on PATH, but only when that directory exists.
  const binPath = getBundledBinPath();
  const hasBundledBin = existsSync(binPath);
  const inheritedPath = process.env.PATH || '';
  const finalPath = hasBundledBin
    ? `${binPath}${path.delimiter}${inheritedPath}`
    : inheritedPath;
  const uvEnv = await getUvMirrorEnv();

  logger.info(
    `Running OpenClaw doctor (mode=${mode}, entry="${entryScript}", args="${args.join(' ')}", cwd="${openclawDir}", bundledBin=${hasBundledBin ? 'yes' : 'no'})`,
  );

  return new Promise<OpenClawDoctorResult>((resolve) => {
    const proc = utilityProcess.fork(entryScript, args, {
      cwd: openclawDir,
      stdio: 'pipe',
      env: {
        ...process.env,
        ...uvEnv,
        PATH: finalPath,
        OPENCLAW_NO_RESPAWN: '1',
      } as NodeJS.ProcessEnv,
    });

    let capturedOut = '';
    let capturedErr = '';
    let settled = false;

    // Resolves the promise exactly once; the first caller wins.
    const settle = (
      outcome: Pick<OpenClawDoctorResult, 'success' | 'exitCode' | 'timedOut' | 'error'>,
    ): void => {
      if (settled) {
        return;
      }
      settled = true;

      const { success, exitCode, ...extras } = outcome;
      resolve({
        mode,
        success,
        exitCode,
        stdout: capturedOut,
        stderr: capturedErr,
        command,
        cwd: openclawDir,
        ...extras,
        durationMs: elapsedMs(),
      });
    };

    const killTimer = setTimeout(() => {
      logger.error(`OpenClaw doctor timed out after ${OPENCLAW_DOCTOR_TIMEOUT_MS}ms`);
      try {
        proc.kill();
      } catch {
        // ignore
      }
      settle({
        success: false,
        exitCode: null,
        timedOut: true,
        error: `Timed out after ${OPENCLAW_DOCTOR_TIMEOUT_MS}ms`,
      });
    }, OPENCLAW_DOCTOR_TIMEOUT_MS);

    proc.stdout?.on('data', (chunk) => {
      capturedOut += chunk.toString();
    });

    proc.stderr?.on('data', (chunk) => {
      capturedErr += chunk.toString();
    });

    proc.on('error', (error) => {
      clearTimeout(killTimer);
      logger.error('Failed to spawn OpenClaw doctor process:', error);
      settle({
        success: false,
        exitCode: null,
        error: error instanceof Error ? error.message : String(error),
      });
    });

    proc.on('exit', (code) => {
      clearTimeout(killTimer);
      logger.info(`OpenClaw doctor exited with code ${code ?? 'null'}`);
      settle({
        success: code === 0,
        exitCode: code,
      });
    });
  });
}
|
||||
|
||||
/**
 * Runs the OpenClaw doctor in `diagnose` mode (arguments `doctor --json`).
 *
 * @returns The captured output and outcome of the run.
 */
export async function runOpenClawDoctor(): Promise<OpenClawDoctorResult> {
  const report = await runDoctorCommand('diagnose');
  return report;
}
|
||||
|
||||
/**
 * Runs the OpenClaw doctor in `fix` mode
 * (arguments `doctor --fix --yes --non-interactive`).
 *
 * @returns The captured output and outcome of the run.
 */
export async function runOpenClawDoctorFix(): Promise<OpenClawDoctorResult> {
  const report = await runDoctorCommand('fix');
  return report;
}
|
||||
@@ -6,10 +6,32 @@ import { logger } from './logger';
|
||||
|
||||
// PostHog connection settings. NOTE(review): presumably consumed by
// initTelemetry, which is not visible in this view — confirm.
const POSTHOG_API_KEY = 'phc_aGNegeJQP5FzNiF2rEoKqQbkuCpiiETMttplibXpB0n';
const POSTHOG_HOST = 'https://us.i.posthog.com';

/** Longest time shutdownTelemetry waits for the client's shutdown() to settle. */
const TELEMETRY_SHUTDOWN_TIMEOUT_MS = 1500;

/** Active PostHog client, or null when there is none; shutdownTelemetry resets it to null. */
let posthogClient: PostHog | null = null;

/** Reset to '' by shutdownTelemetry. NOTE(review): presumably the PostHog distinct id of this install — confirm. */
let distinctId = '';
|
||||
|
||||
/**
 * Reports whether an error raised during telemetry shutdown is a network or
 * timeout failure that can be ignored.
 *
 * The error and every error reachable through its `cause` chain are checked:
 * `"<name> <message>"` is lower-cased and searched for known markers. The
 * walk is iterative and remembers visited errors, so a cyclic `cause` chain
 * ends with `false` instead of overflowing the stack.
 *
 * @param error Value caught from the PostHog client; anything that is not an `Error` is never ignorable.
 * @returns `true` when the error or one of its causes matches a known marker.
 */
function isIgnorablePostHogShutdownError(error: unknown): boolean {
  const ignorableMarkers = [
    'posthogfetchnetworkerror',
    'network error while fetching posthog',
    'timeouterror',
    'aborted due to timeout',
    'fetch failed',
  ];

  const visited = new Set<unknown>();
  let current: unknown = error;

  while (current instanceof Error && !visited.has(current)) {
    visited.add(current);

    const haystack = `${current.name} ${current.message}`.toLowerCase();
    if (ignorableMarkers.some((marker) => haystack.includes(marker))) {
      return true;
    }

    // Follow the cause chain; a non-Error cause ends the loop on the next check.
    current = 'cause' in current ? current.cause : undefined;
  }

  return false;
}
|
||||
|
||||
/**
|
||||
* Initialize PostHog telemetry
|
||||
*/
|
||||
@@ -65,15 +87,51 @@ export async function initTelemetry(): Promise<void> {
|
||||
}
|
||||
|
||||
/**
 * Best-effort telemetry shutdown that never blocks app exit on network issues.
 *
 * The module-level client is detached before anything else, so a second call
 * finds no client and returns immediately. The client's `shutdown()` is then
 * raced against a timer of `TELEMETRY_SHUTDOWN_TIMEOUT_MS`:
 * - shutdown settles first: a debug line is logged;
 * - the timer fires first: waiting is abandoned and a debug line is logged;
 * - shutdown rejects with an ignorable network/timeout error: logged at debug level;
 * - shutdown rejects with anything else: logged as an error.
 *
 * No rejection from the client's `shutdown()` is propagated to the caller.
 */
export async function shutdownTelemetry(): Promise<void> {
  const client = posthogClient;
  posthogClient = null;
  distinctId = '';

  if (!client) {
    return;
  }

  let didTimeout = false;
  let timeoutHandle: ReturnType<typeof setTimeout> | null = null;

  // Ignorable network failures become a normal resolution; anything else is rethrown.
  const shutdownPromise = client.shutdown().catch((error) => {
    if (isIgnorablePostHogShutdownError(error)) {
      logger.debug('Ignored telemetry shutdown network error:', error);
      return;
    }
    throw error;
  });

  try {
    await Promise.race([
      shutdownPromise,
      new Promise<void>((resolve) => {
        timeoutHandle = setTimeout(() => {
          didTimeout = true;
          resolve();
        }, TELEMETRY_SHUTDOWN_TIMEOUT_MS);
      }),
    ]);

    if (didTimeout) {
      logger.debug(`Skipped waiting for telemetry shutdown after ${TELEMETRY_SHUTDOWN_TIMEOUT_MS}ms`);
      return;
    }

    logger.debug('Flushed telemetry events on shutdown');
  } catch (error) {
    logger.error('Error shutting down telemetry:', error);
  } finally {
    // Always release the timer so it cannot keep the process alive.
    if (timeoutHandle) {
      clearTimeout(timeoutHandle);
    }
  }
}
|
||||
|
||||
@@ -16,7 +16,7 @@ export interface TokenUsageHistoryEntry {
|
||||
/**
 * Derives the session id from a transcript file name.
 *
 * Accepted names end in `.jsonl` or contain `.jsonl.reset.`. The `.reset.<suffix>`
 * tail is stripped first, then a `.deleted.jsonl` or plain `.jsonl` ending,
 * so `id.jsonl`, `id.deleted.jsonl`, `id.jsonl.reset.<suffix>` and
 * `id.deleted.jsonl.reset.<suffix>` all yield `id`.
 *
 * @param fileName Base name of the transcript file (no directory part).
 * @returns The session id, or `undefined` when the name is not a transcript file.
 */
export function extractSessionIdFromTranscriptFileName(fileName: string): string | undefined {
  const isTranscript = fileName.endsWith('.jsonl') || fileName.includes('.jsonl.reset.');
  if (!isTranscript) return undefined;

  return fileName
    // Strip only the reset tail here so a preceding `.deleted.jsonl` is still recognised below.
    .replace(/\.reset\..+$/, '')
    .replace(/\.deleted\.jsonl$/, '')
    .replace(/\.jsonl$/, '');
}
|
||||
|
||||
@@ -15,12 +15,45 @@ export {
|
||||
type TokenUsageHistoryEntry,
|
||||
} from './token-usage-core';
|
||||
|
||||
/**
 * Collects agent ids from two sources: the ids returned by
 * `listConfiguredAgentIds()` and the names of the directories found directly
 * under `<configDir>/agents`.
 *
 * Ids are trimmed, empty ones are dropped, and duplicates are removed.
 * A failure in either source is ignored so the other can still contribute.
 *
 * @returns The distinct ids, configured ones first, then those found on disk.
 */
async function listAgentIdsWithSessionDirs(): Promise<string[]> {
  const agentsRoot = join(getOpenClawConfigDir(), 'agents');
  const discovered = new Set<string>();

  const remember = (rawId: string): void => {
    const id = rawId.trim();
    if (id) {
      discovered.add(id);
    }
  };

  try {
    for (const configuredId of await listConfiguredAgentIds()) {
      remember(configuredId);
    }
  } catch {
    // Ignore config discovery failures and fall back to disk scan.
  }

  try {
    const dirEntries = await readdir(agentsRoot, { withFileTypes: true });
    dirEntries
      .filter((dirEntry) => dirEntry.isDirectory())
      .forEach((dirEntry) => remember(dirEntry.name));
  } catch {
    // Ignore disk discovery failures and return whatever we already found.
  }

  return Array.from(discovered);
}
|
||||
|
||||
async function listRecentSessionFiles(): Promise<Array<{ filePath: string; sessionId: string; agentId: string; mtimeMs: number }>> {
|
||||
const openclawDir = getOpenClawConfigDir();
|
||||
const agentsDir = join(openclawDir, 'agents');
|
||||
|
||||
try {
|
||||
const agentEntries = await listConfiguredAgentIds();
|
||||
const agentEntries = await listAgentIdsWithSessionDirs();
|
||||
const files: Array<{ filePath: string; sessionId: string; agentId: string; mtimeMs: number }> = [];
|
||||
|
||||
for (const agentId of agentEntries) {
|
||||
|
||||
Reference in New Issue
Block a user