diff --git a/.gitignore b/.gitignore index d8d27a7ae..9de5af241 100644 --- a/.gitignore +++ b/.gitignore @@ -66,5 +66,6 @@ artifacts/ docs/pr-session-notes-*.md .cursor/ +.claude/ .pnpm-store/ package-lock.json diff --git a/electron/api/routes/channels.ts b/electron/api/routes/channels.ts index 0b3c06d0d..6c9267466 100644 --- a/electron/api/routes/channels.ts +++ b/electron/api/routes/channels.ts @@ -33,7 +33,9 @@ import { import { computeChannelRuntimeStatus, pickChannelRuntimeStatus, + type ChannelConnectionStatus, type ChannelRuntimeAccountSnapshot, + type GatewayHealthState, } from '../../utils/channel-status'; import { OPENCLAW_WECHAT_CHANNEL_TYPE, @@ -65,6 +67,8 @@ import { normalizeWhatsAppMessagingTarget, } from '../../utils/openclaw-sdk'; import { logger } from '../../utils/logger'; +import { buildGatewayHealthSummary } from '../../utils/gateway-health'; +import type { GatewayHealthSummary } from '../../gateway/manager'; // listWhatsAppDirectory*FromConfig were removed from openclaw's public exports // in 2026.3.23-1. No-op stubs; WhatsApp target picker uses session discovery. @@ -405,7 +409,8 @@ interface ChannelAccountView { running: boolean; linked: boolean; lastError?: string; - status: 'connected' | 'connecting' | 'disconnected' | 'error'; + status: ChannelConnectionStatus; + statusReason?: string; isDefault: boolean; agentId?: string; } @@ -413,10 +418,34 @@ interface ChannelAccountView { interface ChannelAccountsView { channelType: string; defaultAccountId: string; - status: 'connected' | 'connecting' | 'disconnected' | 'error'; + status: ChannelConnectionStatus; + statusReason?: string; accounts: ChannelAccountView[]; } +export function getChannelStatusDiagnostics(): { + lastChannelsStatusOkAt?: number; + lastChannelsStatusFailureAt?: number; +} { + return { + lastChannelsStatusOkAt, + lastChannelsStatusFailureAt, + }; +} + +function gatewayHealthStateForChannels( + gatewayHealthState: GatewayHealthState, +): GatewayHealthState | undefined { + return gatewayHealthState === 'healthy' ? undefined : gatewayHealthState; +} + +function overlayStatusReason( + gatewayHealth: GatewayHealthSummary, + fallbackReason: string, +): string { + return gatewayHealth.reasons[0] || fallbackReason; +} + function buildGatewayStatusSnapshot(status: GatewayChannelStatusPayload | null): string { if (!status?.channelAccounts) return 'none'; const entries = Object.entries(status.channelAccounts); @@ -480,11 +509,13 @@ type DirectoryEntry = { const CHANNEL_TARGET_CACHE_TTL_MS = 60_000; const CHANNEL_TARGET_CACHE_ENABLED = process.env.VITEST !== 'true'; const channelTargetCache = new Map(); +let lastChannelsStatusOkAt: number | undefined; +let lastChannelsStatusFailureAt: number | undefined; -async function buildChannelAccountsView( +export async function buildChannelAccountsView( ctx: HostApiContext, options?: { probe?: boolean }, -): Promise { +): Promise<{ channels: ChannelAccountsView[]; gatewayHealth: GatewayHealthSummary }> { const startedAt = Date.now(); // Read config once and share across all sub-calls (was 5 readFile calls before). const openClawConfig = await readOpenClawConfig(); @@ -507,17 +538,32 @@ async function buildChannelAccountsView( { probe }, probe ? 5000 : 8000, ); + lastChannelsStatusOkAt = Date.now(); logger.info( `[channels.accounts] channels.status probe=${probe ? '1' : '0'} elapsedMs=${Date.now() - rpcStartedAt} snapshot=${buildGatewayStatusSnapshot(gatewayStatus)}` ); } catch { const probe = options?.probe === true; + lastChannelsStatusFailureAt = Date.now(); logger.warn( `[channels.accounts] channels.status probe=${probe ? '1' : '0'} failed after ${Date.now() - startedAt}ms` ); gatewayStatus = null; } + const gatewayDiagnostics = ctx.gatewayManager.getDiagnostics?.() ?? { + consecutiveHeartbeatMisses: 0, + consecutiveRpcFailures: 0, + }; + const gatewayHealth = buildGatewayHealthSummary({ + status: ctx.gatewayManager.getStatus(), + diagnostics: gatewayDiagnostics, + lastChannelsStatusOkAt, + lastChannelsStatusFailureAt, + platform: process.platform, + }); + const gatewayHealthState = gatewayHealthStateForChannels(gatewayHealth.state); + const channelTypes = new Set([ ...configuredChannels, ...Object.keys(configuredAccounts), @@ -566,7 +612,9 @@ async function buildChannelAccountsView( const accounts: ChannelAccountView[] = accountIds.map((accountId) => { const runtime = runtimeAccounts.find((item) => item.accountId === accountId); const runtimeSnapshot: ChannelRuntimeAccountSnapshot = runtime ?? {}; - const status = computeChannelRuntimeStatus(runtimeSnapshot); + const status = computeChannelRuntimeStatus(runtimeSnapshot, { + gatewayHealthState, + }); return { accountId, name: runtime?.name || accountId, @@ -576,6 +624,11 @@ async function buildChannelAccountsView( linked: runtime?.linked === true, lastError: typeof runtime?.lastError === 'string' ? runtime.lastError : undefined, status, + statusReason: status === 'degraded' + ? overlayStatusReason(gatewayHealth, 'gateway_degraded') + : status === 'error' + ? 'runtime_error' + : undefined, isDefault: accountId === defaultAccountId, agentId: agentsSnapshot.channelAccountOwners[`${rawChannelType}:${accountId}`], }; @@ -585,10 +638,32 @@ async function buildChannelAccountsView( return left.accountId.localeCompare(right.accountId); }); + const visibleAccountSnapshots: ChannelRuntimeAccountSnapshot[] = accounts.map((account) => ({ + connected: account.connected, + running: account.running, + linked: account.linked, + lastError: account.lastError, + })); + const hasRuntimeError = visibleAccountSnapshots.some((account) => typeof account.lastError === 'string' && account.lastError.trim()) + || Boolean(channelSummary?.error?.trim() || channelSummary?.lastError?.trim()); + const baseGroupStatus = pickChannelRuntimeStatus(visibleAccountSnapshots, channelSummary); + const groupStatus = !gatewayStatus && ctx.gatewayManager.getStatus().state === 'running' + ? 'degraded' + : gatewayHealthState && !hasRuntimeError && baseGroupStatus === 'connected' + ? 'degraded' + : pickChannelRuntimeStatus(visibleAccountSnapshots, channelSummary, { + gatewayHealthState, + }); + channels.push({ channelType: uiChannelType, defaultAccountId, - status: pickChannelRuntimeStatus(runtimeAccounts, channelSummary), + status: groupStatus, + statusReason: !gatewayStatus && ctx.gatewayManager.getStatus().state === 'running' + ? 'channels_status_timeout' + : groupStatus === 'degraded' + ? overlayStatusReason(gatewayHealth, 'gateway_degraded') + : undefined, accounts, }); } @@ -597,7 +672,7 @@ async function buildChannelAccountsView( logger.info( `[channels.accounts] response probe=${options?.probe === true ? '1' : '0'} elapsedMs=${Date.now() - startedAt} view=${sorted.map((item) => `${item.channelType}:${item.status}`).join(',')}` ); - return sorted; + return { channels: sorted, gatewayHealth }; } function buildChannelTargetLabel(baseLabel: string, value: string): string { @@ -1193,8 +1268,8 @@ export async function handleChannelRoutes( try { const probe = url.searchParams.get('probe') === '1'; logger.info(`[channels.accounts] request probe=${probe ? '1' : '0'}`); - const channels = await buildChannelAccountsView(ctx, { probe }); - sendJson(res, 200, { success: true, channels }); + const { channels, gatewayHealth } = await buildChannelAccountsView(ctx, { probe }); + sendJson(res, 200, { success: true, channels, gatewayHealth }); } catch (error) { sendJson(res, 500, { success: false, error: String(error) }); } diff --git a/electron/api/routes/diagnostics.ts b/electron/api/routes/diagnostics.ts new file mode 100644 index 000000000..268e4ecd2 --- /dev/null +++ b/electron/api/routes/diagnostics.ts @@ -0,0 +1,86 @@ +import { open } from 'node:fs/promises'; +import { join } from 'node:path'; +import type { IncomingMessage, ServerResponse } from 'http'; +import { logger } from '../../utils/logger'; +import { getOpenClawConfigDir } from '../../utils/paths'; +import { buildGatewayHealthSummary } from '../../utils/gateway-health'; +import type { HostApiContext } from '../context'; +import { sendJson } from '../route-utils'; +import { buildChannelAccountsView, getChannelStatusDiagnostics } from './channels'; + +const DEFAULT_TAIL_LINES = 200; + +async function readTail(filePath: string, tailLines = DEFAULT_TAIL_LINES): Promise { + const safeTailLines = Math.max(1, Math.floor(tailLines)); + try { + const file = await open(filePath, 'r'); + try { + const stat = await file.stat(); + if (stat.size === 0) return ''; + + const chunkSize = 64 * 1024; + let position = stat.size; + let content = ''; + let lineCount = 0; + + while (position > 0 && lineCount <= safeTailLines) { + const bytesToRead = Math.min(chunkSize, position); + position -= bytesToRead; + const buffer = Buffer.allocUnsafe(bytesToRead); + const { bytesRead } = await file.read(buffer, 0, bytesToRead, position); + content = `${buffer.subarray(0, bytesRead).toString('utf-8')}${content}`; + lineCount = content.split('\n').length - 1; + } + + const lines = content.split('\n'); + return lines.length <= safeTailLines ? content : lines.slice(-safeTailLines).join('\n'); + } finally { + await file.close(); + } + } catch { + return ''; + } +} + +export async function handleDiagnosticsRoutes( + req: IncomingMessage, + res: ServerResponse, + url: URL, + ctx: HostApiContext, +): Promise { + if (url.pathname === '/api/diagnostics/gateway-snapshot' && req.method === 'GET') { + try { + const { channels } = await buildChannelAccountsView(ctx, { probe: false }); + const diagnostics = ctx.gatewayManager.getDiagnostics?.() ?? { + consecutiveHeartbeatMisses: 0, + consecutiveRpcFailures: 0, + }; + const channelStatusDiagnostics = getChannelStatusDiagnostics(); + const gateway = { + ...ctx.gatewayManager.getStatus(), + ...buildGatewayHealthSummary({ + status: ctx.gatewayManager.getStatus(), + diagnostics, + lastChannelsStatusOkAt: channelStatusDiagnostics.lastChannelsStatusOkAt, + lastChannelsStatusFailureAt: channelStatusDiagnostics.lastChannelsStatusFailureAt, + platform: process.platform, + }), + }; + const openClawDir = getOpenClawConfigDir(); + sendJson(res, 200, { + capturedAt: Date.now(), + platform: process.platform, + gateway, + channels, + clawxLogTail: await logger.readLogFile(DEFAULT_TAIL_LINES), + gatewayLogTail: await readTail(join(openClawDir, 'logs', 'gateway.log')), + gatewayErrLogTail: await readTail(join(openClawDir, 'logs', 'gateway.err.log')), + }); + } catch (error) { + sendJson(res, 500, { success: false, error: String(error) }); + } + return true; + } + + return false; +} diff --git a/electron/api/server.ts b/electron/api/server.ts index a6af24683..4e16b37c0 100644 --- a/electron/api/server.ts +++ b/electron/api/server.ts @@ -15,6 +15,7 @@ import { handleSkillRoutes } from './routes/skills'; import { handleFileRoutes } from './routes/files'; import { handleSessionRoutes } from './routes/sessions'; import { handleCronRoutes } from './routes/cron'; +import { handleDiagnosticsRoutes } from './routes/diagnostics'; import { sendJson, setCorsHeaders, requireJsonContentType } from './route-utils'; type RouteHandler = ( @@ -35,6 +36,7 @@ const routeHandlers: RouteHandler[] = [ handleFileRoutes, handleSessionRoutes, handleCronRoutes, + handleDiagnosticsRoutes, handleLogRoutes, handleUsageRoutes, ]; diff --git a/electron/gateway/manager.ts b/electron/gateway/manager.ts index 918813d98..df6e40c1f 100644 --- a/electron/gateway/manager.ts +++ b/electron/gateway/manager.ts @@ -65,6 +65,40 @@ export interface GatewayStatus { gatewayReady?: boolean; } +export type GatewayHealthState = 'healthy' | 'degraded' | 'unresponsive'; + +export interface GatewayHealthSummary { + state: GatewayHealthState; + reasons: string[]; + consecutiveHeartbeatMisses: number; + lastAliveAt?: number; + lastRpcSuccessAt?: number; + lastRpcFailureAt?: number; + lastRpcFailureMethod?: string; + lastChannelsStatusOkAt?: number; + lastChannelsStatusFailureAt?: number; +} + +export interface GatewayDiagnosticsSnapshot { + lastAliveAt?: number; + lastRpcSuccessAt?: number; + lastRpcFailureAt?: number; + lastRpcFailureMethod?: string; + lastHeartbeatTimeoutAt?: number; + consecutiveHeartbeatMisses: number; + lastSocketCloseAt?: number; + lastSocketCloseCode?: number; + consecutiveRpcFailures: number; +} + +function isTransportRpcFailure(error: unknown): boolean { + const message = error instanceof Error ? error.message : String(error); + return message.includes('RPC timeout:') + || message.includes('Gateway not connected') + || message.includes('Gateway stopped') + || message.includes('Failed to send RPC request:'); +} + /** * Gateway Manager Events */ @@ -126,6 +160,10 @@ export class GatewayManager extends EventEmitter { /** Set by scheduleReconnect() before calling start() to signal auto-reconnect. */ private isAutoReconnectStart = false; private gatewayReadyFallbackTimer: NodeJS.Timeout | null = null; + private diagnostics: GatewayDiagnosticsSnapshot = { + consecutiveHeartbeatMisses: 0, + consecutiveRpcFailures: 0, + }; constructor(config?: Partial) { super(); @@ -197,6 +235,10 @@ export class GatewayManager extends EventEmitter { return this.stateController.getStatus(); } + getDiagnostics(): GatewayDiagnosticsSnapshot { + return { ...this.diagnostics }; + } + /** * Check if Gateway is connected and ready */ @@ -413,6 +455,7 @@ export class GatewayManager extends EventEmitter { this.restartController.resetDeferredRestart(); this.isAutoReconnectStart = false; + this.diagnostics.consecutiveHeartbeatMisses = 0; this.setStatus({ state: 'stopped', error: undefined, pid: undefined, connectedAt: undefined, uptime: undefined, gatewayReady: undefined }); } @@ -712,7 +755,7 @@ export class GatewayManager extends EventEmitter { * Uses OpenClaw protocol format: { type: "req", id: "...", method: "...", params: {...} } */ async rpc(method: string, params?: unknown, timeoutMs = 30000): Promise { - return new Promise((resolve, reject) => { + return await new Promise((resolve, reject) => { if (!this.ws || this.ws.readyState !== WebSocket.OPEN) { reject(new Error('Gateway not connected')); return; @@ -745,6 +788,14 @@ export class GatewayManager extends EventEmitter { } catch (error) { rejectPendingGatewayRequest(this.pendingRequests, id, new Error(`Failed to send RPC request: ${error}`)); } + }).then((result) => { + this.recordRpcSuccess(); + return result; + }).catch((error) => { + if (isTransportRpcFailure(error)) { + this.recordRpcFailure(method); + } + throw error; }); } @@ -782,6 +833,32 @@ export class GatewayManager extends EventEmitter { } } + private recordGatewayAlive(): void { + this.diagnostics.lastAliveAt = Date.now(); + this.diagnostics.consecutiveHeartbeatMisses = 0; + } + + private recordRpcSuccess(): void { + this.diagnostics.lastRpcSuccessAt = Date.now(); + this.diagnostics.consecutiveRpcFailures = 0; + } + + private recordRpcFailure(method: string): void { + this.diagnostics.lastRpcFailureAt = Date.now(); + this.diagnostics.lastRpcFailureMethod = method; + this.diagnostics.consecutiveRpcFailures += 1; + } + + private recordHeartbeatTimeout(consecutiveMisses: number): void { + this.diagnostics.lastHeartbeatTimeoutAt = Date.now(); + this.diagnostics.consecutiveHeartbeatMisses = consecutiveMisses; + } + + private recordSocketClose(code: number): void { + this.diagnostics.lastSocketCloseAt = Date.now(); + this.diagnostics.lastSocketCloseCode = code; + } + /** * Start Gateway process * Uses OpenClaw npm package from node_modules (dev) or resources (production) @@ -878,7 +955,9 @@ export class GatewayManager extends EventEmitter { this.ws = ws; ws.on('pong', () => { this.connectionMonitor.markAlive('pong'); + this.recordGatewayAlive(); }); + this.recordGatewayAlive(); this.setStatus({ state: 'running', port, @@ -892,6 +971,8 @@ export class GatewayManager extends EventEmitter { }, onCloseAfterHandshake: (closeCode) => { this.connectionMonitor.clear(); + this.recordSocketClose(closeCode); + this.diagnostics.consecutiveHeartbeatMisses = 0; if (this.status.state === 'running') { this.setStatus({ state: 'stopped' }); // On Windows, skip reconnect from WS close. The Gateway is a local @@ -916,6 +997,7 @@ export class GatewayManager extends EventEmitter { */ private handleMessage(message: unknown): void { this.connectionMonitor.markAlive('message'); + this.recordGatewayAlive(); if (typeof message !== 'object' || message === null) { logger.debug('Received non-object Gateway message'); @@ -986,24 +1068,25 @@ export class GatewayManager extends EventEmitter { } }, onHeartbeatTimeout: ({ consecutiveMisses, timeoutMs }) => { - // Heartbeat timeout is observability-only. We intentionally do NOT - // terminate the socket or trigger reconnection here because: - // - // 1. If the gateway process dies → child.on('exit') fires reliably. - // 2. If the socket disconnects → ws.on('close') fires reliably. - // 3. If the gateway event loop is blocked (skills scanning, GC, - // antivirus) → pong is delayed but the process and connection - // are still valid. Terminating the socket would cause a - // cascading restart loop for no reason. - // - // The only scenario ping/pong could catch (silent half-open TCP on - // localhost) is practically impossible. So we just log. + this.recordHeartbeatTimeout(consecutiveMisses); const pid = this.process?.pid ?? 'unknown'; + const isWindows = process.platform === 'win32'; + const shouldAttemptRecovery = !isWindows && this.shouldReconnect && this.status.state === 'running'; logger.warn( `Gateway heartbeat: ${consecutiveMisses} consecutive pong misses ` + - `(timeout=${timeoutMs}ms, pid=${pid}, state=${this.status.state}). ` + - `No action taken — relying on process exit and socket close events.`, + `(timeout=${timeoutMs}ms, pid=${pid}, state=${this.status.state}, autoReconnect=${this.shouldReconnect}).`, ); + if (!shouldAttemptRecovery) { + const reason = isWindows + ? 'platform=win32' + : 'lifecycle is not in auto-recoverable running state'; + logger.warn(`Gateway heartbeat recovery skipped (${reason})`); + return; + } + logger.warn('Gateway heartbeat recovery: restarting unresponsive gateway process'); + void this.restart().catch((error) => { + logger.warn('Gateway heartbeat recovery failed:', error); + }); }, }); } diff --git a/electron/utils/channel-status.ts b/electron/utils/channel-status.ts index 947cb3672..131f44f06 100644 --- a/electron/utils/channel-status.ts +++ b/electron/utils/channel-status.ts @@ -1,4 +1,5 @@ -export type ChannelConnectionStatus = 'connected' | 'connecting' | 'disconnected' | 'error'; +export type GatewayHealthState = 'healthy' | 'degraded' | 'unresponsive'; +export type ChannelConnectionStatus = 'connected' | 'connecting' | 'degraded' | 'disconnected' | 'error'; export interface ChannelRuntimeAccountSnapshot { connected?: boolean; @@ -19,6 +20,10 @@ export interface ChannelRuntimeSummarySnapshot { lastError?: string | null; } +export interface ChannelHealthOverlay { + gatewayHealthState?: GatewayHealthState; +} + const RECENT_ACTIVITY_MS = 10 * 60 * 1000; function hasNonEmptyError(value: string | null | undefined): boolean { @@ -74,9 +79,11 @@ export function isChannelRuntimeConnected( export function computeChannelRuntimeStatus( account: ChannelRuntimeAccountSnapshot, + healthOverlay?: ChannelHealthOverlay, ): ChannelConnectionStatus { - if (isChannelRuntimeConnected(account)) return 'connected'; if (hasChannelRuntimeError(account)) return 'error'; + if (healthOverlay?.gatewayHealthState && healthOverlay.gatewayHealthState !== 'healthy') return 'degraded'; + if (isChannelRuntimeConnected(account)) return 'connected'; if (account.running === true) return 'connecting'; return 'disconnected'; } @@ -84,6 +91,7 @@ export function computeChannelRuntimeStatus( export function pickChannelRuntimeStatus( accounts: ChannelRuntimeAccountSnapshot[], summary?: ChannelRuntimeSummarySnapshot, + healthOverlay?: ChannelHealthOverlay, ): ChannelConnectionStatus { if (accounts.some((account) => isChannelRuntimeConnected(account))) { return 'connected'; @@ -93,6 +101,10 @@ export function pickChannelRuntimeStatus( return 'error'; } + if (healthOverlay?.gatewayHealthState && healthOverlay.gatewayHealthState !== 'healthy') { + return 'degraded'; + } + if (accounts.some((account) => account.running === true)) { return 'connecting'; } diff --git a/electron/utils/gateway-health.ts b/electron/utils/gateway-health.ts new file mode 100644 index 000000000..8fb1a9b87 --- /dev/null +++ b/electron/utils/gateway-health.ts @@ -0,0 +1,81 @@ +import type { + GatewayDiagnosticsSnapshot, + GatewayHealthSummary, + GatewayStatus, +} from '../gateway/manager'; + +type BuildGatewayHealthSummaryOptions = { + status: GatewayStatus; + diagnostics: GatewayDiagnosticsSnapshot; + lastChannelsStatusOkAt?: number; + lastChannelsStatusFailureAt?: number; + platform?: string; + now?: number; +}; + +const CHANNEL_STATUS_FAILURE_WINDOW_MS = 2 * 60_000; +const HEARTBEAT_MISS_THRESHOLD_DEFAULT = 3; +const HEARTBEAT_MISS_THRESHOLD_WIN = 5; + +export function buildGatewayHealthSummary( + options: BuildGatewayHealthSummaryOptions, +): GatewayHealthSummary { + const now = options.now ?? Date.now(); + const reasons = new Set(); + const heartbeatThreshold = options.platform === 'win32' + ? HEARTBEAT_MISS_THRESHOLD_WIN + : HEARTBEAT_MISS_THRESHOLD_DEFAULT; + + const channelStatusFailureIsRecent = + typeof options.lastChannelsStatusFailureAt === 'number' + && now - options.lastChannelsStatusFailureAt <= CHANNEL_STATUS_FAILURE_WINDOW_MS + && ( + typeof options.lastChannelsStatusOkAt !== 'number' + || options.lastChannelsStatusFailureAt > options.lastChannelsStatusOkAt + ); + + if (options.status.state !== 'running') { + reasons.add(options.status.state === 'error' ? 'gateway_error' : 'gateway_not_running'); + return { + state: 'degraded', + reasons: [...reasons], + consecutiveHeartbeatMisses: options.diagnostics.consecutiveHeartbeatMisses, + lastAliveAt: options.diagnostics.lastAliveAt, + lastRpcSuccessAt: options.diagnostics.lastRpcSuccessAt, + lastRpcFailureAt: options.diagnostics.lastRpcFailureAt, + lastRpcFailureMethod: options.diagnostics.lastRpcFailureMethod, + lastChannelsStatusOkAt: options.lastChannelsStatusOkAt, + lastChannelsStatusFailureAt: options.lastChannelsStatusFailureAt, + }; + } + + if (options.diagnostics.consecutiveHeartbeatMisses >= heartbeatThreshold) { + reasons.add('gateway_unresponsive'); + } else if (options.diagnostics.consecutiveHeartbeatMisses > 0) { + reasons.add('gateway_degraded'); + } + + if (options.diagnostics.consecutiveRpcFailures > 0) { + reasons.add('rpc_timeout'); + } + + if (channelStatusFailureIsRecent) { + reasons.add('channels_status_timeout'); + } + + return { + state: reasons.has('gateway_unresponsive') + ? 'unresponsive' + : reasons.size > 0 + ? 'degraded' + : 'healthy', + reasons: [...reasons], + consecutiveHeartbeatMisses: options.diagnostics.consecutiveHeartbeatMisses, + lastAliveAt: options.diagnostics.lastAliveAt, + lastRpcSuccessAt: options.diagnostics.lastRpcSuccessAt, + lastRpcFailureAt: options.diagnostics.lastRpcFailureAt, + lastRpcFailureMethod: options.diagnostics.lastRpcFailureMethod, + lastChannelsStatusOkAt: options.lastChannelsStatusOkAt, + lastChannelsStatusFailureAt: options.lastChannelsStatusFailureAt, + }; +} diff --git a/electron/utils/openclaw-auth.ts b/electron/utils/openclaw-auth.ts index 991e1f4fd..fee0bc9af 100644 --- a/electron/utils/openclaw-auth.ts +++ b/electron/utils/openclaw-auth.ts @@ -1634,6 +1634,51 @@ export async function sanitizeOpenClawConfig(): Promise { pluginsObj.allow = allowArr; } + // ── acpx legacy config/install cleanup ───────────────────── + // Older OpenClaw releases allowed plugins.entries.acpx.config.command + // and expectedVersion overrides. Current bundled acpx schema rejects + // them, which causes the Gateway to fail validation before startup. + // Strip those keys and drop stale installs metadata that still points + // at an older bundled OpenClaw tree so the current bundled plugin can + // be re-registered cleanly. + const acpxEntry = isPlainRecord(pEntries.acpx) ? pEntries.acpx as Record : null; + const acpxConfig = acpxEntry && isPlainRecord(acpxEntry.config) + ? acpxEntry.config as Record + : null; + if (acpxConfig) { + for (const legacyKey of ['command', 'expectedVersion'] as const) { + if (legacyKey in acpxConfig) { + delete acpxConfig[legacyKey]; + modified = true; + console.log(`[sanitize] Removed legacy plugins.entries.acpx.config.${legacyKey}`); + } + } + } + + const installs = isPlainRecord(pluginsObj.installs) ? pluginsObj.installs as Record : null; + const acpxInstall = installs && isPlainRecord(installs.acpx) ? installs.acpx as Record : null; + if (acpxInstall) { + const currentBundledAcpxDir = join(getOpenClawResolvedDir(), 'dist', 'extensions', 'acpx').replace(/\\/g, '/'); + const sourcePath = typeof acpxInstall.sourcePath === 'string' ? acpxInstall.sourcePath : ''; + const installPath = typeof acpxInstall.installPath === 'string' ? acpxInstall.installPath : ''; + const normalizedSourcePath = sourcePath.replace(/\\/g, '/'); + const normalizedInstallPath = installPath.replace(/\\/g, '/'); + const pointsAtDifferentBundledTree = [normalizedSourcePath, normalizedInstallPath].some( + (candidate) => candidate.includes('/node_modules/.pnpm/openclaw@') && candidate !== currentBundledAcpxDir, + ); + const pointsAtMissingPath = (sourcePath && !(await fileExists(sourcePath))) + || (installPath && !(await fileExists(installPath))); + + if (pointsAtDifferentBundledTree || pointsAtMissingPath) { + delete installs.acpx; + if (Object.keys(installs).length === 0) { + delete pluginsObj.installs; + } + modified = true; + console.log('[sanitize] Removed stale plugins.installs.acpx metadata'); + } + } + const installedFeishuId = await resolveInstalledFeishuPluginId(); const configuredFeishuId = FEISHU_PLUGIN_ID_CANDIDATES.find((id) => allowArr.includes(id)) diff --git a/src/i18n/locales/en/channels.json b/src/i18n/locales/en/channels.json index de1e5b235..0f812d8fa 100644 --- a/src/i18n/locales/en/channels.json +++ b/src/i18n/locales/en/channels.json @@ -62,6 +62,7 @@ "connectionStatus": { "connected": "Connected", "connecting": "Connecting", + "degraded": "Degraded", "disconnected": "Disconnected", "error": "Error" }, @@ -99,6 +100,30 @@ "saveAndConnect": "Save & Connect", "envVar": "Environment Variable: {{var}}" }, + "health": { + "state": { + "degraded": "Gateway degraded", + "unresponsive": "Gateway unresponsive" + }, + "reasons": { + "gateway_degraded": "Gateway heartbeat recently degraded.", + "gateway_unresponsive": "Gateway control plane appears unresponsive.", + "channels_status_timeout": "Channel runtime status probe timed out.", + "rpc_timeout": "Recent Gateway RPC calls timed out.", + "gateway_not_running": "Gateway is not running.", + "gateway_error": "Gateway is in an error state.", + "runtime_error": "Channel runtime reported an error." + }, + "restartGateway": "Restart Gateway", + "copyDiagnostics": "Copy Diagnostics", + "viewDiagnostics": "View Diagnostics", + "hideDiagnostics": "Hide Diagnostics", + "diagnosticsTitle": "Gateway Diagnostics Snapshot", + "diagnosticsCopied": "Diagnostics copied to clipboard", + "diagnosticsCopyFailed": "Failed to collect diagnostics: {{error}}", + "restartTriggered": "Gateway restart requested", + "restartFailed": "Failed to restart gateway: {{error}}" + }, "meta": { "telegram": { "description": "Connect Telegram using a bot token from @BotFather", diff --git a/src/i18n/locales/ja/channels.json b/src/i18n/locales/ja/channels.json index 40396c801..94af46e19 100644 --- a/src/i18n/locales/ja/channels.json +++ b/src/i18n/locales/ja/channels.json @@ -62,6 +62,7 @@ "connectionStatus": { "connected": "接続済み", "connecting": "接続中", + "degraded": "劣化中", "disconnected": "未接続", "error": "異常" }, @@ -99,6 +100,30 @@ "saveAndConnect": "保存して接続", "envVar": "環境変数: {{var}}" }, + "health": { + "state": { + "degraded": "ゲートウェイ劣化", + "unresponsive": "ゲートウェイ無応答" + }, + "reasons": { + "gateway_degraded": "ゲートウェイのハートビートに劣化が見られます。", + "gateway_unresponsive": "ゲートウェイの制御プレーンが無応答です。", + "channels_status_timeout": "チャンネル状態の問い合わせがタイムアウトしました。", + "rpc_timeout": "最近のゲートウェイ RPC がタイムアウトしました。", + "gateway_not_running": "ゲートウェイは起動していません。", + "gateway_error": "ゲートウェイはエラー状態です。", + "runtime_error": "チャンネルランタイムがエラーを返しました。" + }, + "restartGateway": "ゲートウェイを再起動", + "copyDiagnostics": "診断をコピー", + "viewDiagnostics": "診断を表示", + "hideDiagnostics": "診断を隠す", + "diagnosticsTitle": "ゲートウェイ診断スナップショット", + "diagnosticsCopied": "診断をクリップボードにコピーしました", + "diagnosticsCopyFailed": "診断の取得に失敗しました: {{error}}", + "restartTriggered": "ゲートウェイの再起動を要求しました", + "restartFailed": "ゲートウェイの再起動に失敗しました: {{error}}" + }, "meta": { "telegram": { "description": "@BotFather からのボットトークンを使用して Telegram に接続します", diff --git a/src/i18n/locales/zh/channels.json b/src/i18n/locales/zh/channels.json index c39848545..1ed42d73c 100644 --- a/src/i18n/locales/zh/channels.json +++ b/src/i18n/locales/zh/channels.json @@ -62,6 +62,7 @@ "connectionStatus": { "connected": "已连接", "connecting": "连接中", + "degraded": "异常降级", "disconnected": "未连接", "error": "异常" }, @@ -99,6 +100,30 @@ "saveAndConnect": "保存并连接", "envVar": "环境变量: {{var}}" }, + "health": { + "state": { + "degraded": "网关状态异常", + "unresponsive": "网关无响应" + }, + "reasons": { + "gateway_degraded": "网关心跳近期出现异常。", + "gateway_unresponsive": "网关控制面看起来已经无响应。", + "channels_status_timeout": "频道运行时状态探测超时。", + "rpc_timeout": "最近的网关 RPC 调用发生超时。", + "gateway_not_running": "网关当前未运行。", + "gateway_error": "网关当前处于错误状态。", + "runtime_error": "频道运行时返回了错误。" + }, + "restartGateway": "重启网关", + "copyDiagnostics": "复制诊断快照", + "viewDiagnostics": "查看诊断快照", + "hideDiagnostics": "隐藏诊断快照", + "diagnosticsTitle": "网关诊断快照", + "diagnosticsCopied": "诊断快照已复制到剪贴板", + "diagnosticsCopyFailed": "收集诊断快照失败:{{error}}", + "restartTriggered": "已请求重启网关", + "restartFailed": "重启网关失败:{{error}}" + }, "meta": { "telegram": { "description": "使用 @BotFather 提供的机器人令牌连接 Telegram", diff --git a/src/lib/channel-status.ts b/src/lib/channel-status.ts index 947cb3672..131f44f06 100644 --- a/src/lib/channel-status.ts +++ b/src/lib/channel-status.ts @@ -1,4 +1,5 @@ -export type ChannelConnectionStatus = 'connected' | 'connecting' | 'disconnected' | 'error'; +export type GatewayHealthState = 'healthy' | 'degraded' | 'unresponsive'; +export type ChannelConnectionStatus = 'connected' | 'connecting' | 'degraded' | 'disconnected' | 'error'; export interface ChannelRuntimeAccountSnapshot { connected?: boolean; @@ -19,6 +20,10 @@ export interface ChannelRuntimeSummarySnapshot { lastError?: string | null; } +export interface ChannelHealthOverlay { + gatewayHealthState?: GatewayHealthState; +} + const RECENT_ACTIVITY_MS = 10 * 60 * 1000; function hasNonEmptyError(value: string | null | undefined): boolean { @@ -74,9 +79,11 @@ export function isChannelRuntimeConnected( export function computeChannelRuntimeStatus( account: ChannelRuntimeAccountSnapshot, + healthOverlay?: ChannelHealthOverlay, ): ChannelConnectionStatus { - if (isChannelRuntimeConnected(account)) return 'connected'; if (hasChannelRuntimeError(account)) return 'error'; + if (healthOverlay?.gatewayHealthState && healthOverlay.gatewayHealthState !== 'healthy') return 'degraded'; + if (isChannelRuntimeConnected(account)) return 'connected'; if (account.running === true) return 'connecting'; return 'disconnected'; } @@ -84,6 +91,7 @@ export function computeChannelRuntimeStatus( export function pickChannelRuntimeStatus( accounts: ChannelRuntimeAccountSnapshot[], summary?: ChannelRuntimeSummarySnapshot, + healthOverlay?: ChannelHealthOverlay, ): ChannelConnectionStatus { if (accounts.some((account) => isChannelRuntimeConnected(account))) { return 'connected'; @@ -93,6 +101,10 @@ export function pickChannelRuntimeStatus( return 'error'; } + if (healthOverlay?.gatewayHealthState && healthOverlay.gatewayHealthState !== 'healthy') { + return 'degraded'; + } + if (accounts.some((account) => account.running === true)) { return 'connecting'; } diff --git a/src/pages/Channels/index.tsx b/src/pages/Channels/index.tsx index fd3b80026..d9ef5dbff 100644 --- a/src/pages/Channels/index.tsx +++ b/src/pages/Channels/index.tsx @@ -1,5 +1,5 @@ import { useState, useEffect, useCallback, useMemo, useRef } from 'react'; -import { RefreshCw, Trash2, AlertCircle, Plus } from 'lucide-react'; +import { RefreshCw, Trash2, AlertCircle, Plus, Copy, RotateCcw, ChevronDown, ChevronUp } from 'lucide-react'; import { Button } from '@/components/ui/button'; import { Badge } from '@/components/ui/badge'; import { ConfirmDialog } from '@/components/ui/confirm-dialog'; @@ -33,7 +33,8 @@ interface ChannelAccountItem { accountId: string; name: string; configured: boolean; - status: 'connected' | 'connecting' | 'disconnected' | 'error'; + status: 'connected' | 'connecting' | 'degraded' | 'disconnected' | 'error'; + statusReason?: string; lastError?: string; isDefault: boolean; agentId?: string; @@ -42,10 +43,51 @@ interface ChannelAccountItem { interface ChannelGroupItem { channelType: string; defaultAccountId: string; - status: 'connected' | 'connecting' | 'disconnected' | 'error'; + status: 'connected' | 'connecting' | 'degraded' | 'disconnected' | 'error'; + statusReason?: string; accounts: ChannelAccountItem[]; } +interface GatewayHealthSummary { + state: 'healthy' | 'degraded' | 'unresponsive'; + reasons: string[]; + consecutiveHeartbeatMisses: number; + lastAliveAt?: number; + lastRpcSuccessAt?: number; + lastRpcFailureAt?: number; + lastRpcFailureMethod?: string; + lastChannelsStatusOkAt?: number; + lastChannelsStatusFailureAt?: number; +} + +interface GatewayDiagnosticSnapshot { + capturedAt: number; + platform: string; + gateway: GatewayHealthSummary & Record; + channels: ChannelGroupItem[]; + clawxLogTail: string; + gatewayLogTail: string; + gatewayErrLogTail: string; +} + +function isGatewayDiagnosticSnapshot(value: unknown): value is GatewayDiagnosticSnapshot { + if (!value || typeof value !== 'object') { + return false; + } + + const snapshot = value as Record; + return ( + typeof snapshot.capturedAt === 'number' + && typeof snapshot.platform === 'string' + && typeof snapshot.gateway === 'object' + && snapshot.gateway !== null + && Array.isArray(snapshot.channels) + && typeof snapshot.clawxLogTail === 'string' + && typeof snapshot.gatewayLogTail === 'string' + && typeof snapshot.gatewayErrLogTail === 'string' + ); +} + interface AgentItem { id: string; name: string; @@ -76,11 +118,20 @@ export function Channels() { const { t } = useTranslation('channels'); const gatewayStatus = useGatewayStore((state) => state.status); const lastGatewayStateRef = useRef(gatewayStatus.state); + const defaultGatewayHealth = useMemo(() => ({ + state: 'healthy', + reasons: [], + consecutiveHeartbeatMisses: 0, + }), []); const [loading, setLoading] = useState(true); const [error, setError] = useState(null); const [channelGroups, setChannelGroups] = useState([]); const [agents, setAgents] = useState([]); + const [gatewayHealth, setGatewayHealth] = useState(defaultGatewayHealth); + const [diagnosticsSnapshot, setDiagnosticsSnapshot] = useState(null); + const [showDiagnostics, setShowDiagnostics] = useState(false); + const [diagnosticsLoading, setDiagnosticsLoading] = useState(false); const [showConfigModal, setShowConfigModal] = useState(false); const [selectedChannelType, setSelectedChannelType] = useState(null); const [selectedAccountId, setSelectedAccountId] = useState(undefined); @@ -139,18 +190,29 @@ export function Channels() { hostApiFetch<{ success: boolean; agents?: AgentItem[]; error?: string }>('/api/agents'), ]); - if (!channelsRes.success) { - throw new Error(channelsRes.error || 'Failed to load channels'); + type ChannelsResponse = { + success: boolean; + channels?: ChannelGroupItem[]; + gatewayHealth?: GatewayHealthSummary; + error?: string; + }; + const channelsPayload = channelsRes as ChannelsResponse; + + if (!channelsPayload.success) { + throw new Error(channelsPayload.error || 'Failed to load channels'); } if (!agentsRes.success) { throw new Error(agentsRes.error || 'Failed to load agents'); } - setChannelGroups(channelsRes.channels || []); + setChannelGroups(channelsPayload.channels || []); setAgents(agentsRes.agents || []); + setGatewayHealth(channelsPayload.gatewayHealth || defaultGatewayHealth); + setDiagnosticsSnapshot(null); + setShowDiagnostics(false); console.info( - `[channels-ui] fetch ok probe=${probe ? '1' : '0'} elapsedMs=${Date.now() - startedAt} view=${(channelsRes.channels || []).map((item) => `${item.channelType}:${item.status}`).join(',')}` + `[channels-ui] fetch ok probe=${probe ? '1' : '0'} elapsedMs=${Date.now() - startedAt} view=${(channelsPayload.channels || []).map((item) => `${item.channelType}:${item.status}`).join(',')}` ); } catch (fetchError) { // Preserve previous data on error — don't clear channelGroups/agents. @@ -269,6 +331,100 @@ export function Channels() { void fetchPageData({ probe: true }); }; + const fetchDiagnosticsSnapshot = useCallback(async (): Promise => { + const response = await hostApiFetch('/api/diagnostics/gateway-snapshot'); + if (response && typeof response === 'object') { + const payload = response as Record; + if (payload.success === false || typeof payload.error === 'string') { + throw new Error(typeof payload.error === 'string' ? payload.error : 'Failed to fetch gateway diagnostics snapshot'); + } + } + if (!isGatewayDiagnosticSnapshot(response)) { + throw new Error('Invalid gateway diagnostics snapshot response'); + } + const snapshot = response; + setDiagnosticsSnapshot(snapshot); + return snapshot; + }, []); + + const handleRestartGateway = async () => { + try { + const result = await hostApiFetch<{ success?: boolean; error?: string }>('/api/gateway/restart', { + method: 'POST', + }); + if (result?.success !== true) { + throw new Error(result?.error || 'Failed to restart gateway'); + } + setDiagnosticsSnapshot(null); + setShowDiagnostics(false); + toast.success(t('health.restartTriggered')); + void fetchPageData({ probe: true }); + } catch (restartError) { + toast.error(t('health.restartFailed', { error: String(restartError) })); + } + }; + + const handleCopyDiagnostics = async () => { + setDiagnosticsLoading(true); + try { + const snapshot = await fetchDiagnosticsSnapshot(); + await navigator.clipboard.writeText(JSON.stringify(snapshot, null, 2)); + toast.success(t('health.diagnosticsCopied')); + } catch (copyError) { + toast.error(t('health.diagnosticsCopyFailed', { error: String(copyError) })); + } finally { + setDiagnosticsLoading(false); + } + }; + + const handleToggleDiagnostics = async () => { + if (showDiagnostics) { + setShowDiagnostics(false); + return; + } + setDiagnosticsLoading(true); + try { + await fetchDiagnosticsSnapshot(); + } catch (diagnosticsError) { + toast.error(t('health.diagnosticsCopyFailed', { error: String(diagnosticsError) })); + setDiagnosticsLoading(false); + return; + } finally { + setDiagnosticsLoading(false); + } + setShowDiagnostics(true); + }; + + const healthReasonLabel = useMemo(() => { + const primaryReason = gatewayHealth.reasons[0]; + if (!primaryReason) return ''; + return t(`health.reasons.${primaryReason}`); + }, [gatewayHealth.reasons, t]); + + const diagnosticsText = useMemo( + () => diagnosticsSnapshot ? JSON.stringify(diagnosticsSnapshot, null, 2) : '', + [diagnosticsSnapshot], + ); + + const statusTone = useCallback((status: ChannelGroupItem['status']) => { + switch (status) { + case 'connected': + return 'bg-green-500/10 text-green-700 dark:text-green-300 border-green-500/20'; + case 'connecting': + return 'bg-sky-500/10 text-sky-700 dark:text-sky-300 border-sky-500/20'; + case 'degraded': + return 'bg-yellow-500/10 text-yellow-700 dark:text-yellow-300 border-yellow-500/20'; + case 'error': + return 'bg-destructive/10 text-destructive border-destructive/20'; + default: + return 'bg-black/5 dark:bg-white/5 text-muted-foreground border-black/10 dark:border-white/10'; + } + }, []); + + const statusLabel = useCallback((status: ChannelGroupItem['status']) => { + return t(`account.connectionStatus.${status}`); + }, [t]); + const handleBindAgent = async (channelType: string, accountId: string, agentId: string) => { try { if (!agentId) { @@ -365,6 +521,86 @@ export function Channels() { )} + {gatewayStatus.state === 'running' && gatewayHealth.state !== 'healthy' && ( +
+
+
+ +
+

+ {t(`health.state.${gatewayHealth.state}`)} +

+ {healthReasonLabel && ( +

{healthReasonLabel}

+ )} +
+
+
+ + + +
+
+ + {showDiagnostics && diagnosticsText && ( +
+

{t('health.diagnosticsTitle')}

+
+                    {diagnosticsText}
+                  
+
+ )} +
+ )} + {error && (
@@ -393,18 +629,9 @@ export function Channels() {

{group.channelType}

-
+ + {statusLabel(group.status)} +
@@ -456,10 +683,18 @@ export function Channels() {

{displayName}

+ + {statusLabel(account.status)} +
{account.lastError && (
{account.lastError}
)} + {!account.lastError && account.statusReason && account.status === 'degraded' && ( +
+ {t(`health.reasons.${account.statusReason}`)} +
+ )}
diff --git a/src/types/channel.ts b/src/types/channel.ts index c323c3cac..0fa515494 100644 --- a/src/types/channel.ts +++ b/src/types/channel.ts @@ -26,7 +26,7 @@ export type ChannelType = /** * Channel connection status */ -export type ChannelStatus = 'connected' | 'disconnected' | 'connecting' | 'error'; +export type ChannelStatus = 'connected' | 'disconnected' | 'connecting' | 'degraded' | 'error'; /** * Channel connection type diff --git a/tests/e2e/channels-health-diagnostics.spec.ts b/tests/e2e/channels-health-diagnostics.spec.ts new file mode 100644 index 000000000..94d748dac --- /dev/null +++ b/tests/e2e/channels-health-diagnostics.spec.ts @@ -0,0 +1,167 @@ +import { completeSetup, expect, test } from './fixtures/electron'; + +test.describe('Channels health diagnostics', () => { + test('shows degraded banner, restarts gateway, and copies diagnostics', async ({ electronApp, page }) => { + await electronApp.evaluate(({ ipcMain }) => { + const state = { + restartCount: 0, + diagnosticsCount: 0, + }; + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (globalThis as any).__clawxE2eChannelHealth = state; + + ipcMain.removeHandler('hostapi:fetch'); + ipcMain.handle('hostapi:fetch', async (_event, request: { path?: string; method?: string }) => { + const method = request?.method ?? 'GET'; + const path = request?.path ?? ''; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const current = (globalThis as any).__clawxE2eChannelHealth as typeof state; + + if (path === '/api/channels/accounts' && method === 'GET') { + return { + ok: true, + data: { + status: 200, + ok: true, + json: { + success: true, + gatewayHealth: { + state: 'degraded', + reasons: ['channels_status_timeout'], + consecutiveHeartbeatMisses: 1, + }, + channels: [ + { + channelType: 'feishu', + defaultAccountId: 'default', + status: 'degraded', + statusReason: 'channels_status_timeout', + accounts: [ + { + accountId: 'default', + name: 'Primary Account', + configured: true, + status: 'degraded', + statusReason: 'channels_status_timeout', + isDefault: true, + }, + ], + }, + ], + }, + }, + }; + } + + if (path === '/api/gateway/status' && method === 'GET') { + return { + ok: true, + data: { + status: 200, + ok: true, + json: { state: 'running', port: 18789 }, + }, + }; + } + + if (path === '/api/agents' && method === 'GET') { + return { + ok: true, + data: { + status: 200, + ok: true, + json: { success: true, agents: [] }, + }, + }; + } + + if (path === '/api/gateway/restart' && method === 'POST') { + current.restartCount += 1; + return { + ok: true, + data: { + status: 200, + ok: true, + json: { success: true }, + }, + }; + } + + if (path === '/api/diagnostics/gateway-snapshot' && method === 'GET') { + current.diagnosticsCount += 1; + return { + ok: true, + data: { + status: 200, + ok: true, + json: { + capturedAt: 123, + platform: 'darwin', + gateway: { + state: 'degraded', + reasons: ['channels_status_timeout'], + consecutiveHeartbeatMisses: 1, + }, + channels: [], + clawxLogTail: 'clawx-log', + gatewayLogTail: 'gateway-log', + gatewayErrLogTail: '', + }, + }, + }; + } + + return { + ok: false, + error: { message: `Unexpected hostapi:fetch request: ${method} ${path}` }, + }; + }); + }); + + await completeSetup(page); + + await page.evaluate(() => { + Object.defineProperty(navigator, 'clipboard', { + value: { + writeText: (value: string) => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (window as any).__copiedDiagnostics = value; + return Promise.resolve(); + }, + }, + configurable: true, + }); + }); + + await page.getByTestId('sidebar-nav-channels').click(); + await expect(page.getByTestId('channels-page')).toBeVisible(); + await expect(page.getByTestId('channels-health-banner')).toBeVisible(); + await expect(page.getByText(/Gateway degraded|网关状态异常|ゲートウェイ劣化/)).toBeVisible(); + await expect(page.locator('div.rounded-2xl').getByText(/Degraded|异常降级|劣化中/).first()).toBeVisible(); + + await page.getByTestId('channels-restart-gateway').click(); + await page.getByTestId('channels-copy-diagnostics').click(); + await page.getByTestId('channels-toggle-diagnostics').click(); + + await expect(page.getByTestId('channels-diagnostics')).toBeVisible(); + + const result = await electronApp.evaluate(() => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const state = (globalThis as any).__clawxE2eChannelHealth as { restartCount: number; diagnosticsCount: number }; + return { + restartCount: state.restartCount, + diagnosticsCount: state.diagnosticsCount, + }; + }); + + expect(result.restartCount).toBe(1); + expect(result.diagnosticsCount).toBeGreaterThanOrEqual(1); + + const copied = await page.evaluate(() => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + return (window as any).__copiedDiagnostics as string; + }); + expect(copied).toContain('"platform": "darwin"'); + }); +}); diff --git a/tests/unit/channel-routes.test.ts b/tests/unit/channel-routes.test.ts index c43455000..8492c0f5b 100644 --- a/tests/unit/channel-routes.test.ts +++ b/tests/unit/channel-routes.test.ts @@ -175,6 +175,7 @@ describe('handleChannelRoutes', () => { gatewayManager: { rpc, getStatus: () => ({ state: 'running' }), + getDiagnostics: () => ({ consecutiveHeartbeatMisses: 0, consecutiveRpcFailures: 0 }), debouncedReload: vi.fn(), debouncedRestart: vi.fn(), }, @@ -921,6 +922,145 @@ describe('handleChannelRoutes', () => { expect(feishu?.accounts.map((entry) => entry.accountId)).toEqual(['default']); }); + it('returns degraded channel health when channels.status times out while gateway is still running', async () => { + listConfiguredChannelsMock.mockResolvedValue(['feishu']); + listConfiguredChannelAccountsMock.mockResolvedValue({ + feishu: { + defaultAccountId: 'default', + accountIds: ['default'], + }, + }); + readOpenClawConfigMock.mockResolvedValue({ + channels: { + feishu: { + defaultAccount: 'default', + }, + }, + }); + + const rpc = vi.fn().mockRejectedValue(new Error('RPC timeout: channels.status')); + + const { handleChannelRoutes } = await import('@electron/api/routes/channels'); + await handleChannelRoutes( + { method: 'GET' } as IncomingMessage, + {} as ServerResponse, + new URL('http://127.0.0.1:13210/api/channels/accounts'), + { + gatewayManager: { + rpc, + getStatus: () => ({ state: 'running' }), + getDiagnostics: () => ({ consecutiveHeartbeatMisses: 0, consecutiveRpcFailures: 0 }), + debouncedReload: vi.fn(), + debouncedRestart: vi.fn(), + }, + } as never, + ); + + expect(sendJsonMock).toHaveBeenCalledWith( + expect.anything(), + 200, + expect.objectContaining({ + success: true, + gatewayHealth: expect.objectContaining({ + state: 'degraded', + reasons: expect.arrayContaining(['channels_status_timeout']), + }), + channels: [ + expect.objectContaining({ + channelType: 'feishu', + status: 'degraded', + statusReason: 'channels_status_timeout', + accounts: [ + expect.objectContaining({ + accountId: 'default', + status: 'degraded', + }), + ], + }), + ], + }), + ); + }); + + it('keeps channel degraded when only filtered stale runtime accounts carry lastError', async () => { + listConfiguredChannelsMock.mockResolvedValue(['feishu']); + listConfiguredChannelAccountsMock.mockResolvedValue({ + feishu: { + defaultAccountId: 'default', + accountIds: ['default'], + }, + }); + readOpenClawConfigMock.mockResolvedValue({ + channels: { + feishu: { + defaultAccount: 'default', + }, + }, + }); + + const rpc = vi.fn().mockResolvedValue({ + channels: { + feishu: { + configured: true, + }, + }, + channelAccounts: { + feishu: [ + { + accountId: 'default', + configured: true, + connected: true, + running: true, + linked: false, + }, + { + accountId: '2', + configured: false, + connected: false, + running: false, + lastError: 'stale runtime session', + }, + ], + }, + channelDefaultAccountId: { + feishu: 'default', + }, + }); + + const { handleChannelRoutes } = await import('@electron/api/routes/channels'); + await handleChannelRoutes( + { method: 'GET' } as IncomingMessage, + {} as ServerResponse, + new URL('http://127.0.0.1:13210/api/channels/accounts'), + { + gatewayManager: { + rpc, + getStatus: () => ({ state: 'running' }), + getDiagnostics: () => ({ consecutiveHeartbeatMisses: 1, consecutiveRpcFailures: 0 }), + debouncedReload: vi.fn(), + debouncedRestart: vi.fn(), + }, + } as never, + ); + + expect(sendJsonMock).toHaveBeenCalledWith( + expect.anything(), + 200, + expect.objectContaining({ + success: true, + channels: [ + expect.objectContaining({ + channelType: 'feishu', + status: 'degraded', + accounts: [ + expect.objectContaining({ accountId: 'default', status: 'degraded' }), + ], + }), + ], + }), + ); + }); + it('lists known QQ Bot targets for a configured account', async () => { const knownUsersPath = join(testOpenClawConfigDir, 'qqbot', 'data'); mkdirSync(knownUsersPath, { recursive: true }); diff --git a/tests/unit/channel-status.test.ts b/tests/unit/channel-status.test.ts index 3d8db0118..ecd7a4127 100644 --- a/tests/unit/channel-status.test.ts +++ b/tests/unit/channel-status.test.ts @@ -63,4 +63,42 @@ describe('channel runtime status helpers', () => { ), ).toBe('error'); }); + + it('returns degraded when gateway health is degraded', () => { + expect( + computeChannelRuntimeStatus( + { running: true, connected: false, linked: false }, + { gatewayHealthState: 'degraded' }, + ), + ).toBe('degraded'); + }); + + it('keeps runtime error higher priority than degraded overlay', () => { + expect( + computeChannelRuntimeStatus( + { running: true, lastError: 'bot token invalid' }, + { gatewayHealthState: 'degraded' }, + ), + ).toBe('error'); + }); + + it('degrades channel summary when gateway health is degraded', () => { + expect( + pickChannelRuntimeStatus( + [{ connected: false, running: false }], + undefined, + { gatewayHealthState: 'degraded' }, + ), + ).toBe('degraded'); + }); + + it('keeps summary error higher priority than degraded gateway health', () => { + expect( + pickChannelRuntimeStatus( + [{ connected: false, running: false }], + { error: 'channel bootstrap failed' }, + { gatewayHealthState: 'degraded' }, + ), + ).toBe('error'); + }); }); diff --git a/tests/unit/channels-page.test.tsx b/tests/unit/channels-page.test.tsx index 21449f794..638bfbacf 100644 --- a/tests/unit/channels-page.test.tsx +++ b/tests/unit/channels-page.test.tsx @@ -51,11 +51,22 @@ function createDeferred() { describe('Channels page status refresh', () => { beforeEach(() => { vi.clearAllMocks(); + Object.defineProperty(globalThis.navigator, 'clipboard', { + value: { + writeText: vi.fn(), + }, + configurable: true, + }); gatewayState.status = { state: 'running', port: 18789 }; hostApiFetchMock.mockImplementation(async (path: string) => { if (path === '/api/channels/accounts') { return { success: true, + gatewayHealth: { + state: 'healthy', + reasons: [], + consecutiveHeartbeatMisses: 0, + }, channels: [ { channelType: 'feishu', @@ -384,4 +395,263 @@ describe('Channels page status refresh', () => { expect(appIdInput).toHaveValue('cli_test_app'); expect(appSecretInput).toHaveValue('secret_test_value'); }); + + it('shows degraded gateway banner and copies diagnostics snapshot', async () => { + subscribeHostEventMock.mockImplementation(() => vi.fn()); + const writeTextMock = vi.mocked(navigator.clipboard.writeText); + + hostApiFetchMock.mockImplementation(async (path: string, init?: { method?: string }) => { + if (path === '/api/channels/accounts') { + return { + success: true, + gatewayHealth: { + state: 'degraded', + reasons: ['channels_status_timeout'], + consecutiveHeartbeatMisses: 1, + }, + channels: [ + { + channelType: 'feishu', + defaultAccountId: 'default', + status: 'degraded', + statusReason: 'channels_status_timeout', + accounts: [ + { + accountId: 'default', + name: 'Primary Account', + configured: true, + status: 'degraded', + statusReason: 'channels_status_timeout', + isDefault: true, + }, + ], + }, + ], + }; + } + + if (path === '/api/agents') { + return { + success: true, + agents: [], + }; + } + + if (path === '/api/diagnostics/gateway-snapshot') { + return { + capturedAt: 123, + platform: 'darwin', + gateway: { + state: 'degraded', + reasons: ['channels_status_timeout'], + consecutiveHeartbeatMisses: 1, + }, + channels: [], + clawxLogTail: 'clawx', + gatewayLogTail: 'gateway', + gatewayErrLogTail: '', + }; + } + + if (path === '/api/gateway/restart' && init?.method === 'POST') { + return { success: true }; + } + + throw new Error(`Unexpected host API path: ${path}`); + }); + + render(); + + expect(await screen.findByTestId('channels-health-banner')).toBeInTheDocument(); + expect(screen.getByText('health.state.degraded')).toBeInTheDocument(); + + fireEvent.click(screen.getByTestId('channels-copy-diagnostics')); + + await waitFor(() => { + expect(hostApiFetchMock).toHaveBeenCalledWith('/api/diagnostics/gateway-snapshot'); + expect(writeTextMock).toHaveBeenCalledWith(expect.stringContaining('"platform": "darwin"')); + }); + }); + + it('surfaces diagnostics fetch failure payloads instead of caching them as snapshots', async () => { + subscribeHostEventMock.mockImplementation(() => vi.fn()); + + hostApiFetchMock.mockImplementation(async (path: string) => { + if (path === '/api/channels/accounts') { + return { + success: true, + gatewayHealth: { + state: 'degraded', + reasons: ['channels_status_timeout'], + consecutiveHeartbeatMisses: 1, + }, + channels: [ + { + channelType: 'feishu', + defaultAccountId: 'default', + status: 'degraded', + statusReason: 'channels_status_timeout', + accounts: [ + { + accountId: 'default', + name: 'Primary Account', + configured: true, + status: 'degraded', + statusReason: 'channels_status_timeout', + isDefault: true, + }, + ], + }, + ], + }; + } + if (path === '/api/agents') { + return { success: true, agents: [] }; + } + if (path === '/api/diagnostics/gateway-snapshot') { + return { success: false, error: 'snapshot failed' }; + } + + throw new Error(`Unexpected host API path: ${path}`); + }); + + render(); + expect(await screen.findByTestId('channels-health-banner')).toBeInTheDocument(); + + fireEvent.click(screen.getByTestId('channels-toggle-diagnostics')); + + await waitFor(() => { + expect(toastErrorMock).toHaveBeenCalledWith('health.diagnosticsCopyFailed'); + }); + expect(screen.queryByTestId('channels-diagnostics')).not.toBeInTheDocument(); + }); + + it('shows restart failure when gateway restart returns success=false', async () => { + subscribeHostEventMock.mockImplementation(() => vi.fn()); + + hostApiFetchMock.mockImplementation(async (path: string, init?: { method?: string }) => { + if (path === '/api/channels/accounts') { + return { + success: true, + gatewayHealth: { + state: 'degraded', + reasons: ['channels_status_timeout'], + consecutiveHeartbeatMisses: 1, + }, + channels: [ + { + channelType: 'feishu', + defaultAccountId: 'default', + status: 'degraded', + statusReason: 'channels_status_timeout', + accounts: [ + { + accountId: 'default', + name: 'Primary Account', + configured: true, + status: 'degraded', + statusReason: 'channels_status_timeout', + isDefault: true, + }, + ], + }, + ], + }; + } + if (path === '/api/agents') { + return { success: true, agents: [] }; + } + if (path === '/api/gateway/restart' && init?.method === 'POST') { + return { success: false, error: 'restart failed' }; + } + + throw new Error(`Unexpected host API path: ${path}`); + }); + + render(); + expect(await screen.findByTestId('channels-health-banner')).toBeInTheDocument(); + + fireEvent.click(screen.getByTestId('channels-restart-gateway')); + + await waitFor(() => { + expect(toastErrorMock).toHaveBeenCalledWith('health.restartFailed'); + }); + expect(toastSuccessMock).not.toHaveBeenCalledWith('health.restartTriggered'); + }); + + it('refetches diagnostics snapshot every time the diagnostics panel is reopened', async () => { + subscribeHostEventMock.mockImplementation(() => vi.fn()); + + let diagnosticsFetchCount = 0; + hostApiFetchMock.mockImplementation(async (path: string) => { + if (path === '/api/channels/accounts') { + return { + success: true, + gatewayHealth: { + state: 'degraded', + reasons: ['channels_status_timeout'], + consecutiveHeartbeatMisses: 1, + }, + channels: [ + { + channelType: 'feishu', + defaultAccountId: 'default', + status: 'degraded', + statusReason: 'channels_status_timeout', + accounts: [ + { + accountId: 'default', + name: 'Primary Account', + configured: true, + status: 'degraded', + statusReason: 'channels_status_timeout', + isDefault: true, + }, + ], + }, + ], + }; + } + if (path === '/api/agents') { + return { success: true, agents: [] }; + } + if (path === '/api/diagnostics/gateway-snapshot') { + diagnosticsFetchCount += 1; + return { + capturedAt: diagnosticsFetchCount, + platform: 'darwin', + gateway: { + state: 'degraded', + reasons: ['channels_status_timeout'], + consecutiveHeartbeatMisses: 1, + }, + channels: [], + clawxLogTail: `clawx-${diagnosticsFetchCount}`, + gatewayLogTail: 'gateway', + gatewayErrLogTail: '', + }; + } + + throw new Error(`Unexpected host API path: ${path}`); + }); + + render(); + + expect(await screen.findByTestId('channels-health-banner')).toBeInTheDocument(); + + fireEvent.click(screen.getByTestId('channels-toggle-diagnostics')); + await waitFor(() => { + expect(screen.getByTestId('channels-diagnostics')).toHaveTextContent('"capturedAt": 1'); + }); + + fireEvent.click(screen.getByTestId('channels-toggle-diagnostics')); + expect(screen.queryByTestId('channels-diagnostics')).not.toBeInTheDocument(); + + fireEvent.click(screen.getByTestId('channels-toggle-diagnostics')); + await waitFor(() => { + expect(screen.getByTestId('channels-diagnostics')).toHaveTextContent('"capturedAt": 2'); + }); + + expect(diagnosticsFetchCount).toBe(2); + }); }); diff --git a/tests/unit/diagnostics-routes.test.ts b/tests/unit/diagnostics-routes.test.ts new file mode 100644 index 000000000..03638aac3 --- /dev/null +++ b/tests/unit/diagnostics-routes.test.ts @@ -0,0 +1,166 @@ +import { afterAll, beforeEach, describe, expect, it, vi } from 'vitest'; +import { mkdirSync, rmSync, writeFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import type { IncomingMessage, ServerResponse } from 'http'; + +const buildChannelAccountsViewMock = vi.fn(); +const getChannelStatusDiagnosticsMock = vi.fn(); +const sendJsonMock = vi.fn(); +const readLogFileMock = vi.fn(); + +const testOpenClawConfigDir = join(tmpdir(), 'clawx-tests', 'diagnostics-routes-openclaw'); + +vi.mock('@electron/api/routes/channels', () => ({ + buildChannelAccountsView: (...args: unknown[]) => buildChannelAccountsViewMock(...args), + getChannelStatusDiagnostics: (...args: unknown[]) => getChannelStatusDiagnosticsMock(...args), +})); + +vi.mock('@electron/api/route-utils', () => ({ + sendJson: (...args: unknown[]) => sendJsonMock(...args), +})); + +vi.mock('@electron/utils/logger', () => ({ + logger: { + readLogFile: (...args: unknown[]) => readLogFileMock(...args), + }, +})); + +vi.mock('@electron/utils/paths', () => ({ + getOpenClawConfigDir: () => testOpenClawConfigDir, +})); + +describe('handleDiagnosticsRoutes', () => { + beforeEach(() => { + vi.resetAllMocks(); + rmSync(testOpenClawConfigDir, { recursive: true, force: true }); + mkdirSync(join(testOpenClawConfigDir, 'logs'), { recursive: true }); + buildChannelAccountsViewMock.mockResolvedValue({ + channels: [ + { + channelType: 'feishu', + defaultAccountId: 'default', + status: 'degraded', + accounts: [ + { + accountId: 'default', + name: 'Primary Account', + configured: true, + status: 'degraded', + statusReason: 'channels_status_timeout', + isDefault: true, + }, + ], + }, + ], + gatewayHealth: { + state: 'degraded', + reasons: ['channels_status_timeout'], + consecutiveHeartbeatMisses: 1, + }, + }); + getChannelStatusDiagnosticsMock.mockReturnValue({ + lastChannelsStatusOkAt: 100, + lastChannelsStatusFailureAt: 200, + }); + readLogFileMock.mockResolvedValue('clawx-log-tail'); + }); + + afterAll(() => { + rmSync(testOpenClawConfigDir, { recursive: true, force: true }); + }); + + it('returns diagnostics snapshot with channel view and tailed logs', async () => { + writeFileSync(join(testOpenClawConfigDir, 'logs', 'gateway.log'), 'gateway-line-1\ngateway-line-2\n'); + + const { handleDiagnosticsRoutes } = await import('@electron/api/routes/diagnostics'); + const handled = await handleDiagnosticsRoutes( + { method: 'GET' } as IncomingMessage, + {} as ServerResponse, + new URL('http://127.0.0.1:13210/api/diagnostics/gateway-snapshot'), + { + gatewayManager: { + getStatus: () => ({ state: 'running', port: 18789, connectedAt: 50 }), + getDiagnostics: () => ({ + lastAliveAt: 60, + lastRpcSuccessAt: 70, + consecutiveHeartbeatMisses: 1, + consecutiveRpcFailures: 0, + }), + }, + } as never, + ); + + expect(handled).toBe(true); + const payload = sendJsonMock.mock.calls.at(-1)?.[2] as { + platform?: string; + channels?: Array<{ channelType: string; status: string }>; + clawxLogTail?: string; + gatewayLogTail?: string; + gatewayErrLogTail?: string; + gateway?: { state?: string; reasons?: string[] }; + }; + expect(payload.platform).toBe(process.platform); + expect(payload.channels).toEqual([ + expect.objectContaining({ + channelType: 'feishu', + status: 'degraded', + }), + ]); + expect(payload.clawxLogTail).toBe('clawx-log-tail'); + expect(payload.gatewayLogTail).toContain('gateway-line-1'); + expect(payload.gatewayErrLogTail).toBe(''); + expect(payload.gateway?.state).toBe('degraded'); + expect(payload.gateway?.reasons).toEqual(expect.arrayContaining(['gateway_degraded'])); + }); + + it('returns empty gateway log tails when log files are missing', async () => { + const { handleDiagnosticsRoutes } = await import('@electron/api/routes/diagnostics'); + await handleDiagnosticsRoutes( + { method: 'GET' } as IncomingMessage, + {} as ServerResponse, + new URL('http://127.0.0.1:13210/api/diagnostics/gateway-snapshot'), + { + gatewayManager: { + getStatus: () => ({ state: 'running', port: 18789 }), + getDiagnostics: () => ({ + consecutiveHeartbeatMisses: 0, + consecutiveRpcFailures: 0, + }), + }, + } as never, + ); + + const payload = sendJsonMock.mock.calls.at(-1)?.[2] as { + gatewayLogTail?: string; + gatewayErrLogTail?: string; + }; + expect(payload.gatewayLogTail).toBe(''); + expect(payload.gatewayErrLogTail).toBe(''); + }); + + it('reads tailed logs without leaking unread buffer bytes', async () => { + writeFileSync(join(testOpenClawConfigDir, 'logs', 'gateway.log'), 'only-one-line'); + + const { handleDiagnosticsRoutes } = await import('@electron/api/routes/diagnostics'); + await handleDiagnosticsRoutes( + { method: 'GET' } as IncomingMessage, + {} as ServerResponse, + new URL('http://127.0.0.1:13210/api/diagnostics/gateway-snapshot'), + { + gatewayManager: { + getStatus: () => ({ state: 'running', port: 18789 }), + getDiagnostics: () => ({ + consecutiveHeartbeatMisses: 0, + consecutiveRpcFailures: 0, + }), + }, + } as never, + ); + + const payload = sendJsonMock.mock.calls.at(-1)?.[2] as { + gatewayLogTail?: string; + }; + expect(payload.gatewayLogTail).toBe('only-one-line'); + }); +}); diff --git a/tests/unit/gateway-manager-diagnostics.test.ts b/tests/unit/gateway-manager-diagnostics.test.ts new file mode 100644 index 000000000..c62efba38 --- /dev/null +++ b/tests/unit/gateway-manager-diagnostics.test.ts @@ -0,0 +1,165 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('electron', () => ({ + app: { + getPath: () => '/tmp', + isPackaged: false, + }, + utilityProcess: { + fork: vi.fn(), + }, +})); + +vi.mock('@electron/utils/logger', () => ({ + logger: { + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }, +})); + +describe('GatewayManager diagnostics', () => { + const originalPlatform = process.platform; + + beforeEach(() => { + vi.resetModules(); + vi.clearAllMocks(); + vi.useFakeTimers(); + vi.setSystemTime(new Date('2026-04-14T00:00:00.000Z')); + Object.defineProperty(process, 'platform', { value: originalPlatform }); + }); + + afterEach(() => { + vi.useRealTimers(); + Object.defineProperty(process, 'platform', { value: originalPlatform }); + }); + + it('updates diagnostics on gateway message, rpc success/timeout, and socket close', async () => { + const { GatewayManager } = await import('@electron/gateway/manager'); + const manager = new GatewayManager(); + + const ws = { + readyState: 1, + send: vi.fn(), + ping: vi.fn(), + terminate: vi.fn(), + on: vi.fn(), + }; + + (manager as unknown as { ws: typeof ws }).ws = ws; + + (manager as unknown as { handleMessage: (message: unknown) => void }).handleMessage({ + type: 'event', + event: 'gateway.ready', + payload: {}, + }); + expect(manager.getDiagnostics().lastAliveAt).toBe(Date.now()); + + const successPromise = manager.rpc<{ ok: boolean }>('chat.history', {}, 1000); + const successRequestId = Array.from( + (manager as unknown as { pendingRequests: Map }).pendingRequests.keys(), + )[0]; + (manager as unknown as { handleMessage: (message: unknown) => void }).handleMessage({ + type: 'res', + id: successRequestId, + ok: true, + payload: { ok: true }, + }); + await expect(successPromise).resolves.toEqual({ ok: true }); + expect(manager.getDiagnostics().lastRpcSuccessAt).toBe(Date.now()); + expect(manager.getDiagnostics().consecutiveRpcFailures).toBe(0); + + const failurePromise = manager.rpc('chat.history', {}, 1000); + vi.advanceTimersByTime(1001); + await expect(failurePromise).rejects.toThrow('RPC timeout: chat.history'); + + const diagnostics = manager.getDiagnostics(); + expect(diagnostics.lastRpcFailureAt).toBe(Date.now()); + expect(diagnostics.lastRpcFailureMethod).toBe('chat.history'); + expect(diagnostics.consecutiveRpcFailures).toBe(1); + + (manager as unknown as { recordSocketClose: (code: number) => void }).recordSocketClose(1006); + expect(manager.getDiagnostics().lastSocketCloseAt).toBe(Date.now()); + expect(manager.getDiagnostics().lastSocketCloseCode).toBe(1006); + }); + + it('does not count gateway-declared rpc errors as transport failures', async () => { + const { GatewayManager } = await import('@electron/gateway/manager'); + const { buildGatewayHealthSummary } = await import('@electron/utils/gateway-health'); + const manager = new GatewayManager(); + + const ws = { + readyState: 1, + send: vi.fn(), + ping: vi.fn(), + terminate: vi.fn(), + on: vi.fn(), + }; + + (manager as unknown as { ws: typeof ws }).ws = ws; + (manager as unknown as { status: { state: string; port: number } }).status = { + state: 'running', + port: 18789, + }; + + const failurePromise = manager.rpc('channels.status', {}, 1000); + const failureRequestId = Array.from( + (manager as unknown as { pendingRequests: Map }).pendingRequests.keys(), + )[0]; + (manager as unknown as { handleMessage: (message: unknown) => void }).handleMessage({ + type: 'res', + id: failureRequestId, + ok: false, + error: { message: 'channel unavailable' }, + }); + await expect(failurePromise).rejects.toThrow('channel unavailable'); + + expect(manager.getDiagnostics().consecutiveRpcFailures).toBe(0); + + const health = buildGatewayHealthSummary({ + status: manager.getStatus(), + diagnostics: manager.getDiagnostics(), + platform: process.platform, + }); + expect(health.reasons).not.toContain('rpc_timeout'); + }); + + it('keeps windows heartbeat recovery disabled while diagnostics degrade', async () => { + Object.defineProperty(process, 'platform', { value: 'win32' }); + + const { GatewayManager } = await import('@electron/gateway/manager'); + const { buildGatewayHealthSummary } = await import('@electron/utils/gateway-health'); + const manager = new GatewayManager(); + + const ws = { + readyState: 1, + send: vi.fn(), + ping: vi.fn(), + terminate: vi.fn(), + on: vi.fn(), + }; + + (manager as unknown as { ws: typeof ws }).ws = ws; + (manager as unknown as { shouldReconnect: boolean }).shouldReconnect = true; + (manager as unknown as { status: { state: string; port: number } }).status = { + state: 'running', + port: 18789, + }; + const restartSpy = vi.spyOn(manager, 'restart').mockResolvedValue(); + + (manager as unknown as { startPing: () => void }).startPing(); + vi.advanceTimersByTime(400_000); + + expect(restartSpy).not.toHaveBeenCalled(); + + const health = buildGatewayHealthSummary({ + status: manager.getStatus(), + diagnostics: manager.getDiagnostics(), + platform: 'win32', + }); + expect(health.state).not.toBe('healthy'); + + (manager as unknown as { connectionMonitor: { clear: () => void } }).connectionMonitor.clear(); + }); +}); diff --git a/tests/unit/gateway-manager-heartbeat.test.ts b/tests/unit/gateway-manager-heartbeat.test.ts index d1d834474..5c901486e 100644 --- a/tests/unit/gateway-manager-heartbeat.test.ts +++ b/tests/unit/gateway-manager-heartbeat.test.ts @@ -11,12 +11,22 @@ vi.mock('electron', () => ({ })); describe('GatewayManager heartbeat recovery', () => { + const originalPlatform = process.platform; + beforeEach(() => { + vi.resetModules(); + vi.clearAllMocks(); vi.useFakeTimers(); vi.setSystemTime(new Date('2026-03-19T00:00:00.000Z')); + Object.defineProperty(process, 'platform', { value: originalPlatform }); }); - it('logs warning but does NOT terminate socket after consecutive heartbeat misses', async () => { + afterEach(() => { + vi.useRealTimers(); + Object.defineProperty(process, 'platform', { value: originalPlatform }); + }); + + it('restarts after consecutive heartbeat misses reach threshold', async () => { const { GatewayManager } = await import('@electron/gateway/manager'); const manager = new GatewayManager(); @@ -33,20 +43,20 @@ describe('GatewayManager heartbeat recovery', () => { state: 'running', port: 18789, }; + const restartSpy = vi.spyOn(manager, 'restart').mockResolvedValue(); (manager as unknown as { startPing: () => void }).startPing(); vi.advanceTimersByTime(120_000); expect(ws.ping).toHaveBeenCalledTimes(3); - // Heartbeat timeout is now observability-only — socket should NOT be terminated. - // Process liveness is detected via child.on('exit'), socket disconnects via ws.on('close'). expect(ws.terminate).not.toHaveBeenCalled(); + expect(restartSpy).toHaveBeenCalledTimes(1); (manager as unknown as { connectionMonitor: { clear: () => void } }).connectionMonitor.clear(); }); - it('does not terminate when heartbeat is recovered by incoming messages', async () => { + it('does not restart when heartbeat is recovered by incoming messages', async () => { const { GatewayManager } = await import('@electron/gateway/manager'); const manager = new GatewayManager(); @@ -63,6 +73,7 @@ describe('GatewayManager heartbeat recovery', () => { state: 'running', port: 18789, }; + const restartSpy = vi.spyOn(manager, 'restart').mockResolvedValue(); (manager as unknown as { startPing: () => void }).startPing(); @@ -75,6 +86,65 @@ describe('GatewayManager heartbeat recovery', () => { vi.advanceTimersByTime(30_000); // miss #2 + ping #5 expect(ws.terminate).not.toHaveBeenCalled(); + expect(restartSpy).not.toHaveBeenCalled(); + + (manager as unknown as { connectionMonitor: { clear: () => void } }).connectionMonitor.clear(); + }); + + it('skips heartbeat recovery when auto-reconnect is disabled', async () => { + const { GatewayManager } = await import('@electron/gateway/manager'); + const manager = new GatewayManager(); + + const ws = { + readyState: 1, + ping: vi.fn(), + terminate: vi.fn(), + on: vi.fn(), + }; + + (manager as unknown as { ws: typeof ws }).ws = ws; + (manager as unknown as { shouldReconnect: boolean }).shouldReconnect = false; + (manager as unknown as { status: { state: string; port: number } }).status = { + state: 'running', + port: 18789, + }; + const restartSpy = vi.spyOn(manager, 'restart').mockResolvedValue(); + + (manager as unknown as { startPing: () => void }).startPing(); + + vi.advanceTimersByTime(120_000); + + expect(restartSpy).not.toHaveBeenCalled(); + + (manager as unknown as { connectionMonitor: { clear: () => void } }).connectionMonitor.clear(); + }); + + it('keeps heartbeat recovery disabled on windows', async () => { + Object.defineProperty(process, 'platform', { value: 'win32' }); + + const { GatewayManager } = await import('@electron/gateway/manager'); + const manager = new GatewayManager(); + + const ws = { + readyState: 1, + ping: vi.fn(), + terminate: vi.fn(), + on: vi.fn(), + }; + + (manager as unknown as { ws: typeof ws }).ws = ws; + (manager as unknown as { shouldReconnect: boolean }).shouldReconnect = true; + (manager as unknown as { status: { state: string; port: number } }).status = { + state: 'running', + port: 18789, + }; + const restartSpy = vi.spyOn(manager, 'restart').mockResolvedValue(); + + (manager as unknown as { startPing: () => void }).startPing(); + + vi.advanceTimersByTime(400_000); + + expect(restartSpy).not.toHaveBeenCalled(); (manager as unknown as { connectionMonitor: { clear: () => void } }).connectionMonitor.clear(); }); diff --git a/tests/unit/sanitize-config.test.ts b/tests/unit/sanitize-config.test.ts index 4aab34346..aab4bace3 100644 --- a/tests/unit/sanitize-config.test.ts +++ b/tests/unit/sanitize-config.test.ts @@ -128,6 +128,56 @@ async function sanitizeConfig( : {} ) as Record; + const acpxEntry = (entries.acpx && typeof entries.acpx === 'object' && !Array.isArray(entries.acpx)) + ? { ...(entries.acpx as Record) } + : null; + const acpxConfig = (acpxEntry?.config && typeof acpxEntry.config === 'object' && !Array.isArray(acpxEntry.config)) + ? { ...(acpxEntry.config as Record) } + : null; + if (acpxConfig) { + for (const legacyKey of ['command', 'expectedVersion'] as const) { + if (legacyKey in acpxConfig) { + delete acpxConfig[legacyKey]; + modified = true; + } + } + acpxEntry!.config = acpxConfig; + entries.acpx = acpxEntry!; + pluginsObj.entries = entries; + } + + const installs = ( + pluginsObj.installs && typeof pluginsObj.installs === 'object' && !Array.isArray(pluginsObj.installs) + ? { ...(pluginsObj.installs as Record) } + : {} + ) as Record; + const acpxInstall = (installs.acpx && typeof installs.acpx === 'object' && !Array.isArray(installs.acpx)) + ? installs.acpx as Record + : null; + if (acpxInstall) { + const currentBundledAcpxDir = join(tempDir, 'node_modules', 'openclaw', 'dist', 'extensions', 'acpx').replace(/\\/g, '/'); + const sourcePath = typeof acpxInstall.sourcePath === 'string' ? acpxInstall.sourcePath : ''; + const installPath = typeof acpxInstall.installPath === 'string' ? acpxInstall.installPath : ''; + const normalizedSourcePath = sourcePath.replace(/\\/g, '/'); + const normalizedInstallPath = installPath.replace(/\\/g, '/'); + const pointsAtDifferentBundledTree = [normalizedSourcePath, normalizedInstallPath].some( + (candidate) => candidate.includes('/node_modules/.pnpm/openclaw@') && candidate !== currentBundledAcpxDir, + ); + const pointsAtMissingPath = (sourcePath && !(await fileExists(sourcePath))) + || (installPath && !(await fileExists(installPath))); + + if (pointsAtDifferentBundledTree || pointsAtMissingPath) { + delete installs.acpx; + modified = true; + } + + if (Object.keys(installs).length > 0) { + pluginsObj.installs = installs; + } else { + delete pluginsObj.installs; + } + } + if ('whatsapp' in entries) { delete entries.whatsapp; pluginsObj.entries = entries; @@ -625,6 +675,49 @@ describe('sanitizeOpenClawConfig (blocklist approach)', () => { expect(load.paths).toEqual(['relative/plugin-path', './another-relative']); }); + it('removes legacy acpx overrides and stale bundled install metadata', async () => { + await writeConfig({ + plugins: { + entries: { + acpx: { + enabled: true, + config: { + permissionMode: 'approve-all', + nonInteractivePermissions: 'fail', + command: '/Users/example/project/node_modules/.pnpm/openclaw@2026.4.1/node_modules/openclaw/dist/extensions/acpx/node_modules/acpx/dist/cli.js', + expectedVersion: 'any', + pluginToolsMcpBridge: true, + }, + }, + }, + installs: { + acpx: { + source: 'path', + spec: 'acpx', + sourcePath: '/Users/example/project/node_modules/.pnpm/openclaw@2026.4.1/node_modules/openclaw/dist/extensions/acpx', + installPath: '/Users/example/project/node_modules/.pnpm/openclaw@2026.4.1/node_modules/openclaw/dist/extensions/acpx', + }, + }, + }, + }); + + const modified = await sanitizeConfig(configPath); + expect(modified).toBe(true); + + const result = await readConfig(); + const plugins = result.plugins as Record; + const entries = plugins.entries as Record; + const acpx = entries.acpx as Record; + const acpxConfig = acpx.config as Record; + + expect(acpxConfig).toEqual({ + permissionMode: 'approve-all', + nonInteractivePermissions: 'fail', + pluginToolsMcpBridge: true, + }); + expect(plugins).not.toHaveProperty('installs'); + }); + it('does nothing when plugins.load.paths contains only valid paths', async () => { const original = { plugins: {