Add channel health diagnostics and gateway recovery fixes (#855)
This commit is contained in:
committed by
GitHub
Unverified
parent
6acd8acf5a
commit
1f39d1a8a7
@@ -1,4 +1,5 @@
|
||||
/** Coarse health classification of the gateway, used to overlay channel status. */
export type GatewayHealthState = 'healthy' | 'degraded' | 'unresponsive';

/**
 * Connection status reported for a channel.
 *
 * `'degraded'` is reported when the gateway health overlay is anything other
 * than `'healthy'` and no higher-priority status applies.
 */
export type ChannelConnectionStatus = 'connected' | 'connecting' | 'degraded' | 'disconnected' | 'error';
|
||||
|
||||
export interface ChannelRuntimeAccountSnapshot {
|
||||
connected?: boolean;
|
||||
@@ -19,6 +20,10 @@ export interface ChannelRuntimeSummarySnapshot {
|
||||
lastError?: string | null;
|
||||
}
|
||||
|
||||
/**
 * Optional gateway-level health information that is layered on top of the
 * per-account runtime data when a channel status is derived.
 */
export interface ChannelHealthOverlay {
  /**
   * Current gateway health. When present and not `'healthy'`, status
   * derivation may report `'degraded'` instead of `'connecting'` or
   * `'disconnected'`.
   */
  gatewayHealthState?: GatewayHealthState;
}
|
||||
|
||||
// Ten minutes, expressed in milliseconds.
// NOTE(review): the code that reads this constant is outside the visible hunk;
// presumably it bounds how old channel activity may be and still count as
// "recent" — confirm against the callers.
const RECENT_ACTIVITY_MS = 10 * 60 * 1000;
|
||||
|
||||
function hasNonEmptyError(value: string | null | undefined): boolean {
|
||||
@@ -74,9 +79,11 @@ export function isChannelRuntimeConnected(
|
||||
|
||||
/**
 * Derives the connection status of a single channel account.
 *
 * Checks run in priority order and the first match wins:
 *   1. `'connected'`    — the account is reported as connected.
 *   2. `'error'`        — the account carries a runtime error.
 *   3. `'degraded'`     — a gateway health overlay is present and not `'healthy'`.
 *   4. `'connecting'`   — the account is running but not yet connected.
 *   5. `'disconnected'` — none of the above.
 *
 * @param account Runtime snapshot of the account to classify.
 * @param healthOverlay Optional gateway health information; omitted or
 *   `'healthy'` overlays never change the result.
 * @returns The highest-priority status that applies.
 */
export function computeChannelRuntimeStatus(
  account: ChannelRuntimeAccountSnapshot,
  healthOverlay?: ChannelHealthOverlay,
): ChannelConnectionStatus {
  // A live connection wins over everything else; this is the only connected
  // check needed (a second one further down could never be reached).
  if (isChannelRuntimeConnected(account)) return 'connected';
  if (hasChannelRuntimeError(account)) return 'error';
  if (healthOverlay?.gatewayHealthState && healthOverlay.gatewayHealthState !== 'healthy') return 'degraded';
  if (account.running === true) return 'connecting';
  return 'disconnected';
}
|
||||
@@ -84,6 +91,7 @@ export function computeChannelRuntimeStatus(
|
||||
export function pickChannelRuntimeStatus(
|
||||
accounts: ChannelRuntimeAccountSnapshot[],
|
||||
summary?: ChannelRuntimeSummarySnapshot,
|
||||
healthOverlay?: ChannelHealthOverlay,
|
||||
): ChannelConnectionStatus {
|
||||
if (accounts.some((account) => isChannelRuntimeConnected(account))) {
|
||||
return 'connected';
|
||||
@@ -93,6 +101,10 @@ export function pickChannelRuntimeStatus(
|
||||
return 'error';
|
||||
}
|
||||
|
||||
if (healthOverlay?.gatewayHealthState && healthOverlay.gatewayHealthState !== 'healthy') {
|
||||
return 'degraded';
|
||||
}
|
||||
|
||||
if (accounts.some((account) => account.running === true)) {
|
||||
return 'connecting';
|
||||
}
|
||||
|
||||
81
electron/utils/gateway-health.ts
Normal file
81
electron/utils/gateway-health.ts
Normal file
@@ -0,0 +1,81 @@
|
||||
import type {
|
||||
GatewayDiagnosticsSnapshot,
|
||||
GatewayHealthSummary,
|
||||
GatewayStatus,
|
||||
} from '../gateway/manager';
|
||||
|
||||
/** Inputs consumed by `buildGatewayHealthSummary`. */
type BuildGatewayHealthSummaryOptions = {
  /** Current gateway status; only its `state` field is inspected. */
  status: GatewayStatus;
  /** Heartbeat and RPC counters/timestamps copied into the summary. */
  diagnostics: GatewayDiagnosticsSnapshot;
  /** Time of the last successful channels-status check, on the same clock as `now`. */
  lastChannelsStatusOkAt?: number;
  /** Time of the last failed channels-status check, on the same clock as `now`. */
  lastChannelsStatusFailureAt?: number;
  /** Platform identifier; `'win32'` selects the more lenient heartbeat threshold. */
  platform?: string;
  /** Reference time in milliseconds; defaults to `Date.now()`. */
  now?: number;
};

/** A channels-status failure only counts while it is at most this old (2 minutes). */
const CHANNEL_STATUS_FAILURE_WINDOW_MS = 2 * 60_000;
/** Consecutive heartbeat misses at which the gateway is reported unresponsive. */
const HEARTBEAT_MISS_THRESHOLD_DEFAULT = 3;
/** Same threshold when `platform` is `'win32'`. */
const HEARTBEAT_MISS_THRESHOLD_WIN = 5;

/**
 * Condenses gateway status and diagnostics into a single health summary.
 *
 * - If the gateway is not in the `'running'` state the summary is `'degraded'`
 *   with a single reason (`'gateway_error'` or `'gateway_not_running'`).
 * - Otherwise reasons are collected from heartbeat misses, RPC failures and a
 *   recent channels-status failure. `'gateway_unresponsive'` yields
 *   `'unresponsive'`, any other reason yields `'degraded'`, and no reason
 *   yields `'healthy'`.
 *
 * @param options Status, diagnostics and optional timing/platform hints.
 * @returns The computed state, its reasons, and the diagnostic fields passed
 *   through unchanged.
 */
export function buildGatewayHealthSummary(
  options: BuildGatewayHealthSummaryOptions,
): GatewayHealthSummary {
  const { status, diagnostics, lastChannelsStatusOkAt, lastChannelsStatusFailureAt } = options;
  const now = options.now ?? Date.now();

  // Fields reported verbatim regardless of which state is computed.
  const passthrough = {
    consecutiveHeartbeatMisses: diagnostics.consecutiveHeartbeatMisses,
    lastAliveAt: diagnostics.lastAliveAt,
    lastRpcSuccessAt: diagnostics.lastRpcSuccessAt,
    lastRpcFailureAt: diagnostics.lastRpcFailureAt,
    lastRpcFailureMethod: diagnostics.lastRpcFailureMethod,
    lastChannelsStatusOkAt,
    lastChannelsStatusFailureAt,
  };

  // A gateway that is not running short-circuits every other signal.
  if (status.state !== 'running') {
    return {
      state: 'degraded',
      reasons: [status.state === 'error' ? 'gateway_error' : 'gateway_not_running'],
      ...passthrough,
    };
  }

  const reasons = new Set<string>();

  const heartbeatThreshold = options.platform === 'win32'
    ? HEARTBEAT_MISS_THRESHOLD_WIN
    : HEARTBEAT_MISS_THRESHOLD_DEFAULT;

  if (diagnostics.consecutiveHeartbeatMisses >= heartbeatThreshold) {
    reasons.add('gateway_unresponsive');
  } else if (diagnostics.consecutiveHeartbeatMisses > 0) {
    reasons.add('gateway_degraded');
  }

  if (diagnostics.consecutiveRpcFailures > 0) {
    reasons.add('rpc_timeout');
  }

  // A failure counts only while it is inside the window and has not been
  // superseded by a later successful check.
  const channelStatusFailureIsRecent =
    typeof lastChannelsStatusFailureAt === 'number'
    && now - lastChannelsStatusFailureAt <= CHANNEL_STATUS_FAILURE_WINDOW_MS
    && (
      typeof lastChannelsStatusOkAt !== 'number'
      || lastChannelsStatusFailureAt > lastChannelsStatusOkAt
    );

  if (channelStatusFailureIsRecent) {
    reasons.add('channels_status_timeout');
  }

  return {
    state: reasons.has('gateway_unresponsive')
      ? 'unresponsive'
      : reasons.size > 0
        ? 'degraded'
        : 'healthy',
    reasons: [...reasons],
    ...passthrough,
  };
}
|
||||
@@ -1634,6 +1634,51 @@ export async function sanitizeOpenClawConfig(): Promise<void> {
|
||||
pluginsObj.allow = allowArr;
|
||||
}
|
||||
|
||||
// ── acpx legacy config/install cleanup ─────────────────────
|
||||
// Older OpenClaw releases allowed plugins.entries.acpx.config.command
|
||||
// and expectedVersion overrides. Current bundled acpx schema rejects
|
||||
// them, which causes the Gateway to fail validation before startup.
|
||||
// Strip those keys and drop stale installs metadata that still points
|
||||
// at an older bundled OpenClaw tree so the current bundled plugin can
|
||||
// be re-registered cleanly.
|
||||
const acpxEntry = isPlainRecord(pEntries.acpx) ? pEntries.acpx as Record<string, unknown> : null;
|
||||
const acpxConfig = acpxEntry && isPlainRecord(acpxEntry.config)
|
||||
? acpxEntry.config as Record<string, unknown>
|
||||
: null;
|
||||
if (acpxConfig) {
|
||||
for (const legacyKey of ['command', 'expectedVersion'] as const) {
|
||||
if (legacyKey in acpxConfig) {
|
||||
delete acpxConfig[legacyKey];
|
||||
modified = true;
|
||||
console.log(`[sanitize] Removed legacy plugins.entries.acpx.config.${legacyKey}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const installs = isPlainRecord(pluginsObj.installs) ? pluginsObj.installs as Record<string, unknown> : null;
|
||||
const acpxInstall = installs && isPlainRecord(installs.acpx) ? installs.acpx as Record<string, unknown> : null;
|
||||
if (acpxInstall) {
|
||||
const currentBundledAcpxDir = join(getOpenClawResolvedDir(), 'dist', 'extensions', 'acpx').replace(/\\/g, '/');
|
||||
const sourcePath = typeof acpxInstall.sourcePath === 'string' ? acpxInstall.sourcePath : '';
|
||||
const installPath = typeof acpxInstall.installPath === 'string' ? acpxInstall.installPath : '';
|
||||
const normalizedSourcePath = sourcePath.replace(/\\/g, '/');
|
||||
const normalizedInstallPath = installPath.replace(/\\/g, '/');
|
||||
const pointsAtDifferentBundledTree = [normalizedSourcePath, normalizedInstallPath].some(
|
||||
(candidate) => candidate.includes('/node_modules/.pnpm/openclaw@') && candidate !== currentBundledAcpxDir,
|
||||
);
|
||||
const pointsAtMissingPath = (sourcePath && !(await fileExists(sourcePath)))
|
||||
|| (installPath && !(await fileExists(installPath)));
|
||||
|
||||
if (pointsAtDifferentBundledTree || pointsAtMissingPath) {
|
||||
delete installs.acpx;
|
||||
if (Object.keys(installs).length === 0) {
|
||||
delete pluginsObj.installs;
|
||||
}
|
||||
modified = true;
|
||||
console.log('[sanitize] Removed stale plugins.installs.acpx metadata');
|
||||
}
|
||||
}
|
||||
|
||||
const installedFeishuId = await resolveInstalledFeishuPluginId();
|
||||
const configuredFeishuId =
|
||||
FEISHU_PLUGIN_ID_CANDIDATES.find((id) => allowArr.includes(id))
|
||||
|
||||
Reference in New Issue
Block a user