refactor IPC (#341)

This commit is contained in:
Lingxuan Zuo
2026-03-08 11:54:49 +08:00
committed by GitHub
Unverified
parent c03d92e9a2
commit 3d804a9f5e
52 changed files with 3121 additions and 336 deletions

View File

@@ -228,9 +228,11 @@ export class GatewayManager extends EventEmitter {
}> = new Map();
private deviceIdentity: DeviceIdentity | null = null;
private restartDebounceTimer: NodeJS.Timeout | null = null;
private reloadDebounceTimer: NodeJS.Timeout | null = null;
private lifecycleEpoch = 0;
private deferredRestartPending = false;
private restartInFlight: Promise<void> | null = null;
private externalShutdownSupported: boolean | null = null;
constructor(config?: Partial<ReconnectConfig>) {
super();
@@ -259,6 +261,11 @@ export class GatewayManager extends EventEmitter {
return sanitized;
}
private isUnsupportedShutdownError(error: unknown): boolean {
const message = error instanceof Error ? error.message : String(error);
return /unknown method:\s*shutdown/i.test(message);
}
private formatExit(code: number | null, signal: NodeJS.Signals | null): string {
if (code !== null) return `code=${code}`;
if (signal) return `signal=${signal}`;
@@ -272,6 +279,14 @@ export class GatewayManager extends EventEmitter {
// Known noisy lines that are not actionable for Gateway lifecycle debugging.
if (msg.includes('openclaw-control-ui') && msg.includes('token_mismatch')) return { level: 'drop', normalized: msg };
if (msg.includes('closed before connect') && msg.includes('token mismatch')) return { level: 'drop', normalized: msg };
// During renderer refresh / transport switching, loopback websocket probes can time out
// while the gateway is reloading. This is expected and not actionable.
if (msg.includes('[ws] handshake timeout') && msg.includes('remote=127.0.0.1')) {
return { level: 'debug', normalized: msg };
}
if (msg.includes('[ws] closed before connect') && msg.includes('remote=127.0.0.1')) {
return { level: 'debug', normalized: msg };
}
// Downgrade frequent non-fatal noise.
if (msg.includes('ExperimentalWarning')) return { level: 'debug', normalized: msg };
@@ -536,11 +551,17 @@ export class GatewayManager extends EventEmitter {
// If this manager is attached to an external gateway process, ask it to shut down
// over protocol before closing the socket.
if (!this.ownsProcess && this.ws?.readyState === WebSocket.OPEN) {
if (!this.ownsProcess && this.ws?.readyState === WebSocket.OPEN && this.externalShutdownSupported !== false) {
try {
await this.rpc('shutdown', undefined, 5000);
this.externalShutdownSupported = true;
} catch (error) {
logger.warn('Failed to request shutdown for externally managed Gateway:', error);
if (this.isUnsupportedShutdownError(error)) {
this.externalShutdownSupported = false;
logger.info('External Gateway does not support "shutdown"; skipping shutdown RPC for future stops');
} else {
logger.warn('Failed to request shutdown for externally managed Gateway:', error);
}
}
}
@@ -648,6 +669,71 @@ export class GatewayManager extends EventEmitter {
}, delayMs);
}
/**
* Ask the Gateway process to reload config in-place when possible.
* Falls back to restart on unsupported platforms or signaling failures.
*/
async reload(): Promise<void> {
if (this.isRestartDeferred()) {
this.markDeferredRestart('reload');
return;
}
if (!this.process?.pid || this.status.state !== 'running') {
logger.warn('Gateway reload requested while not running; falling back to restart');
await this.restart();
return;
}
if (process.platform === 'win32') {
logger.debug('Windows detected, falling back to Gateway restart for reload');
await this.restart();
return;
}
const connectedForMs = this.status.connectedAt
? Date.now() - this.status.connectedAt
: Number.POSITIVE_INFINITY;
// Avoid signaling a process that just came up; it will already read latest config.
if (connectedForMs < 8000) {
logger.info(`Gateway connected ${connectedForMs}ms ago, skipping reload signal`);
return;
}
try {
process.kill(this.process.pid, 'SIGUSR1');
logger.info(`Sent SIGUSR1 to Gateway for config reload (pid=${this.process.pid})`);
// Some gateway builds do not handle SIGUSR1 as an in-process reload.
// If process state doesn't recover quickly, fall back to restart.
await new Promise((resolve) => setTimeout(resolve, 1500));
if (this.status.state !== 'running' || !this.process?.pid) {
logger.warn('Gateway did not stay running after reload signal, falling back to restart');
await this.restart();
}
} catch (error) {
logger.warn('Gateway reload signal failed, falling back to restart:', error);
await this.restart();
}
}
/**
* Debounced reload — coalesces multiple rapid config-change events into one
* in-process reload when possible.
*/
debouncedReload(delayMs = 1200): void {
if (this.reloadDebounceTimer) {
clearTimeout(this.reloadDebounceTimer);
}
logger.debug(`Gateway reload debounced (will fire in ${delayMs}ms)`);
this.reloadDebounceTimer = setTimeout(() => {
this.reloadDebounceTimer = null;
void this.reload().catch((err) => {
logger.warn('Debounced Gateway reload failed:', err);
});
}, delayMs);
}
/**
* Clear all active timers
*/
@@ -668,6 +754,10 @@ export class GatewayManager extends EventEmitter {
clearTimeout(this.restartDebounceTimer);
this.restartDebounceTimer = null;
}
if (this.reloadDebounceTimer) {
clearTimeout(this.reloadDebounceTimer);
this.reloadDebounceTimer = null;
}
}
/**