fix(gateway): terminate owned process before retry to prevent port conflict on Windows (#724)
This commit is contained in:
@@ -259,8 +259,8 @@ export class GatewayManager extends EventEmitter {
|
||||
|
||||
this.startHealthCheck();
|
||||
},
|
||||
waitForPortFree: async (port) => {
|
||||
await waitForPortFree(port);
|
||||
waitForPortFree: async (port, signal) => {
|
||||
await waitForPortFree(port, 30000, signal);
|
||||
},
|
||||
startProcess: async () => {
|
||||
await this.startProcess();
|
||||
@@ -282,6 +282,29 @@ export class GatewayManager extends EventEmitter {
|
||||
// Resolves (with no value) once `ms` milliseconds have elapsed.
delay: (ms) =>
  new Promise<void>((wake) => {
    setTimeout(wake, ms);
  }),
|
||||
/**
 * Terminates the Gateway child process owned by this manager so it is not
 * left running when the start sequence retries.
 *
 * Does nothing when there is no process handle or the process is not owned.
 * Termination is best-effort: a failure is logged and never rethrown.
 *
 * The handle in `this.process` is cleared only when the child is confirmed
 * gone, so later retries and stop() can still target a surviving process.
 */
terminateOwnedProcess: async () => {
  const proc = this.process;
  if (!proc || !this.ownsProcess) {
    return;
  }

  logger.info('Terminating owned Gateway process before retry...');
  // Capture the pid up front; it is needed for the liveness probe below.
  const pid = proc.pid;

  try {
    await terminateOwnedGatewayProcess(proc);
  } catch (err) {
    // Best-effort: keep going, but do not hide the failure.
    logger.warn('Failed to terminate owned Gateway process before retry:', err);
  }

  // terminateOwnedGatewayProcess may resolve via its timeout path while the
  // child is still alive, so confirm the exit before dropping the handle.
  if (pid == null) {
    // No pid to probe — nothing further can be targeted through this handle.
    this.process = null;
    return;
  }

  try {
    // Signal 0 sends nothing; it only tests whether the pid can be signalled.
    process.kill(pid, 0);
    // Still alive — keep this.process so later cleanup can reach it.
  } catch (err) {
    const code = (err as { code?: string } | null)?.code;
    // EPERM means the process exists but cannot be signalled: still alive.
    // Any other failure (typically ESRCH) means the process is gone.
    if (code !== 'EPERM') {
      this.process = null;
    }
  }
},
|
||||
});
|
||||
} catch (error) {
|
||||
if (error instanceof LifecycleSupersededError) {
|
||||
|
||||
@@ -18,13 +18,15 @@ type StartupHooks = {
|
||||
findExistingGateway: (port: number) => Promise<ExistingGatewayInfo | null>;
|
||||
connect: (port: number, externalToken?: string) => Promise<void>;
|
||||
onConnectedToExistingGateway: () => void;
|
||||
waitForPortFree: (port: number) => Promise<void>;
|
||||
waitForPortFree: (port: number, signal?: AbortSignal) => Promise<void>;
|
||||
startProcess: () => Promise<void>;
|
||||
waitForReady: (port: number) => Promise<void>;
|
||||
onConnectedToManagedGateway: () => void;
|
||||
runDoctorRepair: () => Promise<boolean>;
|
||||
onDoctorRepairSuccess: () => void;
|
||||
delay: (ms: number) => Promise<void>;
|
||||
/** Called before a retry to terminate the previously spawned process if still running */
|
||||
terminateOwnedProcess?: () => Promise<void>;
|
||||
};
|
||||
|
||||
export async function runGatewayStartupSequence(hooks: StartupHooks): Promise<void> {
|
||||
@@ -97,6 +99,34 @@ export async function runGatewayStartupSequence(hooks: StartupHooks): Promise<vo
|
||||
if (recoveryAction === 'retry') {
|
||||
logger.warn(`Transient start error: ${String(error)}. Retrying... (${startAttempts}/${maxStartAttempts})`);
|
||||
await hooks.delay(1000);
|
||||
// Terminate the previously spawned process before retrying so it doesn't
|
||||
// hold the port and cause another handshake failure.
|
||||
if (hooks.terminateOwnedProcess) {
|
||||
await hooks.terminateOwnedProcess().catch((err) => {
|
||||
logger.warn('Failed to terminate owned process before retry:', err);
|
||||
});
|
||||
}
|
||||
hooks.assertLifecycle('start/retry-pre-port-wait');
|
||||
// Wait for port to become free before retrying (handles lingering processes).
|
||||
// Use a short-polling AbortController so that a superseding stop()/restart()
|
||||
// can cancel the wait promptly instead of blocking for the full 30s timeout.
|
||||
if (hooks.shouldWaitForPortFree) {
|
||||
const abortController = new AbortController();
|
||||
// Poll lifecycle every 500ms and abort the port-wait if superseded
|
||||
const lifecyclePollInterval = setInterval(() => {
|
||||
try {
|
||||
hooks.assertLifecycle('start/retry-port-wait-poll');
|
||||
} catch {
|
||||
abortController.abort();
|
||||
}
|
||||
}, 500);
|
||||
try {
|
||||
await hooks.waitForPortFree(hooks.port, abortController.signal);
|
||||
} finally {
|
||||
clearInterval(lifecyclePollInterval);
|
||||
}
|
||||
}
|
||||
hooks.assertLifecycle('start/retry-post-port-wait');
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@@ -34,11 +34,12 @@ export async function terminateOwnedGatewayProcess(child: Electron.UtilityProces
|
||||
|
||||
// Register a single exit listener before any kill attempt to avoid
|
||||
// the race where exit fires between two separate `once('exit')` calls.
|
||||
child.once('exit', () => {
|
||||
const exitListener = () => {
|
||||
exited = true;
|
||||
clearTimeout(timeout);
|
||||
resolve();
|
||||
});
|
||||
};
|
||||
child.once('exit', exitListener);
|
||||
|
||||
const pid = child.pid;
|
||||
logger.info(`Sending kill to Gateway process (pid=${pid ?? 'unknown'})`);
|
||||
@@ -72,6 +73,8 @@ export async function terminateOwnedGatewayProcess(child: Electron.UtilityProces
|
||||
}
|
||||
}
|
||||
}
|
||||
// Clean up the exit listener on timeout to prevent listener leaks
|
||||
child.off('exit', exitListener);
|
||||
resolve();
|
||||
}, 5000);
|
||||
});
|
||||
@@ -125,13 +128,18 @@ export async function unloadLaunchctlGatewayService(): Promise<void> {
|
||||
}
|
||||
}
|
||||
|
||||
export async function waitForPortFree(port: number, timeoutMs = 30000): Promise<void> {
|
||||
export async function waitForPortFree(port: number, timeoutMs = 30000, signal?: AbortSignal): Promise<void> {
|
||||
const net = await import('net');
|
||||
const start = Date.now();
|
||||
const pollInterval = 500;
|
||||
let logged = false;
|
||||
|
||||
while (Date.now() - start < timeoutMs) {
|
||||
if (signal?.aborted) {
|
||||
logger.debug(`waitForPortFree: aborted while waiting for port ${port}`);
|
||||
return;
|
||||
}
|
||||
|
||||
const available = await new Promise<boolean>((resolve) => {
|
||||
const server = net.createServer();
|
||||
server.once('error', () => resolve(false));
|
||||
|
||||
Reference in New Issue
Block a user