fix(gateway): refine process termination and wait logic on retry (#750)
This commit is contained in:
committed by
GitHub
Unverified
parent
560ae95611
commit
28508bc643
@@ -297,11 +297,13 @@ export class GatewayManager extends EventEmitter {
|
||||
process.kill(pid, 0);
|
||||
// Still alive — keep this.process so later cleanup can reach it
|
||||
} catch {
|
||||
- // Process is gone — safe to clear the handle
+ // Process is gone — safe to clear the handle and ownership flag
|
||||
this.process = null;
|
||||
this.ownsProcess = false;
|
||||
}
|
||||
} else {
|
||||
this.process = null;
|
||||
this.ownsProcess = false;
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
@@ -98,14 +98,15 @@ export async function runGatewayStartupSequence(hooks: StartupHooks): Promise<vo
|
||||
|
||||
if (recoveryAction === 'retry') {
|
||||
logger.warn(`Transient start error: ${String(error)}. Retrying... (${startAttempts}/${maxStartAttempts})`);
|
||||
- await hooks.delay(1000);
- // Terminate the previously spawned process before retrying so it doesn't
- // hold the port and cause another handshake failure.
+ // Terminate the previously spawned process before the backoff delay so
+ // it releases the port as early as possible; the subsequent delay gives
+ // the OS time to recycle the port (especially TCP TIME_WAIT on Windows).
|
||||
if (hooks.terminateOwnedProcess) {
|
||||
await hooks.terminateOwnedProcess().catch((err) => {
|
||||
logger.warn('Failed to terminate owned process before retry:', err);
|
||||
});
|
||||
}
|
||||
await hooks.delay(1000);
|
||||
hooks.assertLifecycle('start/retry-pre-port-wait');
|
||||
// Wait for port to become free before retrying (handles lingering processes).
|
||||
// Use a short-polling AbortController so that a superseding stop()/restart()
|
||||
@@ -122,6 +123,12 @@ export async function runGatewayStartupSequence(hooks: StartupHooks): Promise<vo
|
||||
}, 500);
|
||||
try {
|
||||
await hooks.waitForPortFree(hooks.port, abortController.signal);
|
||||
} catch (portWaitError) {
|
||||
// If the wait was aborted due to lifecycle supersede, convert to
|
||||
// LifecycleSupersededError so the outer handler propagates correctly.
|
||||
hooks.assertLifecycle('start/retry-port-wait-aborted');
|
||||
// If assertLifecycle didn't throw, it's a genuine port-wait error — rethrow.
|
||||
throw portWaitError;
|
||||
} finally {
|
||||
clearInterval(lifecyclePollInterval);
|
||||
}
|
||||
|
||||
@@ -137,7 +137,7 @@ export async function waitForPortFree(port: number, timeoutMs = 30000, signal?:
|
||||
while (Date.now() - start < timeoutMs) {
|
||||
if (signal?.aborted) {
|
||||
logger.debug(`waitForPortFree: aborted while waiting for port ${port}`);
|
||||
- return;
+ throw new Error(`waitForPortFree: aborted while waiting for port ${port}`);
|
||||
}
|
||||
|
||||
const available = await new Promise<boolean>((resolve) => {
|
||||
|
||||
Reference in New Issue
Block a user