fix(gateway): refine process termination and wait logic on retry (#750)

This commit is contained in:
paisley
2026-04-02 16:45:23 +08:00
committed by GitHub
Unverified
parent 560ae95611
commit 28508bc643
3 changed files with 14 additions and 5 deletions

View File

@@ -297,11 +297,13 @@ export class GatewayManager extends EventEmitter {
  process.kill(pid, 0);
  // Still alive — keep this.process so later cleanup can reach it
  } catch {
- // Process is gone — safe to clear the handle
+ // Process is gone — safe to clear the handle and ownership flag
  this.process = null;
+ this.ownsProcess = false;
  }
  } else {
  this.process = null;
+ this.ownsProcess = false;
  }
  }
  },

View File

@@ -98,14 +98,15 @@ export async function runGatewayStartupSequence(hooks: StartupHooks): Promise<vo
  if (recoveryAction === 'retry') {
  logger.warn(`Transient start error: ${String(error)}. Retrying... (${startAttempts}/${maxStartAttempts})`);
- await hooks.delay(1000);
- // Terminate the previously spawned process before retrying so it doesn't
- // hold the port and cause another handshake failure.
+ // Terminate the previously spawned process before the backoff delay so
+ // it releases the port as early as possible; the subsequent delay gives
+ // the OS time to recycle the port (especially TCP TIME_WAIT on Windows).
  if (hooks.terminateOwnedProcess) {
  await hooks.terminateOwnedProcess().catch((err) => {
  logger.warn('Failed to terminate owned process before retry:', err);
  });
  }
+ await hooks.delay(1000);
  hooks.assertLifecycle('start/retry-pre-port-wait');
  // Wait for port to become free before retrying (handles lingering processes).
  // Use a short-polling AbortController so that a superseding stop()/restart()
@@ -122,6 +123,12 @@ export async function runGatewayStartupSequence(hooks: StartupHooks): Promise<vo
  }, 500);
  try {
  await hooks.waitForPortFree(hooks.port, abortController.signal);
+ } catch (portWaitError) {
+ // If the wait was aborted due to lifecycle supersede, convert to
+ // LifecycleSupersededError so the outer handler propagates correctly.
+ hooks.assertLifecycle('start/retry-port-wait-aborted');
+ // If assertLifecycle didn't throw, it's a genuine port-wait error — rethrow.
+ throw portWaitError;
  } finally {
  clearInterval(lifecyclePollInterval);
  }

View File

@@ -137,7 +137,7 @@ export async function waitForPortFree(port: number, timeoutMs = 30000, signal?:
  while (Date.now() - start < timeoutMs) {
  if (signal?.aborted) {
  logger.debug(`waitForPortFree: aborted while waiting for port ${port}`);
- return;
+ throw new Error(`waitForPortFree: aborted while waiting for port ${port}`);
  }
  const available = await new Promise<boolean>((resolve) => {