fix(gateway): resolve model switch failure and WebSocket timeouts during rapid OAuth restarts (#266)

This commit is contained in:
paisley
2026-03-03 10:18:23 +08:00
committed by GitHub
Unverified
parent 9532400053
commit bc47b455b5
2 changed files with 24 additions and 1 deletion

View File

@@ -337,7 +337,11 @@ export class GatewayManager extends EventEmitter {
});
try {
let startAttempts = 0;
const MAX_START_ATTEMPTS = 3;
while (true) {
startAttempts++;
this.assertLifecycleEpoch(startEpoch, 'start');
this.recentStartupStderrLines = [];
try {
@@ -390,6 +394,22 @@ export class GatewayManager extends EventEmitter {
}
logger.error('OpenClaw doctor repair failed; not retrying Gateway startup');
}
// Retry on transient connect errors
const errMsg = String(error);
const isTransientError =
errMsg.includes('WebSocket closed before handshake') ||
errMsg.includes('ECONNREFUSED') ||
errMsg.includes('Gateway process exited before becoming ready') ||
errMsg.includes('Timed out waiting for connect.challenge') ||
errMsg.includes('Connect handshake timeout');
if (startAttempts < MAX_START_ATTEMPTS && isTransientError) {
logger.warn(`Transient start error: ${errMsg}. Retrying... (${startAttempts}/${MAX_START_ATTEMPTS})`);
await new Promise((r) => setTimeout(r, 1000));
continue;
}
throw error;
}
}

View File

@@ -1312,7 +1312,10 @@ function registerProviderHandlers(gatewayManager: GatewayManager): void {
}
// Debounced restart so the gateway picks up the new default provider.
if (gatewayManager.isConnected()) {
// Because OAuth success triggers a debounced restart, the gateway might not be
// currently connected ('starting' or 'reconnecting'). Checking if it is simply
// not 'stopped' ensures the restart request is correctly queued or coalesced.
if (gatewayManager.getStatus().state !== 'stopped') {
logger.info(`Scheduling Gateway restart after provider switch to "${ock}"`);
gatewayManager.debouncedRestart();
}