fix: resolve Windows gateway reconnect race condition and translate comments (#787)

This commit is contained in:
paisley
2026-04-07 16:58:18 +08:00
committed by GitHub
Unverified
parent d8750e135b
commit c3a735a49c
5 changed files with 97 additions and 17 deletions

View File

@@ -775,8 +775,19 @@ export class GatewayManager extends EventEmitter {
if (this.status.state === 'running') {
this.setStatus({ state: 'stopped' });
this.scheduleReconnect();
}
// Always attempt reconnect from process exit. scheduleReconnect()
// internally checks shouldReconnect and reconnect-timer guards, so
// calling it unconditionally is safe — intentional stop() calls set
// shouldReconnect=false, which makes scheduleReconnect() a no-op.
//
// On Windows, the WS close handler intentionally skips reconnect
// (to avoid racing with this exit handler). However, WS close
// fires *before* process exit and sets state='stopped', which
// previously caused this handler to also skip reconnect — leaving
// the gateway permanently dead with no recovery path.
this.scheduleReconnect();
},
onError: () => {
this.ownsProcess = false;

View File

@@ -162,7 +162,12 @@ export async function launchGatewayProcess(options: {
});
child.on('exit', (code: number) => {
const expectedExit = !options.getShouldReconnect() || options.getCurrentState() === 'stopped';
// Only check shouldReconnect — not current state. On Windows the WS
// close handler fires before the process exit handler and sets state to
// 'stopped', which would make an unexpected crash look like a planned
// shutdown in logs. shouldReconnect is the reliable indicator: stop()
// sets it to false (expected), crashes leave it true (unexpected).
const expectedExit = !options.getShouldReconnect();
const level = expectedExit ? logger.info : logger.warn;
level(`Gateway process exited (code=${code}, expected=${expectedExit ? 'yes' : 'no'})`);
options.onExit(child, code);

View File

@@ -684,9 +684,10 @@ function buildSessionSwitchPatch(
>,
nextSessionKey: string,
): Partial<ChatState> {
// 仅将没有任何历史记录且无活动时间的会话视为空会话。
// 单纯依赖 messages.length 是不可靠的,因为 switchSession 会在真正调用 loadHistory 前抢先清空当前 messages
// 造成竞争条件,使得带有真实历史的会话被判定为空并从侧边栏移除。
// Only treat sessions with no history records and no activity timestamp as empty.
// Relying solely on messages.length is unreliable because switchSession clears
// the current messages before loadHistory runs, creating a race condition that
// could cause sessions with real history to be incorrectly removed from the sidebar.
const leavingEmpty = !state.currentSessionKey.endsWith(':main')
&& state.messages.length === 0
&& !state.sessionLastActivity[state.currentSessionKey]
@@ -1229,7 +1230,7 @@ export const useChatStore = create<ChatState>((set, get) => ({
// sessions.reset archives (renames) the session JSONL file, making old
// conversation history inaccessible when the user switches back to it.
const { currentSessionKey, messages, sessions, sessionLastActivity, sessionLabels } = get();
// 仅将没有任何历史记录且无活动时间的会话视为空会话
// Only treat sessions with no history records and no activity timestamp as empty
const leavingEmpty = !currentSessionKey.endsWith(':main')
&& messages.length === 0
&& !sessionLastActivity[currentSessionKey]
@@ -1272,8 +1273,8 @@ export const useChatStore = create<ChatState>((set, get) => ({
// This mirrors the "leavingEmpty" logic in switchSession so that creating
// a new session and immediately navigating away doesn't leave a ghost entry
// in the sidebar.
// 同样需要综合检查 sessionLastActivity sessionLabels
// 防止因为 switchSession 抢先清空 messages 而误判有历史的会话为空。
// Also take sessionLastActivity and sessionLabels into account, so that a session
// with history is not mistaken for empty just because switchSession cleared messages early.
const isEmptyNonMain = !currentSessionKey.endsWith(':main')
&& messages.length === 0
&& !sessionLastActivity[currentSessionKey]
@@ -1307,8 +1308,8 @@ export const useChatStore = create<ChatState>((set, get) => ({
if (!quiet) set({ loading: true, error: null });
// 安全保护:如果历史记录加载花费太多时间,则强制将 loading 设置为 false
// 防止 UI 永远卡在转圈状态。
// Safety guard: if history loading takes too long, force loading to false
// to prevent the UI from being stuck in a spinner forever.
let loadingTimedOut = false;
const loadingSafetyTimer = quiet ? null : setTimeout(() => {
loadingTimedOut = true;
@@ -1494,7 +1495,7 @@ export const useChatStore = create<ChatState>((set, get) => ({
try {
await loadPromise;
} finally {
// 正常完成时清除安全定时器
// Clear the safety timer on normal completion
if (loadingSafetyTimer) clearTimeout(loadingSafetyTimer);
if (!loadingTimedOut) {
// Only update load time if we actually didn't time out

View File

@@ -154,9 +154,10 @@ export function createSessionActions(
switchSession: (key: string) => {
const { currentSessionKey, messages, sessionLastActivity, sessionLabels } = get();
// 仅将没有任何历史记录且无活动时间的会话视为空会话。
// 单纯依赖 messages.length 是不可靠的,因为 switchSession 会在真正调用 loadHistory 前抢先清空当前 messages
// 造成竞争条件,使得带有真实历史的会话被判定为空并从侧边栏移除。
// Only treat sessions with no history records and no activity timestamp as empty.
// Relying solely on messages.length is unreliable because switchSession clears
// the current messages before loadHistory runs, creating a race condition that
// could cause sessions with real history to be incorrectly removed from the sidebar.
const leavingEmpty = !currentSessionKey.endsWith(':main')
&& messages.length === 0
&& !sessionLastActivity[currentSessionKey]
@@ -253,7 +254,7 @@ export function createSessionActions(
// sessions.reset archives (renames) the session JSONL file, making old
// conversation history inaccessible when the user switches back to it.
const { currentSessionKey, messages, sessionLastActivity, sessionLabels } = get();
// 仅将没有任何历史记录且无活动时间的会话视为空会话
// Only treat sessions with no history records and no activity timestamp as empty
const leavingEmpty = !currentSessionKey.endsWith(':main')
&& messages.length === 0
&& !sessionLastActivity[currentSessionKey]
@@ -294,8 +295,8 @@ export function createSessionActions(
// This mirrors the "leavingEmpty" logic in switchSession so that creating
// a new session and immediately navigating away doesn't leave a ghost entry
// in the sidebar.
// 同样需要综合检查 sessionLastActivity sessionLabels
// 防止因为 switchSession 抢先清空 messages 而误判有历史的会话为空。
// Also take sessionLastActivity and sessionLabels into account, so that a session
// with history is not mistaken for empty just because switchSession cleared messages early.
const isEmptyNonMain = !currentSessionKey.endsWith(':main')
&& messages.length === 0
&& !sessionLastActivity[currentSessionKey]

View File

@@ -1,6 +1,7 @@
import { describe, expect, it } from 'vitest';
import {
getDeferredRestartAction,
getReconnectScheduleDecision,
getReconnectSkipReason,
isLifecycleSuperseded,
nextLifecycleEpoch,
@@ -109,4 +110,65 @@ describe('gateway process policy helpers', () => {
).toBe('drop');
});
});
// Covers each outcome the tests expect from getReconnectScheduleDecision:
// 'skip', 'already-scheduled', 'fail', and 'schedule' (including the maxDelay cap).
describe('getReconnectScheduleDecision', () => {
  // Baseline context: reconnect enabled, no pending timer, zero attempts used.
  const defaults = {
    shouldReconnect: true,
    hasReconnectTimer: false,
    reconnectAttempts: 0,
    maxAttempts: 10,
    baseDelay: 1000,
    maxDelay: 30000,
  };

  // Evaluates the policy helper on the baseline with the given fields overridden.
  const decide = (overrides: Partial<typeof defaults> = {}) =>
    getReconnectScheduleDecision({ ...defaults, ...overrides });

  it('skips reconnect when shouldReconnect is false (intentional stop)', () => {
    const outcome = decide({ shouldReconnect: false });

    expect(outcome).toEqual({ action: 'skip', reason: 'auto-reconnect disabled' });
  });

  it('returns already-scheduled when a reconnect timer exists (prevents double-scheduling)', () => {
    const outcome = decide({ hasReconnectTimer: true });

    expect(outcome).toEqual({ action: 'already-scheduled' });
  });

  it('fails when max reconnect attempts are exhausted', () => {
    const outcome = decide({ reconnectAttempts: 10, maxAttempts: 10 });

    expect(outcome).toEqual({ action: 'fail', attempts: 10, maxAttempts: 10 });
  });

  it('schedules reconnect with exponential backoff delay', () => {
    const outcome = decide({ reconnectAttempts: 0 });

    // First attempt: the expected delay equals baseDelay.
    const expected = {
      action: 'schedule',
      nextAttempt: 1,
      maxAttempts: 10,
      delay: 1000,
    };
    expect(outcome).toEqual(expected);
  });

  it('caps backoff delay at maxDelay', () => {
    const outcome = decide({ reconnectAttempts: 8, maxDelay: 30000 });

    // Any action other than 'schedule' already fails here; the guard below
    // is only needed so `delay` can be read from the matching variant.
    expect(outcome).toMatchObject({ action: 'schedule' });
    if (outcome.action === 'schedule') {
      expect(outcome.delay).toBeLessThanOrEqual(30000);
    }
  });
});
});