From 9b503b531b342c174947555ffcbb07bc0e833d2e Mon Sep 17 00:00:00 2001 From: Haze <709547807@qq.com> Date: Fri, 20 Mar 2026 18:34:20 +0800 Subject: [PATCH] fix(processes): fix multiple clawx processes running concurrently (#589) Co-authored-by: Cursor Agent Co-authored-by: Haze Co-authored-by: paisley <8197966+su8su@users.noreply.github.com> Co-authored-by: Felix <24791380+vcfgv@users.noreply.github.com> --- README.ja-JP.md | 11 ++ README.md | 11 ++ README.zh-CN.md | 11 ++ electron/api/routes/agents.ts | 16 +- electron/gateway/manager.ts | 22 +++ electron/gateway/supervisor.ts | 48 ++++-- electron/main/index.ts | 119 ++++++++++++++- electron/main/process-instance-lock.ts | 181 +++++++++++++++++++++++ electron/main/quit-lifecycle.ts | 30 ++++ electron/main/signal-quit.ts | 11 ++ tests/unit/agents-routes.test.ts | 78 ++++++++++ tests/unit/gateway-supervisor.test.ts | 137 +++++++++++++++++ tests/unit/main-quit-lifecycle.test.ts | 23 +++ tests/unit/process-instance-lock.test.ts | 157 ++++++++++++++++++++ tests/unit/signal-quit.test.ts | 15 ++ 15 files changed, 844 insertions(+), 26 deletions(-) create mode 100644 electron/main/process-instance-lock.ts create mode 100644 electron/main/quit-lifecycle.ts create mode 100644 electron/main/signal-quit.ts create mode 100644 tests/unit/agents-routes.test.ts create mode 100644 tests/unit/gateway-supervisor.test.ts create mode 100644 tests/unit/main-quit-lifecycle.test.ts create mode 100644 tests/unit/process-instance-lock.test.ts create mode 100644 tests/unit/signal-quit.test.ts diff --git a/README.ja-JP.md b/README.ja-JP.md index 9f24daecf..eedcde87a 100644 --- a/README.ja-JP.md +++ b/README.ja-JP.md @@ -252,6 +252,17 @@ ClawXは、**デュアルプロセス + Host API 統一アクセス**構成を - **セキュアストレージ**: APIキーや機密データは、OSのネイティブセキュアストレージ機構を活用します - **CORSセーフ設計**: ローカルHTTPはMainプロキシ経由とし、Renderer側CORS問題を回避します +### プロセスモデルと Gateway トラブルシューティング + +- ClawX は Electron アプリのため、**1つのアプリインスタンスでも複数プロセス(main/renderer/zygote/utility)が表示される**のが正常です。 +- 単一起動保護は Electron 
のロックに加え、ローカルのプロセスロックファイルも併用し、デスクトップ IPC / セッションバスが不安定な環境でも重複起動を防ぎます。 +- ローリングアップグレード中に旧版/新版が混在すると、単一起動保護の挙動が非対称になる場合があります。安定運用のため、デスクトップクライアントは可能な限り同一バージョンへ揃えてください。 +- ただし OpenClaw Gateway の待受は常に**単一**であるべきです。`127.0.0.1:18789` を Listen しているプロセスは1つだけです。 +- Listen プロセスの確認例: + - macOS/Linux: `lsof -nP -iTCP:18789 -sTCP:LISTEN` + - Windows (PowerShell): `Get-NetTCPConnection -LocalPort 18789 -State Listen` +- ウィンドウの閉じるボタン(`X`)は既定でトレイへ最小化する動作で、完全終了ではありません。完全終了する場合はトレイメニューの **Quit ClawX** を使用してください。 + --- ## ユースケース diff --git a/README.md b/README.md index 8e68f4991..0e839d7e2 100644 --- a/README.md +++ b/README.md @@ -256,6 +256,17 @@ ClawX employs a **dual-process architecture** with a unified host API layer. The - **Secure Storage**: API keys and sensitive data leverage the operating system's native secure storage mechanisms - **CORS-Safe by Design**: Local HTTP access is proxied by Main, preventing renderer-side CORS issues +### Process Model & Gateway Troubleshooting + +- ClawX is an Electron app, so **one app instance normally appears as multiple OS processes** (main/renderer/zygote/utility). This is expected. +- Single-instance protection uses Electron's lock plus a local process-file lock fallback, preventing duplicate app launch in environments where desktop IPC/session bus is unstable. +- During rolling upgrades, mixed old/new app versions can still have asymmetric protection behavior. For best reliability, upgrade all desktop clients to the same version. +- The OpenClaw Gateway listener should still be **single-owner**: only one process should listen on `127.0.0.1:18789`. +- To verify the active listener: + - macOS/Linux: `lsof -nP -iTCP:18789 -sTCP:LISTEN` + - Windows (PowerShell): `Get-NetTCPConnection -LocalPort 18789 -State Listen` +- Clicking the window close button (`X`) hides ClawX to tray; it does **not** fully quit the app. Use tray menu **Quit ClawX** for complete shutdown. 
+ --- ## Use Cases diff --git a/README.zh-CN.md b/README.zh-CN.md index eaa8e4c68..c772586e3 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -256,6 +256,17 @@ ClawX 采用 **双进程 + Host API 统一接入架构**。渲染进程只调用 - **安全存储**:API 密钥和敏感数据利用操作系统原生的安全存储机制 - **CORS 安全**:本地 HTTP 请求由主进程代理,避免渲染进程跨域问题 +### 进程模型与 Gateway 排障 + +- ClawX 基于 Electron,**单个应用实例出现多个系统进程是正常现象**(main/renderer/zygote/utility)。 +- 单实例保护同时使用 Electron 自带锁与本地进程文件锁回退机制,可在桌面会话总线异常时避免重复启动。 +- 滚动升级期间若新旧版本混跑,单实例保护仍可能出现不对称行为。为保证稳定性,建议桌面客户端尽量统一升级到同一版本。 +- 但 OpenClaw Gateway 监听应始终保持**单实例**:`127.0.0.1:18789` 只能有一个监听者。 +- 可用以下命令确认监听进程: + - macOS/Linux:`lsof -nP -iTCP:18789 -sTCP:LISTEN` + - Windows(PowerShell):`Get-NetTCPConnection -LocalPort 18789 -State Listen` +- 点击窗口关闭按钮(`X`)默认只是最小化到托盘,并不会完全退出应用。请在托盘菜单中选择 **Quit ClawX** 执行完整退出。 + --- ## 使用场景 diff --git a/electron/api/routes/agents.ts b/electron/api/routes/agents.ts index e5dbbdb7d..13f9823fa 100644 --- a/electron/api/routes/agents.ts +++ b/electron/api/routes/agents.ts @@ -36,7 +36,7 @@ const execAsync = promisify(exec); * stale bot connections is to kill the Gateway process entirely and * spawn a fresh one that reads the updated openclaw.json from scratch. */ -async function restartGatewayForAgentDeletion(ctx: HostApiContext): Promise { +export async function restartGatewayForAgentDeletion(ctx: HostApiContext): Promise { try { // Capture the PID of the running Gateway BEFORE stop() clears it. 
const status = ctx.gatewayManager.getStatus(); @@ -50,10 +50,14 @@ async function restartGatewayForAgentDeletion(ctx: HostApiContext): Promise setTimeout(resolve, 500)); - try { process.kill(pid, 0); process.kill(pid, 'SIGKILL'); } catch { /* already dead */ } + if (process.platform === 'win32') { + await execAsync(`taskkill /F /PID ${pid} /T`); + } else { + process.kill(pid, 'SIGTERM'); + // Give it a moment to die + await new Promise((resolve) => setTimeout(resolve, 500)); + try { process.kill(pid, 0); process.kill(pid, 'SIGKILL'); } catch { /* already dead */ } + } } catch { // process already gone – that's fine } @@ -85,7 +89,7 @@ async function restartGatewayForAgentDeletion(ctx: HostApiContext): Promise { + // If the existing gateway is actually our own spawned UtilityProcess // (e.g. after a self-restart code=1012), keep ownership so that // stop() can still terminate the process during a restart() cycle. @@ -250,6 +251,7 @@ export class GatewayManager extends EventEmitter { this.ownsProcess = false; this.setStatus({ pid: undefined }); } + this.startHealthCheck(); }, waitForPortFree: async (port) => { @@ -356,6 +358,25 @@ export class GatewayManager extends EventEmitter { this.setStatus({ state: 'stopped', error: undefined, pid: undefined, connectedAt: undefined, uptime: undefined }); } + /** + * Best-effort emergency cleanup for app-quit timeout paths. + * Only terminates a process this manager still owns. 
+ */ + async forceTerminateOwnedProcessForQuit(): Promise { + if (!this.process || !this.ownsProcess) { + return false; + } + + const child = this.process; + await terminateOwnedGatewayProcess(child); + if (this.process === child) { + this.process = null; + } + this.ownsProcess = false; + this.setStatus({ pid: undefined }); + return true; + } + /** * Restart Gateway process */ @@ -724,6 +745,7 @@ export class GatewayManager extends EventEmitter { this.process = child; this.ownsProcess = true; + logger.debug(`Gateway manager now owns process pid=${child.pid ?? 'unknown'}`); this.lastSpawnSummary = lastSpawnSummary; } diff --git a/electron/gateway/supervisor.ts b/electron/gateway/supervisor.ts index 04391d944..7f6d7461c 100644 --- a/electron/gateway/supervisor.ts +++ b/electron/gateway/supervisor.ts @@ -22,39 +22,58 @@ export function warmupManagedPythonReadiness(): void { } export async function terminateOwnedGatewayProcess(child: Electron.UtilityProcess): Promise { - let exited = false; + const terminateWindowsProcessTree = async (pid: number): Promise => { + const cp = await import('child_process'); + await new Promise((resolve) => { + cp.exec(`taskkill /F /PID ${pid} /T`, { timeout: 5000, windowsHide: true }, () => resolve()); + }); + }; await new Promise((resolve) => { + let exited = false; + + // Register a single exit listener before any kill attempt to avoid + // the race where exit fires between two separate `once('exit')` calls. child.once('exit', () => { exited = true; + clearTimeout(timeout); resolve(); }); const pid = child.pid; logger.info(`Sending kill to Gateway process (pid=${pid ?? 
'unknown'})`); - try { - child.kill(); - } catch { - // ignore if already exited + + if (process.platform === 'win32' && pid) { + void terminateWindowsProcessTree(pid).catch((err) => { + logger.warn(`Windows process-tree kill failed for Gateway pid=${pid}:`, err); + }); + } else { + try { + child.kill(); + } catch { + // ignore if already exited + } } const timeout = setTimeout(() => { if (!exited) { logger.warn(`Gateway did not exit in time, force-killing (pid=${pid ?? 'unknown'})`); if (pid) { - try { - process.kill(pid, 'SIGKILL'); - } catch { - // ignore + if (process.platform === 'win32') { + void terminateWindowsProcessTree(pid).catch((err) => { + logger.warn(`Forced Windows process-tree kill failed for Gateway pid=${pid}:`, err); + }); + } else { + try { + process.kill(pid, 'SIGKILL'); + } catch { + // ignore + } } } } resolve(); }, 5000); - - child.once('exit', () => { - clearTimeout(timeout); - }); }); } @@ -226,6 +245,9 @@ export async function findExistingGatewayProcess(options: { const pids = await getListeningProcessIds(port); if (pids.length > 0 && (!ownedPid || !pids.includes(String(ownedPid)))) { await terminateOrphanedProcessIds(port, pids); + if (process.platform === 'win32') { + await waitForPortFree(port, 10000); + } return null; } } catch (err) { diff --git a/electron/main/index.ts b/electron/main/index.ts index f82bb02ef..7d21e856b 100644 --- a/electron/main/index.ts +++ b/electron/main/index.ts @@ -27,6 +27,13 @@ import { createMainWindowFocusState, requestSecondInstanceFocus, } from './main-window-focus'; +import { + createQuitLifecycleState, + markQuitCleanupCompleted, + requestQuitLifecycleAction, +} from './quit-lifecycle'; +import { createSignalQuitHandler } from './signal-quit'; +import { acquireProcessInstanceFileLock } from './process-instance-lock'; import { getSetting } from '../utils/store'; import { ensureBuiltinSkillsInstalled, ensurePreinstalledSkillsInstalled } from '../utils/skill-config'; import { 
ensureAllBundledPluginsInstalled } from '../utils/plugin-install'; @@ -68,10 +75,37 @@ if (process.platform === 'linux') { // same port, then each treats the other's gateway as "orphaned" and kills // it — creating an infinite kill/restart loop on Windows. // The losing process must exit immediately so it never reaches Gateway startup. -const gotTheLock = app.requestSingleInstanceLock(); -if (!gotTheLock) { +const gotElectronLock = app.requestSingleInstanceLock(); +if (!gotElectronLock) { + console.info('[ClawX] Another instance already holds the single-instance lock; exiting duplicate process'); app.exit(0); } +let releaseProcessInstanceFileLock: () => void = () => {}; +let gotFileLock = true; +if (gotElectronLock) { + try { + const fileLock = acquireProcessInstanceFileLock({ + userDataDir: app.getPath('userData'), + lockName: 'clawx', + }); + gotFileLock = fileLock.acquired; + releaseProcessInstanceFileLock = fileLock.release; + if (!fileLock.acquired) { + const ownerDescriptor = fileLock.ownerPid + ? `${fileLock.ownerFormat ?? 'legacy'} pid=${fileLock.ownerPid}` + : fileLock.ownerFormat === 'unknown' + ? 
'unknown lock format/content' + : 'unknown owner'; + console.info( + `[ClawX] Another instance already holds process lock (${fileLock.lockPath}, ${ownerDescriptor}); exiting duplicate process`, + ); + app.exit(0); + } + } catch (error) { + console.warn('[ClawX] Failed to acquire process instance file lock; continuing with Electron single-instance lock only', error); + } +} +const gotTheLock = gotElectronLock && gotFileLock; // Global references let mainWindow: BrowserWindow | null = null; @@ -80,6 +114,7 @@ let clawHubService!: ClawHubService; let hostEventBus!: HostEventBus; let hostApiServer: Server | null = null; const mainWindowFocusState = createMainWindowFocusState(); +const quitLifecycleState = createQuitLifecycleState(); /** * Resolve the icons directory path (works in both dev and packaged mode) @@ -216,7 +251,7 @@ async function initialize(): Promise { logger.init(); logger.info('=== ClawX Application Starting ==='); logger.debug( - `Runtime: platform=${process.platform}/${process.arch}, electron=${process.versions.electron}, node=${process.versions.node}, packaged=${app.isPackaged}` + `Runtime: platform=${process.platform}/${process.arch}, electron=${process.versions.electron}, node=${process.versions.node}, packaged=${app.isPackaged}, pid=${process.pid}, ppid=${process.ppid}` ); // Warm up network optimization (non-blocking) @@ -413,6 +448,22 @@ async function initialize(): Promise { } if (gotTheLock) { + const requestQuitOnSignal = createSignalQuitHandler({ + logInfo: (message) => logger.info(message), + requestQuit: () => app.quit(), + }); + + process.on('exit', () => { + releaseProcessInstanceFileLock(); + }); + + process.once('SIGINT', () => requestQuitOnSignal('SIGINT')); + process.once('SIGTERM', () => requestQuitOnSignal('SIGTERM')); + + app.on('will-quit', () => { + releaseProcessInstanceFileLock(); + }); + if (process.platform === 'win32') { app.setAppUserModelId(WINDOWS_APP_USER_MODEL_ID); } @@ -461,15 +512,69 @@ if (gotTheLock) { } }); - 
app.on('before-quit', () => { + app.on('before-quit', (event) => { setQuitting(); + const action = requestQuitLifecycleAction(quitLifecycleState); + + if (action === 'allow-quit') { + return; + } + + event.preventDefault(); + + if (action === 'cleanup-in-progress') { + logger.debug('Quit requested while cleanup already in progress; waiting for shutdown task to finish'); + return; + } + hostEventBus.closeAll(); hostApiServer?.close(); - // Fire-and-forget: do not await gatewayManager.stop() here. - // Awaiting inside before-quit can stall Electron's quit sequence. - void gatewayManager.stop().catch((err) => { + + const stopPromise = gatewayManager.stop().catch((err) => { logger.warn('gatewayManager.stop() error during quit:', err); }); + const timeoutPromise = new Promise<'timeout'>((resolve) => { + setTimeout(() => resolve('timeout'), 5000); + }); + + void Promise.race([stopPromise.then(() => 'stopped' as const), timeoutPromise]).then((result) => { + if (result === 'timeout') { + logger.warn('Gateway shutdown timed out during app quit; proceeding with forced quit'); + void gatewayManager.forceTerminateOwnedProcessForQuit().then((terminated) => { + if (terminated) { + logger.warn('Forced gateway process termination completed after quit timeout'); + } + }).catch((err) => { + logger.warn('Forced gateway termination failed after quit timeout:', err); + }); + } + markQuitCleanupCompleted(quitLifecycleState); + app.quit(); + }); + }); + + // Best-effort Gateway cleanup on unexpected crashes. + // These handlers attempt to terminate the Gateway child process within a + // short timeout before force-exiting, preventing orphaned processes. + const emergencyGatewayCleanup = (reason: string, error: unknown): void => { + logger.error(`${reason}:`, error); + try { + void gatewayManager?.stop().catch(() => { /* ignore */ }); + } catch { + // ignore — stop() may not be callable if state is corrupted + } + // Give Gateway stop a brief window, then force-exit. 
+ setTimeout(() => { + process.exit(1); + }, 3000).unref(); + }; + + process.on('uncaughtException', (error) => { + emergencyGatewayCleanup('Uncaught exception in main process', error); + }); + + process.on('unhandledRejection', (reason) => { + emergencyGatewayCleanup('Unhandled promise rejection in main process', reason); }); } diff --git a/electron/main/process-instance-lock.ts b/electron/main/process-instance-lock.ts new file mode 100644 index 000000000..a4d7b2417 --- /dev/null +++ b/electron/main/process-instance-lock.ts @@ -0,0 +1,181 @@ +import { closeSync, existsSync, mkdirSync, openSync, readFileSync, rmSync, writeFileSync } from 'node:fs'; +import { join } from 'node:path'; + +const LOCK_SCHEMA = 'clawx-instance-lock'; +const LOCK_VERSION = 1; + +export interface ProcessInstanceFileLock { + acquired: boolean; + lockPath: string; + ownerPid?: number; + ownerFormat?: 'legacy' | 'structured' | 'unknown'; + release: () => void; +} + +export interface ProcessInstanceFileLockOptions { + userDataDir: string; + lockName: string; + pid?: number; + isPidAlive?: (pid: number) => boolean; +} + +function defaultPidAlive(pid: number): boolean { + try { + process.kill(pid, 0); + return true; + } catch (error) { + const errno = (error as NodeJS.ErrnoException).code; + return errno !== 'ESRCH'; + } +} + +type ParsedLockOwner = + | { kind: 'legacy'; pid: number } + | { kind: 'structured'; pid: number } + | { kind: 'unknown' }; + +interface StructuredLockContent { + schema: string; + version: number; + pid: number; +} + +function parsePositivePid(raw: string): number | undefined { + if (!/^\d+$/.test(raw)) { + return undefined; + } + const parsed = Number.parseInt(raw, 10); + if (!Number.isFinite(parsed) || parsed <= 0) { + return undefined; + } + return parsed; +} + +function parseStructuredLockContent(raw: string): StructuredLockContent | undefined { + try { + const parsed = JSON.parse(raw) as Partial; + if ( + parsed?.schema === LOCK_SCHEMA + && parsed?.version === 
LOCK_VERSION + && typeof parsed?.pid === 'number' + && Number.isFinite(parsed.pid) + && parsed.pid > 0 + ) { + return { + schema: parsed.schema, + version: parsed.version, + pid: parsed.pid, + }; + } + } catch { + // ignore parse errors + } + return undefined; +} + +function readLockOwner(lockPath: string): ParsedLockOwner { + try { + const raw = readFileSync(lockPath, 'utf8').trim(); + const legacyPid = parsePositivePid(raw); + if (legacyPid !== undefined) { + return { kind: 'legacy', pid: legacyPid }; + } + + const structured = parseStructuredLockContent(raw); + if (structured) { + return { kind: 'structured', pid: structured.pid }; + } + } catch { + // ignore read errors + } + + return { kind: 'unknown' }; +} + +export function acquireProcessInstanceFileLock( + options: ProcessInstanceFileLockOptions, +): ProcessInstanceFileLock { + const pid = options.pid ?? process.pid; + const isPidAlive = options.isPidAlive ?? defaultPidAlive; + + mkdirSync(options.userDataDir, { recursive: true }); + const lockPath = join(options.userDataDir, `${options.lockName}.instance.lock`); + + let ownerPid: number | undefined; + let ownerFormat: ProcessInstanceFileLock['ownerFormat'] = 'unknown'; + + for (let attempt = 0; attempt < 2; attempt += 1) { + try { + const fd = openSync(lockPath, 'wx'); + try { + // Keep writing legacy numeric format for broad backward compatibility. + // Parser accepts both legacy numeric and structured JSON formats. 
+ writeFileSync(fd, String(pid), 'utf8'); + } finally { + closeSync(fd); + } + + let released = false; + return { + acquired: true, + lockPath, + release: () => { + if (released) return; + released = true; + try { + const currentOwner = readLockOwner(lockPath); + if ( + (currentOwner.kind === 'legacy' || currentOwner.kind === 'structured') + && currentOwner.pid !== pid + ) { + return; + } + if (currentOwner.kind === 'unknown') { + return; + } + rmSync(lockPath, { force: true }); + } catch { + // best-effort + } + }, + }; + } catch (error) { + const errno = (error as NodeJS.ErrnoException).code; + if (errno !== 'EEXIST') { + break; + } + + const owner = readLockOwner(lockPath); + if (owner.kind === 'legacy' || owner.kind === 'structured') { + ownerPid = owner.pid; + ownerFormat = owner.kind; + } else { + ownerPid = undefined; + ownerFormat = 'unknown'; + } + const shouldTreatAsStale = + (owner.kind === 'legacy' || owner.kind === 'structured') + && !isPidAlive(owner.pid); + if (shouldTreatAsStale && existsSync(lockPath)) { + try { + rmSync(lockPath, { force: true }); + continue; + } catch { + // If deletion fails, treat as held lock. 
+ } + } + + break; + } + } + + return { + acquired: false, + lockPath, + ownerPid, + ownerFormat, + release: () => { + // no-op when lock wasn't acquired + }, + }; +} diff --git a/electron/main/quit-lifecycle.ts b/electron/main/quit-lifecycle.ts new file mode 100644 index 000000000..3eaf45ea2 --- /dev/null +++ b/electron/main/quit-lifecycle.ts @@ -0,0 +1,30 @@ +export interface QuitLifecycleState { + cleanupStarted: boolean; + cleanupCompleted: boolean; +} + +export type QuitLifecycleAction = 'start-cleanup' | 'cleanup-in-progress' | 'allow-quit'; + +export function createQuitLifecycleState(): QuitLifecycleState { + return { + cleanupStarted: false, + cleanupCompleted: false, + }; +} + +export function requestQuitLifecycleAction(state: QuitLifecycleState): QuitLifecycleAction { + if (state.cleanupCompleted) { + return 'allow-quit'; + } + + if (state.cleanupStarted) { + return 'cleanup-in-progress'; + } + + state.cleanupStarted = true; + return 'start-cleanup'; +} + +export function markQuitCleanupCompleted(state: QuitLifecycleState): void { + state.cleanupCompleted = true; +} diff --git a/electron/main/signal-quit.ts b/electron/main/signal-quit.ts new file mode 100644 index 000000000..42bbdcff9 --- /dev/null +++ b/electron/main/signal-quit.ts @@ -0,0 +1,11 @@ +export interface SignalQuitHandlerHooks { + logInfo: (message: string) => void; + requestQuit: () => void; +} + +export function createSignalQuitHandler(hooks: SignalQuitHandlerHooks): (signal: NodeJS.Signals) => void { + return (signal: NodeJS.Signals) => { + hooks.logInfo(`Received ${signal}; requesting app quit`); + hooks.requestQuit(); + }; +} diff --git a/tests/unit/agents-routes.test.ts b/tests/unit/agents-routes.test.ts new file mode 100644 index 000000000..4d8812b75 --- /dev/null +++ b/tests/unit/agents-routes.test.ts @@ -0,0 +1,78 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +const originalPlatform = process.platform; + +const { mockExec } = vi.hoisted(() => ({ + 
mockExec: vi.fn(), +})); + +vi.mock('child_process', () => ({ + exec: mockExec, + default: { + exec: mockExec, + }, +})); + +vi.mock('@electron/utils/agent-config', () => ({ + assignChannelToAgent: vi.fn(), + clearChannelBinding: vi.fn(), + createAgent: vi.fn(), + deleteAgentConfig: vi.fn(), + listAgentsSnapshot: vi.fn(), + removeAgentWorkspaceDirectory: vi.fn(), + resolveAccountIdForAgent: vi.fn(), + updateAgentName: vi.fn(), +})); + +vi.mock('@electron/utils/channel-config', () => ({ + deleteChannelAccountConfig: vi.fn(), +})); + +vi.mock('@electron/services/providers/provider-runtime-sync', () => ({ + syncAllProviderAuthToRuntime: vi.fn(), +})); + +vi.mock('@electron/api/route-utils', () => ({ + parseJsonBody: vi.fn(), + sendJson: vi.fn(), +})); + +function setPlatform(platform: string): void { + Object.defineProperty(process, 'platform', { value: platform, writable: true }); +} + +describe('restartGatewayForAgentDeletion', () => { + beforeEach(() => { + vi.resetAllMocks(); + vi.resetModules(); + mockExec.mockImplementation((_cmd: string, _opts: object, cb: (err: Error | null, stdout: string) => void) => { + cb(null, ''); + return {} as never; + }); + }); + + afterEach(() => { + Object.defineProperty(process, 'platform', { value: originalPlatform, writable: true }); + }); + + it('uses taskkill tree strategy on Windows when gateway pid is known', async () => { + setPlatform('win32'); + const { restartGatewayForAgentDeletion } = await import('@electron/api/routes/agents'); + + const restart = vi.fn().mockResolvedValue(undefined); + const getStatus = vi.fn(() => ({ pid: 4321, port: 18789 })); + + await restartGatewayForAgentDeletion({ + gatewayManager: { + getStatus, + restart, + }, + } as never); + + expect(mockExec).toHaveBeenCalledWith( + 'taskkill /F /PID 4321 /T', + expect.any(Function), + ); + expect(restart).toHaveBeenCalledTimes(1); + }); +}); diff --git a/tests/unit/gateway-supervisor.test.ts b/tests/unit/gateway-supervisor.test.ts new file mode 100644 
index 000000000..e2b9e81d4 --- /dev/null +++ b/tests/unit/gateway-supervisor.test.ts @@ -0,0 +1,137 @@ +import { EventEmitter } from 'node:events'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +const originalPlatform = process.platform; + +const { + mockExec, + mockCreateServer, +} = vi.hoisted(() => ({ + mockExec: vi.fn(), + mockCreateServer: vi.fn(), +})); + +vi.mock('electron', () => ({ + app: { + isPackaged: false, + getPath: () => '/tmp', + }, + utilityProcess: {}, +})); + +vi.mock('child_process', () => ({ + exec: mockExec, + execSync: vi.fn(), + spawn: vi.fn(), + default: { + exec: mockExec, + execSync: vi.fn(), + spawn: vi.fn(), + }, +})); + +vi.mock('net', () => ({ + createServer: mockCreateServer, +})); + +class MockUtilityChild extends EventEmitter { + pid?: number; + kill = vi.fn(); + + constructor(pid?: number) { + super(); + this.pid = pid; + } +} + +function setPlatform(platform: string): void { + Object.defineProperty(process, 'platform', { value: platform, writable: true }); +} + +describe('gateway supervisor process cleanup', () => { + beforeEach(() => { + vi.resetModules(); + vi.clearAllMocks(); + + mockExec.mockImplementation((_cmd: string, _opts: object, cb: (err: Error | null, stdout: string) => void) => { + cb(null, ''); + return {} as never; + }); + + mockCreateServer.mockImplementation(() => { + const handlers = new Map void>(); + return { + once(event: string, callback: (...args: unknown[]) => void) { + handlers.set(event, callback); + return this; + }, + listen() { + queueMicrotask(() => handlers.get('listening')?.()); + return this; + }, + close(callback?: () => void) { + callback?.(); + }, + }; + }); + }); + + afterEach(() => { + Object.defineProperty(process, 'platform', { value: originalPlatform, writable: true }); + }); + + it('uses taskkill tree strategy for owned process on Windows', async () => { + setPlatform('win32'); + const child = new MockUtilityChild(4321); + const { 
terminateOwnedGatewayProcess } = await import('@electron/gateway/supervisor'); + + const stopPromise = terminateOwnedGatewayProcess(child as unknown as Electron.UtilityProcess); + child.emit('exit', 0); + await stopPromise; + + await vi.waitFor(() => { + expect(mockExec).toHaveBeenCalledWith( + 'taskkill /F /PID 4321 /T', + expect.objectContaining({ timeout: 5000, windowsHide: true }), + expect.any(Function), + ); + }); + expect(child.kill).not.toHaveBeenCalled(); + }); + + it('uses direct child.kill for owned process on non-Windows', async () => { + setPlatform('linux'); + const child = new MockUtilityChild(9876); + const { terminateOwnedGatewayProcess } = await import('@electron/gateway/supervisor'); + + const stopPromise = terminateOwnedGatewayProcess(child as unknown as Electron.UtilityProcess); + child.emit('exit', 0); + await stopPromise; + + expect(child.kill).toHaveBeenCalledTimes(1); + }); + + it('waits for port release after orphan cleanup on Windows', async () => { + setPlatform('win32'); + const { findExistingGatewayProcess } = await import('@electron/gateway/supervisor'); + + mockExec.mockImplementation((cmd: string, _opts: object, cb: (err: Error | null, stdout: string) => void) => { + if (cmd.includes('netstat -ano')) { + cb(null, ' TCP 127.0.0.1:18789 0.0.0.0:0 LISTENING 4321\n'); + return {} as never; + } + cb(null, ''); + return {} as never; + }); + + const result = await findExistingGatewayProcess({ port: 18789 }); + expect(result).toBeNull(); + + expect(mockExec).toHaveBeenCalledWith( + expect.stringContaining('taskkill /F /PID 4321 /T'), + expect.objectContaining({ timeout: 5000, windowsHide: true }), + expect.any(Function), + ); + expect(mockCreateServer).toHaveBeenCalled(); + }); +}); diff --git a/tests/unit/main-quit-lifecycle.test.ts b/tests/unit/main-quit-lifecycle.test.ts new file mode 100644 index 000000000..78799f842 --- /dev/null +++ b/tests/unit/main-quit-lifecycle.test.ts @@ -0,0 +1,23 @@ +import { describe, expect, it } from 
'vitest'; +import { + createQuitLifecycleState, + markQuitCleanupCompleted, + requestQuitLifecycleAction, +} from '@electron/main/quit-lifecycle'; + +describe('main quit lifecycle coordination', () => { + it('starts cleanup only once', () => { + const state = createQuitLifecycleState(); + + expect(requestQuitLifecycleAction(state)).toBe('start-cleanup'); + expect(requestQuitLifecycleAction(state)).toBe('cleanup-in-progress'); + }); + + it('allows quit after cleanup is marked complete', () => { + const state = createQuitLifecycleState(); + + expect(requestQuitLifecycleAction(state)).toBe('start-cleanup'); + markQuitCleanupCompleted(state); + expect(requestQuitLifecycleAction(state)).toBe('allow-quit'); + }); +}); diff --git a/tests/unit/process-instance-lock.test.ts b/tests/unit/process-instance-lock.test.ts new file mode 100644 index 000000000..3fa5cca99 --- /dev/null +++ b/tests/unit/process-instance-lock.test.ts @@ -0,0 +1,157 @@ +import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, describe, expect, it } from 'vitest'; +import { acquireProcessInstanceFileLock } from '@electron/main/process-instance-lock'; + +const tempDirs: string[] = []; + +function createTempDir(): string { + const dir = mkdtempSync(join(tmpdir(), 'clawx-instance-lock-')); + tempDirs.push(dir); + return dir; +} + +afterEach(() => { + while (tempDirs.length > 0) { + const dir = tempDirs.pop(); + if (!dir) continue; + rmSync(dir, { recursive: true, force: true }); + } +}); + +describe('process instance file lock', () => { + it('acquires lock and writes owner pid', () => { + const userDataDir = createTempDir(); + const lock = acquireProcessInstanceFileLock({ + userDataDir, + lockName: 'clawx', + pid: 12345, + }); + + const lockPath = join(userDataDir, 'clawx.instance.lock'); + expect(lock.acquired).toBe(true); + expect(existsSync(lockPath)).toBe(true); + 
expect(readFileSync(lockPath, 'utf8')).toBe('12345'); + + lock.release(); + expect(existsSync(lockPath)).toBe(false); + }); + + it('rejects a second lock when owner pid is alive', () => { + const userDataDir = createTempDir(); + const first = acquireProcessInstanceFileLock({ + userDataDir, + lockName: 'clawx', + pid: 2222, + isPidAlive: () => true, + }); + + const second = acquireProcessInstanceFileLock({ + userDataDir, + lockName: 'clawx', + pid: 3333, + isPidAlive: () => true, + }); + + expect(first.acquired).toBe(true); + expect(second.acquired).toBe(false); + expect(second.ownerPid).toBe(2222); + expect(second.ownerFormat).toBe('legacy'); + + first.release(); + }); + + it('replaces stale lock file when owner pid is not alive', () => { + const userDataDir = createTempDir(); + const lockPath = join(userDataDir, 'clawx.instance.lock'); + writeFileSync(lockPath, '4444', 'utf8'); + + const lock = acquireProcessInstanceFileLock({ + userDataDir, + lockName: 'clawx', + pid: 5555, + isPidAlive: () => false, + }); + + expect(lock.acquired).toBe(true); + expect(readFileSync(lockPath, 'utf8')).toBe('5555'); + lock.release(); + }); + + it('replaces stale structured lock file when owner pid is not alive', () => { + const userDataDir = createTempDir(); + const lockPath = join(userDataDir, 'clawx.instance.lock'); + writeFileSync(lockPath, JSON.stringify({ + schema: 'clawx-instance-lock', + version: 1, + pid: 7777, + }), 'utf8'); + + const lock = acquireProcessInstanceFileLock({ + userDataDir, + lockName: 'clawx', + pid: 6666, + isPidAlive: () => false, + }); + + expect(lock.acquired).toBe(true); + expect(readFileSync(lockPath, 'utf8')).toBe('6666'); + lock.release(); + }); + + it('does not treat malformed lock file content as stale', () => { + const userDataDir = createTempDir(); + const lockPath = join(userDataDir, 'clawx.instance.lock'); + writeFileSync(lockPath, 'not-a-pid', 'utf8'); + + const lock = acquireProcessInstanceFileLock({ + userDataDir, + lockName: 'clawx', + 
pid: 6666, + }); + + expect(lock.acquired).toBe(false); + expect(lock.ownerPid).toBeUndefined(); + expect(lock.ownerFormat).toBe('unknown'); + expect(readFileSync(lockPath, 'utf8')).toBe('not-a-pid'); + }); + + it('does not remove lock file if ownership changed before release', () => { + const userDataDir = createTempDir(); + const lockPath = join(userDataDir, 'clawx.instance.lock'); + const first = acquireProcessInstanceFileLock({ + userDataDir, + lockName: 'clawx', + pid: 1234, + }); + + // Simulate a new process acquiring the lock after a handover race. + writeFileSync(lockPath, '9999', 'utf8'); + first.release(); + + expect(readFileSync(lockPath, 'utf8')).toBe('9999'); + }); + + it('does not treat unknown structured lock schema as stale', () => { + const userDataDir = createTempDir(); + const lockPath = join(userDataDir, 'clawx.instance.lock'); + writeFileSync(lockPath, JSON.stringify({ + schema: 'future-lock-schema', + version: 2, + pid: 8888, + owner: 'future-build', + }), 'utf8'); + + const lock = acquireProcessInstanceFileLock({ + userDataDir, + lockName: 'clawx', + pid: 9999, + }); + + expect(lock.acquired).toBe(false); + expect(lock.ownerPid).toBeUndefined(); + expect(lock.ownerFormat).toBe('unknown'); + expect(readFileSync(lockPath, 'utf8')).toContain('future-lock-schema'); + }); +}); diff --git a/tests/unit/signal-quit.test.ts b/tests/unit/signal-quit.test.ts new file mode 100644 index 000000000..82d3570e4 --- /dev/null +++ b/tests/unit/signal-quit.test.ts @@ -0,0 +1,15 @@ +import { describe, expect, it, vi } from 'vitest'; +import { createSignalQuitHandler } from '@electron/main/signal-quit'; + +describe('signal quit handler', () => { + it('logs and requests quit when signal is received', () => { + const logInfo = vi.fn(); + const requestQuit = vi.fn(); + const handler = createSignalQuitHandler({ logInfo, requestQuit }); + + handler('SIGTERM'); + + expect(logInfo).toHaveBeenCalledWith('Received SIGTERM; requesting app quit'); + 
expect(requestQuit).toHaveBeenCalledTimes(1); + }); +});