From 9f2bc3cf68f725c5a24d72645e7752ae4032f0ad Mon Sep 17 00:00:00 2001 From: paisley <8197966+su8su@users.noreply.github.com> Date: Sat, 14 Mar 2026 14:17:42 +0800 Subject: [PATCH] Fix agent to channel (#485) --- electron/api/routes/agents.ts | 98 +++++++++++++++++++++++++++- electron/utils/agent-config.ts | 15 +++-- electron/utils/channel-config.ts | 56 ++++++++++++++++ electron/utils/openclaw-workspace.ts | 17 ++--- 4 files changed, 168 insertions(+), 18 deletions(-) diff --git a/electron/api/routes/agents.ts b/electron/api/routes/agents.ts index e483dee64..963c0e108 100644 --- a/electron/api/routes/agents.ts +++ b/electron/api/routes/agents.ts @@ -5,10 +5,12 @@ import { createAgent, deleteAgentConfig, listAgentsSnapshot, + removeAgentWorkspaceDirectory, resolveAccountIdForAgent, updateAgentName, } from '../../utils/agent-config'; import { deleteChannelAccountConfig } from '../../utils/channel-config'; +import { syncAllProviderAuthToRuntime } from '../../services/providers/provider-runtime-sync'; import type { HostApiContext } from '../context'; import { parseJsonBody, sendJson } from '../route-utils'; @@ -20,6 +22,84 @@ function scheduleGatewayReload(ctx: HostApiContext, reason: string): void { void reason; } +import { exec } from 'child_process'; +import { promisify } from 'util'; +const execAsync = promisify(exec); + +/** + * Force a full Gateway process restart after agent deletion. + * + * A SIGUSR1 in-process reload is NOT sufficient here: channel plugins + * (e.g. Feishu) maintain long-lived WebSocket connections to external + * services and do not disconnect accounts that were removed from the + * config during an in-process reload. The only reliable way to drop + * stale bot connections is to kill the Gateway process entirely and + * spawn a fresh one that reads the updated openclaw.json from scratch. + */ +async function restartGatewayForAgentDeletion(ctx: HostApiContext): Promise { + try { + // Capture the PID of the running Gateway BEFORE stop() clears it. + const status = ctx.gatewayManager.getStatus(); + const pid = status.pid; + const port = status.port; + console.log('[agents] Triggering Gateway restart (kill+respawn) after agent deletion', { pid, port }); + + // Force-kill the Gateway process by PID. The manager's stop() only + // kills "owned" processes; if the manager connected to an already- + // running Gateway (ownsProcess=false), stop() simply closes the WS + // and the old process stays alive with its stale channel connections. + if (pid) { + try { + process.kill(pid, 'SIGTERM'); + // Give it a moment to die + await new Promise((resolve) => setTimeout(resolve, 500)); + try { process.kill(pid, 0); process.kill(pid, 'SIGKILL'); } catch { /* already dead */ } + } catch { + // process already gone – that's fine + } + } else if (port) { + // If we don't know the PID (e.g. connected to an orphaned Gateway from + // a previous pnpm dev run), forcefully kill whatever is on the port. + try { + if (process.platform === 'darwin' || process.platform === 'linux') { + // MUST use -sTCP:LISTEN. Otherwise lsof returns the client process (ClawX itself) + // that has an ESTABLISHED WebSocket connection to the port, causing us to kill ourselves. + const { stdout } = await execAsync(`lsof -t -i :${port} -sTCP:LISTEN`); + const pids = stdout.trim().split('\n').filter(Boolean); + for (const p of pids) { + try { process.kill(parseInt(p, 10), 'SIGTERM'); } catch { /* ignore */ } + } + await new Promise((resolve) => setTimeout(resolve, 500)); + for (const p of pids) { + try { process.kill(parseInt(p, 10), 'SIGKILL'); } catch { /* ignore */ } + } + } else if (process.platform === 'win32') { + // Find PID listening on the port + const { stdout } = await execAsync(`netstat -ano | findstr :${port}`); + const lines = stdout.trim().split('\n'); + const pids = new Set(); + for (const line of lines) { + const parts = line.trim().split(/\s+/); + if (parts.length >= 5 && parts[1].endsWith(`:${port}`) && parts[3] === 'LISTENING') { + pids.add(parts[4]); + } + } + for (const p of pids) { + try { await execAsync(`taskkill /F /PID ${p}`); } catch { /* ignore */ } + } + } + } catch { + // Port might not be bound or command failed; ignore + } + } + + await ctx.gatewayManager.restart(); + console.log('[agents] Gateway restart completed after agent deletion'); + } catch (err) { + console.warn('[agents] Gateway restart after agent deletion failed:', err); + } +} + export async function handleAgentRoutes( req: IncomingMessage, res: ServerResponse, @@ -35,6 +115,13 @@ export async function handleAgentRoutes( try { const body = await parseJsonBody<{ name: string }>(req); const snapshot = await createAgent(body.name); + // Sync provider API keys to the new agent's auth-profiles.json so the + // embedded runner can authenticate with LLM providers when messages + // arrive via channel bots (e.g. Feishu). Without this, the copied + // auth-profiles.json may contain a stale key → 401 from the LLM. + syncAllProviderAuthToRuntime().catch((err) => { + console.warn('[agents] Failed to sync provider auth after agent creation:', err); + }); scheduleGatewayReload(ctx, 'create-agent'); sendJson(res, 200, { success: true, ...snapshot }); } catch (error) { @@ -81,8 +168,15 @@ export async function handleAgentRoutes( if (parts.length === 1) { try { const agentId = decodeURIComponent(parts[0]); - const snapshot = await deleteAgentConfig(agentId); - scheduleGatewayReload(ctx, 'delete-agent'); + const { snapshot, removedEntry } = await deleteAgentConfig(agentId); + // Await reload synchronously BEFORE responding to the client. + // This ensures the Feishu plugin has disconnected the deleted bot + // before the UI shows "delete success" and the user tries chatting. + await restartGatewayForAgentDeletion(ctx); + // Delete workspace after reload so the new config is already live. + await removeAgentWorkspaceDirectory(removedEntry).catch((err) => { + console.warn('[agents] Failed to remove workspace after agent deletion:', err); + }); sendJson(res, 200, { success: true, ...snapshot }); } catch (error) { sendJson(res, 500, { success: false, error: String(error) }); diff --git a/electron/utils/agent-config.ts b/electron/utils/agent-config.ts index 5d82f0909..49c9b7aaf 100644 --- a/electron/utils/agent-config.ts +++ b/electron/utils/agent-config.ts @@ -335,8 +335,8 @@ function getManagedWorkspaceDirectory(agent: AgentListEntry): string | null { return normalizedConfigured === normalizedManaged ? configuredWorkspace : null; } -async function removeAgentWorkspaceDirectory(agent: AgentListEntry): Promise { - const workspaceDir = getManagedWorkspaceDirectory(agent); +export async function removeAgentWorkspaceDirectory(agent: { id: string; workspace?: string }): Promise { + const workspaceDir = getManagedWorkspaceDirectory(agent as AgentListEntry); if (!workspaceDir) { logger.warn('Skipping agent workspace deletion for unmanaged path', { agentId: agent.id, @@ -567,7 +567,7 @@ export async function updateAgentName(agentId: string, name: string): Promise { +export async function deleteAgentConfig(agentId: string): Promise<{ snapshot: AgentsSnapshot; removedEntry: AgentListEntry }> { return withConfigLock(async () => { if (agentId === MAIN_AGENT_ID) { throw new Error('The main agent cannot be deleted'); @@ -599,9 +599,14 @@ export async function deleteAgentConfig(agentId: string): Promise = { + feishu: 'appId', + wecom: 'botId', + dingtalk: 'clientId', + telegram: 'botToken', + discord: 'token', + qqbot: 'appId', + signal: 'phoneNumber', + imessage: 'serverUrl', + matrix: 'accessToken', + line: 'channelAccessToken', + msteams: 'appId', + googlechat: 'serviceAccountKey', + mattermost: 'botToken', +}; + // ── Helpers ────────────────────────────────────────────────────── async function fileExists(p: string): Promise { @@ -316,6 +335,39 @@ function migrateLegacyChannelConfigToAccounts( } } +/** + * Throws if the unique credential (e.g. appId for Feishu) in `config` is + * already registered under a *different* account in the same channel section. + * This prevents two agents from silently sharing the same bot connection. + */ +function assertNoDuplicateCredential( + channelType: string, + config: ChannelConfigData, + channelSection: ChannelConfigData, + resolvedAccountId: string, +): void { + const uniqueKey = CHANNEL_UNIQUE_CREDENTIAL_KEY[channelType]; + if (!uniqueKey) return; + + const incomingValue = config[uniqueKey]; + if (!incomingValue || typeof incomingValue !== 'string') return; + + const accounts = channelSection.accounts as Record | undefined; + if (!accounts) return; + + for (const [existingAccountId, accountCfg] of Object.entries(accounts)) { + if (existingAccountId === resolvedAccountId) continue; + if (!accountCfg || typeof accountCfg !== 'object') continue; + const existingValue = accountCfg[uniqueKey]; + if (typeof existingValue === 'string' && existingValue === incomingValue) { + throw new Error( + `The ${channelType} bot (${uniqueKey}: ${incomingValue}) is already bound to another agent (account: ${existingAccountId}). ` + + `Each agent must use a unique bot.`, + ); + } + } +} + export async function saveChannelConfig( channelType: string, config: ChannelConfigData, @@ -358,6 +410,10 @@ export async function saveChannelConfig( const channelSection = currentConfig.channels[channelType]; migrateLegacyChannelConfigToAccounts(channelSection, DEFAULT_ACCOUNT_ID); + + // Guard: reject if this bot/app credential is already used by another account. + assertNoDuplicateCredential(channelType, config, channelSection, resolvedAccountId); + const existingAccountConfig = resolveAccountConfig(channelSection, resolvedAccountId); const transformedConfig = transformChannelConfig(channelType, config, existingAccountConfig); diff --git a/electron/utils/openclaw-workspace.ts b/electron/utils/openclaw-workspace.ts index 673dee1c1..2dc5725d3 100644 --- a/electron/utils/openclaw-workspace.ts +++ b/electron/utils/openclaw-workspace.ts @@ -5,7 +5,7 @@ * main thread. */ import { access, readFile, writeFile, readdir, mkdir, unlink } from 'fs/promises'; -import { constants, Dirent } from 'fs'; +import { constants } from 'fs'; import { join } from 'path'; import { homedir } from 'os'; import { logger } from './logger'; @@ -78,16 +78,11 @@ async function resolveAllWorkspaceDirs(): Promise { // ignore config parse errors } - try { - const entries: Dirent[] = await readdir(openclawDir, { withFileTypes: true }); - for (const entry of entries) { - if (entry.isDirectory() && entry.name.startsWith('workspace')) { - dirs.add(join(openclawDir, entry.name)); - } - } - } catch { - // ignore read errors - } + // We intentionally do NOT scan ~/.openclaw/ for any directory starting + // with 'workspace'. Doing so causes a race condition where a recently deleted + // agent's workspace (e.g., workspace-code23) is found and resuscitated by + // the context merge routine before its deletion finishes. Only workspaces + // explicitly declared in openclaw.json should be seeded. if (dirs.size === 0) { dirs.add(join(openclawDir, 'workspace'));