Fix agent to channel (#485)
This commit is contained in:
committed by
GitHub
Unverified
parent
f6de56fa78
commit
9f2bc3cf68
@@ -5,10 +5,12 @@ import {
|
||||
createAgent,
|
||||
deleteAgentConfig,
|
||||
listAgentsSnapshot,
|
||||
removeAgentWorkspaceDirectory,
|
||||
resolveAccountIdForAgent,
|
||||
updateAgentName,
|
||||
} from '../../utils/agent-config';
|
||||
import { deleteChannelAccountConfig } from '../../utils/channel-config';
|
||||
import { syncAllProviderAuthToRuntime } from '../../services/providers/provider-runtime-sync';
|
||||
import type { HostApiContext } from '../context';
|
||||
import { parseJsonBody, sendJson } from '../route-utils';
|
||||
|
||||
@@ -20,6 +22,84 @@ function scheduleGatewayReload(ctx: HostApiContext, reason: string): void {
|
||||
void reason;
|
||||
}
|
||||
|
||||
import { exec } from 'child_process';
|
||||
import { promisify } from 'util';
|
||||
const execAsync = promisify(exec);
|
||||
|
||||
/**
|
||||
* Force a full Gateway process restart after agent deletion.
|
||||
*
|
||||
* A SIGUSR1 in-process reload is NOT sufficient here: channel plugins
|
||||
* (e.g. Feishu) maintain long-lived WebSocket connections to external
|
||||
* services and do not disconnect accounts that were removed from the
|
||||
* config during an in-process reload. The only reliable way to drop
|
||||
* stale bot connections is to kill the Gateway process entirely and
|
||||
* spawn a fresh one that reads the updated openclaw.json from scratch.
|
||||
*/
|
||||
async function restartGatewayForAgentDeletion(ctx: HostApiContext): Promise<void> {
|
||||
try {
|
||||
// Capture the PID of the running Gateway BEFORE stop() clears it.
|
||||
const status = ctx.gatewayManager.getStatus();
|
||||
const pid = status.pid;
|
||||
const port = status.port;
|
||||
console.log('[agents] Triggering Gateway restart (kill+respawn) after agent deletion', { pid, port });
|
||||
|
||||
// Force-kill the Gateway process by PID. The manager's stop() only
|
||||
// kills "owned" processes; if the manager connected to an already-
|
||||
// running Gateway (ownsProcess=false), stop() simply closes the WS
|
||||
// and the old process stays alive with its stale channel connections.
|
||||
if (pid) {
|
||||
try {
|
||||
process.kill(pid, 'SIGTERM');
|
||||
// Give it a moment to die
|
||||
await new Promise((resolve) => setTimeout(resolve, 500));
|
||||
try { process.kill(pid, 0); process.kill(pid, 'SIGKILL'); } catch { /* already dead */ }
|
||||
} catch {
|
||||
// process already gone – that's fine
|
||||
}
|
||||
} else if (port) {
|
||||
// If we don't know the PID (e.g. connected to an orphaned Gateway from
|
||||
// a previous pnpm dev run), forcefully kill whatever is on the port.
|
||||
try {
|
||||
if (process.platform === 'darwin' || process.platform === 'linux') {
|
||||
// MUST use -sTCP:LISTEN. Otherwise lsof returns the client process (ClawX itself)
|
||||
// that has an ESTABLISHED WebSocket connection to the port, causing us to kill ourselves.
|
||||
const { stdout } = await execAsync(`lsof -t -i :${port} -sTCP:LISTEN`);
|
||||
const pids = stdout.trim().split('\n').filter(Boolean);
|
||||
for (const p of pids) {
|
||||
try { process.kill(parseInt(p, 10), 'SIGTERM'); } catch { /* ignore */ }
|
||||
}
|
||||
await new Promise((resolve) => setTimeout(resolve, 500));
|
||||
for (const p of pids) {
|
||||
try { process.kill(parseInt(p, 10), 'SIGKILL'); } catch { /* ignore */ }
|
||||
}
|
||||
} else if (process.platform === 'win32') {
|
||||
// Find PID listening on the port
|
||||
const { stdout } = await execAsync(`netstat -ano | findstr :${port}`);
|
||||
const lines = stdout.trim().split('\n');
|
||||
const pids = new Set<string>();
|
||||
for (const line of lines) {
|
||||
const parts = line.trim().split(/\s+/);
|
||||
if (parts.length >= 5 && parts[1].endsWith(`:${port}`) && parts[3] === 'LISTENING') {
|
||||
pids.add(parts[4]);
|
||||
}
|
||||
}
|
||||
for (const p of pids) {
|
||||
try { await execAsync(`taskkill /F /PID ${p}`); } catch { /* ignore */ }
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Port might not be bound or command failed; ignore
|
||||
}
|
||||
}
|
||||
|
||||
await ctx.gatewayManager.restart();
|
||||
console.log('[agents] Gateway restart completed after agent deletion');
|
||||
} catch (err) {
|
||||
console.warn('[agents] Gateway restart after agent deletion failed:', err);
|
||||
}
|
||||
}
|
||||
|
||||
export async function handleAgentRoutes(
|
||||
req: IncomingMessage,
|
||||
res: ServerResponse,
|
||||
@@ -35,6 +115,13 @@ export async function handleAgentRoutes(
|
||||
try {
|
||||
const body = await parseJsonBody<{ name: string }>(req);
|
||||
const snapshot = await createAgent(body.name);
|
||||
// Sync provider API keys to the new agent's auth-profiles.json so the
|
||||
// embedded runner can authenticate with LLM providers when messages
|
||||
// arrive via channel bots (e.g. Feishu). Without this, the copied
|
||||
// auth-profiles.json may contain a stale key → 401 from the LLM.
|
||||
syncAllProviderAuthToRuntime().catch((err) => {
|
||||
console.warn('[agents] Failed to sync provider auth after agent creation:', err);
|
||||
});
|
||||
scheduleGatewayReload(ctx, 'create-agent');
|
||||
sendJson(res, 200, { success: true, ...snapshot });
|
||||
} catch (error) {
|
||||
@@ -81,8 +168,15 @@ export async function handleAgentRoutes(
|
||||
if (parts.length === 1) {
|
||||
try {
|
||||
const agentId = decodeURIComponent(parts[0]);
|
||||
const snapshot = await deleteAgentConfig(agentId);
|
||||
scheduleGatewayReload(ctx, 'delete-agent');
|
||||
const { snapshot, removedEntry } = await deleteAgentConfig(agentId);
|
||||
// Await reload synchronously BEFORE responding to the client.
|
||||
// This ensures the Feishu plugin has disconnected the deleted bot
|
||||
// before the UI shows "delete success" and the user tries chatting.
|
||||
await restartGatewayForAgentDeletion(ctx);
|
||||
// Delete workspace after reload so the new config is already live.
|
||||
await removeAgentWorkspaceDirectory(removedEntry).catch((err) => {
|
||||
console.warn('[agents] Failed to remove workspace after agent deletion:', err);
|
||||
});
|
||||
sendJson(res, 200, { success: true, ...snapshot });
|
||||
} catch (error) {
|
||||
sendJson(res, 500, { success: false, error: String(error) });
|
||||
|
||||
@@ -335,8 +335,8 @@ function getManagedWorkspaceDirectory(agent: AgentListEntry): string | null {
|
||||
return normalizedConfigured === normalizedManaged ? configuredWorkspace : null;
|
||||
}
|
||||
|
||||
async function removeAgentWorkspaceDirectory(agent: AgentListEntry): Promise<void> {
|
||||
const workspaceDir = getManagedWorkspaceDirectory(agent);
|
||||
export async function removeAgentWorkspaceDirectory(agent: { id: string; workspace?: string }): Promise<void> {
|
||||
const workspaceDir = getManagedWorkspaceDirectory(agent as AgentListEntry);
|
||||
if (!workspaceDir) {
|
||||
logger.warn('Skipping agent workspace deletion for unmanaged path', {
|
||||
agentId: agent.id,
|
||||
@@ -567,7 +567,7 @@ export async function updateAgentName(agentId: string, name: string): Promise<Ag
|
||||
});
|
||||
}
|
||||
|
||||
export async function deleteAgentConfig(agentId: string): Promise<AgentsSnapshot> {
|
||||
export async function deleteAgentConfig(agentId: string): Promise<{ snapshot: AgentsSnapshot; removedEntry: AgentListEntry }> {
|
||||
return withConfigLock(async () => {
|
||||
if (agentId === MAIN_AGENT_ID) {
|
||||
throw new Error('The main agent cannot be deleted');
|
||||
@@ -599,9 +599,14 @@ export async function deleteAgentConfig(agentId: string): Promise<AgentsSnapshot
|
||||
await writeOpenClawConfig(config);
|
||||
await deleteAgentChannelAccounts(agentId);
|
||||
await removeAgentRuntimeDirectory(agentId);
|
||||
await removeAgentWorkspaceDirectory(removedEntry);
|
||||
// NOTE: workspace directory is NOT deleted here intentionally.
|
||||
// The caller (route handler) defers workspace removal until after
|
||||
// the Gateway process has fully restarted, so that any in-flight
|
||||
// process.chdir(workspace) calls complete before the directory
|
||||
// disappears (otherwise process.cwd() throws ENOENT for the rest
|
||||
// of the Gateway's lifetime).
|
||||
logger.info('Deleted agent config entry', { agentId });
|
||||
return buildSnapshotFromConfig(config);
|
||||
return { snapshot: await buildSnapshotFromConfig(config), removedEntry };
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -24,6 +24,25 @@ const CHANNEL_TOP_LEVEL_KEYS_TO_KEEP = new Set(['accounts', 'defaultAccount', 'e
|
||||
// Channels that are managed as plugins (config goes under plugins.entries, not channels)
|
||||
const PLUGIN_CHANNELS = ['whatsapp'];
|
||||
|
||||
// Unique credential key per channel type – used for duplicate bot detection.
|
||||
// Maps each channel type to the field that uniquely identifies a bot/account.
|
||||
// When two agents try to use the same value for this field, the save is rejected.
|
||||
const CHANNEL_UNIQUE_CREDENTIAL_KEY: Record<string, string> = {
|
||||
feishu: 'appId',
|
||||
wecom: 'botId',
|
||||
dingtalk: 'clientId',
|
||||
telegram: 'botToken',
|
||||
discord: 'token',
|
||||
qqbot: 'appId',
|
||||
signal: 'phoneNumber',
|
||||
imessage: 'serverUrl',
|
||||
matrix: 'accessToken',
|
||||
line: 'channelAccessToken',
|
||||
msteams: 'appId',
|
||||
googlechat: 'serviceAccountKey',
|
||||
mattermost: 'botToken',
|
||||
};
|
||||
|
||||
// ── Helpers ──────────────────────────────────────────────────────
|
||||
|
||||
async function fileExists(p: string): Promise<boolean> {
|
||||
@@ -316,6 +335,39 @@ function migrateLegacyChannelConfigToAccounts(
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Throws if the unique credential (e.g. appId for Feishu) in `config` is
|
||||
* already registered under a *different* account in the same channel section.
|
||||
* This prevents two agents from silently sharing the same bot connection.
|
||||
*/
|
||||
function assertNoDuplicateCredential(
|
||||
channelType: string,
|
||||
config: ChannelConfigData,
|
||||
channelSection: ChannelConfigData,
|
||||
resolvedAccountId: string,
|
||||
): void {
|
||||
const uniqueKey = CHANNEL_UNIQUE_CREDENTIAL_KEY[channelType];
|
||||
if (!uniqueKey) return;
|
||||
|
||||
const incomingValue = config[uniqueKey];
|
||||
if (!incomingValue || typeof incomingValue !== 'string') return;
|
||||
|
||||
const accounts = channelSection.accounts as Record<string, ChannelConfigData> | undefined;
|
||||
if (!accounts) return;
|
||||
|
||||
for (const [existingAccountId, accountCfg] of Object.entries(accounts)) {
|
||||
if (existingAccountId === resolvedAccountId) continue;
|
||||
if (!accountCfg || typeof accountCfg !== 'object') continue;
|
||||
const existingValue = accountCfg[uniqueKey];
|
||||
if (typeof existingValue === 'string' && existingValue === incomingValue) {
|
||||
throw new Error(
|
||||
`The ${channelType} bot (${uniqueKey}: ${incomingValue}) is already bound to another agent (account: ${existingAccountId}). ` +
|
||||
`Each agent must use a unique bot.`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export async function saveChannelConfig(
|
||||
channelType: string,
|
||||
config: ChannelConfigData,
|
||||
@@ -358,6 +410,10 @@ export async function saveChannelConfig(
|
||||
|
||||
const channelSection = currentConfig.channels[channelType];
|
||||
migrateLegacyChannelConfigToAccounts(channelSection, DEFAULT_ACCOUNT_ID);
|
||||
|
||||
// Guard: reject if this bot/app credential is already used by another account.
|
||||
assertNoDuplicateCredential(channelType, config, channelSection, resolvedAccountId);
|
||||
|
||||
const existingAccountConfig = resolveAccountConfig(channelSection, resolvedAccountId);
|
||||
const transformedConfig = transformChannelConfig(channelType, config, existingAccountConfig);
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
* main thread.
|
||||
*/
|
||||
import { access, readFile, writeFile, readdir, mkdir, unlink } from 'fs/promises';
|
||||
import { constants, Dirent } from 'fs';
|
||||
import { constants } from 'fs';
|
||||
import { join } from 'path';
|
||||
import { homedir } from 'os';
|
||||
import { logger } from './logger';
|
||||
@@ -78,16 +78,11 @@ async function resolveAllWorkspaceDirs(): Promise<string[]> {
|
||||
// ignore config parse errors
|
||||
}
|
||||
|
||||
try {
|
||||
const entries: Dirent[] = await readdir(openclawDir, { withFileTypes: true });
|
||||
for (const entry of entries) {
|
||||
if (entry.isDirectory() && entry.name.startsWith('workspace')) {
|
||||
dirs.add(join(openclawDir, entry.name));
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// ignore read errors
|
||||
}
|
||||
// We intentionally do NOT scan ~/.openclaw/ for any directory starting
|
||||
// with 'workspace'. Doing so causes a race condition where a recently deleted
|
||||
// agent's workspace (e.g., workspace-code23) is found and resuscitated by
|
||||
// the context merge routine before its deletion finishes. Only workspaces
|
||||
// explicitly declared in openclaw.json should be seeded.
|
||||
|
||||
if (dirs.size === 0) {
|
||||
dirs.add(join(openclawDir, 'workspace'));
|
||||
|
||||
Reference in New Issue
Block a user