feat(gateway): enhance gateway readiness handling and batch sync configuration (#851)

Co-authored-by: paisley <8197966+su8su@users.noreply.github.com>
This commit is contained in:
Haze
2026-04-14 15:42:37 +08:00
committed by GitHub
Unverified
parent 758a8f8c94
commit 30bd8c08f9
14 changed files with 626 additions and 69 deletions

View File

@@ -64,6 +64,7 @@ import {
normalizeSlackMessagingTarget,
normalizeWhatsAppMessagingTarget,
} from '../../utils/openclaw-sdk';
import { logger } from '../../utils/logger';
// listWhatsAppDirectory*FromConfig were removed from openclaw's public exports
// in 2026.3.23-1. No-op stubs; WhatsApp target picker uses session discovery.
@@ -263,11 +264,11 @@ function scheduleGatewayChannelSaveRefresh(
return;
}
if (FORCE_RESTART_CHANNELS.has(storedChannelType)) {
ctx.gatewayManager.debouncedRestart();
ctx.gatewayManager.debouncedRestart(150);
void reason;
return;
}
ctx.gatewayManager.debouncedReload();
ctx.gatewayManager.debouncedReload(150);
void reason;
}
@@ -416,6 +417,28 @@ interface ChannelAccountsView {
accounts: ChannelAccountView[];
}
/**
 * Condense a `channels.status` payload into one short line suitable for logs.
 *
 * Output shape: `<channelType>:<channelStatus>{<account>|<account>...}, ...`
 * where each account is rendered as `<accountId>[c?r?l?p?e?]` and every `?`
 * is `1` or `0` for connected / running / linked / probe.ok / has-lastError.
 * At most 12 channels and 4 accounts per channel are included.
 *
 * @param status - Payload returned by the gateway, or null when unavailable.
 * @returns `'none'` when there is no `channelAccounts` map, `'empty'` when the
 *          map has no entries, otherwise the digest described above.
 */
function buildGatewayStatusSnapshot(status: GatewayChannelStatusPayload | null): string {
  const channelAccounts = status?.channelAccounts;
  if (!channelAccounts) {
    return 'none';
  }

  const perChannel = Object.entries(channelAccounts);
  if (perChannel.length === 0) {
    return 'empty';
  }

  // Encode a boolean as the single digit used inside the bracketed flag group.
  const bit = (on: boolean): string => (on ? '1' : '0');

  const channelParts: string[] = [];
  for (const [channelType, accounts] of perChannel.slice(0, 12)) {
    const channelStatus = pickChannelRuntimeStatus(accounts);

    const accountParts: string[] = [];
    for (const account of accounts.slice(0, 4)) {
      const rawId = account.accountId;
      const accountId = typeof rawId === 'string' ? rawId : 'default';
      const rawError = account.lastError;
      const hasErr = typeof rawError === 'string' && rawError.trim().length > 0;

      const c = bit(account.connected === true);
      const r = bit(account.running === true);
      const l = bit(account.linked === true);
      const p = bit(account.probe?.ok === true);
      const e = bit(hasErr);
      accountParts.push(`${accountId}[c${c}r${r}l${l}p${p}e${e}]`);
    }

    channelParts.push(`${channelType}:${channelStatus}{${accountParts.join('|')}}`);
  }
  return channelParts.join(', ');
}
function shouldIncludeRuntimeAccountId(
accountId: string,
configuredAccountIds: Set<string>,
@@ -458,7 +481,11 @@ const CHANNEL_TARGET_CACHE_TTL_MS = 60_000;
const CHANNEL_TARGET_CACHE_ENABLED = process.env.VITEST !== 'true';
const channelTargetCache = new Map<string, { expiresAt: number; targets: ChannelTargetOptionView[] }>();
async function buildChannelAccountsView(ctx: HostApiContext): Promise<ChannelAccountsView[]> {
async function buildChannelAccountsView(
ctx: HostApiContext,
options?: { probe?: boolean },
): Promise<ChannelAccountsView[]> {
const startedAt = Date.now();
// Read config once and share across all sub-calls (was 5 readFile calls before).
const openClawConfig = await readOpenClawConfig();
@@ -470,11 +497,24 @@ async function buildChannelAccountsView(ctx: HostApiContext): Promise<ChannelAcc
let gatewayStatus: GatewayChannelStatusPayload | null;
try {
// probe: false — use cached runtime state instead of active network probes
// per channel. Real-time status updates arrive via channel.status events.
// probe=false uses cached runtime state (lighter); probe=true forces
// adapter-level connectivity checks for faster post-restart convergence.
const probe = options?.probe === true;
// 8s timeout — fail fast when Gateway is busy with AI tasks.
gatewayStatus = await ctx.gatewayManager.rpc<GatewayChannelStatusPayload>('channels.status', { probe: false }, 8000);
const rpcStartedAt = Date.now();
gatewayStatus = await ctx.gatewayManager.rpc<GatewayChannelStatusPayload>(
'channels.status',
{ probe },
probe ? 5000 : 8000,
);
logger.info(
`[channels.accounts] channels.status probe=${probe ? '1' : '0'} elapsedMs=${Date.now() - rpcStartedAt} snapshot=${buildGatewayStatusSnapshot(gatewayStatus)}`
);
} catch {
const probe = options?.probe === true;
logger.warn(
`[channels.accounts] channels.status probe=${probe ? '1' : '0'} failed after ${Date.now() - startedAt}ms`
);
gatewayStatus = null;
}
@@ -553,7 +593,11 @@ async function buildChannelAccountsView(ctx: HostApiContext): Promise<ChannelAcc
});
}
return channels.sort((left, right) => left.channelType.localeCompare(right.channelType));
const sorted = channels.sort((left, right) => left.channelType.localeCompare(right.channelType));
logger.info(
`[channels.accounts] response probe=${options?.probe === true ? '1' : '0'} elapsedMs=${Date.now() - startedAt} view=${sorted.map((item) => `${item.channelType}:${item.status}`).join(',')}`
);
return sorted;
}
function buildChannelTargetLabel(baseLabel: string, value: string): string {
@@ -1147,7 +1191,9 @@ export async function handleChannelRoutes(
if (url.pathname === '/api/channels/accounts' && req.method === 'GET') {
try {
const channels = await buildChannelAccountsView(ctx);
const probe = url.searchParams.get('probe') === '1';
logger.info(`[channels.accounts] request probe=${probe ? '1' : '0'}`);
const channels = await buildChannelAccountsView(ctx, { probe });
sendJson(res, 200, { success: true, channels });
} catch (error) {
sendJson(res, 500, { success: false, error: String(error) });

View File

@@ -20,8 +20,8 @@ import { getApiKey, getDefaultProvider, getProvider } from '../utils/secure-stor
import { getProviderEnvVar, getKeyableProviderTypes } from '../utils/provider-registry';
import { getOpenClawDir, getOpenClawEntryPath, isOpenClawPresent } from '../utils/paths';
import { getUvMirrorEnv } from '../utils/uv-env';
import { cleanupDanglingWeChatPluginState, listConfiguredChannels, readOpenClawConfig } from '../utils/channel-config';
import { syncGatewayTokenToConfig, syncBrowserConfigToOpenClaw, syncSessionIdleMinutesToOpenClaw, sanitizeOpenClawConfig } from '../utils/openclaw-auth';
import { cleanupDanglingWeChatPluginState, listConfiguredChannelsFromConfig, readOpenClawConfig } from '../utils/channel-config';
import { sanitizeOpenClawConfig, batchSyncConfigFields } from '../utils/openclaw-auth';
import { buildProxyEnv, resolveProxySettings } from '../utils/proxy';
import { syncProxyConfigToOpenClaw } from '../utils/openclaw-proxy';
import { logger } from '../utils/logger';
@@ -180,7 +180,20 @@ function ensureConfiguredPluginsUpgraded(configuredChannels: string[]): void {
* resolution algorithm find them. Skip-if-exists avoids overwriting
* openclaw's own deps (they take priority).
*/
// Set to true once ensureExtensionDepsResolvable() has finished a pass; while
// true, that function returns immediately without re-scanning.
let _extensionDepsLinked = false;

/**
 * Clear the "extension deps already linked" flag.
 *
 * After this call the next ensureExtensionDepsResolvable() invocation performs
 * a full scan-and-link pass again instead of short-circuiting. It is invoked
 * ahead of every Gateway launch so extensions installed while the app was
 * running are picked up.
 */
export function resetExtensionDepsLinked(): void {
  _extensionDepsLinked = false;
}
function ensureExtensionDepsResolvable(openclawDir: string): void {
if (_extensionDepsLinked) return;
const extDir = join(openclawDir, 'dist', 'extensions');
const topNM = join(openclawDir, 'node_modules');
let linkedCount = 0;
@@ -229,6 +242,8 @@ function ensureExtensionDepsResolvable(openclawDir: string): void {
if (linkedCount > 0) {
logger.info(`[extension-deps] Linked ${linkedCount} extension packages into ${topNM}`);
}
_extensionDepsLinked = true;
}
// ── Pre-launch sync ──────────────────────────────────────────────
@@ -236,6 +251,11 @@ function ensureExtensionDepsResolvable(openclawDir: string): void {
export async function syncGatewayConfigBeforeLaunch(
appSettings: Awaited<ReturnType<typeof getAllSettings>>,
): Promise<void> {
// Reset the extension-deps cache so that newly installed extensions
// (e.g. user added a channel while the app was running) get their
// node_modules linked on the next Gateway spawn.
resetExtensionDepsLinked();
await syncProxyConfigToOpenClaw(appSettings, { preserveExistingWhenDisabled: true });
try {
@@ -260,21 +280,20 @@ export async function syncGatewayConfigBeforeLaunch(
// Auto-upgrade installed plugins before Gateway starts so that
// the plugin manifest ID matches what sanitize wrote to the config.
// Read config once and reuse for both listConfiguredChannels and plugins.allow.
try {
const configuredChannels = await listConfiguredChannels();
const rawCfg = await readOpenClawConfig();
const configuredChannels = await listConfiguredChannelsFromConfig(rawCfg);
// Also ensure plugins referenced in plugins.allow are installed even if
// they have no channels.X section yet (e.g. qqbot added via plugins.allow
// but never fully saved through ClawX UI).
try {
const rawCfg = await readOpenClawConfig();
const allowList = Array.isArray(rawCfg.plugins?.allow) ? (rawCfg.plugins!.allow as string[]) : [];
// Build reverse maps: dirName → channelType AND known manifest IDs → channelType
const pluginIdToChannel: Record<string, string> = {};
for (const [channelType, info] of Object.entries(CHANNEL_PLUGIN_MAP)) {
pluginIdToChannel[info.dirName] = channelType;
}
// Known manifest IDs that differ from their dirName/channelType
pluginIdToChannel['openclaw-lark'] = 'feishu';
pluginIdToChannel['feishu-openclaw-plugin'] = 'feishu';
@@ -295,22 +314,11 @@ export async function syncGatewayConfigBeforeLaunch(
logger.warn('Failed to auto-upgrade plugins:', err);
}
// Batch gateway token, browser config, and session idle into one read+write cycle.
try {
await syncGatewayTokenToConfig(appSettings.gatewayToken);
await batchSyncConfigFields(appSettings.gatewayToken);
} catch (err) {
logger.warn('Failed to sync gateway token to openclaw.json:', err);
}
try {
await syncBrowserConfigToOpenClaw();
} catch (err) {
logger.warn('Failed to sync browser config to openclaw.json:', err);
}
try {
await syncSessionIdleMinutesToOpenClaw();
} catch (err) {
logger.warn('Failed to sync session idle minutes to openclaw.json:', err);
logger.warn('Failed to batch-sync config fields to openclaw.json:', err);
}
}
@@ -360,7 +368,8 @@ async function resolveChannelStartupPolicy(): Promise<{
channelStartupSummary: string;
}> {
try {
const configuredChannels = await listConfiguredChannels();
const rawCfg = await readOpenClawConfig();
const configuredChannels = await listConfiguredChannelsFromConfig(rawCfg);
if (configuredChannels.length === 0) {
return {
skipChannels: true,

View File

@@ -23,8 +23,13 @@ export function dispatchProtocolEvent(
break;
}
case 'channel.status':
case 'channel.status_changed':
emitter.emit('channel:status', payload as { channelId: string; status: string });
break;
case 'gateway.ready':
case 'ready':
emitter.emit('gateway:ready', payload);
break;
default:
emitter.emit('notification', { method: event, params: payload });
}

View File

@@ -61,6 +61,8 @@ export interface GatewayStatus {
connectedAt?: number;
version?: string;
reconnectAttempts?: number;
/** True once the gateway's internal subsystems (skills, plugins) are ready for RPC calls. */
gatewayReady?: boolean;
}
/**
@@ -119,9 +121,11 @@ export class GatewayManager extends EventEmitter {
private static readonly HEARTBEAT_TIMEOUT_MS_WIN = 25_000;
private static readonly HEARTBEAT_MAX_MISSES_WIN = 5;
public static readonly RESTART_COOLDOWN_MS = 5_000;
private static readonly GATEWAY_READY_FALLBACK_MS = 30_000;
private lastRestartAt = 0;
/** Set by scheduleReconnect() before calling start() to signal auto-reconnect. */
private isAutoReconnectStart = false;
private gatewayReadyFallbackTimer: NodeJS.Timeout | null = null;
constructor(config?: Partial<ReconnectConfig>) {
super();
@@ -152,6 +156,14 @@ export class GatewayManager extends EventEmitter {
this.reconnectConfig = { ...DEFAULT_RECONNECT_CONFIG, ...config };
// Device identity is loaded lazily in start() — not in the constructor —
// so that async file I/O and key generation don't block module loading.
this.on('gateway:ready', () => {
this.clearGatewayReadyFallback();
if (this.status.state === 'running' && !this.status.gatewayReady) {
logger.info('Gateway subsystems ready (event received)');
this.setStatus({ gatewayReady: true });
}
});
}
private async initDeviceIdentity(): Promise<void> {
@@ -231,12 +243,16 @@ export class GatewayManager extends EventEmitter {
this.reconnectAttempts = 0;
}
this.isAutoReconnectStart = false; // consume the flag
this.setStatus({ state: 'starting', reconnectAttempts: this.reconnectAttempts });
this.setStatus({ state: 'starting', reconnectAttempts: this.reconnectAttempts, gatewayReady: false });
// Check if Python environment is ready (self-healing) asynchronously.
// Fire-and-forget: only needs to run once, not on every retry.
warmupManagedPythonReadiness();
const t0 = Date.now();
let tSpawned = 0;
let tReady = 0;
try {
await runGatewayStartupSequence({
port: this.status.port,
@@ -262,7 +278,6 @@ export class GatewayManager extends EventEmitter {
await this.connect(port, externalToken);
},
onConnectedToExistingGateway: () => {
// If the existing gateway is actually our own spawned UtilityProcess
// (e.g. after a self-restart code=1012), keep ownership so that
// stop() can still terminate the process during a restart() cycle.
@@ -288,16 +303,24 @@ export class GatewayManager extends EventEmitter {
},
startProcess: async () => {
await this.startProcess();
tSpawned = Date.now();
},
waitForReady: async (port) => {
await waitForGatewayReady({
port,
getProcessExitCode: () => this.processExitCode,
});
tReady = Date.now();
},
onConnectedToManagedGateway: () => {
this.startHealthCheck();
logger.debug('Gateway started successfully');
const tConnected = Date.now();
logger.info('[metric] gateway.startup', {
configSyncMs: tSpawned ? tSpawned - t0 : undefined,
spawnToReadyMs: tReady && tSpawned ? tReady - tSpawned : undefined,
readyToConnectMs: tReady ? tConnected - tReady : undefined,
totalMs: tConnected - t0,
});
},
runDoctorRepair: async () => await runOpenClawDoctorRepair(),
onDoctorRepairSuccess: () => {
@@ -390,7 +413,7 @@ export class GatewayManager extends EventEmitter {
this.restartController.resetDeferredRestart();
this.isAutoReconnectStart = false;
this.setStatus({ state: 'stopped', error: undefined, pid: undefined, connectedAt: undefined, uptime: undefined });
this.setStatus({ state: 'stopped', error: undefined, pid: undefined, connectedAt: undefined, uptime: undefined, gatewayReady: undefined });
}
/**
@@ -663,6 +686,25 @@ export class GatewayManager extends EventEmitter {
clearTimeout(this.reloadDebounceTimer);
this.reloadDebounceTimer = null;
}
this.clearGatewayReadyFallback();
}
/**
 * Cancel the pending gateway-ready fallback timer, if one is armed, and
 * forget its handle. Does nothing when no timer is pending.
 */
private clearGatewayReadyFallback(): void {
  const pendingTimer = this.gatewayReadyFallbackTimer;
  if (!pendingTimer) {
    return;
  }
  clearTimeout(pendingTimer);
  this.gatewayReadyFallbackTimer = null;
}
/**
 * Arm (or re-arm) the gateway-ready fallback timer.
 *
 * Any previously armed timer is cancelled first. When the timer fires after
 * GATEWAY_READY_FALLBACK_MS and the manager is still `running` without
 * `gatewayReady` set, the status is flipped to `gatewayReady: true`.
 */
private scheduleGatewayReadyFallback(): void {
  this.clearGatewayReadyFallback();

  const onFallbackElapsed = (): void => {
    // The timer has fired, so drop the handle before doing anything else.
    this.gatewayReadyFallbackTimer = null;

    const stillWaitingForReady = this.status.state === 'running' && !this.status.gatewayReady;
    if (!stillWaitingForReady) {
      return;
    }
    logger.info('Gateway ready fallback triggered (no gateway.ready event within timeout)');
    this.setStatus({ gatewayReady: true });
  };

  this.gatewayReadyFallbackTimer = setTimeout(
    onFallbackElapsed,
    GatewayManager.GATEWAY_READY_FALLBACK_MS,
  );
}
/**
@@ -843,6 +885,7 @@ export class GatewayManager extends EventEmitter {
connectedAt: Date.now(),
});
this.startPing();
this.scheduleGatewayReadyFallback();
},
onMessage: (message) => {
this.handleMessage(message);

View File

@@ -1452,7 +1452,7 @@ function registerOpenClawHandlers(gatewayManager: GatewayManager): void {
const scheduleGatewayChannelRestart = (reason: string): void => {
if (gatewayManager.getStatus().state !== 'stopped') {
logger.info(`Scheduling Gateway restart after ${reason}`);
gatewayManager.debouncedRestart();
gatewayManager.debouncedRestart(150);
} else {
logger.info(`Gateway is stopped; skip immediate restart after ${reason}`);
}
@@ -1465,11 +1465,11 @@ function registerOpenClawHandlers(gatewayManager: GatewayManager): void {
}
if (forceRestartChannels.has(channelType)) {
logger.info(`Scheduling Gateway restart after ${reason}`);
gatewayManager.debouncedRestart();
gatewayManager.debouncedRestart(150);
return;
}
logger.info(`Scheduling Gateway reload after ${reason}`);
gatewayManager.debouncedReload();
gatewayManager.debouncedReload(150);
};
// Get OpenClaw package status

View File

@@ -1235,6 +1235,89 @@ export async function syncSessionIdleMinutesToOpenClaw(): Promise<void> {
});
}
/**
 * Batch-apply gateway token, browser config, and session idle minutes in a
 * single config lock + read + write cycle. Replaces three separate
 * withConfigLock calls during pre-launch sync.
 *
 * What is applied:
 *  - `gateway.auth` is forced to `{ mode: 'token', token }` (other auth keys kept).
 *  - `'file://'` is appended to `gateway.controlUi.allowedOrigins` if missing.
 *  - `gateway.mode` defaults to `'local'` when falsy.
 *  - `browser.enabled` / `browser.defaultProfile` are defaulted only when undefined.
 *  - `session.idleMinutes` is defaulted only when none of `idleMinutes`,
 *    `reset`, `resetByType`, `resetByChannel` is set.
 *
 * The config is written back on every call: the gateway auth section is
 * (re)applied unconditionally, so there is no "nothing changed" path.
 *
 * @param token - Gateway auth token stored at `gateway.auth.token`.
 * @returns Whatever `withConfigLock` resolves with for the callback (no value).
 */
export async function batchSyncConfigFields(token: string): Promise<void> {
  const DEFAULT_IDLE_MINUTES = 10_080; // 7 days

  // Shallow-copy an object-valued config section; any other value becomes {}.
  const cloneSection = (value: unknown): Record<string, unknown> =>
    value && typeof value === 'object' ? { ...(value as Record<string, unknown>) } : {};

  return withConfigLock(async () => {
    const config = await readOpenClawJson();

    // ── Gateway token + controlUi ──
    const gateway = cloneSection(config.gateway);

    const auth = cloneSection(gateway.auth);
    auth.mode = 'token';
    auth.token = token;
    gateway.auth = auth;

    const controlUi = cloneSection(gateway.controlUi);
    // Non-string entries are dropped from the list only when it has to be rewritten.
    const allowedOrigins = Array.isArray(controlUi.allowedOrigins)
      ? (controlUi.allowedOrigins as unknown[]).filter((v): v is string => typeof v === 'string')
      : [];
    if (!allowedOrigins.includes('file://')) {
      controlUi.allowedOrigins = [...allowedOrigins, 'file://'];
    }
    gateway.controlUi = controlUi;

    if (!gateway.mode) gateway.mode = 'local';
    config.gateway = gateway;

    // ── Browser config ──
    // Only fill in defaults; explicit user values (including `false`) are kept.
    const browser = cloneSection(config.browser);
    if (browser.enabled === undefined) {
      browser.enabled = true;
      config.browser = browser;
    }
    if (browser.defaultProfile === undefined) {
      browser.defaultProfile = 'openclaw';
      config.browser = browser;
    }

    // ── Session idle minutes ──
    // Any explicit session lifetime/reset setting suppresses the default.
    const session = cloneSection(config.session);
    const hasExplicitSessionConfig = session.idleMinutes !== undefined
      || session.reset !== undefined
      || session.resetByType !== undefined
      || session.resetByChannel !== undefined;
    if (!hasExplicitSessionConfig) {
      session.idleMinutes = DEFAULT_IDLE_MINUTES;
      config.session = session;
    }

    // Always persist: the previous `modified` flag started as `true`, so the
    // write was never actually conditional.
    await writeOpenClawJson(config);
    console.log('Synced gateway token, browser config, and session idle to openclaw.json');
  });
}
/**
* Update a provider entry in every discovered agent's models.json.
*/
@@ -1670,9 +1753,10 @@ export async function sanitizeOpenClawConfig(): Promise<void> {
// that conflicts with the official @larksuite/openclaw-lark plugin
// (id: 'openclaw-lark'). When the canonical feishu plugin is NOT the
// built-in 'feishu' itself, we must:
// 1. Remove bare 'feishu' from plugins.allow
// 2. Always set plugins.entries.feishu = { enabled: false } to explicitly
// disable the built-in — it loads automatically unless disabled.
// 1. Remove bare 'feishu' from plugins.allow (already done above at line ~1648)
// 2. Delete plugins.entries.feishu entirely — keeping it with enabled:false
// causes the Gateway to report the feishu channel as "disabled".
// Since 'feishu' is not in plugins.allow, the built-in won't load.
const allowArr2 = Array.isArray(pluginsObj.allow) ? pluginsObj.allow as string[] : [];
const hasCanonicalFeishu = allowArr2.includes(canonicalFeishuId) || !!pEntries[canonicalFeishuId];
if (hasCanonicalFeishu && canonicalFeishuId !== 'feishu') {
@@ -1683,11 +1767,13 @@ export async function sanitizeOpenClawConfig(): Promise<void> {
console.log('[sanitize] Removed bare "feishu" from plugins.allow (openclaw-lark plugin is configured)');
modified = true;
}
// Always ensure the built-in feishu plugin is explicitly disabled.
// Built-in extensions load automatically unless plugins.entries.<id>.enabled = false.
if (!pEntries.feishu || pEntries.feishu.enabled !== false) {
pEntries.feishu = { ...(pEntries.feishu || {}), enabled: false };
console.log('[sanitize] Disabled built-in feishu plugin (openclaw-lark plugin is configured)');
// Delete the built-in feishu entry entirely instead of setting enabled:false.
// Setting enabled:false causes the Gateway to report the channel as "disabled"
// which shows as an error in the UI. Since 'feishu' is removed from
// plugins.allow above, the built-in extension won't auto-load.
if (pEntries.feishu) {
delete pEntries.feishu;
console.log('[sanitize] Removed built-in feishu plugin entry (openclaw-lark plugin is configured)');
modified = true;
}
}