Add channel health diagnostics and gateway recovery fixes (#855)
This commit is contained in:
committed by
GitHub
Unverified
parent
6acd8acf5a
commit
1f39d1a8a7
@@ -1,5 +1,5 @@
|
||||
import { useState, useEffect, useCallback, useMemo, useRef } from 'react';
|
||||
import { RefreshCw, Trash2, AlertCircle, Plus } from 'lucide-react';
|
||||
import { RefreshCw, Trash2, AlertCircle, Plus, Copy, RotateCcw, ChevronDown, ChevronUp } from 'lucide-react';
|
||||
import { Button } from '@/components/ui/button';
|
||||
import { Badge } from '@/components/ui/badge';
|
||||
import { ConfirmDialog } from '@/components/ui/confirm-dialog';
|
||||
@@ -33,7 +33,8 @@ interface ChannelAccountItem {
|
||||
accountId: string;
|
||||
name: string;
|
||||
configured: boolean;
|
||||
status: 'connected' | 'connecting' | 'disconnected' | 'error';
|
||||
status: 'connected' | 'connecting' | 'degraded' | 'disconnected' | 'error';
|
||||
statusReason?: string;
|
||||
lastError?: string;
|
||||
isDefault: boolean;
|
||||
agentId?: string;
|
||||
@@ -42,10 +43,51 @@ interface ChannelAccountItem {
|
||||
interface ChannelGroupItem {
|
||||
channelType: string;
|
||||
defaultAccountId: string;
|
||||
status: 'connected' | 'connecting' | 'disconnected' | 'error';
|
||||
status: 'connected' | 'connecting' | 'degraded' | 'disconnected' | 'error';
|
||||
statusReason?: string;
|
||||
accounts: ChannelAccountItem[];
|
||||
}
|
||||
|
||||
interface GatewayHealthSummary {
|
||||
state: 'healthy' | 'degraded' | 'unresponsive';
|
||||
reasons: string[];
|
||||
consecutiveHeartbeatMisses: number;
|
||||
lastAliveAt?: number;
|
||||
lastRpcSuccessAt?: number;
|
||||
lastRpcFailureAt?: number;
|
||||
lastRpcFailureMethod?: string;
|
||||
lastChannelsStatusOkAt?: number;
|
||||
lastChannelsStatusFailureAt?: number;
|
||||
}
|
||||
|
||||
interface GatewayDiagnosticSnapshot {
|
||||
capturedAt: number;
|
||||
platform: string;
|
||||
gateway: GatewayHealthSummary & Record<string, unknown>;
|
||||
channels: ChannelGroupItem[];
|
||||
clawxLogTail: string;
|
||||
gatewayLogTail: string;
|
||||
gatewayErrLogTail: string;
|
||||
}
|
||||
|
||||
function isGatewayDiagnosticSnapshot(value: unknown): value is GatewayDiagnosticSnapshot {
|
||||
if (!value || typeof value !== 'object') {
|
||||
return false;
|
||||
}
|
||||
|
||||
const snapshot = value as Record<string, unknown>;
|
||||
return (
|
||||
typeof snapshot.capturedAt === 'number'
|
||||
&& typeof snapshot.platform === 'string'
|
||||
&& typeof snapshot.gateway === 'object'
|
||||
&& snapshot.gateway !== null
|
||||
&& Array.isArray(snapshot.channels)
|
||||
&& typeof snapshot.clawxLogTail === 'string'
|
||||
&& typeof snapshot.gatewayLogTail === 'string'
|
||||
&& typeof snapshot.gatewayErrLogTail === 'string'
|
||||
);
|
||||
}
|
||||
|
||||
interface AgentItem {
|
||||
id: string;
|
||||
name: string;
|
||||
@@ -76,11 +118,20 @@ export function Channels() {
|
||||
const { t } = useTranslation('channels');
|
||||
const gatewayStatus = useGatewayStore((state) => state.status);
|
||||
const lastGatewayStateRef = useRef(gatewayStatus.state);
|
||||
const defaultGatewayHealth = useMemo<GatewayHealthSummary>(() => ({
|
||||
state: 'healthy',
|
||||
reasons: [],
|
||||
consecutiveHeartbeatMisses: 0,
|
||||
}), []);
|
||||
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [channelGroups, setChannelGroups] = useState<ChannelGroupItem[]>([]);
|
||||
const [agents, setAgents] = useState<AgentItem[]>([]);
|
||||
const [gatewayHealth, setGatewayHealth] = useState<GatewayHealthSummary>(defaultGatewayHealth);
|
||||
const [diagnosticsSnapshot, setDiagnosticsSnapshot] = useState<GatewayDiagnosticSnapshot | null>(null);
|
||||
const [showDiagnostics, setShowDiagnostics] = useState(false);
|
||||
const [diagnosticsLoading, setDiagnosticsLoading] = useState(false);
|
||||
const [showConfigModal, setShowConfigModal] = useState(false);
|
||||
const [selectedChannelType, setSelectedChannelType] = useState<ChannelType | null>(null);
|
||||
const [selectedAccountId, setSelectedAccountId] = useState<string | undefined>(undefined);
|
||||
@@ -139,18 +190,29 @@ export function Channels() {
|
||||
hostApiFetch<{ success: boolean; agents?: AgentItem[]; error?: string }>('/api/agents'),
|
||||
]);
|
||||
|
||||
if (!channelsRes.success) {
|
||||
throw new Error(channelsRes.error || 'Failed to load channels');
|
||||
type ChannelsResponse = {
|
||||
success: boolean;
|
||||
channels?: ChannelGroupItem[];
|
||||
gatewayHealth?: GatewayHealthSummary;
|
||||
error?: string;
|
||||
};
|
||||
const channelsPayload = channelsRes as ChannelsResponse;
|
||||
|
||||
if (!channelsPayload.success) {
|
||||
throw new Error(channelsPayload.error || 'Failed to load channels');
|
||||
}
|
||||
|
||||
if (!agentsRes.success) {
|
||||
throw new Error(agentsRes.error || 'Failed to load agents');
|
||||
}
|
||||
|
||||
setChannelGroups(channelsRes.channels || []);
|
||||
setChannelGroups(channelsPayload.channels || []);
|
||||
setAgents(agentsRes.agents || []);
|
||||
setGatewayHealth(channelsPayload.gatewayHealth || defaultGatewayHealth);
|
||||
setDiagnosticsSnapshot(null);
|
||||
setShowDiagnostics(false);
|
||||
console.info(
|
||||
`[channels-ui] fetch ok probe=${probe ? '1' : '0'} elapsedMs=${Date.now() - startedAt} view=${(channelsRes.channels || []).map((item) => `${item.channelType}:${item.status}`).join(',')}`
|
||||
`[channels-ui] fetch ok probe=${probe ? '1' : '0'} elapsedMs=${Date.now() - startedAt} view=${(channelsPayload.channels || []).map((item) => `${item.channelType}:${item.status}`).join(',')}`
|
||||
);
|
||||
} catch (fetchError) {
|
||||
// Preserve previous data on error — don't clear channelGroups/agents.
|
||||
@@ -269,6 +331,100 @@ export function Channels() {
|
||||
void fetchPageData({ probe: true });
|
||||
};
|
||||
|
||||
const fetchDiagnosticsSnapshot = useCallback(async (): Promise<GatewayDiagnosticSnapshot> => {
|
||||
const response = await hostApiFetch<unknown>('/api/diagnostics/gateway-snapshot');
|
||||
if (response && typeof response === 'object') {
|
||||
const payload = response as Record<string, unknown>;
|
||||
if (payload.success === false || typeof payload.error === 'string') {
|
||||
throw new Error(typeof payload.error === 'string' ? payload.error : 'Failed to fetch gateway diagnostics snapshot');
|
||||
}
|
||||
}
|
||||
if (!isGatewayDiagnosticSnapshot(response)) {
|
||||
throw new Error('Invalid gateway diagnostics snapshot response');
|
||||
}
|
||||
const snapshot = response;
|
||||
setDiagnosticsSnapshot(snapshot);
|
||||
return snapshot;
|
||||
}, []);
|
||||
|
||||
const handleRestartGateway = async () => {
|
||||
try {
|
||||
const result = await hostApiFetch<{ success?: boolean; error?: string }>('/api/gateway/restart', {
|
||||
method: 'POST',
|
||||
});
|
||||
if (result?.success !== true) {
|
||||
throw new Error(result?.error || 'Failed to restart gateway');
|
||||
}
|
||||
setDiagnosticsSnapshot(null);
|
||||
setShowDiagnostics(false);
|
||||
toast.success(t('health.restartTriggered'));
|
||||
void fetchPageData({ probe: true });
|
||||
} catch (restartError) {
|
||||
toast.error(t('health.restartFailed', { error: String(restartError) }));
|
||||
}
|
||||
};
|
||||
|
||||
const handleCopyDiagnostics = async () => {
|
||||
setDiagnosticsLoading(true);
|
||||
try {
|
||||
const snapshot = await fetchDiagnosticsSnapshot();
|
||||
await navigator.clipboard.writeText(JSON.stringify(snapshot, null, 2));
|
||||
toast.success(t('health.diagnosticsCopied'));
|
||||
} catch (copyError) {
|
||||
toast.error(t('health.diagnosticsCopyFailed', { error: String(copyError) }));
|
||||
} finally {
|
||||
setDiagnosticsLoading(false);
|
||||
}
|
||||
};
|
||||
|
||||
const handleToggleDiagnostics = async () => {
|
||||
if (showDiagnostics) {
|
||||
setShowDiagnostics(false);
|
||||
return;
|
||||
}
|
||||
setDiagnosticsLoading(true);
|
||||
try {
|
||||
await fetchDiagnosticsSnapshot();
|
||||
} catch (diagnosticsError) {
|
||||
toast.error(t('health.diagnosticsCopyFailed', { error: String(diagnosticsError) }));
|
||||
setDiagnosticsLoading(false);
|
||||
return;
|
||||
} finally {
|
||||
setDiagnosticsLoading(false);
|
||||
}
|
||||
setShowDiagnostics(true);
|
||||
};
|
||||
|
||||
const healthReasonLabel = useMemo(() => {
|
||||
const primaryReason = gatewayHealth.reasons[0];
|
||||
if (!primaryReason) return '';
|
||||
return t(`health.reasons.${primaryReason}`);
|
||||
}, [gatewayHealth.reasons, t]);
|
||||
|
||||
const diagnosticsText = useMemo(
|
||||
() => diagnosticsSnapshot ? JSON.stringify(diagnosticsSnapshot, null, 2) : '',
|
||||
[diagnosticsSnapshot],
|
||||
);
|
||||
|
||||
const statusTone = useCallback((status: ChannelGroupItem['status']) => {
|
||||
switch (status) {
|
||||
case 'connected':
|
||||
return 'bg-green-500/10 text-green-700 dark:text-green-300 border-green-500/20';
|
||||
case 'connecting':
|
||||
return 'bg-sky-500/10 text-sky-700 dark:text-sky-300 border-sky-500/20';
|
||||
case 'degraded':
|
||||
return 'bg-yellow-500/10 text-yellow-700 dark:text-yellow-300 border-yellow-500/20';
|
||||
case 'error':
|
||||
return 'bg-destructive/10 text-destructive border-destructive/20';
|
||||
default:
|
||||
return 'bg-black/5 dark:bg-white/5 text-muted-foreground border-black/10 dark:border-white/10';
|
||||
}
|
||||
}, []);
|
||||
|
||||
const statusLabel = useCallback((status: ChannelGroupItem['status']) => {
|
||||
return t(`account.connectionStatus.${status}`);
|
||||
}, [t]);
|
||||
|
||||
const handleBindAgent = async (channelType: string, accountId: string, agentId: string) => {
|
||||
try {
|
||||
if (!agentId) {
|
||||
@@ -365,6 +521,86 @@ export function Channels() {
|
||||
</div>
|
||||
)}
|
||||
|
||||
{gatewayStatus.state === 'running' && gatewayHealth.state !== 'healthy' && (
|
||||
<div
|
||||
data-testid="channels-health-banner"
|
||||
className={cn(
|
||||
'mb-8 rounded-xl border p-4',
|
||||
gatewayHealth.state === 'unresponsive'
|
||||
? 'border-destructive/50 bg-destructive/10'
|
||||
: 'border-yellow-500/50 bg-yellow-500/10',
|
||||
)}
|
||||
>
|
||||
<div className="flex flex-col gap-4 md:flex-row md:items-start md:justify-between">
|
||||
<div className="flex items-start gap-3">
|
||||
<AlertCircle
|
||||
className={cn(
|
||||
'mt-0.5 h-5 w-5 shrink-0',
|
||||
gatewayHealth.state === 'unresponsive'
|
||||
? 'text-destructive'
|
||||
: 'text-yellow-600 dark:text-yellow-400',
|
||||
)}
|
||||
/>
|
||||
<div>
|
||||
<p className="text-sm font-semibold text-foreground">
|
||||
{t(`health.state.${gatewayHealth.state}`)}
|
||||
</p>
|
||||
{healthReasonLabel && (
|
||||
<p className="mt-1 text-sm text-foreground/75">{healthReasonLabel}</p>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex flex-wrap items-center gap-2">
|
||||
<Button
|
||||
data-testid="channels-restart-gateway"
|
||||
size="sm"
|
||||
variant="outline"
|
||||
className="h-8 rounded-full text-xs"
|
||||
onClick={() => { void handleRestartGateway(); }}
|
||||
>
|
||||
<RotateCcw className="mr-2 h-3.5 w-3.5" />
|
||||
{t('health.restartGateway')}
|
||||
</Button>
|
||||
<Button
|
||||
data-testid="channels-copy-diagnostics"
|
||||
size="sm"
|
||||
variant="outline"
|
||||
className="h-8 rounded-full text-xs"
|
||||
disabled={diagnosticsLoading}
|
||||
onClick={() => { void handleCopyDiagnostics(); }}
|
||||
>
|
||||
<Copy className="mr-2 h-3.5 w-3.5" />
|
||||
{t('health.copyDiagnostics')}
|
||||
</Button>
|
||||
<Button
|
||||
data-testid="channels-toggle-diagnostics"
|
||||
size="sm"
|
||||
variant="outline"
|
||||
className="h-8 rounded-full text-xs"
|
||||
disabled={diagnosticsLoading}
|
||||
onClick={() => { void handleToggleDiagnostics(); }}
|
||||
>
|
||||
{showDiagnostics ? (
|
||||
<ChevronUp className="mr-2 h-3.5 w-3.5" />
|
||||
) : (
|
||||
<ChevronDown className="mr-2 h-3.5 w-3.5" />
|
||||
)}
|
||||
{showDiagnostics ? t('health.hideDiagnostics') : t('health.viewDiagnostics')}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{showDiagnostics && diagnosticsText && (
|
||||
<div className="mt-4 rounded-xl border border-black/10 dark:border-white/10 bg-background/80 p-3">
|
||||
<p className="mb-2 text-xs font-medium text-muted-foreground">{t('health.diagnosticsTitle')}</p>
|
||||
<pre data-testid="channels-diagnostics" className="max-h-[320px] overflow-auto whitespace-pre-wrap break-all text-[11px] text-foreground/85">
|
||||
{diagnosticsText}
|
||||
</pre>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{error && (
|
||||
<div className="mb-8 p-4 rounded-xl border border-destructive/50 bg-destructive/10 flex items-center gap-3">
|
||||
<AlertCircle className="h-5 w-5 text-destructive" />
|
||||
@@ -393,18 +629,9 @@ export function Channels() {
|
||||
</h3>
|
||||
<p className="text-[12px] text-muted-foreground">{group.channelType}</p>
|
||||
</div>
|
||||
<div
|
||||
className={cn(
|
||||
'w-2 h-2 rounded-full shrink-0',
|
||||
group.status === 'connected'
|
||||
? 'bg-green-500'
|
||||
: group.status === 'connecting'
|
||||
? 'bg-yellow-500 animate-pulse'
|
||||
: group.status === 'error'
|
||||
? 'bg-destructive'
|
||||
: 'bg-muted-foreground'
|
||||
)}
|
||||
/>
|
||||
<Badge className={cn('rounded-full border px-2.5 py-0.5 text-[11px] font-medium', statusTone(group.status))}>
|
||||
{statusLabel(group.status)}
|
||||
</Badge>
|
||||
</div>
|
||||
|
||||
<div className="flex items-center gap-2">
|
||||
@@ -456,10 +683,18 @@ export function Channels() {
|
||||
<div className="min-w-0">
|
||||
<div className="flex items-center gap-2">
|
||||
<p className="text-[13px] font-medium text-foreground truncate">{displayName}</p>
|
||||
<Badge className={cn('rounded-full border px-2 py-0.5 text-[10px] font-medium', statusTone(account.status))}>
|
||||
{statusLabel(account.status)}
|
||||
</Badge>
|
||||
</div>
|
||||
{account.lastError && (
|
||||
<div className="text-[12px] text-destructive mt-1">{account.lastError}</div>
|
||||
)}
|
||||
{!account.lastError && account.statusReason && account.status === 'degraded' && (
|
||||
<div className="text-[12px] text-yellow-700 dark:text-yellow-300 mt-1">
|
||||
{t(`health.reasons.${account.statusReason}`)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div className="flex items-center gap-2">
|
||||
|
||||
Reference in New Issue
Block a user