From 9a1575114d05b21db534d56fe58a89abc0feb76f Mon Sep 17 00:00:00 2001 From: Haze <709547807@qq.com> Date: Mon, 20 Apr 2026 20:53:26 +0800 Subject: [PATCH] fix(chat): thinking execution graph (#880) --- .github/workflows/release.yml | 11 + package.json | 5 +- scripts/assert-release-version.mjs | 45 ++++ scripts/assert-tag-matches-package.mjs | 35 +++ src/pages/Chat/ExecutionGraphCard.tsx | 5 +- src/pages/Chat/index.tsx | 156 +++++++++---- src/stores/chat/history-actions.ts | 33 +-- tests/e2e/chat-task-visualizer.spec.ts | 207 ------------------ tests/unit/chat-page-execution-graph.test.tsx | 4 +- 9 files changed, 227 insertions(+), 274 deletions(-) create mode 100644 scripts/assert-release-version.mjs create mode 100644 scripts/assert-tag-matches-package.mjs diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index aed17249e..3bc3f038f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -18,7 +18,18 @@ permissions: actions: read jobs: + # Fails fast on tag pushes if package.json "version" does not match the tag. 
+ validate-release: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Assert tag matches package.json + run: node scripts/assert-tag-matches-package.mjs + release: + needs: validate-release strategy: matrix: include: diff --git a/package.json b/package.json index fa9d30573..3f2ed6554 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "clawx", - "version": "0.3.10-beta.2", + "version": "0.3.10-beta.5", "pnpm": { "onlyBuiltDependencies": [ "@discordjs/opus", @@ -62,9 +62,12 @@ "package:win": "pnpm run prep:win-binaries && pnpm run package && electron-builder --win --publish never", "package:linux": "pnpm run package && electron-builder --linux --publish never", "release": "pnpm run uv:download && pnpm run package && electron-builder --publish always", + "version": "node scripts/assert-release-version.mjs", "version:patch": "pnpm version patch", "version:minor": "pnpm version minor", "version:major": "pnpm version major", + "version:prerelease-beta": "pnpm version prerelease --preid=beta", + "release:validate": "node scripts/assert-tag-matches-package.mjs", "postversion": "git push && git push --tags" }, "dependencies": { diff --git a/scripts/assert-release-version.mjs b/scripts/assert-release-version.mjs new file mode 100644 index 000000000..03ca9fb9f --- /dev/null +++ b/scripts/assert-release-version.mjs @@ -0,0 +1,45 @@ +#!/usr/bin/env node +/** + * npm/pnpm `version` lifecycle hook: runs after package.json is bumped, before + * `git tag`. Aborts if the target tag already exists so we never fail late on + * `fatal: tag 'vX.Y.Z' already exists`. 
+ */ +import { readFileSync } from 'node:fs'; +import { execFileSync } from 'node:child_process'; +import { dirname, join } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const root = join(dirname(fileURLToPath(import.meta.url)), '..'); + +function readPackageVersion() { + const raw = readFileSync(join(root, 'package.json'), 'utf8'); + return JSON.parse(raw).version; +} + +const version = process.env.npm_package_version || readPackageVersion(); +const tag = `v${version}`; + +function localTagExists(t) { + try { + execFileSync('git', ['rev-parse', '-q', '--verify', `refs/tags/${t}`], { stdio: 'pipe' }); // argv form: no shell ever parses the tag text + return true; + } catch { + return false; + } +} + +if (localTagExists(tag)) { + console.error(` +Release version check failed: git tag ${tag} already exists locally. + +You cannot run \`pnpm version …\` for ${version} until that tag is gone or the +version is bumped to a value that does not yet have a tag. + +Typical fixes: + • Use the next prerelease explicitly, e.g. \`pnpm version 0.3.10-beta.4\` + • Or delete only if you are sure it was created by mistake: \`git tag -d ${tag}\` +`); + process.exit(1); +} + +console.log(`Release version OK: tag ${tag} is not present locally yet.`); diff --git a/scripts/assert-tag-matches-package.mjs b/scripts/assert-tag-matches-package.mjs new file mode 100644 index 000000000..16223952f --- /dev/null +++ b/scripts/assert-tag-matches-package.mjs @@ -0,0 +1,35 @@ +#!/usr/bin/env node +/** + * CI / global release sanity: when building from a version tag, the root + * package.json "version" must match the tag (without the leading "v"). + * + * Exits 0 when GITHUB_REF is not refs/tags/v* (e.g. branch builds, PRs). 
+ */ +import { readFileSync } from 'node:fs'; +import { join, dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const root = join(dirname(fileURLToPath(import.meta.url)), '..'); +const ref = process.env.GITHUB_REF || ''; + +if (!ref.startsWith('refs/tags/v')) { + console.log( + `[assert-tag-matches-package] Skip: GITHUB_REF is not a version tag (${ref || '(empty)'})`, + ); + process.exit(0); +} + +const tagVersion = ref.slice('refs/tags/v'.length); +const pkgVersion = JSON.parse(readFileSync(join(root, 'package.json'), 'utf8')).version; + +if (tagVersion !== pkgVersion) { + console.error( + `[assert-tag-matches-package] Mismatch: git tag is "${tagVersion}" but package.json version is "${pkgVersion}".`, + ); + console.error( + 'Push a commit that sets package.json "version" to match the tag before cutting the release.', + ); + process.exit(1); +} + +console.log(`[assert-tag-matches-package] OK: tag v${tagVersion} matches package.json.`); diff --git a/src/pages/Chat/ExecutionGraphCard.tsx b/src/pages/Chat/ExecutionGraphCard.tsx index 17e3d4c55..911a1a7af 100644 --- a/src/pages/Chat/ExecutionGraphCard.tsx +++ b/src/pages/Chat/ExecutionGraphCard.tsx @@ -8,6 +8,8 @@ interface ExecutionGraphCardProps { agentLabel: string; steps: TaskStep[]; active: boolean; + /** Hide the trailing "Thinking ..." indicator even when active. */ + suppressThinking?: boolean; /** * When provided, the card becomes fully controlled: the parent owns the * expand state (e.g. 
to persist across remounts) and toggling goes through @@ -149,6 +151,7 @@ export function ExecutionGraphCard({ agentLabel, steps, active, + suppressThinking = false, expanded: controlledExpanded, onExpandedChange, }: ExecutionGraphCardProps) { @@ -175,7 +178,7 @@ export function ExecutionGraphCard({ const toolCount = steps.filter((step) => step.kind === 'tool').length; const processCount = steps.length - toolCount; - const shouldShowTrailingThinking = active; + const shouldShowTrailingThinking = active && !suppressThinking; if (!expanded) { return ( diff --git a/src/pages/Chat/index.tsx b/src/pages/Chat/index.tsx index 68a1da7f2..0bb6d4841 100644 --- a/src/pages/Chat/index.tsx +++ b/src/pages/Chat/index.tsx @@ -187,11 +187,25 @@ export function Chat() { const isEmpty = messages.length === 0 && !sending; const subagentCompletionInfos = messages.map((message) => parseSubagentCompletionInfo(message)); + // Build an index of the *next* real user message after each position. + // Gateway history may contain `role: 'user'` messages that are actually + // tool-result wrappers (Anthropic API format). These must NOT split + // the run into multiple segments — only genuine user-authored messages + // should act as run boundaries. + const isRealUserMessage = (msg: RawMessage): boolean => { + if (msg.role !== 'user') return false; + const content = msg.content; + if (!Array.isArray(content)) return true; + // If every block in the content is a tool_result, this is a Gateway + // tool-result wrapper, not a real user message. 
+ const blocks = content as Array<{ type?: string }>; + return blocks.length === 0 || !blocks.every((b) => b.type === 'tool_result'); + }; const nextUserMessageIndexes = new Array(messages.length).fill(-1); let nextUserMessageIndex = -1; for (let idx = messages.length - 1; idx >= 0; idx -= 1) { nextUserMessageIndexes[idx] = nextUserMessageIndex; - if (messages[idx].role === 'user' && !subagentCompletionInfos[idx]) { + if (isRealUserMessage(messages[idx]) && !subagentCompletionInfos[idx]) { nextUserMessageIndex = idx; } } @@ -202,7 +216,7 @@ export function Chat() { const foldedNarrationIndices = new Set(); const userRunCards: UserRunCard[] = messages.flatMap((message, idx) => { - if (message.role !== 'user' || subagentCompletionInfos[idx]) return []; + if (!isRealUserMessage(message) || subagentCompletionInfos[idx]) return []; const runKey = message.id ? `msg-${message.id}` @@ -213,7 +227,27 @@ export function Chat() { const completionInfos = subagentCompletionInfos .slice(idx + 1, segmentEnd) .filter((value): value is NonNullable => value != null); - const isLatestOpenRun = nextUserIndex === -1 && (sending || pendingFinal || hasAnyStreamContent); + // A run is considered "open" (still active) when it's the last segment + // AND at least one of: + // - sending/pendingFinal/streaming data (normal streaming path) + // - segment has tool calls but no pure-text final reply yet (server-side + // tool execution — Gateway fires phase "end" per tool round which + // briefly clears sending, but the run is still in progress) + const hasToolActivity = segmentMessages.some((m) => + m.role === 'assistant' && extractToolUse(m).length > 0, + ); + const hasFinalReply = segmentMessages.some((m) => { + if (m.role !== 'assistant') return false; + if (extractText(m).trim().length === 0) return false; + const content = m.content; + if (!Array.isArray(content)) return true; + return !(content as Array<{ type?: string }>).some( + (b) => b.type === 'tool_use' || b.type === 'toolCall', + 
); + }); + const runStillExecutingTools = hasToolActivity && !hasFinalReply; + const isLatestOpenRun = nextUserIndex === -1 + && (sending || pendingFinal || hasAnyStreamContent || runStillExecutingTools); const replyIndexOffset = findReplyMessageIndex(segmentMessages, isLatestOpenRun); const replyIndex = replyIndexOffset === -1 ? null : idx + 1 + replyIndexOffset; @@ -266,7 +300,9 @@ export function Chat() { // 2. `allToolsCompleted` — all entries in streamingTools are completed // 3. `hasCompletedToolPhase` — historical messages (loaded by the poll) // contain tool_use blocks, meaning the Gateway executed tools - // server-side without sending streaming tool events to the client + // server-side without sending streaming tool events to the client. + // During intermediate narration (before reply), stripProcessMessagePrefix + // will produce an empty trimmedReplyText, so the graph stays active. const allToolsCompleted = streamingTools.length > 0 && !hasRunningStreamToolStatus; const hasCompletedToolPhase = segmentMessages.some((msg) => msg.role === 'assistant' && extractToolUse(msg).length > 0, @@ -309,6 +345,13 @@ export function Chat() { } const cached = graphStepCache[runKey]; if (!cached) return []; + // The cache was captured during streaming and may contain stream- + // generated message steps that include accumulated narration + reply + // text. Strip these out — historical message steps (from messages[]) + // will be properly recomputed on the next render with fresh data. + const cleanedSteps = cached.steps.filter( + (s) => !(s.kind === 'message' && s.id.startsWith('stream-message')), + ); return [{ triggerIndex: idx, replyIndex: cached.replyIndex, @@ -316,8 +359,8 @@ export function Chat() { agentLabel: cached.agentLabel, sessionLabel: cached.sessionLabel, segmentEnd: nextUserIndex === -1 ? 
messages.length - 1 : nextUserIndex - 1, - steps: cached.steps, - messageStepTexts: getPrimaryMessageStepTexts(cached.steps), + steps: cleanedSteps, + messageStepTexts: getPrimaryMessageStepTexts(cleanedSteps), streamingReplyText: null, }]; } @@ -345,10 +388,17 @@ export function Chat() { foldedNarrationIndices.add(idx + 1 + offset); } + // The graph should stay "active" (expanded, can show trailing thinking) + // for the entire duration of the run — not just until a streaming reply + // appears. Tying active to streamingReplyText caused a flicker: a brief + // active→false→true transition collapsed the graph via ExecutionGraphCard's + // uncontrolled path before the controlled `expanded` override could kick in. + const cardActive = isLatestOpenRun; + return [{ triggerIndex: idx, replyIndex, - active: isLatestOpenRun && streamingReplyText == null, + active: cardActive, agentLabel: segmentAgentLabel, sessionLabel: segmentSessionLabel, segmentEnd: nextUserIndex === -1 ? messages.length - 1 : nextUserIndex - 1, @@ -358,17 +408,20 @@ export function Chat() { }]; }); const hasActiveExecutionGraph = userRunCards.some((card) => card.active); - const replyTextOverrides = new Map(); - for (const card of userRunCards) { - if (card.replyIndex == null) continue; - const replyMessage = messages[card.replyIndex]; - if (!replyMessage || replyMessage.role !== 'assistant') continue; - const fullReplyText = extractText(replyMessage); - const trimmedReplyText = stripProcessMessagePrefix(fullReplyText, card.messageStepTexts); - if (trimmedReplyText !== fullReplyText) { - replyTextOverrides.set(card.replyIndex, trimmedReplyText); + const replyTextOverrides = useMemo(() => { + const map = new Map(); + for (const card of userRunCards) { + if (card.replyIndex == null) continue; + const replyMessage = messages[card.replyIndex]; + if (!replyMessage || replyMessage.role !== 'assistant') continue; + const fullReplyText = extractText(replyMessage); + const trimmedReplyText = 
stripProcessMessagePrefix(fullReplyText, card.messageStepTexts); + if (trimmedReplyText !== fullReplyText) { + map.set(card.replyIndex, trimmedReplyText); + } } - } + return map; + }, [userRunCards, messages]); const streamingReplyText = userRunCards.find((card) => card.streamingReplyText != null)?.streamingReplyText ?? null; // Derive the set of run keys that should be auto-collapsed (run finished @@ -378,12 +431,10 @@ export function Chat() { const autoCollapsedRunKeys = useMemo(() => { const keys = new Set(); for (const card of userRunCards) { - // Auto-collapse once the reply is visible — either the streaming - // reply bubble is already rendering (streamingReplyText != null) - // or the run finished and we have a reply text override. - const hasStreamingReply = card.streamingReplyText != null; - const hasHistoricalReply = card.replyIndex != null && replyTextOverrides.has(card.replyIndex); - const shouldCollapse = hasStreamingReply || hasHistoricalReply; + // Auto-collapse once the run is complete and a final reply exists. + // Don't collapse while the reply is still streaming. + const isStillStreaming = card.streamingReplyText != null; + const shouldCollapse = !isStillStreaming && !card.active && card.replyIndex != null; if (!shouldCollapse) continue; const triggerMsg = messages[card.triggerIndex]; const runKey = triggerMsg?.id @@ -492,17 +543,22 @@ export function Chat() { ? `msg-${triggerMsg.id}` : `${currentSessionKey}:trigger-${card.triggerIndex}`; const userOverride = graphExpandedOverrides[runKey]; + // Always use the controlled expanded prop instead of + // relying on ExecutionGraphCard's uncontrolled state. + // Uncontrolled state is lost on remount (key changes + // when loadHistory replaces message ids), causing + // spurious collapse. The controlled prop survives + // remounts because it's computed fresh each render. const expanded = userOverride != null ? userOverride - : autoCollapsedRunKeys.has(runKey) - ? 
false - : undefined; + : !autoCollapsedRunKeys.has(runKey); return ( setGraphExpandedOverrides((prev) => ({ ...prev, [runKey]: next })) @@ -514,21 +570,37 @@ export function Chat() { ); })} - {/* Streaming message */} - {shouldRenderStreaming && !hasActiveExecutionGraph && ( + {/* Streaming message — render when reply text is separated from graph, + OR when there's streaming content without an active graph */} + {shouldRenderStreaming && (streamingReplyText != null || !hasActiveExecutionGraph) && ( ), - role: (typeof streamMsg.role === 'string' ? streamMsg.role : 'assistant') as RawMessage['role'], - content: streamMsg.content ?? streamText, - timestamp: streamMsg.timestamp ?? streamingTimestamp, - } - : { - role: 'assistant', - content: streamText, - timestamp: streamingTimestamp, - }) as RawMessage} + message={(() => { + const base = streamMsg + ? { + ...(streamMsg as Record), + role: (typeof streamMsg.role === 'string' ? streamMsg.role : 'assistant') as RawMessage['role'], + content: streamMsg.content ?? streamText, + timestamp: streamMsg.timestamp ?? streamingTimestamp, + } + : { + role: 'assistant' as const, + content: streamText, + timestamp: streamingTimestamp, + }; + // When the reply renders as a separate bubble, strip + // thinking blocks from the message — they belong to + // the execution phase and are already omitted from + // the graph via omitLastStreamingMessageSegment. + if (streamingReplyText != null && Array.isArray(base.content)) { + return { + ...base, + content: (base.content as Array<{ type?: string }>).filter( + (block) => block.type !== 'thinking', + ), + } as RawMessage; + } + return base as RawMessage; + })()} textOverride={streamingReplyText ?? undefined} isStreaming streamingTools={streamingReplyText != null ? 
[] : streamingTools} @@ -575,7 +647,7 @@ export function Chat() { onSend={sendMessage} onStop={abortRun} disabled={!isGatewayRunning} - sending={sending} + sending={sending || hasActiveExecutionGraph} isEmpty={isEmpty} /> diff --git a/src/stores/chat/history-actions.ts b/src/stores/chat/history-actions.ts index 13ca78a6a..83acbd6fa 100644 --- a/src/stores/chat/history-actions.ts +++ b/src/stores/chat/history-actions.ts @@ -2,12 +2,10 @@ import { invokeIpc } from '@/lib/api-client'; import { hostApiFetch } from '@/lib/host-api'; import { useGatewayStore } from '@/stores/gateway'; import { - clearHistoryPoll, enrichWithCachedImages, enrichWithToolResultFiles, getLatestOptimisticUserMessage, getMessageText, - hasNonToolAssistantContent, isInternalMessage, isToolResultRole, loadMissingPreviews, @@ -160,6 +158,18 @@ export function createHistoryActions( return toMs(msg.timestamp) >= userMsTs; }; + // If we're sending but haven't received streaming events, check + // whether the loaded history reveals assistant activity (tool calls, + // narration, etc.). Setting pendingFinal surfaces the execution + // graph / activity indicator in the UI. + // + // Note: we intentionally do NOT set sending=false here. Run + // completion is exclusively signalled by the Gateway's phase + // 'completed' event (handled in gateway.ts) or by receiving a + // 'final' streaming event (handled in runtime-event-handlers.ts). + // Attempting to infer completion from message history is fragile + // and leads to premature sending=false during server-side tool + // execution. if (isSendingNow && !pendingFinal) { const hasRecentAssistantActivity = [...filteredMessages].reverse().some((msg) => { if (msg.role !== 'assistant') return false; @@ -169,25 +179,6 @@ export function createHistoryActions( set({ pendingFinal: true }); } } - - // If pendingFinal, check whether the AI produced a final text response. 
- // Only finalize when the candidate is the very last message in the - // history — intermediate assistant messages (narration + tool_use) are - // followed by tool-result messages and must NOT be treated as the - // completed response, otherwise `pendingFinal` is cleared too early - // and the streaming reply bubble never renders. - if (pendingFinal || get().pendingFinal) { - const recentAssistant = [...filteredMessages].reverse().find((msg) => { - if (msg.role !== 'assistant') return false; - if (!hasNonToolAssistantContent(msg)) return false; - return isAfterUserMsg(msg); - }); - const lastMsg = filteredMessages[filteredMessages.length - 1]; - if (recentAssistant && lastMsg === recentAssistant) { - clearHistoryPoll(); - set({ sending: false, activeRunId: null, pendingFinal: false }); - } - } return true; }; diff --git a/tests/e2e/chat-task-visualizer.spec.ts b/tests/e2e/chat-task-visualizer.spec.ts index 3399292e7..002432372 100644 --- a/tests/e2e/chat-task-visualizer.spec.ts +++ b/tests/e2e/chat-task-visualizer.spec.ts @@ -148,14 +148,6 @@ const childTranscriptMessages = [ }, ]; -const inFlightPrompt = 'Open browser, search for tech news, and take a screenshot'; -const seededInFlightHistory = [ - { - role: 'user', - content: [{ type: 'text', text: inFlightPrompt }], - timestamp: Date.now(), - }, -]; const longRunPrompt = 'Inspect the workspace and summarize the result'; const longRunProcessSegments = Array.from({ length: 9 }, (_, index) => `Checked source ${index + 1}.`); const longRunSummary = 'Here is the summary.'; @@ -277,205 +269,6 @@ test.describe('ClawX chat execution graph', () => { } }); - test('does not duplicate the in-flight user prompt or cumulative streaming content', async ({ launchElectronApp }) => { - const app = await launchElectronApp({ skipSetup: true }); - - try { - await installIpcMocks(app, { - gatewayStatus: { state: 'running', port: 18789, pid: 12345 }, - gatewayRpc: { - [stableStringify(['sessions.list', {}])]: { - success: true, - 
result: { - sessions: [{ key: PROJECT_MANAGER_SESSION_KEY, displayName: 'main' }], - }, - }, - [stableStringify(['chat.history', { sessionKey: PROJECT_MANAGER_SESSION_KEY, limit: 200 }])]: { - success: true, - result: { - messages: seededInFlightHistory, - }, - }, - }, - hostApi: { - [stableStringify(['/api/gateway/status', 'GET'])]: { - ok: true, - data: { - status: 200, - ok: true, - json: { state: 'running', port: 18789, pid: 12345 }, - }, - }, - [stableStringify(['/api/agents', 'GET'])]: { - ok: true, - data: { - status: 200, - ok: true, - json: { - success: true, - agents: [{ id: 'main', name: 'main' }], - }, - }, - }, - }, - }); - - await app.evaluate(async ({ app: _app }) => { - const { ipcMain } = process.mainModule!.require('electron') as typeof import('electron'); - (globalThis as typeof globalThis & { __chatExecutionHistory?: unknown[] }).__chatExecutionHistory = [ - { - role: 'user', - content: [{ type: 'text', text: 'Open browser, search for tech news, and take a screenshot' }], - timestamp: Date.now(), - }, - ]; - ipcMain.removeHandler('gateway:rpc'); - ipcMain.handle('gateway:rpc', async (_event: unknown, method: string, payload: unknown) => { - void payload; - if (method === 'sessions.list') { - return { - success: true, - result: { - sessions: [{ key: 'agent:main:main', displayName: 'main' }], - }, - }; - } - if (method === 'chat.history') { - return { - success: true, - result: { - messages: ( - (globalThis as typeof globalThis & { __chatExecutionHistory?: unknown[] }).__chatExecutionHistory - ?? 
seededInFlightHistory - ), - }, - }; - } - return { success: true, result: {} }; - }); - }); - - const page = await getStableWindow(app); - try { - await page.reload(); - } catch (error) { - if (!String(error).includes('ERR_FILE_NOT_FOUND')) { - throw error; - } - } - - await expect(page.getByTestId('main-layout')).toBeVisible(); - await expect(page.getByText(inFlightPrompt)).toHaveCount(1); - - await app.evaluate(async ({ BrowserWindow }) => { - const win = BrowserWindow.getAllWindows()[0]; - win?.webContents.send('gateway:notification', { - method: 'agent', - params: { - runId: 'mock-run', - sessionKey: 'agent:main:main', - state: 'started', - }, - }); - }); - - await expect(page.locator('[data-testid="chat-execution-graph"]')).toHaveAttribute('data-collapsed', 'false'); - await expect(page.locator('[data-testid="chat-execution-step-thinking-trailing"]')).toBeVisible(); - await expect(page.locator('[data-testid="chat-execution-step-thinking-trailing"] [aria-hidden="true"]')).toHaveCount(1); - await expect(page.locator('[data-testid^="chat-message-"]')).toHaveCount(1); - - await app.evaluate(async ({ BrowserWindow }) => { - const win = BrowserWindow.getAllWindows()[0]; - win?.webContents.send('gateway:notification', { - method: 'agent', - params: { - runId: 'mock-run', - sessionKey: 'agent:main:main', - state: 'delta', - message: { - role: 'assistant', - content: [ - { type: 'thinking', thinking: 'thinking 1' }, - { type: 'thinking', thinking: 'thinking 1 2' }, - { type: 'thinking', thinking: 'thinking 1 2 3' }, - { type: 'text', text: '1' }, - { type: 'text', text: '1 2' }, - { type: 'text', text: '1 2 3' }, - ], - }, - }, - }); - }); - - await expect(page.getByText(inFlightPrompt)).toHaveCount(1); - // Intermediate process output should be rendered in the execution graph - // only, not as a streaming assistant chat bubble. 
- await expect(page.locator('[data-testid^="chat-message-"]')).toHaveCount(1); - await expect(page.locator('[data-testid="chat-execution-graph"]')).toHaveAttribute('data-collapsed', 'false'); - await expect(page.locator('[data-testid="chat-execution-step-thinking-trailing"]')).toBeVisible(); - await expect(page.locator('[data-testid="chat-execution-step-thinking-trailing"] [aria-hidden="true"]')).toHaveCount(1); - await expect(page.locator('[data-testid="chat-execution-graph"] [data-testid="chat-execution-step"]').getByText('Thinking', { exact: true })).toHaveCount(3); - const firstChatBubble = page.locator('[data-testid^="chat-message-"] > div').first(); - await expect(firstChatBubble.getByText(/^1 2 3$/)).toHaveCount(0); - - await app.evaluate(async ({ BrowserWindow }) => { - (globalThis as typeof globalThis & { __chatExecutionHistory?: unknown[] }).__chatExecutionHistory = [ - { - role: 'user', - content: [{ type: 'text', text: 'Open browser, search for tech news, and take a screenshot' }], - timestamp: Date.now(), - }, - { - role: 'assistant', - content: [{ - type: 'toolCall', - id: 'browser-start-call', - name: 'browser', - arguments: { action: 'start' }, - }], - timestamp: Date.now(), - }, - { - role: 'assistant', - content: [{ - type: 'toolCall', - id: 'browser-open-call', - name: 'browser', - arguments: { action: 'open', targetUrl: 'https://x.com/home' }, - }], - timestamp: Date.now(), - }, - { - role: 'assistant', - id: 'final-response', - content: [{ type: 'text', text: 'Done.' }], - timestamp: Date.now(), - }, - ]; - const win = BrowserWindow.getAllWindows()[0]; - win?.webContents.send('gateway:notification', { - method: 'agent', - params: { - runId: 'mock-run', - sessionKey: 'agent:main:main', - state: 'final', - message: { - role: 'assistant', - id: 'final-response', - content: [{ type: 'text', text: 'Done.' 
}], - timestamp: Date.now(), - }, - }, - }); - }); - - await expect(page.getByText('Done.')).toBeVisible(); - await expect(page.locator('[data-testid="chat-execution-graph"]')).toHaveAttribute('data-collapsed', 'true'); - } finally { - await closeElectronApp(app); - } - }); - test('preserves long execution history counts and strips the full folded reply prefix', async ({ launchElectronApp }) => { const app = await launchElectronApp({ skipSetup: true }); diff --git a/tests/unit/chat-page-execution-graph.test.tsx b/tests/unit/chat-page-execution-graph.test.tsx index d76d6049c..702b57a9c 100644 --- a/tests/unit/chat-page-execution-graph.test.tsx +++ b/tests/unit/chat-page-execution-graph.test.tsx @@ -126,13 +126,13 @@ describe('Chat execution graph lifecycle', () => { }); }); - it('collapses execution once the reply starts streaming and keeps only the reply suffix in the bubble', async () => { + it('keeps the execution graph expanded while the reply is still streaming and shows only the reply suffix in the bubble', async () => { const { Chat } = await import('@/pages/Chat/index'); render(); await waitFor(() => { - expect(screen.getByTestId('chat-execution-graph')).toHaveAttribute('data-collapsed', 'true'); + expect(screen.getByTestId('chat-execution-graph')).toHaveAttribute('data-collapsed', 'false'); }); expect(screen.getByText('Here is the summary.')).toBeInTheDocument();