diff --git a/src/pages/Chat/index.tsx b/src/pages/Chat/index.tsx
index 1d512b2d0..68a1da7f2 100644
--- a/src/pages/Chat/index.tsx
+++ b/src/pages/Chat/index.tsx
@@ -258,8 +258,21 @@ export function Chat() {
       return builtSteps;
     };
 
+    // Show the streaming response as a separate bubble (not inside the
+    // execution graph) once all tool calls have finished.
+    //
+    // Three signals indicate "tools finished, now streaming the reply":
+    //   1. `pendingFinal`        — set by tool-result final events
+    //   2. `allToolsCompleted`   — streamingTools is non-empty and none is still running
+    //   3. `hasCompletedToolPhase` — historical messages (loaded by the poll)
+    //      contain tool_use blocks, meaning the Gateway executed tools
+    //      server-side without sending streaming tool events to the client
+    const allToolsCompleted = streamingTools.length > 0 && !hasRunningStreamToolStatus;
+    const hasCompletedToolPhase = segmentMessages.some((msg) =>
+      msg.role === 'assistant' && extractToolUse(msg).length > 0,
+    );
     const rawStreamingReplyCandidate = isLatestOpenRun
-      && pendingFinal
+      && (pendingFinal || allToolsCompleted || hasCompletedToolPhase)
       && (hasStreamText || hasStreamImages)
       && streamTools.length === 0
       && !hasRunningStreamToolStatus;
@@ -365,8 +378,12 @@ export function Chat() {
   const autoCollapsedRunKeys = useMemo(() => {
     const keys = new Set<string>();
     for (const card of userRunCards) {
-      const shouldCollapse = card.streamingReplyText != null
-        || (card.replyIndex != null && replyTextOverrides.has(card.replyIndex));
+      // Auto-collapse once the reply is visible — either the streaming
+      // reply bubble is already rendering (streamingReplyText != null)
+      // or the run finished and we have a reply text override.
+      const hasStreamingReply = card.streamingReplyText != null;
+      const hasHistoricalReply = card.replyIndex != null && replyTextOverrides.has(card.replyIndex);
+      const shouldCollapse = hasStreamingReply || hasHistoricalReply;
       if (!shouldCollapse) continue;
       const triggerMsg = messages[card.triggerIndex];
       const runKey = triggerMsg?.id
diff --git a/src/pages/Chat/task-visualization.ts b/src/pages/Chat/task-visualization.ts
index d9bdaef36..60fa5c6dc 100644
--- a/src/pages/Chat/task-visualization.ts
+++ b/src/pages/Chat/task-visualization.ts
@@ -283,13 +283,18 @@ export function deriveTaskSteps({
   }
 
   if (streamMessage) {
-    appendDetailSegments(extractThinkingSegments(streamMessage), {
-      idPrefix: 'stream-thinking',
-      label: 'Thinking',
-      kind: 'thinking',
-      running: true,
-      upsertStep,
-    });
+    // When the reply is being rendered as a separate bubble
+    // (omitLastStreamingMessageSegment), thinking that accompanies
+    // the reply belongs to the bubble — omit it from the graph.
+    if (!omitLastStreamingMessageSegment) {
+      appendDetailSegments(extractThinkingSegments(streamMessage), {
+        idPrefix: 'stream-thinking',
+        label: 'Thinking',
+        kind: 'thinking',
+        running: true,
+        upsertStep,
+      });
+    }
 
     // Stream-time narration should also appear in the execution graph so that
     // intermediate process output stays in P1 instead of leaking into the
diff --git a/src/stores/chat/history-actions.ts b/src/stores/chat/history-actions.ts
index 7d88e08f4..13ca78a6a 100644
--- a/src/stores/chat/history-actions.ts
+++ b/src/stores/chat/history-actions.ts
@@ -171,13 +171,19 @@ export function createHistoryActions(
         }
 
         // If pendingFinal, check whether the AI produced a final text response.
+        // Only finalize when the candidate is the very last message in the
+        // history — intermediate assistant messages (narration + tool_use) are
+        // followed by tool-result messages and must NOT be treated as the
+        // completed response, otherwise `pendingFinal` is cleared too early
+        // and the streaming reply bubble never renders.
         if (pendingFinal || get().pendingFinal) {
           const recentAssistant = [...filteredMessages].reverse().find((msg) => {
             if (msg.role !== 'assistant') return false;
             if (!hasNonToolAssistantContent(msg)) return false;
             return isAfterUserMsg(msg);
           });
-          if (recentAssistant) {
+          const lastMsg = filteredMessages[filteredMessages.length - 1];
+          if (recentAssistant && lastMsg === recentAssistant) {
             clearHistoryPoll();
             set({ sending: false, activeRunId: null, pendingFinal: false });
           }