From cf8091d81fece8462b6a1985ab595d1f3c2e9935 Mon Sep 17 00:00:00 2001 From: Haze <709547807@qq.com> Date: Sat, 14 Feb 2026 00:21:04 +0800 Subject: [PATCH] feat(chat): improve media handling and caching for user messages (#81) --- electron/gateway/manager.ts | 2 +- electron/main/ipc-handlers.ts | 57 +++++++++--- electron/preload/index.ts | 3 +- src/pages/Chat/ChatMessage.tsx | 28 +++--- src/pages/Chat/message-utils.ts | 68 ++++++++++++-- src/pages/Setup/index.tsx | 2 +- src/stores/chat.ts | 156 +++++++++++++++++++++++++++++++- 7 files changed, 280 insertions(+), 36 deletions(-) diff --git a/electron/gateway/manager.ts b/electron/gateway/manager.ts index 889ab84a5..5fe652934 100644 --- a/electron/gateway/manager.ts +++ b/electron/gateway/manager.ts @@ -656,7 +656,7 @@ export class GatewayManager extends EventEmitter { /** * Wait for Gateway to be ready by checking if the port is accepting connections */ - private async waitForReady(retries = 120, interval = 1000): Promise { + private async waitForReady(retries = 600, interval = 1000): Promise { for (let i = 0; i < retries; i++) { // Early exit if the gateway process has already exited if (this.process && (this.process.exitCode !== null || this.process.signalCode !== null)) { diff --git a/electron/main/ipc-handlers.ts b/electron/main/ipc-handlers.ts index d0cc7cd7c..9eae72bbd 100644 --- a/electron/main/ipc-handlers.ts +++ b/electron/main/ipc-handlers.ts @@ -441,27 +441,38 @@ function registerGatewayHandlers( }) => { try { let message = params.message; - const imageAttachments: Array<{ type: string; mimeType: string; fileName: string; content: string }> = []; + // The Gateway processes image attachments through TWO parallel paths: + // Path A: `attachments` param → parsed via `parseMessageWithAttachments` → + // injected as inline vision content when the model supports images. + // Format: { content: base64, mimeType: string, fileName?: string } + // Path B: `[media attached: ...]` in message text → Gateway's native image + // detection (`detectAndLoadPromptImages`) reads the file from disk and + // injects it as inline vision content. Also works for history messages. + // We use BOTH paths for maximum reliability. + const imageAttachments: Array> = []; const fileReferences: string[] = []; if (params.media && params.media.length > 0) { for (const m of params.media) { logger.info(`[chat:sendWithMedia] Processing file: ${m.fileName} (${m.mimeType}), path: ${m.filePath}, exists: ${existsSync(m.filePath)}, isVision: ${VISION_MIME_TYPES.has(m.mimeType)}`); + + // Always add file path reference so the model can access it via tools + fileReferences.push( + `[media attached: ${m.filePath} (${m.mimeType}) | ${m.filePath}]`, + ); + if (VISION_MIME_TYPES.has(m.mimeType)) { - // Raster image — inline as base64 vision attachment + // Send as base64 attachment in the format the Gateway expects: + // { content: base64String, mimeType: string, fileName?: string } + // The Gateway normalizer looks for `a.content` (NOT `a.source.data`). const fileBuffer = readFileSync(m.filePath); - logger.info(`[chat:sendWithMedia] Read ${fileBuffer.length} bytes, base64 length: ${fileBuffer.toString('base64').length}`); + const base64Data = fileBuffer.toString('base64'); + logger.info(`[chat:sendWithMedia] Read ${fileBuffer.length} bytes, base64 length: ${base64Data.length}`); imageAttachments.push({ - type: 'image', + content: base64Data, mimeType: m.mimeType, fileName: m.fileName, - content: fileBuffer.toString('base64'), }); - } else { - // Non-vision file — reference by path (same format as channel inbound media) - fileReferences.push( - `[media attached: ${m.filePath} (${m.mimeType}) | ${m.filePath}]`, - ); } } } @@ -483,9 +494,9 @@ function registerGatewayHandlers( rpcParams.attachments = imageAttachments; } - logger.info(`[chat:sendWithMedia] Sending: message="${message.substring(0, 100)}", imageAttachments=${imageAttachments.length}, fileRefs=${fileReferences.length}`); + logger.info(`[chat:sendWithMedia] Sending: message="${message.substring(0, 100)}", attachments=${imageAttachments.length}, fileRefs=${fileReferences.length}`); - // Use a longer timeout when attachments are present (120s vs default 30s) + // Use a longer timeout when images are present (120s vs default 30s) const timeoutMs = imageAttachments.length > 0 ? 120000 : 30000; const result = await gatewayManager.rpc('chat.send', rpcParams, timeoutMs); logger.info(`[chat:sendWithMedia] RPC result: ${JSON.stringify(result)}`); @@ -1557,4 +1568,26 @@ function registerFileHandlers(): void { return { id, fileName: payload.fileName, mimeType, fileSize, stagedPath, preview }; }); + + // Load thumbnails for file paths on disk (used to restore previews in history) + ipcMain.handle('media:getThumbnails', async (_, paths: Array<{ filePath: string; mimeType: string }>) => { + const results: Record = {}; + for (const { filePath, mimeType } of paths) { + try { + if (!existsSync(filePath)) { + results[filePath] = { preview: null, fileSize: 0 }; + continue; + } + const stat = statSync(filePath); + let preview: string | null = null; + if (mimeType.startsWith('image/')) { + preview = generateImagePreview(filePath, mimeType); + } + results[filePath] = { preview, fileSize: stat.size }; + } catch { + results[filePath] = { preview: null, fileSize: 0 }; + } + } + return results; + }); } diff --git a/electron/preload/index.ts b/electron/preload/index.ts index 80ad64cdb..3dd5b5294 100644 --- a/electron/preload/index.ts +++ b/electron/preload/index.ts @@ -115,9 +115,10 @@ const electronAPI = { 'log:getFilePath', 'log:getDir', 'log:listFiles', - // File staging + // File staging & media 'file:stage', 'file:stageBuffer', + 'media:getThumbnails', // Chat send with media (reads staged files in main process) 'chat:sendWithMedia', // OpenClaw extras diff --git a/src/pages/Chat/ChatMessage.tsx b/src/pages/Chat/ChatMessage.tsx index 7d75ef1c9..e9acf50cd 100644 --- a/src/pages/Chat/ChatMessage.tsx +++ b/src/pages/Chat/ChatMessage.tsx @@ -105,35 +105,41 @@ export const ChatMessage = memo(function ChatMessage({ /> )} - {/* Images (from assistant/channel content blocks) */} + {/* Images from content blocks (Gateway session data — persists across history reloads) */} {images.length > 0 && (
{images.map((img, i) => ( attachment ))}
)} - {/* File attachments (user-uploaded files) */} + {/* File attachments (local preview — shown before history reload) */} + {/* Only show _attachedFiles images if no content-block images (avoid duplicates) */} {attachedFiles.length > 0 && (
- {attachedFiles.map((file, i) => ( - file.mimeType.startsWith('image/') && file.preview ? ( + {attachedFiles.map((file, i) => { + // Skip image attachments if we already have images from content blocks + if (file.mimeType.startsWith('image/') && file.preview && images.length > 0) return null; + return file.mimeType.startsWith('image/') && file.preview ? ( {file.fileName} ) : ( - - ) - ))} + + ); + })}
)} diff --git a/src/pages/Chat/message-utils.ts b/src/pages/Chat/message-utils.ts index 8419cd8ce..529f00c4c 100644 --- a/src/pages/Chat/message-utils.ts +++ b/src/pages/Chat/message-utils.ts @@ -5,20 +5,38 @@ */ import type { RawMessage, ContentBlock } from '@/stores/chat'; +/** + * Clean Gateway metadata from user message text for display. + * Strips: [media attached: ... | ...], [message_id: ...], + * and the timestamp prefix [Day Date Time Timezone]. + */ +function cleanUserText(text: string): string { + return text + // Remove [media attached: path (mime) | path] references + .replace(/\s*\[media attached:[^\]]*\]/g, '') + // Remove [message_id: uuid] + .replace(/\s*\[message_id:\s*[^\]]+\]/g, '') + // Remove Gateway timestamp prefix like [Fri 2026-02-13 22:39 GMT+8] + .replace(/^\[(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s+\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}\s+[^\]]+\]\s*/i, '') + .trim(); +} + /** * Extract displayable text from a message's content field. * Handles both string content and array-of-blocks content. + * For user messages, strips Gateway-injected metadata. */ export function extractText(message: RawMessage | unknown): string { if (!message || typeof message !== 'object') return ''; const msg = message as Record; const content = msg.content; + const isUser = msg.role === 'user'; + + let result = ''; if (typeof content === 'string') { - return content.trim().length > 0 ? content : ''; - } - - if (Array.isArray(content)) { + result = content.trim().length > 0 ? content : ''; + } else if (Array.isArray(content)) { const parts: string[] = []; for (const block of content as ContentBlock[]) { if (block.type === 'text' && block.text) { @@ -28,15 +46,18 @@ export function extractText(message: RawMessage | unknown): string { } } const combined = parts.join('\n\n'); - return combined.trim().length > 0 ? combined : ''; + result = combined.trim().length > 0 ? combined : ''; + } else if (typeof msg.text === 'string') { + // Fallback: try .text field + result = msg.text.trim().length > 0 ? msg.text : ''; } - // Fallback: try .text field - if (typeof msg.text === 'string') { - return msg.text.trim().length > 0 ? msg.text : ''; + // Strip Gateway metadata from user messages for clean display + if (isUser && result) { + result = cleanUserText(result); } - return ''; + return result; } /** @@ -64,6 +85,35 @@ export function extractThinking(message: RawMessage | unknown): string | null { return combined.length > 0 ? combined : null; } +/** + * Extract media file references from Gateway-formatted user message text. + * Returns array of { filePath, mimeType } from [media attached: path (mime) | path] patterns. + */ +export function extractMediaRefs(message: RawMessage | unknown): Array<{ filePath: string; mimeType: string }> { + if (!message || typeof message !== 'object') return []; + const msg = message as Record; + if (msg.role !== 'user') return []; + const content = msg.content; + + let text = ''; + if (typeof content === 'string') { + text = content; + } else if (Array.isArray(content)) { + text = (content as ContentBlock[]) + .filter(b => b.type === 'text' && b.text) + .map(b => b.text!) + .join('\n'); + } + + const refs: Array<{ filePath: string; mimeType: string }> = []; + const regex = /\[media attached:\s*([^\s(]+)\s*\(([^)]+)\)\s*\|[^\]]*\]/g; + let match; + while ((match = regex.exec(text)) !== null) { + refs.push({ filePath: match[1], mimeType: match[2] }); + } + return refs; +} + /** * Extract image attachments from a message. * Returns array of { mimeType, data } for base64 images. diff --git a/src/pages/Setup/index.tsx b/src/pages/Setup/index.tsx index 159e02f1e..0085497d4 100644 --- a/src/pages/Setup/index.tsx +++ b/src/pages/Setup/index.tsx @@ -519,7 +519,7 @@ function RuntimeContent({ onStatusChange }: RuntimeContentProps) { } return prev; }); - }, 120 * 1000); // 120 seconds — enough for gateway to fully initialize + }, 600 * 1000); // 600 seconds — enough for gateway to fully initialize return () => { if (gatewayTimeoutRef.current) { diff --git a/src/stores/chat.ts b/src/stores/chat.ts index 113c5beb2..48abfc3b5 100644 --- a/src/stores/chat.ts +++ b/src/stores/chat.ts @@ -100,6 +100,135 @@ interface ChatState { const DEFAULT_CANONICAL_PREFIX = 'agent:main'; const DEFAULT_SESSION_KEY = `${DEFAULT_CANONICAL_PREFIX}:main`; +// ── Local image cache ───────────────────────────────────────── +// The Gateway doesn't store image attachments in session content blocks, +// so we cache them locally keyed by staged file path (which appears in the +// [media attached: ...] reference in the Gateway's user message text). +// Keying by path avoids the race condition of keying by runId (which is only +// available after the RPC returns, but history may load before that). +const IMAGE_CACHE_KEY = 'clawx:image-cache'; +const IMAGE_CACHE_MAX = 100; // max entries to prevent unbounded growth + +function loadImageCache(): Map { + try { + const raw = localStorage.getItem(IMAGE_CACHE_KEY); + if (raw) { + const entries = JSON.parse(raw) as Array<[string, AttachedFileMeta]>; + return new Map(entries); + } + } catch { /* ignore parse errors */ } + return new Map(); +} + +function saveImageCache(cache: Map): void { + try { + // Evict oldest entries if over limit + const entries = Array.from(cache.entries()); + const trimmed = entries.length > IMAGE_CACHE_MAX + ? entries.slice(entries.length - IMAGE_CACHE_MAX) + : entries; + localStorage.setItem(IMAGE_CACHE_KEY, JSON.stringify(trimmed)); + } catch { /* ignore quota errors */ } +} + +const _imageCache = loadImageCache(); + +/** Extract plain text from message content (string or content blocks) */ +function getMessageText(content: unknown): string { + if (typeof content === 'string') return content; + if (Array.isArray(content)) { + return (content as Array<{ type?: string; text?: string }>) + .filter(b => b.type === 'text' && b.text) + .map(b => b.text!) + .join('\n'); + } + return ''; +} + +/** Extract media file refs from [media attached: () | ...] patterns */ +function extractMediaRefs(text: string): Array<{ filePath: string; mimeType: string }> { + const refs: Array<{ filePath: string; mimeType: string }> = []; + const regex = /\[media attached:\s*([^\s(]+)\s*\(([^)]+)\)\s*\|[^\]]*\]/g; + let match; + while ((match = regex.exec(text)) !== null) { + refs.push({ filePath: match[1], mimeType: match[2] }); + } + return refs; +} + +/** + * Restore _attachedFiles for user messages loaded from history. + * Uses local cache for previews when available, but ALWAYS creates entries + * from [media attached: ...] text patterns so file cards show even without cache. + */ +function enrichWithCachedImages(messages: RawMessage[]): RawMessage[] { + return messages.map(msg => { + if (msg.role !== 'user' || msg._attachedFiles) return msg; + const text = getMessageText(msg.content); + const refs = extractMediaRefs(text); + if (refs.length === 0) return msg; + const files: AttachedFileMeta[] = refs.map(ref => { + const cached = _imageCache.get(ref.filePath); + if (cached) return cached; + // Fallback: create entry from text pattern (preview loaded later via IPC) + const fileName = ref.filePath.split(/[\\/]/).pop() || 'file'; + return { fileName, mimeType: ref.mimeType, fileSize: 0, preview: null }; + }); + return { ...msg, _attachedFiles: files }; + }); +} + +/** + * Async: load missing previews from disk via IPC for messages that have + * _attachedFiles with null previews. Updates messages in-place and triggers re-render. + */ +async function loadMissingPreviews(messages: RawMessage[]): Promise { + // Collect all image paths that need previews + const needPreview: Array<{ filePath: string; mimeType: string }> = []; + for (const msg of messages) { + if (msg.role !== 'user' || !msg._attachedFiles) continue; + const text = getMessageText(msg.content); + const refs = extractMediaRefs(text); + for (let i = 0; i < refs.length; i++) { + const file = msg._attachedFiles[i]; + if (file && file.mimeType.startsWith('image/') && !file.preview) { + needPreview.push(refs[i]); + } + } + } + if (needPreview.length === 0) return false; + + try { + const thumbnails = await window.electron.ipcRenderer.invoke( + 'media:getThumbnails', + needPreview, + ) as Record; + + let updated = false; + for (const msg of messages) { + if (msg.role !== 'user' || !msg._attachedFiles) continue; + const text = getMessageText(msg.content); + const refs = extractMediaRefs(text); + for (let i = 0; i < refs.length; i++) { + const file = msg._attachedFiles[i]; + const thumb = thumbnails[refs[i]?.filePath]; + if (file && thumb && (thumb.preview || thumb.fileSize)) { + if (thumb.preview) file.preview = thumb.preview; + if (thumb.fileSize) file.fileSize = thumb.fileSize; + // Update cache for future loads + _imageCache.set(refs[i].filePath, { ...file }); + updated = true; + } + } + } + if (updated) saveImageCache(_imageCache); + return updated; + } catch (err) { + console.warn('[loadMissingPreviews] Failed:', err); + return false; + } +} + function getCanonicalPrefixFromSessions(sessions: ChatSession[]): string | null { const canonical = sessions.find((s) => s.key.startsWith('agent:'))?.key; if (!canonical) return null; @@ -465,8 +594,18 @@ export const useChatStore = create((set, get) => ({ const data = result.result; const rawMessages = Array.isArray(data.messages) ? data.messages as RawMessage[] : []; const filteredMessages = rawMessages.filter((msg) => !isToolResultRole(msg.role)); + // Restore file attachments for user messages (from cache + text patterns) + const enrichedMessages = enrichWithCachedImages(filteredMessages); const thinkingLevel = data.thinkingLevel ? String(data.thinkingLevel) : null; - set({ messages: filteredMessages, thinkingLevel, loading: false }); + set({ messages: enrichedMessages, thinkingLevel, loading: false }); + + // Async: load missing image previews from disk (updates in background) + loadMissingPreviews(enrichedMessages).then((updated) => { + if (updated) { + // Trigger re-render with updated previews + set({ messages: [...enrichedMessages] }); + } + }); const { pendingFinal, lastUserMessageAt } = get(); if (pendingFinal) { const recentAssistant = [...filteredMessages].reverse().find((msg) => { @@ -528,6 +667,21 @@ export const useChatStore = create((set, get) => ({ console.log('[sendMessage] Media paths:', attachments!.map(a => a.stagedPath)); } + // Cache image attachments BEFORE the IPC call to avoid race condition: + // history may reload (via Gateway event) before the RPC returns. + // Keyed by staged file path which appears in [media attached: ...]. + if (hasMedia && attachments) { + for (const a of attachments) { + _imageCache.set(a.stagedPath, { + fileName: a.fileName, + mimeType: a.mimeType, + fileSize: a.fileSize, + preview: a.preview, + }); + } + saveImageCache(_imageCache); + } + let result: { success: boolean; result?: { runId?: string }; error?: string }; if (hasMedia) {