diff --git a/packages/ui/src/components/message-part.tsx b/packages/ui/src/components/message-part.tsx
index 542ee5a..94fbcaf 100644
--- a/packages/ui/src/components/message-part.tsx
+++ b/packages/ui/src/components/message-part.tsx
@@ -94,7 +94,7 @@ interface MessagePartProps {
   return (
-
+
   providers().get(props.instanceId) || []
   const [isOpen, setIsOpen] = createSignal(false)
+  const qwenAuth = useQwenOAuth()
+  const [offlineModels, setOfflineModels] = createSignal<Set<string>>(new Set())
   let triggerRef!: HTMLButtonElement
   let searchInputRef!: HTMLInputElement
@@ -32,6 +37,41 @@ export default function ModelSelector(props: ModelSelectorProps) {
     }
   })
 
+  const readOfflineModels = () => {
+    if (typeof window === "undefined") return new Set()
+    try {
+      const raw = window.localStorage.getItem(OPENCODE_ZEN_OFFLINE_STORAGE_KEY)
+      const parsed = raw ? JSON.parse(raw) : []
+      return new Set(Array.isArray(parsed) ? parsed.filter((id) => typeof id === "string") : [])
+    } catch {
+      return new Set()
+    }
+  }
+
+  const refreshOfflineModels = () => {
+    setOfflineModels(readOfflineModels())
+  }
+
+  onMount(() => {
+    refreshOfflineModels()
+    if (typeof window === "undefined") return
+    const handleCustom = () => refreshOfflineModels()
+    const handleStorage = (event: StorageEvent) => {
+      if (event.key === OPENCODE_ZEN_OFFLINE_STORAGE_KEY) {
+        refreshOfflineModels()
+      }
+    }
+    window.addEventListener("opencode-zen-offline-models", handleCustom as EventListener)
+    window.addEventListener("storage", handleStorage)
+    onCleanup(() => {
+      window.removeEventListener("opencode-zen-offline-models", handleCustom as EventListener)
+      window.removeEventListener("storage", handleStorage)
+    })
+  })
+
+  const isOfflineModel = (model: FlatModel) =>
+    model.providerId === "opencode-zen" && offlineModels().has(model.id)
+
   const allModels = createMemo(() =>
     instanceProviders().flatMap((p) =>
       p.models.map((m) => ({
@@ -49,6 +89,21 @@ export default function ModelSelector(props: ModelSelectorProps) {
 
   const handleChange = async (value: FlatModel | null) => {
     if (!value) return
+
+    // Auto-trigger Qwen OAuth if needed
+    if (value.providerId === 'qwen-oauth' && !qwenAuth.isAuthenticated()) {
+      const confirmed = window.confirm("Qwen Code requires authentication. Sign in now?")
+      if (confirmed) {
+        try {
+          await qwenAuth.signIn()
+        } catch (error) {
+          log.error("Qwen authentication failed", error)
+          // Still set the model even if auth failed, so the user can retry later
+          // or may already have authenticated in another tab
+        }
+      }
+    }
+
     await props.onModelChange({ providerId: value.providerId, modelId: value.id })
   }
@@ -83,8 +138,11 @@ export default function ModelSelector(props: ModelSelectorProps) {
               class="selector-option"
             >
- - {itemProps.item.rawValue.name} + + {itemProps.item.rawValue.name} + {isOfflineModel(itemProps.item.rawValue) && ( + Offline + )} {itemProps.item.rawValue.providerName} • {itemProps.item.rawValue.providerId}/ @@ -106,8 +164,11 @@ export default function ModelSelector(props: ModelSelectorProps) { class="selector-trigger" >
- - Model: {currentModelValue()?.name ?? "None"} + + Model: {currentModelValue()?.name ?? "None"} + {currentModelValue() && isOfflineModel(currentModelValue() as FlatModel) && ( + Offline + )} {currentModelValue() && ( diff --git a/packages/ui/src/stores/session-actions.ts b/packages/ui/src/stores/session-actions.ts index 4bd733c..d1dd63b 100644 --- a/packages/ui/src/stores/session-actions.ts +++ b/packages/ui/src/stores/session-actions.ts @@ -2,18 +2,50 @@ import { resolvePastedPlaceholders } from "../lib/prompt-placeholders" import { instances } from "./instances" import { addTaskMessage } from "./task-actions" -import { addRecentModelPreference, setAgentModelPreference } from "./preferences" -import { sessions, withSession } from "./session-state" +import { addRecentModelPreference, setAgentModelPreference, getAgentModelPreference } from "./preferences" +import { sessions, withSession, providers, setActiveParentSession, setActiveSession } from "./session-state" import { getDefaultModel, isModelValid } from "./session-models" import { updateSessionInfo } from "./message-v2/session-info" import { messageStoreBus } from "./message-v2/bus" +import { buildRecordDisplayData } from "./message-v2/record-display-cache" import { getLogger } from "../lib/logger" +import { executeCompactionWrapper, getSessionCompactionState, setSessionCompactionState, type CompactionResult } from "./session-compaction" +import { createSession, loadMessages } from "./session-api" +import { showToastNotification } from "../lib/notifications" +import { showConfirmDialog } from "./alerts" +import { QwenOAuthManager } from "../lib/integrations/qwen-oauth" const log = getLogger("actions") const ID_LENGTH = 26 const BASE62_CHARS = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +const tokenWarningState = new Map() +const compactionAttemptState = new Map() +const TOKEN_WARNING_TTL_MS = 30_000 +const COMPACTION_ATTEMPT_TTL_MS = 60_000 +const COMPACTION_SUMMARY_MAX_CHARS = 4000 +const STREAM_TIMEOUT_MS = 120_000 +const OPENCODE_ZEN_OFFLINE_STORAGE_KEY = "opencode-zen-offline-models" + +function markOpencodeZenModelOffline(modelId: string): void { + if (typeof window === "undefined" || !modelId) return + try { + const raw = window.localStorage.getItem(OPENCODE_ZEN_OFFLINE_STORAGE_KEY) + const parsed = raw ? JSON.parse(raw) : [] + const list = Array.isArray(parsed) ? 
parsed : [] + if (!list.includes(modelId)) { + list.push(modelId) + window.localStorage.setItem(OPENCODE_ZEN_OFFLINE_STORAGE_KEY, JSON.stringify(list)) + window.dispatchEvent( + new CustomEvent("opencode-zen-offline-models", { detail: { modelId } }), + ) + } + } catch { + // Ignore storage errors + } +} + let lastTimestamp = 0 let localCounter = 0 @@ -55,6 +87,601 @@ function createId(prefix: string): string { return `${prefix}_${hex}${random}` } +function getModelLimits(instanceId: string, session: any): { contextLimit: number; outputLimit: number } { + const instanceProviders = providers().get(instanceId) || [] + const provider = instanceProviders.find((p) => p.id === session.model.providerId) + const model = provider?.models.find((m) => m.id === session.model.modelId) + + return { + contextLimit: model?.limit?.context || 128000, + outputLimit: model?.limit?.output || 32000, + } +} + +function getWarningKey(instanceId: string, sessionId: string): string { + return `${instanceId}:${sessionId}` +} + +function setTokenWarningState(instanceId: string, sessionId: string, suppressContextError: boolean): void { + tokenWarningState.set(getWarningKey(instanceId, sessionId), { + timestamp: Date.now(), + suppressContextError, + }) +} + +export function consumeTokenWarningSuppression(instanceId: string, sessionId: string): boolean { + const key = getWarningKey(instanceId, sessionId) + const entry = tokenWarningState.get(key) + if (!entry) return false + tokenWarningState.delete(key) + if (Date.now() - entry.timestamp > TOKEN_WARNING_TTL_MS) { + return false + } + return entry.suppressContextError +} + +export function consumeCompactionSuppression(instanceId: string, sessionId: string): boolean { + const key = getWarningKey(instanceId, sessionId) + const entry = compactionAttemptState.get(key) + if (!entry) return false + compactionAttemptState.delete(key) + if (Date.now() - entry.timestamp > COMPACTION_ATTEMPT_TTL_MS) { + return false + } + return entry.suppressContextError +} + +function buildCompactionSeed(result: CompactionResult): string { + const lines: string[] = [] + lines.push("Compacted session summary.") + lines.push("") + lines.push(`Summary: ${result.human_summary}`) + + const details = result.detailed_summary + if (details?.what_was_done?.length) { + lines.push("") + lines.push("What was done:") + details.what_was_done.slice(0, 8).forEach((entry) => lines.push(`- ${entry}`)) + } + if (details?.files?.length) { + lines.push("") + lines.push("Files:") + details.files.slice(0, 8).forEach((file) => lines.push(`- ${file.path}: ${file.notes}`)) + } + if (details?.current_state) { + lines.push("") + lines.push(`Current state: ${details.current_state}`) + } + if (details?.next_steps?.length) { + lines.push("") + lines.push("Next steps:") + details.next_steps.slice(0, 6).forEach((step) => lines.push(`- ${step}`)) + } + if (details?.blockers?.length) { + lines.push("") + lines.push("Blockers:") + details.blockers.slice(0, 6).forEach((blocker) => lines.push(`- ${blocker}`)) + } + + const output = lines.join("\n").trim() + if (output.length <= COMPACTION_SUMMARY_MAX_CHARS) { + return output + } + return `${output.slice(0, COMPACTION_SUMMARY_MAX_CHARS - 3)}...` +} + +async function checkTokenBudgetBeforeSend( + instanceId: string, + sessionId: string, + session: any, +): Promise { + const store = messageStoreBus.getInstance(instanceId) + if (!store) return true + + const usage = store.getSessionUsage(sessionId) + const { contextLimit, outputLimit } = getModelLimits(instanceId, session) + + // Use 
actualUsageTokens which is the REAL context usage from the last message + // NOT the cumulative total of all tokens ever processed + const currentContextUsage = usage?.actualUsageTokens || 0 + + // Only show warning if we're actually near the limit + // Using 80% threshold before warning + const warningThreshold = contextLimit * 0.8 + + const existingWarning = tokenWarningState.get(getWarningKey(instanceId, sessionId)) + if (existingWarning && Date.now() - existingWarning.timestamp < TOKEN_WARNING_TTL_MS) { + return true + } + + if (getSessionCompactionState(instanceId, sessionId)) { + return false + } + + if (currentContextUsage >= warningThreshold && currentContextUsage + outputLimit >= contextLimit) { + log.warn("Token budget approaching limit", { + instanceId, + sessionId, + currentContextUsage, + outputLimit, + contextLimit, + warningThreshold, + }) + + const confirmed = await showConfirmDialog( + `Context limit approaching (${currentContextUsage.toLocaleString()} / ${contextLimit.toLocaleString()} tokens). Compact now to continue?`, + { + title: "Token Budget Warning", + confirmLabel: "Compact", + cancelLabel: "Continue Anyway", + }, + ) + + if (confirmed) { + setTokenWarningState(instanceId, sessionId, true) + await compactSession(instanceId, sessionId) + return false + } + + setTokenWarningState(instanceId, sessionId, true) + return true + } + + return true +} + +type ExternalChatMessage = { role: "user" | "assistant" | "system"; content: string } + +function shouldForceEnglish(prompt: string): boolean { + const text = prompt.trim() + if (!text) return false + + const lower = text.toLowerCase() + const explicitEnglish = /(answer|respond|reply|write|speak|output)[^a-zA-Z]{0,24}english\b/.test(lower) + if (explicitEnglish) return true + + const explicitOther = /(answer|respond|reply|write|speak|output)[^a-zA-Z]{0,24}(spanish|español|french|german|italian|portuguese|brazilian|arabic|hindi|urdu|turkish|russian|japanese|korean|chinese|mandarin|cantonese|thai|vietnamese|indonesian|malay|polish|dutch|hebrew)\b/.test(lower) + if (explicitOther) return false + + const nonLatin = + /[\u0100-\u02FF\u0370-\u1FFF\u2C00-\uD7FF\u3040-\u30FF\u3400-\u9FFF\uAC00-\uD7AF\u0590-\u08FF\u0900-\u0FFF]/.test(text) + if (nonLatin) return false + + return true +} + +function buildLanguageSystemInstruction(prompt: string): string | undefined { + if (!shouldForceEnglish(prompt)) return undefined + return "Respond in English unless the user explicitly requests another language." 
+} + +function extractPlainTextFromParts(parts: Array<{ type?: string; text?: unknown; filename?: string }>): string { + const segments: string[] = [] + for (const part of parts) { + if (!part || typeof part !== "object") continue + if (part.type === "text" && typeof part.text === "string") { + segments.push(part.text) + } else if (part.type === "file" && typeof part.filename === "string") { + segments.push(`[file: ${part.filename}]`) + } + } + return segments.join("\n").trim() +} + +function buildExternalChatMessages( + instanceId: string, + sessionId: string, + systemMessage?: string, +): ExternalChatMessage[] { + const store = messageStoreBus.getOrCreate(instanceId) + const messageIds = store.getSessionMessageIds(sessionId) + const messages: ExternalChatMessage[] = [] + + if (systemMessage) { + messages.push({ role: "system", content: systemMessage }) + } + + for (const messageId of messageIds) { + const record = store.getMessage(messageId) + if (!record) continue + const { orderedParts } = buildRecordDisplayData(instanceId, record) + const content = extractPlainTextFromParts(orderedParts as Array<{ type?: string; text?: unknown; filename?: string }>) + if (!content) continue + messages.push({ + role: record.role === "assistant" ? "assistant" : "user", + content, + }) + } + + return messages +} + +async function readSseStream( + response: Response, + onData: (data: string) => void, + idleTimeoutMs: number = 45_000, +): Promise { + if (!response.body) { + throw new Error("Response body is missing") + } + const reader = response.body.getReader() + const decoder = new TextDecoder() + let buffer = "" + let shouldStop = false + let timedOut = false + let idleTimer: ReturnType | undefined + + const resetIdleTimer = () => { + if (idleTimer) clearTimeout(idleTimer) + idleTimer = setTimeout(() => { + timedOut = true + reader.cancel().catch(() => {}) + }, idleTimeoutMs) + } + resetIdleTimer() + + try { + while (!shouldStop) { + const { done, value } = await reader.read() + if (done) break + resetIdleTimer() + buffer += decoder.decode(value, { stream: true }) + const lines = buffer.split("\n") + buffer = lines.pop() || "" + + for (const line of lines) { + const trimmed = line.trim() + if (!trimmed.startsWith("data:")) continue + const data = trimmed.slice(5).trim() + if (!data) continue + if (data === "[DONE]") { + shouldStop = true + break + } + onData(data) + } + } + if (timedOut) { + throw new Error("Stream timed out") + } + } finally { + if (idleTimer) clearTimeout(idleTimer) + reader.releaseLock() + } +} + +async function streamOllamaChat( + instanceId: string, + sessionId: string, + providerId: string, + modelId: string, + systemMessage: string | undefined, + messageId: string, + assistantMessageId: string, + assistantPartId: string, +): Promise { + const controller = new AbortController() + const timeoutId = setTimeout(() => controller.abort(), STREAM_TIMEOUT_MS) + + const response = await fetch("/api/ollama/chat", { + method: "POST", + headers: { "Content-Type": "application/json" }, + signal: controller.signal, + body: JSON.stringify({ + model: modelId, + messages: buildExternalChatMessages(instanceId, sessionId, systemMessage), + stream: true, + }), + }) + + if (!response.ok) { + const errorText = await response.text().catch(() => "") + throw new Error(errorText || `Ollama chat failed (${response.status})`) + } + + const store = messageStoreBus.getOrCreate(instanceId) + let fullText = "" + + try { + await readSseStream(response, (data) => { + try { + const chunk = JSON.parse(data) + // 
Check for error response from server + if (chunk?.error) { + throw new Error(chunk.error) + } + const delta = chunk?.message?.content + if (typeof delta !== "string" || delta.length === 0) return + fullText += delta + store.applyPartUpdate({ + messageId: assistantMessageId, + part: { id: assistantPartId, type: "text", text: fullText } as any, + }) + } catch (e) { + if (e instanceof Error) throw e + // Ignore malformed chunks + } + }) + } finally { + clearTimeout(timeoutId) + } + + store.upsertMessage({ + id: assistantMessageId, + sessionId, + role: "assistant", + status: "complete", + updatedAt: Date.now(), + isEphemeral: false, + }) + store.setMessageInfo(assistantMessageId, { + id: assistantMessageId, + role: "assistant", + providerID: providerId, + modelID: modelId, + time: { created: store.getMessageInfo(assistantMessageId)?.time?.created ?? Date.now(), completed: Date.now() }, + } as any) + store.upsertMessage({ + id: messageId, + sessionId, + role: "user", + status: "sent", + updatedAt: Date.now(), + isEphemeral: false, + }) +} + +async function streamQwenChat( + instanceId: string, + sessionId: string, + providerId: string, + modelId: string, + systemMessage: string | undefined, + accessToken: string, + resourceUrl: string | undefined, + messageId: string, + assistantMessageId: string, + assistantPartId: string, +): Promise { + const controller = new AbortController() + const timeoutId = setTimeout(() => controller.abort(), STREAM_TIMEOUT_MS) + + const response = await fetch("/api/qwen/chat", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${accessToken}`, + }, + signal: controller.signal, + body: JSON.stringify({ + model: modelId, + messages: buildExternalChatMessages(instanceId, sessionId, systemMessage), + stream: true, + resource_url: resourceUrl, + }), + }) + + if (!response.ok) { + const errorText = await response.text().catch(() => "") + throw new Error(errorText || `Qwen chat failed (${response.status})`) + } + + const store = messageStoreBus.getOrCreate(instanceId) + let fullText = "" + + try { + await readSseStream(response, (data) => { + try { + const chunk = JSON.parse(data) + const delta = + chunk?.choices?.[0]?.delta?.content ?? + chunk?.choices?.[0]?.message?.content + if (typeof delta !== "string" || delta.length === 0) return + fullText += delta + store.applyPartUpdate({ + messageId: assistantMessageId, + part: { id: assistantPartId, type: "text", text: fullText } as any, + }) + } catch { + // Ignore malformed chunks + } + }) + } finally { + clearTimeout(timeoutId) + } + + store.upsertMessage({ + id: assistantMessageId, + sessionId, + role: "assistant", + status: "complete", + updatedAt: Date.now(), + isEphemeral: false, + }) + store.setMessageInfo(assistantMessageId, { + id: assistantMessageId, + role: "assistant", + providerID: providerId, + modelID: modelId, + time: { created: store.getMessageInfo(assistantMessageId)?.time?.created ?? 
Date.now(), completed: Date.now() }, + } as any) + store.upsertMessage({ + id: messageId, + sessionId, + role: "user", + status: "sent", + updatedAt: Date.now(), + isEphemeral: false, + }) +} + +async function streamOpenCodeZenChat( + instanceId: string, + sessionId: string, + providerId: string, + modelId: string, + systemMessage: string | undefined, + messageId: string, + assistantMessageId: string, + assistantPartId: string, +): Promise { + const controller = new AbortController() + const timeoutId = setTimeout(() => controller.abort(), STREAM_TIMEOUT_MS) + + const response = await fetch("/api/opencode-zen/chat", { + method: "POST", + headers: { "Content-Type": "application/json" }, + signal: controller.signal, + body: JSON.stringify({ + model: modelId, + messages: buildExternalChatMessages(instanceId, sessionId, systemMessage), + stream: true, + }), + }) + + if (!response.ok) { + const errorText = await response.text().catch(() => "") + throw new Error(errorText || `OpenCode Zen chat failed (${response.status})`) + } + + const store = messageStoreBus.getOrCreate(instanceId) + let fullText = "" + + try { + await readSseStream(response, (data) => { + try { + const chunk = JSON.parse(data) + if (chunk?.error) { + throw new Error(typeof chunk.error === "string" ? chunk.error : "OpenCode Zen streaming error") + } + const delta = + chunk?.choices?.[0]?.delta?.content ?? + chunk?.choices?.[0]?.message?.content + if (typeof delta !== "string" || delta.length === 0) return + fullText += delta + store.applyPartUpdate({ + messageId: assistantMessageId, + part: { id: assistantPartId, type: "text", text: fullText } as any, + }) + } catch (error) { + if (error instanceof Error) { + throw error + } + } + }) + } finally { + clearTimeout(timeoutId) + } + + + store.upsertMessage({ + id: assistantMessageId, + sessionId, + role: "assistant", + status: "complete", + updatedAt: Date.now(), + isEphemeral: false, + }) + store.setMessageInfo(assistantMessageId, { + id: assistantMessageId, + role: "assistant", + providerID: providerId, + modelID: modelId, + time: { created: store.getMessageInfo(assistantMessageId)?.time?.created ?? Date.now(), completed: Date.now() }, + } as any) + store.upsertMessage({ + id: messageId, + sessionId, + role: "user", + status: "sent", + updatedAt: Date.now(), + isEphemeral: false, + }) +} + +async function streamZAIChat( + instanceId: string, + sessionId: string, + providerId: string, + modelId: string, + systemMessage: string | undefined, + messageId: string, + assistantMessageId: string, + assistantPartId: string, +): Promise { + const controller = new AbortController() + const timeoutId = setTimeout(() => controller.abort(), STREAM_TIMEOUT_MS) + + const response = await fetch("/api/zai/chat", { + method: "POST", + headers: { "Content-Type": "application/json" }, + signal: controller.signal, + body: JSON.stringify({ + model: modelId, + messages: buildExternalChatMessages(instanceId, sessionId, systemMessage), + stream: true, + }), + }) + + if (!response.ok) { + const errorText = await response.text().catch(() => "") + throw new Error(errorText || `Z.AI chat failed (${response.status})`) + } + + const store = messageStoreBus.getOrCreate(instanceId) + let fullText = "" + + try { + await readSseStream(response, (data) => { + try { + const chunk = JSON.parse(data) + // Check for error response from server + if (chunk?.error) { + throw new Error(chunk.error) + } + const delta = + chunk?.choices?.[0]?.delta?.content ?? 
+ chunk?.choices?.[0]?.message?.content + if (typeof delta !== "string" || delta.length === 0) return + fullText += delta + store.applyPartUpdate({ + messageId: assistantMessageId, + part: { id: assistantPartId, type: "text", text: fullText } as any, + }) + } catch (e) { + if (e instanceof Error) throw e + // Ignore malformed chunks + } + }) + } finally { + clearTimeout(timeoutId) + } + + store.upsertMessage({ + id: assistantMessageId, + sessionId, + role: "assistant", + status: "complete", + updatedAt: Date.now(), + isEphemeral: false, + }) + store.setMessageInfo(assistantMessageId, { + id: assistantMessageId, + role: "assistant", + providerID: providerId, + modelID: modelId, + time: { created: store.getMessageInfo(assistantMessageId)?.time?.created ?? Date.now(), completed: Date.now() }, + } as any) + store.upsertMessage({ + id: messageId, + sessionId, + role: "user", + status: "sent", + updatedAt: Date.now(), + isEphemeral: false, + }) +} + async function sendMessage( instanceId: string, sessionId: string, @@ -73,6 +700,21 @@ async function sendMessage( throw new Error("Session not found") } + let effectiveModel = session.model + if (!isModelValid(instanceId, effectiveModel)) { + const fallback = await getDefaultModel(instanceId, session.agent || undefined) + if (isModelValid(instanceId, fallback)) { + await updateSessionModel(instanceId, sessionId, fallback) + effectiveModel = fallback + } + } + + const sessionForLimits = { ...session, model: effectiveModel } + const canSend = await checkTokenBudgetBeforeSend(instanceId, sessionId, sessionForLimits) + if (!canSend) { + return "" + } + const messageId = createId("msg") // If taskId is provided, associate this message with the task and set it as active @@ -99,7 +741,7 @@ async function sendMessage( id: textPartId, type: "text" as const, text: resolvedPrompt, - synthetic: true, + synthetic: false, renderCache: undefined, }, ] @@ -178,17 +820,176 @@ async function sendMessage( /* trigger reactivity for legacy session data */ }) + const providerId = effectiveModel.providerId + const languageSystem = buildLanguageSystemInstruction(prompt) + if (providerId === "ollama-cloud" || providerId === "qwen-oauth" || providerId === "opencode-zen" || providerId === "zai") { + const store = messageStoreBus.getOrCreate(instanceId) + const now = Date.now() + const assistantMessageId = createId("msg") + const assistantPartId = createId("part") + + store.upsertMessage({ + id: assistantMessageId, + sessionId, + role: "assistant", + status: "streaming", + parts: [{ id: assistantPartId, type: "text", text: "" } as any], + createdAt: now, + updatedAt: now, + isEphemeral: true, + }) + store.setMessageInfo(assistantMessageId, { + id: assistantMessageId, + role: "assistant", + providerID: effectiveModel.providerId, + modelID: effectiveModel.modelId, + time: { created: now, completed: 0 }, + } as any) + store.upsertMessage({ + id: messageId, + sessionId, + role: "user", + status: "sent", + updatedAt: now, + isEphemeral: false, + }) + + try { + if (providerId === "ollama-cloud") { + await streamOllamaChat( + instanceId, + sessionId, + providerId, + effectiveModel.modelId, + languageSystem, + messageId, + assistantMessageId, + assistantPartId, + ) + } else if (providerId === "opencode-zen") { + await streamOpenCodeZenChat( + instanceId, + sessionId, + providerId, + effectiveModel.modelId, + languageSystem, + messageId, + assistantMessageId, + assistantPartId, + ) + } else if (providerId === "zai") { + await streamZAIChat( + instanceId, + sessionId, + providerId, + 
effectiveModel.modelId, + languageSystem, + messageId, + assistantMessageId, + assistantPartId, + ) + } else { + const qwenManager = new QwenOAuthManager() + const token = await qwenManager.getValidToken() + if (!token?.access_token) { + showToastNotification({ + title: "Qwen OAuth unavailable", + message: "Please sign in to Qwen Code again to refresh your token.", + variant: "warning", + duration: 8000, + }) + store.upsertMessage({ + id: messageId, + sessionId, + role: "user", + status: "error", + updatedAt: Date.now(), + }) + store.upsertMessage({ + id: assistantMessageId, + sessionId, + role: "assistant", + status: "error", + updatedAt: Date.now(), + isEphemeral: false, + }) + return messageId + } + + await streamQwenChat( + instanceId, + sessionId, + providerId, + effectiveModel.modelId, + languageSystem, + token.access_token, + token.resource_url, + messageId, + assistantMessageId, + assistantPartId, + ) + } + return messageId + } catch (error: any) { + if (providerId === "opencode-zen") { + const message = String(error?.message || "") + const match = message.match(/Model\s+([A-Za-z0-9._-]+)\s+not supported/i) + if (match?.[1]) { + markOpencodeZenModelOffline(match[1]) + } + } + store.upsertMessage({ + id: messageId, + sessionId, + role: "user", + status: "error", + updatedAt: Date.now(), + }) + store.upsertMessage({ + id: assistantMessageId, + sessionId, + role: "assistant", + status: "error", + updatedAt: Date.now(), + isEphemeral: false, + }) + store.setMessageInfo(assistantMessageId, { + id: assistantMessageId, + role: "assistant", + providerID: effectiveModel.providerId, + modelID: effectiveModel.modelId, + time: { created: now, completed: Date.now() }, + error: { name: "UnknownError", message: error?.message || "Request failed" }, + } as any) + showToastNotification({ + title: + providerId === "ollama-cloud" + ? "Ollama request failed" + : providerId === "zai" + ? "Z.AI request failed" + : providerId === "opencode-zen" + ? "OpenCode Zen request failed" + : "Qwen request failed", + message: error?.message || "Request failed", + variant: "error", + duration: 8000, + }) + throw error + } + } + const requestBody = { messageID: messageId, parts: requestParts, ...(session.agent && { agent: session.agent }), - ...(session.model.providerId && - session.model.modelId && { - model: { - providerID: session.model.providerId, - modelID: session.model.modelId, - }, - }), + ...(effectiveModel.providerId && + effectiveModel.modelId && { + model: { + providerID: effectiveModel.providerId, + modelID: effectiveModel.modelId, + }, + }), + ...(languageSystem && { system: languageSystem }), } log.info("sendMessage", { @@ -207,11 +1008,20 @@ async function sendMessage( } try { - log.info("session.promptAsync starting", { instanceId, sessionId }) - const response = await instance.client.session.promptAsync({ + log.info("session.promptAsync starting", { instanceId, sessionId, providerId: session.model.providerId }) + + // Add timeout to prevent infinite hanging + const timeoutMs = 60000 // 60 seconds + const timeoutPromise = new Promise((_, reject) => { + setTimeout(() => reject(new Error(`Request timed out after ${timeoutMs / 1000}s. 
The model provider may not be available.`)), timeoutMs) + }) + + const promptPromise = instance.client.session.promptAsync({ path: { id: sessionId }, body: requestBody, }) + + const response = await Promise.race([promptPromise, timeoutPromise]) log.info("session.promptAsync success", { instanceId, sessionId, response }) return messageId } catch (error: any) { @@ -222,18 +1032,27 @@ async function sendMessage( stack: error?.stack, requestBody }) - + // Update message status to error in store const store = messageStoreBus.getOrCreate(instanceId) store.upsertMessage({ id: messageId, sessionId, + role: "user", status: "error", updatedAt: Date.now(), }) - + + // Show user-friendly error notification + showToastNotification({ + title: "Message failed", + message: error?.message || "Failed to send message. Check your model configuration.", + variant: "error", + duration: 8000, + }) + throw error - + } } @@ -321,6 +1140,155 @@ async function abortSession(instanceId: string, sessionId: string): Promise { + const instance = instances().get(instanceId) + if (!instance || !instance.client) { + return { + success: false, + mode: "compact", + human_summary: "Instance not ready", + token_before: 0, + token_after: 0, + token_reduction_pct: 0, + } + } + + const instanceSessions = sessions().get(instanceId) + const session = instanceSessions?.get(sessionId) + if (!session) { + return { + success: false, + mode: "compact", + human_summary: "Session not found", + token_before: 0, + token_after: 0, + token_reduction_pct: 0, + } + } + + const store = messageStoreBus.getInstance(instanceId) + const messageCount = store?.getSessionMessageIds(sessionId)?.length ?? 0 + if (messageCount <= 2) { + log.info("compactSession: Session too small to compact", { instanceId, sessionId, count: messageCount }) + return { + success: true, + mode: "compact", + human_summary: "Session too small to compact", + token_before: 0, + token_after: 0, + token_reduction_pct: 0, + } + } + + const usageBefore = store?.getSessionUsage(sessionId) + const tokenBefore = usageBefore + ? (usageBefore.totalInputTokens || 0) + (usageBefore.totalOutputTokens || 0) + (usageBefore.totalReasoningTokens || 0) + : 0 + + log.info("compactSession: Running local compaction", { instanceId, sessionId, messageCount }) + setSessionCompactionState(instanceId, sessionId, true) + compactionAttemptState.set(getWarningKey(instanceId, sessionId), { + timestamp: Date.now(), + suppressContextError: true, + }) + + try { + const result = await executeCompactionWrapper(instanceId, sessionId, "compact") + if (!result.success) { + return result + } + + const compactedSession = await createSession(instanceId, session.agent || undefined) + + if (session.model.providerId && session.model.modelId) { + await updateSessionModel(instanceId, compactedSession.id, { + providerId: session.model.providerId, + modelId: session.model.modelId, + }) + } + + if (session.tasks && session.tasks.length > 0) { + const tasksCopy = session.tasks.map((task) => ({ ...task })) + withSession(instanceId, compactedSession.id, (nextSession) => { + nextSession.tasks = tasksCopy + nextSession.activeTaskId = undefined + }) + } + + if (session.parentId) { + withSession(instanceId, compactedSession.id, (nextSession) => { + nextSession.parentId = session.parentId + }) + withSession(instanceId, session.parentId, (parentSession) => { + if (!parentSession.tasks) return + parentSession.tasks = parentSession.tasks.map((task) => ( + task.taskSessionId === sessionId + ? 
{ ...task, taskSessionId: compactedSession.id } + : task + )) + }) + } + + const summaryText = buildCompactionSeed(result) + const summaryMessageId = createId("msg") + const summaryPartId = createId("part") + + await instance.client.session.promptAsync({ + path: { id: compactedSession.id }, + body: { + messageID: summaryMessageId, + agent: session.agent || undefined, + model: session.model.providerId && session.model.modelId + ? { providerID: session.model.providerId, modelID: session.model.modelId } + : undefined, + noReply: true, + system: "You are continuing from a compacted session. Use the summary below as context.", + parts: [ + { + id: summaryPartId, + type: "text", + text: summaryText, + }, + ], + }, + }) + + if (session.parentId) { + setActiveSession(instanceId, compactedSession.id) + } else { + setActiveParentSession(instanceId, compactedSession.id) + } + await loadMessages(instanceId, compactedSession.id, true) + updateSessionInfo(instanceId, compactedSession.id) + + showToastNotification({ + title: "Session compacted", + message: "Created a new compacted session with a summary to continue work.", + variant: "success", + duration: 8000, + }) + + log.info("compactSession: Complete", { instanceId, sessionId, compactedSessionId: compactedSession.id }) + return { + ...result, + token_before: tokenBefore, + } + } catch (error) { + log.error("compactSession: Failed to compact session", { instanceId, sessionId, error }) + return { + success: false, + mode: "compact", + human_summary: "Compaction failed", + token_before: tokenBefore, + token_after: tokenBefore, + token_reduction_pct: 0, + } + } finally { + compactionAttemptState.delete(getWarningKey(instanceId, sessionId)) + setSessionCompactionState(instanceId, sessionId, false) + } +} + async function updateSessionAgent(instanceId: string, sessionId: string, agent: string): Promise { const instanceSessions = sessions().get(instanceId) const session = instanceSessions?.get(sessionId) @@ -328,7 +1296,9 @@ async function updateSessionAgent(instanceId: string, sessionId: string, agent: throw new Error("Session not found") } - const nextModel = await getDefaultModel(instanceId, agent) + const agentModelPreference = await getAgentModelPreference(instanceId, agent) + const defaultModel = await getDefaultModel(instanceId, agent) + const nextModel = agentModelPreference || defaultModel const shouldApplyModel = isModelValid(instanceId, nextModel) withSession(instanceId, sessionId, (current) => { @@ -338,7 +1308,7 @@ async function updateSessionAgent(instanceId: string, sessionId: string, agent: } }) - if (agent && shouldApplyModel) { + if (agent && shouldApplyModel && !agentModelPreference) { await setAgentModelPreference(instanceId, agent, nextModel) } @@ -367,6 +1337,27 @@ async function updateSessionModel( current.model = model }) + const propagateModel = (targetSessionId?: string | null) => { + if (!targetSessionId || targetSessionId === sessionId) return + withSession(instanceId, targetSessionId, (current) => { + current.model = model + }) + updateSessionInfo(instanceId, targetSessionId) + } + + if (session.parentId) { + propagateModel(session.parentId) + } + + if (session.tasks && session.tasks.length > 0) { + const seen = new Set() + for (const task of session.tasks) { + if (!task.taskSessionId || seen.has(task.taskSessionId)) continue + seen.add(task.taskSessionId) + propagateModel(task.taskSessionId) + } + } + if (session.agent) { await setAgentModelPreference(instanceId, session.agent, model) } @@ -459,6 +1450,7 @@ async function 
forkSession(instanceId: string, sessionId: string): Promise
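A minimal standalone sketch of the opencode-zen offline-model contract introduced above, assuming a browser context. The storage key and event name come from the diff; readOfflineModelIds and subscribeOfflineModels are hypothetical helpers shown only to illustrate the read/subscribe side that model-selector.tsx implements inline.

// Sketch of the opencode-zen offline-model contract (assumed helper names).
const OPENCODE_ZEN_OFFLINE_STORAGE_KEY = "opencode-zen-offline-models"

// Read the persisted list of offline model ids, tolerating malformed storage.
function readOfflineModelIds(): Set<string> {
  try {
    const raw = window.localStorage.getItem(OPENCODE_ZEN_OFFLINE_STORAGE_KEY)
    const parsed = raw ? JSON.parse(raw) : []
    return new Set(Array.isArray(parsed) ? parsed.filter((id): id is string => typeof id === "string") : [])
  } catch {
    return new Set()
  }
}

// Subscribe to changes: same-tab updates arrive via the CustomEvent dispatched
// by markOpencodeZenModelOffline, cross-tab updates via the "storage" event.
// Returns an unsubscribe function.
function subscribeOfflineModels(onChange: (ids: Set<string>) => void): () => void {
  const notify = () => onChange(readOfflineModelIds())
  const onStorage = (event: StorageEvent) => {
    if (event.key === OPENCODE_ZEN_OFFLINE_STORAGE_KEY) notify()
  }
  window.addEventListener("opencode-zen-offline-models", notify)
  window.addEventListener("storage", onStorage)
  return () => {
    window.removeEventListener("opencode-zen-offline-models", notify)
    window.removeEventListener("storage", onStorage)
  }
}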