diff --git a/packages/ui/src/components/message-part.tsx b/packages/ui/src/components/message-part.tsx
index 542ee5a..94fbcaf 100644
--- a/packages/ui/src/components/message-part.tsx
+++ b/packages/ui/src/components/message-part.tsx
@@ -94,7 +94,7 @@ interface MessagePartProps {
   return (
-
+
   providers().get(props.instanceId) || []
   const [isOpen, setIsOpen] = createSignal(false)
+  const qwenAuth = useQwenOAuth()
+  const [offlineModels, setOfflineModels] = createSignal<Set<string>>(new Set())
   let triggerRef!: HTMLButtonElement
   let searchInputRef!: HTMLInputElement
@@ -32,6 +37,41 @@ export default function ModelSelector(props: ModelSelectorProps) {
     }
   })
 
+  const readOfflineModels = () => {
+    if (typeof window === "undefined") return new Set()
+    try {
+      const raw = window.localStorage.getItem(OPENCODE_ZEN_OFFLINE_STORAGE_KEY)
+      const parsed = raw ? JSON.parse(raw) : []
+      return new Set(Array.isArray(parsed) ? parsed.filter((id) => typeof id === "string") : [])
+    } catch {
+      return new Set()
+    }
+  }
+
+  const refreshOfflineModels = () => {
+    setOfflineModels(readOfflineModels())
+  }
+
+  onMount(() => {
+    refreshOfflineModels()
+    if (typeof window === "undefined") return
+    const handleCustom = () => refreshOfflineModels()
+    const handleStorage = (event: StorageEvent) => {
+      if (event.key === OPENCODE_ZEN_OFFLINE_STORAGE_KEY) {
+        refreshOfflineModels()
+      }
+    }
+    window.addEventListener("opencode-zen-offline-models", handleCustom as EventListener)
+    window.addEventListener("storage", handleStorage)
+    onCleanup(() => {
+      window.removeEventListener("opencode-zen-offline-models", handleCustom as EventListener)
+      window.removeEventListener("storage", handleStorage)
+    })
+  })
+
+  const isOfflineModel = (model: FlatModel) =>
+    model.providerId === "opencode-zen" && offlineModels().has(model.id)
+
   const allModels = createMemo(() =>
     instanceProviders().flatMap((p) =>
       p.models.map((m) => ({
@@ -49,6 +89,21 @@ export default function ModelSelector(props: ModelSelectorProps) {
 
   const handleChange = async (value: FlatModel | null) => {
     if (!value) return
+
+    // Auto-trigger Qwen OAuth if needed
+    if (value.providerId === 'qwen-oauth' && !qwenAuth.isAuthenticated()) {
+      const confirmed = window.confirm("Qwen Code requires authentication. Sign in now?")
+      if (confirmed) {
+        try {
+          await qwenAuth.signIn()
+        } catch (error) {
+          log.error("Qwen authentication failed", error)
+          // Still set the model even if auth failed, so the user can retry later
+          // or may already have authenticated in another tab
+        }
+      }
+    }
+
     await props.onModelChange({ providerId: value.providerId, modelId: value.id })
   }
@@ -83,8 +138,11 @@ export default function ModelSelector(props: ModelSelectorProps) {
               class="selector-option"
             >
- - {itemProps.item.rawValue.name} + + {itemProps.item.rawValue.name} + {isOfflineModel(itemProps.item.rawValue) && ( + Offline + )} {itemProps.item.rawValue.providerName} • {itemProps.item.rawValue.providerId}/ @@ -106,8 +164,11 @@ export default function ModelSelector(props: ModelSelectorProps) { class="selector-trigger" >
- - Model: {currentModelValue()?.name ?? "None"} + + Model: {currentModelValue()?.name ?? "None"} + {currentModelValue() && isOfflineModel(currentModelValue() as FlatModel) && ( + Offline + )} {currentModelValue() && ( diff --git a/packages/ui/src/stores/session-actions.ts b/packages/ui/src/stores/session-actions.ts index 4bd733c..d1dd63b 100644 --- a/packages/ui/src/stores/session-actions.ts +++ b/packages/ui/src/stores/session-actions.ts @@ -2,18 +2,50 @@ import { resolvePastedPlaceholders } from "../lib/prompt-placeholders" import { instances } from "./instances" import { addTaskMessage } from "./task-actions" -import { addRecentModelPreference, setAgentModelPreference } from "./preferences" -import { sessions, withSession } from "./session-state" +import { addRecentModelPreference, setAgentModelPreference, getAgentModelPreference } from "./preferences" +import { sessions, withSession, providers, setActiveParentSession, setActiveSession } from "./session-state" import { getDefaultModel, isModelValid } from "./session-models" import { updateSessionInfo } from "./message-v2/session-info" import { messageStoreBus } from "./message-v2/bus" +import { buildRecordDisplayData } from "./message-v2/record-display-cache" import { getLogger } from "../lib/logger" +import { executeCompactionWrapper, getSessionCompactionState, setSessionCompactionState, type CompactionResult } from "./session-compaction" +import { createSession, loadMessages } from "./session-api" +import { showToastNotification } from "../lib/notifications" +import { showConfirmDialog } from "./alerts" +import { QwenOAuthManager } from "../lib/integrations/qwen-oauth" const log = getLogger("actions") const ID_LENGTH = 26 const BASE62_CHARS = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +const tokenWarningState = new Map() +const compactionAttemptState = new Map() +const TOKEN_WARNING_TTL_MS = 30_000 +const COMPACTION_ATTEMPT_TTL_MS = 60_000 +const COMPACTION_SUMMARY_MAX_CHARS = 4000 +const STREAM_TIMEOUT_MS = 120_000 +const OPENCODE_ZEN_OFFLINE_STORAGE_KEY = "opencode-zen-offline-models" + +function markOpencodeZenModelOffline(modelId: string): void { + if (typeof window === "undefined" || !modelId) return + try { + const raw = window.localStorage.getItem(OPENCODE_ZEN_OFFLINE_STORAGE_KEY) + const parsed = raw ? JSON.parse(raw) : [] + const list = Array.isArray(parsed) ? 
parsed : [] + if (!list.includes(modelId)) { + list.push(modelId) + window.localStorage.setItem(OPENCODE_ZEN_OFFLINE_STORAGE_KEY, JSON.stringify(list)) + window.dispatchEvent( + new CustomEvent("opencode-zen-offline-models", { detail: { modelId } }), + ) + } + } catch { + // Ignore storage errors + } +} + let lastTimestamp = 0 let localCounter = 0 @@ -55,6 +87,601 @@ function createId(prefix: string): string { return `${prefix}_${hex}${random}` } +function getModelLimits(instanceId: string, session: any): { contextLimit: number; outputLimit: number } { + const instanceProviders = providers().get(instanceId) || [] + const provider = instanceProviders.find((p) => p.id === session.model.providerId) + const model = provider?.models.find((m) => m.id === session.model.modelId) + + return { + contextLimit: model?.limit?.context || 128000, + outputLimit: model?.limit?.output || 32000, + } +} + +function getWarningKey(instanceId: string, sessionId: string): string { + return `${instanceId}:${sessionId}` +} + +function setTokenWarningState(instanceId: string, sessionId: string, suppressContextError: boolean): void { + tokenWarningState.set(getWarningKey(instanceId, sessionId), { + timestamp: Date.now(), + suppressContextError, + }) +} + +export function consumeTokenWarningSuppression(instanceId: string, sessionId: string): boolean { + const key = getWarningKey(instanceId, sessionId) + const entry = tokenWarningState.get(key) + if (!entry) return false + tokenWarningState.delete(key) + if (Date.now() - entry.timestamp > TOKEN_WARNING_TTL_MS) { + return false + } + return entry.suppressContextError +} + +export function consumeCompactionSuppression(instanceId: string, sessionId: string): boolean { + const key = getWarningKey(instanceId, sessionId) + const entry = compactionAttemptState.get(key) + if (!entry) return false + compactionAttemptState.delete(key) + if (Date.now() - entry.timestamp > COMPACTION_ATTEMPT_TTL_MS) { + return false + } + return entry.suppressContextError +} + +function buildCompactionSeed(result: CompactionResult): string { + const lines: string[] = [] + lines.push("Compacted session summary.") + lines.push("") + lines.push(`Summary: ${result.human_summary}`) + + const details = result.detailed_summary + if (details?.what_was_done?.length) { + lines.push("") + lines.push("What was done:") + details.what_was_done.slice(0, 8).forEach((entry) => lines.push(`- ${entry}`)) + } + if (details?.files?.length) { + lines.push("") + lines.push("Files:") + details.files.slice(0, 8).forEach((file) => lines.push(`- ${file.path}: ${file.notes}`)) + } + if (details?.current_state) { + lines.push("") + lines.push(`Current state: ${details.current_state}`) + } + if (details?.next_steps?.length) { + lines.push("") + lines.push("Next steps:") + details.next_steps.slice(0, 6).forEach((step) => lines.push(`- ${step}`)) + } + if (details?.blockers?.length) { + lines.push("") + lines.push("Blockers:") + details.blockers.slice(0, 6).forEach((blocker) => lines.push(`- ${blocker}`)) + } + + const output = lines.join("\n").trim() + if (output.length <= COMPACTION_SUMMARY_MAX_CHARS) { + return output + } + return `${output.slice(0, COMPACTION_SUMMARY_MAX_CHARS - 3)}...` +} + +async function checkTokenBudgetBeforeSend( + instanceId: string, + sessionId: string, + session: any, +): Promise { + const store = messageStoreBus.getInstance(instanceId) + if (!store) return true + + const usage = store.getSessionUsage(sessionId) + const { contextLimit, outputLimit } = getModelLimits(instanceId, session) + + // Use 
actualUsageTokens which is the REAL context usage from the last message + // NOT the cumulative total of all tokens ever processed + const currentContextUsage = usage?.actualUsageTokens || 0 + + // Only show warning if we're actually near the limit + // Using 80% threshold before warning + const warningThreshold = contextLimit * 0.8 + + const existingWarning = tokenWarningState.get(getWarningKey(instanceId, sessionId)) + if (existingWarning && Date.now() - existingWarning.timestamp < TOKEN_WARNING_TTL_MS) { + return true + } + + if (getSessionCompactionState(instanceId, sessionId)) { + return false + } + + if (currentContextUsage >= warningThreshold && currentContextUsage + outputLimit >= contextLimit) { + log.warn("Token budget approaching limit", { + instanceId, + sessionId, + currentContextUsage, + outputLimit, + contextLimit, + warningThreshold, + }) + + const confirmed = await showConfirmDialog( + `Context limit approaching (${currentContextUsage.toLocaleString()} / ${contextLimit.toLocaleString()} tokens). Compact now to continue?`, + { + title: "Token Budget Warning", + confirmLabel: "Compact", + cancelLabel: "Continue Anyway", + }, + ) + + if (confirmed) { + setTokenWarningState(instanceId, sessionId, true) + await compactSession(instanceId, sessionId) + return false + } + + setTokenWarningState(instanceId, sessionId, true) + return true + } + + return true +} + +type ExternalChatMessage = { role: "user" | "assistant" | "system"; content: string } + +function shouldForceEnglish(prompt: string): boolean { + const text = prompt.trim() + if (!text) return false + + const lower = text.toLowerCase() + const explicitEnglish = /(answer|respond|reply|write|speak|output)[^a-zA-Z]{0,24}english\b/.test(lower) + if (explicitEnglish) return true + + const explicitOther = /(answer|respond|reply|write|speak|output)[^a-zA-Z]{0,24}(spanish|español|french|german|italian|portuguese|brazilian|arabic|hindi|urdu|turkish|russian|japanese|korean|chinese|mandarin|cantonese|thai|vietnamese|indonesian|malay|polish|dutch|hebrew)\b/.test(lower) + if (explicitOther) return false + + const nonLatin = + /[\u0100-\u02FF\u0370-\u1FFF\u2C00-\uD7FF\u3040-\u30FF\u3400-\u9FFF\uAC00-\uD7AF\u0590-\u08FF\u0900-\u0FFF]/.test(text) + if (nonLatin) return false + + return true +} + +function buildLanguageSystemInstruction(prompt: string): string | undefined { + if (!shouldForceEnglish(prompt)) return undefined + return "Respond in English unless the user explicitly requests another language." 
+} + +function extractPlainTextFromParts(parts: Array<{ type?: string; text?: unknown; filename?: string }>): string { + const segments: string[] = [] + for (const part of parts) { + if (!part || typeof part !== "object") continue + if (part.type === "text" && typeof part.text === "string") { + segments.push(part.text) + } else if (part.type === "file" && typeof part.filename === "string") { + segments.push(`[file: ${part.filename}]`) + } + } + return segments.join("\n").trim() +} + +function buildExternalChatMessages( + instanceId: string, + sessionId: string, + systemMessage?: string, +): ExternalChatMessage[] { + const store = messageStoreBus.getOrCreate(instanceId) + const messageIds = store.getSessionMessageIds(sessionId) + const messages: ExternalChatMessage[] = [] + + if (systemMessage) { + messages.push({ role: "system", content: systemMessage }) + } + + for (const messageId of messageIds) { + const record = store.getMessage(messageId) + if (!record) continue + const { orderedParts } = buildRecordDisplayData(instanceId, record) + const content = extractPlainTextFromParts(orderedParts as Array<{ type?: string; text?: unknown; filename?: string }>) + if (!content) continue + messages.push({ + role: record.role === "assistant" ? "assistant" : "user", + content, + }) + } + + return messages +} + +async function readSseStream( + response: Response, + onData: (data: string) => void, + idleTimeoutMs: number = 45_000, +): Promise { + if (!response.body) { + throw new Error("Response body is missing") + } + const reader = response.body.getReader() + const decoder = new TextDecoder() + let buffer = "" + let shouldStop = false + let timedOut = false + let idleTimer: ReturnType | undefined + + const resetIdleTimer = () => { + if (idleTimer) clearTimeout(idleTimer) + idleTimer = setTimeout(() => { + timedOut = true + reader.cancel().catch(() => {}) + }, idleTimeoutMs) + } + resetIdleTimer() + + try { + while (!shouldStop) { + const { done, value } = await reader.read() + if (done) break + resetIdleTimer() + buffer += decoder.decode(value, { stream: true }) + const lines = buffer.split("\n") + buffer = lines.pop() || "" + + for (const line of lines) { + const trimmed = line.trim() + if (!trimmed.startsWith("data:")) continue + const data = trimmed.slice(5).trim() + if (!data) continue + if (data === "[DONE]") { + shouldStop = true + break + } + onData(data) + } + } + if (timedOut) { + throw new Error("Stream timed out") + } + } finally { + if (idleTimer) clearTimeout(idleTimer) + reader.releaseLock() + } +} + +async function streamOllamaChat( + instanceId: string, + sessionId: string, + providerId: string, + modelId: string, + systemMessage: string | undefined, + messageId: string, + assistantMessageId: string, + assistantPartId: string, +): Promise { + const controller = new AbortController() + const timeoutId = setTimeout(() => controller.abort(), STREAM_TIMEOUT_MS) + + const response = await fetch("/api/ollama/chat", { + method: "POST", + headers: { "Content-Type": "application/json" }, + signal: controller.signal, + body: JSON.stringify({ + model: modelId, + messages: buildExternalChatMessages(instanceId, sessionId, systemMessage), + stream: true, + }), + }) + + if (!response.ok) { + const errorText = await response.text().catch(() => "") + throw new Error(errorText || `Ollama chat failed (${response.status})`) + } + + const store = messageStoreBus.getOrCreate(instanceId) + let fullText = "" + + try { + await readSseStream(response, (data) => { + try { + const chunk = JSON.parse(data) + // 
Check for error response from server + if (chunk?.error) { + throw new Error(chunk.error) + } + const delta = chunk?.message?.content + if (typeof delta !== "string" || delta.length === 0) return + fullText += delta + store.applyPartUpdate({ + messageId: assistantMessageId, + part: { id: assistantPartId, type: "text", text: fullText } as any, + }) + } catch (e) { + if (e instanceof Error) throw e + // Ignore malformed chunks + } + }) + } finally { + clearTimeout(timeoutId) + } + + store.upsertMessage({ + id: assistantMessageId, + sessionId, + role: "assistant", + status: "complete", + updatedAt: Date.now(), + isEphemeral: false, + }) + store.setMessageInfo(assistantMessageId, { + id: assistantMessageId, + role: "assistant", + providerID: providerId, + modelID: modelId, + time: { created: store.getMessageInfo(assistantMessageId)?.time?.created ?? Date.now(), completed: Date.now() }, + } as any) + store.upsertMessage({ + id: messageId, + sessionId, + role: "user", + status: "sent", + updatedAt: Date.now(), + isEphemeral: false, + }) +} + +async function streamQwenChat( + instanceId: string, + sessionId: string, + providerId: string, + modelId: string, + systemMessage: string | undefined, + accessToken: string, + resourceUrl: string | undefined, + messageId: string, + assistantMessageId: string, + assistantPartId: string, +): Promise { + const controller = new AbortController() + const timeoutId = setTimeout(() => controller.abort(), STREAM_TIMEOUT_MS) + + const response = await fetch("/api/qwen/chat", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${accessToken}`, + }, + signal: controller.signal, + body: JSON.stringify({ + model: modelId, + messages: buildExternalChatMessages(instanceId, sessionId, systemMessage), + stream: true, + resource_url: resourceUrl, + }), + }) + + if (!response.ok) { + const errorText = await response.text().catch(() => "") + throw new Error(errorText || `Qwen chat failed (${response.status})`) + } + + const store = messageStoreBus.getOrCreate(instanceId) + let fullText = "" + + try { + await readSseStream(response, (data) => { + try { + const chunk = JSON.parse(data) + const delta = + chunk?.choices?.[0]?.delta?.content ?? + chunk?.choices?.[0]?.message?.content + if (typeof delta !== "string" || delta.length === 0) return + fullText += delta + store.applyPartUpdate({ + messageId: assistantMessageId, + part: { id: assistantPartId, type: "text", text: fullText } as any, + }) + } catch { + // Ignore malformed chunks + } + }) + } finally { + clearTimeout(timeoutId) + } + + store.upsertMessage({ + id: assistantMessageId, + sessionId, + role: "assistant", + status: "complete", + updatedAt: Date.now(), + isEphemeral: false, + }) + store.setMessageInfo(assistantMessageId, { + id: assistantMessageId, + role: "assistant", + providerID: providerId, + modelID: modelId, + time: { created: store.getMessageInfo(assistantMessageId)?.time?.created ?? 
Date.now(), completed: Date.now() }, + } as any) + store.upsertMessage({ + id: messageId, + sessionId, + role: "user", + status: "sent", + updatedAt: Date.now(), + isEphemeral: false, + }) +} + +async function streamOpenCodeZenChat( + instanceId: string, + sessionId: string, + providerId: string, + modelId: string, + systemMessage: string | undefined, + messageId: string, + assistantMessageId: string, + assistantPartId: string, +): Promise { + const controller = new AbortController() + const timeoutId = setTimeout(() => controller.abort(), STREAM_TIMEOUT_MS) + + const response = await fetch("/api/opencode-zen/chat", { + method: "POST", + headers: { "Content-Type": "application/json" }, + signal: controller.signal, + body: JSON.stringify({ + model: modelId, + messages: buildExternalChatMessages(instanceId, sessionId, systemMessage), + stream: true, + }), + }) + + if (!response.ok) { + const errorText = await response.text().catch(() => "") + throw new Error(errorText || `OpenCode Zen chat failed (${response.status})`) + } + + const store = messageStoreBus.getOrCreate(instanceId) + let fullText = "" + + try { + await readSseStream(response, (data) => { + try { + const chunk = JSON.parse(data) + if (chunk?.error) { + throw new Error(typeof chunk.error === "string" ? chunk.error : "OpenCode Zen streaming error") + } + const delta = + chunk?.choices?.[0]?.delta?.content ?? + chunk?.choices?.[0]?.message?.content + if (typeof delta !== "string" || delta.length === 0) return + fullText += delta + store.applyPartUpdate({ + messageId: assistantMessageId, + part: { id: assistantPartId, type: "text", text: fullText } as any, + }) + } catch (error) { + if (error instanceof Error) { + throw error + } + } + }) + } finally { + clearTimeout(timeoutId) + } + + + store.upsertMessage({ + id: assistantMessageId, + sessionId, + role: "assistant", + status: "complete", + updatedAt: Date.now(), + isEphemeral: false, + }) + store.setMessageInfo(assistantMessageId, { + id: assistantMessageId, + role: "assistant", + providerID: providerId, + modelID: modelId, + time: { created: store.getMessageInfo(assistantMessageId)?.time?.created ?? Date.now(), completed: Date.now() }, + } as any) + store.upsertMessage({ + id: messageId, + sessionId, + role: "user", + status: "sent", + updatedAt: Date.now(), + isEphemeral: false, + }) +} + +async function streamZAIChat( + instanceId: string, + sessionId: string, + providerId: string, + modelId: string, + systemMessage: string | undefined, + messageId: string, + assistantMessageId: string, + assistantPartId: string, +): Promise { + const controller = new AbortController() + const timeoutId = setTimeout(() => controller.abort(), STREAM_TIMEOUT_MS) + + const response = await fetch("/api/zai/chat", { + method: "POST", + headers: { "Content-Type": "application/json" }, + signal: controller.signal, + body: JSON.stringify({ + model: modelId, + messages: buildExternalChatMessages(instanceId, sessionId, systemMessage), + stream: true, + }), + }) + + if (!response.ok) { + const errorText = await response.text().catch(() => "") + throw new Error(errorText || `Z.AI chat failed (${response.status})`) + } + + const store = messageStoreBus.getOrCreate(instanceId) + let fullText = "" + + try { + await readSseStream(response, (data) => { + try { + const chunk = JSON.parse(data) + // Check for error response from server + if (chunk?.error) { + throw new Error(chunk.error) + } + const delta = + chunk?.choices?.[0]?.delta?.content ?? 
+ chunk?.choices?.[0]?.message?.content + if (typeof delta !== "string" || delta.length === 0) return + fullText += delta + store.applyPartUpdate({ + messageId: assistantMessageId, + part: { id: assistantPartId, type: "text", text: fullText } as any, + }) + } catch (e) { + if (e instanceof Error) throw e + // Ignore malformed chunks + } + }) + } finally { + clearTimeout(timeoutId) + } + + store.upsertMessage({ + id: assistantMessageId, + sessionId, + role: "assistant", + status: "complete", + updatedAt: Date.now(), + isEphemeral: false, + }) + store.setMessageInfo(assistantMessageId, { + id: assistantMessageId, + role: "assistant", + providerID: providerId, + modelID: modelId, + time: { created: store.getMessageInfo(assistantMessageId)?.time?.created ?? Date.now(), completed: Date.now() }, + } as any) + store.upsertMessage({ + id: messageId, + sessionId, + role: "user", + status: "sent", + updatedAt: Date.now(), + isEphemeral: false, + }) +} + async function sendMessage( instanceId: string, sessionId: string, @@ -73,6 +700,21 @@ async function sendMessage( throw new Error("Session not found") } + let effectiveModel = session.model + if (!isModelValid(instanceId, effectiveModel)) { + const fallback = await getDefaultModel(instanceId, session.agent || undefined) + if (isModelValid(instanceId, fallback)) { + await updateSessionModel(instanceId, sessionId, fallback) + effectiveModel = fallback + } + } + + const sessionForLimits = { ...session, model: effectiveModel } + const canSend = await checkTokenBudgetBeforeSend(instanceId, sessionId, sessionForLimits) + if (!canSend) { + return "" + } + const messageId = createId("msg") // If taskId is provided, associate this message with the task and set it as active @@ -99,7 +741,7 @@ async function sendMessage( id: textPartId, type: "text" as const, text: resolvedPrompt, - synthetic: true, + synthetic: false, renderCache: undefined, }, ] @@ -178,17 +820,176 @@ async function sendMessage( /* trigger reactivity for legacy session data */ }) + const providerId = effectiveModel.providerId + const languageSystem = buildLanguageSystemInstruction(prompt) + if (providerId === "ollama-cloud" || providerId === "qwen-oauth" || providerId === "opencode-zen" || providerId === "zai") { + const store = messageStoreBus.getOrCreate(instanceId) + const now = Date.now() + const assistantMessageId = createId("msg") + const assistantPartId = createId("part") + + store.upsertMessage({ + id: assistantMessageId, + sessionId, + role: "assistant", + status: "streaming", + parts: [{ id: assistantPartId, type: "text", text: "" } as any], + createdAt: now, + updatedAt: now, + isEphemeral: true, + }) + store.setMessageInfo(assistantMessageId, { + id: assistantMessageId, + role: "assistant", + providerID: effectiveModel.providerId, + modelID: effectiveModel.modelId, + time: { created: now, completed: 0 }, + } as any) + store.upsertMessage({ + id: messageId, + sessionId, + role: "user", + status: "sent", + updatedAt: now, + isEphemeral: false, + }) + + try { + if (providerId === "ollama-cloud") { + await streamOllamaChat( + instanceId, + sessionId, + providerId, + effectiveModel.modelId, + languageSystem, + messageId, + assistantMessageId, + assistantPartId, + ) + } else if (providerId === "opencode-zen") { + await streamOpenCodeZenChat( + instanceId, + sessionId, + providerId, + effectiveModel.modelId, + languageSystem, + messageId, + assistantMessageId, + assistantPartId, + ) + } else if (providerId === "zai") { + await streamZAIChat( + instanceId, + sessionId, + providerId, + 
effectiveModel.modelId, + languageSystem, + messageId, + assistantMessageId, + assistantPartId, + ) + } else { + const qwenManager = new QwenOAuthManager() + const token = await qwenManager.getValidToken() + if (!token?.access_token) { + showToastNotification({ + title: "Qwen OAuth unavailable", + message: "Please sign in to Qwen Code again to refresh your token.", + variant: "warning", + duration: 8000, + }) + store.upsertMessage({ + id: messageId, + sessionId, + role: "user", + status: "error", + updatedAt: Date.now(), + }) + store.upsertMessage({ + id: assistantMessageId, + sessionId, + role: "assistant", + status: "error", + updatedAt: Date.now(), + isEphemeral: false, + }) + return messageId + } + + await streamQwenChat( + instanceId, + sessionId, + providerId, + effectiveModel.modelId, + languageSystem, + token.access_token, + token.resource_url, + messageId, + assistantMessageId, + assistantPartId, + ) + } + return messageId + } catch (error: any) { + if (providerId === "opencode-zen") { + const message = String(error?.message || "") + const match = message.match(/Model\s+([A-Za-z0-9._-]+)\s+not supported/i) + if (match?.[1]) { + markOpencodeZenModelOffline(match[1]) + } + } + store.upsertMessage({ + id: messageId, + sessionId, + role: "user", + status: "error", + updatedAt: Date.now(), + }) + store.upsertMessage({ + id: assistantMessageId, + sessionId, + role: "assistant", + status: "error", + updatedAt: Date.now(), + isEphemeral: false, + }) + store.setMessageInfo(assistantMessageId, { + id: assistantMessageId, + role: "assistant", + providerID: effectiveModel.providerId, + modelID: effectiveModel.modelId, + time: { created: now, completed: Date.now() }, + error: { name: "UnknownError", message: error?.message || "Request failed" }, + } as any) + showToastNotification({ + title: + providerId === "ollama-cloud" + ? "Ollama request failed" + : providerId === "zai" + ? "Z.AI request failed" + : providerId === "opencode-zen" + ? "OpenCode Zen request failed" + : "Qwen request failed", + message: error?.message || "Request failed", + variant: "error", + duration: 8000, + }) + throw error + } + } + const requestBody = { messageID: messageId, parts: requestParts, ...(session.agent && { agent: session.agent }), - ...(session.model.providerId && - session.model.modelId && { - model: { - providerID: session.model.providerId, - modelID: session.model.modelId, - }, - }), + ...(effectiveModel.providerId && + effectiveModel.modelId && { + model: { + providerID: effectiveModel.providerId, + modelID: effectiveModel.modelId, + }, + }), + ...(languageSystem && { system: languageSystem }), } log.info("sendMessage", { @@ -207,11 +1008,20 @@ async function sendMessage( } try { - log.info("session.promptAsync starting", { instanceId, sessionId }) - const response = await instance.client.session.promptAsync({ + log.info("session.promptAsync starting", { instanceId, sessionId, providerId: session.model.providerId }) + + // Add timeout to prevent infinite hanging + const timeoutMs = 60000 // 60 seconds + const timeoutPromise = new Promise((_, reject) => { + setTimeout(() => reject(new Error(`Request timed out after ${timeoutMs / 1000}s. 
The model provider may not be available.`)), timeoutMs) + }) + + const promptPromise = instance.client.session.promptAsync({ path: { id: sessionId }, body: requestBody, }) + + const response = await Promise.race([promptPromise, timeoutPromise]) log.info("session.promptAsync success", { instanceId, sessionId, response }) return messageId } catch (error: any) { @@ -222,18 +1032,27 @@ async function sendMessage( stack: error?.stack, requestBody }) - + // Update message status to error in store const store = messageStoreBus.getOrCreate(instanceId) store.upsertMessage({ id: messageId, sessionId, + role: "user", status: "error", updatedAt: Date.now(), }) - + + // Show user-friendly error notification + showToastNotification({ + title: "Message failed", + message: error?.message || "Failed to send message. Check your model configuration.", + variant: "error", + duration: 8000, + }) + throw error - + } } @@ -321,6 +1140,155 @@ async function abortSession(instanceId: string, sessionId: string): Promise { + const instance = instances().get(instanceId) + if (!instance || !instance.client) { + return { + success: false, + mode: "compact", + human_summary: "Instance not ready", + token_before: 0, + token_after: 0, + token_reduction_pct: 0, + } + } + + const instanceSessions = sessions().get(instanceId) + const session = instanceSessions?.get(sessionId) + if (!session) { + return { + success: false, + mode: "compact", + human_summary: "Session not found", + token_before: 0, + token_after: 0, + token_reduction_pct: 0, + } + } + + const store = messageStoreBus.getInstance(instanceId) + const messageCount = store?.getSessionMessageIds(sessionId)?.length ?? 0 + if (messageCount <= 2) { + log.info("compactSession: Session too small to compact", { instanceId, sessionId, count: messageCount }) + return { + success: true, + mode: "compact", + human_summary: "Session too small to compact", + token_before: 0, + token_after: 0, + token_reduction_pct: 0, + } + } + + const usageBefore = store?.getSessionUsage(sessionId) + const tokenBefore = usageBefore + ? (usageBefore.totalInputTokens || 0) + (usageBefore.totalOutputTokens || 0) + (usageBefore.totalReasoningTokens || 0) + : 0 + + log.info("compactSession: Running local compaction", { instanceId, sessionId, messageCount }) + setSessionCompactionState(instanceId, sessionId, true) + compactionAttemptState.set(getWarningKey(instanceId, sessionId), { + timestamp: Date.now(), + suppressContextError: true, + }) + + try { + const result = await executeCompactionWrapper(instanceId, sessionId, "compact") + if (!result.success) { + return result + } + + const compactedSession = await createSession(instanceId, session.agent || undefined) + + if (session.model.providerId && session.model.modelId) { + await updateSessionModel(instanceId, compactedSession.id, { + providerId: session.model.providerId, + modelId: session.model.modelId, + }) + } + + if (session.tasks && session.tasks.length > 0) { + const tasksCopy = session.tasks.map((task) => ({ ...task })) + withSession(instanceId, compactedSession.id, (nextSession) => { + nextSession.tasks = tasksCopy + nextSession.activeTaskId = undefined + }) + } + + if (session.parentId) { + withSession(instanceId, compactedSession.id, (nextSession) => { + nextSession.parentId = session.parentId + }) + withSession(instanceId, session.parentId, (parentSession) => { + if (!parentSession.tasks) return + parentSession.tasks = parentSession.tasks.map((task) => ( + task.taskSessionId === sessionId + ? 
{ ...task, taskSessionId: compactedSession.id } + : task + )) + }) + } + + const summaryText = buildCompactionSeed(result) + const summaryMessageId = createId("msg") + const summaryPartId = createId("part") + + await instance.client.session.promptAsync({ + path: { id: compactedSession.id }, + body: { + messageID: summaryMessageId, + agent: session.agent || undefined, + model: session.model.providerId && session.model.modelId + ? { providerID: session.model.providerId, modelID: session.model.modelId } + : undefined, + noReply: true, + system: "You are continuing from a compacted session. Use the summary below as context.", + parts: [ + { + id: summaryPartId, + type: "text", + text: summaryText, + }, + ], + }, + }) + + if (session.parentId) { + setActiveSession(instanceId, compactedSession.id) + } else { + setActiveParentSession(instanceId, compactedSession.id) + } + await loadMessages(instanceId, compactedSession.id, true) + updateSessionInfo(instanceId, compactedSession.id) + + showToastNotification({ + title: "Session compacted", + message: "Created a new compacted session with a summary to continue work.", + variant: "success", + duration: 8000, + }) + + log.info("compactSession: Complete", { instanceId, sessionId, compactedSessionId: compactedSession.id }) + return { + ...result, + token_before: tokenBefore, + } + } catch (error) { + log.error("compactSession: Failed to compact session", { instanceId, sessionId, error }) + return { + success: false, + mode: "compact", + human_summary: "Compaction failed", + token_before: tokenBefore, + token_after: tokenBefore, + token_reduction_pct: 0, + } + } finally { + compactionAttemptState.delete(getWarningKey(instanceId, sessionId)) + setSessionCompactionState(instanceId, sessionId, false) + } +} + async function updateSessionAgent(instanceId: string, sessionId: string, agent: string): Promise { const instanceSessions = sessions().get(instanceId) const session = instanceSessions?.get(sessionId) @@ -328,7 +1296,9 @@ async function updateSessionAgent(instanceId: string, sessionId: string, agent: throw new Error("Session not found") } - const nextModel = await getDefaultModel(instanceId, agent) + const agentModelPreference = await getAgentModelPreference(instanceId, agent) + const defaultModel = await getDefaultModel(instanceId, agent) + const nextModel = agentModelPreference || defaultModel const shouldApplyModel = isModelValid(instanceId, nextModel) withSession(instanceId, sessionId, (current) => { @@ -338,7 +1308,7 @@ async function updateSessionAgent(instanceId: string, sessionId: string, agent: } }) - if (agent && shouldApplyModel) { + if (agent && shouldApplyModel && !agentModelPreference) { await setAgentModelPreference(instanceId, agent, nextModel) } @@ -367,6 +1337,27 @@ async function updateSessionModel( current.model = model }) + const propagateModel = (targetSessionId?: string | null) => { + if (!targetSessionId || targetSessionId === sessionId) return + withSession(instanceId, targetSessionId, (current) => { + current.model = model + }) + updateSessionInfo(instanceId, targetSessionId) + } + + if (session.parentId) { + propagateModel(session.parentId) + } + + if (session.tasks && session.tasks.length > 0) { + const seen = new Set() + for (const task of session.tasks) { + if (!task.taskSessionId || seen.has(task.taskSessionId)) continue + seen.add(task.taskSessionId) + propagateModel(task.taskSessionId) + } + } + if (session.agent) { await setAgentModelPreference(instanceId, session.agent, model) } @@ -459,6 +1450,7 @@ async function 
forkSession(instanceId: string, sessionId: string): Promise
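A minimal standalone sketch of the opencode-zen offline-model contract introduced above, assuming a browser context. The storage key and event name come from the diff; readOfflineModelIds and subscribeOfflineModels are hypothetical helpers shown only to illustrate the read/subscribe side that model-selector.tsx implements inline.

// Sketch of the opencode-zen offline-model contract (assumed helper names).
const OPENCODE_ZEN_OFFLINE_STORAGE_KEY = "opencode-zen-offline-models"

// Read the persisted list of offline model ids, tolerating malformed storage.
function readOfflineModelIds(): Set<string> {
  try {
    const raw = window.localStorage.getItem(OPENCODE_ZEN_OFFLINE_STORAGE_KEY)
    const parsed = raw ? JSON.parse(raw) : []
    return new Set(Array.isArray(parsed) ? parsed.filter((id): id is string => typeof id === "string") : [])
  } catch {
    return new Set()
  }
}

// Subscribe to changes: same-tab updates arrive via the CustomEvent dispatched
// by markOpencodeZenModelOffline, cross-tab updates via the "storage" event.
// Returns an unsubscribe function.
function subscribeOfflineModels(onChange: (ids: Set<string>) => void): () => void {
  const notify = () => onChange(readOfflineModelIds())
  const onStorage = (event: StorageEvent) => {
    if (event.key === OPENCODE_ZEN_OFFLINE_STORAGE_KEY) notify()
  }
  window.addEventListener("opencode-zen-offline-models", notify)
  window.addEventListener("storage", onStorage)
  return () => {
    window.removeEventListener("opencode-zen-offline-models", notify)
    window.removeEventListener("storage", onStorage)
  }
}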