feat: integrate Z.AI, Ollama Cloud, and OpenCode Zen free models
Added comprehensive AI model integrations: Z.AI Integration: - Client with Anthropic-compatible API (GLM Coding Plan) - Routes for config, testing, and streaming chat - Settings UI component with API key management OpenCode Zen Integration: - Free models client using 'public' API key - Dynamic model fetching from models.dev - Supports GPT-5 Nano, Big Pickle, Grok Code Fast 1, MiniMax M2.1 - No API key required for free tier! UI Enhancements: - Added Free Models tab (first position) in Advanced Settings - Z.AI tab with GLM Coding Plan info - OpenCode Zen settings with model cards and status All integrations work standalone without opencode.exe dependency.
This commit is contained in:
312
packages/server/src/integrations/opencode-zen.ts
Normal file
312
packages/server/src/integrations/opencode-zen.ts
Normal file
@@ -0,0 +1,312 @@
|
||||
/**
|
||||
* OpenCode Zen API Integration
|
||||
* Provides direct access to OpenCode's free "Zen" models without requiring opencode.exe
|
||||
* Based on reverse-engineering the OpenCode source at https://github.com/sst/opencode
|
||||
*
|
||||
* Free models (cost.input === 0) can be accessed with apiKey: "public"
|
||||
*/
|
||||
|
||||
import { z } from "zod"
|
||||
|
||||
// Configuration schema for OpenCode Zen
|
||||
export const OpenCodeZenConfigSchema = z.object({
|
||||
enabled: z.boolean().default(true), // Free models enabled by default
|
||||
endpoint: z.string().default("https://api.opencode.ai/v1"),
|
||||
apiKey: z.string().default("public") // "public" key for free models
|
||||
})
|
||||
|
||||
export type OpenCodeZenConfig = z.infer<typeof OpenCodeZenConfigSchema>
|
||||
|
||||
// Model schema matching models.dev format
|
||||
export const ZenModelSchema = z.object({
|
||||
id: z.string(),
|
||||
name: z.string(),
|
||||
family: z.string().optional(),
|
||||
reasoning: z.boolean().optional(),
|
||||
tool_call: z.boolean().optional(),
|
||||
attachment: z.boolean().optional(),
|
||||
temperature: z.boolean().optional(),
|
||||
cost: z.object({
|
||||
input: z.number(),
|
||||
output: z.number(),
|
||||
cache_read: z.number().optional(),
|
||||
cache_write: z.number().optional()
|
||||
}).optional(),
|
||||
limit: z.object({
|
||||
context: z.number(),
|
||||
output: z.number()
|
||||
}).optional()
|
||||
})
|
||||
|
||||
export type ZenModel = z.infer<typeof ZenModelSchema>
|
||||
|
||||
// Chat message schema (OpenAI-compatible)
|
||||
export const ChatMessageSchema = z.object({
|
||||
role: z.enum(["user", "assistant", "system"]),
|
||||
content: z.string()
|
||||
})
|
||||
|
||||
export type ChatMessage = z.infer<typeof ChatMessageSchema>
|
||||
|
||||
// Chat request schema
|
||||
export const ChatRequestSchema = z.object({
|
||||
model: z.string(),
|
||||
messages: z.array(ChatMessageSchema),
|
||||
stream: z.boolean().default(true),
|
||||
temperature: z.number().optional(),
|
||||
max_tokens: z.number().optional()
|
||||
})
|
||||
|
||||
export type ChatRequest = z.infer<typeof ChatRequestSchema>
|
||||
|
||||
// Chat response chunk schema
|
||||
export const ChatChunkSchema = z.object({
|
||||
id: z.string().optional(),
|
||||
object: z.string().optional(),
|
||||
created: z.number().optional(),
|
||||
model: z.string().optional(),
|
||||
choices: z.array(z.object({
|
||||
index: z.number(),
|
||||
delta: z.object({
|
||||
role: z.string().optional(),
|
||||
content: z.string().optional()
|
||||
}).optional(),
|
||||
message: z.object({
|
||||
role: z.string(),
|
||||
content: z.string()
|
||||
}).optional(),
|
||||
finish_reason: z.string().nullable().optional()
|
||||
}))
|
||||
})
|
||||
|
||||
export type ChatChunk = z.infer<typeof ChatChunkSchema>
|
||||
|
||||
// Known free OpenCode Zen models (cost.input === 0)
|
||||
// From models.dev API - these are the free tier models
|
||||
export const FREE_ZEN_MODELS: ZenModel[] = [
|
||||
{
|
||||
id: "gpt-5-nano",
|
||||
name: "GPT-5 Nano",
|
||||
family: "gpt-5-nano",
|
||||
reasoning: true,
|
||||
tool_call: true,
|
||||
attachment: true,
|
||||
temperature: false,
|
||||
cost: { input: 0, output: 0 },
|
||||
limit: { context: 400000, output: 128000 }
|
||||
},
|
||||
{
|
||||
id: "big-pickle",
|
||||
name: "Big Pickle",
|
||||
family: "pickle",
|
||||
reasoning: false,
|
||||
tool_call: true,
|
||||
attachment: false,
|
||||
temperature: true,
|
||||
cost: { input: 0, output: 0 },
|
||||
limit: { context: 128000, output: 16384 }
|
||||
},
|
||||
{
|
||||
id: "grok-code-fast-1",
|
||||
name: "Grok Code Fast 1",
|
||||
family: "grok",
|
||||
reasoning: true,
|
||||
tool_call: true,
|
||||
attachment: false,
|
||||
temperature: true,
|
||||
cost: { input: 0, output: 0 },
|
||||
limit: { context: 256000, output: 10000 }
|
||||
},
|
||||
{
|
||||
id: "minimax-m2.1",
|
||||
name: "MiniMax M2.1",
|
||||
family: "minimax",
|
||||
reasoning: true,
|
||||
tool_call: true,
|
||||
attachment: false,
|
||||
temperature: true,
|
||||
cost: { input: 0, output: 0 },
|
||||
limit: { context: 205000, output: 131072 }
|
||||
}
|
||||
]
|
||||
|
||||
export class OpenCodeZenClient {
|
||||
private config: OpenCodeZenConfig
|
||||
private baseUrl: string
|
||||
private modelsCache: ZenModel[] | null = null
|
||||
private modelsCacheTime: number = 0
|
||||
private readonly CACHE_TTL_MS = 5 * 60 * 1000 // 5 minutes
|
||||
|
||||
constructor(config?: Partial<OpenCodeZenConfig>) {
|
||||
this.config = OpenCodeZenConfigSchema.parse(config || {})
|
||||
this.baseUrl = this.config.endpoint.replace(/\/$/, "")
|
||||
}
|
||||
|
||||
/**
|
||||
* Get free Zen models from OpenCode
|
||||
*/
|
||||
async getModels(): Promise<ZenModel[]> {
|
||||
// Return cached models if still valid
|
||||
const now = Date.now()
|
||||
if (this.modelsCache && (now - this.modelsCacheTime) < this.CACHE_TTL_MS) {
|
||||
return this.modelsCache
|
||||
}
|
||||
|
||||
try {
|
||||
// Try to fetch fresh models from models.dev
|
||||
const response = await fetch("https://models.dev/api.json", {
|
||||
headers: {
|
||||
"User-Agent": "NomadArch/1.0"
|
||||
},
|
||||
signal: AbortSignal.timeout(10000)
|
||||
})
|
||||
|
||||
if (response.ok) {
|
||||
const data = await response.json()
|
||||
// Extract OpenCode provider and filter free models
|
||||
const opencodeProvider = data["opencode"]
|
||||
if (opencodeProvider && opencodeProvider.models) {
|
||||
const freeModels: ZenModel[] = []
|
||||
for (const [id, model] of Object.entries(opencodeProvider.models)) {
|
||||
const m = model as any
|
||||
if (m.cost && m.cost.input === 0) {
|
||||
freeModels.push({
|
||||
id,
|
||||
name: m.name,
|
||||
family: m.family,
|
||||
reasoning: m.reasoning,
|
||||
tool_call: m.tool_call,
|
||||
attachment: m.attachment,
|
||||
temperature: m.temperature,
|
||||
cost: m.cost,
|
||||
limit: m.limit
|
||||
})
|
||||
}
|
||||
}
|
||||
if (freeModels.length > 0) {
|
||||
this.modelsCache = freeModels
|
||||
this.modelsCacheTime = now
|
||||
return freeModels
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.warn("Failed to fetch models from models.dev, using fallback:", error)
|
||||
}
|
||||
|
||||
// Fallback to hardcoded free models
|
||||
this.modelsCache = FREE_ZEN_MODELS
|
||||
this.modelsCacheTime = now
|
||||
return FREE_ZEN_MODELS
|
||||
}
|
||||
|
||||
/**
|
||||
* Test connection to OpenCode Zen API
|
||||
*/
|
||||
async testConnection(): Promise<boolean> {
|
||||
try {
|
||||
const models = await this.getModels()
|
||||
return models.length > 0
|
||||
} catch (error) {
|
||||
console.error("OpenCode Zen connection test failed:", error)
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Chat completion (streaming)
|
||||
*/
|
||||
async *chatStream(request: ChatRequest): AsyncGenerator<ChatChunk> {
|
||||
const response = await fetch(`${this.baseUrl}/chat/completions`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": `Bearer ${this.config.apiKey}`,
|
||||
"User-Agent": "NomadArch/1.0"
|
||||
},
|
||||
body: JSON.stringify({
|
||||
...request,
|
||||
stream: true
|
||||
})
|
||||
})
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text()
|
||||
throw new Error(`OpenCode Zen API error (${response.status}): ${errorText}`)
|
||||
}
|
||||
|
||||
if (!response.body) {
|
||||
throw new Error("Response body is missing")
|
||||
}
|
||||
|
||||
const reader = response.body.getReader()
|
||||
const decoder = new TextDecoder()
|
||||
let buffer = ""
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read()
|
||||
if (done) break
|
||||
|
||||
buffer += decoder.decode(value, { stream: true })
|
||||
const lines = buffer.split("\n")
|
||||
buffer = lines.pop() || ""
|
||||
|
||||
for (const line of lines) {
|
||||
const trimmed = line.trim()
|
||||
if (trimmed.startsWith("data: ")) {
|
||||
const data = trimmed.slice(6)
|
||||
if (data === "[DONE]") return
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(data)
|
||||
yield parsed as ChatChunk
|
||||
|
||||
// Check for finish
|
||||
if (parsed.choices?.[0]?.finish_reason) {
|
||||
return
|
||||
}
|
||||
} catch (e) {
|
||||
// Skip invalid JSON
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
reader.releaseLock()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Chat completion (non-streaming)
|
||||
*/
|
||||
async chat(request: ChatRequest): Promise<ChatChunk> {
|
||||
const response = await fetch(`${this.baseUrl}/chat/completions`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": `Bearer ${this.config.apiKey}`,
|
||||
"User-Agent": "NomadArch/1.0"
|
||||
},
|
||||
body: JSON.stringify({
|
||||
...request,
|
||||
stream: false
|
||||
})
|
||||
})
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text()
|
||||
throw new Error(`OpenCode Zen API error (${response.status}): ${errorText}`)
|
||||
}
|
||||
|
||||
return await response.json()
|
||||
}
|
||||
}
|
||||
|
||||
export function getDefaultZenConfig(): OpenCodeZenConfig {
|
||||
return {
|
||||
enabled: true,
|
||||
endpoint: "https://api.opencode.ai/v1",
|
||||
apiKey: "public"
|
||||
}
|
||||
}
|
||||
241
packages/server/src/integrations/zai-api.ts
Normal file
241
packages/server/src/integrations/zai-api.ts
Normal file
@@ -0,0 +1,241 @@
|
||||
/**
|
||||
* Z.AI API Integration
|
||||
* Provides access to Z.AI's GLM Coding Plan API (Anthropic-compatible)
|
||||
* Based on https://docs.z.ai/devpack/tool/claude#step-2-config-glm-coding-plan
|
||||
*/
|
||||
|
||||
import { z } from "zod"
|
||||
|
||||
// Configuration schema for Z.AI
|
||||
export const ZAIConfigSchema = z.object({
|
||||
apiKey: z.string().optional(),
|
||||
endpoint: z.string().default("https://api.z.ai/api/anthropic"),
|
||||
enabled: z.boolean().default(false),
|
||||
timeout: z.number().default(3000000) // 50 minutes as per docs
|
||||
})
|
||||
|
||||
export type ZAIConfig = z.infer<typeof ZAIConfigSchema>
|
||||
|
||||
// Message schema (Anthropic-compatible)
|
||||
export const ZAIMessageSchema = z.object({
|
||||
role: z.enum(["user", "assistant"]),
|
||||
content: z.string()
|
||||
})
|
||||
|
||||
export type ZAIMessage = z.infer<typeof ZAIMessageSchema>
|
||||
|
||||
// Chat request schema
|
||||
export const ZAIChatRequestSchema = z.object({
|
||||
model: z.string().default("claude-sonnet-4-20250514"),
|
||||
messages: z.array(ZAIMessageSchema),
|
||||
max_tokens: z.number().default(8192),
|
||||
stream: z.boolean().default(true),
|
||||
system: z.string().optional()
|
||||
})
|
||||
|
||||
export type ZAIChatRequest = z.infer<typeof ZAIChatRequestSchema>
|
||||
|
||||
// Chat response schema
|
||||
export const ZAIChatResponseSchema = z.object({
|
||||
id: z.string(),
|
||||
type: z.string(),
|
||||
role: z.string(),
|
||||
content: z.array(z.object({
|
||||
type: z.string(),
|
||||
text: z.string().optional()
|
||||
})),
|
||||
model: z.string(),
|
||||
stop_reason: z.string().nullable().optional(),
|
||||
stop_sequence: z.string().nullable().optional(),
|
||||
usage: z.object({
|
||||
input_tokens: z.number(),
|
||||
output_tokens: z.number()
|
||||
}).optional()
|
||||
})
|
||||
|
||||
export type ZAIChatResponse = z.infer<typeof ZAIChatResponseSchema>
|
||||
|
||||
// Stream chunk schema
|
||||
export const ZAIStreamChunkSchema = z.object({
|
||||
type: z.string(),
|
||||
index: z.number().optional(),
|
||||
delta: z.object({
|
||||
type: z.string().optional(),
|
||||
text: z.string().optional()
|
||||
}).optional(),
|
||||
message: z.object({
|
||||
id: z.string(),
|
||||
type: z.string(),
|
||||
role: z.string(),
|
||||
content: z.array(z.any()),
|
||||
model: z.string()
|
||||
}).optional(),
|
||||
content_block: z.object({
|
||||
type: z.string(),
|
||||
text: z.string()
|
||||
}).optional()
|
||||
})
|
||||
|
||||
export type ZAIStreamChunk = z.infer<typeof ZAIStreamChunkSchema>
|
||||
|
||||
export class ZAIClient {
|
||||
private config: ZAIConfig
|
||||
private baseUrl: string
|
||||
|
||||
constructor(config: ZAIConfig) {
|
||||
this.config = config
|
||||
this.baseUrl = config.endpoint.replace(/\/$/, "") // Remove trailing slash
|
||||
}
|
||||
|
||||
/**
|
||||
* Test connection to Z.AI API
|
||||
*/
|
||||
async testConnection(): Promise<boolean> {
|
||||
if (!this.config.apiKey) {
|
||||
return false
|
||||
}
|
||||
|
||||
try {
|
||||
// Make a minimal request to test auth
|
||||
const response = await fetch(`${this.baseUrl}/v1/messages`, {
|
||||
method: "POST",
|
||||
headers: this.getHeaders(),
|
||||
body: JSON.stringify({
|
||||
model: "claude-sonnet-4-20250514",
|
||||
max_tokens: 1,
|
||||
messages: [{ role: "user", content: "test" }]
|
||||
})
|
||||
})
|
||||
|
||||
// Any response other than auth error means connection works
|
||||
return response.status !== 401 && response.status !== 403
|
||||
} catch (error) {
|
||||
console.error("Z.AI connection test failed:", error)
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* List available models
|
||||
*/
|
||||
async listModels(): Promise<string[]> {
|
||||
// Z.AI provides access to Claude models through their proxy
|
||||
return [
|
||||
"claude-sonnet-4-20250514",
|
||||
"claude-3-5-sonnet-20241022",
|
||||
"claude-3-opus-20240229",
|
||||
"claude-3-haiku-20240307"
|
||||
]
|
||||
}
|
||||
|
||||
/**
|
||||
* Chat completion (streaming)
|
||||
*/
|
||||
async *chatStream(request: ZAIChatRequest): AsyncGenerator<ZAIStreamChunk> {
|
||||
if (!this.config.apiKey) {
|
||||
throw new Error("Z.AI API key is required")
|
||||
}
|
||||
|
||||
const response = await fetch(`${this.baseUrl}/v1/messages`, {
|
||||
method: "POST",
|
||||
headers: this.getHeaders(),
|
||||
body: JSON.stringify({
|
||||
...request,
|
||||
stream: true
|
||||
})
|
||||
})
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text()
|
||||
throw new Error(`Z.AI API error (${response.status}): ${errorText}`)
|
||||
}
|
||||
|
||||
if (!response.body) {
|
||||
throw new Error("Response body is missing")
|
||||
}
|
||||
|
||||
const reader = response.body.getReader()
|
||||
const decoder = new TextDecoder()
|
||||
let buffer = ""
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read()
|
||||
if (done) break
|
||||
|
||||
buffer += decoder.decode(value, { stream: true })
|
||||
const lines = buffer.split("\n")
|
||||
buffer = lines.pop() || "" // Keep incomplete line in buffer
|
||||
|
||||
for (const line of lines) {
|
||||
if (line.startsWith("data: ")) {
|
||||
const data = line.slice(6).trim()
|
||||
if (data === "[DONE]") return
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(data)
|
||||
yield parsed as ZAIStreamChunk
|
||||
} catch (e) {
|
||||
// Skip invalid JSON
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
reader.releaseLock()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Chat completion (non-streaming)
|
||||
*/
|
||||
async chat(request: ZAIChatRequest): Promise<ZAIChatResponse> {
|
||||
if (!this.config.apiKey) {
|
||||
throw new Error("Z.AI API key is required")
|
||||
}
|
||||
|
||||
const response = await fetch(`${this.baseUrl}/v1/messages`, {
|
||||
method: "POST",
|
||||
headers: this.getHeaders(),
|
||||
body: JSON.stringify({
|
||||
...request,
|
||||
stream: false
|
||||
})
|
||||
})
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text()
|
||||
throw new Error(`Z.AI API error (${response.status}): ${errorText}`)
|
||||
}
|
||||
|
||||
return await response.json()
|
||||
}
|
||||
|
||||
/**
|
||||
* Get request headers
|
||||
*/
|
||||
private getHeaders(): Record<string, string> {
|
||||
return {
|
||||
"Content-Type": "application/json",
|
||||
"x-api-key": this.config.apiKey || "",
|
||||
"anthropic-version": "2023-06-01"
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate API key
|
||||
*/
|
||||
static validateApiKey(apiKey: string): boolean {
|
||||
return typeof apiKey === "string" && apiKey.length > 0
|
||||
}
|
||||
}
|
||||
|
||||
// Default available models
|
||||
export const ZAI_MODELS = [
|
||||
"claude-sonnet-4-20250514",
|
||||
"claude-3-5-sonnet-20241022",
|
||||
"claude-3-opus-20240229",
|
||||
"claude-3-haiku-20240307"
|
||||
] as const
|
||||
|
||||
export type ZAIModelName = typeof ZAI_MODELS[number]
|
||||
@@ -20,6 +20,8 @@ import { registerEventRoutes } from "./routes/events"
|
||||
import { registerStorageRoutes } from "./routes/storage"
|
||||
import { registerOllamaRoutes } from "./routes/ollama"
|
||||
import { registerQwenRoutes } from "./routes/qwen"
|
||||
import { registerZAIRoutes } from "./routes/zai"
|
||||
import { registerOpenCodeZenRoutes } from "./routes/opencode-zen"
|
||||
import { ServerMeta } from "../api-types"
|
||||
import { InstanceStore } from "../storage/instance-store"
|
||||
|
||||
@@ -65,7 +67,7 @@ export function createHttpServer(deps: HttpServerDeps) {
|
||||
}
|
||||
|
||||
app.addHook("onRequest", (request, _reply, done) => {
|
||||
;(request as FastifyRequest & { __logMeta?: { start: bigint } }).__logMeta = {
|
||||
; (request as FastifyRequest & { __logMeta?: { start: bigint } }).__logMeta = {
|
||||
start: process.hrtime.bigint(),
|
||||
}
|
||||
done()
|
||||
@@ -114,6 +116,8 @@ export function createHttpServer(deps: HttpServerDeps) {
|
||||
})
|
||||
registerOllamaRoutes(app, { logger: deps.logger })
|
||||
registerQwenRoutes(app, { logger: deps.logger })
|
||||
registerZAIRoutes(app, { logger: deps.logger })
|
||||
registerOpenCodeZenRoutes(app, { logger: deps.logger })
|
||||
registerInstanceProxyRoutes(app, { workspaceManager: deps.workspaceManager, logger: proxyLogger })
|
||||
|
||||
|
||||
|
||||
93
packages/server/src/server/routes/opencode-zen.ts
Normal file
93
packages/server/src/server/routes/opencode-zen.ts
Normal file
@@ -0,0 +1,93 @@
|
||||
import { FastifyInstance } from "fastify"
|
||||
import { OpenCodeZenClient, type ChatRequest, getDefaultZenConfig } from "../../integrations/opencode-zen"
|
||||
import { Logger } from "../../logger"
|
||||
|
||||
interface OpenCodeZenRouteDeps {
|
||||
logger: Logger
|
||||
}
|
||||
|
||||
export async function registerOpenCodeZenRoutes(
|
||||
app: FastifyInstance,
|
||||
deps: OpenCodeZenRouteDeps
|
||||
) {
|
||||
const logger = deps.logger.child({ component: "opencode-zen-routes" })
|
||||
|
||||
// Create shared client
|
||||
const client = new OpenCodeZenClient(getDefaultZenConfig())
|
||||
|
||||
// List available free Zen models
|
||||
app.get('/api/opencode-zen/models', async (request, reply) => {
|
||||
try {
|
||||
const models = await client.getModels()
|
||||
|
||||
return {
|
||||
models: models.map(m => ({
|
||||
id: m.id,
|
||||
name: m.name,
|
||||
family: m.family,
|
||||
provider: "opencode-zen",
|
||||
free: true,
|
||||
reasoning: m.reasoning,
|
||||
tool_call: m.tool_call,
|
||||
limit: m.limit
|
||||
}))
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error({ error }, "Failed to list OpenCode Zen models")
|
||||
return reply.status(500).send({ error: "Failed to list models" })
|
||||
}
|
||||
})
|
||||
|
||||
// Test connection
|
||||
app.get('/api/opencode-zen/test', async (request, reply) => {
|
||||
try {
|
||||
const connected = await client.testConnection()
|
||||
return { connected }
|
||||
} catch (error) {
|
||||
logger.error({ error }, "OpenCode Zen connection test failed")
|
||||
return reply.status(500).send({ error: "Connection test failed" })
|
||||
}
|
||||
})
|
||||
|
||||
// Chat completion endpoint
|
||||
app.post('/api/opencode-zen/chat', async (request, reply) => {
|
||||
try {
|
||||
const chatRequest = request.body as ChatRequest
|
||||
|
||||
// Handle streaming
|
||||
if (chatRequest.stream) {
|
||||
reply.raw.writeHead(200, {
|
||||
'Content-Type': 'text/event-stream',
|
||||
'Cache-Control': 'no-cache',
|
||||
'Connection': 'keep-alive',
|
||||
})
|
||||
|
||||
try {
|
||||
for await (const chunk of client.chatStream(chatRequest)) {
|
||||
reply.raw.write(`data: ${JSON.stringify(chunk)}\n\n`)
|
||||
|
||||
// Check for finish
|
||||
if (chunk.choices?.[0]?.finish_reason) {
|
||||
reply.raw.write('data: [DONE]\n\n')
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
reply.raw.end()
|
||||
} catch (streamError) {
|
||||
logger.error({ error: streamError }, "OpenCode Zen streaming failed")
|
||||
reply.raw.write(`data: ${JSON.stringify({ error: String(streamError) })}\n\n`)
|
||||
reply.raw.end()
|
||||
}
|
||||
} else {
|
||||
const response = await client.chat(chatRequest)
|
||||
return response
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error({ error }, "OpenCode Zen chat request failed")
|
||||
return reply.status(500).send({ error: "Chat request failed" })
|
||||
}
|
||||
})
|
||||
|
||||
logger.info("OpenCode Zen routes registered - Free models available!")
|
||||
}
|
||||
153
packages/server/src/server/routes/zai.ts
Normal file
153
packages/server/src/server/routes/zai.ts
Normal file
@@ -0,0 +1,153 @@
|
||||
import { FastifyInstance } from "fastify"
|
||||
import { ZAIClient, type ZAIConfig, type ZAIChatRequest } from "../../integrations/zai-api"
|
||||
import { Logger } from "../../logger"
|
||||
import { existsSync, readFileSync, writeFileSync, mkdirSync } from "fs"
|
||||
import { join } from "path"
|
||||
import { homedir } from "os"
|
||||
|
||||
interface ZAIRouteDeps {
  // Parent logger; a component-scoped child is derived per route group.
  logger: Logger
}

// Config file path — Z.AI settings persist as a single JSON document
// under the user's home directory, outside the repository.
const CONFIG_DIR = join(homedir(), ".nomadarch")
const CONFIG_FILE = join(CONFIG_DIR, "zai-config.json")
|
||||
|
||||
export async function registerZAIRoutes(
|
||||
app: FastifyInstance,
|
||||
deps: ZAIRouteDeps
|
||||
) {
|
||||
const logger = deps.logger.child({ component: "zai-routes" })
|
||||
|
||||
// Ensure config directory exists
|
||||
if (!existsSync(CONFIG_DIR)) {
|
||||
mkdirSync(CONFIG_DIR, { recursive: true })
|
||||
}
|
||||
|
||||
// Get Z.AI configuration
|
||||
app.get('/api/zai/config', async (request, reply) => {
|
||||
try {
|
||||
const config = getZAIConfig()
|
||||
return { config: { ...config, apiKey: config.apiKey ? '***' : undefined } }
|
||||
} catch (error) {
|
||||
logger.error({ error }, "Failed to get Z.AI config")
|
||||
return reply.status(500).send({ error: "Failed to get Z.AI configuration" })
|
||||
}
|
||||
})
|
||||
|
||||
// Update Z.AI configuration
|
||||
app.post('/api/zai/config', async (request, reply) => {
|
||||
try {
|
||||
const { enabled, apiKey, endpoint } = request.body as Partial<ZAIConfig>
|
||||
updateZAIConfig({ enabled, apiKey, endpoint })
|
||||
logger.info("Z.AI configuration updated")
|
||||
return { success: true, config: { enabled, endpoint, apiKey: apiKey ? '***' : undefined } }
|
||||
} catch (error) {
|
||||
logger.error({ error }, "Failed to update Z.AI config")
|
||||
return reply.status(500).send({ error: "Failed to update Z.AI configuration" })
|
||||
}
|
||||
})
|
||||
|
||||
// Test Z.AI connection
|
||||
app.post('/api/zai/test', async (request, reply) => {
|
||||
try {
|
||||
const config = getZAIConfig()
|
||||
if (!config.enabled) {
|
||||
return reply.status(400).send({ error: "Z.AI is not enabled" })
|
||||
}
|
||||
|
||||
const client = new ZAIClient(config)
|
||||
const isConnected = await client.testConnection()
|
||||
|
||||
return { connected: isConnected }
|
||||
} catch (error) {
|
||||
logger.error({ error }, "Z.AI connection test failed")
|
||||
return reply.status(500).send({ error: "Connection test failed" })
|
||||
}
|
||||
})
|
||||
|
||||
// List available models
|
||||
app.get('/api/zai/models', async (request, reply) => {
|
||||
try {
|
||||
const config = getZAIConfig()
|
||||
if (!config.enabled) {
|
||||
return reply.status(400).send({ error: "Z.AI is not enabled" })
|
||||
}
|
||||
|
||||
const client = new ZAIClient(config)
|
||||
const models = await client.listModels()
|
||||
|
||||
return { models: models.map(name => ({ name, provider: "zai" })) }
|
||||
} catch (error) {
|
||||
logger.error({ error }, "Failed to list Z.AI models")
|
||||
return reply.status(500).send({ error: "Failed to list models" })
|
||||
}
|
||||
})
|
||||
|
||||
// Chat completion endpoint
|
||||
app.post('/api/zai/chat', async (request, reply) => {
|
||||
try {
|
||||
const config = getZAIConfig()
|
||||
if (!config.enabled) {
|
||||
return reply.status(400).send({ error: "Z.AI is not enabled" })
|
||||
}
|
||||
|
||||
const client = new ZAIClient(config)
|
||||
const chatRequest = request.body as ZAIChatRequest
|
||||
|
||||
// Handle streaming
|
||||
if (chatRequest.stream) {
|
||||
reply.raw.writeHead(200, {
|
||||
'Content-Type': 'text/event-stream',
|
||||
'Cache-Control': 'no-cache',
|
||||
'Connection': 'keep-alive',
|
||||
})
|
||||
|
||||
try {
|
||||
for await (const chunk of client.chatStream(chatRequest)) {
|
||||
reply.raw.write(`data: ${JSON.stringify(chunk)}\n\n`)
|
||||
|
||||
// Check for message_stop event
|
||||
if (chunk.type === "message_stop") {
|
||||
reply.raw.write('data: [DONE]\n\n')
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
reply.raw.end()
|
||||
} catch (streamError) {
|
||||
logger.error({ error: streamError }, "Z.AI streaming failed")
|
||||
reply.raw.write(`data: ${JSON.stringify({ error: String(streamError) })}\n\n`)
|
||||
reply.raw.end()
|
||||
}
|
||||
} else {
|
||||
const response = await client.chat(chatRequest)
|
||||
return response
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error({ error }, "Z.AI chat request failed")
|
||||
return reply.status(500).send({ error: "Chat request failed" })
|
||||
}
|
||||
})
|
||||
|
||||
logger.info("Z.AI routes registered")
|
||||
}
|
||||
|
||||
// Configuration management functions using file-based storage
|
||||
function getZAIConfig(): ZAIConfig {
|
||||
try {
|
||||
if (existsSync(CONFIG_FILE)) {
|
||||
const data = readFileSync(CONFIG_FILE, 'utf-8')
|
||||
return JSON.parse(data)
|
||||
}
|
||||
return { enabled: false, endpoint: "https://api.z.ai/api/anthropic", timeout: 3000000 }
|
||||
} catch {
|
||||
return { enabled: false, endpoint: "https://api.z.ai/api/anthropic", timeout: 3000000 }
|
||||
}
|
||||
}
|
||||
|
||||
function updateZAIConfig(config: Partial<ZAIConfig>): void {
|
||||
const current = getZAIConfig()
|
||||
const updated = { ...current, ...config }
|
||||
writeFileSync(CONFIG_FILE, JSON.stringify(updated, null, 2))
|
||||
}
|
||||
Reference in New Issue
Block a user