NomadArch/packages/server/src/integrations/ollama-cloud.ts

import { z } from "zod"

export const OllamaCloudConfigSchema = z.object({
  apiKey: z.string().optional(),
  endpoint: z.string().default("https://ollama.com"),
  enabled: z.boolean().default(false)
})

export type OllamaCloudConfig = z.infer<typeof OllamaCloudConfigSchema>

// Schema is flexible since Ollama Cloud may return different fields than local Ollama
export const OllamaModelSchema = z.object({
  name: z.string(),
  model: z.string().optional(), // Some APIs return model instead of name
  size: z.union([z.string(), z.number()]).optional(),
  digest: z.string().optional(),
  modified_at: z.string().optional(),
  created_at: z.string().optional(),
  details: z.any().optional() // Model details like family, parameter_size, etc.
})

export type OllamaModel = z.infer<typeof OllamaModelSchema>

export const ChatMessageSchema = z.object({
  role: z.enum(["user", "assistant", "system"]),
  content: z.string(),
  images: z.array(z.string()).optional(),
  tool_calls: z.array(z.any()).optional(),
  thinking: z.string().optional()
})

export type ChatMessage = z.infer<typeof ChatMessageSchema>

export const ToolCallSchema = z.object({
  name: z.string(),
  arguments: z.record(z.any())
})

export type ToolCall = z.infer<typeof ToolCallSchema>

export const ToolDefinitionSchema = z.object({
  name: z.string(),
  description: z.string(),
  parameters: z.object({
    type: z.enum(["object", "string", "number", "boolean", "array"]),
    properties: z.record(z.any()),
    required: z.array(z.string()).optional()
  })
})

export type ToolDefinition = z.infer<typeof ToolDefinitionSchema>

export const ChatRequestSchema = z.object({
  model: z.string(),
  messages: z.array(ChatMessageSchema),
  stream: z.boolean().default(false),
  think: z.union([z.boolean(), z.enum(["low", "medium", "high"])]).optional(),
  format: z.union([z.literal("json"), z.any()]).optional(),
  tools: z.array(ToolDefinitionSchema).optional(),
  web_search: z.boolean().optional(),
  options: z.object({
    temperature: z.number().min(0).max(2).optional(),
    top_p: z.number().min(0).max(1).optional()
  }).optional()
})

export const ChatResponseSchema = z.object({
  model: z.string(),
  created_at: z.string(),
  message: ChatMessageSchema.extend({
    thinking: z.string().optional(),
    tool_calls: z.array(z.any()).optional()
  }),
  done: z.boolean().optional(),
  total_duration: z.number().optional(),
  load_duration: z.number().optional(),
  prompt_eval_count: z.number().optional(),
  prompt_eval_duration: z.number().optional(),
  eval_count: z.number().optional(),
  eval_duration: z.number().optional()
})

export type ChatRequest = z.infer<typeof ChatRequestSchema>
export type ChatResponse = z.infer<typeof ChatResponseSchema>

export const EmbeddingRequestSchema = z.object({
  model: z.string(),
  input: z.union([z.string(), z.array(z.string())])
})

export type EmbeddingRequest = z.infer<typeof EmbeddingRequestSchema>

export const EmbeddingResponseSchema = z.object({
  model: z.string(),
  embeddings: z.array(z.array(z.number()))
})

export type EmbeddingResponse = z.infer<typeof EmbeddingResponseSchema>

export class OllamaCloudClient {
  private config: OllamaCloudConfig
  private baseUrl: string

  constructor(config: OllamaCloudConfig) {
    this.config = config
    this.baseUrl = config.endpoint.replace(/\/$/, "")
  }

  async testConnection(): Promise<boolean> {
    try {
      const response = await this.makeRequest("/tags", { method: "GET" })
      return response.ok
    } catch (error) {
      console.error("Ollama Cloud connection test failed:", error)
      return false
    }
  }

  async listModels(): Promise<OllamaModel[]> {
    try {
      const headers: Record<string, string> = {}
      if (this.config.apiKey) {
        headers["Authorization"] = `Bearer ${this.config.apiKey}`
      }

      const cloudResponse = await fetch(`${this.baseUrl}/v1/models`, {
        method: "GET",
        headers
      })

      if (cloudResponse.ok) {
        const data = await cloudResponse.json()
        const modelsArray = Array.isArray(data?.data) ? data.data : []
        const parsedModels = modelsArray
          .map((model: any) => ({
            name: model.id || model.name || model.model,
            model: model.id || model.model || model.name,
          }))
          .filter((model: any) => model.name)

        if (parsedModels.length > 0) {
          return parsedModels
        }
      }

      const response = await this.makeRequest("/tags", { method: "GET" })

      if (!response.ok) {
        const errorText = await response.text().catch(() => "Unknown error")
        console.error(`[OllamaCloud] Failed to fetch models: ${response.status} ${response.statusText}`, errorText)
        throw new Error(`Failed to fetch models: ${response.status} ${response.statusText} - ${errorText}`)
      }

      const data = await response.json()
      console.log("[OllamaCloud] Models response:", JSON.stringify(data).substring(0, 500))

      // Handle different response formats flexibly
      const modelsArray = Array.isArray(data.models) ? data.models :
        Array.isArray(data) ? data : []

      // Parse with flexible schema, don't throw on validation failure
      // Only include cloud-compatible models (ending in -cloud or known cloud models)
      const parsedModels: OllamaModel[] = []
      for (const model of modelsArray) {
        try {
          const modelName = model.name || model.model || ""
          // Filter to only cloud-compatible models
          const isCloudModel = modelName.endsWith("-cloud") ||
            modelName.includes(":cloud") ||
            modelName.startsWith("gpt-oss") ||
            modelName.startsWith("qwen3-coder") ||
            modelName.startsWith("deepseek-v3")

          if (modelName && isCloudModel) {
            parsedModels.push({
              name: modelName,
              model: model.model || modelName,
              size: model.size,
              digest: model.digest,
              modified_at: model.modified_at,
              created_at: model.created_at,
              details: model.details
            })
          }
        } catch (parseError) {
          console.warn("[OllamaCloud] Skipping model due to parse error:", model, parseError)
        }
      }

      console.log(`[OllamaCloud] Parsed ${parsedModels.length} cloud-compatible models`)
      return parsedModels
    } catch (error) {
      console.error("Failed to list Ollama Cloud models:", error)
      throw error
    }
  }

  async chat(request: ChatRequest): Promise<AsyncIterable<ChatResponse>> {
    if (!this.config.apiKey) {
      throw new Error("Ollama Cloud API key is required")
    }

    const headers: Record<string, string> = {
      "Content-Type": "application/json"
    }

    if (this.config.apiKey) {
      headers["Authorization"] = `Bearer ${this.config.apiKey}`
    }

    try {
      const response = await this.makeRequest("/chat", {
        method: "POST",
        headers,
        body: JSON.stringify(request)
      })

      if (!response.ok) {
        const errorText = await response.text()
        throw new Error(`Chat request failed: ${response.statusText} - ${errorText}`)
      }

      if (request.stream) {
        return this.parseStreamingResponse(response)
      } else {
        const data = ChatResponseSchema.parse(await response.json())
        return this.createAsyncIterable([data])
      }
    } catch (error) {
      console.error("Ollama Cloud chat request failed:", error)
      throw error
    }
  }

  async chatWithThinking(request: ChatRequest): Promise<AsyncIterable<ChatResponse>> {
    const requestWithThinking = {
      ...request,
      think: true
    }
    return this.chat(requestWithThinking)
  }

  async chatWithStructuredOutput(request: ChatRequest, schema: any): Promise<AsyncIterable<ChatResponse>> {
    const requestWithFormat = {
      ...request,
      format: schema
    }
    return this.chat(requestWithFormat)
  }

  async chatWithVision(request: ChatRequest, images: string[]): Promise<AsyncIterable<ChatResponse>> {
    if (!request.messages.length) {
      throw new Error("At least one message is required")
    }

    const messagesWithImages = [...request.messages]
    const lastUserMessage = messagesWithImages.slice().reverse().find(m => m.role === "user")

    if (lastUserMessage) {
      lastUserMessage.images = images
    }

    return this.chat({ ...request, messages: messagesWithImages })
  }

  async chatWithTools(request: ChatRequest, tools: ToolDefinition[]): Promise<AsyncIterable<ChatResponse>> {
    const requestWithTools = {
      ...request,
      tools
    }
    return this.chat(requestWithTools)
  }

  async chatWithWebSearch(request: ChatRequest): Promise<AsyncIterable<ChatResponse>> {
    const requestWithWebSearch = {
      ...request,
      web_search: true
    }
    return this.chat(requestWithWebSearch)
  }

  async generateEmbeddings(request: EmbeddingRequest): Promise<EmbeddingResponse> {
    if (!this.config.apiKey) {
      throw new Error("Ollama Cloud API key is required")
    }

    const headers: Record<string, string> = {
      "Content-Type": "application/json"
    }

    if (this.config.apiKey) {
      headers["Authorization"] = `Bearer ${this.config.apiKey}`
    }

    try {
      const response = await this.makeRequest("/embed", {
        method: "POST",
        headers,
        body: JSON.stringify(request)
      })

      if (!response.ok) {
        throw new Error(`Embeddings request failed: ${response.statusText}`)
      }

      const data = await response.json()
      return EmbeddingResponseSchema.parse(data)
    } catch (error) {
      console.error("Ollama Cloud embeddings request failed:", error)
      throw error
    }
  }

  async pullModel(modelName: string): Promise<void> {
    const headers: Record<string, string> = {
      "Content-Type": "application/json"
    }

    if (this.config.apiKey) {
      headers["Authorization"] = `Bearer ${this.config.apiKey}`
    }

    const response = await this.makeRequest("/pull", {
      method: "POST",
      headers,
      body: JSON.stringify({ name: modelName })
    })

    if (!response.ok) {
      throw new Error(`Failed to pull model ${modelName}: ${response.statusText}`)
    }
  }

  private async *parseStreamingResponse(response: Response): AsyncIterable<ChatResponse> {
    if (!response.body) {
      throw new Error("Response body is missing")
    }

    const reader = response.body.getReader()
    const decoder = new TextDecoder()

    try {
      while (true) {
        const { done, value } = await reader.read()
        if (done) break

        const lines = decoder.decode(value, { stream: true }).split('\n').filter(line => line.trim())

        for (const line of lines) {
          try {
            const data = JSON.parse(line)
            const chatResponse = ChatResponseSchema.parse(data)
            yield chatResponse

            if (chatResponse.done) {
              return
            }
          } catch (parseError) {
            console.warn("Failed to parse streaming line:", line, parseError)
          }
        }
      }
    } finally {
      reader.releaseLock()
    }
  }

  private async *createAsyncIterable<T>(items: T[]): AsyncIterable<T> {
    for (const item of items) {
      yield item
    }
  }

  private async makeRequest(endpoint: string, options: RequestInit): Promise<Response> {
    // Ensure endpoint starts with /api
    const apiEndpoint = endpoint.startsWith('/api') ? endpoint : `/api${endpoint}`
    const url = `${this.baseUrl}${apiEndpoint}`

    const headers: Record<string, string> = {
      ...options.headers as Record<string, string>
    }

    if (this.config.apiKey) {
      headers["Authorization"] = `Bearer ${this.config.apiKey}`
    }

    console.log(`[OllamaCloud] Making request to: ${url}`)

    return fetch(url, {
      ...options,
      headers
    })
  }

  async getCloudModels(): Promise<OllamaModel[]> {
    const allModels = await this.listModels()
    return allModels.filter(model => model.name.endsWith("-cloud"))
  }

  static validateApiKey(apiKey: string): boolean {
    return typeof apiKey === "string" && apiKey.length > 0
  }

  async getCloudModelNames(): Promise<string[]> {
    const cloudModels = await this.getCloudModels()
    return cloudModels.map(model => model.name)
  }

  async getThinkingCapableModels(): Promise<string[]> {
    const allModels = await this.listModels()
    const thinkingModelPatterns = ["qwen3", "deepseek-r1", "gpt-oss", "deepseek-v3.1"]
    return allModels
      .map(m => m.name)
      .filter(name => thinkingModelPatterns.some(pattern => name.toLowerCase().includes(pattern)))
  }

  async getVisionCapableModels(): Promise<string[]> {
    const allModels = await this.listModels()
    const visionModelPatterns = ["gemma3", "llama3.2-vision", "llava", "bakllava", "minicpm-v"]
    return allModels
      .map(m => m.name)
      .filter(name => visionModelPatterns.some(pattern => name.toLowerCase().includes(pattern)))
  }

  async getEmbeddingModels(): Promise<string[]> {
    const allModels = await this.listModels()
    const embeddingModelPatterns = ["embeddinggemma", "qwen3-embedding", "all-minilm", "nomic-embed", "mxbai-embed"]
    return allModels
      .map(m => m.name)
      .filter(name => embeddingModelPatterns.some(pattern => name.toLowerCase().includes(pattern)))
  }
}

export const DEFAULT_CLOUD_MODELS = [
  "gpt-oss:120b-cloud",
  "llama3.1:70b-cloud",
  "llama3.1:8b-cloud",
  "qwen2.5:32b-cloud",
  "qwen2.5:7b-cloud"
] as const

export type CloudModelName = typeof DEFAULT_CLOUD_MODELS[number]

export const THINKING_MODELS = [
  "qwen3",
  "deepseek-r1",
  "deepseek-v3.1",
  "gpt-oss:120b-cloud"
] as const

export type ThinkingModelName = typeof THINKING_MODELS[number]

export const VISION_MODELS = [
  "gemma3",
  "llava",
  "bakllava",
  "minicpm-v"
] as const

export type VisionModelName = typeof VISION_MODELS[number]

export const EMBEDDING_MODELS = [
  "embeddinggemma",
  "qwen3-embedding",
  "all-minilm",
  "nomic-embed-text",
  "mxbai-embed-large"
] as const

export type EmbeddingModelName = typeof EMBEDDING_MODELS[number]