Files
SuperCharged-Claude-Code-Up…/dexto/packages/core/src/llm/registry.ts
admin b52318eeae feat: Add intelligent auto-router and enhanced integrations
- Add intelligent-router.sh hook for automatic agent routing
- Add AUTO-TRIGGER-SUMMARY.md documentation
- Add FINAL-INTEGRATION-SUMMARY.md documentation
- Complete Prometheus integration (6 commands + 4 tools)
- Complete Dexto integration (12 commands + 5 tools)
- Enhanced Ralph with access to all agents
- Fix /clawd command (removed disable-model-invocation)
- Update hooks.json to v5 with intelligent routing
- 291 total skills now available
- All 21 commands with automatic routing

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-01-28 00:27:56 +04:00

2354 lines
89 KiB
TypeScript

/**
* LLM Model Registry
*
* TODO: maxOutputTokens - Currently we rely on @ai-sdk/anthropic (and other provider SDKs)
* to set appropriate maxOutputTokens defaults per model. As of v2.0.56, @ai-sdk/anthropic
* has getModelCapabilities() which sets correct limits (e.g., 64000 for claude-haiku-4-5).
*
* If we need finer control or want to support models before the SDK does, we could:
* 1. Add maxOutputTokens to ModelInfo interface
* 2. Create getMaxOutputTokensForModel() / getEffectiveMaxOutputTokens() helpers
* 3. Pass explicit maxOutputTokens in TurnExecutor when config doesn't specify one
*
* For now, keeping SDK dependency is simpler and auto-updates with SDK releases.
*/
import { LLMConfig } from './schemas.js';
import { LLMError } from './errors.js';
import { LLMErrorCode } from './error-codes.js';
import { DextoRuntimeError } from '../errors/DextoRuntimeError.js';
import { ErrorScope, ErrorType } from '../errors/types.js';
import {
LLM_PROVIDERS,
type LLMProvider,
type SupportedFileType,
type TokenUsage,
} from './types.js';
import type { IDextoLogger } from '../logger/v2/types.js';
import { getOpenRouterModelContextLength } from './providers/openrouter-model-registry.js';
/**
 * Price sheet attached to a model entry.
 *
 * All amounts are USD per one million tokens. The sheet as a whole is optional
 * on a model; a model that carries no sheet has unknown pricing.
 */
export interface ModelPricing {
    // --- Base rates (always present when a sheet is provided) ---

    /** Rate applied to input tokens. */
    inputPerM: number;
    /** Rate applied to output tokens. */
    outputPerM: number;

    // --- Cache rates (omitted when the registry entry does not list them) ---

    /** Rate for cache writes, when listed. */
    cacheWritePerM?: number;
    /** Rate for cache reads, when listed. */
    cacheReadPerM?: number;

    // --- Self-describing metadata; each field admits a single literal value ---

    /** Billing unit of the rates above; only `'per_million_tokens'` is allowed. */
    unit?: 'per_million_tokens';
    /** Currency of the rates above; only `'USD'` is allowed. */
    currency?: 'USD';
}
/**
 * Registry entry describing one model offered by a provider.
 */
export interface ModelInfo {
    /** Model ID as written in the registry (the provider-native ID, e.g. `claude-haiku-4-5-20251001`). */
    name: string;

    /** Optional human-readable label for the model. */
    displayName?: string;

    /**
     * OpenRouter model ID, for use with gateway providers (dexto, openrouter).
     *
     * Only needed when the OpenRouter ID differs from the native model ID.
     * For most OpenAI/Google/xAI models the ID is simply `{provider}/{name}`.
     * Anthropic IDs differ significantly
     * (e.g. `claude-haiku-4-5-20251001` → `anthropic/claude-haiku-4.5`).
     */
    openrouterId?: string;

    /** Input-token limit for this model. */
    maxInputTokens: number;

    /**
     * File types this model accepts.
     *
     * Required: every model must state its file support explicitly.
     */
    supportedFileTypes: SupportedFileType[];

    /**
     * Flags this model as a default choice.
     * NOTE(review): presumably "default within its provider" — confirm against the lookup code.
     */
    default?: boolean;

    /** Pricing metadata (USD per 1M tokens). Optional; when omitted, pricing is unknown. */
    pricing?: ModelPricing;
}
// The provider and file-type identifiers used across server/UI (LLM_PROVIDERS,
// SUPPORTED_FILE_TYPES) are defined in types.ts as the single source of truth.
// This module imports LLM_PROVIDERS and the SupportedFileType type from there (see imports above).
/**
 * Central lookup from a MIME type string to the file-type category it belongs to.
 *
 * The table is assembled one category at a time. Registration order is kept
 * (PDF, then audio, then image) so that key enumeration yields the MIME types
 * in exactly that sequence.
 */
export const MIME_TYPE_TO_FILE_TYPE: Record<string, SupportedFileType> = (() => {
    const table: Record<string, SupportedFileType> = {};

    // Files every listed MIME type under the given category, preserving list order.
    const register = (category: SupportedFileType, mimeTypes: readonly string[]): void => {
        for (const mimeType of mimeTypes) {
            table[mimeType] = category;
        }
    };

    register('pdf', ['application/pdf']);

    register('audio', [
        'audio/mp3',
        'audio/mpeg',
        'audio/wav',
        'audio/x-wav',
        'audio/wave',
        'audio/webm',
        'audio/ogg',
        'audio/m4a',
        'audio/aac',
    ]);

    // Common image MIME types
    register('image', ['image/jpeg', 'image/jpg', 'image/png', 'image/webp', 'image/gif']);

    return table;
})();
/**
 * Lists every MIME type that has an entry in MIME_TYPE_TO_FILE_TYPE.
 *
 * @returns A freshly built array of the table's keys, in the table's own key order.
 */
export function getAllowedMimeTypes(): string[] {
    return Object.entries(MIME_TYPE_TO_FILE_TYPE).map(([mimeType]) => mimeType);
}
/**
 * Registry entry describing one LLM provider and the models it offers.
 */
export interface ProviderInfo {
    /** Fixed list of models known for this provider. */
    models: ModelInfo[];

    /**
     * Provider-level file type support.
     *
     * Described in the original note as the default used when a model does not
     * specify its own support.
     * NOTE(review): `ModelInfo.supportedFileTypes` is required, so this fallback
     * presumably matters for model IDs with no registry entry — confirm against
     * the lookup helpers.
     */
    supportedFileTypes: SupportedFileType[];

    /**
     * Whether a custom base URL applies to this provider, as one field:
     * `'none'`, `'optional'`, or `'required'`.
     */
    baseURLSupport: 'none' | 'optional' | 'required';

    /** When true, arbitrary model IDs beyond the fixed `models` list are allowed. */
    supportsCustomModels?: boolean;

    /**
     * When true, this provider can access all models from all other providers in the registry.
     *
     * Meant for gateway providers such as 'dexto' that route to multiple upstream providers.
     * Model names are transformed to the gateway's format
     * (e.g. 'gpt-5-mini' → 'openai/gpt-5-mini').
     */
    supportsAllRegistryModels?: boolean;

    /**
     * OpenRouter prefix for this provider's models (e.g. 'openai', 'anthropic', 'x-ai').
     *
     * Gateway providers use it to parse and route prefixed model names:
     * - set: the provider's models can be reached through a gateway as `{prefix}/{model}`
     * - undefined: the provider is not reachable through gateways
     *   (local providers, and the gateway providers themselves)
     */
    openrouterPrefix?: string;
}
/**
 * Input-token limit to fall back on when a model's own limit
 * cannot be determined.
 */
export const DEFAULT_MAX_INPUT_TOKENS = 128_000;
// Use imported constant LLM_PROVIDERS
/**
* LLM Model Registry - Single Source of Truth for Supported models and their capabilities
*
* IMPORTANT: supportedFileTypes is the SINGLE SOURCE OF TRUTH for file upload capabilities:
* - Empty array [] = Model does NOT support file uploads (UI will hide all attach buttons)
* - Specific types ['image', 'pdf'] = Model supports ONLY those file types
* - DO NOT use empty arrays as "unknown" - research the model's actual capabilities
* - The web UI directly reflects these capabilities without fallback logic
*/
export const LLM_REGISTRY: Record<LLMProvider, ProviderInfo> = {
openai: {
models: [
// GPT-5.2 series (latest, released Dec 2025)
{
name: 'gpt-5.2-chat-latest',
displayName: 'GPT-5.2 Instant',
openrouterId: 'openai/gpt-5.2-chat',
maxInputTokens: 400000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 1.75,
outputPerM: 14.0,
cacheReadPerM: 0.175,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gpt-5.2',
displayName: 'GPT-5.2 Thinking',
openrouterId: 'openai/gpt-5.2',
maxInputTokens: 400000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 1.75,
outputPerM: 14.0,
cacheReadPerM: 0.175,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gpt-5.2-pro',
displayName: 'GPT-5.2 Pro',
openrouterId: 'openai/gpt-5.2-pro',
maxInputTokens: 400000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 21.0,
outputPerM: 168.0,
cacheReadPerM: 2.1,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gpt-5.2-codex',
displayName: 'GPT-5.2 Codex',
openrouterId: 'openai/gpt-5.2-codex',
maxInputTokens: 400000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 1.75,
outputPerM: 14.0,
cacheReadPerM: 0.175,
currency: 'USD',
unit: 'per_million_tokens',
},
},
// GPT-5.1 series
{
name: 'gpt-5.1-chat-latest',
displayName: 'GPT-5.1 Instant',
openrouterId: 'openai/gpt-5.1-chat',
maxInputTokens: 400000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 1.25,
outputPerM: 10.0,
cacheReadPerM: 0.125,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gpt-5.1',
displayName: 'GPT-5.1 Thinking',
openrouterId: 'openai/gpt-5.1',
maxInputTokens: 400000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 1.25,
outputPerM: 10.0,
cacheReadPerM: 0.125,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gpt-5.1-codex',
displayName: 'GPT-5.1 Codex',
openrouterId: 'openai/gpt-5.1-codex',
maxInputTokens: 400000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 1.25,
outputPerM: 10.0,
cacheReadPerM: 0.125,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gpt-5.1-codex-mini',
displayName: 'GPT-5.1 Codex Mini',
openrouterId: 'openai/gpt-5.1-codex-mini',
maxInputTokens: 400000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 0.25,
outputPerM: 2.0,
cacheReadPerM: 0.025,
currency: 'USD',
unit: 'per_million_tokens',
},
},
// {
// name: 'gpt-5.1-codex-max',
// displayName: 'GPT-5.1 Codex Max',
// maxInputTokens: 400000,
// supportedFileTypes: ['pdf', 'image'],
// pricing: {
// inputPerM: 1.25,
// outputPerM: 10.0,
// cacheReadPerM: 0.125,
// currency: 'USD',
// unit: 'per_million_tokens',
// },
// },
{
name: 'gpt-5-pro',
displayName: 'GPT-5 Pro',
openrouterId: 'openai/gpt-5-pro',
maxInputTokens: 400000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 15.0,
outputPerM: 120.0,
cacheReadPerM: 1.5,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gpt-5',
displayName: 'GPT-5',
openrouterId: 'openai/gpt-5',
maxInputTokens: 400000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 1.25,
outputPerM: 10.0,
cacheReadPerM: 0.125,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gpt-5-mini',
displayName: 'GPT-5 Mini',
openrouterId: 'openai/gpt-5-mini',
maxInputTokens: 400000,
default: true,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 0.25,
outputPerM: 2.0,
cacheReadPerM: 0.025,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gpt-5-nano',
displayName: 'GPT-5 Nano',
openrouterId: 'openai/gpt-5-nano',
maxInputTokens: 400000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 0.05,
outputPerM: 0.4,
cacheReadPerM: 0.005,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gpt-5-codex',
displayName: 'GPT-5 Codex',
openrouterId: 'openai/gpt-5-codex',
maxInputTokens: 400000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 1.25,
outputPerM: 10.0,
cacheReadPerM: 0.125,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gpt-4.1',
displayName: 'GPT-4.1',
openrouterId: 'openai/gpt-4.1',
maxInputTokens: 1048576,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 2.0,
outputPerM: 8.0,
cacheReadPerM: 0.5,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gpt-4.1-mini',
displayName: 'GPT-4.1 Mini',
openrouterId: 'openai/gpt-4.1-mini',
maxInputTokens: 1048576,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 0.4,
outputPerM: 1.6,
cacheReadPerM: 0.1,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gpt-4.1-nano',
displayName: 'GPT-4.1 Nano',
openrouterId: 'openai/gpt-4.1-nano',
maxInputTokens: 1048576,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 0.1,
outputPerM: 0.4,
cacheReadPerM: 0.025,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gpt-4o',
displayName: 'GPT-4o',
openrouterId: 'openai/gpt-4o',
maxInputTokens: 128000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 2.5,
outputPerM: 10.0,
cacheReadPerM: 1.25,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gpt-4o-mini',
displayName: 'GPT-4o Mini',
openrouterId: 'openai/gpt-4o-mini',
maxInputTokens: 128000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 0.15,
outputPerM: 0.6,
cacheReadPerM: 0.075,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gpt-4o-audio-preview',
displayName: 'GPT-4o Audio Preview',
openrouterId: 'openai/gpt-4o-audio-preview',
maxInputTokens: 128000,
supportedFileTypes: ['audio'],
pricing: {
inputPerM: 2.5,
outputPerM: 10.0,
cacheReadPerM: 1.25,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'o4-mini',
displayName: 'O4 Mini',
openrouterId: 'openai/o4-mini',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 1.1,
outputPerM: 4.4,
cacheReadPerM: 0.275,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'o3',
displayName: 'O3',
openrouterId: 'openai/o3',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 2.0,
outputPerM: 8.0,
cacheReadPerM: 0.5,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'o3-mini',
displayName: 'O3 Mini',
openrouterId: 'openai/o3-mini',
maxInputTokens: 200000,
supportedFileTypes: [],
pricing: {
inputPerM: 1.1,
outputPerM: 4.4,
cacheReadPerM: 0.55,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'o1',
displayName: 'O1',
openrouterId: 'openai/o1',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 15.0,
outputPerM: 60.0,
cacheReadPerM: 7.5,
currency: 'USD',
unit: 'per_million_tokens',
},
},
],
baseURLSupport: 'none',
supportedFileTypes: [], // No defaults - models must explicitly specify support
openrouterPrefix: 'openai',
},
'openai-compatible': {
models: [], // Empty - accepts any model name for custom endpoints
baseURLSupport: 'required',
supportedFileTypes: ['pdf', 'image', 'audio'], // Allow all types for custom endpoints - user assumes responsibility for model capabilities
supportsCustomModels: true,
},
anthropic: {
models: [
{
name: 'claude-haiku-4-5-20251001',
displayName: 'Claude 4.5 Haiku',
openrouterId: 'anthropic/claude-haiku-4.5',
maxInputTokens: 200000,
default: true,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 1.0,
outputPerM: 5.0,
cacheWritePerM: 1.25,
cacheReadPerM: 0.1,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'claude-sonnet-4-5-20250929',
displayName: 'Claude 4.5 Sonnet',
openrouterId: 'anthropic/claude-sonnet-4.5',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 3.0,
outputPerM: 15.0,
cacheWritePerM: 3.75,
cacheReadPerM: 0.3,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'claude-opus-4-5-20251101',
displayName: 'Claude 4.5 Opus',
openrouterId: 'anthropic/claude-opus-4.5',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 5.0,
outputPerM: 25.0,
cacheWritePerM: 6.25,
cacheReadPerM: 0.5,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'claude-opus-4-1-20250805',
displayName: 'Claude 4.1 Opus',
openrouterId: 'anthropic/claude-opus-4.1',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 15.0,
outputPerM: 75.0,
cacheWritePerM: 18.75,
cacheReadPerM: 1.5,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'claude-4-opus-20250514',
displayName: 'Claude 4 Opus',
openrouterId: 'anthropic/claude-opus-4',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 15.0,
outputPerM: 75.0,
cacheWritePerM: 18.75,
cacheReadPerM: 1.5,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'claude-4-sonnet-20250514',
displayName: 'Claude 4 Sonnet',
openrouterId: 'anthropic/claude-sonnet-4',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 3.0,
outputPerM: 15.0,
cacheWritePerM: 3.75,
cacheReadPerM: 0.3,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'claude-3-7-sonnet-20250219',
displayName: 'Claude 3.7 Sonnet',
openrouterId: 'anthropic/claude-3.7-sonnet',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 3.0,
outputPerM: 15.0,
cacheWritePerM: 3.75,
cacheReadPerM: 0.3,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'claude-3-5-sonnet-20240620',
displayName: 'Claude 3.5 Sonnet',
openrouterId: 'anthropic/claude-3.5-sonnet',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 3.0,
outputPerM: 15.0,
cacheWritePerM: 3.75,
cacheReadPerM: 0.3,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'claude-3-5-haiku-20241022',
displayName: 'Claude 3.5 Haiku',
openrouterId: 'anthropic/claude-3.5-haiku',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 0.8,
outputPerM: 4,
cacheWritePerM: 1,
cacheReadPerM: 0.08,
currency: 'USD',
unit: 'per_million_tokens',
},
},
],
baseURLSupport: 'none',
supportedFileTypes: [], // No defaults - models must explicitly specify support
openrouterPrefix: 'anthropic',
},
google: {
models: [
{
name: 'gemini-3-flash-preview',
displayName: 'Gemini 3 Flash Preview',
openrouterId: 'google/gemini-3-flash-preview',
maxInputTokens: 1048576,
default: true,
supportedFileTypes: ['pdf', 'image', 'audio'],
pricing: {
inputPerM: 0.5,
outputPerM: 3.0,
cacheReadPerM: 0.05,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gemini-3-pro-preview',
displayName: 'Gemini 3 Pro Preview',
openrouterId: 'google/gemini-3-pro-preview',
maxInputTokens: 1048576,
supportedFileTypes: ['pdf', 'image', 'audio'],
pricing: {
inputPerM: 2.0,
outputPerM: 12.0,
cacheReadPerM: 0.2,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gemini-3-pro-image-preview',
displayName: 'Gemini 3 Pro Image Preview',
openrouterId: 'google/gemini-3-pro-image-preview',
maxInputTokens: 1048576,
supportedFileTypes: ['image'],
pricing: {
inputPerM: 2.0,
outputPerM: 120.0,
cacheReadPerM: 0.2,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gemini-2.5-pro',
displayName: 'Gemini 2.5 Pro',
openrouterId: 'google/gemini-2.5-pro',
maxInputTokens: 1048576,
supportedFileTypes: ['pdf', 'image', 'audio'],
pricing: {
inputPerM: 1.25,
outputPerM: 10.0,
cacheReadPerM: 0.31,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gemini-2.5-flash',
displayName: 'Gemini 2.5 Flash',
openrouterId: 'google/gemini-2.5-flash',
maxInputTokens: 1048576,
supportedFileTypes: ['pdf', 'image', 'audio'],
pricing: {
inputPerM: 0.3,
outputPerM: 2.5,
cacheReadPerM: 0.03,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gemini-2.5-flash-lite',
displayName: 'Gemini 2.5 Flash Lite',
openrouterId: 'google/gemini-2.5-flash-lite',
maxInputTokens: 1048576,
supportedFileTypes: ['pdf', 'image', 'audio'],
pricing: {
inputPerM: 0.1,
outputPerM: 0.4,
cacheReadPerM: 0.025,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gemini-2.0-flash',
displayName: 'Gemini 2.0 Flash',
openrouterId: 'google/gemini-2.0-flash-001',
maxInputTokens: 1048576,
supportedFileTypes: ['pdf', 'image', 'audio'],
pricing: {
inputPerM: 0.15,
outputPerM: 0.6,
cacheReadPerM: 0.025,
cacheWritePerM: 1.0,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gemini-2.0-flash-lite',
displayName: 'Gemini 2.0 Flash Lite',
openrouterId: 'google/gemini-2.0-flash-lite-001',
maxInputTokens: 1048576,
supportedFileTypes: ['pdf', 'image', 'audio'],
pricing: {
inputPerM: 0.075,
outputPerM: 0.3,
cacheReadPerM: 0.01875,
currency: 'USD',
unit: 'per_million_tokens',
},
},
],
baseURLSupport: 'none',
supportedFileTypes: [], // No defaults - models must explicitly specify support
openrouterPrefix: 'google',
},
// https://console.groq.com/docs/models
groq: {
models: [
{
name: 'gemma-2-9b-it',
displayName: 'Gemma 2 9B Instruct',
maxInputTokens: 8192,
supportedFileTypes: [],
pricing: {
inputPerM: 0.2,
outputPerM: 0.2,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'openai/gpt-oss-20b',
displayName: 'GPT OSS 20B 128k',
maxInputTokens: 128000,
supportedFileTypes: [],
pricing: {
inputPerM: 0.1,
outputPerM: 0.5,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'openai/gpt-oss-120b',
displayName: 'GPT OSS 120B 128k',
maxInputTokens: 128000,
supportedFileTypes: [],
pricing: {
inputPerM: 0.15,
outputPerM: 0.75,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'moonshotai/kimi-k2-instruct',
displayName: 'Kimi K2 1T 128k',
maxInputTokens: 128000,
supportedFileTypes: [],
pricing: {
inputPerM: 1.0,
outputPerM: 3.0,
cacheReadPerM: 0.5,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'meta-llama/llama-4-scout-17b-16e-instruct',
displayName: 'Llama 4 Scout (17Bx16E) 128k',
maxInputTokens: 128000,
supportedFileTypes: [],
pricing: {
inputPerM: 0.11,
outputPerM: 0.34,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'meta-llama/llama-4-maverick-17b-128e-instruct',
displayName: 'Llama 4 Maverick (17Bx128E) 128k',
maxInputTokens: 128000,
supportedFileTypes: [],
pricing: {
inputPerM: 0.2,
outputPerM: 0.6,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'deepseek-r1-distill-llama-70b',
displayName: 'DeepSeek R1 Distill Llama 70B 128k',
maxInputTokens: 128000,
supportedFileTypes: [],
pricing: {
inputPerM: 0.75,
outputPerM: 0.9,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'qwen/qwen3-32b',
displayName: 'Qwen3 32B 131k',
maxInputTokens: 131000,
supportedFileTypes: [],
pricing: {
inputPerM: 0.29,
outputPerM: 0.59,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'llama-3.3-70b-versatile',
displayName: 'Llama 3.3 70B Versatile',
maxInputTokens: 128000,
default: true,
supportedFileTypes: [],
pricing: {
inputPerM: 0.59,
outputPerM: 0.79,
currency: 'USD',
unit: 'per_million_tokens',
},
},
],
baseURLSupport: 'none',
supportedFileTypes: [], // Groq currently doesn't support file uploads
},
// https://docs.x.ai/docs/models
// Note: XAI API only supports image uploads (JPG/PNG up to 20MB), not PDFs
xai: {
models: [
{
name: 'grok-4',
displayName: 'Grok 4',
openrouterId: 'x-ai/grok-4',
maxInputTokens: 256000,
default: true,
supportedFileTypes: ['image'],
pricing: {
inputPerM: 3.0,
outputPerM: 15.0,
cacheReadPerM: 0.75,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'grok-3',
displayName: 'Grok 3',
openrouterId: 'x-ai/grok-3',
maxInputTokens: 131072,
supportedFileTypes: ['image'],
pricing: {
inputPerM: 3.0,
outputPerM: 15.0,
cacheReadPerM: 0.75,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'grok-3-mini',
displayName: 'Grok 3 Mini',
openrouterId: 'x-ai/grok-3-mini',
maxInputTokens: 131072,
supportedFileTypes: ['image'],
pricing: {
inputPerM: 0.3,
outputPerM: 0.5,
cacheReadPerM: 0.075,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'grok-code-fast-1',
displayName: 'Grok Code Fast',
openrouterId: 'x-ai/grok-code-fast-1',
maxInputTokens: 131072,
supportedFileTypes: [],
pricing: {
inputPerM: 0.2,
outputPerM: 1.5,
cacheReadPerM: 0.02,
currency: 'USD',
unit: 'per_million_tokens',
},
},
],
baseURLSupport: 'none',
supportedFileTypes: [], // XAI currently doesn't support file uploads
openrouterPrefix: 'x-ai',
},
// https://docs.cohere.com/reference/models
cohere: {
models: [
{
name: 'command-a-03-2025',
displayName: 'Command A (03-2025)',
openrouterId: 'cohere/command-a',
maxInputTokens: 256000,
default: true,
supportedFileTypes: [],
pricing: {
inputPerM: 2.5,
outputPerM: 10.0,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'command-r-plus',
displayName: 'Command R+',
openrouterId: 'cohere/command-r-plus-08-2024',
maxInputTokens: 128000,
supportedFileTypes: [],
pricing: {
inputPerM: 2.5,
outputPerM: 10.0,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'command-r',
displayName: 'Command R',
openrouterId: 'cohere/command-r-08-2024',
maxInputTokens: 128000,
supportedFileTypes: [],
pricing: {
inputPerM: 0.15,
outputPerM: 0.6,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'command-r7b',
displayName: 'Command R7B',
openrouterId: 'cohere/command-r7b-12-2024',
maxInputTokens: 128000,
supportedFileTypes: [],
pricing: {
inputPerM: 0.0375,
outputPerM: 0.15,
currency: 'USD',
unit: 'per_million_tokens',
},
},
],
baseURLSupport: 'none',
supportedFileTypes: [], // Cohere currently doesn't support file uploads
openrouterPrefix: 'cohere',
},
// https://openrouter.ai/docs
// OpenRouter is a unified API gateway providing access to 100+ models from various providers.
// Model validation is handled dynamically via openrouter-model-registry.ts
openrouter: {
models: [], // Empty - accepts any model name (validated against OpenRouter's catalog)
baseURLSupport: 'none', // Fixed endpoint - baseURL auto-injected in resolver, no user override allowed
supportedFileTypes: ['pdf', 'image', 'audio'], // Allow all types - user assumes responsibility for model capabilities
supportsCustomModels: true,
supportsAllRegistryModels: true, // Can serve models from all other providers
},
// https://docs.litellm.ai/
// LiteLLM is an OpenAI-compatible proxy that unifies 100+ LLM providers.
// User must host their own LiteLLM proxy and provide the baseURL.
litellm: {
models: [], // Empty - accepts any model name (user's proxy determines available models)
baseURLSupport: 'required', // User must provide their LiteLLM proxy URL
supportedFileTypes: ['pdf', 'image', 'audio'], // Allow all types - user assumes responsibility for model capabilities
supportsCustomModels: true,
},
// https://glama.ai/
// Glama is an OpenAI-compatible gateway providing unified access to multiple LLM providers.
// Fixed endpoint: https://glama.ai/api/gateway/openai/v1
glama: {
models: [], // Empty - accepts any model name (format: provider/model e.g., openai/gpt-4o)
baseURLSupport: 'none', // Fixed endpoint - baseURL auto-injected
supportedFileTypes: ['pdf', 'image', 'audio'], // Allow all types - user assumes responsibility for model capabilities
supportsCustomModels: true,
},
// https://cloud.google.com/vertex-ai
// Google Vertex AI - GCP-hosted gateway for Gemini and Claude models
// Supports both Google's Gemini models and Anthropic's Claude via partnership
//
// Setup instructions:
// 1. Create a Google Cloud account and project
// 2. Enable the Vertex AI API: gcloud services enable aiplatform.googleapis.com
// 3. Enable desired Claude models (requires Anthropic Model Garden)
// 4. Install Google Cloud CLI: https://cloud.google.com/sdk/docs/install
// 5. Configure ADC: gcloud auth application-default login
// 6. Set env vars: GOOGLE_VERTEX_PROJECT (required), GOOGLE_VERTEX_LOCATION (optional)
//
// TODO: Add dynamic model fetching via publishers.models.list API
// - Requires: projectId, region, ADC auth
// - Endpoints: GET projects/{project}/locations/{location}/publishers/{google,anthropic}/models
// - Note: API doesn't return aliases (e.g., gemini-2.0-flash), only versioned IDs
// - Docs: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/list
// - Models: https://cloud.google.com/vertex-ai/generative-ai/docs/models
vertex: {
models: [
// Gemini 3 models on Vertex AI (Preview)
{
name: 'gemini-3-flash-preview',
displayName: 'Gemini 3 Flash (Vertex)',
maxInputTokens: 1048576,
default: true,
supportedFileTypes: ['pdf', 'image', 'audio'],
pricing: {
inputPerM: 0.5,
outputPerM: 3.0,
cacheReadPerM: 0.05,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gemini-3-pro-preview',
displayName: 'Gemini 3 Pro (Vertex)',
maxInputTokens: 1048576,
supportedFileTypes: ['pdf', 'image', 'audio'],
pricing: {
inputPerM: 2.0,
outputPerM: 12.0,
cacheReadPerM: 0.2,
currency: 'USD',
unit: 'per_million_tokens',
},
},
// Gemini 2.x models on Vertex AI
{
name: 'gemini-2.5-pro',
displayName: 'Gemini 2.5 Pro (Vertex)',
maxInputTokens: 1048576,
supportedFileTypes: ['pdf', 'image', 'audio'],
pricing: {
inputPerM: 1.25,
outputPerM: 10.0,
cacheReadPerM: 0.31,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gemini-2.5-flash',
displayName: 'Gemini 2.5 Flash (Vertex)',
maxInputTokens: 1048576,
supportedFileTypes: ['pdf', 'image', 'audio'],
pricing: {
inputPerM: 0.15,
outputPerM: 0.6,
cacheReadPerM: 0.0375,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'gemini-2.0-flash',
displayName: 'Gemini 2.0 Flash (Vertex)',
maxInputTokens: 1048576,
supportedFileTypes: ['pdf', 'image', 'audio'],
pricing: {
inputPerM: 0.1,
outputPerM: 0.4,
cacheReadPerM: 0.025,
currency: 'USD',
unit: 'per_million_tokens',
},
},
// Claude 4.5 models on Vertex AI (via Anthropic partnership)
// Note: Claude model IDs use @ suffix format on Vertex
{
name: 'claude-opus-4-5@20251101',
displayName: 'Claude 4.5 Opus (Vertex)',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 5.0,
outputPerM: 25.0,
cacheWritePerM: 6.25,
cacheReadPerM: 0.5,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'claude-sonnet-4-5@20250929',
displayName: 'Claude 4.5 Sonnet (Vertex)',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 3.0,
outputPerM: 15.0,
cacheWritePerM: 3.75,
cacheReadPerM: 0.3,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'claude-haiku-4-5@20251001',
displayName: 'Claude 4.5 Haiku (Vertex)',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 1.0,
outputPerM: 5.0,
cacheWritePerM: 1.25,
cacheReadPerM: 0.1,
currency: 'USD',
unit: 'per_million_tokens',
},
},
// Claude 4.1 and 4.0 models on Vertex AI
{
name: 'claude-opus-4-1@20250805',
displayName: 'Claude 4.1 Opus (Vertex)',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 15.0,
outputPerM: 75.0,
cacheWritePerM: 18.75,
cacheReadPerM: 1.5,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'claude-opus-4@20250514',
displayName: 'Claude 4 Opus (Vertex)',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 15.0,
outputPerM: 75.0,
cacheWritePerM: 18.75,
cacheReadPerM: 1.5,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'claude-sonnet-4@20250514',
displayName: 'Claude 4 Sonnet (Vertex)',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 3.0,
outputPerM: 15.0,
cacheWritePerM: 3.75,
cacheReadPerM: 0.3,
currency: 'USD',
unit: 'per_million_tokens',
},
},
// Claude 3.x models on Vertex AI
{
name: 'claude-3-7-sonnet@20250219',
displayName: 'Claude 3.7 Sonnet (Vertex)',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 3.0,
outputPerM: 15.0,
cacheWritePerM: 3.75,
cacheReadPerM: 0.3,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'claude-3-5-sonnet-v2@20241022',
displayName: 'Claude 3.5 Sonnet v2 (Vertex)',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 3.0,
outputPerM: 15.0,
cacheWritePerM: 3.75,
cacheReadPerM: 0.3,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'claude-3-5-haiku@20241022',
displayName: 'Claude 3.5 Haiku (Vertex)',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 0.8,
outputPerM: 4.0,
cacheWritePerM: 1.0,
cacheReadPerM: 0.08,
currency: 'USD',
unit: 'per_million_tokens',
},
},
],
baseURLSupport: 'none', // Auto-constructed from projectId and region
supportedFileTypes: ['pdf', 'image', 'audio'],
},
// Amazon Bedrock - AWS-hosted gateway for Claude, Nova, and more
// Auth: AWS credentials (env vars) or Bedrock API key (AWS_BEARER_TOKEN_BEDROCK)
//
// Cross-region inference: Auto-added for anthropic.* and amazon.* models
// supportsCustomModels: true allows users to add custom model IDs beyond the fixed list
bedrock: {
supportsCustomModels: true,
models: [
// Claude 4.5 models (latest)
{
name: 'anthropic.claude-sonnet-4-5-20250929-v1:0',
displayName: 'Claude 4.5 Sonnet',
maxInputTokens: 200000,
default: true,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 3.0,
outputPerM: 15.0,
cacheWritePerM: 3.75,
cacheReadPerM: 0.3,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'anthropic.claude-haiku-4-5-20251001-v1:0',
displayName: 'Claude 4.5 Haiku',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 1.0,
outputPerM: 5.0,
cacheWritePerM: 1.25,
cacheReadPerM: 0.1,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'anthropic.claude-opus-4-5-20251101-v1:0',
displayName: 'Claude 4.5 Opus',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 5.0,
outputPerM: 25.0,
cacheWritePerM: 6.25,
cacheReadPerM: 0.5,
currency: 'USD',
unit: 'per_million_tokens',
},
},
// Amazon Nova models
{
name: 'amazon.nova-premier-v1:0',
displayName: 'Nova Premier',
maxInputTokens: 1000000,
supportedFileTypes: ['image'],
pricing: {
inputPerM: 2.5,
outputPerM: 12.5,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'amazon.nova-pro-v1:0',
displayName: 'Nova Pro',
maxInputTokens: 300000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 0.8,
outputPerM: 3.2,
cacheReadPerM: 0.2,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'amazon.nova-lite-v1:0',
displayName: 'Nova Lite',
maxInputTokens: 300000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 0.06,
outputPerM: 0.24,
cacheReadPerM: 0.015,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'amazon.nova-micro-v1:0',
displayName: 'Nova Micro',
maxInputTokens: 128000,
supportedFileTypes: [],
pricing: {
inputPerM: 0.035,
outputPerM: 0.14,
cacheReadPerM: 0.00875,
currency: 'USD',
unit: 'per_million_tokens',
},
},
// OpenAI GPT-OSS
{
name: 'openai.gpt-oss-120b-1:0',
displayName: 'GPT-OSS 120B',
maxInputTokens: 128000,
supportedFileTypes: [],
pricing: {
inputPerM: 0.15,
outputPerM: 0.6,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'openai.gpt-oss-20b-1:0',
displayName: 'GPT-OSS 20B',
maxInputTokens: 128000,
supportedFileTypes: [],
pricing: {
inputPerM: 0.07,
outputPerM: 0.3,
currency: 'USD',
unit: 'per_million_tokens',
},
},
// Qwen
{
name: 'qwen.qwen3-coder-30b-a3b-v1:0',
displayName: 'Qwen3 Coder 30B',
maxInputTokens: 262144,
supportedFileTypes: [],
pricing: {
inputPerM: 0.15,
outputPerM: 0.6,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'qwen.qwen3-coder-480b-a35b-v1:0',
displayName: 'Qwen3 Coder 480B',
maxInputTokens: 262144,
supportedFileTypes: [],
pricing: {
inputPerM: 0.22,
outputPerM: 1.8,
currency: 'USD',
unit: 'per_million_tokens',
},
},
],
baseURLSupport: 'none', // Auto-constructed from region
supportedFileTypes: ['pdf', 'image'],
},
// Native local model execution via node-llama-cpp
// Runs GGUF models directly on the machine using Metal/CUDA/Vulkan acceleration
// Models are downloaded from HuggingFace and stored in ~/.dexto/models/
local: {
models: [], // Populated dynamically from local model registry
baseURLSupport: 'none', // No external server needed
supportedFileTypes: ['image'], // Vision support depends on model capabilities
supportsCustomModels: true, // Allow any GGUF model path
},
// Ollama server integration
// Uses Ollama's OpenAI-compatible API for local model inference
// Requires Ollama to be installed and running (default: http://localhost:11434)
ollama: {
models: [], // Populated dynamically from Ollama API
baseURLSupport: 'optional', // Default: http://localhost:11434, can be customized
supportedFileTypes: ['image'], // Vision support depends on model
supportsCustomModels: true, // Accept any Ollama model name
},
// Dexto Gateway - OpenAI-compatible proxy through api.dexto.ai
// Routes to OpenRouter with per-request billing (balance decrement)
// Requires DEXTO_API_KEY from `dexto login`
//
// This is a first-class provider that users explicitly select.
// Model IDs are in OpenRouter format (e.g., 'anthropic/claude-sonnet-4.5')
dexto: {
models: [
// Claude models (Anthropic via OpenRouter)
{
name: 'anthropic/claude-haiku-4.5',
displayName: 'Claude 4.5 Haiku',
maxInputTokens: 200000,
default: true,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 1.0,
outputPerM: 5.0,
cacheWritePerM: 1.25,
cacheReadPerM: 0.1,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'anthropic/claude-sonnet-4.5',
displayName: 'Claude 4.5 Sonnet',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 3.0,
outputPerM: 15.0,
cacheWritePerM: 3.75,
cacheReadPerM: 0.3,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'anthropic/claude-opus-4.5',
displayName: 'Claude 4.5 Opus',
maxInputTokens: 200000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 5.0,
outputPerM: 25.0,
cacheWritePerM: 6.25,
cacheReadPerM: 0.5,
currency: 'USD',
unit: 'per_million_tokens',
},
},
// OpenAI models (via OpenRouter)
{
name: 'openai/gpt-5.2',
displayName: 'GPT-5.2',
maxInputTokens: 400000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 1.75,
outputPerM: 14.0,
cacheReadPerM: 0.175,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'openai/gpt-5.2-codex',
displayName: 'GPT-5.2 Codex',
maxInputTokens: 400000,
supportedFileTypes: ['pdf', 'image'],
pricing: {
inputPerM: 1.75,
outputPerM: 14.0,
cacheReadPerM: 0.175,
currency: 'USD',
unit: 'per_million_tokens',
},
},
// Google models (via OpenRouter)
{
name: 'google/gemini-3-pro-preview',
displayName: 'Gemini 3 Pro',
maxInputTokens: 1048576,
supportedFileTypes: ['pdf', 'image', 'audio'],
pricing: {
inputPerM: 2.0,
outputPerM: 12.0,
cacheReadPerM: 0.2,
currency: 'USD',
unit: 'per_million_tokens',
},
},
{
name: 'google/gemini-3-flash-preview',
displayName: 'Gemini 3 Flash',
maxInputTokens: 1048576,
supportedFileTypes: ['pdf', 'image', 'audio'],
pricing: {
inputPerM: 0.5,
outputPerM: 3.0,
cacheReadPerM: 0.05,
currency: 'USD',
unit: 'per_million_tokens',
},
},
// Free models (via OpenRouter)
{
name: 'qwen/qwen3-coder:free',
displayName: 'Qwen3 Coder (Free)',
maxInputTokens: 262000,
supportedFileTypes: [],
// Free - no pricing
},
{
name: 'deepseek/deepseek-r1-0528:free',
displayName: 'DeepSeek R1 (Free)',
maxInputTokens: 163840,
supportedFileTypes: [],
// Free - no pricing
},
// Other models (via OpenRouter)
{
name: 'z-ai/glm-4.7',
displayName: 'GLM 4.7',
maxInputTokens: 202752,
supportedFileTypes: [],
pricing: {
inputPerM: 0.4,
outputPerM: 1.5,
currency: 'USD',
unit: 'per_million_tokens',
},
},
],
baseURLSupport: 'none', // Fixed endpoint: https://api.dexto.ai/v1
supportedFileTypes: ['pdf', 'image', 'audio'], // Same as OpenRouter
supportsCustomModels: true, // Accept any OpenRouter model ID beyond the preset list
supportsAllRegistryModels: true, // Can serve models from all other providers via OpenRouter
},
};
/**
 * Removes a leading Bedrock cross-region inference profile marker from a model ID.
 *
 * The recognized markers are `eu.`, `us.` and `global.`. At most one marker is removed, it
 * must sit at the very start of the string, and matching is case-sensitive. IDs without a
 * marker come back untouched, so registry lookups behave the same whether or not the caller
 * supplied a region prefix.
 *
 * @param model The model ID, with or without a region prefix.
 * @returns The model ID with the leading region prefix (if any) removed.
 */
export function stripBedrockRegionPrefix(model: string): string {
    const regionPrefixes = ['eu.', 'us.', 'global.'];
    const matched = regionPrefixes.find((candidate) => model.startsWith(candidate));
    return matched === undefined ? model : model.slice(matched.length);
}
/**
 * Looks up the model flagged as `default` in a provider's registry entry.
 * @param provider The provider whose default model is requested.
 * @returns The name of the first model marked as default, or null when the provider has no
 * such model (or its name is empty).
 */
export function getDefaultModelForProvider(provider: LLMProvider): string | null {
    const defaultEntry = LLM_REGISTRY[provider].models.find((entry) => entry.default);
    if (!defaultEntry?.name) {
        return null;
    }
    return defaultEntry.name;
}
/**
 * Lists every provider name known to the LLM layer.
 * @returns A fresh array built from `LLM_PROVIDERS`; mutating it does not affect the source list.
 */
export function getSupportedProviders(): LLMProvider[] {
    return Array.from(LLM_PROVIDERS);
}
/**
 * Lists the model names registered directly under a provider.
 * @param provider The provider to inspect.
 * @returns The registered model names, in registry order.
 */
export function getSupportedModels(provider: LLMProvider): string[] {
    const { models } = LLM_REGISTRY[provider];
    const names: string[] = [];
    for (const entry of models) {
        names.push(entry.name);
    }
    return names;
}
/**
 * Returns the registry's maximum input token limit for a provider/model pair.
 *
 * Gateway providers (those with supportsAllRegistryModels) are first resolved to the provider
 * that natively owns the model. The lookup ignores case and any Bedrock region prefix.
 *
 * @param provider The name of the provider (e.g., 'openai', 'anthropic', 'google').
 * @param model The specific model name.
 * @param logger Optional logger. It is optional because this function is also used from
 * the zod schema, where no logger is available.
 * @returns The maximum input token limit for the model.
 * @throws {LLMError} If the model is not found in the registry.
 */
export function getMaxInputTokensForModel(
    provider: LLMProvider,
    model: string,
    logger?: IDextoLogger
): number {
    const { provider: nativeProvider, model: nativeModel } = resolveToNativeProvider(
        provider,
        model
    );
    const { models } = LLM_REGISTRY[nativeProvider];
    const lookupKey = stripBedrockRegionPrefix(nativeModel).toLowerCase();
    const entry = models.find((candidate) => candidate.name.toLowerCase() === lookupKey);

    if (entry) {
        logger?.debug(
            `Found max tokens for ${nativeProvider}/${nativeModel}: ${entry.maxInputTokens}`
        );
        return entry.maxInputTokens;
    }

    // Unknown model: list what the provider does offer before failing.
    const knownModels = getSupportedModels(nativeProvider).join(', ');
    logger?.error(
        `Model '${nativeModel}' not found for provider '${nativeProvider}' in LLM registry. Supported models: ${knownModels}`
    );
    throw LLMError.unknownModel(nativeProvider, nativeModel);
}
/**
 * Tells whether a model is listed directly in a provider's registry entry.
 *
 * Both arguments are required; missing values are rejected earlier by the Zod schemas.
 * The comparison ignores case and any Bedrock region prefix on `model`.
 *
 * @param provider The provider name.
 * @param model The model name.
 * @returns True if the provider's model list contains the model, false otherwise.
 */
export function isValidProviderModel(provider: LLMProvider, model: string): boolean {
    const { models } = LLM_REGISTRY[provider];
    const wanted = stripBedrockRegionPrefix(model).toLowerCase();
    for (const entry of models) {
        if (entry.name.toLowerCase() === wanted) {
            return true;
        }
    }
    return false;
}
/**
 * Infers which provider owns a model by searching the registry (case-insensitive).
 *
 * Two passes are made:
 * 1. If the name contains a slash (OpenRouter style, e.g. 'anthropic/claude-opus-4.5'), the
 *    part before the first slash is compared with each provider's openrouterPrefix. For the
 *    first provider whose prefix matches, the model must also exist there, either by native
 *    name (the part after the slash) or by openrouterId (the full name). If it does not,
 *    no further prefixes are tried.
 * 2. Otherwise, or when pass 1 found nothing, every provider is scanned for a model whose
 *    name equals the full input (minus any Bedrock region prefix).
 *
 * @param model The model name (e.g., 'gpt-5-mini', 'claude-sonnet-4-5-20250929')
 * @returns The first provider found to list the model.
 * @throws The error created by `LLMError.modelProviderUnknown` when no provider lists the model.
 */
export function getProviderFromModel(model: string): LLMProvider {
    const loweredModel = model.toLowerCase();
    const slashAt = model.indexOf('/');

    if (slashAt !== -1) {
        const vendor = model.slice(0, slashAt);
        const remainder = model.slice(slashAt + 1);
        if (vendor) {
            const loweredVendor = vendor.toLowerCase();
            for (const provider of LLM_PROVIDERS) {
                if (getOpenrouterPrefix(provider)?.toLowerCase() !== loweredVendor) {
                    continue;
                }
                // Prefix matched: only accept the provider if it really lists this model.
                const loweredRemainder = stripBedrockRegionPrefix(remainder).toLowerCase();
                const listedHere = LLM_REGISTRY[provider].models.some(
                    (entry) =>
                        entry.name.toLowerCase() === loweredRemainder ||
                        entry.openrouterId?.toLowerCase() === loweredModel
                );
                if (listedHere) {
                    return provider;
                }
                // Not listed under the matched prefix - stop and use the full registry scan.
                break;
            }
        }
    }

    const bareName = stripBedrockRegionPrefix(model).toLowerCase();
    for (const provider of LLM_PROVIDERS) {
        const { models } = LLM_REGISTRY[provider];
        if (models.some((entry) => entry.name.toLowerCase() === bareName)) {
            return provider;
        }
    }
    throw LLMError.modelProviderUnknown(model);
}
/**
 * Collects the names of every model registered under any provider into one flat array.
 * Names appear provider by provider, in registry order; duplicates are not removed.
 */
export function getAllSupportedModels(): string[] {
    const names: string[] = [];
    for (const info of Object.values(LLM_REGISTRY)) {
        for (const entry of info.models) {
            names.push(entry.name);
        }
    }
    return names;
}
/**
 * Tells whether a custom baseURL may be configured for a provider.
 * @param provider The name of the provider.
 * @returns True unless the provider's baseURLSupport is 'none'.
 */
export function supportsBaseURL(provider: LLMProvider): boolean {
    const { baseURLSupport } = LLM_REGISTRY[provider];
    return baseURLSupport !== 'none';
}
/**
 * Tells whether a provider cannot be used without a custom baseURL.
 * @param provider The name of the provider.
 * @returns True only when the provider's baseURLSupport is 'required'.
 */
export function requiresBaseURL(provider: LLMProvider): boolean {
    const { baseURLSupport } = LLM_REGISTRY[provider];
    return baseURLSupport === 'required';
}
/**
 * Tells whether a provider accepts any model name, which is signalled by an empty
 * model list in its registry entry.
 * @param provider The name of the provider.
 * @returns True if the provider's model list is empty, false otherwise.
 */
export function acceptsAnyModel(provider: LLMProvider): boolean {
    const { models } = LLM_REGISTRY[provider];
    return models.length === 0;
}
/**
 * Tells whether users may supply model IDs beyond a provider's fixed model list.
 * The capability must be switched on explicitly in the registry entry.
 * @param provider The name of the provider.
 * @returns True only when the entry's supportsCustomModels flag is exactly `true`.
 */
export function supportsCustomModels(provider: LLMProvider): boolean {
    const { supportsCustomModels: customModelsFlag } = LLM_REGISTRY[provider];
    return customModelsFlag === true;
}
/**
 * Tells whether a provider acts as a gateway that can serve models registered under
 * other providers.
 * @param provider The name of the provider.
 * @returns True only when the entry's supportsAllRegistryModels flag is exactly `true`.
 */
export function hasAllRegistryModelsSupport(provider: LLMProvider): boolean {
    const { supportsAllRegistryModels: gatewayFlag } = LLM_REGISTRY[provider];
    return gatewayFlag === true;
}
/**
 * Reads a provider's OpenRouter vendor prefix from the registry.
 * @returns The prefix, or undefined when the entry declares none (e.g., groq, whose model
 * names already carry vendor prefixes).
 */
function getOpenrouterPrefix(provider: LLMProvider): string | undefined {
    const { openrouterPrefix } = LLM_REGISTRY[provider];
    return openrouterPrefix;
}
/**
 * Providers whose models can be reached through a gateway provider (one with
 * supportsAllRegistryModels). Computed once when the module loads.
 *
 * A provider qualifies when its registry entry declares an openrouterPrefix (its model names
 * need transforming), or when it is 'groq', whose model names already carry vendor prefixes
 * and therefore need no transformation.
 */
const GATEWAY_ACCESSIBLE_PROVIDERS: LLMProvider[] = (
    Object.keys(LLM_REGISTRY) as LLMProvider[]
).filter(
    (provider) => LLM_REGISTRY[provider].openrouterPrefix !== undefined || provider === 'groq'
);
/**
 * Lists the models usable with a provider.
 *
 * For an ordinary provider this is a shallow copy of each of its own registry entries.
 * For a gateway provider (supportsAllRegistryModels) it is instead every model of every
 * gateway-accessible provider, each tagged with the provider it was taken from.
 *
 * @param provider The name of the provider.
 * @returns Model entries; inherited ones carry an `originalProvider` field.
 */
export function getAllModelsForProvider(
    provider: LLMProvider
): Array<ModelInfo & { originalProvider?: LLMProvider }> {
    const { supportsAllRegistryModels: isGateway, models: ownModels } = LLM_REGISTRY[provider];

    if (isGateway) {
        return GATEWAY_ACCESSIBLE_PROVIDERS.flatMap((sourceProvider) =>
            LLM_REGISTRY[sourceProvider].models.map((entry) => ({
                ...entry,
                originalProvider: sourceProvider,
            }))
        );
    }

    return ownModels.map((entry) => ({ ...entry }));
}
/**
 * Converts a model name into the form a gateway provider (dexto/openrouter) expects.
 * Only the explicit openrouterId mapping from the registry is used - nothing is guessed.
 *
 * The name is returned unchanged when any of these holds (checked in this order):
 * - the target is not a gateway
 * - the original provider is itself a gateway, so the name is already in gateway format
 * - the name already contains a slash (assumed to be OpenRouter format)
 * - the original provider has no openrouterPrefix (e.g. groq, whose names already carry
 *   vendor prefixes such as meta-llama/)
 *
 * Otherwise the model is looked up (case-insensitive) under the original provider and its
 * openrouterId is returned.
 *
 * @param model The model name to transform.
 * @param originalProvider The provider the model originally belongs to.
 * @param targetProvider The provider to transform the model name for.
 * @returns The transformed model name.
 * @throws {DextoRuntimeError} With code MODEL_UNKNOWN if a transformation is required but
 * the model has no openrouterId mapping.
 */
export function transformModelNameForProvider(
    model: string,
    originalProvider: LLMProvider,
    targetProvider: LLMProvider
): string {
    const passesThrough =
        !hasAllRegistryModelsSupport(targetProvider) ||
        hasAllRegistryModelsSupport(originalProvider) ||
        model.includes('/') ||
        !getOpenrouterPrefix(originalProvider);
    if (passesThrough) {
        return model;
    }

    const loweredModel = model.toLowerCase();
    const mappedId = LLM_REGISTRY[originalProvider].models.find(
        (entry) => entry.name.toLowerCase() === loweredModel
    )?.openrouterId;
    if (mappedId) {
        return mappedId;
    }

    // Reaching this point means the registry is incomplete for this model.
    throw new DextoRuntimeError(
        LLMErrorCode.MODEL_UNKNOWN,
        ErrorScope.LLM,
        ErrorType.SYSTEM,
        `Model '${model}' from provider '${originalProvider}' has no openrouterId mapping. ` +
            `All models that can be used via gateway providers must have explicit openrouterId in the registry.`,
        { model, originalProvider, targetProvider }
    );
}
/**
 * Works out which provider natively owns a model that is being used through a gateway.
 * The result is used to read model metadata (pricing, file types, etc.) from the owner's
 * registry entry.
 *
 * Resolution steps:
 * 1. A non-gateway `gatewayProvider` owns the model itself.
 * 2. For 'vendor/name' models, the first provider whose openrouterPrefix equals the vendor
 *    wins. The native name comes from the openrouterId reverse lookup, e.g.
 *    'anthropic/claude-opus-4.5' → 'claude-opus-4-5-20251101', or is the text after the
 *    slash when no entry matches.
 * 3. Still for 'vendor/name' models, gateway-accessible providers are searched for the
 *    full name (groq-style names such as meta-llama/llama-3.3-70b).
 * 4. Finally, gateway-accessible providers are searched for the name with any Bedrock
 *    region prefix removed.
 *
 * @param model The model name (may include provider prefix like 'openai/gpt-5-mini').
 * @param gatewayProvider The gateway provider being used (e.g., 'dexto').
 * @returns The owning provider and model name, or null if no registry entry matches
 * (possibly a custom model).
 */
export function resolveModelOrigin(
    model: string,
    gatewayProvider: LLMProvider
): { provider: LLMProvider; model: string } | null {
    if (!hasAllRegistryModelsSupport(gatewayProvider)) {
        return { provider: gatewayProvider, model };
    }

    const loweredModel = model.toLowerCase();
    const slashAt = model.indexOf('/');

    if (slashAt !== -1) {
        const vendor = model.slice(0, slashAt);
        const remainder = model.slice(slashAt + 1);

        if (vendor) {
            const loweredVendor = vendor.toLowerCase();
            for (const provider of LLM_PROVIDERS) {
                if (getOpenrouterPrefix(provider)?.toLowerCase() !== loweredVendor) {
                    continue;
                }
                const nativeEntry = LLM_REGISTRY[provider].models.find(
                    (entry) => entry.openrouterId?.toLowerCase() === loweredModel
                );
                // Without a mapping, hand back the text after the slash (custom or native name).
                return { provider, model: nativeEntry ? nativeEntry.name : remainder };
            }
        }

        // The vendor is not a known prefix; the slash may be part of a registered name.
        for (const sourceProvider of GATEWAY_ACCESSIBLE_PROVIDERS) {
            const listed = LLM_REGISTRY[sourceProvider].models.some(
                (entry) => entry.name.toLowerCase() === loweredModel
            );
            if (listed) {
                return { provider: sourceProvider, model };
            }
        }
    }

    const strippedLowered = stripBedrockRegionPrefix(model).toLowerCase();
    for (const sourceProvider of GATEWAY_ACCESSIBLE_PROVIDERS) {
        const listed = LLM_REGISTRY[sourceProvider].models.some(
            (entry) => entry.name.toLowerCase() === strippedLowered
        );
        if (listed) {
            return { provider: sourceProvider, model };
        }
    }

    return null;
}
/**
 * Maps a provider/model pair onto the provider that natively owns the model.
 * Gateway providers (dexto, openrouter) are resolved through resolveModelOrigin; native
 * providers, and gateway models that cannot be resolved, are returned as given.
 *
 * @example
 * resolveToNativeProvider('dexto', 'anthropic/claude-opus-4.5') → { provider: 'anthropic', model: 'claude-opus-4-5-20251101' }
 * resolveToNativeProvider('openai', 'gpt-5-mini') → { provider: 'openai', model: 'gpt-5-mini' }
 */
function resolveToNativeProvider(
    provider: LLMProvider,
    model: string
): { provider: LLMProvider; model: string } {
    const origin = hasAllRegistryModelsSupport(provider)
        ? resolveModelOrigin(model, provider)
        : null;
    return origin ?? { provider, model };
}
/**
 * Decides whether a model may be used with a provider.
 *
 * A model is accepted when the provider lists it (ignoring case and any Bedrock region
 * prefix), when the provider allows custom models, or when the provider is a gateway
 * (supportsAllRegistryModels) and the model's origin can be resolved.
 *
 * @param provider The provider to check.
 * @param model The model name to validate.
 * @returns True if the model is valid for the provider.
 */
export function isModelValidForProvider(provider: LLMProvider, model: string): boolean {
    const {
        models,
        supportsCustomModels: allowsCustom,
        supportsAllRegistryModels: isGateway,
    } = LLM_REGISTRY[provider];

    const wanted = stripBedrockRegionPrefix(model).toLowerCase();
    const listedLocally = models.some((entry) => entry.name.toLowerCase() === wanted);
    if (listedLocally || allowsCustom) {
        return true;
    }

    return isGateway ? resolveModelOrigin(model, provider) !== null : false;
}
/**
 * Providers that can be used without an API key:
 * - native local execution (local via node-llama-cpp, ollama via an Ollama server)
 * - local/self-hosted endpoints (openai-compatible, e.g. vLLM or LocalAI)
 * - proxies that manage authentication themselves (litellm)
 * - cloud providers with their own credential schemes (vertex via ADC, bedrock via AWS credentials)
 */
const API_KEY_OPTIONAL_PROVIDERS: Set<LLMProvider> = new Set<LLMProvider>([
    // No authentication involved: native node-llama-cpp execution
    'local',
    // No authentication by default: Ollama server
    'ollama',
    // Frequently unauthenticated: vLLM, LocalAI
    'openai-compatible',
    // Self-hosted proxy that handles auth internally
    'litellm',
    // Google Cloud ADC (Application Default Credentials)
    'vertex',
    // AWS credentials (access key + secret or IAM role)
    'bedrock',
]);
/**
 * Tells whether an API key must be configured for a provider.
 * The answer is false exactly for the members of API_KEY_OPTIONAL_PROVIDERS:
 * - local providers (local, ollama, openai-compatible)
 * - self-hosted proxies (litellm)
 * - cloud auth providers (vertex, bedrock)
 *
 * @param provider The name of the provider.
 * @returns True if the provider requires an API key, false otherwise.
 */
export function requiresApiKey(provider: LLMProvider): boolean {
    const keyIsOptional = API_KEY_OPTIONAL_PROVIDERS.has(provider);
    return !keyIsOptional;
}
/**
 * Returns the file types a given model can accept.
 *
 * Gateway providers (supportsAllRegistryModels) are first resolved to the provider that
 * natively owns the model. If that provider accepts any model name (empty model list), its
 * provider-level file types are returned; otherwise the model's own entry is consulted,
 * ignoring case and any Bedrock region prefix.
 *
 * @param provider The name of the provider.
 * @param model The name of the model.
 * @returns Array of supported file types for the model.
 * @throws The error created by `LLMError.unknownModel` if the model is not in the registry.
 */
export function getSupportedFileTypesForModel(
    provider: LLMProvider,
    model: string
): SupportedFileType[] {
    const { provider: nativeProvider, model: nativeModel } = resolveToNativeProvider(
        provider,
        model
    );
    const nativeInfo = LLM_REGISTRY[nativeProvider];

    if (acceptsAnyModel(nativeProvider)) {
        return nativeInfo.supportedFileTypes;
    }

    const lookupKey = stripBedrockRegionPrefix(nativeModel).toLowerCase();
    const entry = nativeInfo.models.find(
        (candidate) => candidate.name.toLowerCase() === lookupKey
    );
    if (entry) {
        return entry.supportedFileTypes;
    }
    throw LLMError.unknownModel(nativeProvider, nativeModel);
}
/**
 * Tells whether a model can accept files of one particular type.
 * Errors raised by getSupportedFileTypesForModel (unknown model) are not caught here.
 * @param provider The name of the provider.
 * @param model The name of the model.
 * @param fileType The file type to check support for.
 * @returns True if the model supports the file type, false otherwise.
 */
export function modelSupportsFileType(
    provider: LLMProvider,
    model: string,
    fileType: SupportedFileType
): boolean {
    return getSupportedFileTypesForModel(provider, model).includes(fileType);
}
/**
 * Checks, from a file's MIME type, whether a model can accept that file.
 *
 * The MIME type is lower-cased and stripped of parameters before being mapped to a file
 * type. Lookup failures (for example an unknown model) are reported through the `error`
 * field instead of being thrown.
 *
 * @param provider The LLM provider name.
 * @param model The model name.
 * @param mimeType The MIME type of the file to validate.
 * @returns `isSupported`, plus the resolved `fileType` when the MIME type is recognized and
 * an `error` message whenever the file is not supported.
 */
export function validateModelFileSupport(
    provider: LLMProvider,
    model: string,
    mimeType: string
): {
    isSupported: boolean;
    fileType?: SupportedFileType;
    error?: string;
} {
    // Drop parameters such as ";codecs=opus" ("audio/webm;codecs=opus" -> "audio/webm").
    const loweredMime = mimeType.toLowerCase();
    const baseMimeType = loweredMime.split(';')[0]?.trim() || loweredMime;

    const fileType = MIME_TYPE_TO_FILE_TYPE[baseMimeType];
    if (!fileType) {
        return {
            isSupported: false,
            error: `Unsupported file type: ${mimeType}`,
        };
    }

    let supported: boolean;
    try {
        supported = modelSupportsFileType(provider, model, fileType);
    } catch (error) {
        const reason =
            error instanceof Error ? error.message : 'Unknown error validating model file support';
        return {
            isSupported: false,
            fileType,
            error: reason,
        };
    }

    if (supported) {
        return {
            isSupported: true,
            fileType,
        };
    }
    return {
        isSupported: false,
        fileType,
        error: `Model '${model}' (${provider}) does not support ${fileType} files`,
    };
}
/**
 * Resolves an explicitly configured `maxInputTokens` value.
 *
 * - With a `baseURL`, the configured value is returned as-is.
 * - Without a `baseURL`, the value is capped at the registry limit for the model. If the
 *   registry does not know the model (MODEL_UNKNOWN), the configured value is kept and a
 *   warning is logged.
 *
 * @param config The validated LLM configuration.
 * @param configuredMaxInputTokens The non-null `maxInputTokens` taken from `config`.
 * @param logger Logger used to report which value was chosen.
 * @returns The configured value, or the registry limit when the configured value exceeds it.
 * @throws Any error from the registry lookup other than MODEL_UNKNOWN.
 */
function resolveConfiguredMaxInputTokens(
    config: LLMConfig,
    configuredMaxInputTokens: number,
    logger: IDextoLogger
): number {
    // Case 1a: baseURL is set. maxInputTokens is required and validated by Zod. Trust it.
    if (config.baseURL) {
        logger.debug(
            `Using maxInputTokens from configuration (with baseURL): ${configuredMaxInputTokens}`
        );
        return configuredMaxInputTokens;
    }

    // Case 1b: baseURL is NOT set, but maxInputTokens is provided (override).
    // Sanity-check against registry limits.
    try {
        const registryMaxInputTokens = getMaxInputTokensForModel(
            config.provider,
            config.model,
            logger
        );
        if (configuredMaxInputTokens > registryMaxInputTokens) {
            logger.warn(
                `Provided maxInputTokens (${configuredMaxInputTokens}) for ${config.provider}/${config.model} exceeds the known limit (${registryMaxInputTokens}) for model ${config.model}. Capping to registry limit.`
            );
            return registryMaxInputTokens;
        }
        logger.debug(
            `Using valid maxInputTokens override from configuration: ${configuredMaxInputTokens} (Registry limit: ${registryMaxInputTokens})`
        );
        return configuredMaxInputTokens;
    } catch (error: unknown) {
        if (error instanceof DextoRuntimeError && error.code === LLMErrorCode.MODEL_UNKNOWN) {
            logger.warn(
                `Registry lookup failed during maxInputTokens override check for ${config.provider}/${config.model}: ${error.message}. ` +
                    `Proceeding with the provided maxInputTokens value (${configuredMaxInputTokens}), but it might be invalid.`
            );
            // Keep the user's value; there is no registry limit to compare it with.
            return configuredMaxInputTokens;
        } else {
            // Anything else is unexpected: log it and let it propagate.
            logger.error(
                `Unexpected error during registry lookup for maxInputTokens override check: ${error}`
            );
            throw error;
        }
    }
}

/**
 * Determines the effective maximum input token limit for a configuration.
 *
 * Resolution order:
 * 1. Explicit `maxInputTokens` in config. It is trusted when `baseURL` is set, otherwise
 *    capped at the registry limit (or kept as-is if the registry does not know the model).
 * 2. Provider 'openrouter': the context length from the cached OpenRouter model registry,
 *    or DEFAULT_MAX_INPUT_TOKENS on a cache miss.
 * 3. `baseURL` set without `maxInputTokens`: DEFAULT_MAX_INPUT_TOKENS.
 * 4. Provider that accepts any model (empty model list): DEFAULT_MAX_INPUT_TOKENS.
 * 5. Registry lookup for the provider/model. If the model is unknown and the provider
 *    supports custom models, DEFAULT_MAX_INPUT_TOKENS is used.
 *
 * @param config The validated LLM configuration.
 * @param logger Logger used to report which source supplied the limit.
 * @returns The effective maximum input token count for the LLM.
 * @throws The error created by `LLMError.unknownModel` when step 5 cannot find the model
 * and the provider does not support custom models. Unexpected lookup errors are rethrown.
 */
export function getEffectiveMaxInputTokens(config: LLMConfig, logger: IDextoLogger): number {
    // Priority 1: Explicit config override or required value with baseURL
    const configuredMaxInputTokens = config.maxInputTokens;
    if (configuredMaxInputTokens != null) {
        return resolveConfiguredMaxInputTokens(config, configuredMaxInputTokens, logger);
    }

    // Priority 2: OpenRouter - look up context length from cached model registry
    if (config.provider === 'openrouter') {
        const contextLength = getOpenRouterModelContextLength(config.model);
        if (contextLength !== null) {
            logger.debug(
                `Using maxInputTokens from OpenRouter registry for ${config.model}: ${contextLength}`
            );
            return contextLength;
        }
        // Cache miss or stale - use the default
        logger.warn(
            `OpenRouter model ${config.model} not found in cache, defaulting to ${DEFAULT_MAX_INPUT_TOKENS} tokens`
        );
        return DEFAULT_MAX_INPUT_TOKENS;
    }

    // Priority 3: baseURL is set but maxInputTokens is missing - use the default
    if (config.baseURL) {
        logger.warn(
            `baseURL is set but maxInputTokens is missing. Defaulting to ${DEFAULT_MAX_INPUT_TOKENS}. ` +
                `Provide 'maxInputTokens' in configuration to avoid default fallback.`
        );
        return DEFAULT_MAX_INPUT_TOKENS;
    }

    // Priority 4: Check if provider accepts any model (like openai-compatible)
    if (acceptsAnyModel(config.provider)) {
        logger.debug(
            `Provider ${config.provider} accepts any model, defaulting to ${DEFAULT_MAX_INPUT_TOKENS} tokens`
        );
        return DEFAULT_MAX_INPUT_TOKENS;
    }

    // Priority 5: No override, no baseURL - use registry.
    try {
        const registryMaxInputTokens = getMaxInputTokensForModel(
            config.provider,
            config.model,
            logger
        );
        logger.debug(
            `Using maxInputTokens from registry for ${config.provider}/${config.model}: ${registryMaxInputTokens}`
        );
        return registryMaxInputTokens;
    } catch (error: unknown) {
        if (error instanceof DextoRuntimeError && error.code === LLMErrorCode.MODEL_UNKNOWN) {
            // For providers that support custom models, use default instead of throwing
            if (supportsCustomModels(config.provider)) {
                logger.debug(
                    `Custom model ${config.model} not in ${config.provider} registry, defaulting to ${DEFAULT_MAX_INPUT_TOKENS} tokens`
                );
                return DEFAULT_MAX_INPUT_TOKENS;
            }
            // Known provider, unknown model, no custom-model support: fatal.
            logger.error(
                `Registry lookup failed for ${config.provider}/${config.model}: ${error.message}. ` +
                    `Effective maxInputTokens cannot be determined.`
            );
            throw LLMError.unknownModel(config.provider, config.model);
        } else {
            // Re-throw unexpected errors during registry lookup
            logger.error(`Unexpected error during registry lookup for maxInputTokens: ${error}`);
            throw error;
        }
    }
}
/**
 * Looks up the pricing entry for a model.
 *
 * Gateway providers (supportsAllRegistryModels) are first resolved to the provider that
 * natively owns the model, so the result is the original provider's pricing. Any gateway
 * markup has to be applied separately by the caller if needed.
 *
 * @param provider The name of the provider.
 * @param model The name of the model.
 * @returns The model's pricing, or undefined when the provider accepts any model name, the
 * model is not registered, or its entry has no pricing.
 */
export function getModelPricing(provider: LLMProvider, model: string): ModelPricing | undefined {
    const { provider: nativeProvider, model: nativeModel } = resolveToNativeProvider(
        provider,
        model
    );
    const { models } = LLM_REGISTRY[nativeProvider];

    // Providers with an empty model list carry no per-model pricing.
    if (acceptsAnyModel(nativeProvider)) {
        return undefined;
    }

    const lookupKey = stripBedrockRegionPrefix(nativeModel).toLowerCase();
    for (const entry of models) {
        if (entry.name.toLowerCase() === lookupKey) {
            return entry.pricing;
        }
    }
    return undefined;
}
/**
 * Returns a human-readable name for a model, or the model ID itself when none is known.
 *
 * When `provider` is omitted it is inferred from the model name; if that inference fails
 * the model ID is returned. Gateway providers (supportsAllRegistryModels) are resolved to
 * the provider that natively owns the model before the lookup.
 */
export function getModelDisplayName(model: string, provider?: LLMProvider): string {
    // Inference throws for unknown models; treat that as "no provider".
    const inferOwner = (): LLMProvider | null => {
        try {
            return getProviderFromModel(model);
        } catch {
            return null;
        }
    };

    const owner = provider ?? inferOwner();
    if (owner === null) {
        return model;
    }

    const { provider: nativeProvider, model: nativeModel } = resolveToNativeProvider(owner, model);
    const nativeInfo = LLM_REGISTRY[nativeProvider];
    if (!nativeInfo || acceptsAnyModel(nativeProvider)) {
        return model;
    }

    const lookupKey = stripBedrockRegionPrefix(nativeModel).toLowerCase();
    const entry = nativeInfo.models.find(
        (candidate) => candidate.name.toLowerCase() === lookupKey
    );
    return entry?.displayName ?? model;
}
// TODO: Add reasoningCapable as a property in the model registry instead of hardcoding here
/**
 * Tells whether a model supports configurable reasoning effort, judged purely from its name
 * (case-insensitive):
 * - any name containing 'codex'
 * - any name starting with 'o1', 'o3' or 'o4'
 * - any name containing 'gpt-5' (which also covers gpt-5.1, gpt-5.2, ...)
 *
 * @param model The model name to check.
 * @param _provider Currently unused; accepted so callers can pass provider context.
 * @returns True if the model supports reasoning effort configuration.
 */
export function isReasoningCapableModel(model: string, _provider?: LLMProvider): boolean {
    const lowered = model.toLowerCase();

    const isCodex = lowered.includes('codex');
    const isOSeries = ['o1', 'o3', 'o4'].some((family) => lowered.startsWith(family));
    const isGpt5Family = lowered.includes('gpt-5');

    return isCodex || isOSeries || isGpt5Family;
}
/**
 * Computes the cost of a request from its token usage and the model's pricing.
 *
 * Missing token counts count as zero, and missing cache rates are treated as free.
 * Reasoning tokens are billed at the output rate, in addition to the output tokens.
 *
 * @param usage Token usage counts.
 * @param pricing Model pricing (per million tokens).
 * @returns Cost in USD.
 */
export function calculateCost(usage: TokenUsage, pricing: ModelPricing): number {
    // Price of `tokens` at a per-million-token rate; an absent count costs nothing.
    const priceOf = (tokens: number | null | undefined, ratePerM: number): number =>
        ((tokens ?? 0) * ratePerM) / 1_000_000;

    const input = priceOf(usage.inputTokens, pricing.inputPerM);
    const output = priceOf(usage.outputTokens, pricing.outputPerM);
    const cacheRead = priceOf(usage.cacheReadTokens, pricing.cacheReadPerM ?? 0);
    const cacheWrite = priceOf(usage.cacheWriteTokens, pricing.cacheWritePerM ?? 0);
    const reasoning = priceOf(usage.reasoningTokens, pricing.outputPerM);

    return input + output + cacheRead + cacheWrite + reasoning;
}