feat: Add intelligent auto-router and enhanced integrations

- Add intelligent-router.sh hook for automatic agent routing
- Add AUTO-TRIGGER-SUMMARY.md documentation
- Add FINAL-INTEGRATION-SUMMARY.md documentation
- Complete Prometheus integration (6 commands + 4 tools)
- Complete Dexto integration (12 commands + 5 tools)
- Enhanced Ralph with access to all agents
- Fix /clawd command (removed disable-model-invocation)
- Update hooks.json to v5 with intelligent routing
- 291 total skills now available
- All 21 commands with automatic routing

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
admin
2026-01-28 00:27:56 +04:00
Unverified
parent 3b128ba3bd
commit b52318eeae
1724 changed files with 351216 additions and 0 deletions

View File

@@ -0,0 +1,412 @@
import { describe, test, expect, vi, beforeEach } from 'vitest';
import { DextoAgent } from './DextoAgent.js';
import type { AgentConfig, ValidatedAgentConfig } from './schemas.js';
import { AgentConfigSchema } from './schemas.js';
import type { AgentServices } from '../utils/service-initializer.js';
import { DextoRuntimeError } from '../errors/DextoRuntimeError.js';
import { ErrorScope, ErrorType } from '../errors/types.js';
import { AgentErrorCode } from './error-codes.js';
// Mock the createAgentServices function
vi.mock('../utils/service-initializer.js', () => ({
createAgentServices: vi.fn(),
}));
import { createAgentServices } from '../utils/service-initializer.js';
const mockCreateAgentServices = vi.mocked(createAgentServices);
describe('DextoAgent Lifecycle Management', () => {
let mockConfig: AgentConfig;
let mockValidatedConfig: ValidatedAgentConfig;
let mockServices: AgentServices;
beforeEach(() => {
    // Drop call history and implementations left over from the previous test.
    vi.resetAllMocks();

    mockConfig = {
        systemPrompt: 'You are a helpful assistant',
        llm: {
            provider: 'openai',
            model: 'gpt-5',
            apiKey: 'test-key',
            maxIterations: 50,
            maxInputTokens: 128000,
        },
        mcpServers: {},
        sessions: { maxSessions: 10, sessionTTL: 3600 },
        toolConfirmation: { mode: 'auto-approve', timeout: 120000 },
        elicitation: { enabled: false, timeout: 120000 },
    };

    // Schema-parsed form of the raw config; the start() tests compare factory calls against it.
    mockValidatedConfig = AgentConfigSchema.parse(mockConfig);

    // Value handed back by the stubbed stateManager.getRuntimeConfig().
    const runtimeConfig = {
        llm: mockValidatedConfig.llm,
        mcpServers: {},
        storage: {
            cache: { type: 'in-memory' },
            database: { type: 'in-memory' },
        },
        sessions: { maxSessions: 10, sessionTTL: 3600 },
    };

    // Each service is a partial stub cast to `any`. The three teardown calls (cleanup,
    // disconnectAll, disconnect) resolve by default; individual tests override them.
    mockServices = {
        mcpManager: {
            disconnectAll: vi.fn().mockResolvedValue(undefined),
            initializeFromConfig: vi.fn().mockResolvedValue(undefined),
        } as any,
        toolManager: {
            setAgent: vi.fn(),
            setPromptManager: vi.fn(),
            initialize: vi.fn().mockResolvedValue(undefined),
        } as any,
        systemPromptManager: {} as any,
        agentEventBus: {
            on: vi.fn(),
            emit: vi.fn(),
        } as any,
        stateManager: {
            getRuntimeConfig: vi.fn().mockReturnValue(runtimeConfig),
            getLLMConfig: vi.fn().mockReturnValue(mockValidatedConfig.llm),
        } as any,
        sessionManager: {
            cleanup: vi.fn().mockResolvedValue(undefined),
            init: vi.fn().mockResolvedValue(undefined),
            createSession: vi.fn().mockResolvedValue({ id: 'test-session' }),
        } as any,
        searchService: {} as any,
        storageManager: {
            disconnect: vi.fn().mockResolvedValue(undefined),
            getDatabase: vi.fn().mockReturnValue({}),
            getCache: vi.fn().mockReturnValue({}),
            getBlobStore: vi.fn().mockReturnValue({}),
        } as any,
        resourceManager: {} as any,
        approvalManager: {
            requestToolConfirmation: vi.fn(),
            requestElicitation: vi.fn(),
            cancelApproval: vi.fn(),
            cancelAllApprovals: vi.fn(),
            hasHandler: vi.fn().mockReturnValue(false),
        } as any,
        memoryManager: {} as any,
        pluginManager: {
            cleanup: vi.fn(),
        } as any,
    };

    // Every createAgentServices() call in a test resolves to the stub bundle above.
    mockCreateAgentServices.mockResolvedValue(mockServices);
});
describe('Constructor Patterns', () => {
    // A freshly constructed agent reports neither lifecycle flag as set.
    test('should create agent with config (new pattern)', () => {
        const freshAgent = new DextoAgent(mockConfig);

        expect(freshAgent.isStarted()).toBe(false);
        expect(freshAgent.isStopped()).toBe(false);
    });
});
describe('start() Method', () => {
    // Asserts that the mocked service factory was called with `expectedConfig`, an undefined
    // second argument, and two further arguments of any value.
    const expectServicesCreatedWith = (expectedConfig: ValidatedAgentConfig) => {
        expect(mockCreateAgentServices).toHaveBeenCalledWith(
            expectedConfig,
            undefined,
            expect.anything(), // logger instance
            expect.anything() // eventBus instance
        );
    };

    test('should start successfully with valid config', async () => {
        const startedAgent = new DextoAgent(mockConfig);
        await startedAgent.start();

        expect(startedAgent.isStarted()).toBe(true);
        expect(startedAgent.isStopped()).toBe(false);
        expectServicesCreatedWith(mockValidatedConfig);
    });

    test('should start with per-server connection modes in config', async () => {
        // One stdio server that carries an explicit connectionMode.
        const filesystemServer = {
            type: 'stdio' as const,
            command: 'npx',
            args: ['@modelcontextprotocol/server-filesystem', '.'],
            env: {},
            timeout: 30000,
            connectionMode: 'strict' as const,
        };
        const configWithServerModes = {
            ...mockConfig,
            mcpServers: { filesystem: filesystemServer },
        };

        const startedAgent = new DextoAgent(configWithServerModes);
        await startedAgent.start();

        // The factory should have received the schema-parsed form of the same config.
        expectServicesCreatedWith(AgentConfigSchema.parse(configWithServerModes));
    });

    test('should throw error when starting twice', async () => {
        const startedAgent = new DextoAgent(mockConfig);
        await startedAgent.start();

        const alreadyStarted = expect.objectContaining({
            code: AgentErrorCode.ALREADY_STARTED,
            scope: ErrorScope.AGENT,
            type: ErrorType.USER,
        });
        await expect(startedAgent.start()).rejects.toThrow(alreadyStarted);
    });

    test('should handle start failure gracefully', async () => {
        const failingAgent = new DextoAgent(mockConfig);
        // Replace the default resolved value so service creation rejects.
        mockCreateAgentServices.mockRejectedValue(new Error('Service initialization failed'));

        await expect(failingAgent.start()).rejects.toThrow('Service initialization failed');
        expect(failingAgent.isStarted()).toBe(false);
    });
});
describe('stop() Method', () => {
    test('should stop successfully after start', async () => {
        const lifecycleAgent = new DextoAgent(mockConfig);
        await lifecycleAgent.start();
        await lifecycleAgent.stop();

        expect(lifecycleAgent.isStarted()).toBe(false);
        expect(lifecycleAgent.isStopped()).toBe(true);

        // All three teardown stubs must have been invoked.
        const { sessionManager, mcpManager, storageManager } = mockServices;
        expect(sessionManager.cleanup).toHaveBeenCalled();
        expect(mcpManager.disconnectAll).toHaveBeenCalled();
        expect(storageManager!.disconnect).toHaveBeenCalled();
    });

    test('should throw error when stopping before start', async () => {
        const idleAgent = new DextoAgent(mockConfig);

        const notStarted = expect.objectContaining({
            code: AgentErrorCode.NOT_STARTED,
            scope: ErrorScope.AGENT,
            type: ErrorType.USER,
        });
        await expect(idleAgent.stop()).rejects.toThrow(notStarted);
    });

    test('should warn when stopping twice but not throw', async () => {
        const lifecycleAgent = new DextoAgent(mockConfig);
        await lifecycleAgent.start();
        await lifecycleAgent.stop();

        // A repeated stop() must resolve rather than reject.
        await expect(lifecycleAgent.stop()).resolves.toBeUndefined();
    });

    test('should handle partial cleanup failures gracefully', async () => {
        const lifecycleAgent = new DextoAgent(mockConfig);
        await lifecycleAgent.start();

        // Force the session teardown step to reject.
        const sessionCleanupFailure = new Error('Session cleanup failed');
        (mockServices.sessionManager.cleanup as any).mockRejectedValue(sessionCleanupFailure);

        // stop() still resolves and the agent ends up in the stopped state.
        await expect(lifecycleAgent.stop()).resolves.toBeUndefined();
        expect(lifecycleAgent.isStopped()).toBe(true);

        // The remaining teardown steps were attempted despite the failure.
        expect(mockServices.mcpManager.disconnectAll).toHaveBeenCalled();
        expect(mockServices.storageManager!.disconnect).toHaveBeenCalled();
    });
});
describe('Method Access Control', () => {
    // Public agent methods that must be rejected outside the started state, with sample arguments.
    const testMethods = [
        { name: 'run', args: ['test message'] },
        { name: 'createSession', args: [] },
        { name: 'getSession', args: ['session-id'] },
        { name: 'listSessions', args: [] },
        { name: 'deleteSession', args: ['session-id'] },
        { name: 'resetConversation', args: [] },
        { name: 'getCurrentLLMConfig', args: [] },
        { name: 'switchLLM', args: [{ model: 'gpt-5' }] },
        { name: 'addMcpServer', args: ['test', { type: 'stdio', command: 'test' }] },
        { name: 'getAllMcpTools', args: [] },
    ];

    /**
     * Looks up `name` on the agent and calls it with `args`.
     *
     * Because this wrapper is async, a synchronous throw from the method surfaces as a
     * rejection, so one `rejects` assertion covers both sync and async agent methods.
     * The lookup is cast to an explicit call signature instead of the untyped `Function`.
     */
    const invoke = async (agent: DextoAgent, name: string, args: unknown[]): Promise<unknown> => {
        const method = agent[name as keyof DextoAgent] as (...methodArgs: unknown[]) => unknown;
        return method.apply(agent, args);
    };

    test.each(testMethods)('$name should throw before start()', async ({ name, args }) => {
        const agent = new DextoAgent(mockConfig);

        // Fails if the call resolves, and checks the error fields when it rejects.
        await expect(invoke(agent, name, args)).rejects.toMatchObject({
            code: AgentErrorCode.NOT_STARTED,
            scope: ErrorScope.AGENT,
            type: ErrorType.USER,
        });
    });

    test.each(testMethods)('$name should throw after stop()', async ({ name, args }) => {
        const agent = new DextoAgent(mockConfig);
        await agent.start();
        await agent.stop();

        await expect(invoke(agent, name, args)).rejects.toMatchObject({
            code: AgentErrorCode.STOPPED,
            scope: ErrorScope.AGENT,
            type: ErrorType.USER,
        });
    });

    test('isStarted and isStopped should work without start() (read-only)', () => {
        const agent = new DextoAgent(mockConfig);

        expect(() => agent.isStarted()).not.toThrow();
        expect(() => agent.isStopped()).not.toThrow();
    });
});
describe('Session Auto-Approve Tools Cleanup (Memory Leak Fix)', () => {
    test('endSession should call clearSessionAutoApproveTools', async () => {
        const agent = new DextoAgent(mockConfig);

        // Attach the two stubs this test observes before the agent starts.
        const clearAutoApprove = vi.fn();
        const endSession = vi.fn().mockResolvedValue(undefined);
        mockServices.toolManager.clearSessionAutoApproveTools = clearAutoApprove;
        mockServices.sessionManager.endSession = endSession;

        await agent.start();
        await agent.endSession('test-session-123');

        expect(mockServices.toolManager.clearSessionAutoApproveTools).toHaveBeenCalledWith(
            'test-session-123'
        );
        expect(mockServices.sessionManager.endSession).toHaveBeenCalledWith('test-session-123');
    });

    test('deleteSession should call clearSessionAutoApproveTools', async () => {
        const agent = new DextoAgent(mockConfig);

        // Attach the two stubs this test observes before the agent starts.
        const clearAutoApprove = vi.fn();
        const deleteSession = vi.fn().mockResolvedValue(undefined);
        mockServices.toolManager.clearSessionAutoApproveTools = clearAutoApprove;
        mockServices.sessionManager.deleteSession = deleteSession;

        await agent.start();
        await agent.deleteSession('test-session-456');

        expect(mockServices.toolManager.clearSessionAutoApproveTools).toHaveBeenCalledWith(
            'test-session-456'
        );
        expect(mockServices.sessionManager.deleteSession).toHaveBeenCalledWith(
            'test-session-456'
        );
    });

    test('clearSessionAutoApproveTools should be called before session cleanup', async () => {
        const agent = new DextoAgent(mockConfig);

        // Both stubs append their own name so the relative order can be asserted afterwards.
        const invocations: string[] = [];
        mockServices.toolManager.clearSessionAutoApproveTools = vi.fn(() => {
            invocations.push('clearSessionAutoApproveTools');
        });
        mockServices.sessionManager.endSession = vi.fn(() => {
            invocations.push('endSession');
            return Promise.resolve();
        });

        await agent.start();
        await agent.endSession('test-session');

        expect(invocations).toEqual(['clearSessionAutoApproveTools', 'endSession']);
    });
});
describe('Integration Tests', () => {
    test('should handle complete lifecycle without errors', async () => {
        const agent = new DextoAgent(mockConfig);

        // Before start(): neither flag is set.
        expect(agent.isStarted()).toBe(false);
        expect(agent.isStopped()).toBe(false);

        // After start(): started only.
        await agent.start();
        expect(agent.isStarted()).toBe(true);
        expect(agent.isStopped()).toBe(false);

        // A started agent answers a guarded call.
        expect(agent.getCurrentLLMConfig()).toBeDefined();

        // After stop(): stopped only.
        await agent.stop();
        expect(agent.isStarted()).toBe(false);
        expect(agent.isStopped()).toBe(true);
    });

    test('should handle resource cleanup in correct order', async () => {
        const agent = new DextoAgent(mockConfig);
        await agent.start();

        const cleanupOrder: string[] = [];
        // Builds a teardown stub body that records `label` and resolves.
        const recordAs = (label: string) => () => {
            cleanupOrder.push(label);
            return Promise.resolve();
        };

        (mockServices.sessionManager.cleanup as any).mockImplementation(recordAs('sessions'));
        (mockServices.mcpManager.disconnectAll as any).mockImplementation(recordAs('clients'));
        (mockServices.storageManager!.disconnect as any).mockImplementation(recordAs('storage'));

        await agent.stop();

        expect(cleanupOrder).toEqual(['sessions', 'clients', 'storage']);
    });
});
});

View File

@@ -0,0 +1,354 @@
import { describe, test, expect } from 'vitest';
import {
createTestEnvironment,
TestConfigs,
requiresApiKey,
cleanupTestEnvironment,
} from '../llm/services/test-utils.integration.js';
import type { StreamingEvent } from '../events/index.js';
/**
* DextoAgent Stream API Integration Tests
*
* Tests the new generate() and stream() APIs with real LLM providers.
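* Requires valid API keys (ANTHROPIC_API_KEY, OPENAI_API_KEY, etc.). The suites in this file gate on
* requiresApiKey('openai') and register their tests as skipped when it returns a falsy value.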
*/
describe('DextoAgent.generate() API', () => {
    // test.concurrent when requiresApiKey('openai') is truthy, test.skip otherwise.
    const t = requiresApiKey('openai') ? test.concurrent : test.skip;

    t(
        'generate() returns complete response with usage stats',
        async () => {
            const env = await createTestEnvironment(TestConfigs.createOpenAIConfig());
            try {
                const reply = await env.agent.generate('What is 2+2?', env.sessionId);

                // Shape of the response body
                expect(reply).toBeDefined();
                expect(reply.content).toBeTruthy();
                expect(typeof reply.content).toBe('string');
                expect(reply.content.length).toBeGreaterThan(0);

                // Token accounting
                expect(reply.usage).toBeDefined();
                expect(reply.usage.inputTokens).toBeGreaterThan(0);
                expect(reply.usage.outputTokens).toBeGreaterThan(0);
                expect(reply.usage.totalTokens).toBeGreaterThan(0);

                // Session echo and absence of tool calls
                expect(reply.sessionId).toBe(env.sessionId);
                expect(reply.toolCalls).toEqual([]);
            } finally {
                await cleanupTestEnvironment(env);
            }
        },
        60000
    );

    t(
        'generate() maintains conversation context across turns',
        async () => {
            const env = await createTestEnvironment(TestConfigs.createOpenAIConfig());
            try {
                const firstTurn = await env.agent.generate('My name is Alice', env.sessionId);
                const secondTurn = await env.agent.generate('What is my name?', env.sessionId);

                // An empty first reply is tolerated: it is only logged, and the test moves on
                // to the second turn. A non-empty reply must be truthy.
                expect(firstTurn).toBeDefined();
                if (firstTurn.content !== '') {
                    expect(firstTurn.content).toBeTruthy();
                } else {
                    console.warn(
                        'First turn response was empty, but proceeding to check context retention'
                    );
                }

                // Same tolerance for the second reply; when it has text, it must mention the name.
                expect(secondTurn).toBeDefined();
                if (secondTurn.content !== '') {
                    expect(secondTurn.content).toBeTruthy();
                    expect(secondTurn.content.toLowerCase()).toContain('alice');
                } else {
                    console.warn(
                        'Second turn response was empty, but context retention test is partial success if first turn worked'
                    );
                }
            } finally {
                await cleanupTestEnvironment(env);
            }
        },
        60000
    );

    t(
        'generate() works with different providers',
        async () => {
            // Provider matrix; only OpenAI is listed.
            const providerMatrix = [{ name: 'openai', config: TestConfigs.createOpenAIConfig() }];

            for (const entry of providerMatrix) {
                // Providers whose key check fails are skipped.
                if (!requiresApiKey(entry.name as any)) continue;

                const env = await createTestEnvironment(entry.config);
                try {
                    const reply = await env.agent.generate('Say hello', env.sessionId);
                    expect(reply.content).toBeTruthy();
                    expect(reply.usage.totalTokens).toBeGreaterThan(0);
                } finally {
                    await cleanupTestEnvironment(env);
                }
            }
        },
        40000
    );
});
describe('DextoAgent.stream() API', () => {
    // Tests run concurrently when requiresApiKey('openai') is truthy and are skipped otherwise.
    const skipTests = !requiresApiKey('openai');
    const t = skipTests ? test.skip : test.concurrent;

    t(
        'stream() yields events in correct order',
        async () => {
            const env = await createTestEnvironment(TestConfigs.createOpenAIConfig());
            try {
                const events: StreamingEvent[] = [];
                for await (const event of await env.agent.stream('Say hello', env.sessionId)) {
                    events.push(event);
                }

                // Validate event order: llm:thinking first, run:complete last
                expect(events.length).toBeGreaterThan(0);
                expect(events[0]).toBeDefined();
                expect(events[events.length - 1]).toBeDefined();
                expect(events[0]!.name).toBe('llm:thinking');
                expect(events[events.length - 1]!.name).toBe('run:complete');

                // The first event carries the session id
                const startEvent = events[0];
                expect(startEvent).toBeDefined();
                expect(startEvent?.sessionId).toBe(env.sessionId);

                // The llm:response event must exist and carry content plus token usage
                const responseEvent = events.find((e) => e.name === 'llm:response');
                expect(responseEvent).toBeDefined();
                if (responseEvent && responseEvent.name === 'llm:response') {
                    expect(responseEvent.content).toBeTruthy();
                    expect(responseEvent.tokenUsage?.totalTokens).toBeGreaterThan(0);
                }
            } finally {
                await cleanupTestEnvironment(env);
            }
        },
        60000
    );

    t(
        'stream() yields content-chunk events',
        async () => {
            const env = await createTestEnvironment(TestConfigs.createOpenAIConfig());
            try {
                // Keep only llm:chunk events while draining the stream
                const chunkEvents: StreamingEvent[] = [];
                for await (const event of await env.agent.stream('Say hello', env.sessionId)) {
                    if (event.name === 'llm:chunk') {
                        chunkEvents.push(event);
                    }
                }

                // At least one chunk must arrive
                expect(chunkEvents.length).toBeGreaterThan(0);

                // Validate chunk structure (the name check narrows the event union)
                for (const event of chunkEvents) {
                    if (event.name === 'llm:chunk') {
                        expect(event.content).toBeDefined();
                        expect(typeof event.content).toBe('string');
                        expect(event.chunkType).toMatch(/^(text|reasoning)$/);
                    }
                }

                // Concatenated chunk content must be non-empty
                const fullContent = chunkEvents
                    .map((e) => (e.name === 'llm:chunk' ? e.content : ''))
                    .join('');
                expect(fullContent.length).toBeGreaterThan(0);
            } finally {
                await cleanupTestEnvironment(env);
            }
        },
        60000
    );

    t(
        // NOTE(review): despite the title, the body below iterates the stream exactly once.
        'stream() can be consumed multiple times via AsyncIterator',
        async () => {
            const env = await createTestEnvironment(TestConfigs.createOpenAIConfig());
            try {
                const stream = await env.agent.stream('Say hello', env.sessionId);
                const events: StreamingEvent[] = [];
                for await (const event of stream) {
                    events.push(event);
                }

                expect(events.length).toBeGreaterThan(0);
                expect(events[0]).toBeDefined();
                expect(events[events.length - 1]).toBeDefined();
                expect(events[0]!.name).toBe('llm:thinking');
                expect(events[events.length - 1]!.name).toBe('run:complete');
            } finally {
                await cleanupTestEnvironment(env);
            }
        },
        60000
    );

    t(
        'stream() maintains conversation context',
        async () => {
            const env = await createTestEnvironment(TestConfigs.createOpenAIConfig());
            try {
                // First message establishes the fact to remember
                const events1: StreamingEvent[] = [];
                for await (const event of await env.agent.stream(
                    'My favorite color is blue',
                    env.sessionId
                )) {
                    events1.push(event);
                }

                // Second message in the same session should recall it
                const events2: StreamingEvent[] = [];
                for await (const event of await env.agent.stream(
                    'What is my favorite color?',
                    env.sessionId
                )) {
                    events2.push(event);
                }

                const completeEvent2 = events2.find((e) => e.name === 'llm:response');
                // Without this check the test passes vacuously when no llm:response is emitted.
                expect(completeEvent2).toBeDefined();
                if (completeEvent2 && completeEvent2.name === 'llm:response') {
                    expect(completeEvent2.content.toLowerCase()).toContain('blue');
                }
            } finally {
                await cleanupTestEnvironment(env);
            }
        },
        60000
    );

    t(
        'stream() works with different providers',
        async () => {
            // Provider matrix; only OpenAI is listed.
            const providers = [{ name: 'openai', config: TestConfigs.createOpenAIConfig() }];

            for (const { name, config } of providers) {
                if (!requiresApiKey(name as any)) continue;

                const env = await createTestEnvironment(config);
                try {
                    const events: StreamingEvent[] = [];
                    for await (const event of await env.agent.stream('Say hello', env.sessionId)) {
                        events.push(event);
                    }

                    expect(events.length).toBeGreaterThan(0);
                    expect(events[0]).toBeDefined();
                    expect(events[events.length - 1]).toBeDefined();
                    expect(events[0]!.name).toBe('llm:thinking');
                    expect(events[events.length - 1]!.name).toBe('run:complete');
                } finally {
                    await cleanupTestEnvironment(env);
                }
            }
        },
        40000
    );
});
describe('DextoAgent API Compatibility', () => {
    // Tests run concurrently when requiresApiKey('openai') is truthy and are skipped otherwise.
    const skipTests = !requiresApiKey('openai');
    const t = skipTests ? test.skip : test.concurrent;

    t(
        'generate() produces same content as run() without streaming',
        async () => {
            const env = await createTestEnvironment(TestConfigs.createOpenAIConfig());
            try {
                const prompt = 'What is 2+2? Answer with just the number.';

                // Use run() (old API)
                const runResponse = await env.agent.run(
                    prompt,
                    undefined,
                    undefined,
                    env.sessionId
                );

                // Reset the conversation before asking the same prompt again
                await env.agent.resetConversation(env.sessionId);

                // Use generate() (new API)
                const generateResponse = await env.agent.generate(prompt, env.sessionId);

                // Both answers must be non-empty and contain '4'
                expect(runResponse).toBeTruthy();
                expect(generateResponse.content).toBeTruthy();
                expect(runResponse).toContain('4');
                expect(generateResponse.content).toContain('4');
            } finally {
                await cleanupTestEnvironment(env);
            }
        },
        60000
    );

    t(
        'stream() works alongside old run() API',
        async () => {
            const env = await createTestEnvironment(TestConfigs.createOpenAIConfig());
            try {
                // Use old run() API to establish the fact to remember
                const runResponse = await env.agent.run(
                    'My name is Bob',
                    undefined,
                    undefined,
                    env.sessionId
                );
                expect(runResponse).toBeTruthy();

                // Use new stream() API on the same session
                const events: StreamingEvent[] = [];
                for await (const event of await env.agent.stream(
                    'What is my name?',
                    env.sessionId
                )) {
                    events.push(event);
                }

                const completeEvent = events.find((e) => e.name === 'llm:response');
                // Without this check the test passes vacuously when no llm:response is emitted.
                expect(completeEvent).toBeDefined();
                if (completeEvent && completeEvent.name === 'llm:response') {
                    expect(completeEvent.content.toLowerCase()).toContain('bob');
                }
            } finally {
                await cleanupTestEnvironment(env);
            }
        },
        60000
    );
});

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,52 @@
import type { AgentCard } from './schemas.js';
import { AgentCardSchema } from '@core/agent/schemas.js';
/**
 * Fallback agent description. createAgentCard() uses it when the overrides' `description`
 * is null or undefined.
 */
const DEFAULT_AGENT_DESCRIPTION =
'Dexto is an AI assistant capable of chat and task delegation, accessible via multiple protocols.';
/**
 * Runtime-supplied fallbacks consumed by createAgentCard() for the fields that
 * the caller's overrides leave unset.
 */
export interface MinimalAgentCardContext {
    /** Name used when the overrides do not provide one. */
    defaultName: string;
    /** Version used when the overrides do not provide one. */
    defaultVersion: string;
    /** Base from which the default card URL is built when the overrides have no `url`. */
    defaultBaseUrl: string;
}
/**
 * Builds the final AgentCard from runtime defaults and optional user overrides.
 *
 * Overrides win over the context defaults. The merged object is then passed to
 * AgentCardSchema.parse(), which supplies schema-level defaults and validates the result.
 *
 * @param context - Fallback name, version and base URL used for fields missing from `overrides`.
 * @param overrides - Optional partial card whose set fields take precedence.
 * @returns The card returned by AgentCardSchema.parse().
 * @throws Whatever AgentCardSchema.parse() throws when the merged input is invalid.
 */
export function createAgentCard(
    context: MinimalAgentCardContext,
    overrides?: Partial<AgentCard>
): AgentCard {
    const { defaultName, defaultVersion, defaultBaseUrl } = context;

    // Strip trailing slashes so a base such as "http://host/" yields "http://host/mcp"
    // rather than "http://host//mcp".
    const defaultUrl = `${defaultBaseUrl.replace(/\/+$/, '')}/mcp`;

    const capsFromInput = overrides?.capabilities;

    // Override keys keep their position; the fields below overwrite or append.
    const effectiveInput: Record<string, unknown> = {
        ...overrides,
        name: overrides?.name ?? defaultName,
        version: overrides?.version ?? defaultVersion,
        url: overrides?.url ?? defaultUrl,
        description: overrides?.description ?? DEFAULT_AGENT_DESCRIPTION,
        // pushNotifications defaults to false (no WebSocket support)
        capabilities: {
            ...capsFromInput,
            pushNotifications: capsFromInput?.pushNotifications ?? false,
        },
    };

    // An empty skills array means "use schema default skills": clear it so the
    // schema default applies.
    const skillsFromInput = overrides?.skills;
    if (skillsFromInput && skillsFromInput.length === 0) {
        effectiveInput.skills = undefined;
    }

    return AgentCardSchema.parse(effectiveInput);
}

View File

@@ -0,0 +1,23 @@
/**
 * Agent-specific error codes.
 *
 * Covers agent configuration and lifecycle errors only; domain-specific errors
 * (LLM, Session, MCP, etc.) belong in their respective modules. Each member's value is a
 * stable, `agent_`-prefixed string.
 */
export enum AgentErrorCode {
// Lifecycle
// The agent was used before start() was called.
NOT_STARTED = 'agent_not_started',
// start() was called on an agent that is already started.
ALREADY_STARTED = 'agent_already_started',
// The agent was used after it had been stopped.
STOPPED = 'agent_stopped',
// Agent initialization did not complete.
INITIALIZATION_FAILED = 'agent_initialization_failed',
// A switch operation was requested while another one is still running.
SWITCH_IN_PROGRESS = 'agent_switch_in_progress',
// Configuration
// No configuration file path is available for file operations.
NO_CONFIG_PATH = 'agent_no_config_path',
// NOTE(review): no factory for this code is visible alongside the others — presumably raised
// when agent configuration fails validation; confirm against its callers.
INVALID_CONFIG = 'agent_invalid_config',
// API layer
// Request parameters failed validation at the API layer.
API_VALIDATION_ERROR = 'agent_api_validation_error',
// Runtime
// A stream failed with an unexpected error.
STREAM_FAILED = 'agent_stream_failed',
}

View File

@@ -0,0 +1,122 @@
import { DextoRuntimeError } from '@core/errors/DextoRuntimeError.js';
import { ErrorScope, ErrorType } from '@core/errors/types.js';
import { AgentErrorCode } from './error-codes.js';
/**
 * Factory for agent-scoped errors.
 *
 * Every static method returns a new DextoRuntimeError with ErrorScope.AGENT, pairing an
 * AgentErrorCode with an error type, a message, optional details and a recovery hint.
 * Domain-specific errors (LLM, Session, MCP) live in their own modules.
 */
export class AgentError {
    /** Error for using the agent before start() has been called. */
    static notStarted() {
        const message = 'Agent must be started before use';
        const recovery = 'Call agent.start() before using other methods';
        return new DextoRuntimeError(
            AgentErrorCode.NOT_STARTED,
            ErrorScope.AGENT,
            ErrorType.USER,
            message,
            undefined,
            recovery
        );
    }

    /** Error for calling start() on an agent that is already started. */
    static alreadyStarted() {
        const message = 'Agent is already started';
        const recovery = 'Call agent.stop() before starting again';
        return new DextoRuntimeError(
            AgentErrorCode.ALREADY_STARTED,
            ErrorScope.AGENT,
            ErrorType.USER,
            message,
            undefined,
            recovery
        );
    }

    /** Error for using an agent that has been stopped. */
    static stopped() {
        const message = 'Agent has been stopped and cannot be used';
        const recovery = 'Create a new agent instance or restart this one';
        return new DextoRuntimeError(
            AgentErrorCode.STOPPED,
            ErrorScope.AGENT,
            ErrorType.USER,
            message,
            undefined,
            recovery
        );
    }

    /** Conflict error for requesting a switch while another is still running. */
    static switchInProgress() {
        const message = 'Agent switch already in progress';
        const recovery =
            'Wait for the current switch operation to complete before starting a new one';
        return new DextoRuntimeError(
            AgentErrorCode.SWITCH_IN_PROGRESS,
            ErrorScope.AGENT,
            ErrorType.CONFLICT,
            message,
            undefined,
            recovery
        );
    }

    /**
     * System error for a failed initialization.
     *
     * @param reason - Appended to the "Agent initialization failed: " message prefix.
     * @param details - Optional payload forwarded unchanged to the error.
     */
    static initializationFailed(reason: string, details?: unknown) {
        const message = `Agent initialization failed: ${reason}`;
        const recovery = 'Check logs for initialization errors';
        return new DextoRuntimeError(
            AgentErrorCode.INITIALIZATION_FAILED,
            ErrorScope.AGENT,
            ErrorType.SYSTEM,
            message,
            details,
            recovery
        );
    }

    /** System error for file operations attempted without a config file path. */
    static noConfigPath() {
        const message = 'No configuration file path is available';
        const recovery =
            'Agent was created without a config file path, cannot perform file operations';
        return new DextoRuntimeError(
            AgentErrorCode.NO_CONFIG_PATH,
            ErrorScope.AGENT,
            ErrorType.SYSTEM,
            message,
            undefined,
            recovery
        );
    }

    /**
     * User error for invalid API request parameters.
     *
     * @param message - Used verbatim as the error message.
     * @param details - Optional payload forwarded unchanged to the error.
     */
    static apiValidationError(message: string, details?: unknown) {
        const recovery = 'Check the request parameters and try again';
        return new DextoRuntimeError(
            AgentErrorCode.API_VALIDATION_ERROR,
            ErrorScope.AGENT,
            ErrorType.USER,
            message,
            details,
            recovery
        );
    }

    /**
     * System error for a stream that failed unexpectedly.
     *
     * @param message - Used verbatim as the error message.
     * @param details - Optional payload forwarded unchanged to the error.
     */
    static streamFailed(message: string, details?: unknown) {
        const recovery = 'Check logs for details';
        return new DextoRuntimeError(
            AgentErrorCode.STREAM_FAILED,
            ErrorScope.AGENT,
            ErrorType.SYSTEM,
            message,
            details,
            recovery
        );
    }
}

View File

@@ -0,0 +1,29 @@
export { DextoAgent } from './DextoAgent.js';
export {
AgentConfigSchema,
AgentCardSchema,
SecuritySchemeSchema,
type AgentCard,
type ValidatedAgentCard,
} from './schemas.js';
export {
type ValidatedAgentConfig,
type AgentConfig,
type LLMValidationOptions,
createAgentConfigSchema,
} from './schemas.js';
export { createAgentCard } from './agentCard.js';
export * from './errors.js';
export * from './error-codes.js';
// New generate/stream API types
export type {
ContentInput,
GenerateOptions,
GenerateResponse,
StreamOptions,
AgentToolCall,
} from './types.js';
// Stream events are now core AgentEvents (exported from events module)
export type { StreamingEvent, StreamingEventName, STREAMING_EVENTS } from '../events/index.js';

View File

@@ -0,0 +1,789 @@
import { describe, it, expect } from 'vitest';
import { z } from 'zod';
import {
AgentCardSchema,
AgentConfigSchema,
type AgentCard,
type ValidatedAgentCard,
type AgentConfig,
} from './schemas.js';
describe('AgentCardSchema', () => {
const validAgentCard: AgentCard = {
name: 'TestAgent',
description: 'A test agent for validation',
url: 'https://agent.example.com',
version: '1.0.0',
};
describe('Basic Structure Validation', () => {
    it('should accept valid minimal config', () => {
        const parsed = AgentCardSchema.parse(validAgentCard);

        expect(parsed.name).toBe('TestAgent');
        expect(parsed.url).toBe('https://agent.example.com');
        expect(parsed.version).toBe('1.0.0');
    });

    it('should apply default values', () => {
        const parsed = AgentCardSchema.parse(validAgentCard);

        // Protocol-level defaults
        expect(parsed.protocolVersion).toBe('0.3.0');
        expect(parsed.preferredTransport).toBe('JSONRPC');
        // The provided description is kept as-is
        expect(parsed.description).toBe('A test agent for validation');
        // Capability defaults
        expect(parsed.capabilities.streaming).toBe(true);
        expect(parsed.capabilities.stateTransitionHistory).toBe(false);
        // Mode defaults
        expect(parsed.defaultInputModes).toEqual(['application/json', 'text/plain']);
        expect(parsed.defaultOutputModes).toEqual([
            'application/json',
            'text/event-stream',
            'text/plain',
        ]);
        // Exactly one default skill
        expect(parsed.skills).toHaveLength(1);
        expect(parsed.skills[0]!.id).toBe('chat_with_agent');
    });

    it('should preserve explicit values', () => {
        // Every optional section is populated so no default should win.
        const explicitCard: AgentCard = {
            ...validAgentCard,
            description: 'Custom description',
            capabilities: {
                streaming: false,
                pushNotifications: true,
                stateTransitionHistory: true,
            },
            metadata: {
                dexto: {
                    authentication: {
                        schemes: ['bearer', 'api-key'],
                        credentials: 'optional-creds',
                    },
                },
            },
            defaultInputModes: ['text/plain'],
            defaultOutputModes: ['application/json'],
            skills: [
                {
                    id: 'custom-skill',
                    name: 'Custom Skill',
                    description: 'A custom skill',
                    tags: ['custom'],
                    inputModes: ['application/json'],
                    outputModes: ['text/plain'],
                },
            ],
        };

        const parsed = AgentCardSchema.parse(explicitCard);

        expect(parsed.description).toBe('Custom description');
        expect(parsed.capabilities.streaming).toBe(false);
        expect(parsed.capabilities.pushNotifications).toBe(true);
        expect(parsed.capabilities.stateTransitionHistory).toBe(true);
        expect(parsed.metadata?.dexto?.authentication?.schemes).toEqual(['bearer', 'api-key']);
        expect(parsed.metadata?.dexto?.authentication?.credentials).toBe('optional-creds');
        expect(parsed.defaultInputModes).toEqual(['text/plain']);
        expect(parsed.defaultOutputModes).toEqual(['application/json']);
        expect(parsed.skills).toHaveLength(1);
        expect(parsed.skills[0]!.id).toBe('custom-skill');
    });
});
describe('Required Fields Validation', () => {
    // Copies the valid card, removes `field`, and returns the safeParse outcome.
    const parseWithout = (field: 'name' | 'url' | 'version') => {
        const candidate: Record<string, unknown> = { ...validAgentCard };
        delete candidate[field];
        return AgentCardSchema.safeParse(candidate);
    };

    it('should require name field', () => {
        const outcome = parseWithout('name');
        expect(outcome.success).toBe(false);
        expect(outcome.error?.issues[0]?.path).toEqual(['name']);
    });

    it('should require url field', () => {
        const outcome = parseWithout('url');
        expect(outcome.success).toBe(false);
        expect(outcome.error?.issues[0]?.path).toEqual(['url']);
    });

    it('should require version field', () => {
        const outcome = parseWithout('version');
        expect(outcome.success).toBe(false);
        expect(outcome.error?.issues[0]?.path).toEqual(['version']);
    });
});
describe('URL Validation', () => {
    it('should accept valid URLs', () => {
        const accepted = [
            'https://example.com',
            'http://localhost:8080',
            'https://agent.company.com/v1',
        ];

        accepted.forEach((url) => {
            const outcome = AgentCardSchema.safeParse({ ...validAgentCard, url });
            expect(outcome.success).toBe(true);
        });
    });

    it('should reject invalid URLs', () => {
        const rejected = ['not-a-url', 'just-text', ''];

        rejected.forEach((url) => {
            const outcome = AgentCardSchema.safeParse({ ...validAgentCard, url });
            expect(outcome.success).toBe(false);
        });
    });

    it('should validate provider.url when provider is specified', () => {
        // The provider block is present but its url is not a URL.
        const cardWithBadProvider: AgentCard = {
            ...validAgentCard,
            provider: {
                organization: 'Test Corp',
                url: 'invalid-url',
            },
        };

        const outcome = AgentCardSchema.safeParse(cardWithBadProvider);

        expect(outcome.success).toBe(false);
        expect(outcome.error?.issues[0]?.path).toEqual(['provider', 'url']);
    });

    it('should validate documentationUrl when specified', () => {
        const cardWithBadDocs: AgentCard = {
            ...validAgentCard,
            documentationUrl: 'not-a-url',
        };

        const outcome = AgentCardSchema.safeParse(cardWithBadDocs);

        expect(outcome.success).toBe(false);
        expect(outcome.error?.issues[0]?.path).toEqual(['documentationUrl']);
    });
});
describe('Skills Validation', () => {
    it('should validate skill structure', () => {
        // The skill omits inputModes/outputModes so the parsed value shows what gets filled in.
        const cardWithSkill: AgentCard = {
            ...validAgentCard,
            skills: [
                {
                    id: 'test-skill',
                    name: 'Test Skill',
                    description: 'A test skill',
                    tags: ['test', 'demo'],
                },
            ],
        };
        const [parsedSkill] = AgentCardSchema.parse(cardWithSkill).skills;
        expect(parsedSkill?.inputModes).toEqual(['text/plain']); // default
        expect(parsedSkill?.outputModes).toEqual(['text/plain']); // default
    });
    it('should require skill fields', () => {
        // The cast lets an intentionally incomplete skill through the compiler.
        const incompleteSkill = {
            id: 'test-skill',
            name: 'Test Skill',
            // Missing description and tags
        } as any;
        const cardWithBadSkill: AgentCard = {
            ...validAgentCard,
            skills: [incompleteSkill],
        };
        const outcome = AgentCardSchema.safeParse(cardWithBadSkill);
        expect(outcome.success).toBe(false);
    });
});
describe('Strict Validation', () => {
    it('should reject unknown fields', () => {
        // An extra top-level key must be reported as an unrecognized-keys issue.
        const withExtraKey: any = { ...validAgentCard, unknownField: 'should-fail' };
        const outcome = AgentCardSchema.safeParse(withExtraKey);
        expect(outcome.success).toBe(false);
        const firstIssue = outcome.error?.issues[0];
        expect(firstIssue?.code).toBe(z.ZodIssueCode.unrecognized_keys);
    });
    it('should reject unknown fields in nested objects', () => {
        // Same check one level down, inside `capabilities`.
        const withNestedExtraKey: any = {
            ...validAgentCard,
            capabilities: { streaming: true, unknownCapability: true },
        };
        const outcome = AgentCardSchema.safeParse(withNestedExtraKey);
        expect(outcome.success).toBe(false);
        const firstIssue = outcome.error?.issues[0];
        expect(firstIssue?.code).toBe(z.ZodIssueCode.unrecognized_keys);
    });
});
describe('Type Safety', () => {
    it('should handle input and output types correctly', () => {
        // The two annotations are the point of this test: the pre-parse value must be
        // assignable to AgentCard and the parse result to ValidatedAgentCard.
        const input: AgentCard = validAgentCard;
        const result: ValidatedAgentCard = AgentCardSchema.parse(input);
        // Fields expected to be populated on the parsed card
        expect(result.description).toBeTruthy();
        expect(result.capabilities).toBeDefined();
        // Values supplied by the caller come back unchanged
        for (const key of ['name', 'url', 'version'] as const) {
            expect(result[key]).toBe(input[key]);
        }
    });
});
describe('Security Schemes Validation', () => {
    // Positive cases parse a card carrying one scheme and check the scheme survives with
    // its `type` tag; negative cases drop a mandatory member and expect parsing to fail.
    it('should validate apiKey security scheme', () => {
        const card: AgentCard = {
            ...validAgentCard,
            securitySchemes: {
                apiKey: { type: 'apiKey', name: 'X-API-Key', in: 'header' },
            },
        };
        const parsed = AgentCardSchema.parse(card);
        expect(parsed.securitySchemes?.apiKey).toBeDefined();
        expect(parsed.securitySchemes?.apiKey?.type).toBe('apiKey');
    });
    it('should require name and in for apiKey type', () => {
        const card: any = {
            ...validAgentCard,
            securitySchemes: {
                apiKey: {
                    type: 'apiKey',
                    // Missing name and in
                },
            },
        };
        expect(AgentCardSchema.safeParse(card).success).toBe(false);
    });
    it('should validate http security scheme', () => {
        const card: AgentCard = {
            ...validAgentCard,
            securitySchemes: {
                bearer: { type: 'http', scheme: 'bearer', bearerFormat: 'JWT' },
            },
        };
        const parsed = AgentCardSchema.parse(card);
        expect(parsed.securitySchemes?.bearer).toBeDefined();
        expect(parsed.securitySchemes?.bearer?.type).toBe('http');
    });
    it('should require scheme for http type', () => {
        const card: any = {
            ...validAgentCard,
            securitySchemes: {
                http: {
                    type: 'http',
                    // Missing scheme
                },
            },
        };
        expect(AgentCardSchema.safeParse(card).success).toBe(false);
    });
    it('should validate oauth2 security scheme', () => {
        const card: AgentCard = {
            ...validAgentCard,
            securitySchemes: {
                oauth: {
                    type: 'oauth2',
                    flows: {
                        authorizationCode: {
                            authorizationUrl: 'https://auth.example.com/oauth/authorize',
                            tokenUrl: 'https://auth.example.com/oauth/token',
                            scopes: {
                                read: 'Read access',
                                write: 'Write access',
                            },
                        },
                    },
                },
            },
        };
        const parsed = AgentCardSchema.parse(card);
        expect(parsed.securitySchemes?.oauth).toBeDefined();
        expect(parsed.securitySchemes?.oauth?.type).toBe('oauth2');
    });
    it('should validate openIdConnect security scheme', () => {
        const card: AgentCard = {
            ...validAgentCard,
            securitySchemes: {
                oidc: {
                    type: 'openIdConnect',
                    openIdConnectUrl:
                        'https://accounts.google.com/.well-known/openid-configuration',
                },
            },
        };
        const parsed = AgentCardSchema.parse(card);
        expect(parsed.securitySchemes?.oidc).toBeDefined();
        expect(parsed.securitySchemes?.oidc?.type).toBe('openIdConnect');
    });
    it('should require openIdConnectUrl for openIdConnect type', () => {
        const card: any = {
            ...validAgentCard,
            securitySchemes: {
                oidc: {
                    type: 'openIdConnect',
                    // Missing openIdConnectUrl
                },
            },
        };
        expect(AgentCardSchema.safeParse(card).success).toBe(false);
    });
    it('should validate mutualTLS security scheme', () => {
        const card: AgentCard = {
            ...validAgentCard,
            securitySchemes: {
                mtls: { type: 'mutualTLS' },
            },
        };
        const parsed = AgentCardSchema.parse(card);
        expect(parsed.securitySchemes?.mtls).toBeDefined();
        expect(parsed.securitySchemes?.mtls?.type).toBe('mutualTLS');
    });
});
describe('Metadata and Extensions', () => {
    it('should support dexto metadata extensions', () => {
        // Populate the `dexto` namespace with a delegation block and an owner block.
        const card: AgentCard = {
            ...validAgentCard,
            metadata: {
                dexto: {
                    delegation: {
                        protocol: 'a2a-jsonrpc',
                        endpoint: '/delegate',
                        supportsSession: true,
                        supportsStreaming: true,
                    },
                    owner: {
                        userId: 'user123',
                        username: 'testuser',
                        email: 'test@example.com',
                    },
                },
            },
        };
        const dexto = AgentCardSchema.parse(card).metadata?.dexto;
        expect(dexto?.delegation?.protocol).toBe('a2a-jsonrpc');
        expect(dexto?.owner?.userId).toBe('user123');
    });
    it('should support custom metadata namespaces', () => {
        // A namespace other than `dexto` must still be present after parsing.
        const card: AgentCard = {
            ...validAgentCard,
            metadata: {
                dexto: {},
                customExtension: {
                    foo: 'bar',
                    nested: { key: 'value' },
                },
            },
        };
        const parsed = AgentCardSchema.parse(card);
        expect(parsed.metadata?.customExtension).toBeDefined();
    });
    it('should validate signatures field', () => {
        const protectedHeader = 'eyJhbGciOiJSUzI1NiJ9';
        const card: AgentCard = {
            ...validAgentCard,
            signatures: [
                {
                    protected: protectedHeader,
                    signature:
                        'cC4hiUPoj9Eetdgtv3hF80EGrhuB__dzERat0XF9g2VtQgr9PJbu3XOiZj5RZmh7',
                },
            ],
        };
        const { signatures } = AgentCardSchema.parse(card);
        expect(signatures).toHaveLength(1);
        expect(signatures?.[0]?.protected).toBe(protectedHeader);
    });
});
describe('Required Description Field', () => {
    it('should require description field', () => {
        // Built from scratch (not from validAgentCard) so the absence of `description`
        // is visible right here.
        const cardWithoutDescription = {
            name: 'TestAgent',
            url: 'https://agent.example.com',
            version: '1.0.0',
            // Missing description
        };
        const outcome = AgentCardSchema.safeParse(cardWithoutDescription);
        expect(outcome.success).toBe(false);
        const firstIssuePath = outcome.error?.issues[0]?.path;
        expect(firstIssuePath).toEqual(['description']);
    });
});
});
describe('AgentConfigSchema', () => {
// Shared baseline for this suite: only a string system prompt and an llm section are
// given, so any other section seen on a parsed result was supplied by the schema.
const validAgentConfig: AgentConfig = {
systemPrompt: 'You are a helpful assistant',
llm: {
provider: 'openai',
model: 'gpt-5',
apiKey: 'test-key',
},
};
describe('Basic Structure Validation', () => {
    it('should accept valid minimal config', () => {
        const { systemPrompt, llm } = AgentConfigSchema.parse(validAgentConfig);
        expect(systemPrompt.contributors).toHaveLength(1);
        expect(llm.provider).toBe('openai');
        expect(llm.model).toBe('gpt-5');
        expect(llm.apiKey).toBe('test-key');
    });
    it('should apply default values', () => {
        const parsed = AgentConfigSchema.parse(validAgentConfig);
        // Should apply defaults from composed schemas
        expect(parsed.mcpServers).toEqual({});
        expect(parsed.internalTools).toEqual([]);
        expect(parsed.storage).toBeDefined();
        expect(parsed.storage.cache.type).toBe('in-memory');
        expect(parsed.storage.database.type).toBe('in-memory');
        expect(parsed.sessions).toBeDefined();
        expect(parsed.toolConfirmation).toBeDefined();
    });
    it('should preserve explicit values from all composed schemas', () => {
        // Every section is given explicitly; one representative value per section is
        // then read back from the parsed result.
        const explicitConfig: AgentConfig = {
            agentCard: {
                name: 'TestAgent',
                description: 'Test agent for validation',
                url: 'https://agent.example.com',
                version: '1.0.0',
            },
            systemPrompt: {
                contributors: [
                    {
                        id: 'custom',
                        type: 'static',
                        content: 'Custom prompt',
                        priority: 0,
                    },
                ],
            },
            mcpServers: {
                testServer: {
                    type: 'stdio',
                    command: 'node',
                    args: ['server.js'],
                },
            },
            internalTools: ['search_history'],
            llm: {
                provider: 'anthropic',
                model: 'claude-haiku-4-5-20251001',
                apiKey: 'test-anthropic-key',
                maxIterations: 25,
            },
            storage: {
                cache: { type: 'redis', url: 'redis://localhost:6379' },
                database: { type: 'postgres', url: 'postgresql://localhost:5432/test' },
                blob: { type: 'local', storePath: '/tmp/test-blobs' },
            },
            sessions: {
                maxSessions: 5,
                sessionTTL: 1800,
            },
            toolConfirmation: {
                mode: 'auto-approve',
                timeout: 15000,
            },
        };
        const parsed = AgentConfigSchema.parse(explicitConfig);
        expect(parsed.agentCard?.name).toBe('TestAgent');
        expect(parsed.systemPrompt.contributors[0]!.id).toBe('custom');
        expect(parsed.mcpServers.testServer).toBeDefined();
        expect(parsed.internalTools).toEqual(['search_history']);
        expect(parsed.llm.provider).toBe('anthropic');
        expect(parsed.storage.cache.type).toBe('redis');
        expect(parsed.sessions.maxSessions).toBe(5);
        expect(parsed.toolConfirmation.mode).toBe('auto-approve');
    });
});
describe('Required Fields Validation', () => {
    // Remove one top-level section at a time from the baseline config; parsing must fail
    // and the first issue must be located at the removed section.
    for (const section of ['systemPrompt', 'llm'] as const) {
        it(`should require ${section} field`, () => {
            const candidate = { ...validAgentConfig };
            Reflect.deleteProperty(candidate, section);
            const outcome = AgentConfigSchema.safeParse(candidate);
            expect(outcome.success).toBe(false);
            expect(outcome.error?.issues[0]?.path).toEqual([section]);
        });
    }
});
describe('Validation Propagation', () => {
    it('should propagate validation errors from nested schemas', () => {
        // Only checks that a failure inside a composed schema surfaces on the agent
        // config; exhaustive LLM validation belongs to that schema's own tests.
        const brokenLlm = {
            provider: 'invalid-provider' as any,
            model: 'test-model',
            apiKey: 'test-key',
        };
        const configWithInvalidLLM: AgentConfig = { ...validAgentConfig, llm: brokenLlm };
        const outcome = AgentConfigSchema.safeParse(configWithInvalidLLM);
        expect(outcome.success).toBe(false);
        // The first path segment identifies the nested section that failed
        const firstIssue = outcome.error?.issues[0];
        expect(firstIssue?.path[0]).toBe('llm');
    });
});
describe('Schema Composition Integration', () => {
    it('should properly transform systemPrompt from string to object', () => {
        // String shorthand goes in; a contributors list with one static entry comes out.
        const shorthandConfig: AgentConfig = {
            ...validAgentConfig,
            systemPrompt: 'Simple string prompt',
        };
        const { contributors } = AgentConfigSchema.parse(shorthandConfig).systemPrompt;
        expect(contributors).toHaveLength(1);
        expect(contributors[0]!.type).toBe('static');
        expect((contributors[0] as any).content).toBe('Simple string prompt');
    });
    it('should apply defaults from all composed schemas', () => {
        const parsed = AgentConfigSchema.parse(validAgentConfig);
        // One probe per composed schema
        expect(parsed.llm.maxIterations).toBeUndefined(); // LLM schema default (unlimited)
        expect(parsed.storage).toBeDefined();
        expect(parsed.storage.cache.type).toBe('in-memory'); // Storage schema default
        expect(parsed.sessions.maxSessions).toBe(100); // Session schema default
        expect(parsed.toolConfirmation.mode).toBe('auto-approve'); // Tool schema default
    });
});
describe('Strict Validation', () => {
    it('should reject unknown fields', () => {
        // A key the schema does not declare must surface as an unrecognized-keys issue.
        const withExtraKey: any = { ...validAgentConfig, unknownField: 'should-fail' };
        const outcome = AgentConfigSchema.safeParse(withExtraKey);
        expect(outcome.success).toBe(false);
        const firstIssue = outcome.error?.issues[0];
        expect(firstIssue?.code).toBe(z.ZodIssueCode.unrecognized_keys);
    });
});
describe('Type Safety', () => {
    it('should handle input and output types correctly', () => {
        const input: AgentConfig = validAgentConfig;
        const parsed = AgentConfigSchema.parse(input);
        // Sections absent from the input must exist on the output
        const filledSections = [
            parsed.mcpServers,
            parsed.internalTools,
            parsed.storage,
            parsed.sessions,
            parsed.toolConfirmation,
        ];
        for (const section of filledSections) {
            expect(section).toBeDefined();
        }
        // Caller-supplied llm values come back unchanged
        for (const key of ['provider', 'model', 'apiKey'] as const) {
            expect(parsed.llm[key]).toBe(input.llm[key]);
        }
    });
    it('should maintain proper types for nested objects', () => {
        const { llm, storage, internalTools, sessions } =
            AgentConfigSchema.parse(validAgentConfig);
        // Runtime spot checks on values reached through the inferred nested types
        expect(typeof llm.provider).toBe('string');
        expect(typeof llm.model).toBe('string');
        expect(typeof storage.cache.type).toBe('string');
        expect(Array.isArray(internalTools)).toBe(true);
        expect(typeof sessions.maxSessions).toBe('number');
    });
});
describe('Real-world Scenarios', () => {
    it('should handle complete production config', () => {
        // A fully populated config: agent card, two prompt contributors, two MCP servers
        // (stdio and http), redis/postgres storage and manual tool confirmation.
        const prodConfig: AgentConfig = {
            agentCard: {
                name: 'Production Agent',
                description: 'Production AI agent for customer support',
                url: 'https://api.company.com/agent',
                version: '2.1.0',
                provider: {
                    organization: 'ACME Corp',
                    url: 'https://acme.com',
                },
                documentationUrl: 'https://docs.acme.com/agent',
            },
            systemPrompt: {
                contributors: [
                    {
                        id: 'main',
                        type: 'static',
                        content: 'You are a customer support agent.',
                        priority: 0,
                    },
                    {
                        id: 'datetime',
                        type: 'dynamic',
                        source: 'date',
                        priority: 10,
                    },
                ],
            },
            mcpServers: {
                database: {
                    type: 'stdio',
                    command: 'python',
                    args: ['-m', 'db_server'],
                    env: { DB_URL: 'postgresql://prod:5432/db' },
                },
                search: {
                    type: 'http',
                    url: 'https://search.company.com/mcp',
                    headers: { Authorization: 'Bearer prod-token' },
                },
            },
            internalTools: ['search_history'],
            llm: {
                provider: 'openai',
                model: 'gpt-5',
                apiKey: 'sk-prod-key-123',
                maxIterations: 30,
                temperature: 0.3,
            },
            storage: {
                cache: {
                    type: 'redis',
                    url: 'redis://cache.company.com:6379',
                },
                database: {
                    type: 'postgres',
                    url: 'postgresql://db.company.com:5432/agent_db',
                },
                blob: { type: 'local', storePath: '/tmp/test-blobs' },
            },
            sessions: {
                maxSessions: 100,
                sessionTTL: 7200,
            },
            toolConfirmation: {
                mode: 'manual',
                timeout: 45000,
                allowedToolsStorage: 'storage',
            },
        };
        const parsed = AgentConfigSchema.parse(prodConfig);
        expect(parsed.agentCard?.name).toBe('Production Agent');
        expect(parsed.systemPrompt.contributors).toHaveLength(2);
        const serverNames = Object.keys(parsed.mcpServers);
        expect(serverNames).toHaveLength(2);
        expect(parsed.internalTools).toEqual(['search_history']);
        expect(parsed.llm.temperature).toBe(0.3);
        expect(parsed.storage.cache.type).toBe('redis');
        expect(parsed.sessions.maxSessions).toBe(100);
        expect(parsed.toolConfirmation.timeout).toBe(45000);
    });
    it('should handle minimal config with all defaults', () => {
        const minimalConfig: AgentConfig = {
            systemPrompt: 'You are helpful',
            llm: {
                provider: 'openai',
                model: 'gpt-5-mini',
                apiKey: 'sk-test',
            },
        };
        const parsed = AgentConfigSchema.parse(minimalConfig);
        // Should have all defaults applied
        expect(parsed.mcpServers).toEqual({});
        expect(parsed.internalTools).toEqual([]);
        expect(parsed.storage).toBeDefined();
        const { cache, database, blob } = parsed.storage;
        expect(cache.type).toBe('in-memory');
        expect(database.type).toBe('in-memory');
        expect(blob.type).toBe('in-memory');
        expect(parsed.sessions).toBeDefined();
        expect(parsed.toolConfirmation.mode).toBe('auto-approve');
        expect(parsed.llm.maxIterations).toBeUndefined();
    });
});
});

View File

@@ -0,0 +1,466 @@
/**
* Schema Defaults Conventions:
* Field-level defaults live in the leaf schemas.
* AgentConfig decides if a section is optional by adding `.default({})`.
* It never duplicates per-field literal defaults.
*/
import { createLLMConfigSchema, type LLMValidationOptions } from '@core/llm/schemas.js';
import { LoggerConfigSchema } from '@core/logger/index.js';
import { ServerConfigsSchema as McpServersConfigSchema } from '@core/mcp/schemas.js';
import { MemoriesConfigSchema } from '@core/memory/schemas.js';
import { SessionConfigSchema } from '@core/session/schemas.js';
import { StorageSchema } from '@core/storage/schemas.js';
import { SystemPromptConfigSchema } from '@core/systemPrompt/schemas.js';
import {
CompactionConfigSchema,
DEFAULT_COMPACTION_CONFIG,
} from '@core/context/compaction/schemas.js';
import {
InternalToolsSchema,
CustomToolsSchema,
ToolConfirmationConfigSchema,
ElicitationConfigSchema,
ToolsConfigSchema,
} from '@core/tools/schemas.js';
import { z } from 'zod';
import { InternalResourcesSchema } from '@core/resources/schemas.js';
import { PromptsSchema } from '@core/prompts/schemas.js';
import { PluginsConfigSchema } from '@core/plugins/schemas.js';
import { OtelConfigurationSchema } from '@core/telemetry/schemas.js';
// (agent card overrides are now represented as Partial<AgentCard> and processed via AgentCardSchema)
/**
* Security Scheme Schemas (A2A Protocol, based on OpenAPI 3.0 Security Scheme Object)
* Defines authentication mechanisms for the agent as a discriminated union
*/
/** `apiKey` scheme: a named key located in the query string, a header, or a cookie. */
const ApiKeySecurityScheme = z.strictObject({
    type: z.literal('apiKey').describe('Security scheme type'),
    name: z.string().describe('Name of the header/query/cookie parameter'),
    in: z.enum(['query', 'header', 'cookie']).describe('Location of API key'),
    description: z.string().optional().describe('Description of the security scheme'),
});
/** `http` scheme: requires the authorization scheme name; the bearer format hint is optional. */
const HttpSecurityScheme = z.strictObject({
    type: z.literal('http').describe('Security scheme type'),
    scheme: z.string().describe('HTTP authorization scheme (e.g., basic, bearer)'),
    bearerFormat: z.string().optional().describe('Hint for bearer token format'),
    description: z.string().optional().describe('Description of the security scheme'),
});
/**
 * One OAuth2 flow. Every endpoint URL is optional at this level; only the scope map is
 * mandatory. The same shape is reused for all flow kinds.
 */
const OAuth2FlowSchema = z.strictObject({
    authorizationUrl: z.string().url().optional().describe('Authorization URL for the flow'),
    tokenUrl: z.string().url().optional().describe('Token URL for the flow'),
    refreshUrl: z.string().url().optional().describe('Refresh URL for the flow'),
    scopes: z.record(z.string()).describe('Available scopes for the OAuth2 flow'),
});
/** `oauth2` scheme: a `flows` object in which each of the four flow kinds is optional. */
const OAuth2SecurityScheme = z.strictObject({
    type: z.literal('oauth2').describe('Security scheme type'),
    flows: z
        .strictObject({
            implicit: OAuth2FlowSchema.optional(),
            password: OAuth2FlowSchema.optional(),
            clientCredentials: OAuth2FlowSchema.optional(),
            authorizationCode: OAuth2FlowSchema.optional(),
        })
        .describe('OAuth2 flow configurations'),
    description: z.string().optional().describe('Description of the security scheme'),
});
/** `openIdConnect` scheme: requires a well-formed discovery URL. */
const OpenIdConnectSecurityScheme = z.strictObject({
    type: z.literal('openIdConnect').describe('Security scheme type'),
    openIdConnectUrl: z.string().url().describe('OpenID Connect discovery URL'),
    description: z.string().optional().describe('Description of the security scheme'),
});
/** `mutualTLS` scheme: carries nothing beyond its type tag and an optional description. */
const MutualTLSSecurityScheme = z.strictObject({
    type: z.literal('mutualTLS').describe('Security scheme type'),
    description: z.string().optional().describe('Description of the security scheme'),
});
/**
 * Union of the five security scheme shapes above, selected by the literal value of
 * each object's `type` field.
 */
export const SecuritySchemeSchema = z.discriminatedUnion('type', [
ApiKeySecurityScheme,
HttpSecurityScheme,
OAuth2SecurityScheme,
OpenIdConnectSecurityScheme,
MutualTLSSecurityScheme,
]);
/**
 * Agent Card Signature Schema (A2A Protocol v0.3.0)
 *
 * One JSON Web Signature entry used to verify AgentCard integrity. Both members are
 * required strings; no further keys are accepted.
 */
const AgentCardSignatureSchema = z.strictObject({
    protected: z.string().describe('Base64url-encoded JWS Protected Header'),
    signature: z.string().describe('Base64url-encoded JWS Signature'),
});
/** Legacy authentication block kept under `metadata.dexto.authentication`. */
const DextoAuthenticationSchema = z
    .object({
        schemes: z
            .array(z.string())
            .default([])
            .describe('Legacy authentication schemes (deprecated: use securitySchemes)'),
        credentials: z.string().optional().describe('Credentials information'),
    })
    .strict();
/** Delegation details kept under `metadata.dexto.delegation`. */
const DextoDelegationSchema = z
    .object({
        protocol: z
            .enum(['dexto-v1', 'http-simple', 'a2a-jsonrpc', 'mcp-http'])
            .describe('Delegation protocol version'),
        endpoint: z.string().describe('Delegation endpoint (relative path or full URL)'),
        supportsSession: z.boolean().describe('Whether agent supports stateful sessions'),
        supportsStreaming: z
            .boolean()
            .optional()
            .describe('Whether agent supports streaming responses'),
    })
    .strict();
/** Owner details kept under `metadata.dexto.owner`; the email is optional. */
const DextoOwnerSchema = z
    .object({
        userId: z.string().describe('Unique user identifier from auth system'),
        username: z.string().describe('Display name'),
        email: z
            .string()
            .email()
            .max(254)
            .optional()
            .describe(
                'Optional user email (WARNING: publicly readable via .well-known/agent.json if provided)'
            ),
    })
    .strict();
/**
 * Dexto Extension Metadata Schema
 *
 * Namespace for Dexto-specific extension fields. All three sections are optional and
 * unknown keys are rejected at every level.
 */
const DextoMetadataSchema = z
    .object({
        authentication: DextoAuthenticationSchema.optional().describe(
            'Legacy authentication configuration'
        ),
        delegation: DextoDelegationSchema.optional().describe(
            'Delegation protocol information for agent-to-agent communication'
        ),
        owner: DextoOwnerSchema.optional().describe(
            'Agent owner information (for multi-tenant deployments)'
        ),
    })
    .strict();
/** A single advertised skill; both mode lists fall back to `['text/plain']` when omitted. */
const AgentSkillSchema = z
    .object({
        id: z.string().describe('Unique skill identifier'),
        name: z.string().describe('Human-readable skill name'),
        description: z.string().describe('Detailed skill description'),
        tags: z.array(z.string()).describe('Searchable tags for discovery'),
        examples: z.array(z.string()).optional().describe('Example use cases or queries'),
        inputModes: z
            .array(z.string())
            .optional()
            .default(['text/plain'])
            .describe('Skill-specific input MIME types'),
        outputModes: z
            .array(z.string())
            .optional()
            .default(['text/plain'])
            .describe('Skill-specific output MIME types'),
    })
    .strict();
/** Organization behind the agent; both members are required when the block is present. */
const AgentProviderSchema = z
    .object({
        organization: z.string().describe('Provider organization name'),
        url: z.string().url().describe('Provider organization URL'),
    })
    .strict();
/** An extra endpoint paired with the transport it speaks. */
const AgentInterfaceSchema = z
    .object({
        url: z.string().url().describe('Endpoint URL'),
        transport: z.enum(['JSONRPC', 'GRPC', 'HTTP+JSON']).describe('Transport protocol'),
    })
    .strict();
/** Feature flags; `streaming` defaults to true and `stateTransitionHistory` to false. */
const AgentCapabilitiesSchema = z
    .object({
        streaming: z
            .boolean()
            .optional()
            .default(true)
            .describe('Supports streaming responses'),
        pushNotifications: z.boolean().optional().describe('Supports push notifications'),
        stateTransitionHistory: z
            .boolean()
            .optional()
            .default(false)
            .describe('Provides state transition history'),
    })
    .strict();
/**
 * Agent Card Schema (A2A Protocol v0.3.0 Compliant)
 *
 * Follows the A2A specification, with extensions confined to the `metadata` field.
 * `name`, `description`, `url` and `version` have no default and must be supplied;
 * unknown top-level keys are rejected, while `metadata` lets extra namespaces through.
 */
export const AgentCardSchema = z
    .object({
        // ────────────────────────────────────────────────────────
        // A2A Protocol Required Fields
        // ────────────────────────────────────────────────────────
        protocolVersion: z
            .string()
            .default('0.3.0')
            .describe('A2A protocol version (e.g., "0.3.0")'),
        name: z.string().describe('Human-readable agent name'),
        description: z.string().describe('Detailed description of agent purpose and capabilities'),
        url: z.string().url().describe('Primary endpoint URL for the agent'),
        version: z.string().describe('Agent version (semantic versioning recommended)'),
        preferredTransport: z
            .enum(['JSONRPC', 'GRPC', 'HTTP+JSON'])
            .default('JSONRPC')
            .describe('Primary transport protocol for communication'),
        defaultInputModes: z
            .array(z.string())
            .default(['application/json', 'text/plain'])
            .describe('Supported input MIME types'),
        defaultOutputModes: z
            .array(z.string())
            .default(['application/json', 'text/event-stream', 'text/plain'])
            .describe('Supported output MIME types'),
        skills: z
            .array(AgentSkillSchema)
            .default([
                {
                    id: 'chat_with_agent',
                    name: 'chat_with_agent',
                    description: 'Allows you to chat with an AI agent. Send a message to interact.',
                    tags: ['chat', 'AI', 'assistant', 'mcp', 'natural language'],
                    examples: [
                        `Send a JSON-RPC request to /mcp with method: "chat_with_agent" and params: {"message":"Your query..."}`,
                        'Alternatively, use a compatible MCP client library.',
                    ],
                },
            ])
            .describe('Agent capabilities/skills'),
        // ────────────────────────────────────────────────────────
        // A2A Protocol Optional Fields
        // ────────────────────────────────────────────────────────
        provider: AgentProviderSchema.optional().describe('Agent provider information'),
        iconUrl: z.string().url().optional().describe('URL to agent icon/logo (for UI display)'),
        documentationUrl: z.string().url().optional().describe('URL to agent documentation'),
        additionalInterfaces: z
            .array(AgentInterfaceSchema)
            .optional()
            .describe('Additional interfaces/transports supported by the agent'),
        capabilities: AgentCapabilitiesSchema.default({}).describe(
            'Agent capabilities and features'
        ),
        securitySchemes: z
            .record(SecuritySchemeSchema)
            .optional()
            .describe('Map of security scheme definitions (A2A format)'),
        security: z
            .array(z.record(z.array(z.string())))
            .optional()
            .describe(
                'Security requirements (array of security scheme references with required scopes)'
            ),
        supportsAuthenticatedExtendedCard: z
            .boolean()
            .optional()
            .describe('Whether extended card is available with authentication'),
        signatures: z
            .array(AgentCardSignatureSchema)
            .optional()
            .describe('JSON Web Signatures for verifying AgentCard integrity'),
        metadata: z
            .object({
                dexto: DextoMetadataSchema.optional().describe('Dexto-specific extension metadata'),
            })
            .passthrough()
            .optional()
            .describe('Extension-specific metadata (namespaced by extension name)'),
    })
    .strict();
// Input type for the user-facing API: the shape accepted by AgentCardSchema before parsing,
// so fields that carry a schema default may be omitted.
export type AgentCard = z.input<typeof AgentCardSchema>;
// Validated type for internal use: the shape produced by AgentCardSchema after parsing,
// with defaults applied.
export type ValidatedAgentCard = z.output<typeof AgentCardSchema>;
/**
 * Builds the agent config schema, with LLM validation strictness chosen by the caller.
 *
 * The options are forwarded unchanged to `createLLMConfigSchema`; every other section is
 * identical regardless of the options.
 *
 * @param options.strict - When true (default), enforces API key and baseURL requirements.
 *                         When false, allows missing credentials for interactive configuration.
 * @returns A strict, branded object schema: unknown top-level keys are rejected.
 */
export function createAgentConfigSchema(options: LLMValidationOptions = {}) {
    const llmSchema = createLLMConfigSchema(options);

    // ========================================
    // REQUIRED FIELDS (user must provide or schema validation fails)
    // ========================================
    const requiredSections = {
        systemPrompt: SystemPromptConfigSchema.describe(
            'System prompt: string shorthand or structured config'
        ),
        llm: llmSchema.describe('Core LLM configuration for the agent'),
    };

    // ========================================
    // OPTIONAL FEATURES (undefined if not provided)
    // ========================================
    const optionalSections = {
        agentCard: AgentCardSchema.describe('Configuration for the agent card').optional(),
        greeting: z
            .string()
            .max(500)
            .describe('Default greeting text to show when a chat starts (for UI consumption)')
            .optional(),
        telemetry: OtelConfigurationSchema.describe(
            'OpenTelemetry configuration for distributed tracing and observability'
        ).optional(),
        memories: MemoriesConfigSchema.describe(
            'Memory configuration for system prompt inclusion (optional feature)'
        ).optional(),
        image: z
            .string()
            .describe(
                'Image package that provides required providers (e.g., "@dexto/image-local"). Optional - platform can load images via CLI flag, environment variable, or static imports.'
            )
            .optional(),
    };

    // ========================================
    // FIELDS WITH DEFAULTS (always present after parsing)
    // ========================================
    const defaultedSections = {
        agentId: z
            .string()
            .describe(
                'Unique identifier for this agent instance - CLI enrichment derives from agentCard.name or filename'
            )
            .default('coding-agent'),
        mcpServers: McpServersConfigSchema.describe(
            'Configurations for MCP (Model Context Protocol) servers used by the agent'
        ).default({}),
        internalTools: InternalToolsSchema.describe(
            'Internal tools configuration (read-file, write-file, bash-exec, etc.)'
        ).default([]),
        customTools: CustomToolsSchema.describe(
            'Custom tool provider configurations. Providers must be registered via customToolRegistry before loading agent config.'
        ).default([]),
        tools: ToolsConfigSchema.describe(
            'Configuration for individual tools (limits, etc.)'
        ).default({}),
        logger: LoggerConfigSchema.describe(
            'Logger configuration with multi-transport support (file, console, remote) - CLI enrichment adds per-agent file transport'
        ).default({
            level: 'error',
            transports: [{ type: 'console', colorize: true }],
        }),
        storage: StorageSchema.describe(
            'Storage configuration for cache, database, and blob storage - defaults to in-memory, CLI enrichment provides filesystem paths'
        ).default({
            cache: { type: 'in-memory' },
            database: { type: 'in-memory' },
            blob: { type: 'in-memory' },
        }),
        sessions: SessionConfigSchema.describe('Session management configuration').default({}),
        toolConfirmation: ToolConfirmationConfigSchema.describe(
            'Tool confirmation and approval configuration'
        ).default({}),
        elicitation: ElicitationConfigSchema.default({}).describe(
            'Elicitation configuration for user input requests (ask_user tool and MCP server elicitations). Independent from toolConfirmation mode.'
        ),
        internalResources: InternalResourcesSchema.describe(
            'Configuration for internal resources (filesystem, etc.)'
        ).default([]),
        prompts: PromptsSchema.describe(
            'Agent prompts configuration - sample prompts which can be defined inline or referenced from file'
        ).default([]),
        plugins: PluginsConfigSchema.describe(
            'Plugin system configuration for built-in and custom plugins'
        ).default({}),
        compaction: CompactionConfigSchema.describe(
            'Context compaction configuration - custom providers can be registered via compactionRegistry'
        ).default(DEFAULT_COMPACTION_CONFIG),
    };

    // Spread order fixes the key order of the resulting shape: required, optional, defaulted.
    return z
        .object({ ...requiredSections, ...optionalSections, ...defaultedSections })
        .strict()
        .describe('Main configuration for an agent, including its LLM and server connections')
        .brand<'ValidatedAgentConfig'>();
}
/**
 * Default agent config schema with strict validation (backwards compatible).
 * Built by calling createAgentConfigSchema with `strict: true`.
 * Use createAgentConfigSchema({ strict: false }) for relaxed validation.
 */
export const AgentConfigSchema = createAgentConfigSchema({ strict: true });
/**
 * Relaxed agent config schema that allows missing API keys and baseURLs.
 * Built by calling createAgentConfigSchema with `strict: false`.
 * Use this for interactive modes (CLI, WebUI) where users can configure later.
 */
export const AgentConfigSchemaRelaxed = createAgentConfigSchema({ strict: false });
// Input type for user-facing API (pre-parsing) - makes fields with defaults optional.
// Derived from the strict schema, not the relaxed one.
export type AgentConfig = z.input<typeof AgentConfigSchema>;
// Validated type for internal use (post-parsing) - all defaults applied.
// Also derived from the strict schema.
export type ValidatedAgentConfig = z.output<typeof AgentConfigSchema>;
// Re-export validation options type for consumers, so they need not import it from the LLM module
export type { LLMValidationOptions };

View File

@@ -0,0 +1,171 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { AgentStateManager } from './state-manager.js';
import { AgentEventBus } from '../events/index.js';
import { AgentConfigSchema } from '@core/agent/schemas.js';
import { LLMConfigSchema } from '@core/llm/schemas.js';
import { McpServerConfigSchema } from '@core/mcp/schemas.js';
import type { AgentConfig, ValidatedAgentConfig } from '@core/agent/schemas.js';
describe('AgentStateManager Events', () => {
let stateManager: AgentStateManager;
let eventBus: AgentEventBus;
let mockConfig: AgentConfig;
let validatedConfig: ValidatedAgentConfig;
let mockLogger: any;
beforeEach(() => {
    // Logger double: every method is a spy, and createChild returns the double itself so
    // child loggers are observed through the same spies.
    mockLogger = {
        debug: vi.fn(),
        info: vi.fn(),
        warn: vi.fn(),
        error: vi.fn(),
        trackException: vi.fn(),
        createChild: vi.fn(function (this: any) {
            return this;
        }),
        destroy: vi.fn(),
    } as any;

    // A fresh bus per test keeps listeners from leaking between cases.
    eventBus = new AgentEventBus();

    // Raw (pre-parse) config with one MCP server named "test" and manual tool confirmation.
    mockConfig = {
        systemPrompt: 'You are a helpful assistant',
        mcpServers: {
            test: {
                type: 'stdio',
                command: 'test',
                args: [],
                env: {},
                timeout: 30000,
                connectionMode: 'lenient',
            },
        },
        llm: {
            provider: 'openai',
            model: 'gpt-5',
            apiKey: 'test-key',
            maxIterations: 50,
        },
        internalTools: [],
        sessions: {
            maxSessions: 100,
            sessionTTL: 3600000,
        },
        toolConfirmation: {
            mode: 'manual',
            timeout: 30000,
            allowedToolsStorage: 'storage',
        },
    };

    // The state manager takes the validated form, so run the raw config through the schema.
    validatedConfig = AgentConfigSchema.parse(mockConfig);
    stateManager = new AgentStateManager(validatedConfig, eventBus, mockLogger);
});
// Title names the event actually subscribed to below (previously a legacy `dexto:*` name).
it('emits state:changed when LLM config is updated', () => {
    const eventSpy = vi.fn();
    eventBus.on('state:changed', eventSpy);
    // Same LLM settings as the baseline except for the model, so old and new are distinguishable.
    const updatedConfig = LLMConfigSchema.parse({
        ...mockConfig.llm,
        model: 'gpt-5-mini',
    });
    // No session id is passed, and the payload is expected to carry sessionId: undefined.
    stateManager.updateLLM(updatedConfig);
    expect(eventSpy).toHaveBeenCalledWith({
        field: 'llm',
        oldValue: expect.objectContaining({ model: 'gpt-5' }),
        newValue: expect.objectContaining({ model: 'gpt-5-mini' }),
        sessionId: undefined,
    });
});
it('emits dexto:mcpServerAdded when adding a new MCP server', () => {
const eventSpy = vi.fn();
eventBus.on('mcp:server-added', eventSpy);
const newServerConfig = McpServerConfigSchema.parse({
type: 'stdio' as const,
command: 'new-server',
args: [],
env: {},
timeout: 30000,
connectionMode: 'lenient' as const,
});
stateManager.setMcpServer('new-server', newServerConfig);
expect(eventSpy).toHaveBeenCalledWith({
serverName: 'new-server',
config: newServerConfig,
});
});
it('emits dexto:mcpServerRemoved when removing an MCP server', () => {
const eventSpy = vi.fn();
eventBus.on('mcp:server-removed', eventSpy);
stateManager.removeMcpServer('test');
expect(eventSpy).toHaveBeenCalledWith({
serverName: 'test',
});
});
it('emits dexto:sessionOverrideSet when setting session overrides', () => {
const eventSpy = vi.fn();
eventBus.on('session:override-set', eventSpy);
const sessionConfig = LLMConfigSchema.parse({
...mockConfig.llm,
model: 'gpt-5',
});
stateManager.updateLLM(sessionConfig, 'session-123');
expect(eventSpy).toHaveBeenCalledWith({
sessionId: 'session-123',
override: expect.objectContaining({
llm: expect.objectContaining({ model: 'gpt-5' }),
}),
});
});
it('emits dexto:sessionOverrideCleared when clearing session overrides', () => {
const eventSpy = vi.fn();
eventBus.on('session:override-cleared', eventSpy);
// First set an override
const sessionConfig = LLMConfigSchema.parse({
...mockConfig.llm,
model: 'gpt-5',
});
stateManager.updateLLM(sessionConfig, 'session-123');
// Then clear it
stateManager.clearSessionOverride('session-123');
expect(eventSpy).toHaveBeenCalledWith({
sessionId: 'session-123',
});
});
it('emits dexto:stateReset when resetting to baseline', () => {
const eventSpy = vi.fn();
eventBus.on('state:reset', eventSpy);
stateManager.resetToBaseline();
expect(eventSpy).toHaveBeenCalledWith({
toConfig: validatedConfig,
});
});
it('emits dexto:stateExported when exporting state as config', () => {
const eventSpy = vi.fn();
eventBus.on('state:exported', eventSpy);
const exported = stateManager.exportAsConfig();
expect(eventSpy).toHaveBeenCalledWith({
config: exported,
});
});
});

View File

@@ -0,0 +1,263 @@
import type { IDextoLogger } from '../logger/v2/types.js';
import { DextoLogComponent } from '../logger/v2/types.js';
import type { ValidatedAgentConfig } from '@core/agent/schemas.js';
import type { ValidatedLLMConfig } from '@core/llm/schemas.js';
import type { ValidatedMcpServerConfig } from '@core/mcp/schemas.js';
import type { AgentEventBus } from '../events/index.js';
/**
 * Session-specific overrides that can differ from the global configuration.
 * AgentStateManager stores one of these per session id.
 */
export interface SessionOverride {
/** Override LLM config for this session - must be a complete validated config */
llm?: ValidatedLLMConfig;
}
/**
 * Manages the runtime configuration of the agent.
 *
 * This class handles dynamic configuration changes that occur during agent execution.
 *
 * Key responsibilities:
 * 1. Track runtime changes separate from static config baseline
 * 2. Support session-specific overrides for LLM settings
 * 3. Dynamic MCP server management (add/remove servers at runtime)
 * 4. Export modified state back to config format
 * 5. Provide change tracking and validation capabilities
 * 6. Maintain effective configuration for each session
 *
 * Isolation contract: configuration objects handed out by this class (getters,
 * exports, reset events) are deep copies, and configuration objects handed in
 * are copied before being stored, so neither side can mutate the other's state
 * by accident.
 */
export class AgentStateManager {
    private runtimeConfig: ValidatedAgentConfig;
    private readonly baselineConfig: ValidatedAgentConfig;
    private sessionOverrides: Map<string, SessionOverride> = new Map();
    private logger: IDextoLogger;

    /**
     * Initialize AgentStateManager from a validated static configuration.
     *
     * @param staticConfig The validated configuration from DextoAgent
     * @param agentEventBus The agent event bus for emitting state change events
     * @param logger Logger instance for this agent
     */
    constructor(
        staticConfig: ValidatedAgentConfig,
        private agentEventBus: AgentEventBus,
        logger: IDextoLogger
    ) {
        // Two independent copies: the baseline never changes, the runtime copy does.
        this.baselineConfig = structuredClone(staticConfig);
        this.runtimeConfig = structuredClone(staticConfig);
        this.logger = logger.createChild(DextoLogComponent.AGENT);
        this.logger.debug('AgentStateManager initialized', {
            staticConfigKeys: Object.keys(this.baselineConfig),
            mcpServerCount: Object.keys(this.runtimeConfig.mcpServers).length,
        });
    }

    // ============= GETTERS =============

    /**
     * Get runtime configuration for a session (includes session overrides if sessionId provided).
     *
     * @param sessionId Optional session whose LLM override should be layered on top
     * @returns A deep copy of the effective configuration; mutating it does not affect this manager
     */
    public getRuntimeConfig(sessionId?: string): Readonly<ValidatedAgentConfig> {
        // Always start from a deep copy so no code path leaks live internal state.
        const effective = structuredClone(this.runtimeConfig);
        const override = sessionId ? this.getSessionOverride(sessionId) : undefined;
        if (override?.llm) {
            effective.llm = { ...effective.llm, ...structuredClone(override.llm) };
        }
        return effective;
    }

    // ============= LLM CONFIGURATION =============

    /**
     * Update the LLM configuration (globally or for a specific session)
     *
     * This method is a pure state updater - it assumes the input has already been validated
     * by the caller (typically DextoAgent.switchLLM). The ValidatedLLMConfig branded type
     * ensures validation has occurred.
     *
     * Emits `state:changed` (and `session:override-set` for session-scoped updates).
     *
     * @param validatedConfig Complete, already validated LLM configuration
     * @param sessionId When given, only that session's override is changed
     */
    public updateLLM(validatedConfig: ValidatedLLMConfig, sessionId?: string): void {
        const oldValue = sessionId ? this.getRuntimeConfig(sessionId).llm : this.runtimeConfig.llm;
        // Store a private copy so later mutation of the caller's object cannot alter state.
        const stored = structuredClone(validatedConfig);
        if (sessionId) {
            this.setSessionOverride(sessionId, { llm: stored });
        } else {
            this.runtimeConfig.llm = stored;
        }
        this.agentEventBus.emit('state:changed', {
            field: 'llm',
            oldValue,
            newValue: validatedConfig,
            // sessionId key is only present for session-scoped updates
            ...(sessionId ? { sessionId } : {}),
        });
        this.logger.info('LLM config updated', {
            sessionId,
            provider: validatedConfig.provider,
            model: validatedConfig.model,
            isSessionSpecific: !!sessionId,
        });
    }

    // ============= MCP SERVER MANAGEMENT =============

    /**
     * Whether a server with this exact name is configured.
     * Uses an own-property check: the `in` operator would also match inherited
     * Object.prototype members such as "toString" or "constructor".
     */
    private hasMcpServer(serverName: string): boolean {
        return Object.prototype.hasOwnProperty.call(this.runtimeConfig.mcpServers, serverName);
    }

    /**
     * Set an MCP server configuration at runtime (add or update).
     *
     * This method is a pure state updater - it assumes the input has already been validated
     * by the caller (typically DextoAgent). The ValidatedMcpServerConfig branded type
     * ensures validation has occurred.
     *
     * Emits `mcp:server-added` or `mcp:server-updated`, followed by `state:changed`.
     *
     * @param serverName Key under which the server is stored
     * @param validatedConfig Already validated server configuration
     */
    public setMcpServer(serverName: string, validatedConfig: ValidatedMcpServerConfig): void {
        this.logger.debug(`Setting MCP server: ${serverName}`);
        // Update state (store a private copy, see class-level isolation contract)
        const isUpdate = this.hasMcpServer(serverName);
        this.runtimeConfig.mcpServers[serverName] = structuredClone(validatedConfig);
        // Emit events
        const eventName = isUpdate ? 'mcp:server-updated' : 'mcp:server-added';
        this.agentEventBus.emit(eventName, { serverName, config: validatedConfig });
        this.agentEventBus.emit('state:changed', {
            field: 'mcpServers',
            // For MCP changes oldValue carries the kind of change, not the previous config
            oldValue: isUpdate ? 'updated' : 'added',
            newValue: validatedConfig,
            // sessionId omitted - MCP servers are global
        });
        this.logger.info(
            `MCP server '${serverName}' ${isUpdate ? 'updated' : 'added'} successfully`
        );
    }

    /**
     * Remove an MCP server configuration at runtime.
     * Emits `mcp:server-removed` and `state:changed` when the server existed;
     * otherwise only logs a warning.
     *
     * @param serverName Key of the server to remove
     */
    public removeMcpServer(serverName: string): void {
        this.logger.debug(`Removing MCP server: ${serverName}`);
        if (!this.hasMcpServer(serverName)) {
            this.logger.warn(`MCP server '${serverName}' not found for removal`);
            return;
        }
        delete this.runtimeConfig.mcpServers[serverName];
        this.agentEventBus.emit('mcp:server-removed', { serverName });
        this.agentEventBus.emit('state:changed', {
            field: 'mcpServers',
            oldValue: 'removed',
            newValue: undefined,
            // sessionId omitted - MCP servers are global
        });
        this.logger.info(`MCP server '${serverName}' removed successfully`);
    }

    // ============= SESSION MANAGEMENT =============

    /**
     * Set a session-specific override and announce it with `session:override-set`.
     * The event carries a deep copy of the override.
     */
    private setSessionOverride(sessionId: string, override: SessionOverride): void {
        this.sessionOverrides.set(sessionId, override);
        this.agentEventBus.emit('session:override-set', {
            sessionId,
            override: structuredClone(override),
        });
    }

    /**
     * Get a session override (internal helper)
     */
    private getSessionOverride(sessionId: string): SessionOverride | undefined {
        return this.sessionOverrides.get(sessionId);
    }

    /**
     * Clear session-specific overrides.
     * Emits `session:override-cleared` only if the session actually had an override.
     */
    public clearSessionOverride(sessionId: string): void {
        const hadOverride = this.sessionOverrides.has(sessionId);
        this.sessionOverrides.delete(sessionId);
        if (hadOverride) {
            this.agentEventBus.emit('session:override-cleared', { sessionId });
            this.logger.info('Session override cleared', { sessionId });
        }
    }

    /**
     * Clear all session overrides (private helper for resetToBaseline).
     * Emits one `session:override-cleared` event per cleared session.
     */
    private clearAllSessionOverrides(): void {
        // Snapshot the ids first: events are emitted after the map is already empty.
        const sessionIds = Array.from(this.sessionOverrides.keys());
        this.sessionOverrides.clear();
        for (const sessionId of sessionIds) {
            this.agentEventBus.emit('session:override-cleared', { sessionId });
        }
        if (sessionIds.length > 0) {
            this.logger.info('All session overrides cleared', { clearedSessions: sessionIds });
        }
    }

    // ============= CONFIG EXPORT =============

    /**
     * Export current runtime state as config.
     * This allows users to save their runtime modifications as a new agent config.
     *
     * The result is the baseline with the runtime `llm`, `systemPrompt` and
     * `mcpServers` applied. Session overrides are not included.
     *
     * @returns A config object that shares no nested objects with internal state
     */
    public exportAsConfig(): ValidatedAgentConfig {
        const exportedConfig: ValidatedAgentConfig = {
            // Deep copy: a shallow spread would share nested baseline objects with the caller
            ...structuredClone(this.baselineConfig),
            llm: structuredClone(this.runtimeConfig.llm),
            systemPrompt: structuredClone(this.runtimeConfig.systemPrompt),
            mcpServers: structuredClone(this.runtimeConfig.mcpServers),
        };
        this.agentEventBus.emit('state:exported', {
            config: exportedConfig,
        });
        // NOTE(review): this logs the full config, which includes the LLM section;
        // confirm the logger redacts secrets such as API keys.
        this.logger.info('Runtime state exported as config', {
            exportedConfig,
        });
        return exportedConfig;
    }

    /**
     * Reset runtime state back to baseline configuration.
     * Clears every session override and emits `state:reset` with a copy of the baseline.
     */
    public resetToBaseline(): void {
        this.runtimeConfig = structuredClone(this.baselineConfig);
        this.clearAllSessionOverrides();
        // Emit a copy so listeners cannot mutate the immutable baseline
        this.agentEventBus.emit('state:reset', { toConfig: structuredClone(this.baselineConfig) });
        this.logger.info('Runtime state reset to baseline config');
    }

    // ============= CONVENIENCE GETTERS FOR USED FUNCTIONALITY =============

    /**
     * Get the current effective LLM configuration for a session.
     * **Use this for session-specific LLM config** (includes session overrides).
     */
    public getLLMConfig(sessionId?: string): Readonly<ValidatedLLMConfig> {
        return this.getRuntimeConfig(sessionId).llm;
    }
}

View File

@@ -0,0 +1,82 @@
/**
* Type definitions for DextoAgent generate() and stream() APIs
*
* Re-uses existing types from context, llm/services, and events to avoid duplication.
*/
import type { ContentPart } from '../context/types.js';
import type { LLMTokenUsage } from '../llm/services/types.js';
/**
* Re-export content part types for API consumers
*/
export type {
ContentPart,
TextPart,
ImagePart,
FilePart,
ImageData,
FileData,
} from '../context/types.js';
export type { LLMTokenUsage as TokenUsage } from '../llm/services/types.js';
/**
 * Tool call information for agent streaming
 * Simplified version of tools/types.ts ToolCall for streaming context
 */
export interface AgentToolCall {
/** Name of the tool that was called */
toolName: string;
/** Arguments passed to the tool, keyed by parameter name */
args: Record<string, any>;
/** Identifier of this tool call */
callId: string;
/** Outcome of the call; optional and may be explicitly undefined */
result?:
| {
success: boolean;
data: any;
}
| undefined;
}
/**
* Content input for generate() and stream() methods.
* Can be a simple string (for text-only messages) or an array of ContentPart (for multimodal).
*
* @example
* ```typescript
* // Simple text
* agent.generate('What is 2+2?', sessionId);
*
* // Multimodal with image
* agent.generate([
* { type: 'text', text: 'Describe this image' },
* { type: 'image', image: base64Data, mimeType: 'image/png' }
* ], sessionId);
* ```
*/
export type ContentInput = string | ContentPart[];
/**
 * Options for generate() and stream() methods
 * (StreamOptions is an alias of this type).
 */
export interface GenerateOptions {
/** AbortSignal for cancellation */
signal?: AbortSignal;
}
/**
 * Complete response from generate() method
 */
export interface GenerateResponse {
/** Response content as a string */
content: string;
/** Extended thinking for o1/o3 models; optional and may be explicitly undefined */
reasoning?: string | undefined; // Extended thinking for o1/o3 models
/** Token usage, typed as LLMTokenUsage from the LLM services layer */
usage: LLMTokenUsage;
/** Tool calls reported with this response */
toolCalls: AgentToolCall[];
/** Session id associated with this response */
sessionId: string;
}
/**
* Options for stream() method (same as generate)
*
* Note: stream() now returns core StreamingEvent types directly from the event system.
* See packages/core/src/events/index.ts for event definitions.
*/
export type StreamOptions = GenerateOptions;

View File

@@ -0,0 +1,31 @@
/**
 * Error codes for the approval system
 * Covers validation, timeout, cancellation, provider, type-specific
 * (tool confirmation / elicitation) and configuration errors.
 *
 * Each member's value is its name in lower snake case.
 */
export enum ApprovalErrorCode {
// Validation errors (malformed request, response, metadata or elicitation schema)
APPROVAL_INVALID_REQUEST = 'approval_invalid_request',
APPROVAL_INVALID_RESPONSE = 'approval_invalid_response',
APPROVAL_INVALID_METADATA = 'approval_invalid_metadata',
APPROVAL_INVALID_SCHEMA = 'approval_invalid_schema',
// Timeout errors
APPROVAL_TIMEOUT = 'approval_timeout',
// Cancellation errors (a single request, or all pending requests)
APPROVAL_CANCELLED = 'approval_cancelled',
APPROVAL_CANCELLED_ALL = 'approval_cancelled_all',
// Provider errors
APPROVAL_PROVIDER_NOT_CONFIGURED = 'approval_provider_not_configured',
APPROVAL_PROVIDER_ERROR = 'approval_provider_error',
APPROVAL_NOT_FOUND = 'approval_not_found',
// Type-specific errors (tool confirmation and elicitation)
APPROVAL_TOOL_CONFIRMATION_DENIED = 'approval_tool_confirmation_denied',
APPROVAL_ELICITATION_DENIED = 'approval_elicitation_denied',
APPROVAL_ELICITATION_VALIDATION_FAILED = 'approval_elicitation_validation_failed',
// Configuration errors
APPROVAL_CONFIG_INVALID = 'approval_config_invalid',
}

View File

@@ -0,0 +1,425 @@
import { DextoRuntimeError, ErrorScope, ErrorType } from '../errors/index.js';
import { ApprovalErrorCode } from './error-codes.js';
import type { ApprovalType, DenialReason } from './types.js';
/**
 * Context for approval validation errors.
 * All fields are optional; factories fill in only what they know.
 */
export interface ApprovalValidationContext {
/** Id of the approval being validated */
approvalId?: string;
/** Approval type the validation applies to */
type?: ApprovalType;
/** Name of the offending field */
field?: string;
/** Description of why validation failed */
reason?: string;
}
/**
 * Context for approval timeout errors
 */
export interface ApprovalTimeoutContext {
/** Id of the approval request that timed out */
approvalId: string;
/** Type of the approval request */
type: ApprovalType;
/** Timeout that elapsed, in milliseconds */
timeout: number;
/** Session the request belonged to, when known */
sessionId?: string;
}
/**
 * Context for approval cancellation errors.
 * approvalId and type are absent when all pending approvals are cancelled at once.
 */
export interface ApprovalCancellationContext {
/** Id of the cancelled approval request */
approvalId?: string;
/** Type of the cancelled approval request */
type?: ApprovalType;
/** Reason given for the cancellation, if any */
reason?: string;
}
/**
 * Context for elicitation validation errors
 */
export interface ElicitationValidationContext {
/** Id of the approval request whose form failed validation */
approvalId: string;
/** Name of the MCP server involved in the elicitation */
serverName: string;
/** Individual validation error messages */
errors: string[];
}
/**
 * Static factories for every error the approval system can raise.
 *
 * Each factory returns a DextoRuntimeError built from an ApprovalErrorCode,
 * ErrorScope.TOOLS (approvals are part of the tool execution flow), an
 * ErrorType, a human-readable message, a context object and, for most
 * factories, a list of recovery suggestions.
 */
export class ApprovalError {
    /**
     * Malformed approval request.
     *
     * @param reason Why the request is invalid (appended to the message)
     * @param context Optional validation details passed through unchanged
     */
    static invalidRequest(
        reason: string,
        context?: ApprovalValidationContext
    ): DextoRuntimeError<ApprovalValidationContext> {
        const suggestions = [
            'Check the approval request structure',
            'Ensure all required fields are provided',
        ];
        const message = `Invalid approval request: ${reason}`;
        return new DextoRuntimeError(
            ApprovalErrorCode.APPROVAL_INVALID_REQUEST,
            ErrorScope.TOOLS,
            ErrorType.USER,
            message,
            context,
            suggestions
        );
    }

    /**
     * Malformed approval response.
     *
     * @param reason Why the response is invalid (appended to the message)
     * @param context Optional validation details passed through unchanged
     */
    static invalidResponse(
        reason: string,
        context?: ApprovalValidationContext
    ): DextoRuntimeError<ApprovalValidationContext> {
        const suggestions = [
            'Check the approval response structure',
            'Ensure approvalId matches the request',
            'Verify status is valid',
        ];
        const message = `Invalid approval response: ${reason}`;
        return new DextoRuntimeError(
            ApprovalErrorCode.APPROVAL_INVALID_RESPONSE,
            ErrorScope.TOOLS,
            ErrorType.USER,
            message,
            context,
            suggestions
        );
    }

    /**
     * Metadata does not fit the given approval type.
     *
     * @param type Approval type whose metadata was rejected
     * @param reason Why the metadata is invalid
     */
    static invalidMetadata(
        type: ApprovalType,
        reason: string
    ): DextoRuntimeError<ApprovalValidationContext> {
        const message = `Invalid metadata for ${type}: ${reason}`;
        return new DextoRuntimeError(
            ApprovalErrorCode.APPROVAL_INVALID_METADATA,
            ErrorScope.TOOLS,
            ErrorType.USER,
            message,
            { type, reason },
            ['Check the metadata structure for this approval type']
        );
    }

    /**
     * Elicitation schema is not usable.
     *
     * @param reason Why the schema is invalid
     */
    static invalidSchema(reason: string): DextoRuntimeError<ApprovalValidationContext> {
        const message = `Invalid elicitation schema: ${reason}`;
        return new DextoRuntimeError(
            ApprovalErrorCode.APPROVAL_INVALID_SCHEMA,
            ErrorScope.TOOLS,
            ErrorType.USER,
            message,
            { reason },
            ['Ensure the schema is a valid JSON Schema', 'Check MCP server implementation']
        );
    }

    /**
     * Approval request was not answered in time.
     *
     * @param approvalId Id of the request that timed out
     * @param type Type of the request
     * @param timeout Elapsed timeout in milliseconds
     * @param sessionId Added to the context only when defined
     */
    static timeout(
        approvalId: string,
        type: ApprovalType,
        timeout: number,
        sessionId?: string
    ): DextoRuntimeError<ApprovalTimeoutContext> {
        // sessionId key is only present when a value was supplied
        const context: ApprovalTimeoutContext =
            sessionId === undefined
                ? { approvalId, type, timeout }
                : { approvalId, type, timeout, sessionId };
        const suggestions = [
            'Increase the timeout value',
            'Respond to approval requests more quickly',
            'Check if approval UI is functioning',
        ];
        return new DextoRuntimeError(
            ApprovalErrorCode.APPROVAL_TIMEOUT,
            ErrorScope.TOOLS,
            ErrorType.TIMEOUT,
            `Approval request timed out after ${timeout}ms`,
            context,
            suggestions
        );
    }

    /**
     * A single approval request was cancelled.
     *
     * @param approvalId Id of the cancelled request
     * @param type Type of the cancelled request
     * @param reason Optional explanation; used in the message when non-empty
     */
    static cancelled(
        approvalId: string,
        type: ApprovalType,
        reason?: string
    ): DextoRuntimeError<ApprovalCancellationContext> {
        let message = 'Approval request was cancelled';
        if (reason) {
            message = `Approval request cancelled: ${reason}`;
        }
        // reason key is only present when a value was supplied
        const context: ApprovalCancellationContext =
            reason === undefined ? { approvalId, type } : { approvalId, type, reason };
        return new DextoRuntimeError(
            ApprovalErrorCode.APPROVAL_CANCELLED,
            ErrorScope.TOOLS,
            ErrorType.USER,
            message,
            context
        );
    }

    /**
     * Every pending approval request was cancelled.
     *
     * @param reason Optional explanation; used in the message when non-empty
     */
    static cancelledAll(reason?: string): DextoRuntimeError<ApprovalCancellationContext> {
        let message = 'All approval requests were cancelled';
        if (reason) {
            message = `All approval requests cancelled: ${reason}`;
        }
        const context: ApprovalCancellationContext = reason === undefined ? {} : { reason };
        return new DextoRuntimeError(
            ApprovalErrorCode.APPROVAL_CANCELLED_ALL,
            ErrorScope.TOOLS,
            ErrorType.USER,
            message,
            context
        );
    }

    /**
     * No approval provider has been configured.
     */
    static providerNotConfigured(): DextoRuntimeError<Record<string, never>> {
        const suggestions = [
            'Configure an approval provider in your agent configuration',
            'Check approval.mode in agent.yml',
        ];
        return new DextoRuntimeError(
            ApprovalErrorCode.APPROVAL_PROVIDER_NOT_CONFIGURED,
            ErrorScope.TOOLS,
            ErrorType.SYSTEM,
            'Approval provider not configured',
            {},
            suggestions
        );
    }

    /**
     * The approval provider itself failed.
     *
     * @param message Description of the failure
     * @param cause Underlying error; its message is copied into the context when defined
     */
    static providerError(message: string, cause?: Error): DextoRuntimeError<{ cause?: string }> {
        const context: { cause?: string } =
            cause?.message !== undefined ? { cause: cause.message } : {};
        return new DextoRuntimeError(
            ApprovalErrorCode.APPROVAL_PROVIDER_ERROR,
            ErrorScope.TOOLS,
            ErrorType.SYSTEM,
            `Approval provider error: ${message}`,
            context,
            ['Check approval provider implementation', 'Review system logs for details']
        );
    }

    /**
     * No approval request exists for the given id.
     *
     * @param approvalId Id that could not be found
     */
    static notFound(approvalId: string): DextoRuntimeError<{ approvalId: string }> {
        const suggestions = [
            'Verify the approvalId is correct',
            'Check if the approval has already been resolved or timed out',
        ];
        return new DextoRuntimeError(
            ApprovalErrorCode.APPROVAL_NOT_FOUND,
            ErrorScope.TOOLS,
            ErrorType.NOT_FOUND,
            `Approval request not found: ${approvalId}`,
            { approvalId },
            suggestions
        );
    }

    /**
     * A tool confirmation was denied.
     *
     * @param toolName Tool whose execution was denied
     * @param reason Selects the default message and suggestions; unknown or missing reasons use a generic text
     * @param customMessage Replaces the default message when not null/undefined
     * @param sessionId Added to the context when truthy
     */
    static toolConfirmationDenied(
        toolName: string,
        reason?: DenialReason,
        customMessage?: string,
        sessionId?: string
    ): DextoRuntimeError<{ toolName: string; reason?: DenialReason; sessionId?: string }> {
        type Wording = { text: string; suggestions: string[] };
        // Default wording per denial reason (rebuilt per call so arrays are never shared)
        const wordingByReason = new Map<string, Wording>([
            [
                'user_denied',
                {
                    text: `Tool execution denied by user: ${toolName}`,
                    suggestions: ['Tool was denied by user'],
                },
            ],
            [
                'system_denied',
                {
                    text: `Tool execution denied by system policy: ${toolName}`,
                    suggestions: [
                        'Tool is in the alwaysDeny list',
                        'Check toolConfirmation.toolPolicies in agent configuration',
                    ],
                },
            ],
            [
                'timeout',
                {
                    text: `Tool confirmation timed out: ${toolName}`,
                    suggestions: [
                        'Increase the timeout value',
                        'Respond to approval requests more quickly',
                    ],
                },
            ],
        ]);
        const generic: Wording = {
            text: `Tool execution denied: ${toolName}`,
            suggestions: ['Approve the tool in the confirmation dialog', 'Check tool permissions'],
        };
        const wording =
            reason === undefined ? generic : (wordingByReason.get(reason) ?? generic);
        const message = customMessage ?? wording.text;

        const context: { toolName: string; reason?: DenialReason; sessionId?: string } = {
            toolName,
        };
        if (reason) {
            context.reason = reason;
        }
        if (sessionId) {
            context.sessionId = sessionId;
        }
        return new DextoRuntimeError(
            ApprovalErrorCode.APPROVAL_TOOL_CONFIRMATION_DENIED,
            ErrorScope.TOOLS,
            ErrorType.FORBIDDEN,
            message,
            context,
            wording.suggestions
        );
    }

    /**
     * An elicitation request was denied, cancelled, timed out or otherwise refused.
     *
     * @param serverName MCP server that requested the input
     * @param reason Selects the default message and suggestions; unknown or missing reasons use a generic text
     * @param customMessage Replaces the default message when not null/undefined
     * @param sessionId Added to the context when truthy
     */
    static elicitationDenied(
        serverName: string,
        reason?: DenialReason,
        customMessage?: string,
        sessionId?: string
    ): DextoRuntimeError<{ serverName: string; reason?: DenialReason; sessionId?: string }> {
        type Wording = { text: string; suggestions: string[] };
        // Default wording per denial reason (rebuilt per call so arrays are never shared)
        const wordingByReason = new Map<string, Wording>([
            [
                'user_denied',
                {
                    text: `Elicitation request denied by user from MCP server: ${serverName}`,
                    suggestions: [
                        'User clicked deny on the form',
                        'The agent cannot proceed without this input',
                    ],
                },
            ],
            [
                'user_cancelled',
                {
                    text: `Elicitation request cancelled by user from MCP server: ${serverName}`,
                    suggestions: [
                        'User cancelled the form',
                        'The agent cannot proceed without this input',
                    ],
                },
            ],
            [
                'system_cancelled',
                {
                    text: `Elicitation request cancelled from MCP server: ${serverName}`,
                    suggestions: ['Session may have ended', 'Try again'],
                },
            ],
            [
                'timeout',
                {
                    text: `Elicitation request timed out from MCP server: ${serverName}`,
                    suggestions: [
                        'Increase the timeout value',
                        'Respond to elicitation requests more quickly',
                    ],
                },
            ],
            [
                'elicitation_disabled',
                {
                    text: `Elicitation is disabled. Cannot request input from MCP server: ${serverName}`,
                    suggestions: [
                        'Enable elicitation in your agent configuration',
                        'Set elicitation.enabled: true in agent.yml',
                    ],
                },
            ],
            [
                'validation_failed',
                {
                    text: `Elicitation form validation failed from MCP server: ${serverName}`,
                    suggestions: ['Check the form inputs match the schema requirements'],
                },
            ],
        ]);
        const generic: Wording = {
            text: `Elicitation request denied from MCP server: ${serverName}`,
            suggestions: ['Complete the requested form', 'Check MCP server requirements'],
        };
        const wording =
            reason === undefined ? generic : (wordingByReason.get(reason) ?? generic);
        const message = customMessage ?? wording.text;

        const context: { serverName: string; reason?: DenialReason; sessionId?: string } = {
            serverName,
        };
        if (reason) {
            context.reason = reason;
        }
        if (sessionId) {
            context.sessionId = sessionId;
        }
        return new DextoRuntimeError(
            ApprovalErrorCode.APPROVAL_ELICITATION_DENIED,
            ErrorScope.TOOLS,
            ErrorType.FORBIDDEN,
            message,
            context,
            wording.suggestions
        );
    }

    /**
     * Submitted elicitation form data failed validation.
     *
     * @param serverName MCP server that requested the input
     * @param errors Validation messages; joined with ", " in the error message
     * @param approvalId Id of the approval request
     */
    static elicitationValidationFailed(
        serverName: string,
        errors: string[],
        approvalId: string
    ): DextoRuntimeError<ElicitationValidationContext> {
        const joined = errors.join(', ');
        return new DextoRuntimeError(
            ApprovalErrorCode.APPROVAL_ELICITATION_VALIDATION_FAILED,
            ErrorScope.TOOLS,
            ErrorType.USER,
            `Elicitation form validation failed: ${joined}`,
            { approvalId, serverName, errors },
            ['Check the form inputs match the schema requirements', 'Review validation errors']
        );
    }

    /**
     * Approval configuration is invalid.
     *
     * @param reason Why the configuration is invalid
     */
    static invalidConfig(reason: string): DextoRuntimeError<{ reason: string }> {
        const message = `Invalid approval configuration: ${reason}`;
        return new DextoRuntimeError(
            ApprovalErrorCode.APPROVAL_CONFIG_INVALID,
            ErrorScope.TOOLS,
            ErrorType.USER,
            message,
            { reason },
            ['Check approval configuration in agent.yml', 'Review approval.mode and related fields']
        );
    }
}

View File

@@ -0,0 +1,22 @@
import { randomUUID } from 'crypto';
import type { ApprovalRequest, ApprovalRequestDetails } from './types.js';
/**
 * Builds a complete approval request from simplified details.
 *
 * Generic helper used by ApprovalManager: the caller supplies type, session,
 * timeout and metadata; this function adds a freshly generated UUID as
 * `approvalId` and the current time as `timestamp`.
 *
 * @param details - Simplified approval request details without ID and timestamp
 * @returns A complete ApprovalRequest with generated UUID and current timestamp
 */
export function createApprovalRequest(details: ApprovalRequestDetails): ApprovalRequest {
    const { type, sessionId, timeout, metadata } = details;
    // sessionId and timeout keys are always present, even when their value is undefined
    const request = {
        approvalId: randomUUID(),
        type,
        sessionId,
        timeout,
        timestamp: new Date(),
        metadata,
    };
    return request as ApprovalRequest;
}

View File

@@ -0,0 +1,72 @@
// ============================================================================
// USER APPROVAL SYSTEM - Public API
// ============================================================================
// Types
export type {
ApprovalHandler,
ApprovalRequest,
ApprovalResponse,
ApprovalRequestDetails,
ElicitationMetadata,
ElicitationRequest,
ElicitationResponse,
ElicitationResponseData,
CustomApprovalMetadata,
CustomApprovalRequest,
CustomApprovalResponse,
CustomApprovalResponseData,
BaseApprovalRequest,
BaseApprovalResponse,
} from './types.js';
// Internal types - not exported to avoid naming conflicts with tools module
// ToolConfirmationMetadata, ToolConfirmationRequest, ToolConfirmationResponse, ToolConfirmationResponseData
export { ApprovalType, ApprovalStatus, DenialReason } from './types.js';
// Schemas
export {
ApprovalTypeSchema,
ApprovalStatusSchema,
DenialReasonSchema,
ToolConfirmationMetadataSchema,
ElicitationMetadataSchema,
CustomApprovalMetadataSchema,
BaseApprovalRequestSchema,
ToolConfirmationRequestSchema,
ElicitationRequestSchema,
CustomApprovalRequestSchema,
ApprovalRequestSchema,
ToolConfirmationResponseDataSchema,
ElicitationResponseDataSchema,
CustomApprovalResponseDataSchema,
BaseApprovalResponseSchema,
ToolConfirmationResponseSchema,
ElicitationResponseSchema,
CustomApprovalResponseSchema,
ApprovalResponseSchema,
ApprovalRequestDetailsSchema,
} from './schemas.js';
export type {
ValidatedApprovalRequest,
ValidatedApprovalResponse,
ValidatedToolConfirmationRequest,
ValidatedElicitationRequest,
ValidatedCustomApprovalRequest,
} from './schemas.js';
// Error codes and errors
export { ApprovalErrorCode } from './error-codes.js';
export { ApprovalError } from './errors.js';
export type {
ApprovalValidationContext,
ApprovalTimeoutContext,
ApprovalCancellationContext,
ElicitationValidationContext,
} from './errors.js';
// Manager
export { ApprovalManager } from './manager.js';
export type { ApprovalManagerConfig } from './manager.js';

View File

@@ -0,0 +1,957 @@
import { describe, it, expect, beforeEach } from 'vitest';
import { ApprovalManager } from './manager.js';
import { ApprovalStatus, DenialReason } from './types.js';
import { AgentEventBus } from '../events/index.js';
import { DextoRuntimeError } from '../errors/index.js';
import { ApprovalErrorCode } from './error-codes.js';
import { createMockLogger } from '../logger/v2/test-utils.js';
describe('ApprovalManager', () => {
let agentEventBus: AgentEventBus;
const mockLogger = createMockLogger();
beforeEach(() => {
agentEventBus = new AgentEventBus();
});
describe('Configuration - Separate tool and elicitation control', () => {
it('should allow auto-approve for tools while elicitation is enabled', async () => {
const manager = new ApprovalManager(
{
toolConfirmation: {
mode: 'auto-approve',
timeout: 120000,
},
elicitation: {
enabled: true,
timeout: 120000,
},
},
mockLogger
);
// Tool confirmation should be auto-approved
const toolResponse = await manager.requestToolConfirmation({
toolName: 'test_tool',
toolCallId: 'test-call-id',
args: { foo: 'bar' },
});
expect(toolResponse.status).toBe(ApprovalStatus.APPROVED);
});
it('should reject elicitation when disabled, even if tools are auto-approved', async () => {
const manager = new ApprovalManager(
{
toolConfirmation: {
mode: 'auto-approve',
timeout: 120000,
},
elicitation: {
enabled: false,
timeout: 120000,
},
},
mockLogger
);
// Elicitation should throw error when disabled
await expect(
manager.requestElicitation({
schema: {
type: 'object' as const,
properties: {
name: { type: 'string' as const },
},
},
prompt: 'Enter your name',
serverName: 'Test Server',
})
).rejects.toThrow(DextoRuntimeError);
await expect(
manager.requestElicitation({
schema: {
type: 'object' as const,
properties: {
name: { type: 'string' as const },
},
},
prompt: 'Enter your name',
serverName: 'Test Server',
})
).rejects.toThrow(/Elicitation is disabled/);
});
it('should auto-deny tools while elicitation is enabled', async () => {
const manager = new ApprovalManager(
{
toolConfirmation: {
mode: 'auto-deny',
timeout: 120000,
},
elicitation: {
enabled: true,
timeout: 120000,
},
},
mockLogger
);
// Tool confirmation should be auto-denied
const toolResponse = await manager.requestToolConfirmation({
toolName: 'test_tool',
toolCallId: 'test-call-id',
args: { foo: 'bar' },
});
expect(toolResponse.status).toBe(ApprovalStatus.DENIED);
});
it('should use separate timeouts for tools and elicitation', () => {
const manager = new ApprovalManager(
{
toolConfirmation: {
mode: 'manual',
timeout: 60000,
},
elicitation: {
enabled: true,
timeout: 180000,
},
},
mockLogger
);
const config = manager.getConfig();
expect(config.toolConfirmation.timeout).toBe(60000);
expect(config.elicitation.timeout).toBe(180000);
});
});
describe('Approval routing by type', () => {
it('should route tool confirmations to tool provider', async () => {
const manager = new ApprovalManager(
{
toolConfirmation: {
mode: 'auto-approve',
timeout: 120000,
},
elicitation: {
enabled: true,
timeout: 120000,
},
},
mockLogger
);
const response = await manager.requestToolConfirmation({
toolName: 'test_tool',
toolCallId: 'test-call-id',
args: {},
});
expect(response.status).toBe(ApprovalStatus.APPROVED);
});
it('should route command confirmations to tool provider', async () => {
const manager = new ApprovalManager(
{
toolConfirmation: {
mode: 'auto-approve',
timeout: 120000,
},
elicitation: {
enabled: true,
timeout: 120000,
},
},
mockLogger
);
const response = await manager.requestCommandConfirmation({
toolName: 'bash_exec',
command: 'rm -rf /',
originalCommand: 'rm -rf /',
});
expect(response.status).toBe(ApprovalStatus.APPROVED);
});
it('should route elicitation to elicitation provider when enabled', async () => {
const manager = new ApprovalManager(
{
toolConfirmation: {
mode: 'auto-deny', // Different mode for tools
timeout: 120000,
},
elicitation: {
enabled: true,
timeout: 120000,
},
},
mockLogger
);
// Elicitation should not be auto-denied (uses manual handler)
// We'll timeout immediately to avoid hanging tests
await expect(
manager.requestElicitation({
schema: {
type: 'object' as const,
properties: {
name: { type: 'string' as const },
},
},
prompt: 'Enter your name',
serverName: 'Test Server',
timeout: 1, // 1ms timeout to fail fast
})
).rejects.toThrow(); // Should timeout, not be auto-denied
});
});
describe('Pending approvals tracking', () => {
it('should track pending approvals across both providers', () => {
const manager = new ApprovalManager(
{
toolConfirmation: {
mode: 'manual',
timeout: 120000,
},
elicitation: {
enabled: true,
timeout: 120000,
},
},
mockLogger
);
// Initially no pending approvals
expect(manager.getPendingApprovals()).toEqual([]);
// Auto-approve mode would not create pending approvals
// Event-based mode would, but we don't want hanging requests in tests
});
it('should cancel approvals in both providers', () => {
const manager = new ApprovalManager(
{
toolConfirmation: {
mode: 'manual',
timeout: 120000,
},
elicitation: {
enabled: true,
timeout: 120000,
},
},
mockLogger
);
// Should not throw when cancelling (even if approval doesn't exist)
expect(() => manager.cancelApproval('test-id')).not.toThrow();
expect(() => manager.cancelAllApprovals()).not.toThrow();
});
});
describe('Error handling', () => {
    // Manager with elicitation switched off; tool confirmations auto-approve so
    // only the elicitation path can fail.
    const createManagerWithoutElicitation = () =>
        new ApprovalManager(
            {
                toolConfirmation: { mode: 'auto-approve', timeout: 120000 },
                elicitation: { enabled: false, timeout: 120000 },
            },
            mockLogger
        );

    // Minimal elicitation payload asking for a single string field.
    const buildNameRequest = () => ({
        schema: {
            type: 'object' as const,
            properties: {
                name: { type: 'string' as const },
            },
        },
        prompt: 'Enter your name',
        serverName: 'Test Server',
    });

    it('should throw clear error when elicitation is disabled', async () => {
        const manager = createManagerWithoutElicitation();
        const pendingData = manager.getElicitationData(buildNameRequest());
        await expect(pendingData).rejects.toThrow(/Elicitation is disabled/);
    });

    it('should provide helpful error message about enabling elicitation', async () => {
        const manager = createManagerWithoutElicitation();
        try {
            await manager.requestElicitation(buildNameRequest());
            expect.fail('Should have thrown error');
        } catch (error) {
            expect(error).toBeInstanceOf(DextoRuntimeError);
            // The message should tell the user how to fix the situation.
            const { message } = error as Error;
            expect(message).toContain('Enable elicitation');
            expect(message).toContain('agent configuration');
        }
    });
});
describe('Timeout Configuration', () => {
    type ManagerConfig = ConstructorParameters<typeof ApprovalManager>[0];

    // Every case differs only in its configuration, so take it as the argument.
    const createManager = (config: ManagerConfig) => new ApprovalManager(config, mockLogger);

    it('should allow undefined timeout (infinite wait) for tool confirmation', () => {
        const manager = createManager({
            // toolConfirmation has no timeout - should wait indefinitely
            toolConfirmation: { mode: 'manual' },
            elicitation: { enabled: true, timeout: 120000 },
        });

        const { toolConfirmation } = manager.getConfig();
        expect(toolConfirmation.timeout).toBeUndefined();
    });

    it('should allow undefined timeout (infinite wait) for elicitation', () => {
        const manager = createManager({
            toolConfirmation: { mode: 'manual', timeout: 60000 },
            // elicitation has no timeout - should wait indefinitely
            elicitation: { enabled: true },
        });

        const { elicitation } = manager.getConfig();
        expect(elicitation.timeout).toBeUndefined();
    });

    it('should allow both timeouts to be undefined (infinite wait for all approvals)', () => {
        const manager = createManager({
            toolConfirmation: { mode: 'manual' },
            elicitation: { enabled: true },
        });

        const { toolConfirmation, elicitation } = manager.getConfig();
        expect(toolConfirmation.timeout).toBeUndefined();
        expect(elicitation.timeout).toBeUndefined();
    });

    it('should use per-request timeout override when provided', async () => {
        const manager = createManager({
            // Auto-approve so the request settles immediately
            toolConfirmation: { mode: 'auto-approve', timeout: 60000 },
            elicitation: { enabled: true, timeout: 120000 },
        });

        // The per-request timeout should override the config timeout; that is
        // only exercised implicitly here, through the request factory flow.
        const { status } = await manager.requestToolConfirmation({
            toolName: 'test_tool',
            toolCallId: 'test-call-id',
            args: { foo: 'bar' },
            timeout: 30000, // Per-request override
        });
        expect(status).toBe(ApprovalStatus.APPROVED);
    });

    it('should not timeout when timeout is undefined in auto-approve mode', async () => {
        const manager = createManager({
            // A missing timeout must not cause any issue with auto-approve
            toolConfirmation: { mode: 'auto-approve' },
            elicitation: { enabled: false },
        });

        const { status } = await manager.requestToolConfirmation({
            toolName: 'test_tool',
            toolCallId: 'test-call-id',
            args: {},
        });
        expect(status).toBe(ApprovalStatus.APPROVED);
    });

    it('should not timeout when timeout is undefined in auto-deny mode', async () => {
        const manager = createManager({
            // A missing timeout must not cause any issue with auto-deny
            toolConfirmation: { mode: 'auto-deny' },
            elicitation: { enabled: false },
        });

        const { status, reason } = await manager.requestToolConfirmation({
            toolName: 'test_tool',
            toolCallId: 'test-call-id',
            args: {},
        });
        expect(status).toBe(ApprovalStatus.DENIED);
        expect(reason).toBe(DenialReason.SYSTEM_DENIED);
    });
});
describe('Backward compatibility', () => {
    // Manual tool confirmation plus enabled elicitation, both with a timeout.
    const createManualManager = () =>
        new ApprovalManager(
            {
                toolConfirmation: { mode: 'manual', timeout: 120000 },
                elicitation: { enabled: true, timeout: 120000 },
            },
            mockLogger
        );

    it('should work with manual mode for both tools and elicitation', () => {
        // The configuration must be reported back exactly as it was supplied.
        const expectedConfig = {
            toolConfirmation: {
                mode: 'manual',
                timeout: 120000,
            },
            elicitation: {
                enabled: true,
                timeout: 120000,
            },
        };
        const actualConfig = createManualManager().getConfig();
        expect(actualConfig).toEqual(expectedConfig);
    });

    it('should respect explicitly set elicitation enabled value', () => {
        const { elicitation } = createManualManager().getConfig();
        expect(elicitation.enabled).toBe(true);
    });
});
describe('Denial Reasons', () => {
    // Manager whose tool confirmations are rejected by policy, which makes a
    // denial reproducible without any user interaction.
    const createAutoDenyManager = () =>
        new ApprovalManager(
            {
                toolConfirmation: { mode: 'auto-deny', timeout: 120000 },
                elicitation: { enabled: true, timeout: 120000 },
            },
            mockLogger
        );

    it('should include system_denied reason in auto-deny mode', async () => {
        const manager = createAutoDenyManager();
        const response = await manager.requestToolConfirmation({
            toolName: 'test_tool',
            toolCallId: 'test-call-id',
            args: {},
        });
        expect(response.status).toBe(ApprovalStatus.DENIED);
        expect(response.reason).toBe(DenialReason.SYSTEM_DENIED);
        expect(response.message).toContain('system policy');
    });

    it('should throw error with specific reason when tool is denied', async () => {
        const manager = createAutoDenyManager();
        try {
            await manager.checkToolConfirmation({
                toolName: 'test_tool',
                toolCallId: 'test-call-id',
                args: {},
            });
            expect.fail('Should have thrown error');
        } catch (error) {
            expect(error).toBeInstanceOf(DextoRuntimeError);
            expect((error as DextoRuntimeError).code).toBe(
                ApprovalErrorCode.APPROVAL_TOOL_CONFIRMATION_DENIED
            );
            expect((error as DextoRuntimeError).message).toContain('system policy');
            expect((error as any).context.reason).toBe(DenialReason.SYSTEM_DENIED);
        }
    });

    it('should handle user_denied reason in error message', () => {
        // A real user denial needs a registered approval handler and a pending
        // request to answer, which this suite does not set up. The earlier
        // version scheduled a 50ms timer that emitted a response with a
        // matcher object as its approvalId; the timer fired only after this
        // synchronous test had finished, so it verified nothing and could leak
        // an event into whichever test ran next. Only the wire values of the
        // reasons are pinned here.
        expect(DenialReason.USER_DENIED).toBe('user_denied');
        expect(DenialReason.TIMEOUT).toBe('timeout');
    });

    it('should include reason in response schema', () => {
        // Verify the type system allows reason and message
        const response: { reason?: DenialReason; message?: string } = {
            reason: DenialReason.USER_DENIED,
            message: 'You denied this request',
        };
        expect(response.reason).toBe(DenialReason.USER_DENIED);
        expect(response.message).toBe('You denied this request');
    });

    it('should support all denial reason types', () => {
        const reasons: DenialReason[] = [
            DenialReason.USER_DENIED,
            DenialReason.SYSTEM_DENIED,
            DenialReason.TIMEOUT,
            DenialReason.USER_CANCELLED,
            DenialReason.SYSTEM_CANCELLED,
            DenialReason.VALIDATION_FAILED,
            DenialReason.ELICITATION_DISABLED,
        ];
        expect(reasons.length).toBe(7);
        // Every reason is expected to be a plain string value.
        for (const reason of reasons) {
            expect(typeof reason).toBe('string');
        }
    });
});
describe('Bash Pattern Approval', () => {
    let manager: ApprovalManager;

    // Approve each given pattern, in order, on the manager under test.
    const approve = (...patterns: string[]): void => {
        for (const pattern of patterns) {
            manager.addBashPattern(pattern);
        }
    };
    // Ask the manager whether a pattern key is covered by an approved pattern.
    const isCovered = (patternKey: string): boolean => manager.matchesBashPattern(patternKey);

    beforeEach(() => {
        manager = new ApprovalManager(
            {
                toolConfirmation: { mode: 'manual', timeout: 120000 },
                elicitation: { enabled: false },
            },
            mockLogger
        );
    });

    describe('addBashPattern', () => {
        it('should add a pattern to the approved list', () => {
            approve('git *');
            const approved = manager.getBashPatterns();
            expect(approved.has('git *')).toBe(true);
        });

        it('should add multiple patterns', () => {
            approve('git *', 'npm *', 'ls *');
            const patterns = manager.getBashPatterns();
            expect(patterns.size).toBe(3);
            expect(patterns.has('git *')).toBe(true);
            expect(patterns.has('npm *')).toBe(true);
            expect(patterns.has('ls *')).toBe(true);
        });

        it('should not duplicate patterns', () => {
            approve('git *', 'git *');
            expect(manager.getBashPatterns().size).toBe(1);
        });
    });

    describe('matchesBashPattern (pattern-to-pattern covering)', () => {
        // matchesBashPattern is given pattern keys (e.g., "git push *") rather
        // than raw commands; ToolManager derives those keys from commands.
        it('should match exact pattern against exact stored pattern', () => {
            approve('git status *');
            expect(isCovered('git status *')).toBe(true);
            expect(isCovered('git push *')).toBe(false);
        });

        it('should cover narrower pattern with broader pattern', () => {
            // The broad "git *" covers "git push *", "git status *", and so on.
            approve('git *');
            expect(isCovered('git *')).toBe(true);
            expect(isCovered('git push *')).toBe(true);
            expect(isCovered('git status *')).toBe(true);
            expect(isCovered('npm *')).toBe(false);
        });

        it('should not let narrower pattern cover broader pattern', () => {
            // The narrow "git push *" must NOT cover "git *".
            approve('git push *');
            expect(isCovered('git push *')).toBe(true);
            expect(isCovered('git *')).toBe(false);
            expect(isCovered('git status *')).toBe(false);
        });

        it('should match against multiple patterns', () => {
            approve('git *', 'npm install *');
            expect(isCovered('git status *')).toBe(true);
            expect(isCovered('npm install *')).toBe(true);
            // Only "npm install *" was approved, not all of "npm *".
            expect(isCovered('npm run *')).toBe(false);
        });

        it('should return false when no patterns are set', () => {
            expect(isCovered('git status *')).toBe(false);
        });

        it('should not cross-match unrelated commands', () => {
            approve('npm *');
            // "npx" shares the "np" prefix but is not "npm " followed by something.
            expect(isCovered('npx *')).toBe(false);
        });

        it('should handle multi-level subcommands', () => {
            approve('docker compose *');
            expect(isCovered('docker compose *')).toBe(true);
            expect(isCovered('docker compose up *')).toBe(true);
            expect(isCovered('docker *')).toBe(false);
        });
    });

    describe('clearBashPatterns', () => {
        it('should clear all patterns', () => {
            approve('git *', 'npm *');
            expect(manager.getBashPatterns().size).toBe(2);
            manager.clearBashPatterns();
            expect(manager.getBashPatterns().size).toBe(0);
        });

        it('should allow adding patterns after clearing', () => {
            approve('git *');
            manager.clearBashPatterns();
            approve('npm *');
            expect(manager.getBashPatterns().size).toBe(1);
            expect(manager.getBashPatterns().has('npm *')).toBe(true);
        });
    });

    describe('getBashPatterns', () => {
        it('should return empty set initially', () => {
            const initial = manager.getBashPatterns();
            expect(initial.size).toBe(0);
        });

        it('should return a copy that reflects current patterns', () => {
            approve('git *');
            const patterns = manager.getBashPatterns();
            expect(patterns.has('git *')).toBe(true);
            // ReadonlySet is a compile-time constraint only. The returned set is
            // the manager's internal set, so mutating it would affect the
            // manager; that is acceptable for debugging/display use.
        });
    });
});
describe('Directory Access Approval', () => {
    let manager: ApprovalManager;

    // Thin wrappers so each expectation reads as a question about a path.
    const sessionApproved = (filePath: string): boolean =>
        manager.isDirectorySessionApproved(filePath);
    const approved = (filePath: string): boolean => manager.isDirectoryApproved(filePath);

    beforeEach(() => {
        manager = new ApprovalManager(
            {
                toolConfirmation: { mode: 'manual', timeout: 120000 },
                elicitation: { enabled: false },
            },
            mockLogger
        );
    });

    describe('initializeWorkingDirectory', () => {
        it('should add working directory as session-approved', () => {
            manager.initializeWorkingDirectory('/home/user/project');
            expect(sessionApproved('/home/user/project/src/file.ts')).toBe(true);
        });

        it('should normalize the path before adding', () => {
            manager.initializeWorkingDirectory('/home/user/../user/project');
            expect(sessionApproved('/home/user/project/file.ts')).toBe(true);
        });
    });

    describe('addApprovedDirectory', () => {
        it('should add directory with session type by default', () => {
            manager.addApprovedDirectory('/external/project');
            expect(sessionApproved('/external/project/file.ts')).toBe(true);
        });

        it('should add directory with explicit session type', () => {
            manager.addApprovedDirectory('/external/project', 'session');
            expect(sessionApproved('/external/project/file.ts')).toBe(true);
        });

        it('should add directory with once type', () => {
            manager.addApprovedDirectory('/external/project', 'once');
            // A 'once' approval is not session-approved: it prompts every time...
            expect(sessionApproved('/external/project/file.ts')).toBe(false);
            // ...yet it still counts as approved for execution.
            expect(approved('/external/project/file.ts')).toBe(true);
        });

        it('should not downgrade from session to once', () => {
            manager.addApprovedDirectory('/external/project', 'session');
            manager.addApprovedDirectory('/external/project', 'once');
            // The earlier session approval must survive the later 'once' call.
            expect(sessionApproved('/external/project/file.ts')).toBe(true);
        });

        it('should upgrade from once to session', () => {
            manager.addApprovedDirectory('/external/project', 'once');
            expect(sessionApproved('/external/project/file.ts')).toBe(false);
            manager.addApprovedDirectory('/external/project', 'session');
            expect(sessionApproved('/external/project/file.ts')).toBe(true);
        });

        it('should normalize paths before adding', () => {
            manager.addApprovedDirectory('/external/../external/project');
            expect(approved('/external/project/file.ts')).toBe(true);
        });
    });

    describe('isDirectorySessionApproved', () => {
        it('should return true for files within session-approved directory', () => {
            manager.addApprovedDirectory('/external/project', 'session');
            expect(sessionApproved('/external/project/file.ts')).toBe(true);
            expect(sessionApproved('/external/project/src/deep/file.ts')).toBe(true);
        });

        it('should return false for files within once-approved directory', () => {
            manager.addApprovedDirectory('/external/project', 'once');
            expect(sessionApproved('/external/project/file.ts')).toBe(false);
        });

        it('should return false for files outside approved directories', () => {
            manager.addApprovedDirectory('/external/project', 'session');
            expect(sessionApproved('/other/file.ts')).toBe(false);
        });

        it('should handle path containment correctly', () => {
            manager.addApprovedDirectory('/external', 'session');
            // Approving /external covers /external/sub/file.ts...
            expect(sessionApproved('/external/sub/file.ts')).toBe(true);
            // ...but not the sibling directory /external-other.
            expect(sessionApproved('/external-other/file.ts')).toBe(false);
        });

        it('should return true when working directory is initialized', () => {
            manager.initializeWorkingDirectory('/home/user/project');
            expect(sessionApproved('/home/user/project/any/file.ts')).toBe(true);
        });
    });

    describe('isDirectoryApproved', () => {
        it('should return true for files within session-approved directory', () => {
            manager.addApprovedDirectory('/external/project', 'session');
            expect(approved('/external/project/file.ts')).toBe(true);
        });

        it('should return true for files within once-approved directory', () => {
            manager.addApprovedDirectory('/external/project', 'once');
            expect(approved('/external/project/file.ts')).toBe(true);
        });

        it('should return false for files outside approved directories', () => {
            manager.addApprovedDirectory('/external/project', 'session');
            expect(approved('/other/file.ts')).toBe(false);
        });

        it('should handle multiple approved directories', () => {
            manager.addApprovedDirectory('/external/project1', 'session');
            manager.addApprovedDirectory('/external/project2', 'once');
            expect(approved('/external/project1/file.ts')).toBe(true);
            expect(approved('/external/project2/file.ts')).toBe(true);
            expect(approved('/external/project3/file.ts')).toBe(false);
        });

        it('should handle nested directory approvals', () => {
            manager.addApprovedDirectory('/external', 'session');
            // Approving /external covers every subdirectory beneath it.
            expect(approved('/external/sub/deep/file.ts')).toBe(true);
        });
    });

    describe('getApprovedDirectories', () => {
        it('should return empty map initially', () => {
            const initial = manager.getApprovedDirectories();
            expect(initial.size).toBe(0);
        });

        it('should return map with type information', () => {
            manager.addApprovedDirectory('/external/project1', 'session');
            manager.addApprovedDirectory('/external/project2', 'once');
            const dirs = manager.getApprovedDirectories();
            expect(dirs.size).toBe(2);
            // Keys are normalized absolute paths, so match on the directory name.
            const keys = Array.from(dirs.keys());
            const hasKeyContaining = (fragment: string): boolean =>
                keys.some((key) => key.includes(fragment));
            expect(hasKeyContaining('project1')).toBe(true);
            expect(hasKeyContaining('project2')).toBe(true);
        });

        it('should include working directory after initialization', () => {
            manager.initializeWorkingDirectory('/home/user/project');
            const dirs = manager.getApprovedDirectories();
            expect(dirs.size).toBe(1);
            // The working directory is registered with the 'session' type.
            const entries = Array.from(dirs.entries());
            expect(entries[0]![1]).toBe('session');
        });
    });

    describe('Session vs Once Prompting Behavior', () => {
        // These cases spell out the expected prompting flow.
        it('working directory should not require prompt (session-approved)', () => {
            manager.initializeWorkingDirectory('/home/user/project');
            // Session-approved means no directory prompt is needed.
            expect(sessionApproved('/home/user/project/src/file.ts')).toBe(true);
        });

        it('external dir after session approval should not require prompt', () => {
            manager.addApprovedDirectory('/external', 'session');
            // Session-approved means no directory prompt is needed.
            expect(sessionApproved('/external/file.ts')).toBe(true);
        });

        it('external dir after once approval should require prompt each time', () => {
            manager.addApprovedDirectory('/external', 'once');
            // Not session-approved, so a directory prompt is needed...
            expect(sessionApproved('/external/file.ts')).toBe(false);
            // ...while execution itself is allowed.
            expect(approved('/external/file.ts')).toBe(true);
        });

        it('unapproved external dir should require prompt', () => {
            // Nothing has been approved at all.
            expect(sessionApproved('/external/file.ts')).toBe(false);
            expect(approved('/external/file.ts')).toBe(false);
        });
    });
});
});

View File

@@ -0,0 +1,661 @@
import path from 'node:path';
import type {
ApprovalHandler,
ApprovalRequest,
ApprovalResponse,
ApprovalRequestDetails,
ToolConfirmationMetadata,
CommandConfirmationMetadata,
ElicitationMetadata,
DirectoryAccessMetadata,
} from './types.js';
import { ApprovalType, ApprovalStatus, DenialReason } from './types.js';
import { createApprovalRequest } from './factory.js';
import type { IDextoLogger } from '../logger/v2/types.js';
import { DextoLogComponent } from '../logger/v2/types.js';
import { ApprovalError } from './errors.js';
import { patternCovers } from '../tools/bash-pattern-utils.js';
/**
* Configuration for the approval manager
*/
export interface ApprovalManagerConfig {
/** Policy applied to every approval request that is not an elicitation. */
toolConfirmation: {
/**
 * 'manual' delegates the request to the registered approval handler,
 * 'auto-approve' answers approved without consulting a handler, and
 * 'auto-deny' answers denied with a system-denied reason.
 */
mode: 'manual' | 'auto-approve' | 'auto-deny';
// Fallback used when a request carries no timeout of its own.
// Presumably milliseconds - confirm against the request factory.
timeout?: number; // Optional - no timeout if not specified
};
/** Policy applied to elicitation requests. */
elicitation: {
/** When false, requesting an elicitation throws an invalid-config error. */
enabled: boolean;
// Fallback used when an elicitation request carries no timeout of its own.
// Presumably milliseconds - confirm against the request factory.
timeout?: number; // Optional - no timeout if not specified
};
}
/**
* ApprovalManager orchestrates all user approval flows in Dexto.
*
* It provides a unified interface for requesting user approvals across different
* types (tool confirmation, MCP elicitation, custom approvals) and manages the
* underlying approval provider based on configuration.
*
* Key responsibilities:
* - Create and submit approval requests
* - Route approvals to appropriate providers
* - Provide convenience methods for specific approval types
* - Handle approval responses and errors
* - Support multiple approval modes (manual, auto-approve, auto-deny)
*
* @example
* ```typescript
* const manager = new ApprovalManager(
* { toolConfirmation: { mode: 'manual', timeout: 60000 }, elicitation: { enabled: true, timeout: 60000 } },
* logger
* );
*
* // Request tool confirmation
* const response = await manager.requestToolConfirmation({
* toolName: 'git_commit',
* args: { message: 'feat: add feature' },
* sessionId: 'session-123'
* });
*
* if (response.status === 'approved') {
* // Execute tool
* }
* ```
*/
export class ApprovalManager {
private handler: ApprovalHandler | undefined;
private config: ApprovalManagerConfig;
private logger: IDextoLogger;
/**
* Bash command patterns approved for the current session.
* Patterns use simple glob syntax (e.g., "git *", "npm install *").
* Cleared when session ends.
*/
private bashPatterns: Set<string> = new Set();
/**
* Directories approved for file access for the current session.
* Stores normalized absolute paths mapped to their approval type:
* - 'session': No directory prompt, follows tool config (working dir + user session-approved)
* - 'once': Prompts each time, but tool can execute
* Cleared when session ends.
*/
private approvedDirectories: Map<string, 'session' | 'once'> = new Map();
/**
 * Store the configuration and create a child logger for the approval
 * component, then record the effective modes at debug level.
 *
 * @param config Approval behavior for tool confirmations and elicitation
 * @param logger Parent logger from which the child logger is created
 */
constructor(config: ApprovalManagerConfig, logger: IDextoLogger) {
    this.config = config;
    this.logger = logger.createChild(DextoLogComponent.APPROVAL);

    const startupMessage = `ApprovalManager initialized with toolConfirmation.mode: ${config.toolConfirmation.mode}, elicitation.enabled: ${config.elicitation.enabled}`;
    this.logger.debug(startupMessage);
}
// ==================== Bash Pattern Methods ====================
/**
* Add a bash command pattern to the approved list for this session.
* Patterns use simple glob syntax with * as wildcard.
*
* @example
* ```typescript
* manager.addBashPattern("git *"); // Approves all git commands
* manager.addBashPattern("npm install *"); // Approves npm install with any package
* ```
*/
addBashPattern(pattern: string): void {
    // The backing store is a Set, so re-adding an existing pattern is a no-op.
    const approvedPatterns = this.bashPatterns;
    approvedPatterns.add(pattern);

    const logLine = `Added bash pattern: "${pattern}"`;
    this.logger.debug(logLine);
}
/**
* Check if a bash pattern key is covered by any approved pattern.
* Uses pattern-to-pattern covering for broader pattern support.
*
* @param patternKey The pattern key generated from the command (e.g., "git push *")
* @returns true if the pattern key is covered by an approved pattern
*/
matchesBashPattern(patternKey: string): boolean {
    // Look for the first approved pattern, in insertion order, that covers the key.
    const coveringPattern = Array.from(this.bashPatterns).find((approvedPattern) =>
        patternCovers(approvedPattern, patternKey)
    );

    if (coveringPattern === undefined) {
        return false;
    }

    this.logger.debug(
        `Pattern key "${patternKey}" is covered by approved pattern "${coveringPattern}"`
    );
    return true;
}
/**
* Clear all approved bash patterns.
* Should be called when session ends.
*/
clearBashPatterns(): void {
    // Capture the size first; it is zero once the set has been cleared.
    const removedCount = this.bashPatterns.size;
    this.bashPatterns.clear();

    // Stay quiet when there was nothing to clear.
    if (removedCount === 0) {
        return;
    }
    this.logger.debug(`Cleared ${removedCount} bash patterns`);
}
/**
* Get the current set of approved bash patterns (for debugging/display).
*/
getBashPatterns(): ReadonlySet<string> {
// Returns the live internal Set rather than a copy. ReadonlySet restricts
// callers only at compile time, and later addBashPattern/clearBashPatterns
// calls are visible through the returned reference.
return this.bashPatterns;
}
// ==================== Directory Access Methods ====================
/**
* Initialize the working directory as a session-approved directory.
* This should be called once during setup to ensure the working directory
* never triggers directory access prompts.
*
* @param workingDir The working directory path
*/
initializeWorkingDirectory(workingDir: string): void {
    // Store an absolute, normalized key so later containment checks compare like with like.
    const absoluteWorkingDir = path.resolve(workingDir);

    // Written unconditionally: the working directory is always 'session',
    // replacing any entry already stored under the same key.
    this.approvedDirectories.set(absoluteWorkingDir, 'session');

    const logLine = `Initialized working directory as session-approved: "${absoluteWorkingDir}"`;
    this.logger.debug(logLine);
}
/**
* Add a directory to the approved list for this session.
* Files within this directory (including subdirectories) will be allowed.
*
* @param directory Absolute path to the directory to approve
* @param type The approval type:
* - 'session': No directory prompt on future accesses, follows tool config
* - 'once': Will prompt again on future accesses, but tool can execute this time
* @example
* ```typescript
* manager.addApprovedDirectory("/external/project", 'session');
* // Now /external/project/src/file.ts is accessible without directory prompt
*
* manager.addApprovedDirectory("/tmp/files", 'once');
* // Tool can access, but will prompt again next time
* ```
*/
addApprovedDirectory(directory: string, type: 'session' | 'once' = 'session'): void {
    const absoluteDir = path.resolve(directory);
    const alreadySessionApproved = this.approvedDirectories.get(absoluteDir) === 'session';

    if (!alreadySessionApproved) {
        // New entry, or an existing 'once' entry being re-recorded or upgraded.
        this.approvedDirectories.set(absoluteDir, type);
        this.logger.debug(`Added approved directory: "${absoluteDir}" (type: ${type})`);
        return;
    }

    // A 'session' approval is the stronger of the two, so it is never replaced.
    this.logger.debug(
        `Directory "${absoluteDir}" already approved as 'session', not downgrading to '${type}'`
    );
}
/**
* Check if a file path is within any session-approved directory.
* This is used for PROMPTING decisions - only 'session' type directories count.
* Working directory and user session-approved directories return true.
*
* @param filePath The file path to check (can be relative or absolute)
* @returns true if the path is within a session-approved directory
*/
isDirectorySessionApproved(filePath: string): boolean {
    // path.resolve is purely lexical: it makes the path absolute and collapses
    // "." and ".." segments without touching the file system.
    const normalized = path.resolve(filePath);
    for (const [approvedDir, type] of this.approvedDirectories) {
        // Only check 'session' type directories for prompting decisions
        if (type !== 'session') continue;
        const relative = path.relative(approvedDir, normalized);
        // The path lies outside approvedDir when the relative path climbs out
        // through a leading ".." SEGMENT, or when it is absolute (e.g. another
        // drive on Windows). A bare startsWith('..') would also reject genuine
        // children whose name merely begins with two dots, such as "..cache/x".
        const escapesApprovedDir =
            relative === '..' ||
            relative.startsWith(`..${path.sep}`) ||
            path.isAbsolute(relative);
        if (!escapesApprovedDir) {
            this.logger.debug(
                `Path "${normalized}" is within session-approved directory "${approvedDir}"`
            );
            return true;
        }
    }
    return false;
}
/**
* Check if a file path is within any approved directory (session OR once).
* This is used for EXECUTION decisions - both 'session' and 'once' types count.
* PathValidator uses this to determine if a tool can access the path.
*
* @param filePath The file path to check (can be relative or absolute)
* @returns true if the path is within any approved directory
*/
isDirectoryApproved(filePath: string): boolean {
    // path.resolve is purely lexical: it makes the path absolute and collapses
    // "." and ".." segments without touching the file system.
    const normalized = path.resolve(filePath);
    // Both 'session' and 'once' entries count here, so the type is ignored.
    for (const [approvedDir] of this.approvedDirectories) {
        const relative = path.relative(approvedDir, normalized);
        // The path lies outside approvedDir when the relative path climbs out
        // through a leading ".." SEGMENT, or when it is absolute (e.g. another
        // drive on Windows). A bare startsWith('..') would also reject genuine
        // children whose name merely begins with two dots, such as "..cache/x".
        const escapesApprovedDir =
            relative === '..' ||
            relative.startsWith(`..${path.sep}`) ||
            path.isAbsolute(relative);
        if (!escapesApprovedDir) {
            this.logger.debug(
                `Path "${normalized}" is within approved directory "${approvedDir}"`
            );
            return true;
        }
    }
    return false;
}
/**
* Clear all approved directories.
* Should be called when session ends.
*/
clearApprovedDirectories(): void {
    // Capture the size first; it is zero once the map has been cleared.
    const removedCount = this.approvedDirectories.size;
    this.approvedDirectories.clear();

    // Stay quiet when there was nothing to clear.
    if (removedCount === 0) {
        return;
    }
    this.logger.debug(`Cleared ${removedCount} approved directories`);
}
/**
* Get the current map of approved directories with their types (for debugging/display).
*/
getApprovedDirectories(): ReadonlyMap<string, 'session' | 'once'> {
// Returns the live internal Map rather than a copy. ReadonlyMap restricts
// callers only at compile time, and later additions or clears are visible
// through the returned reference.
return this.approvedDirectories;
}
/**
* Get just the directory paths that are approved (for debugging/display).
*/
getApprovedDirectoryPaths(): string[] {
    // Snapshot the keys into a new array, in insertion order; approval types are dropped.
    const approvedPaths = [...this.approvedDirectories.keys()];
    return approvedPaths;
}
/**
* Clear all session-scoped approvals (bash patterns and directories).
* Convenience method for clearing all session state at once.
*/
clearSessionApprovals(): void {
// Each helper empties its own collection and logs a count only when
// something was actually removed.
this.clearBashPatterns();
this.clearApprovedDirectories();
// Logged unconditionally, even when both collections were already empty.
this.logger.debug('Cleared all session approvals');
}
/**
* Request directory access approval.
* Convenience method for directory access requests.
*
* @example
* ```typescript
* const response = await manager.requestDirectoryAccess({
* path: '/external/project/src/file.ts',
* parentDir: '/external/project',
* operation: 'write',
* toolName: 'write_file',
* sessionId: 'session-123'
* });
* ```
*/
async requestDirectoryAccess(
    metadata: DirectoryAccessMetadata & { sessionId?: string; timeout?: number }
): Promise<ApprovalResponse> {
    // Separate the routing fields from the payload describing the access.
    const { sessionId, timeout, ...directoryMetadata } = metadata;

    // A per-request timeout wins; otherwise fall back to the tool-confirmation
    // timeout from the configuration, which may itself be undefined (no timeout).
    const configuredTimeout = this.config.toolConfirmation.timeout;
    const effectiveTimeout = timeout === undefined ? configuredTimeout : timeout;

    const details: ApprovalRequestDetails = {
        type: ApprovalType.DIRECTORY_ACCESS,
        timeout: effectiveTimeout,
        metadata: directoryMetadata,
    };
    // Only attach a session id when the caller supplied one.
    if (sessionId !== undefined) {
        details.sessionId = sessionId;
    }
    return this.requestApproval(details);
}
/**
* Request a generic approval
*/
async requestApproval(details: ApprovalRequestDetails): Promise<ApprovalResponse> {
    // Build the full request first; the type check below runs on the built request.
    const request = createApprovalRequest(details);

    // Elicitation requests are refused outright when the feature is switched off.
    const isElicitation = request.type === ApprovalType.ELICITATION;
    if (isElicitation && !this.config.elicitation.enabled) {
        throw ApprovalError.invalidConfig(
            'Elicitation is disabled. Enable elicitation in your agent configuration to use the ask_user tool or MCP server elicitations.'
        );
    }

    // Every approval type goes through the same handling path.
    return this.handleApproval(request);
}
/**
* Handle approval requests (tool confirmation, elicitation, command confirmation, directory access, custom)
* @private
*/
private async handleApproval(request: ApprovalRequest): Promise<ApprovalResponse> {
    // Elicitation ignores the configured mode and always goes to the handler.
    if (request.type === ApprovalType.ELICITATION) {
        const elicitationHandler = this.ensureHandler();
        this.logger.info(
            `Elicitation requested, approvalId: ${request.approvalId}, sessionId: ${request.sessionId ?? 'global'}`
        );
        return elicitationHandler(request);
    }

    // Every other approval type follows the configured tool-confirmation mode.
    switch (this.config.toolConfirmation.mode) {
        case 'auto-approve': {
            this.logger.info(
                `Auto-approve approval '${request.type}', approvalId: ${request.approvalId}`
            );
            const approved: ApprovalResponse = {
                approvalId: request.approvalId,
                status: ApprovalStatus.APPROVED,
            };
            // Echo the session id back only when the request carried one.
            if (request.sessionId !== undefined) {
                approved.sessionId = request.sessionId;
            }
            return approved;
        }
        case 'auto-deny': {
            this.logger.info(
                `Auto-deny approval '${request.type}', approvalId: ${request.approvalId}`
            );
            const denied: ApprovalResponse = {
                approvalId: request.approvalId,
                status: ApprovalStatus.DENIED,
                reason: DenialReason.SYSTEM_DENIED,
                message: `Approval automatically denied by system policy (auto-deny mode)`,
            };
            // Echo the session id back only when the request carried one.
            if (request.sessionId !== undefined) {
                denied.sessionId = request.sessionId;
            }
            return denied;
        }
        default: {
            // Manual mode: obtain the handler first, then log and delegate.
            const manualHandler = this.ensureHandler();
            this.logger.info(
                `Manual approval '${request.type}' requested, approvalId: ${request.approvalId}, sessionId: ${request.sessionId ?? 'global'}`
            );
            return manualHandler(request);
        }
    }
}
/**
* Request tool confirmation approval
* Convenience method for tool execution confirmation
*
* TODO: Make sessionId required once all callers are updated to pass it
* Tool confirmations always happen in session context during LLM execution
*/
async requestToolConfirmation(
    metadata: ToolConfirmationMetadata & { sessionId?: string; timeout?: number }
): Promise<ApprovalResponse> {
    // Separate the routing fields from the payload describing the tool call.
    const { sessionId, timeout, ...toolMetadata } = metadata;

    // A per-request timeout wins; otherwise fall back to the tool-confirmation
    // timeout from the configuration, which may itself be undefined (no timeout).
    const configuredTimeout = this.config.toolConfirmation.timeout;
    const effectiveTimeout = timeout === undefined ? configuredTimeout : timeout;

    const details: ApprovalRequestDetails = {
        type: ApprovalType.TOOL_CONFIRMATION,
        timeout: effectiveTimeout,
        metadata: toolMetadata,
    };
    // Only attach a session id when the caller supplied one.
    if (sessionId !== undefined) {
        details.sessionId = sessionId;
    }
    return this.requestApproval(details);
}
/**
 * Ask for approval of a single dangerous command run inside an already-approved tool.
 *
 * Unlike tool confirmation, this is a per-command check for risky operations
 * (like rm, git push) issued by a tool that has itself already been approved.
 * The fallback timeout is the tool-confirmation timeout from the configuration.
 *
 * TODO: Make sessionId required once all callers are updated to pass it
 * (command confirmations always happen during tool execution which has session context).
 *
 * @param metadata Command confirmation metadata, optionally carrying `sessionId` and `timeout`
 * @returns The approval response produced by `requestApproval`
 *
 * @example
 * ```typescript
 * // bash_exec tool is approved, but dangerous commands still require approval
 * const response = await manager.requestCommandConfirmation({
 *     toolName: 'bash_exec',
 *     command: 'rm -rf /important',
 *     originalCommand: 'rm -rf /important',
 *     sessionId: 'session-123'
 * });
 * ```
 */
async requestCommandConfirmation(
    metadata: CommandConfirmationMetadata & { sessionId?: string; timeout?: number }
): Promise<ApprovalResponse> {
    const { sessionId, timeout, ...commandMetadata } = metadata;
    // An explicit per-request timeout wins; otherwise use the configured value,
    // which may itself be undefined (no timeout).
    const effectiveTimeout =
        timeout === undefined ? this.config.toolConfirmation.timeout : timeout;
    const details: ApprovalRequestDetails = {
        type: ApprovalType.COMMAND_CONFIRMATION,
        timeout: effectiveTimeout,
        metadata: commandMetadata,
        // Only attach sessionId when the caller actually supplied one
        ...(sessionId === undefined ? {} : { sessionId }),
    };
    return this.requestApproval(details);
}
/**
 * Ask the user for structured input on behalf of an MCP server.
 *
 * Note: sessionId is optional because MCP servers are shared across sessions
 * and the MCP protocol doesn't include session context in elicitation requests.
 *
 * @param metadata Elicitation metadata, optionally carrying `sessionId` and `timeout`
 * @returns The approval response produced by `requestApproval`
 */
async requestElicitation(
    metadata: ElicitationMetadata & { sessionId?: string; timeout?: number }
): Promise<ApprovalResponse> {
    const { sessionId, timeout, ...elicitationMetadata } = metadata;
    // An explicit per-request timeout wins; otherwise use the configured elicitation
    // timeout, which may itself be undefined (no timeout).
    const effectiveTimeout = timeout === undefined ? this.config.elicitation.timeout : timeout;
    const details: ApprovalRequestDetails = {
        type: ApprovalType.ELICITATION,
        timeout: effectiveTimeout,
        metadata: elicitationMetadata,
        // Only attach sessionId when the caller actually supplied one
        ...(sessionId === undefined ? {} : { sessionId }),
    };
    return this.requestApproval(details);
}
/**
 * Request tool confirmation and turn any non-approval into an error.
 *
 * @param metadata Tool confirmation metadata, optionally carrying `sessionId` and `timeout`
 * @returns `true` when the request was approved (the only non-throwing outcome)
 * @throws The error built by `ApprovalError.toolConfirmationDenied` when the status is DENIED
 * @throws The error built by `ApprovalError.cancelled` for every other status
 */
async checkToolConfirmation(
    metadata: ToolConfirmationMetadata & { sessionId?: string; timeout?: number }
): Promise<boolean> {
    const outcome = await this.requestToolConfirmation(metadata);
    switch (outcome.status) {
        case ApprovalStatus.APPROVED:
            return true;
        case ApprovalStatus.DENIED:
            throw ApprovalError.toolConfirmationDenied(
                metadata.toolName,
                outcome.reason,
                outcome.message,
                metadata.sessionId
            );
        default:
            // Anything that is neither approved nor denied is reported as a cancellation;
            // prefer the human-readable message, falling back to the reason code.
            throw ApprovalError.cancelled(
                outcome.approvalId,
                ApprovalType.TOOL_CONFIRMATION,
                outcome.message ?? outcome.reason
            );
    }
}
/**
 * Request an elicitation and return the submitted form data.
 *
 * @param metadata Elicitation metadata, optionally carrying `sessionId` and `timeout`
 * @returns The `formData` object from an approved response, or `{}` when the approved
 *          response carries no object-valued `formData`
 * @throws The error built by `ApprovalError.elicitationDenied` when the status is DENIED
 * @throws The error built by `ApprovalError.cancelled` for any other non-approved status
 */
async getElicitationData(
    metadata: ElicitationMetadata & { sessionId?: string; timeout?: number }
): Promise<Record<string, unknown>> {
    const outcome = await this.requestElicitation(metadata);

    if (outcome.status === ApprovalStatus.DENIED) {
        throw ApprovalError.elicitationDenied(
            metadata.serverName,
            outcome.reason,
            outcome.message,
            metadata.sessionId
        );
    }
    if (outcome.status !== ApprovalStatus.APPROVED) {
        // Prefer the human-readable message, falling back to the reason code
        throw ApprovalError.cancelled(
            outcome.approvalId,
            ApprovalType.ELICITATION,
            outcome.message ?? outcome.reason
        );
    }

    // Approved: pull formData out of the response payload when it is a non-null object
    const payload = outcome.data;
    if (payload && typeof payload === 'object' && 'formData' in payload) {
        const candidate = (payload as { formData: unknown }).formData;
        if (typeof candidate === 'object' && candidate !== null) {
            return candidate as Record<string, unknown>;
        }
    }
    // Fallback to empty form if data is missing (edge case)
    return {};
}
/**
 * Cancel one pending approval request.
 * Does nothing when no handler is registered or the handler has no `cancel` method.
 *
 * @param approvalId ID of the approval to cancel
 */
cancelApproval(approvalId: string): void {
    const { handler } = this;
    handler?.cancel?.(approvalId);
}
/**
 * Cancel every pending approval request.
 * Does nothing when no handler is registered or the handler has no `cancelAll` method.
 */
cancelAllApprovals(): void {
    const { handler } = this;
    handler?.cancelAll?.();
}
/**
 * List the IDs of approvals that are still pending.
 *
 * @returns The handler's pending IDs, or an empty array when there is no handler
 *          or it does not implement `getPending`
 */
getPendingApprovals(): string[] {
    const pendingIds = this.handler?.getPending?.();
    return pendingIds ?? [];
}
/**
 * List the full approval requests that are still pending.
 *
 * @returns The handler's pending requests, or an empty array when there is no handler
 *          or it does not implement `getPendingRequests`
 */
getPendingApprovalRequests(): ApprovalRequest[] {
    const pendingRequests = this.handler?.getPendingRequests?.();
    return pendingRequests ?? [];
}
/**
 * Auto-approve the pending requests selected by a predicate.
 *
 * Used when a pattern is remembered, so that other parallel requests which would now
 * match the same pattern are approved as well. The work is delegated to the handler's
 * optional `autoApprovePending`; without it the count is 0.
 *
 * @param predicate Function that returns true for requests that should be auto-approved
 * @param responseData Optional data to include in the auto-approval response
 * @returns Number of requests that were auto-approved
 */
autoApprovePendingRequests(
    predicate: (request: ApprovalRequest) => boolean,
    responseData?: Record<string, unknown>
): number {
    const approvedCount = this.handler?.autoApprovePending?.(predicate, responseData) ?? 0;
    // Only log when something was actually approved
    if (!(approvedCount > 0)) {
        return approvedCount;
    }
    this.logger.info(`Auto-approved ${approvedCount} pending request(s) due to matching pattern`);
    return approvedCount;
}
/**
 * Get the current configuration.
 *
 * @returns A shallow copy of the configuration: top-level properties are copied,
 *          nested objects are shared with the manager's own config
 */
getConfig(): ApprovalManagerConfig {
    const snapshot: ApprovalManagerConfig = { ...this.config };
    return snapshot;
}
/**
 * Register (or clear) the approval handler used for manual approvals.
 *
 * The handler will be called for:
 * - Tool confirmation requests when toolConfirmation.mode is 'manual'
 * - All elicitation requests (when elicitation is enabled, regardless of toolConfirmation.mode)
 *
 * A handler must be set before processing requests if:
 * - toolConfirmation.mode is 'manual', or
 * - elicitation is enabled (elicitation.enabled is true)
 *
 * @param handler The approval handler function, or null to clear
 */
setHandler(handler: ApprovalHandler | null): void {
    // null is the public "clear" value; internally an absent handler is undefined
    this.handler = handler ?? undefined;
    const action = handler ? 'registered' : 'cleared';
    this.logger.debug(`Approval handler ${action}`);
}
/**
 * Remove the registered approval handler, if any, and log that it was cleared.
 */
clearHandler(): void {
    const note = 'Approval handler cleared';
    this.handler = undefined;
    this.logger.debug(note);
}
/**
 * Report whether an approval handler is currently registered.
 *
 * @returns `true` when a handler is set, `false` otherwise
 */
public hasHandler(): boolean {
    return !(this.handler === undefined);
}
/**
 * Return the registered approval handler, or fail with a configuration error
 * explaining when a handler is needed and how to resolve the situation.
 *
 * @returns The registered handler
 * @throws The error built by `ApprovalError.invalidConfig` when no handler is set
 * @private
 */
private ensureHandler(): ApprovalHandler {
    const active = this.handler;
    if (active) {
        return active;
    }
    // TODO: add an example for usage here for users
    const explanation =
        'An approval handler is required but not configured.\n' +
        'Handlers are required for:\n' +
        ' • manual tool confirmation mode\n' +
        ' • all elicitation requests (when elicitation is enabled)\n' +
        'Either:\n' +
        ' • set toolConfirmation.mode to "auto-approve" or "auto-deny", or\n' +
        ' • disable elicitation (set elicitation.enabled: false), or\n' +
        ' • call agent.setApprovalHandler(...) before processing requests.';
    throw ApprovalError.invalidConfig(explanation);
}
}

View File

@@ -0,0 +1,386 @@
// ============================================================================
// USER APPROVAL SCHEMAS - Zod validation schemas for approval requests/responses
// ============================================================================
import { z } from 'zod';
import type { JSONSchema7 } from 'json-schema';
import { ApprovalType, ApprovalStatus, DenialReason } from './types.js';
import type { ToolDisplayData } from '../tools/display-types.js';
import { isValidDisplayData } from '../tools/display-types.js';
// Zod schema that validates as object but types as JSONSchema7.
// The runtime check is only z.record(z.unknown()); the cast changes the inferred
// TypeScript type and does not add any JSON Schema validation of the contents.
const JsonSchema7Schema = z.record(z.unknown()) as z.ZodType<JSONSchema7>;
/**
* Schema for approval types.
* Built from the ApprovalType enum, so it tracks the enum's members automatically.
*/
export const ApprovalTypeSchema = z.nativeEnum(ApprovalType);
/**
* Schema for approval status.
* Built from the ApprovalStatus enum, so it tracks the enum's members automatically.
*/
export const ApprovalStatusSchema = z.nativeEnum(ApprovalStatus);
/**
* Schema for denial/cancellation reasons.
* Built from the DenialReason enum, so it tracks the enum's members automatically.
*/
export const DenialReasonSchema = z.nativeEnum(DenialReason);
// Custom Zod schema for ToolDisplayData validation.
// The actual check is delegated to isValidDisplayData; failures are reported with
// the 'Invalid ToolDisplayData' message.
const ToolDisplayDataSchema = z.custom<ToolDisplayData>((val) => isValidDisplayData(val), {
message: 'Invalid ToolDisplayData',
});
/**
* Tool confirmation metadata schema.
*
* Required: toolName, toolCallId, args. Optional: description, displayPreview,
* suggestedPatterns. The object is strict, so keys not listed here are not part of
* the schema. Request-level fields such as sessionId and timeout live on the request
* schemas, not in this metadata.
*/
export const ToolConfirmationMetadataSchema = z
.object({
toolName: z.string().describe('Name of the tool to confirm'),
toolCallId: z.string().describe('Unique tool call ID for tracking parallel tool calls'),
args: z.record(z.unknown()).describe('Arguments for the tool'),
description: z.string().optional().describe('Description of the tool'),
displayPreview: ToolDisplayDataSchema.optional().describe(
'Preview display data for approval UI (e.g., diff preview)'
),
suggestedPatterns: z
.array(z.string())
.optional()
.describe(
'Suggested patterns for session approval (for bash commands). ' +
'E.g., ["git push *", "git *"] for command "git push origin main"'
),
})
.strict()
.describe('Tool confirmation metadata');
/**
* Command confirmation metadata schema.
*
* Required: toolName, command. Optional: originalCommand (the command before
* normalization). The object is strict.
*
* TODO: Consider combining this with regular tools schemas for consistency
*/
export const CommandConfirmationMetadataSchema = z
.object({
toolName: z.string().describe('Name of the tool executing the command'),
command: z.string().describe('The normalized command to execute'),
originalCommand: z
.string()
.optional()
.describe('The original command before normalization'),
})
.strict()
.describe('Command confirmation metadata');
/**
* Elicitation metadata schema.
*
* Required: schema, prompt, serverName. Optional: context. The object is strict.
* The `schema` field is typed as JSONSchema7 but is only checked to be a record
* at runtime (see JsonSchema7Schema).
*/
export const ElicitationMetadataSchema = z
.object({
schema: JsonSchema7Schema.describe('JSON Schema for the form'),
prompt: z.string().describe('Prompt to show the user'),
serverName: z.string().describe('MCP server requesting input'),
context: z.record(z.unknown()).optional().describe('Additional context'),
})
.strict()
.describe('Elicitation metadata');
/**
* Custom approval metadata schema - flexible.
* A record with string keys and unconstrained values; the consumer defines the layout.
*/
export const CustomApprovalMetadataSchema = z.record(z.unknown()).describe('Custom metadata');
/**
* Directory access metadata schema.
* Used when a tool tries to access files outside the working directory.
*
* All four fields (path, parentDir, operation, toolName) are required and the
* object is strict. `operation` is limited to 'read', 'write' or 'edit'.
*/
export const DirectoryAccessMetadataSchema = z
.object({
path: z.string().describe('Full path being accessed'),
parentDir: z.string().describe('Parent directory (what gets approved for session)'),
operation: z.enum(['read', 'write', 'edit']).describe('Type of file operation'),
toolName: z.string().describe('Name of the tool requesting access'),
})
.strict()
.describe('Directory access metadata');
/**
* Base approval request schema.
*
* Holds the fields shared by every request type: approvalId (UUID), type,
* optional sessionId, optional timeout (positive integer, milliseconds) and
* timestamp (a Date). The per-type request schemas extend this with a literal
* `type` and the matching `metadata`.
*/
export const BaseApprovalRequestSchema = z
.object({
approvalId: z.string().uuid().describe('Unique approval identifier'),
type: ApprovalTypeSchema.describe('Type of approval'),
sessionId: z.string().optional().describe('Session identifier'),
timeout: z
.number()
.int()
.positive()
.optional()
.describe('Timeout in milliseconds (optional - no timeout if not specified)'),
timestamp: z.date().describe('When the request was created'),
})
.describe('Base approval request');
/**
* Tool confirmation request schema.
* Base request with `type` pinned to TOOL_CONFIRMATION and tool metadata; strict.
*/
export const ToolConfirmationRequestSchema = BaseApprovalRequestSchema.extend({
type: z.literal(ApprovalType.TOOL_CONFIRMATION),
metadata: ToolConfirmationMetadataSchema,
}).strict();
/**
* Command confirmation request schema.
* Base request with `type` pinned to COMMAND_CONFIRMATION and command metadata; strict.
*/
export const CommandConfirmationRequestSchema = BaseApprovalRequestSchema.extend({
type: z.literal(ApprovalType.COMMAND_CONFIRMATION),
metadata: CommandConfirmationMetadataSchema,
}).strict();
/**
* Elicitation request schema.
* Base request with `type` pinned to ELICITATION and elicitation metadata; strict.
*/
export const ElicitationRequestSchema = BaseApprovalRequestSchema.extend({
type: z.literal(ApprovalType.ELICITATION),
metadata: ElicitationMetadataSchema,
}).strict();
/**
* Custom approval request schema.
* Base request with `type` pinned to CUSTOM and free-form metadata; strict.
*/
export const CustomApprovalRequestSchema = BaseApprovalRequestSchema.extend({
type: z.literal(ApprovalType.CUSTOM),
metadata: CustomApprovalMetadataSchema,
}).strict();
/**
* Directory access request schema.
* Base request with `type` pinned to DIRECTORY_ACCESS and directory metadata; strict.
*/
export const DirectoryAccessRequestSchema = BaseApprovalRequestSchema.extend({
type: z.literal(ApprovalType.DIRECTORY_ACCESS),
metadata: DirectoryAccessMetadataSchema,
}).strict();
/**
* Discriminated union for all approval requests.
* The `type` field selects which of the five request schemas applies.
*/
export const ApprovalRequestSchema = z.discriminatedUnion('type', [
ToolConfirmationRequestSchema,
CommandConfirmationRequestSchema,
ElicitationRequestSchema,
CustomApprovalRequestSchema,
DirectoryAccessRequestSchema,
]);
/**
* Tool confirmation response data schema.
* Both fields are optional; the object is strict.
*/
export const ToolConfirmationResponseDataSchema = z
.object({
rememberChoice: z
.boolean()
.optional()
.describe('Remember this tool for the session (approves ALL uses of this tool)'),
rememberPattern: z
.string()
.optional()
.describe(
'Remember a command pattern for bash commands (e.g., "git *"). ' +
'Only applicable for bash_exec tool approvals.'
),
})
.strict()
.describe('Tool confirmation response data');
/**
* Command confirmation response data schema.
* Intentionally an empty strict object: no fields are accepted.
*/
export const CommandConfirmationResponseDataSchema = z
.object({
// Command confirmations don't have remember choice - they're per-command
// Could add command pattern remembering in future (e.g., "remember git push *")
})
.strict()
.describe('Command confirmation response data');
/**
* Elicitation response data schema.
* Requires `formData`, a record of the submitted form values; the object is strict.
*/
export const ElicitationResponseDataSchema = z
.object({
formData: z.record(z.unknown()).describe('Form data matching schema'),
})
.strict()
.describe('Elicitation response data');
/**
* Custom approval response data schema.
* A record with string keys and unconstrained values.
*/
export const CustomApprovalResponseDataSchema = z
.record(z.unknown())
.describe('Custom response data');
/**
* Directory access response data schema.
* The single field is optional; the object is strict.
*/
export const DirectoryAccessResponseDataSchema = z
.object({
rememberDirectory: z
.boolean()
.optional()
.describe('Remember this directory for the session (allows all file access within it)'),
})
.strict()
.describe('Directory access response data');
/**
* Base approval response schema.
*
* Holds the fields shared by every response: approvalId (UUID) and status are
* required; sessionId, reason, message and timeoutMs are optional. Unlike requests,
* responses carry no `type` field. The per-type response schemas extend this with
* an optional `data` payload.
*/
export const BaseApprovalResponseSchema = z
.object({
approvalId: z.string().uuid().describe('Must match request approvalId'),
status: ApprovalStatusSchema.describe('Approval status'),
sessionId: z.string().optional().describe('Session identifier'),
reason: DenialReasonSchema.optional().describe(
'Reason for denial/cancellation (only present when status is denied or cancelled)'
),
message: z
.string()
.optional()
.describe('Human-readable message explaining the denial/cancellation'),
timeoutMs: z
.number()
.int()
.positive()
.optional()
.describe('Timeout duration in milliseconds (present for timeout events)'),
})
.describe('Base approval response');
/**
* Tool confirmation response schema.
* Base response plus optional tool confirmation `data`; strict.
*/
export const ToolConfirmationResponseSchema = BaseApprovalResponseSchema.extend({
data: ToolConfirmationResponseDataSchema.optional(),
}).strict();
/**
* Command confirmation response schema.
* Base response plus optional (empty) command confirmation `data`; strict.
*/
export const CommandConfirmationResponseSchema = BaseApprovalResponseSchema.extend({
data: CommandConfirmationResponseDataSchema.optional(),
}).strict();
/**
* Elicitation response schema.
* Base response plus optional elicitation `data` (the form data); strict.
*/
export const ElicitationResponseSchema = BaseApprovalResponseSchema.extend({
data: ElicitationResponseDataSchema.optional(),
}).strict();
/**
* Custom approval response schema.
* Base response plus optional free-form `data`; strict.
*/
export const CustomApprovalResponseSchema = BaseApprovalResponseSchema.extend({
data: CustomApprovalResponseDataSchema.optional(),
}).strict();
/**
* Directory access response schema.
* Base response plus optional directory access `data`; strict.
*/
export const DirectoryAccessResponseSchema = BaseApprovalResponseSchema.extend({
data: DirectoryAccessResponseDataSchema.optional(),
}).strict();
/**
* Union of all approval responses.
* This is a plain union rather than a discriminated one: responses have no `type`
* field, so the variants differ only in the accepted shape of `data`.
*/
export const ApprovalResponseSchema = z.union([
ToolConfirmationResponseSchema,
CommandConfirmationResponseSchema,
ElicitationResponseSchema,
CustomApprovalResponseSchema,
DirectoryAccessResponseSchema,
]);
/**
 * Approval request details schema, used when creating a request.
 *
 * Contains the caller-supplied parts of a request: `type`, optional `sessionId`,
 * optional `timeout` and `metadata`. The `metadata` union on its own accepts any of
 * the five metadata shapes, so a refinement afterwards re-validates the metadata
 * against the schema that belongs to the declared `type` and reports a single
 * custom issue at `['metadata']` on mismatch.
 */
export const ApprovalRequestDetailsSchema = z
    .object({
        type: ApprovalTypeSchema,
        sessionId: z.string().optional(),
        timeout: z
            .number()
            .int()
            .positive()
            .optional()
            .describe('Timeout in milliseconds (optional - no timeout if not specified)'),
        metadata: z.union([
            ToolConfirmationMetadataSchema,
            CommandConfirmationMetadataSchema,
            ElicitationMetadataSchema,
            CustomApprovalMetadataSchema,
            DirectoryAccessMetadataSchema,
        ]),
    })
    .superRefine((details, ctx) => {
        // Pick the metadata schema and the mismatch message that go with the declared type
        let expected: z.ZodTypeAny;
        let mismatch: string;
        switch (details.type) {
            case ApprovalType.TOOL_CONFIRMATION:
                expected = ToolConfirmationMetadataSchema;
                mismatch =
                    'Metadata must match ToolConfirmationMetadataSchema for TOOL_CONFIRMATION type';
                break;
            case ApprovalType.COMMAND_CONFIRMATION:
                expected = CommandConfirmationMetadataSchema;
                mismatch =
                    'Metadata must match CommandConfirmationMetadataSchema for COMMAND_CONFIRMATION type';
                break;
            case ApprovalType.ELICITATION:
                expected = ElicitationMetadataSchema;
                mismatch = 'Metadata must match ElicitationMetadataSchema for ELICITATION type';
                break;
            case ApprovalType.DIRECTORY_ACCESS:
                expected = DirectoryAccessMetadataSchema;
                mismatch =
                    'Metadata must match DirectoryAccessMetadataSchema for DIRECTORY_ACCESS type';
                break;
            case ApprovalType.CUSTOM:
                expected = CustomApprovalMetadataSchema;
                mismatch = 'Metadata must match CustomApprovalMetadataSchema for CUSTOM type';
                break;
            default:
                // No per-type check for any other value
                return;
        }
        if (expected.safeParse(details.metadata).success) {
            return;
        }
        ctx.addIssue({
            code: z.ZodIssueCode.custom,
            message: mismatch,
            path: ['metadata'],
        });
    });
/**
* Type inference for validated schemas.
* Each alias is the parsed output type of the schema it names.
* NOTE(review): only the tool confirmation, elicitation and custom request schemas
* have a per-type Validated* alias here; command confirmation and directory access
* do not — confirm whether that is intentional.
*/
export type ValidatedApprovalRequest = z.output<typeof ApprovalRequestSchema>;
export type ValidatedApprovalResponse = z.output<typeof ApprovalResponseSchema>;
export type ValidatedToolConfirmationRequest = z.output<typeof ToolConfirmationRequestSchema>;
export type ValidatedElicitationRequest = z.output<typeof ElicitationRequestSchema>;
export type ValidatedCustomApprovalRequest = z.output<typeof CustomApprovalRequestSchema>;

View File

@@ -0,0 +1,352 @@
// ============================================================================
// USER APPROVAL TYPES - Generalized approval and user input system
// ============================================================================
import type { z } from 'zod';
import type {
ToolConfirmationMetadataSchema,
CommandConfirmationMetadataSchema,
ElicitationMetadataSchema,
CustomApprovalMetadataSchema,
DirectoryAccessMetadataSchema,
BaseApprovalRequestSchema,
ToolConfirmationRequestSchema,
CommandConfirmationRequestSchema,
ElicitationRequestSchema,
CustomApprovalRequestSchema,
DirectoryAccessRequestSchema,
ApprovalRequestSchema,
ApprovalRequestDetailsSchema,
ToolConfirmationResponseDataSchema,
CommandConfirmationResponseDataSchema,
ElicitationResponseDataSchema,
CustomApprovalResponseDataSchema,
DirectoryAccessResponseDataSchema,
BaseApprovalResponseSchema,
ToolConfirmationResponseSchema,
CommandConfirmationResponseSchema,
ElicitationResponseSchema,
CustomApprovalResponseSchema,
DirectoryAccessResponseSchema,
ApprovalResponseSchema,
} from './schemas.js';
/**
* Types of approval requests supported by the system.
* The string values are what appears in the `type` field of a request.
*/
export enum ApprovalType {
/**
* Binary approval for tool execution
* Metadata contains: toolName, toolCallId, args, and optionally description,
* displayPreview, suggestedPatterns
*/
TOOL_CONFIRMATION = 'tool_confirmation',
/**
* Binary approval for dangerous commands within an already-approved tool
* Metadata contains: toolName, command, and optionally originalCommand
* (sessionId is provided at the request level, not in metadata)
*/
COMMAND_CONFIRMATION = 'command_confirmation',
/**
* Schema-based form input from MCP servers
* Metadata contains: schema, prompt, serverName, and optionally context
*/
ELICITATION = 'elicitation',
/**
* Approval for accessing files outside the working directory
* Metadata contains: path, parentDir, operation, toolName
*/
DIRECTORY_ACCESS = 'directory_access',
/**
* Custom approval types for extensibility
* Metadata format defined by consumer
*/
CUSTOM = 'custom',
}
/**
* Status of an approval response.
* For DENIED and CANCELLED, a DenialReason may accompany the response.
*/
export enum ApprovalStatus {
/** The request was approved */
APPROVED = 'approved',
/** The request was denied */
DENIED = 'denied',
/** The request was cancelled */
CANCELLED = 'cancelled',
}
/**
* Reason for denial or cancellation.
* Provides context about why an approval was not granted; carried in the optional
* `reason` field of an approval response.
*/
export enum DenialReason {
/** User explicitly clicked deny/reject */
USER_DENIED = 'user_denied',
/** System denied due to policy (auto-deny mode, alwaysDeny list) */
SYSTEM_DENIED = 'system_denied',
/** Request timed out waiting for user response */
TIMEOUT = 'timeout',
/** User cancelled the request */
USER_CANCELLED = 'user_cancelled',
/** System cancelled (session ended, agent stopped) */
SYSTEM_CANCELLED = 'system_cancelled',
/** Validation failed (form validation, schema mismatch) */
VALIDATION_FAILED = 'validation_failed',
/** Elicitation disabled in configuration */
ELICITATION_DISABLED = 'elicitation_disabled',
}
// ============================================================================
// Metadata Types - Derived from Zod schemas
// ============================================================================
/**
* Tool confirmation specific metadata.
* Output type of ToolConfirmationMetadataSchema, so it cannot drift from the schema.
*/
export type ToolConfirmationMetadata = z.output<typeof ToolConfirmationMetadataSchema>;
/**
* Command confirmation specific metadata.
* Output type of CommandConfirmationMetadataSchema.
*/
export type CommandConfirmationMetadata = z.output<typeof CommandConfirmationMetadataSchema>;
/**
* Elicitation specific metadata (MCP).
* Output type of ElicitationMetadataSchema.
*/
export type ElicitationMetadata = z.output<typeof ElicitationMetadataSchema>;
/**
* Custom approval metadata - flexible structure.
* Output type of CustomApprovalMetadataSchema (a record of unknown values).
*/
export type CustomApprovalMetadata = z.output<typeof CustomApprovalMetadataSchema>;
/**
* Directory access metadata.
* Output type of DirectoryAccessMetadataSchema.
*/
export type DirectoryAccessMetadata = z.output<typeof DirectoryAccessMetadataSchema>;
// ============================================================================
// Request Types - Derived from Zod schemas
// ============================================================================
/**
* Base approval request that all approvals extend.
* Output type of BaseApprovalRequestSchema. The `_TMetadata` type parameter is not
* used in the resulting type; the alias is the same for every argument.
*/
export type BaseApprovalRequest<_TMetadata = unknown> = z.output<typeof BaseApprovalRequestSchema>;
/**
* Tool confirmation request.
* Output type of ToolConfirmationRequestSchema.
*/
export type ToolConfirmationRequest = z.output<typeof ToolConfirmationRequestSchema>;
/**
* Command confirmation request.
* Output type of CommandConfirmationRequestSchema.
*/
export type CommandConfirmationRequest = z.output<typeof CommandConfirmationRequestSchema>;
/**
* Elicitation request from MCP server.
* Output type of ElicitationRequestSchema.
*/
export type ElicitationRequest = z.output<typeof ElicitationRequestSchema>;
/**
* Custom approval request.
* Output type of CustomApprovalRequestSchema.
*/
export type CustomApprovalRequest = z.output<typeof CustomApprovalRequestSchema>;
/**
* Directory access request.
* Output type of DirectoryAccessRequestSchema.
*/
export type DirectoryAccessRequest = z.output<typeof DirectoryAccessRequestSchema>;
/**
* Union of all approval request types.
* Output type of ApprovalRequestSchema; narrow on the `type` field to reach the
* metadata of a specific request kind.
*/
export type ApprovalRequest = z.output<typeof ApprovalRequestSchema>;
// ============================================================================
// Response Data Types - Derived from Zod schemas
// ============================================================================
/**
* Tool confirmation response data.
* Output type of ToolConfirmationResponseDataSchema.
*/
export type ToolConfirmationResponseData = z.output<typeof ToolConfirmationResponseDataSchema>;
/**
* Command confirmation response data.
* Output type of CommandConfirmationResponseDataSchema (an object with no fields).
*/
export type CommandConfirmationResponseData = z.output<
typeof CommandConfirmationResponseDataSchema
>;
/**
* Elicitation response data - validated form inputs.
* Output type of ElicitationResponseDataSchema.
*/
export type ElicitationResponseData = z.output<typeof ElicitationResponseDataSchema>;
/**
* Custom approval response data.
* Output type of CustomApprovalResponseDataSchema (a record of unknown values).
*/
export type CustomApprovalResponseData = z.output<typeof CustomApprovalResponseDataSchema>;
/**
* Directory access response data.
* Output type of DirectoryAccessResponseDataSchema.
*/
export type DirectoryAccessResponseData = z.output<typeof DirectoryAccessResponseDataSchema>;
// ============================================================================
// Response Types - Derived from Zod schemas
// ============================================================================
/**
* Base approval response.
* Output type of BaseApprovalResponseSchema. The `_TData` type parameter is not
* used in the resulting type; the alias is the same for every argument.
*/
export type BaseApprovalResponse<_TData = unknown> = z.output<typeof BaseApprovalResponseSchema>;
/**
* Tool confirmation response.
* Output type of ToolConfirmationResponseSchema.
*/
export type ToolConfirmationResponse = z.output<typeof ToolConfirmationResponseSchema>;
/**
* Command confirmation response.
* Output type of CommandConfirmationResponseSchema.
*/
export type CommandConfirmationResponse = z.output<typeof CommandConfirmationResponseSchema>;
/**
* Elicitation response.
* Output type of ElicitationResponseSchema.
*/
export type ElicitationResponse = z.output<typeof ElicitationResponseSchema>;
/**
* Custom approval response.
* Output type of CustomApprovalResponseSchema.
*/
export type CustomApprovalResponse = z.output<typeof CustomApprovalResponseSchema>;
/**
* Directory access response.
* Output type of DirectoryAccessResponseSchema.
*/
export type DirectoryAccessResponse = z.output<typeof DirectoryAccessResponseSchema>;
/**
* Union of all approval response types.
* Output type of ApprovalResponseSchema. Responses have no `type` field, so this
* union cannot be narrowed by a discriminator.
*/
export type ApprovalResponse = z.output<typeof ApprovalResponseSchema>;
// ============================================================================
// Helper Types
// ============================================================================
/**
* Details for creating an approval request.
* Output type of ApprovalRequestDetailsSchema: `type`, `metadata`, and optional
* `sessionId` / `timeout`. It has no approvalId or timestamp field.
*/
export type ApprovalRequestDetails = z.output<typeof ApprovalRequestDetailsSchema>;
/**
* Handler interface for processing approval requests.
*
* This is the core abstraction for approval handling in Dexto. When tool confirmation
* mode is 'manual', a handler must be provided to process approval requests.
* Per the ApprovalManager.setHandler documentation, a handler is also required
* whenever elicitation is enabled.
*
* The handler is a callable interface that:
* - Processes approval requests and returns responses
* - Manages pending approval state (for cancellation)
* - Provides lifecycle management methods
*
* Only the call signature is required; every method is optional.
*
* @example
* ```typescript
* const handler: ApprovalHandler = Object.assign(
* async (request: ApprovalRequest) => {
* // ApprovalRequest is a union: narrow on `type` before reading type-specific metadata
* if (request.type === ApprovalType.TOOL_CONFIRMATION) {
* console.log(`Approve tool: ${request.metadata.toolName}?`);
* }
* // In real implementation, wait for user input
* return {
* approvalId: request.approvalId,
* status: ApprovalStatus.APPROVED,
* sessionId: request.sessionId,
* };
* },
* {
* cancel: (id: string) => { },
* cancelAll: () => { },
* getPending: () => [] as string[],
* }
* );
* ```
*/
export interface ApprovalHandler {
/**
* Process an approval request
* @param request The approval request to handle
* @returns Promise resolving to the approval response
*/
(request: ApprovalRequest): Promise<ApprovalResponse>;
/**
* Cancel a specific pending approval request (optional)
* @param approvalId The ID of the approval to cancel
* @remarks Not all handlers support cancellation (e.g., auto-approve handlers)
*/
cancel?(approvalId: string): void;
/**
* Cancel all pending approval requests (optional)
* @remarks Not all handlers support cancellation (e.g., auto-approve handlers)
*/
cancelAll?(): void;
/**
* Get list of pending approval request IDs (optional)
* @returns Array of approval IDs currently pending
* @remarks Not all handlers track pending requests (e.g., auto-approve handlers)
*/
getPending?(): string[];
/**
* Get full pending approval requests (optional)
* @returns Array of pending approval requests
* @remarks Not all handlers track pending requests (e.g., auto-approve handlers)
*/
getPendingRequests?(): ApprovalRequest[];
/**
* Auto-approve pending requests that match a predicate (optional)
* Used when a pattern is remembered to auto-approve other parallel requests
* that would now match the same pattern.
*
* @param predicate Function that returns true for requests that should be auto-approved
* @param responseData Optional data to include in the auto-approval response
* @returns Number of requests that were auto-approved
* @remarks Not all handlers support this (e.g., auto-approve handlers don't need it)
*/
autoApprovePending?(
predicate: (request: ApprovalRequest) => boolean,
responseData?: Record<string, unknown>
): number;
}

View File

@@ -0,0 +1,405 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { ContextManager } from '../manager.js';
import { filterCompacted } from '../utils.js';
import { ReactiveOverflowStrategy } from './strategies/reactive-overflow.js';
import { VercelMessageFormatter } from '../../llm/formatters/vercel.js';
import { SystemPromptManager } from '../../systemPrompt/manager.js';
import { SystemPromptConfigSchema } from '../../systemPrompt/schemas.js';
import { MemoryHistoryProvider } from '../../session/history/memory.js';
import { ResourceManager } from '../../resources/index.js';
import { MCPManager } from '../../mcp/manager.js';
import { MemoryManager } from '../../memory/index.js';
import { createStorageManager, StorageManager } from '../../storage/storage-manager.js';
import { createLogger } from '../../logger/factory.js';
import type { ModelMessage } from 'ai';
import type { LanguageModel } from 'ai';
import type { ValidatedLLMConfig } from '../../llm/schemas.js';
import type { ValidatedStorageConfig } from '../../storage/schemas.js';
import type { IDextoLogger } from '../../logger/v2/types.js';
import type { InternalMessage } from '../types.js';
// Only mock the AI SDK's generateText - everything else is real.
// The factory loads the actual 'ai' module, re-exports all of it, and overrides just
// generateText with a vi.fn() stub, so other exports of 'ai' keep their real behavior.
vi.mock('ai', async (importOriginal) => {
const actual = await importOriginal<typeof import('ai')>();
return {
...actual,
generateText: vi.fn(),
};
});
import { generateText } from 'ai';
const mockGenerateText = vi.mocked(generateText);
/**
 * Build a minimal stand-in for a LanguageModel.
 *
 * Only identification fields and stubbed `doStream` / `doGenerate` functions are
 * provided, so the object is cast rather than being a complete implementation.
 *
 * @returns The stub, typed as LanguageModel
 */
function createMockModel(): LanguageModel {
    const stub = {
        modelId: 'test-model',
        provider: 'test-provider',
        specificationVersion: 'v1',
        doStream: vi.fn(),
        doGenerate: vi.fn(),
    };
    return stub as unknown as LanguageModel;
}
/**
* Integration tests for context compaction.
*
* These tests use real components (ContextManager, ReactiveOverflowStrategy, filterCompacted)
* and only mock the LLM calls. This ensures the full compaction flow works correctly,
* including the interaction between compaction and filterCompacted.
*/
describe('Context Compaction Integration Tests', () => {
let contextManager: ContextManager<ModelMessage>;
let compactionStrategy: ReactiveOverflowStrategy;
let logger: IDextoLogger;
let historyProvider: MemoryHistoryProvider;
let storageManager: StorageManager;
let mcpManager: MCPManager;
let resourceManager: ResourceManager;
const sessionId = 'compaction-test-session';
// Rebuild the whole component graph before every test so tests do not share state.
// Everything constructed here is a real implementation; only generateText is stubbed.
beforeEach(async () => {
vi.clearAllMocks();
// Create real logger (quiet for tests)
logger = createLogger({
config: {
level: 'warn',
transports: [{ type: 'console', colorize: false }],
},
agentId: 'test-agent',
});
// Create real storage manager with in-memory backends
// (cast because this literal is not produced by the storage config schema)
const storageConfig = {
cache: { type: 'in-memory' },
database: { type: 'in-memory' },
blob: {
type: 'in-memory',
maxBlobSize: 10 * 1024 * 1024,
maxTotalSize: 100 * 1024 * 1024,
},
} as unknown as ValidatedStorageConfig;
storageManager = await createStorageManager(storageConfig, logger);
// Create real MCP and resource managers
mcpManager = new MCPManager(logger);
resourceManager = new ResourceManager(
mcpManager,
{
internalResourcesConfig: { enabled: false, resources: [] },
blobStore: storageManager.getBlobStore(),
},
logger
);
await resourceManager.initialize();
// Create real history provider
historyProvider = new MemoryHistoryProvider(logger);
// Create real memory and system prompt managers
const memoryManager = new MemoryManager(storageManager.getDatabase(), logger);
const systemPromptConfig = SystemPromptConfigSchema.parse('You are a helpful assistant.');
const systemPromptManager = new SystemPromptManager(
systemPromptConfig,
'/tmp',
memoryManager,
undefined,
logger
);
// Create real context manager
const formatter = new VercelMessageFormatter(logger);
// (cast because this literal is not produced by the LLM config schema)
const llmConfig = {
provider: 'openai',
model: 'gpt-4',
apiKey: 'test-api-key',
maxInputTokens: 100000,
maxOutputTokens: 4096,
} as unknown as ValidatedLLMConfig;
// NOTE(review): the literal 100000 below presumably is the max input token budget
// and mirrors llmConfig.maxInputTokens — confirm against the ContextManager constructor
contextManager = new ContextManager<ModelMessage>(
llmConfig,
formatter,
systemPromptManager,
100000,
historyProvider,
sessionId,
resourceManager,
logger
);
// Create real compaction strategy
compactionStrategy = new ReactiveOverflowStrategy(createMockModel(), {}, logger);
// Default mock for generateText (compaction summary)
mockGenerateText.mockResolvedValue({
text: '<session_compaction>Summary of conversation</session_compaction>',
} as Awaited<ReturnType<typeof generateText>>);
});
afterEach(async () => {
vi.restoreAllMocks();
logger.destroy();
});
/**
* Helper to add a batch of messages to the context
*/
async function addMessages(count: number): Promise<void> {
for (let i = 0; i < count; i++) {
await contextManager.addUserMessage([{ type: 'text', text: `Question ${i}` }]);
await contextManager.addAssistantMessage(`Answer ${i}`);
}
}
/**
* Helper to run compaction and add result to history
*/
async function runCompaction(): Promise<InternalMessage | null> {
const history = await contextManager.getHistory();
const summaryMessages = await compactionStrategy.compact(history);
if (summaryMessages.length === 0) {
return null;
}
const summary = summaryMessages[0]!;
await contextManager.addMessage(summary);
return summary;
}
describe('Single Compaction', () => {
it('should compact history and filterCompacted should return correct messages', async () => {
// Add 20 messages (10 turns)
await addMessages(10);
const historyBefore = await contextManager.getHistory();
expect(historyBefore).toHaveLength(20);
// Run compaction
const summary = await runCompaction();
expect(summary).not.toBeNull();
expect(summary?.metadata?.isSummary).toBe(true);
// Verify history grew by 1 (summary added)
const historyAfter = await contextManager.getHistory();
expect(historyAfter).toHaveLength(21);
// filterCompacted should return much fewer messages
const filtered = filterCompacted(historyAfter);
expect(filtered.length).toBeLessThan(historyAfter.length);
expect(filtered[0]?.metadata?.isSummary).toBe(true);
// Preserved messages should be non-summary messages
const nonSummaryMessages = filtered.filter((m) => !m.metadata?.isSummary);
expect(nonSummaryMessages.length).toBeGreaterThan(0);
expect(nonSummaryMessages.length).toBeLessThan(10); // Some were summarized
});
});
describe('Multiple Sequential Compactions', () => {
it('should handle two compactions correctly', async () => {
// === FIRST COMPACTION ===
await addMessages(10);
const summary1 = await runCompaction();
expect(summary1).not.toBeNull();
expect(summary1?.metadata?.isRecompaction).toBeUndefined();
const historyAfter1 = await contextManager.getHistory();
// Verify first compaction produced fewer filtered messages
const filtered1 = filterCompacted(historyAfter1);
expect(filtered1.length).toBeLessThan(historyAfter1.length);
// === ADD MORE MESSAGES ===
await addMessages(10);
const historyBefore2 = await contextManager.getHistory();
// 21 (after first compaction) + 20 new = 41
expect(historyBefore2).toHaveLength(41);
// === SECOND COMPACTION ===
const summary2 = await runCompaction();
expect(summary2).not.toBeNull();
expect(summary2?.metadata?.isRecompaction).toBe(true);
const historyAfter2 = await contextManager.getHistory();
expect(historyAfter2).toHaveLength(42);
const filtered2 = filterCompacted(historyAfter2);
// Critical: second compaction should result in FEWER filtered messages
// (or at least not significantly more)
expect(filtered2.length).toBeLessThan(30);
// Only the most recent summary should be in filtered result
const summariesInFiltered = filtered2.filter((m) => m.metadata?.isSummary);
expect(summariesInFiltered).toHaveLength(1);
expect(summariesInFiltered[0]?.metadata?.isRecompaction).toBe(true);
// The first summary should NOT be in filtered result
expect(filtered2).not.toContain(summary1);
});
it('should handle three compactions correctly', async () => {
// === FIRST COMPACTION ===
await addMessages(10);
const summary1 = await runCompaction();
expect(summary1).not.toBeNull();
// === SECOND COMPACTION ===
await addMessages(10);
const summary2 = await runCompaction();
expect(summary2).not.toBeNull();
expect(summary2?.metadata?.isRecompaction).toBe(true);
// === THIRD COMPACTION ===
await addMessages(10);
const summary3 = await runCompaction();
expect(summary3).not.toBeNull();
expect(summary3?.metadata?.isRecompaction).toBe(true);
// Verify final state
const historyFinal = await contextManager.getHistory();
// 20 + 1 + 20 + 1 + 20 + 1 = 63
expect(historyFinal).toHaveLength(63);
const filteredFinal = filterCompacted(historyFinal);
// Critical assertions:
// 1. Only the most recent summary should be visible
const summariesInFiltered = filteredFinal.filter((m) => m.metadata?.isSummary);
expect(summariesInFiltered).toHaveLength(1);
expect(summariesInFiltered[0]).toBe(summary3);
// 2. Neither summary1 nor summary2 should be in the result
expect(filteredFinal).not.toContain(summary1);
expect(filteredFinal).not.toContain(summary2);
// 3. Filtered result should be much smaller than full history
expect(filteredFinal.length).toBeLessThan(20);
// 4. Preserved messages should exist and be reasonable count
const nonSummaryMessages = filteredFinal.filter((m) => !m.metadata?.isSummary);
expect(nonSummaryMessages.length).toBeGreaterThan(0);
expect(nonSummaryMessages.length).toBeLessThan(15);
});
it('should correctly calculate originalMessageCount for each compaction', async () => {
// === FIRST COMPACTION ===
await addMessages(10);
const summary1 = await runCompaction();
expect(summary1).not.toBeNull();
// First compaction: originalMessageCount should be the number of summarized messages
const originalCount1 = summary1?.metadata?.originalMessageCount;
expect(typeof originalCount1).toBe('number');
expect(originalCount1).toBeLessThan(20); // Less than total, some were preserved
// === SECOND COMPACTION ===
await addMessages(10);
const historyBefore2 = await contextManager.getHistory();
const summary1Index = historyBefore2.findIndex((m) => m === summary1);
const summary2 = await runCompaction();
expect(summary2).not.toBeNull();
// Second compaction: originalMessageCount should be ABSOLUTE
// It should be > summary1Index (pointing past the first summary)
const originalCount2 = summary2?.metadata?.originalMessageCount;
expect(typeof originalCount2).toBe('number');
expect(originalCount2).toBeGreaterThan(summary1Index);
// Verify filterCompacted works with this absolute count
const historyAfter2 = await contextManager.getHistory();
const filtered2 = filterCompacted(historyAfter2);
// The filtered result should NOT include summary1
expect(filtered2).not.toContain(summary1);
// Preserved messages should exist
const preserved = filtered2.filter((m) => !m.metadata?.isSummary);
expect(preserved.length).toBeGreaterThan(0);
});
});
describe('Edge Cases', () => {
it('should not compact if history is too short', async () => {
await addMessages(1); // Only 2 messages
const summary = await runCompaction();
expect(summary).toBeNull();
});
it('should not re-compact if few messages after existing summary', async () => {
// First compaction
await addMessages(10);
await runCompaction();
// Add only 2 messages (4 messages = 2 turns, below threshold)
await addMessages(2);
// Should skip re-compaction
const summary2 = await runCompaction();
expect(summary2).toBeNull();
});
it('should handle compaction through prepareHistory flow', async () => {
// This tests the real integration with ContextManager.prepareHistory()
// which is what's used when formatting messages for LLM
await addMessages(10);
await runCompaction();
await addMessages(10);
await runCompaction();
// prepareHistory uses filterCompacted internally
const { preparedHistory, stats } = await contextManager.prepareHistory();
// Stats should reflect the filtered counts
expect(stats.filteredCount).toBeLessThan(stats.originalCount);
// preparedHistory should only contain filtered messages
const summaries = preparedHistory.filter((m) => m.metadata?.isSummary);
expect(summaries).toHaveLength(1);
});
});
describe('Token Estimation After Compaction', () => {
it('should provide accurate token estimates after compaction', async () => {
await addMessages(10);
// Get estimate before compaction
const estimateBefore = await contextManager.getContextTokenEstimate({ mcpManager }, {});
const messagesBefore = estimateBefore.stats.filteredMessageCount;
// Run compaction
await runCompaction();
contextManager.resetActualTokenTracking();
// Get estimate after compaction
const estimateAfter = await contextManager.getContextTokenEstimate({ mcpManager }, {});
const messagesAfter = estimateAfter.stats.filteredMessageCount;
// After compaction, should have fewer messages
expect(messagesAfter).toBeLessThan(messagesBefore);
});
it('should maintain consistency between /context and compaction stats', async () => {
await addMessages(10);
await runCompaction();
await addMessages(10);
await runCompaction();
// This is what /context command uses
const estimate = await contextManager.getContextTokenEstimate({ mcpManager }, {});
// The filteredMessageCount should match what filterCompacted returns
const history = await contextManager.getHistory();
const filtered = filterCompacted(history);
expect(estimate.stats.filteredMessageCount).toBe(filtered.length);
expect(estimate.stats.originalMessageCount).toBe(history.length);
});
});
});

View File

@@ -0,0 +1,60 @@
import { z } from 'zod';
import type { ICompactionStrategy } from './types.js';
import type { CompactionContext, CompactionConfig } from './provider.js';
import { compactionRegistry } from './registry.js';
import { ContextError } from '../errors.js';
/**
 * Build the compaction strategy described by `config`.
 *
 * Steps, in order:
 * 1. Return `null` immediately when the config is explicitly disabled.
 * 2. Look up the provider registered for `config.type`.
 * 3. Validate `config` against the provider's Zod schema.
 * 4. Ensure a language model is present when the provider declares it needs one.
 * 5. Let the provider create the strategy and log the result.
 *
 * Any `ZodError` raised during steps 3-5 is converted into the error built by
 * `ContextError.compactionValidation`; every other error is rethrown untouched.
 *
 * @param config - Compaction configuration from agent config
 * @param context - Logger plus an optional LanguageModel
 * @returns The created strategy, or `null` when `config.enabled` is `false`
 */
export async function createCompactionStrategy(
    config: CompactionConfig,
    context: CompactionContext
): Promise<ICompactionStrategy | null> {
    // Explicit opt-out: nothing is looked up or validated in this case.
    if (config.enabled === false) {
        context.logger.info(`Compaction provider '${config.type}' is disabled`);
        return null;
    }

    const provider = compactionRegistry.get(config.type);
    if (!provider) {
        // Report the registered types alongside the unknown one.
        throw ContextError.compactionInvalidType(config.type, compactionRegistry.getTypes());
    }

    try {
        const parsedConfig = provider.configSchema.parse(config);

        // A provider that needs an LLM cannot be created without a model.
        const modelMissing = !context.model;
        if (provider.metadata?.requiresLLM && modelMissing) {
            throw ContextError.compactionMissingLLM(config.type);
        }

        const created = await provider.create(parsedConfig, context);
        const label = provider.metadata?.displayName || config.type;
        context.logger.info(`Created compaction strategy: ${label}`);
        return created;
    } catch (caught) {
        if (!(caught instanceof z.ZodError)) {
            throw caught;
        }
        throw ContextError.compactionValidation(config.type, caught.errors);
    }
}

View File

@@ -0,0 +1,31 @@
// Core types and interfaces
export * from './types.js';
export * from './provider.js';
export * from './registry.js';
export * from './factory.js';
export * from './schemas.js';
// Strategies
export * from './strategies/reactive-overflow.js';
export * from './strategies/noop.js';
// Providers
export * from './providers/reactive-overflow-provider.js';
export * from './providers/noop-provider.js';
// Utilities
export * from './overflow.js';
// Register built-in providers
import { compactionRegistry } from './registry.js';
import { reactiveOverflowProvider } from './providers/reactive-overflow-provider.js';
import { noopProvider } from './providers/noop-provider.js';
// Auto-register built-in providers when module is imported
// Guard against duplicate registration when module is imported multiple times:
// each provider is only registered if its type is not already present in the registry.
// The string passed to has() must match the `type` field of the provider registered below it.
if (!compactionRegistry.has('reactive-overflow')) {
compactionRegistry.register(reactiveOverflowProvider);
}
if (!compactionRegistry.has('noop')) {
compactionRegistry.register(noopProvider);
}

View File

@@ -0,0 +1,234 @@
import { describe, it, expect } from 'vitest';
import { isOverflow, getCompactionTarget, type ModelLimits } from './overflow.js';
import type { TokenUsage } from '../../llm/types.js';
/**
 * Unit tests for isOverflow().
 *
 * Calls without an explicit threshold exercise the function's default
 * (0.9); boundary tests at the full context window pass 1.0 explicitly.
 */
describe('isOverflow', () => {
    describe('basic overflow detection', () => {
        it('should return false when input tokens are well below limit', () => {
            const tokens: TokenUsage = {
                inputTokens: 50000,
            };
            const modelLimits: ModelLimits = {
                contextWindow: 200000,
            };
            const result = isOverflow(tokens, modelLimits);
            expect(result).toBe(false);
        });

        it('should return false when input tokens are just below context window (with 100% threshold)', () => {
            const tokens: TokenUsage = {
                inputTokens: 199999,
            };
            const modelLimits: ModelLimits = {
                contextWindow: 200000,
            };
            // Explicitly use 1.0 threshold to test full capacity boundary
            const result = isOverflow(tokens, modelLimits, 1.0);
            expect(result).toBe(false);
        });

        it('should return true when input tokens exceed context window', () => {
            const tokens: TokenUsage = {
                inputTokens: 200001,
            };
            const modelLimits: ModelLimits = {
                contextWindow: 200000,
            };
            const result = isOverflow(tokens, modelLimits);
            expect(result).toBe(true);
        });

        it('should return false when input tokens exactly equal context window (with 100% threshold)', () => {
            // Edge case: exactly at the limit should NOT trigger overflow
            // (inputTokens > effectiveLimit, not >=)
            const tokens: TokenUsage = {
                inputTokens: 200000,
            };
            const modelLimits: ModelLimits = {
                contextWindow: 200000,
            };
            // Explicitly use 1.0 threshold to test full capacity boundary
            const result = isOverflow(tokens, modelLimits, 1.0);
            expect(result).toBe(false);
        });
    });

    describe('handling missing inputTokens', () => {
        it('should default to 0 when inputTokens is undefined', () => {
            const tokens: TokenUsage = {
                // inputTokens is undefined
            };
            const modelLimits: ModelLimits = {
                contextWindow: 200000,
            };
            const result = isOverflow(tokens, modelLimits);
            expect(result).toBe(false);
        });
    });

    describe('small context windows', () => {
        it('should correctly detect overflow for small context windows', () => {
            const tokens: TokenUsage = {
                inputTokens: 8193,
            };
            const modelLimits: ModelLimits = {
                contextWindow: 8192,
            };
            const result = isOverflow(tokens, modelLimits);
            expect(result).toBe(true);
        });
    });

    describe('configurable context window (via maxContextTokens override)', () => {
        it('should work with reduced context window from config', () => {
            // User configured maxContextTokens: 50000
            // Even though model supports 200K, we treat it as 50K
            const tokens: TokenUsage = {
                inputTokens: 50001,
            };
            // The effective context window passed would be 50000
            const modelLimits: ModelLimits = {
                contextWindow: 50000,
            };
            const result = isOverflow(tokens, modelLimits);
            expect(result).toBe(true);
        });
    });

    describe('thresholdPercent parameter', () => {
        it('should trigger overflow earlier when thresholdPercent is less than 1.0', () => {
            // contextWindow: 200000
            // With threshold 0.9: effectiveLimit = floor(200000 * 0.9) = 180000
            const tokens: TokenUsage = {
                inputTokens: 180001, // Just over 90% threshold
            };
            const modelLimits: ModelLimits = {
                contextWindow: 200000,
            };
            // Without threshold (or threshold=1.0), this would NOT overflow
            expect(isOverflow(tokens, modelLimits, 1.0)).toBe(false);
            // With threshold=0.9, this SHOULD overflow
            expect(isOverflow(tokens, modelLimits, 0.9)).toBe(true);
        });

        it('should use default threshold of 0.9 when not specified', () => {
            // 180001 tokens is just above 90% of the window (180000) but below 100%.
            // This input returns true only under a 0.9 default; at exactly 180000
            // the result is false for both 0.9 and 1.0, so a changed default would
            // go unnoticed.
            const tokens: TokenUsage = {
                inputTokens: 180001,
            };
            const modelLimits: ModelLimits = {
                contextWindow: 200000,
            };
            expect(isOverflow(tokens, modelLimits)).toBe(true);
            // Default should be same as explicit 0.9
            expect(isOverflow(tokens, modelLimits)).toBe(isOverflow(tokens, modelLimits, 0.9));
            // Exactly at the 90% boundary must still not overflow (strict >)
            expect(isOverflow({ inputTokens: 180000 }, modelLimits)).toBe(false);
        });

        it('should handle threshold of 0.5 (50%)', () => {
            // contextWindow: 200000
            // With threshold 0.5: effectiveLimit = floor(200000 * 0.5) = 100000
            const tokens: TokenUsage = {
                inputTokens: 100001,
            };
            const modelLimits: ModelLimits = {
                contextWindow: 200000,
            };
            expect(isOverflow(tokens, modelLimits, 0.5)).toBe(true);
            expect(isOverflow(tokens, modelLimits, 1.0)).toBe(false);
        });

        it('should floor the effective limit', () => {
            // contextWindow: 100, thresholdPercent: 0.9
            // effectiveLimit = floor(100 * 0.9) = 90
            const modelLimits: ModelLimits = {
                contextWindow: 100,
            };
            // At exactly 90 tokens, should NOT overflow
            expect(isOverflow({ inputTokens: 90 }, modelLimits, 0.9)).toBe(false);
            // At 91 tokens, SHOULD overflow
            expect(isOverflow({ inputTokens: 91 }, modelLimits, 0.9)).toBe(true);
        });
    });
});
/**
 * Unit tests for getCompactionTarget(): default and custom percentages,
 * integer results, and small context windows.
 */
describe('getCompactionTarget', () => {
    // Wraps a context window size into the limits object the function expects.
    const limitsFor = (contextWindow: number): ModelLimits => ({ contextWindow });

    describe('default target percentage (70%)', () => {
        it('should return 70% of context window by default', () => {
            // floor(200000 * 0.7) = 140000
            expect(getCompactionTarget(limitsFor(200000))).toBe(140000);
        });
    });

    describe('custom target percentage', () => {
        it('should return correct target for 50% percentage', () => {
            // floor(200000 * 0.5) = 100000
            expect(getCompactionTarget(limitsFor(200000), 0.5)).toBe(100000);
        });

        it('should return correct target for 90% percentage', () => {
            // floor(200000 * 0.9) = 180000
            expect(getCompactionTarget(limitsFor(200000), 0.9)).toBe(180000);
        });
    });

    describe('floor behavior', () => {
        it('should floor the result to avoid fractional tokens', () => {
            // floor(100000 * 0.33) = 33000
            const computed = getCompactionTarget(limitsFor(100000), 0.33);
            expect(Number.isInteger(computed)).toBe(true);
            expect(computed).toBe(33000);
        });
    });

    describe('small context windows', () => {
        it('should work correctly with small context windows', () => {
            // floor(8192 * 0.7) = 5734
            expect(getCompactionTarget(limitsFor(8192))).toBe(5734);
        });
    });
});

View File

@@ -0,0 +1,59 @@
import type { TokenUsage } from '../../llm/types.js';
/**
* Model limits configuration for overflow detection.
* These limits define the context window boundaries.
*
* Read by `isOverflow` and `getCompactionTarget`, which both multiply
* `contextWindow` by a percentage and floor the result.
*/
export interface ModelLimits {
/** Maximum context window size in tokens (the model's input limit) */
contextWindow: number;
}
/**
 * Reports whether the used input tokens exceed the compaction trigger point.
 *
 * The trigger point is `floor(contextWindow * thresholdPercent)`; overflow is
 * reported only when the input token count is strictly greater than it, so a
 * count sitting exactly on the trigger point is not an overflow.
 *
 * A threshold below 1.0 (e.g. 0.9) triggers compaction before the window is
 * completely full, leaving a safety margin for estimation errors.
 *
 * Note: no space is reserved for output tokens, because input and output have
 * separate limits in LLM APIs.
 *
 * @param tokens The token usage (actual from API or estimated); a missing `inputTokens` counts as 0
 * @param modelLimits The model's context window limit
 * @param thresholdPercent Fraction of the context window at which to trigger (default 0.9 = 90%)
 * @returns true if context has overflowed and compaction is needed
 */
export function isOverflow(
    tokens: TokenUsage,
    modelLimits: ModelLimits,
    thresholdPercent: number = 0.9
): boolean {
    // Whole-token trigger point derived from the window size and the threshold.
    const triggerPoint = Math.floor(modelLimits.contextWindow * thresholdPercent);
    // Absent usage data is treated as "nothing consumed yet".
    const consumed = tokens.inputTokens ?? 0;
    return consumed > triggerPoint;
}
/**
 * Computes the token count compaction should reduce the context to.
 *
 * The result is `contextWindow * targetPercentage`, rounded down so it is
 * always a whole number of tokens.
 *
 * @param modelLimits The model's context window limit
 * @param targetPercentage Fraction of the context window to target (default 0.7 = 70%)
 * @returns The target token count after compaction
 */
export function getCompactionTarget(
    modelLimits: ModelLimits,
    targetPercentage: number = 0.7
): number {
    const scaledWindow = modelLimits.contextWindow * targetPercentage;
    return Math.floor(scaledWindow);
}

View File

@@ -0,0 +1,59 @@
import { z } from 'zod';
import type { LanguageModel } from 'ai';
import type { ICompactionStrategy } from './types.js';
import type { IDextoLogger } from '../../logger/v2/types.js';
/**
* Context provided to compaction strategy creation.
*
* Passed as the second argument to `CompactionProvider.create`.
*/
export interface CompactionContext {
/** Logger made available to the provider when it builds a strategy */
logger: IDextoLogger;
/** Language model for LLM-backed strategies; may be left out otherwise */
model?: LanguageModel; // Optional - some strategies may not need LLM
}
/**
* Provider interface for compaction strategies.
*
* Follows the same pattern as blob storage and tools providers:
* - Type discriminator for config validation
* - Zod schema for runtime validation
* - Factory function to create instances
* - Metadata for discovery and UI
*
* TConfig should be the output type (z.output) with defaults applied
*
* Type parameters:
* - TType: string literal identifying the strategy (defaults to `string`)
* - TConfig: validated config shape, must extend CompactionConfig
*/
export interface CompactionProvider<
TType extends string = string,
TConfig extends CompactionConfig = CompactionConfig,
> {
/** Unique identifier for this strategy type */
type: TType;
/**
* Zod schema for validating configuration - accepts input, produces TConfig output.
* The input type is left as `any`, so raw (pre-default) config objects can be parsed.
*/
configSchema: z.ZodType<TConfig, z.ZodTypeDef, any>;
/** Metadata for discovery and UI. Optional: a provider may omit it entirely. */
metadata?: {
displayName: string;
description: string;
requiresLLM: boolean; // Does it need LLM access?
isProactive: boolean; // Proactive vs reactive?
};
/**
* Create a compaction strategy instance.
* May return the strategy directly or a Promise resolving to it.
* @param config - Validated configuration with defaults applied (output type)
* @param context - Logger and optional language model for the new strategy
*/
create(
config: TConfig,
context: CompactionContext
): ICompactionStrategy | Promise<ICompactionStrategy>;
}
/**
* Base configuration for all compaction strategies.
*
* Strategy-specific configs extend this shape (CompactionProvider constrains
* its TConfig parameter to it).
*/
export interface CompactionConfig {
/** Strategy type identifier */
type: string;
/** When omitted, the field is simply absent; this interface defines no default */
enabled?: boolean; // Allow disabling without removing config
}

View File

@@ -0,0 +1,36 @@
import { z } from 'zod';
import type { CompactionProvider } from '../provider.js';
import { NoOpCompactionStrategy } from '../strategies/noop.js';
/**
* Configuration schema for no-op compaction.
*
* Accepts only `type` and `enabled`; `.strict()` rejects any other key.
*/
export const NoOpConfigSchema = z
.object({
type: z.literal('noop'),
// Defaults to true when the key is omitted
enabled: z.boolean().default(true).describe('Enable or disable compaction'),
})
.strict();
/** Validated no-op config (schema output, i.e. with defaults applied) */
export type NoOpConfig = z.output<typeof NoOpConfigSchema>;
/**
 * Provider for the no-op compaction strategy.
 *
 * The strategy it creates disables compaction entirely, so the full
 * conversation history is kept. Useful for testing, debugging, or contexts
 * where full history is required. Neither the config nor the context is
 * consulted when creating the strategy.
 */
export const noopProvider: CompactionProvider<'noop', NoOpConfig> = {
    type: 'noop',
    configSchema: NoOpConfigSchema,
    metadata: {
        displayName: 'No Compaction',
        description: 'Disables compaction entirely, keeping full conversation history',
        requiresLLM: false,
        isProactive: false,
    },
    // Both arguments are intentionally ignored.
    create: (_config, _context) => new NoOpCompactionStrategy(),
};

View File

@@ -0,0 +1,95 @@
import { z } from 'zod';
import type { CompactionProvider } from '../provider.js';
import { ReactiveOverflowStrategy } from '../strategies/reactive-overflow.js';
/**
* Configuration schema for reactive overflow compaction.
*
* `.strict()` rejects keys that are not declared below.
*/
export const ReactiveOverflowConfigSchema = z
.object({
type: z.literal('reactive-overflow'),
// Defaults to true when the key is omitted
enabled: z.boolean().default(true).describe('Enable or disable compaction'),
/**
* Maximum context tokens before compaction triggers.
* When set, overrides the model's context window for compaction threshold.
* Useful for capping context size below the model's maximum limit.
*
* NOTE(review): unlike the other token/turn counts in this schema, this value is
* not constrained with `.int()`, so fractional values pass validation - confirm intent.
*/
maxContextTokens: z
.number()
.positive()
.optional()
.describe(
'Maximum context tokens before compaction triggers. Overrides model context window when set.'
),
/**
* Percentage of context window that triggers compaction (0.1 to 1.0).
* Default is 1.0 (100%), meaning compaction triggers when context is full.
*
* NOTE(review): `isOverflow()` in overflow.ts falls back to 0.9 when no threshold
* is passed; confirm callers always forward this configured value.
*/
thresholdPercent: z
.number()
.min(0.1)
.max(1.0)
.default(1.0)
.describe(
'Percentage of context window that triggers compaction (0.1 to 1.0, default 1.0)'
),
/** Positive integer, defaults to 2 */
preserveLastNTurns: z
.number()
.int()
.positive()
.default(2)
.describe('Number of recent turns (user+assistant pairs) to preserve'),
/** Positive integer, defaults to 2000 */
maxSummaryTokens: z
.number()
.int()
.positive()
.default(2000)
.describe('Maximum tokens for the summary output'),
/** Optional; no default is defined in this schema */
summaryPrompt: z
.string()
.optional()
.describe('Custom summary prompt template. Use {conversation} as placeholder'),
})
.strict();
/** Validated reactive-overflow config (schema output, i.e. with defaults applied) */
export type ReactiveOverflowConfig = z.output<typeof ReactiveOverflowConfigSchema>;
/**
 * Provider for reactive overflow compaction strategy.
 *
 * This strategy triggers compaction when context window overflow is detected:
 * - Generates LLM-powered summaries of older messages
 * - Preserves recent turns for context continuity
 * - Falls back to simple text summary if LLM call fails
 * - Adds summary message to history (read-time filtering excludes old messages)
 *
 * `create` forwards only `preserveLastNTurns`, `maxSummaryTokens` and, when
 * defined, `summaryPrompt` to the strategy. It throws when no language model
 * is supplied in the context.
 */
export const reactiveOverflowProvider: CompactionProvider<
    'reactive-overflow',
    ReactiveOverflowConfig
> = {
    type: 'reactive-overflow',
    configSchema: ReactiveOverflowConfigSchema,
    metadata: {
        displayName: 'Reactive Overflow Compaction',
        description: 'Generates summaries when context window overflows, preserving recent turns',
        requiresLLM: true,
        isProactive: false,
    },
    create(config, context) {
        const { model, logger } = context;
        if (!model) {
            throw new Error('ReactiveOverflowStrategy requires LanguageModel');
        }
        const { preserveLastNTurns, maxSummaryTokens, summaryPrompt } = config;
        // The summaryPrompt key is only present when a prompt was configured,
        // so an explicit `undefined` is never passed on to the strategy.
        const options: import('../strategies/reactive-overflow.js').ReactiveOverflowOptions = {
            preserveLastNTurns,
            maxSummaryTokens,
            ...(summaryPrompt !== undefined ? { summaryPrompt } : {}),
        };
        return new ReactiveOverflowStrategy(model, options, logger);
    },
};

View File

@@ -0,0 +1,537 @@
import { describe, it, expect, beforeEach } from 'vitest';
import { z } from 'zod';
import { compactionRegistry } from './registry.js';
import type { CompactionProvider, CompactionConfig, CompactionContext } from './provider.js';
import type { ICompactionStrategy } from './types.js';
import type { InternalMessage } from '../types.js';
// Mock compaction config types
/** Config for the 'mock' provider; `maxTokens` is read by MockCompressionStrategy as a slice length. */
interface MockCompactionConfig extends CompactionConfig {
type: 'mock';
enabled?: boolean;
maxTokens?: number;
}
/** Config for the 'another-mock' provider; `threshold` is read by AnotherMockStrategy as a slice length. */
interface AnotherMockConfig extends CompactionConfig {
type: 'another-mock';
enabled?: boolean;
threshold?: number;
}
// Mock compaction strategy implementation
// Keeps the first `maxTokens` history entries; falls back to 100 when maxTokens is unset or 0.
class MockCompressionStrategy implements ICompactionStrategy {
    readonly name = 'mock-compaction';

    constructor(private config: MockCompactionConfig) {}

    async compact(history: readonly InternalMessage[]): Promise<InternalMessage[]> {
        const keepCount = this.config.maxTokens || 100;
        const kept = history.slice(0, keepCount);
        return kept as InternalMessage[];
    }
}
// Second mock strategy: keeps the first `threshold` history entries,
// falling back to 50 when threshold is unset or 0.
class AnotherMockStrategy implements ICompactionStrategy {
    readonly name = 'another-mock-compaction';

    constructor(private config: AnotherMockConfig) {}

    async compact(history: readonly InternalMessage[]): Promise<InternalMessage[]> {
        const keepCount = this.config.threshold || 50;
        const kept = history.slice(0, keepCount);
        return kept as InternalMessage[];
    }
}
// Mock compaction providers
// 'mock': carries full metadata (no LLM required, proactive); maxTokens defaults to 100.
const mockProvider: CompactionProvider<'mock', MockCompactionConfig> = {
    type: 'mock',
    configSchema: z.object({
        type: z.literal('mock'),
        enabled: z.boolean().default(true),
        maxTokens: z.number().default(100),
    }),
    metadata: {
        displayName: 'Mock Compaction',
        description: 'A mock compaction strategy for testing',
        requiresLLM: false,
        isProactive: true,
    },
    // The context argument is not needed by the mock strategy.
    create: (config: MockCompactionConfig, _context: CompactionContext): ICompactionStrategy =>
        new MockCompressionStrategy(config),
};
// 'another-mock': carries full metadata (LLM required, reactive); threshold defaults to 50.
const anotherMockProvider: CompactionProvider<'another-mock', AnotherMockConfig> = {
    type: 'another-mock',
    configSchema: z.object({
        type: z.literal('another-mock'),
        enabled: z.boolean().default(true),
        threshold: z.number().default(50),
    }),
    metadata: {
        displayName: 'Another Mock Compaction',
        description: 'Another mock compaction strategy for testing',
        requiresLLM: true,
        isProactive: false,
    },
    // The context argument is not needed by the mock strategy.
    create: (config: AnotherMockConfig, _context: CompactionContext): ICompactionStrategy =>
        new AnotherMockStrategy(config),
};
// 'minimal': deliberately has no `metadata`, to cover providers that omit it.
// Its strategy returns a shallow copy of the history unchanged.
const minimalProvider: CompactionProvider<'minimal', CompactionConfig> = {
    type: 'minimal',
    configSchema: z.object({
        type: z.literal('minimal'),
        enabled: z.boolean().default(true),
    }),
    create(_config: CompactionConfig, _context: CompactionContext): ICompactionStrategy {
        const strategy: ICompactionStrategy = {
            name: 'minimal-compaction',
            compact: async (history: readonly InternalMessage[]) => {
                const copy = history.slice();
                return copy as InternalMessage[];
            },
        };
        return strategy;
    },
};
describe('CompactionRegistry', () => {
beforeEach(() => {
// Clear registry before each test to ensure isolation
compactionRegistry.clear();
});
describe('register()', () => {
it('successfully registers a provider', () => {
expect(() => compactionRegistry.register(mockProvider)).not.toThrow();
expect(compactionRegistry.has('mock')).toBe(true);
});
it('successfully registers multiple providers', () => {
compactionRegistry.register(mockProvider);
compactionRegistry.register(anotherMockProvider);
expect(compactionRegistry.has('mock')).toBe(true);
expect(compactionRegistry.has('another-mock')).toBe(true);
});
it('throws error when registering duplicate provider', () => {
compactionRegistry.register(mockProvider);
expect(() => compactionRegistry.register(mockProvider)).toThrow(
"Compaction provider 'mock' is already registered"
);
});
it('allows re-registration after unregistering', () => {
compactionRegistry.register(mockProvider);
compactionRegistry.unregister('mock');
expect(() => compactionRegistry.register(mockProvider)).not.toThrow();
expect(compactionRegistry.has('mock')).toBe(true);
});
it('registers provider with minimal metadata', () => {
compactionRegistry.register(minimalProvider);
const provider = compactionRegistry.get('minimal');
expect(provider).toBeDefined();
expect(provider?.type).toBe('minimal');
expect(provider?.metadata).toBeUndefined();
});
});
describe('unregister()', () => {
it('successfully unregisters an existing provider', () => {
compactionRegistry.register(mockProvider);
const result = compactionRegistry.unregister('mock');
expect(result).toBe(true);
expect(compactionRegistry.has('mock')).toBe(false);
});
it('returns false when unregistering non-existent provider', () => {
const result = compactionRegistry.unregister('non-existent');
expect(result).toBe(false);
});
it('returns false when unregistering from empty registry', () => {
const result = compactionRegistry.unregister('mock');
expect(result).toBe(false);
});
it('can unregister one provider while keeping others', () => {
compactionRegistry.register(mockProvider);
compactionRegistry.register(anotherMockProvider);
const result = compactionRegistry.unregister('mock');
expect(result).toBe(true);
expect(compactionRegistry.has('mock')).toBe(false);
expect(compactionRegistry.has('another-mock')).toBe(true);
});
});
// get(): lookup by type string; yields the registered provider object or undefined.
describe('get()', () => {
    it('returns registered provider', () => {
        compactionRegistry.register(mockProvider);

        const found = compactionRegistry.get('mock');

        expect(found).toBeDefined();
        expect(found?.type).toBe('mock');
        expect(found?.metadata?.displayName).toBe('Mock Compaction');
    });

    it('returns undefined for non-existent provider', () => {
        const found = compactionRegistry.get('non-existent');

        expect(found).toBeUndefined();
    });

    it('returns undefined from empty registry', () => {
        const found = compactionRegistry.get('mock');

        expect(found).toBeUndefined();
    });

    it('returns correct provider when multiple are registered', () => {
        compactionRegistry.register(mockProvider);
        compactionRegistry.register(anotherMockProvider);

        const mock = compactionRegistry.get('mock');
        const anotherMock = compactionRegistry.get('another-mock');

        expect(mock?.type).toBe('mock');
        expect(anotherMock?.type).toBe('another-mock');
    });

    it('returns full provider interface including create function', () => {
        compactionRegistry.register(mockProvider);

        const found = compactionRegistry.get('mock');

        // The factory and the config schema must both survive the round trip.
        expect(found).toBeDefined();
        expect(typeof found?.create).toBe('function');
        expect(found?.configSchema).toBeDefined();
    });
});
// has(): membership check by provider type string.
describe('has()', () => {
    it('returns true for registered provider', () => {
        compactionRegistry.register(mockProvider);

        const present = compactionRegistry.has('mock');

        expect(present).toBe(true);
    });

    it('returns false for non-existent provider', () => {
        const present = compactionRegistry.has('non-existent');

        expect(present).toBe(false);
    });

    it('returns false from empty registry', () => {
        const present = compactionRegistry.has('mock');

        expect(present).toBe(false);
    });

    it('returns false after unregistering', () => {
        compactionRegistry.register(mockProvider);
        compactionRegistry.unregister('mock');

        const present = compactionRegistry.has('mock');

        expect(present).toBe(false);
    });

    it('correctly identifies multiple registered providers', () => {
        compactionRegistry.register(mockProvider);
        compactionRegistry.register(anotherMockProvider);

        // Both registered types are reported; an unrelated key is not.
        expect(compactionRegistry.has('mock')).toBe(true);
        expect(compactionRegistry.has('another-mock')).toBe(true);
        expect(compactionRegistry.has('non-existent')).toBe(false);
    });
});
// getTypes(): lists the type strings of everything currently registered.
describe('getTypes()', () => {
    it('returns all registered provider types', () => {
        compactionRegistry.register(mockProvider);
        compactionRegistry.register(anotherMockProvider);

        const registeredTypes = compactionRegistry.getTypes();

        expect(registeredTypes).toHaveLength(2);
        expect(registeredTypes).toContain('mock');
        expect(registeredTypes).toContain('another-mock');
    });

    it('returns empty array for empty registry', () => {
        const registeredTypes = compactionRegistry.getTypes();

        expect(registeredTypes).toEqual([]);
    });

    it('returns single type when only one provider is registered', () => {
        compactionRegistry.register(mockProvider);

        const registeredTypes = compactionRegistry.getTypes();

        expect(registeredTypes).toEqual(['mock']);
    });

    it('updates after unregistering a provider', () => {
        compactionRegistry.register(mockProvider);
        compactionRegistry.register(anotherMockProvider);
        compactionRegistry.unregister('mock');

        const registeredTypes = compactionRegistry.getTypes();

        expect(registeredTypes).toHaveLength(1);
        expect(registeredTypes).toContain('another-mock');
        expect(registeredTypes).not.toContain('mock');
    });

    it('returns array that can be iterated', () => {
        compactionRegistry.register(mockProvider);
        compactionRegistry.register(anotherMockProvider);

        const registeredTypes = compactionRegistry.getTypes();
        const seen: string[] = [];

        // Walk the result with forEach, checking each entry while collecting it.
        registeredTypes.forEach((entry) => {
            expect(typeof entry).toBe('string');
            seen.push(entry);
        });

        expect(seen.length).toBe(2);
    });
});
// getAll(): returns the provider objects themselves; these tests expect registration order.
describe('getAll()', () => {
    it('returns all registered providers', () => {
        compactionRegistry.register(mockProvider);
        compactionRegistry.register(anotherMockProvider);

        const all = compactionRegistry.getAll();

        expect(all).toHaveLength(2);
        const [first, second] = all;
        expect(first!.type).toBe('mock');
        expect(second!.type).toBe('another-mock');
    });

    it('returns empty array for empty registry', () => {
        const all = compactionRegistry.getAll();

        expect(all).toEqual([]);
    });

    it('returns single provider when only one is registered', () => {
        compactionRegistry.register(mockProvider);

        const all = compactionRegistry.getAll();

        expect(all).toHaveLength(1);
        const [only] = all;
        expect(only!.type).toBe('mock');
    });

    it('updates after unregistering a provider', () => {
        compactionRegistry.register(mockProvider);
        compactionRegistry.register(anotherMockProvider);
        compactionRegistry.unregister('mock');

        const all = compactionRegistry.getAll();

        expect(all).toHaveLength(1);
        const [survivor] = all;
        expect(survivor!.type).toBe('another-mock');
    });

    it('returns providers with full interface including metadata', () => {
        compactionRegistry.register(mockProvider);
        compactionRegistry.register(anotherMockProvider);

        const [first, second] = compactionRegistry.getAll();

        expect(first!.metadata).toBeDefined();
        expect(first!.metadata?.displayName).toBe('Mock Compaction');
        expect(second!.metadata).toBeDefined();
        expect(second!.metadata?.requiresLLM).toBe(true);
    });

    it('returns array that can be filtered and mapped', () => {
        compactionRegistry.register(mockProvider);
        compactionRegistry.register(anotherMockProvider);
        compactionRegistry.register(minimalProvider);

        const all = compactionRegistry.getAll();
        const llmBacked = all.filter((candidate) => candidate.metadata?.requiresLLM === true);
        const typeList = all.map((candidate) => candidate.type);

        expect(llmBacked).toHaveLength(1);
        expect(llmBacked[0]!.type).toBe('another-mock');
        expect(typeList).toEqual(['mock', 'another-mock', 'minimal']);
    });
});
// clear(): empties the registry and leaves it usable for fresh registrations.
describe('clear()', () => {
    it('clears all registered providers', () => {
        compactionRegistry.register(mockProvider);
        compactionRegistry.register(anotherMockProvider);

        compactionRegistry.clear();

        // Every read accessor must agree that nothing is left.
        expect(compactionRegistry.getTypes()).toEqual([]);
        expect(compactionRegistry.getAll()).toEqual([]);
        expect(compactionRegistry.has('mock')).toBe(false);
        expect(compactionRegistry.has('another-mock')).toBe(false);
    });

    it('can clear empty registry without errors', () => {
        const clearEmpty = () => compactionRegistry.clear();

        expect(clearEmpty).not.toThrow();
        expect(compactionRegistry.getTypes()).toEqual([]);
    });

    it('allows re-registration after clearing', () => {
        compactionRegistry.register(mockProvider);
        compactionRegistry.clear();

        const registerAgain = () => compactionRegistry.register(mockProvider);

        expect(registerAgain).not.toThrow();
        expect(compactionRegistry.has('mock')).toBe(true);
    });

    it('truly removes all providers including their state', () => {
        compactionRegistry.register(mockProvider);
        compactionRegistry.register(anotherMockProvider);
        compactionRegistry.register(minimalProvider);

        compactionRegistry.clear();

        expect(compactionRegistry.get('mock')).toBeUndefined();
        expect(compactionRegistry.get('another-mock')).toBeUndefined();
        expect(compactionRegistry.get('minimal')).toBeUndefined();
        expect(compactionRegistry.getAll().length).toBe(0);
    });
});
// End-to-end flows that combine several registry operations in one test.
describe('Integration scenarios', () => {
    it('supports complete provider lifecycle', () => {
        // Step 1: register
        compactionRegistry.register(mockProvider);
        expect(compactionRegistry.has('mock')).toBe(true);

        // Step 2: look it up and confirm its identity
        const fetched = compactionRegistry.get('mock');
        expect(fetched?.type).toBe('mock');

        // Step 3: the factory is exposed for callers to use
        expect(typeof fetched?.create).toBe('function');

        // Step 4: unregister
        const removed = compactionRegistry.unregister('mock');
        expect(removed).toBe(true);
        expect(compactionRegistry.has('mock')).toBe(false);
    });

    it('handles multiple provider types with different configurations', () => {
        compactionRegistry.register(mockProvider);
        compactionRegistry.register(anotherMockProvider);
        compactionRegistry.register(minimalProvider);

        const registeredTypes = compactionRegistry.getTypes();
        expect(registeredTypes).toHaveLength(3);

        const describedProviders = compactionRegistry
            .getAll()
            .filter((candidate) => candidate.metadata !== undefined);
        expect(describedProviders).toHaveLength(2);

        const llmBacked = compactionRegistry
            .getAll()
            .filter((candidate) => candidate.metadata?.requiresLLM === true);
        expect(llmBacked).toHaveLength(1);
        expect(llmBacked[0]!.type).toBe('another-mock');
    });

    it('maintains provider isolation between operations', () => {
        compactionRegistry.register(mockProvider);

        const firstLookup = compactionRegistry.get('mock');
        const secondLookup = compactionRegistry.get('mock');

        // Repeated lookups hand back the very same object.
        expect(firstLookup).toBe(secondLookup);

        // Once unregistered, the registry no longer resolves the type.
        compactionRegistry.unregister('mock');
        expect(compactionRegistry.get('mock')).toBeUndefined();
    });

    it('supports provider discovery pattern', () => {
        compactionRegistry.register(mockProvider);
        compactionRegistry.register(anotherMockProvider);

        // Enumerate everything, then slice by capability flags in metadata.
        const catalogue = compactionRegistry.getAll();
        const proactive = catalogue.filter((candidate) => candidate.metadata?.isProactive === true);
        const llmBacked = catalogue.filter((candidate) => candidate.metadata?.requiresLLM === true);

        expect(proactive).toHaveLength(1);
        expect(proactive[0]!.type).toBe('mock');
        expect(llmBacked).toHaveLength(1);
        expect(llmBacked[0]!.type).toBe('another-mock');
    });
});
// Boundary behaviour: unusual type strings, metadata fidelity, missing optional fields.
describe('Edge cases and error handling', () => {
    it('handles provider types with special characters', () => {
        // Type string mixing a hyphen, an underscore and a digit.
        const specialProvider: CompactionProvider = {
            type: 'special-provider_v2',
            configSchema: z.object({
                type: z.literal('special-provider_v2'),
            }),
            create: () => ({
                name: 'special-provider',
                compact: async (history: readonly InternalMessage[]) =>
                    history.slice() as InternalMessage[],
            }),
        };

        compactionRegistry.register(specialProvider);

        expect(compactionRegistry.has('special-provider_v2')).toBe(true);
        expect(compactionRegistry.get('special-provider_v2')?.type).toBe('special-provider_v2');
    });

    it('preserves provider metadata exactly as provided', () => {
        compactionRegistry.register(mockProvider);

        const retrieved = compactionRegistry.get('mock');

        expect(retrieved?.metadata).toEqual(mockProvider.metadata);
        expect(retrieved?.metadata?.displayName).toBe(mockProvider.metadata?.displayName);
        expect(retrieved?.metadata?.description).toBe(mockProvider.metadata?.description);
        expect(retrieved?.metadata?.requiresLLM).toBe(mockProvider.metadata?.requiresLLM);
        expect(retrieved?.metadata?.isProactive).toBe(mockProvider.metadata?.isProactive);
    });

    it('handles providers without optional metadata gracefully', () => {
        compactionRegistry.register(minimalProvider);

        const provider = compactionRegistry.get('minimal');

        expect(provider).toBeDefined();
        expect(provider?.metadata).toBeUndefined();
        expect(provider?.type).toBe('minimal');
    });

    it('maintains type safety for provider retrieval', () => {
        compactionRegistry.register(mockProvider);

        const provider = compactionRegistry.get('mock');

        // Assert presence first. Without this, a failed lookup would skip every
        // expectation inside the guard below and the test would pass vacuously.
        expect(provider).toBeDefined();

        // The guard narrows the type; TypeScript should know this is
        // CompactionProvider<any, any> inside it.
        if (provider) {
            expect(provider.type).toBeDefined();
            expect(provider.configSchema).toBeDefined();
            expect(provider.create).toBeDefined();
        }
    });
});
});

View File

@@ -0,0 +1,32 @@
import type { CompactionProvider } from './provider.js';
import { ContextError } from '../errors.js';
import { BaseRegistry, type RegistryErrorFactory } from '../../providers/base-registry.js';
/**
 * Builds the errors the compaction registry hands to its base class.
 *
 * Both cases delegate to ContextError so registry failures use the same
 * error type as the rest of the context module.
 */
const compactionErrorFactory: RegistryErrorFactory = {
    /** Error for registering a type that is already present. */
    alreadyRegistered(type: string) {
        return ContextError.compactionProviderAlreadyRegistered(type);
    },

    /** Error for an unknown type; carries the list of types that are available. */
    notFound(type: string, availableTypes: string[]) {
        return ContextError.compactionInvalidType(type, availableTypes);
    },
};
/**
 * Global registry for compaction providers.
 *
 * Follows the same pattern as blob storage and tools registries:
 * - Singleton instance exported
 * - Registration before agent initialization
 * - Type-safe provider lookup
 *
 * Extends BaseRegistry for common registry functionality. This subclass adds
 * no methods of its own: it only fixes the element type to CompactionProvider
 * and supplies the compaction-specific error factory, so every operation used
 * on the singleton comes from the base class.
 */
class CompactionRegistry extends BaseRegistry<CompactionProvider<any, any>> {
constructor() {
// Route registry failures through ContextError-based errors.
super(compactionErrorFactory);
}
}
/**
 * Global singleton instance.
 *
 * The class itself is not exported, so this is the only registry instance
 * that other modules can obtain from this file.
 */
export const compactionRegistry = new CompactionRegistry();

View File

@@ -0,0 +1,265 @@
import { describe, it, expect } from 'vitest';
import {
CompactionConfigSchema,
DEFAULT_COMPACTION_CONFIG,
type CompactionConfigInput,
} from './schemas.js';
describe('CompactionConfigSchema', () => {
// Minimal inputs: `type` is the only required key; the other fields have defaults.
describe('basic validation', () => {
    it('should accept valid minimal config', () => {
        const parsed = CompactionConfigSchema.safeParse({
            type: 'reactive-overflow',
        });

        expect(parsed.success).toBe(true);
        if (parsed.success) {
            // Defaults are filled in for the fields that were left out.
            const { type, enabled, thresholdPercent } = parsed.data;
            expect(type).toBe('reactive-overflow');
            expect(enabled).toBe(true);
            expect(thresholdPercent).toBe(0.9);
        }
    });

    it('should accept config with enabled explicitly set', () => {
        const parsed = CompactionConfigSchema.safeParse({
            type: 'reactive-overflow',
            enabled: false,
        });

        expect(parsed.success).toBe(true);
        if (parsed.success) {
            expect(parsed.data.enabled).toBe(false);
        }
    });

    it('should reject config without type', () => {
        const parsed = CompactionConfigSchema.safeParse({
            enabled: true,
        });

        expect(parsed.success).toBe(false);
    });
});
// maxContextTokens: optional, and must be strictly positive when supplied.
describe('maxContextTokens', () => {
    /** Parses a reactive-overflow config carrying the given token cap. */
    const parseWithCap = (maxContextTokens: number) =>
        CompactionConfigSchema.safeParse({
            type: 'reactive-overflow',
            maxContextTokens,
        });

    it('should accept positive maxContextTokens', () => {
        const parsed = parseWithCap(50000);

        expect(parsed.success).toBe(true);
        if (parsed.success) {
            expect(parsed.data.maxContextTokens).toBe(50000);
        }
    });

    it('should reject zero maxContextTokens', () => {
        const parsed = parseWithCap(0);

        expect(parsed.success).toBe(false);
    });

    it('should reject negative maxContextTokens', () => {
        const parsed = parseWithCap(-1000);

        expect(parsed.success).toBe(false);
    });

    it('should allow omitting maxContextTokens', () => {
        const parsed = CompactionConfigSchema.safeParse({
            type: 'reactive-overflow',
        });

        expect(parsed.success).toBe(true);
        if (parsed.success) {
            // No default exists for this field, so it stays absent.
            expect(parsed.data.maxContextTokens).toBeUndefined();
        }
    });
});
// thresholdPercent: defaults to 0.9 and is accepted only within [0.1, 1.0].
describe('thresholdPercent', () => {
    /** Parses a reactive-overflow config carrying the given threshold. */
    const parseWithThreshold = (thresholdPercent: number) =>
        CompactionConfigSchema.safeParse({
            type: 'reactive-overflow',
            thresholdPercent,
        });

    it('should default thresholdPercent to 0.9', () => {
        const parsed = CompactionConfigSchema.safeParse({
            type: 'reactive-overflow',
        });

        expect(parsed.success).toBe(true);
        if (parsed.success) {
            expect(parsed.data.thresholdPercent).toBe(0.9);
        }
    });

    it('should accept thresholdPercent of 0.8 (80%)', () => {
        const parsed = parseWithThreshold(0.8);

        expect(parsed.success).toBe(true);
        if (parsed.success) {
            expect(parsed.data.thresholdPercent).toBe(0.8);
        }
    });

    it('should accept thresholdPercent of 0.1 (10% - minimum)', () => {
        const parsed = parseWithThreshold(0.1);

        expect(parsed.success).toBe(true);
        if (parsed.success) {
            expect(parsed.data.thresholdPercent).toBe(0.1);
        }
    });

    it('should accept thresholdPercent of 1.0 (100% - maximum)', () => {
        const parsed = parseWithThreshold(1.0);

        expect(parsed.success).toBe(true);
        if (parsed.success) {
            expect(parsed.data.thresholdPercent).toBe(1.0);
        }
    });

    it('should reject thresholdPercent below 0.1', () => {
        const parsed = parseWithThreshold(0.05);

        expect(parsed.success).toBe(false);
    });

    it('should reject thresholdPercent above 1.0', () => {
        const parsed = parseWithThreshold(1.5);

        expect(parsed.success).toBe(false);
    });

    it('should reject thresholdPercent of 0', () => {
        const parsed = parseWithThreshold(0);

        expect(parsed.success).toBe(false);
    });
});
// All known fields together, plus extra keys the base schema does not declare.
describe('combined configuration', () => {
    it('should accept full config with all fields', () => {
        const parsed = CompactionConfigSchema.safeParse({
            type: 'reactive-overflow',
            enabled: true,
            maxContextTokens: 100000,
            thresholdPercent: 0.75,
        });

        expect(parsed.success).toBe(true);
        if (parsed.success) {
            const { type, enabled, maxContextTokens, thresholdPercent } = parsed.data;
            expect(type).toBe('reactive-overflow');
            expect(enabled).toBe(true);
            expect(maxContextTokens).toBe(100000);
            expect(thresholdPercent).toBe(0.75);
        }
    });

    it('should allow additional passthrough fields for provider-specific config', () => {
        const parsed = CompactionConfigSchema.safeParse({
            type: 'reactive-overflow',
            enabled: true,
            maxSummaryTokens: 2000,
            preserveLastNTurns: 3,
        });

        expect(parsed.success).toBe(true);
        if (parsed.success) {
            // Undeclared keys must come through untouched; widen the type to read them.
            const extras = parsed.data as Record<string, unknown>;
            expect(extras.maxSummaryTokens).toBe(2000);
            expect(extras.preserveLastNTurns).toBe(3);
        }
    });
});
// The exported default object must hold the expected values and pass its own schema.
describe('DEFAULT_COMPACTION_CONFIG', () => {
    it('should have expected default values', () => {
        const defaults = DEFAULT_COMPACTION_CONFIG;

        expect(defaults.type).toBe('reactive-overflow');
        expect(defaults.enabled).toBe(true);
        expect(defaults.thresholdPercent).toBe(0.9);
    });

    it('should validate successfully', () => {
        const parsed = CompactionConfigSchema.safeParse(DEFAULT_COMPACTION_CONFIG);

        expect(parsed.success).toBe(true);
    });
});
// Compile-time check: the annotated assignments below only type-check if the
// exported config type has the expected field types.
describe('type inference', () => {
    it('should produce correct output type', () => {
        const sample: CompactionConfigInput = {
            type: 'reactive-overflow',
            enabled: true,
            maxContextTokens: 50000,
            thresholdPercent: 0.9,
        };

        // Explicit annotations are the point here: they must compile without errors.
        const providerType: string = sample.type;
        const isEnabled: boolean = sample.enabled;
        const tokenCap: number | undefined = sample.maxContextTokens;
        const ratio: number = sample.thresholdPercent;

        expect(providerType).toBe('reactive-overflow');
        expect(isEnabled).toBe(true);
        expect(tokenCap).toBe(50000);
        expect(ratio).toBe(0.9);
    });
});
});

View File

@@ -0,0 +1,55 @@
import { z } from 'zod';
/**
 * Field schema for `maxContextTokens`: an optional, strictly positive number.
 *
 * When set, it overrides the model's context window for the compaction
 * threshold, which is useful for capping context size below the model's
 * maximum. Example: 50000 triggers compaction at 50K tokens even if the
 * model supports 200K tokens.
 */
const maxContextTokensField = z
    .number()
    .positive()
    .optional()
    .describe(
        'Maximum context tokens before compaction triggers. Overrides model context window when set.'
    );

/**
 * Field schema for `thresholdPercent`: fraction of the context window that
 * triggers compaction. Accepted range is 0.1 to 1.0 inclusive.
 *
 * Defaults to 0.9 (90%), leaving a 10% buffer. Lower values trigger
 * compaction earlier, e.g. 0.8 triggers when 80% of context is used.
 */
const thresholdPercentField = z
    .number()
    .min(0.1)
    .max(1.0)
    .default(0.9)
    .describe('Percentage of context window that triggers compaction (0.1 to 1.0, default 0.9)');

/**
 * Base compaction configuration schema.
 *
 * `type` is a free-form string rather than a member of a discriminated
 * union: this schema accepts any configuration that carries a `type` field
 * and passes unknown keys through unchanged, so each provider's own config
 * schema can apply its specific validation rules afterwards.
 */
export const CompactionConfigSchema = z
    .object({
        type: z.string().describe('Compaction provider type'),
        enabled: z.boolean().default(true).describe('Enable or disable compaction'),
        maxContextTokens: maxContextTokensField,
        thresholdPercent: thresholdPercentField,
    })
    .passthrough() // Keep extra fields; provider schemas validate them later
    .describe('Context compaction configuration');
/**
 * Shape of a compaction config after it has been parsed by CompactionConfigSchema.
 *
 * NOTE: despite the `Input` suffix this alias is `z.output`, i.e. the type
 * after schema defaults have been applied. `enabled` and `thresholdPercent`
 * are therefore required here, while `maxContextTokens` stays optional.
 */
export type CompactionConfigInput = z.output<typeof CompactionConfigSchema>;
/**
 * Default compaction configuration - uses reactive-overflow strategy.
 *
 * `enabled` and `thresholdPercent` repeat the schema's own defaults
 * (true and 0.9); `maxContextTokens` is left unset.
 */
export const DEFAULT_COMPACTION_CONFIG: CompactionConfigInput = {
type: 'reactive-overflow',
enabled: true,
thresholdPercent: 0.9,
};

View File

@@ -0,0 +1,21 @@
import type { ICompactionStrategy } from '../types.js';
import type { InternalMessage } from '../../types.js';
/**
 * Compaction strategy that never compacts anything.
 *
 * Handy when:
 * - running tests without compaction overhead
 * - compaction should be switched off temporarily
 * - the full, unmodified history is required
 */
export class NoOpCompactionStrategy implements ICompactionStrategy {
    readonly name = 'noop';

    /**
     * Ignores the supplied history entirely.
     *
     * @param _history - Conversation history; never read.
     * @returns A promise resolving to a new empty array (no summary needed).
     */
    async compact(_history: readonly InternalMessage[]): Promise<InternalMessage[]> {
        const nothingToAdd: InternalMessage[] = [];
        return nothingToAdd;
    }
}

View File

@@ -0,0 +1,703 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { ReactiveOverflowStrategy } from './reactive-overflow.js';
import type { InternalMessage } from '../../types.js';
import type { LanguageModel } from 'ai';
import { createMockLogger } from '../../../logger/v2/test-utils.js';
import { filterCompacted } from '../../utils.js';
// Mock the ai module: only `generateText` is replaced with a mock function;
// every other export is spread through from the real module via importOriginal.
// NOTE(review): Vitest documents vi.mock calls as hoisted above imports, which is
// presumably why the `import` below can follow this call and still receive the
// mocked export - confirm against the Vitest version in use.
vi.mock('ai', async (importOriginal) => {
const actual = await importOriginal<typeof import('ai')>();
return {
...actual,
generateText: vi.fn(),
};
});
import { generateText } from 'ai';
// Typed handle on the mocked generateText; tests use it to stub the summary
// response and to assert whether the model was called at all.
const mockGenerateText = vi.mocked(generateText);
/**
 * Builds a stand-in LanguageModel for constructing the strategy under test.
 *
 * Only a handful of fields are populated, with mock functions for the two
 * `do*` methods, so the object is force-cast to LanguageModel.
 */
function createMockModel(): LanguageModel {
    const stub = {
        modelId: 'test-model',
        provider: 'test-provider',
        specificationVersion: 'v1',
        doStream: vi.fn(),
        doGenerate: vi.fn(),
    };

    return stub as unknown as LanguageModel;
}
/**
 * Builds a user-role test message with a single text part.
 *
 * @param text - Message body.
 * @param timestamp - Explicit timestamp; the current time is used when omitted.
 */
function createUserMessage(text: string, timestamp?: number): InternalMessage {
    const message: InternalMessage = {
        role: 'user',
        content: [{ type: 'text', text }],
        timestamp: timestamp ?? Date.now(),
    };

    return message;
}
/**
 * Builds an assistant-role test message with a single text part.
 *
 * @param text - Message body.
 * @param timestamp - Explicit timestamp; the current time is used when omitted.
 */
function createAssistantMessage(text: string, timestamp?: number): InternalMessage {
    const message: InternalMessage = {
        role: 'assistant',
        content: [{ type: 'text', text }],
        timestamp: timestamp ?? Date.now(),
    };

    return message;
}
/**
 * Builds an assistant-role message flagged as a compaction summary.
 *
 * @param text - Summary body.
 * @param originalMessageCount - Value stored in `metadata.originalMessageCount`.
 * @param timestamp - Explicit message timestamp; the current time is used when omitted.
 *
 * `metadata.summarizedAt` is always the current time, independent of `timestamp`.
 */
function createSummaryMessage(
    text: string,
    originalMessageCount: number,
    timestamp?: number
): InternalMessage {
    const summary: InternalMessage = {
        role: 'assistant',
        content: [{ type: 'text', text }],
        timestamp: timestamp ?? Date.now(),
        metadata: {
            isSummary: true,
            summarizedAt: Date.now(),
            originalMessageCount,
        },
    };

    return summary;
}
describe('ReactiveOverflowStrategy', () => {
const logger = createMockLogger();
let strategy: ReactiveOverflowStrategy;
beforeEach(() => {
vi.clearAllMocks();
strategy = new ReactiveOverflowStrategy(createMockModel(), {}, logger);
});
// Very short histories: compact() yields nothing, and the first two tests also
// confirm the model is never called.
describe('compact() - short history guard', () => {
    it('should return empty array when history has 2 or fewer messages', async () => {
        const twoMessages: InternalMessage[] = [
            createUserMessage('Hello'),
            createAssistantMessage('Hi there!'),
        ];

        const summaries = await strategy.compact(twoMessages);

        expect(summaries).toEqual([]);
        expect(mockGenerateText).not.toHaveBeenCalled();
    });

    it('should return empty array for empty history', async () => {
        const summaries = await strategy.compact([]);

        expect(summaries).toEqual([]);
        expect(mockGenerateText).not.toHaveBeenCalled();
    });

    it('should return empty array for single message', async () => {
        const singleMessage: InternalMessage[] = [createUserMessage('Hello')];

        const summaries = await strategy.compact(singleMessage);

        expect(summaries).toEqual([]);
    });
});
// First-time compaction: checks the metadata attached to the generated summary message.
describe('compact() - summary message metadata', () => {
    /** Makes the mocked generateText resolve with a canned compaction response. */
    const stubSummaryResponse = () =>
        mockGenerateText.mockResolvedValue({
            text: '<session_compaction>Test summary</session_compaction>',
        } as Awaited<ReturnType<typeof generateText>>);

    it('should return summary with isSummary=true metadata', async () => {
        stubSummaryResponse();

        // Three full turns: with the default preserveLastNTurns=2 there must be
        // more than 2 turns for anything to be summarized.
        const threeTurns: InternalMessage[] = [
            createUserMessage('First question', 1000),
            createAssistantMessage('First answer', 1001),
            createUserMessage('Second question', 1002),
            createAssistantMessage('Second answer', 1003),
            createUserMessage('Third question', 1004),
            createAssistantMessage('Third answer', 1005),
        ];

        const summaries = await strategy.compact(threeTurns);

        expect(summaries).toHaveLength(1);
        expect(summaries[0]?.metadata?.isSummary).toBe(true);
    });

    it('should set originalMessageCount to number of summarized messages', async () => {
        stubSummaryResponse();

        // 6 messages in total; preserveLastNTurns=2 keeps the last 4
        // (2 user + 2 assistant), leaving the first turn to be summarized.
        const threeTurns: InternalMessage[] = [
            createUserMessage('Old question 1', 1000),
            createAssistantMessage('Old answer 1', 1001),
            createUserMessage('Recent question 1', 1002),
            createAssistantMessage('Recent answer 1', 1003),
            createUserMessage('Recent question 2', 1004),
            createAssistantMessage('Recent answer 2', 1005),
        ];

        const summaries = await strategy.compact(threeTurns);

        expect(summaries).toHaveLength(1);
        // The first turn (2 messages) is what got summarized.
        expect(summaries[0]?.metadata?.originalMessageCount).toBe(2);
    });

    it('should include summarizedAt timestamp in metadata', async () => {
        stubSummaryResponse();

        const threeTurns: InternalMessage[] = [
            createUserMessage('Question 1', 1000),
            createAssistantMessage('Answer 1', 1001),
            createUserMessage('Question 2', 1002),
            createAssistantMessage('Answer 2', 1003),
            createUserMessage('Question 3', 1004),
            createAssistantMessage('Answer 3', 1005),
        ];

        // Bracket the call with wall-clock readings to bound summarizedAt.
        const startedAt = Date.now();
        const summaries = await strategy.compact(threeTurns);
        const finishedAt = Date.now();

        expect(summaries[0]?.metadata?.summarizedAt).toBeGreaterThanOrEqual(startedAt);
        expect(summaries[0]?.metadata?.summarizedAt).toBeLessThanOrEqual(finishedAt);
    });

    it('should include original timestamps in metadata', async () => {
        stubSummaryResponse();

        const threeTurns: InternalMessage[] = [
            createUserMessage('Old question', 1000),
            createAssistantMessage('Old answer', 2000),
            createUserMessage('Recent question 1', 3000),
            createAssistantMessage('Recent answer 1', 4000),
            createUserMessage('Recent question 2', 5000),
            createAssistantMessage('Recent answer 2', 6000),
        ];

        const summaries = await strategy.compact(threeTurns);

        // First and last timestamps of the summarized span (the first turn).
        expect(summaries[0]?.metadata?.originalFirstTimestamp).toBe(1000);
        expect(summaries[0]?.metadata?.originalLastTimestamp).toBe(2000);
    });
});
describe('compact() - re-compaction with existing summary', () => {
it('should detect existing summary and only summarize messages after it', async () => {
    mockGenerateText.mockResolvedValue({
        text: '<session_compaction>New summary</session_compaction>',
    } as Awaited<ReturnType<typeof generateText>>);

    // Layout: two pre-summary messages, the earlier summary, then three full turns.
    const conversation: InternalMessage[] = [
        createUserMessage('Very old question', 1000),
        createAssistantMessage('Very old answer', 1001),
        createSummaryMessage('Previous summary', 2, 1002),
        createUserMessage('Question after summary 1', 2000),
        createAssistantMessage('Answer after summary 1', 2001),
        createUserMessage('Question after summary 2', 2002),
        createAssistantMessage('Answer after summary 2', 2003),
        createUserMessage('Question after summary 3', 2004),
        createAssistantMessage('Answer after summary 3', 2005),
    ];

    const summaries = await strategy.compact(conversation);

    expect(summaries).toHaveLength(1);
    // The new summary must be flagged as a re-compaction.
    expect(summaries[0]?.metadata?.isRecompaction).toBe(true);
});
it('should skip re-compaction if few messages after existing summary', async () => {
    // Only 3 messages follow the existing summary (threshold is 4).
    const conversation: InternalMessage[] = [
        createUserMessage('Old question', 1000),
        createAssistantMessage('Old answer', 1001),
        createSummaryMessage('Existing summary', 2, 1002),
        createUserMessage('New question', 2000),
        createAssistantMessage('New answer', 2001),
        createUserMessage('Another question', 2002),
    ];

    const summaries = await strategy.compact(conversation);

    // Nothing is produced and the model is never consulted.
    expect(summaries).toEqual([]);
    expect(mockGenerateText).not.toHaveBeenCalled();
});
it('should find most recent summary when multiple exist', async () => {
    mockGenerateText.mockResolvedValue({
        text: '<session_compaction>Newest summary</session_compaction>',
    } as Awaited<ReturnType<typeof generateText>>);

    // Two summaries are present; three full turns follow the later one.
    const conversation: InternalMessage[] = [
        createUserMessage('Ancient question', 100),
        createSummaryMessage('First summary', 1, 200),
        createUserMessage('Old question', 300),
        createAssistantMessage('Old answer', 301),
        createSummaryMessage('Second summary', 2, 400),
        createUserMessage('Q1', 500),
        createAssistantMessage('A1', 501),
        createUserMessage('Q2', 502),
        createAssistantMessage('A2', 503),
        createUserMessage('Q3', 504),
        createAssistantMessage('A3', 505),
    ];

    const summaries = await strategy.compact(conversation);

    // A single new summary comes back, flagged as a re-compaction.
    expect(summaries).toHaveLength(1);
    expect(summaries[0]?.metadata?.isRecompaction).toBe(true);
});
// Regression-style check: after a re-compaction, originalMessageCount must be
// an absolute position in the full history so that filterCompacted keeps only
// the new summary plus the preserved tail.
it('should set originalMessageCount as absolute index for filterCompacted compatibility', async () => {
mockGenerateText.mockResolvedValue({
text: '<session_compaction>Re-compacted summary</session_compaction>',
} as Awaited<ReturnType<typeof generateText>>);
// History with existing summary at index 2
// - Indices 0-1: old messages (summarized by old summary)
// - Index 2: old summary with originalMessageCount=2
// - Indices 3-8: 6 messages after old summary
const history: InternalMessage[] = [
createUserMessage('Very old question', 1000),
createAssistantMessage('Very old answer', 1001),
createSummaryMessage('Previous summary', 2, 1002),
// 6 messages after the summary
createUserMessage('Q1', 2000),
createAssistantMessage('A1', 2001),
createUserMessage('Q2', 2002),
createAssistantMessage('A2', 2003),
createUserMessage('Q3', 2004),
createAssistantMessage('A3', 2005),
];
// Run re-compaction
const result = await strategy.compact(history);
expect(result).toHaveLength(1);
const newSummary = result[0]!;
expect(newSummary.metadata?.isRecompaction).toBe(true);
// The existing summary is at index 2, and messagesAfterSummary has 6 messages
// With default preserveLastNTurns=2, we split: toSummarize=2, toKeep=4
// So originalMessageCount should be: (2 + 1) + 2 = 5 (absolute index)
// NOT 2 (relative count of summarized messages)
expect(newSummary.metadata?.originalMessageCount).toBe(5);
// Simulate adding the new summary to history (appended at the end, index 9)
const historyAfterCompaction = [...history, newSummary];
// Verify filterCompacted works correctly with the new summary
const filtered = filterCompacted(historyAfterCompaction);
// Should return: [newSummary, 4 preserved messages]
// NOT: [newSummary, everything from index 2 onwards]
expect(filtered).toHaveLength(5); // 1 summary + 4 preserved
expect(filtered[0]?.metadata?.isRecompaction).toBe(true);
// The preserved messages should be the last 4 (indices 5-8 in original)
expect(filtered[1]?.role).toBe('user');
expect(filtered[4]?.role).toBe('assistant');
});
// Same scenario as a small re-compaction, but on a long history so that any
// leakage of the old summary or pre-summary messages shows up as a large result.
it('should ensure filterCompacted does not return old summary or pre-summary messages after re-compaction', async () => {
mockGenerateText.mockResolvedValue({
text: '<session_compaction>New summary</session_compaction>',
} as Awaited<ReturnType<typeof generateText>>);
// Large history to make the bug more obvious
const history: InternalMessage[] = [];
// 50 old turns = 100 old messages (indices 0-99)
for (let i = 0; i < 50; i++) {
history.push(createUserMessage(`Old Q${i}`, 1000 + i * 2));
history.push(createAssistantMessage(`Old A${i}`, 1001 + i * 2));
}
// Old summary at index 100 with originalMessageCount=90
history.push(createSummaryMessage('Old summary', 90, 2000));
// 15 more turns = 30 messages after the old summary (indices 101-130)
for (let i = 0; i < 15; i++) {
history.push(createUserMessage(`New Q${i}`, 3000 + i * 2));
history.push(createAssistantMessage(`New A${i}`, 3001 + i * 2));
}
// 100 old messages + 1 summary + 30 new messages
expect(history).toHaveLength(131);
// Re-compaction should happen
const result = await strategy.compact(history);
expect(result).toHaveLength(1);
const newSummary = result[0]!;
expect(newSummary.metadata?.isRecompaction).toBe(true);
// Add new summary to history
const historyAfterCompaction = [...history, newSummary];
// filterCompacted should NOT return the old summary or pre-old-summary messages
const filtered = filterCompacted(historyAfterCompaction);
// Check that the old summary is NOT in the filtered result
// (the old summary is the only summary message without the isRecompaction flag)
const hasOldSummary = filtered.some(
(msg) => msg.metadata?.isSummary && !msg.metadata?.isRecompaction
);
expect(hasOldSummary).toBe(false);
// The filtered result should be much smaller than the original
// With 30 messages after old summary, keeping ~20%, we should have:
// ~6 preserved messages + 1 new summary = ~7 messages
// NOTE(review): the ~20% retention figure is not visible in this test; the
// assertion below only enforces the looser bound of fewer than 20 messages.
expect(filtered.length).toBeLessThan(20);
});
it('should handle three sequential compactions correctly', async () => {
    mockGenerateText.mockResolvedValue({
        text: '<session_compaction>Summary content</session_compaction>',
    } as Awaited<ReturnType<typeof generateText>>);

    // One growing history shared by all three phases. Each phase appends
    // 10 turns (20 messages), compacts, and appends the resulting summary.
    const history: InternalMessage[] = [];
    const appendTurns = (firstTurn: number, endTurn: number, baseTimestamp: number): void => {
        for (let turn = firstTurn; turn < endTurn; turn++) {
            history.push(createUserMessage(`Q${turn}`, baseTimestamp + turn * 2));
            history.push(createAssistantMessage(`A${turn}`, baseTimestamp + 1 + turn * 2));
        }
    };

    // ----- Phase 1: first compaction, no summary exists yet -----
    appendTurns(0, 10, 1000);
    expect(history).toHaveLength(20);

    const firstResult = await strategy.compact(history);
    expect(firstResult).toHaveLength(1);
    const summary1 = firstResult[0]!;
    expect(summary1.metadata?.isRecompaction).toBeUndefined();

    history.push(summary1);
    expect(history).toHaveLength(21);

    // Read-time view should be the summary plus a handful of preserved messages
    const visibleAfterFirst = filterCompacted(history);
    expect(visibleAfterFirst.length).toBeLessThan(15);

    // ----- Phase 2: 20 more messages, compaction must build on summary1 -----
    appendTurns(10, 20, 2000);
    expect(history).toHaveLength(41);

    const secondResult = await strategy.compact(history);
    expect(secondResult).toHaveLength(1);
    const summary2 = secondResult[0]!;
    expect(summary2.metadata?.isRecompaction).toBe(true);

    history.push(summary2);
    expect(history).toHaveLength(42);

    // summary2 leads the filtered view and summary1 is gone from it
    const visibleAfterSecond = filterCompacted(history);
    expect(visibleAfterSecond[0]?.metadata?.isRecompaction).toBe(true);
    const summary1StillVisible = visibleAfterSecond.some(
        (m) => m.metadata?.isSummary && !m.metadata?.isRecompaction
    );
    expect(summary1StillVisible).toBe(false);

    // ----- Phase 3: 20 more messages, compaction must build on summary2 -----
    appendTurns(20, 30, 3000);
    expect(history).toHaveLength(62);

    const thirdResult = await strategy.compact(history);
    expect(thirdResult).toHaveLength(1);
    const summary3 = thirdResult[0]!;
    expect(summary3.metadata?.isRecompaction).toBe(true);

    history.push(summary3);
    expect(history).toHaveLength(63);

    const visibleAfterThird = filterCompacted(history);

    // 1. The newest summary comes first
    expect(visibleAfterThird[0]?.metadata?.isRecompaction).toBe(true);
    expect(visibleAfterThird[0]).toBe(summary3);

    // 2. No earlier summary survives filtering
    const staleSummaries = visibleAfterThird.filter(
        (m) => m.metadata?.isSummary && m !== summary3
    );
    expect(staleSummaries).toHaveLength(0);

    // 3. The filtered view is far smaller than the stored history
    expect(visibleAfterThird.length).toBeLessThan(20);

    // 4. Everything besides summary3 belongs to the latest batch (timestamps 3000+)
    visibleAfterThird
        .filter((m) => m !== summary3)
        .forEach((m) => {
            expect(m.timestamp).toBeGreaterThanOrEqual(3000);
        });
});
it('should work correctly with manual compaction followed by automatic compaction', async () => {
    mockGenerateText.mockResolvedValue({
        text: '<session_compaction>Summary</session_compaction>',
    } as Awaited<ReturnType<typeof generateText>>);

    const history: InternalMessage[] = [];
    // Appends turns [firstTurn, endTurn); both messages of a turn share one timestamp.
    const appendTurns = (firstTurn: number, endTurn: number, baseTimestamp: number): void => {
        for (let turn = firstTurn; turn < endTurn; turn++) {
            history.push(createUserMessage(`Q${turn}`, baseTimestamp + turn));
            history.push(createAssistantMessage(`A${turn}`, baseTimestamp + turn));
        }
    };

    // Manual compaction goes through the same compact() entry point
    appendTurns(0, 10, 1000);
    const manualSummaries = await strategy.compact(history);
    expect(manualSummaries).toHaveLength(1);
    history.push(manualSummaries[0]!);

    // More conversation, then the automatic compaction (again compact())
    appendTurns(10, 20, 2000);
    const autoSummaries = await strategy.compact(history);
    expect(autoSummaries).toHaveLength(1);
    expect(autoSummaries[0]?.metadata?.isRecompaction).toBe(true);
    history.push(autoSummaries[0]!);

    // Final read-time view: led by the re-compaction summary, and it is the only summary
    const visible = filterCompacted(history);
    expect(visible[0]?.metadata?.isRecompaction).toBe(true);
    const visibleSummaries = visible.filter((m) => m.metadata?.isSummary);
    expect(visibleSummaries.length).toBe(1);
});
});
describe('compact() - history splitting', () => {
    it('should preserve last N turns based on options', async () => {
        mockGenerateText.mockResolvedValue({
            text: '<session_compaction>Summary</session_compaction>',
        } as Awaited<ReturnType<typeof generateText>>);

        // Strategy that keeps the three most recent turns intact
        const threeTurnStrategy = new ReactiveOverflowStrategy(
            createMockModel(),
            { preserveLastNTurns: 3 },
            logger
        );

        // Four turns (8 messages): only turn 1 falls outside the preserved window
        const history: InternalMessage[] = [];
        for (let turn = 1; turn <= 4; turn++) {
            history.push(createUserMessage(`Turn ${turn} Q`, turn * 1000));
            history.push(createAssistantMessage(`Turn ${turn} A`, turn * 1000 + 1));
        }

        const summaries = await threeTurnStrategy.compact(history);

        expect(summaries).toHaveLength(1);
        // Exactly the two messages of turn 1 were summarized
        expect(summaries[0]?.metadata?.originalMessageCount).toBe(2);
    });

    it('should return empty when message count is at or below minKeep threshold', async () => {
        // The message-count fallback keeps at least minKeep=3 messages, so a
        // three-message history leaves nothing to summarize.
        const shortHistory: InternalMessage[] = [];
        shortHistory.push(createUserMessage('Q1', 1000));
        shortHistory.push(createAssistantMessage('A1', 1001));
        shortHistory.push(createUserMessage('Q2', 2000));

        const summaries = await strategy.compact(shortHistory);

        // Nothing summarized, and the LLM was never consulted
        expect(summaries).toEqual([]);
        expect(mockGenerateText).not.toHaveBeenCalled();
    });
});
describe('compact() - LLM failure fallback', () => {
    it('should create fallback summary when LLM call fails', async () => {
        mockGenerateText.mockRejectedValue(new Error('LLM API error'));

        // Three numbered question/answer turns
        const history: InternalMessage[] = [];
        for (let turn = 1; turn <= 3; turn++) {
            history.push(createUserMessage(`Question ${turn}`, turn * 1000));
            history.push(createAssistantMessage(`Answer ${turn}`, turn * 1000 + 1));
        }

        const summaries = await strategy.compact(history);

        expect(summaries).toHaveLength(1);
        expect(summaries[0]?.metadata?.isSummary).toBe(true);

        // Even the fallback keeps the XML envelope and labels itself as a fallback
        const parts = summaries[0]?.content;
        expect(parts).toBeDefined();
        for (const marker of ['<session_compaction>', 'Fallback']) {
            expect(parts![0]).toMatchObject({
                type: 'text',
                text: expect.stringContaining(marker),
            });
        }
    });

    it('should include current task in fallback summary', async () => {
        mockGenerateText.mockRejectedValue(new Error('LLM API error'));

        const history: InternalMessage[] = [];
        history.push(createUserMessage('Old question', 1000));
        history.push(createAssistantMessage('Old answer', 1001));
        history.push(createUserMessage('Recent question 1', 2000));
        history.push(createAssistantMessage('Recent answer 1', 2001));
        history.push(createUserMessage('My current task is to fix the bug', 3000));
        history.push(createAssistantMessage('Working on it', 3001));

        const summaries = await strategy.compact(history);
        expect(summaries).toHaveLength(1);

        const parts = summaries[0]!.content;
        expect(parts).not.toBeNull();

        // Read the text of the first content part (empty string if it is not text)
        const leadingPart = parts![0];
        let fallbackText = '';
        if (leadingPart?.type === 'text') {
            fallbackText = leadingPart.text;
        }
        expect(fallbackText).toContain('<current_task>');
    });
});
describe('compact() - summary content', () => {
    it('should prefix summary with [Session Compaction Summary]', async () => {
        mockGenerateText.mockResolvedValue({
            text: '<session_compaction>LLM generated content</session_compaction>',
        } as Awaited<ReturnType<typeof generateText>>);

        // Three short turns: Q1/A1 .. Q3/A3
        const history: InternalMessage[] = [];
        for (let turn = 1; turn <= 3; turn++) {
            history.push(createUserMessage(`Q${turn}`, turn * 1000));
            history.push(createAssistantMessage(`A${turn}`, turn * 1000 + 1));
        }

        const summaries = await strategy.compact(history);
        expect(summaries).toHaveLength(1);

        const parts = summaries[0]!.content;
        expect(parts).not.toBeNull();

        const leadingPart = parts![0];
        let summaryText = '';
        if (leadingPart?.type === 'text') {
            summaryText = leadingPart.text;
        }
        // The marker must sit at the very start of the summary text
        expect(summaryText).toMatch(/^\[Session Compaction Summary\]/);
    });

    it('should pass conversation to LLM with proper formatting', async () => {
        mockGenerateText.mockResolvedValue({
            text: '<session_compaction>Summary</session_compaction>',
        } as Awaited<ReturnType<typeof generateText>>);

        const history: InternalMessage[] = [];
        history.push(createUserMessage('What is 2+2?', 1000));
        history.push(createAssistantMessage('The answer is 4', 1001));
        history.push(createUserMessage('Thanks!', 2000));
        history.push(createAssistantMessage('You are welcome', 2001));
        history.push(createUserMessage('New question', 3000));
        history.push(createAssistantMessage('New answer', 3001));

        await strategy.compact(history);

        // The oldest turn must reach the LLM as role-prefixed lines
        const expectedLines = ['USER: What is 2+2?', 'ASSISTANT: The answer is 4'];
        for (const line of expectedLines) {
            expect(mockGenerateText).toHaveBeenCalledWith(
                expect.objectContaining({
                    prompt: expect.stringContaining(line),
                })
            );
        }
    });
});
describe('compact() - tool message handling', () => {
    it('should include tool call information in summary', async () => {
        mockGenerateText.mockResolvedValue({
            text: '<session_compaction>Summary with tools</session_compaction>',
        } as Awaited<ReturnType<typeof generateText>>);

        // First turn: user request -> assistant tool call -> tool result
        const assistantToolCall: InternalMessage = {
            role: 'assistant',
            content: [{ type: 'text', text: 'Let me read that file' }],
            timestamp: 1001,
            toolCalls: [
                {
                    id: 'call-1',
                    type: 'function',
                    function: { name: 'read_file', arguments: '{"path": "/test.txt"}' },
                },
            ],
        };
        const toolResult: InternalMessage = {
            role: 'tool',
            content: [{ type: 'text', text: 'File contents here' }],
            timestamp: 1002,
            name: 'read_file',
            toolCallId: 'call-1',
        };

        const history: InternalMessage[] = [
            createUserMessage('Read the file', 1000),
            assistantToolCall,
            toolResult,
        ];
        // Two later plain turns so the tool turn is old enough to be summarized
        for (let turn = 2; turn <= 3; turn++) {
            history.push(createUserMessage(`Q${turn}`, turn * 1000));
            history.push(createAssistantMessage(`A${turn}`, turn * 1000 + 1));
        }

        await strategy.compact(history);

        const promptNamingTool = expect.objectContaining({
            prompt: expect.stringContaining('[Used tools: read_file]'),
        });
        expect(mockGenerateText).toHaveBeenCalledWith(promptNamingTool);
    });
});
});

View File

@@ -0,0 +1,489 @@
import { generateText, type LanguageModel } from 'ai';
import type { ICompactionStrategy } from '../types.js';
import type { InternalMessage, ToolCall } from '../../types.js';
import { isAssistantMessage, isToolMessage } from '../../types.js';
import type { IDextoLogger } from '../../../logger/v2/types.js';
/**
* Configuration options for ReactiveOverflowStrategy.
*
* Every field is optional; omitted fields take the values in DEFAULT_OPTIONS.
*/
export interface ReactiveOverflowOptions {
/**
* Number of recent turns to preserve (not summarize).
* A "turn" is a user message + assistant response pair; the strategy locates
* turn boundaries by walking backwards over messages whose role is 'user'.
* Default: 2
*/
preserveLastNTurns?: number;
/**
* Maximum tokens for the summary output.
* Forwarded to generateText() as `maxOutputTokens`.
* Default: 2000
*/
maxSummaryTokens?: number;
/**
* Custom summary prompt template.
* Use {conversation} as placeholder for formatted messages.
* The placeholder is substituted with String.prototype.replace using a string
* pattern, so only its first occurrence is filled in. The substituted text is
* the formatted messages followed, when a current task was found, by a
* "CURRENT TASK" section.
*/
summaryPrompt?: string;
}
/**
* Defaults merged under caller-supplied ReactiveOverflowOptions.
*
* The default prompt asks the model for an XML document rooted at
* <session_compaction> with <conversation_history>, <current_task> and
* <important_context> sections. The text of this template is sent to the model
* verbatim, so any edit to it (including whitespace) changes runtime behavior.
*/
const DEFAULT_OPTIONS: Required<ReactiveOverflowOptions> = {
preserveLastNTurns: 2,
maxSummaryTokens: 2000,
summaryPrompt: `You are a conversation summarizer creating a structured summary for session continuation.
Analyze the conversation and produce a summary in the following XML format:
<session_compaction>
<conversation_history>
A concise summary of what happened in the conversation:
- Tasks attempted and their outcomes (success/failure/in-progress)
- Important decisions made
- Key information discovered (file paths, configurations, errors encountered)
- Tools used and their results
</conversation_history>
<current_task>
The most recent task or instruction the user requested that may still be in progress.
Be specific - include the exact request and current status.
</current_task>
<important_context>
Critical state that must be preserved:
- File paths being worked on
- Variable values or configurations
- Error messages that need addressing
- Any pending actions or next steps
</important_context>
</session_compaction>
IMPORTANT: The assistant will continue working based on this summary. Ensure the current_task section clearly states what needs to be done next.
Conversation to summarize:
{conversation}`,
};
/**
 * ReactiveOverflowStrategy implements reactive compaction.
 *
 * Key behaviors:
 * - Triggers on overflow (after actual tokens exceed context limit)
 * - Uses LLM to generate intelligent summary of older messages
 * - Returns summary message to ADD to history (not replace)
 * - Read-time filtering via filterCompacted() excludes pre-summary messages
 *
 * This strategy is designed to work with TurnExecutor's main loop:
 * 1. After each step, check if overflow occurred
 * 2. If yes, generate summary and ADD it to history
 * 3. filterCompacted() in getFormattedMessages() excludes old messages
 * 4. Continue with fresh context (summary + recent messages)
 *
 * NOTE: This does NOT replace history. The summary message is ADDED,
 * and filterCompacted() handles excluding old messages at read-time.
 * This preserves full history for audit/recovery purposes.
 */
export class ReactiveOverflowStrategy implements ICompactionStrategy {
    readonly name = 'reactive-overflow';
    private readonly model: LanguageModel;
    private readonly options: Required<ReactiveOverflowOptions>;
    private readonly logger: IDextoLogger;

    /**
     * @param model Language model used to generate summaries
     * @param options Overrides for DEFAULT_OPTIONS; absent or `undefined` fields use the default
     * @param logger Logger for debug/info/error output
     */
    constructor(model: LanguageModel, options: ReactiveOverflowOptions = {}, logger: IDextoLogger) {
        this.model = model;
        // Resolve field-by-field with `??` instead of object spread: a spread would let an
        // explicitly `undefined` option overwrite its default, and an undefined
        // preserveLastNTurns silently disables the turn limit in splitHistory().
        this.options = {
            preserveLastNTurns: options.preserveLastNTurns ?? DEFAULT_OPTIONS.preserveLastNTurns,
            maxSummaryTokens: options.maxSummaryTokens ?? DEFAULT_OPTIONS.maxSummaryTokens,
            summaryPrompt: options.summaryPrompt ?? DEFAULT_OPTIONS.summaryPrompt,
        };
        this.logger = logger;
    }

    /**
     * Generate a summary message for the old portion of history.
     *
     * IMPORTANT: This does NOT replace history. It returns a summary message
     * that the caller should ADD to history via contextManager.addMessage().
     * Read-time filtering (filterCompacted) will then exclude pre-summary
     * messages when formatting for LLM.
     *
     * @param history The full conversation history
     * @returns Array with single summary message to add, or empty if nothing to summarize
     */
    async compact(history: readonly InternalMessage[]): Promise<InternalMessage[]> {
        // Don't compact if history is too short
        if (history.length <= 2) {
            this.logger.debug('ReactiveOverflowStrategy: History too short, skipping compaction');
            return [];
        }

        // If a summary already exists, only the messages AFTER the most recent one are
        // candidates for (re-)compaction.
        const existingSummaryIndex = this.findLastSummaryIndex(history);
        if (existingSummaryIndex !== -1) {
            const messagesAfterSummary = history.slice(existingSummaryIndex + 1);

            // Too few messages since the last summary: nothing meaningful to re-summarize
            if (messagesAfterSummary.length <= 4) {
                this.logger.debug(
                    `ReactiveOverflowStrategy: Only ${messagesAfterSummary.length} messages after existing summary, skipping re-compaction`
                );
                return [];
            }

            this.logger.info(
                `ReactiveOverflowStrategy: Found existing summary at index ${existingSummaryIndex}, ` +
                    `working with ${messagesAfterSummary.length} messages after it`
            );

            // Re-run compaction on the subset after the summary.
            // This prevents cascading summaries of summaries.
            return this.compactSubset(messagesAfterSummary, history, existingSummaryIndex);
        }

        // Split history into messages to summarize and messages to keep
        const { toSummarize, toKeep } = this.splitHistory(history);
        if (toSummarize.length === 0) {
            this.logger.debug('ReactiveOverflowStrategy: No messages to summarize');
            return [];
        }

        // Find the most recent user message to understand current task
        const currentTaskMessage = this.findCurrentTaskMessage(history);

        this.logger.info(
            `ReactiveOverflowStrategy: Summarizing ${toSummarize.length} messages, keeping ${toKeep.length}`
        );

        // Generate LLM summary of old messages with current task context
        const summary = await this.generateSummary(toSummarize, currentTaskMessage);

        // First compaction: the summarized messages start at index 0, so the count of
        // summarized messages is also the absolute index of the first preserved message.
        return [this.buildSummaryMessage(summary, toSummarize, toSummarize.length, false)];
    }

    /**
     * Handle re-compaction when there's already a summary in history.
     * Only summarizes messages AFTER the existing summary, preventing
     * cascading summaries of summaries.
     *
     * @param messagesAfterSummary Messages after the existing summary
     * @param fullHistory The complete history (for current task detection)
     * @param existingSummaryIndex Index of the existing summary in fullHistory
     * @returns Array with single summary message, or empty if nothing to summarize
     */
    private async compactSubset(
        messagesAfterSummary: readonly InternalMessage[],
        fullHistory: readonly InternalMessage[],
        existingSummaryIndex: number
    ): Promise<InternalMessage[]> {
        const { toSummarize, toKeep } = this.splitHistory(messagesAfterSummary);
        if (toSummarize.length === 0) {
            this.logger.debug('ReactiveOverflowStrategy: No messages to summarize in subset');
            return [];
        }

        // Get current task from the full history
        const currentTaskMessage = this.findCurrentTaskMessage(fullHistory);

        this.logger.info(
            `ReactiveOverflowStrategy (re-compact): Summarizing ${toSummarize.length} messages after existing summary, keeping ${toKeep.length}`
        );

        const summary = await this.generateSummary(toSummarize, currentTaskMessage);

        // originalMessageCount must be an ABSOLUTE index for filterCompacted() to work correctly.
        // filterCompacted() uses this as: history.slice(originalMessageCount, summaryIndex)
        // to get the preserved messages. For re-compaction:
        // - Messages 0 to existingSummaryIndex are the old summarized + preserved + old summary
        // - Messages (existingSummaryIndex + 1) onwards are what we're re-compacting
        // - We summarize toSummarize.length of those, so preserved starts at:
        //   (existingSummaryIndex + 1) + toSummarize.length
        const absoluteOriginalMessageCount = existingSummaryIndex + 1 + toSummarize.length;

        return [
            this.buildSummaryMessage(summary, toSummarize, absoluteOriginalMessageCount, true),
        ];
    }

    /**
     * Locate the MOST RECENT summary message (important for re-compaction).
     *
     * @returns Index of the last message flagged `isSummary` or `isSessionSummary`, or -1
     */
    private findLastSummaryIndex(history: readonly InternalMessage[]): number {
        for (let i = history.length - 1; i >= 0; i--) {
            const metadata = history[i]?.metadata;
            if (metadata?.isSummary === true || metadata?.isSessionSummary === true) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Build the assistant-role summary message that the caller adds to history.
     *
     * @param summary Summary text (LLM-generated or fallback)
     * @param toSummarize The messages the summary covers (used for timestamp range)
     * @param originalMessageCount Absolute index of the first preserved message
     * @param isRecompaction Whether this summary follows an earlier summary; when false
     *        the `isRecompaction` metadata key is omitted entirely
     */
    private buildSummaryMessage(
        summary: string,
        toSummarize: readonly InternalMessage[],
        originalMessageCount: number,
        isRecompaction: boolean
    ): InternalMessage {
        // Single clock read so `timestamp` and `summarizedAt` can never disagree
        const now = Date.now();
        return {
            role: 'assistant',
            content: [{ type: 'text', text: summary }],
            timestamp: now,
            metadata: {
                isSummary: true,
                summarizedAt: now,
                originalMessageCount,
                ...(isRecompaction ? { isRecompaction: true } : {}),
                originalFirstTimestamp: toSummarize[0]?.timestamp,
                originalLastTimestamp: toSummarize[toSummarize.length - 1]?.timestamp,
            },
        };
    }

    /**
     * Extract the textual content of a message.
     *
     * @param content Message content: a plain string, an array of parts, or neither
     * @param separator Joiner placed between text parts of array content
     * @returns The string itself, the joined text parts (possibly empty), or null when
     *          content is neither a string nor an array
     */
    private extractText(content: InternalMessage['content'], separator: string): string | null {
        if (typeof content === 'string') {
            return content;
        }
        if (Array.isArray(content)) {
            return content
                .filter((part): part is { type: 'text'; text: string } => part.type === 'text')
                .map((part) => part.text)
                .join(separator);
        }
        return null;
    }

    /**
     * Find the most recent user message that represents the current task.
     * This helps preserve context about what the user is currently asking for.
     *
     * User messages without any text (empty string, or parts with no text) are skipped
     * so that an earlier, meaningful request can still be found.
     *
     * @returns Text of the latest user message that has text, or null if there is none
     */
    private findCurrentTaskMessage(history: readonly InternalMessage[]): string | null {
        for (let i = history.length - 1; i >= 0; i--) {
            const msg = history[i];
            if (msg?.role === 'user') {
                const text = this.extractText(msg.content, '\n');
                if (text) {
                    return text;
                }
            }
        }
        return null;
    }

    /**
     * Split history into messages to summarize and messages to keep.
     * Keeps the last N turns (user + assistant pairs) intact.
     *
     * For long agentic conversations with many tool calls, this also ensures
     * we don't try to keep too many messages even within preserved turns.
     */
    private splitHistory(history: readonly InternalMessage[]): {
        toSummarize: readonly InternalMessage[];
        toKeep: readonly InternalMessage[];
    } {
        const turnsToKeep = this.options.preserveLastNTurns;

        // Find indices of the last N user messages (start of each turn).
        // The limit is checked after recording, so at least one turn start is recorded
        // whenever the history contains a user message.
        const userMessageIndices: number[] = [];
        for (let i = history.length - 1; i >= 0; i--) {
            if (history[i]?.role === 'user') {
                userMessageIndices.unshift(i);
                if (userMessageIndices.length >= turnsToKeep) {
                    break;
                }
            }
        }

        // If we found turn boundaries, split at the first one
        const splitIndex = userMessageIndices[0];
        if (splitIndex !== undefined && splitIndex > 0) {
            return {
                toSummarize: history.slice(0, splitIndex),
                toKeep: history.slice(splitIndex),
            };
        }

        // Fallback for agentic conversations: if splitIndex is 0 (few user messages)
        // or we can't identify turns, use a message-count-based approach.
        // Keep only the last ~20% of messages or minimum 3 messages.
        // Note: We use a low minKeep because even a few messages can have huge token counts
        // (e.g., tool outputs with large file contents). Token-based compaction needs to be
        // aggressive about message counts when tokens are overflowing.
        const minKeep = 3;
        const maxKeepPercent = 0.2;
        const keepCount = Math.max(minKeep, Math.floor(history.length * maxKeepPercent));

        // But don't summarize if we'd keep everything anyway
        if (keepCount >= history.length) {
            return {
                toSummarize: [],
                toKeep: history,
            };
        }

        this.logger.debug(
            `splitHistory: Using fallback - keeping last ${keepCount} of ${history.length} messages`
        );
        return {
            toSummarize: history.slice(0, -keepCount),
            toKeep: history.slice(-keepCount),
        };
    }

    /**
     * Generate an LLM summary of the messages.
     *
     * Never rejects because of an LLM failure: errors from generateText() are logged
     * and a locally-built fallback summary is returned instead.
     *
     * @param messages Messages to summarize
     * @param currentTask The most recent user message (current task context)
     * @returns Summary text prefixed with "[Session Compaction Summary]"
     */
    private async generateSummary(
        messages: readonly InternalMessage[],
        currentTask: string | null
    ): Promise<string> {
        const formattedConversation = this.formatMessagesForSummary(messages);

        // Add current task context to the prompt if available
        let conversationWithContext = formattedConversation;
        if (currentTask) {
            conversationWithContext += `\n\n--- CURRENT TASK (most recent user request) ---\n${currentTask}`;
        }

        // Use a replacer FUNCTION: with a string replacement, String.prototype.replace
        // interprets `$$`, `$&`, `` $` `` and `$'` inside the conversation text as special
        // patterns, which corrupts prompts containing shell or regex snippets.
        const prompt = this.options.summaryPrompt.replace(
            '{conversation}',
            () => conversationWithContext
        );

        try {
            const result = await generateText({
                model: this.model,
                prompt,
                maxOutputTokens: this.options.maxSummaryTokens,
            });
            // Return structured summary - the XML format from the LLM
            return `[Session Compaction Summary]\n${result.text}`;
        } catch (error) {
            this.logger.error(
                `ReactiveOverflowStrategy: Failed to generate summary - ${error instanceof Error ? error.message : String(error)}`
            );
            // Fallback: return a simple truncated version with current task
            return this.createFallbackSummary(messages, currentTask);
        }
    }

    /**
     * Format messages for the summary prompt.
     *
     * Each message becomes "ROLE: text" (tool results become "TOOL (name): text"),
     * separated by blank lines. Text is capped at 2000 characters per message and at
     * 500 characters for tool results.
     */
    private formatMessagesForSummary(messages: readonly InternalMessage[]): string {
        return messages
            .map((msg) => {
                const role = msg.role.toUpperCase();
                let content = this.extractText(msg.content, '\n') ?? '[no content]';

                // Truncate very long messages
                if (content.length > 2000) {
                    content = content.slice(0, 2000) + '... [truncated]';
                }

                // Handle tool calls
                if (isAssistantMessage(msg) && msg.toolCalls && msg.toolCalls.length > 0) {
                    const toolNames = msg.toolCalls
                        .map((tc: ToolCall) => tc.function.name)
                        .join(', ');
                    content += `\n[Used tools: ${toolNames}]`;
                }

                // Handle tool results
                if (isToolMessage(msg)) {
                    return `TOOL (${msg.name}): ${content.slice(0, 500)}${content.length > 500 ? '...' : ''}`;
                }

                return `${role}: ${content}`;
            })
            .join('\n\n');
    }

    /**
     * Create a fallback summary if LLM call fails.
     *
     * Built without any model call from: the first 100 characters of each of the last
     * three user messages, the distinct tool names used, the message count, and (when
     * present) the current task capped at 500 characters.
     */
    private createFallbackSummary(
        messages: readonly InternalMessage[],
        currentTask: string | null
    ): string {
        const userMessages = messages.filter((m) => m.role === 'user');
        const assistantWithTools = messages.filter(
            (m): m is InternalMessage & { role: 'assistant'; toolCalls: ToolCall[] } =>
                isAssistantMessage(m) && !!m.toolCalls && m.toolCalls.length > 0
        );

        const userTopics = userMessages
            .slice(-3)
            .map((m) => (this.extractText(m.content, ' ') ?? '').slice(0, 100))
            .join('; ');

        const toolsUsed = [
            ...new Set(
                assistantWithTools.flatMap((m) => m.toolCalls.map((tc) => tc.function.name))
            ),
        ].join(', ');

        // Create XML-structured fallback, one output line per entry
        const lines: string[] = [
            '[Session Compaction Summary - Fallback]',
            '<session_compaction>',
            '<conversation_history>',
            `User discussed: ${userTopics || 'various topics'}`,
            `Tools used: ${toolsUsed || 'none'}`,
            `Messages summarized: ${messages.length}`,
            '</conversation_history>',
        ];
        if (currentTask) {
            lines.push(
                '<current_task>',
                `${currentTask.slice(0, 500)}${currentTask.length > 500 ? '...' : ''}`,
                '</current_task>'
            );
        }
        lines.push(
            '<important_context>',
            'Note: This is a fallback summary due to LLM error. Context may be incomplete.',
            '</important_context>',
            '</session_compaction>'
        );
        return lines.join('\n');
    }
}

View File

@@ -0,0 +1,33 @@
import { InternalMessage } from '../types.js';
/**
* Compaction strategy interface.
*
* Strategies are responsible for reducing conversation history size
* when context limits are exceeded. The strategy is called by TurnExecutor
* after detecting overflow via actual token usage from the API.
*
* A strategy does not mutate or replace the history it receives (the parameter
* is readonly); it returns summary messages for the caller to add.
*/
export interface ICompactionStrategy {
/** Human-readable name for logging/UI */
readonly name: string;
/**
* Compacts the provided message history.
*
* The returned summary messages MUST include specific metadata fields for
* `filterCompacted()` to correctly exclude pre-summary messages at read-time:
*
* Required metadata:
* - `isSummary: true` - Marks the message as a compaction summary
* - `originalMessageCount: number` - On a first compaction, the count of
* messages that were summarized. On re-compaction this must be the ABSOLUTE
* index in the full history of the first preserved message, i.e.
* (index of previous summary + 1) + number of newly summarized messages,
* as ReactiveOverflowStrategy does.
* (used by filterCompacted to determine which messages to exclude)
*
* Optional metadata:
* - `isRecompaction: true` - Set when re-compacting after a previous summary
* - `isSessionSummary: true` - Alternative to isSummary for session-level summaries
*
* Implementations may be synchronous or asynchronous; callers should `await`
* the result to handle both.
*
* @param history The current conversation history.
* @returns Summary messages to add to history. Empty array if nothing to compact.
*/
compact(history: readonly InternalMessage[]): Promise<InternalMessage[]> | InternalMessage[];
}

View File

@@ -0,0 +1,41 @@
/**
* Context-specific error codes.
*
* Covers message validation (per role), token counting, compaction strategy
* configuration, and message lookup. Every value is a stable snake_case string
* prefixed with `context_`. Wrapper codes for operation/formatting, token
* processing and provider/model checks were removed (see notes below).
*/
export enum ContextErrorCode {
// Message validation
MESSAGE_ROLE_MISSING = 'context_message_role_missing',
MESSAGE_CONTENT_EMPTY = 'context_message_content_empty',
// User message validation
USER_MESSAGE_CONTENT_INVALID = 'context_user_message_content_invalid',
// Assistant message validation
ASSISTANT_MESSAGE_CONTENT_OR_TOOLS_REQUIRED = 'context_assistant_message_content_or_tools_required',
ASSISTANT_MESSAGE_TOOL_CALLS_INVALID = 'context_assistant_message_tool_calls_invalid',
// Tool message validation
TOOL_MESSAGE_FIELDS_MISSING = 'context_tool_message_fields_missing',
TOOL_CALL_ID_NAME_REQUIRED = 'context_tool_call_id_name_required',
// System message validation
SYSTEM_MESSAGE_CONTENT_INVALID = 'context_system_message_content_invalid',
// Token counting
TOKEN_COUNT_FAILED = 'context_token_count_failed',
// (removed) Operation/formatting wrappers; domain errors bubble up
// (removed) Token processing wrappers; domain errors bubble up
// (removed) Provider/model required; validated at LLM or agent layer
// Compaction strategy configuration errors
PRESERVE_VALUES_NEGATIVE = 'context_preserve_values_negative',
MIN_MESSAGES_NEGATIVE = 'context_min_messages_negative',
COMPACTION_INVALID_TYPE = 'context_compaction_invalid_type',
COMPACTION_VALIDATION = 'context_compaction_validation',
COMPACTION_MISSING_LLM = 'context_compaction_missing_llm',
COMPACTION_PROVIDER_ALREADY_REGISTERED = 'context_compaction_provider_already_registered',
// Message lookup errors
MESSAGE_NOT_FOUND = 'context_message_not_found',
MESSAGE_NOT_ASSISTANT = 'context_message_not_assistant',
ASSISTANT_CONTENT_NOT_STRING = 'context_assistant_content_not_string',
}

View File

@@ -0,0 +1,210 @@
import { DextoRuntimeError } from '../errors/index.js';
import { ErrorScope, ErrorType } from '../errors/types.js';
import { ContextErrorCode } from './error-codes.js';
/**
* Context runtime error factory methods
* Creates properly typed errors for context management operations
*/
export class ContextError {
// Message validation errors
static messageRoleMissing() {
return new DextoRuntimeError(
ContextErrorCode.MESSAGE_ROLE_MISSING,
ErrorScope.CONTEXT,
ErrorType.USER,
'Message must have a role',
{},
'Ensure all messages have a valid role field'
);
}
static userMessageContentInvalid() {
return new DextoRuntimeError(
ContextErrorCode.USER_MESSAGE_CONTENT_INVALID,
ErrorScope.CONTEXT,
ErrorType.USER,
'User message content should be a non-empty string or a non-empty array of parts',
{},
'Provide valid content for user messages'
);
}
static assistantMessageContentOrToolsRequired() {
return new DextoRuntimeError(
ContextErrorCode.ASSISTANT_MESSAGE_CONTENT_OR_TOOLS_REQUIRED,
ErrorScope.CONTEXT,
ErrorType.USER,
'Assistant message must have content or toolCalls',
{},
'Provide either content or toolCalls for assistant messages'
);
}
static assistantMessageToolCallsInvalid() {
return new DextoRuntimeError(
ContextErrorCode.ASSISTANT_MESSAGE_TOOL_CALLS_INVALID,
ErrorScope.CONTEXT,
ErrorType.USER,
'Invalid toolCalls structure in assistant message',
{},
'Ensure toolCalls have proper structure with function name and arguments'
);
}
static toolMessageFieldsMissing() {
return new DextoRuntimeError(
ContextErrorCode.TOOL_MESSAGE_FIELDS_MISSING,
ErrorScope.CONTEXT,
ErrorType.USER,
'Tool message missing required fields (toolCallId, name, content)',
{},
'Ensure tool messages have toolCallId, name, and content fields'
);
}
static systemMessageContentInvalid() {
return new DextoRuntimeError(
ContextErrorCode.SYSTEM_MESSAGE_CONTENT_INVALID,
ErrorScope.CONTEXT,
ErrorType.USER,
'System message content must be a non-empty string',
{},
'Provide valid string content for system messages'
);
}
static userMessageContentEmpty() {
return new DextoRuntimeError(
ContextErrorCode.MESSAGE_CONTENT_EMPTY,
ErrorScope.CONTEXT,
ErrorType.USER,
'Content must be a non-empty string or have imageData/fileData',
{},
'Provide non-empty content or attach image/file data'
);
}
static toolCallIdNameRequired() {
return new DextoRuntimeError(
ContextErrorCode.TOOL_CALL_ID_NAME_REQUIRED,
ErrorScope.CONTEXT,
ErrorType.USER,
'toolCallId and name are required',
{},
'Provide both toolCallId and name for tool results'
);
}
// Operation errors
// Removed operation and tokenization/formatting wrappers; let domain errors bubble
// Compression strategy configuration errors
static preserveValuesNegative() {
return new DextoRuntimeError(
ContextErrorCode.PRESERVE_VALUES_NEGATIVE,
ErrorScope.CONTEXT,
ErrorType.USER,
'preserveStart and preserveEnd must be non-negative',
{},
'Set preserveStart and preserveEnd to zero or positive values'
);
}
static tokenCountFailed(cause: string) {
return new DextoRuntimeError(
ContextErrorCode.TOKEN_COUNT_FAILED,
ErrorScope.CONTEXT,
ErrorType.SYSTEM,
`Failed to count tokens: ${cause}`,
{ cause },
'Check tokenizer implementation and message content structure'
);
}
static minMessagesNegative() {
return new DextoRuntimeError(
ContextErrorCode.MIN_MESSAGES_NEGATIVE,
ErrorScope.CONTEXT,
ErrorType.USER,
'minMessagesToKeep must be non-negative',
{},
'Set minMessagesToKeep to zero or positive value'
);
}
/**
 * Build the error describing an unrecognized compaction provider type.
 *
 * @param type - The provider type that was requested.
 * @param available - Provider types that may be used instead; listed comma-separated
 * in the recovery hint and stored on the error context.
 * @returns A user-type `DextoRuntimeError` in the context scope.
 */
static compactionInvalidType(type: string, available: string[]) {
    const message = `Unknown compaction provider type: '${type}'`;
    const context = { type, available };
    const recovery = `Use one of the available types: ${available.join(', ')}`;
    return new DextoRuntimeError(
        ContextErrorCode.COMPACTION_INVALID_TYPE,
        ErrorScope.CONTEXT,
        ErrorType.USER,
        message,
        context,
        recovery
    );
}
/**
 * Build the error describing an invalid configuration for a compaction provider.
 *
 * @param type - The provider type whose configuration was rejected.
 * @param errors - Validation details, stored unchanged on the error context.
 * @returns A user-type `DextoRuntimeError` in the context scope.
 */
static compactionValidation(type: string, errors: unknown) {
    const message = `Invalid configuration for compaction provider '${type}'`;
    const context = { type, errors };
    const recovery = 'Check the configuration schema for this provider';
    return new DextoRuntimeError(
        ContextErrorCode.COMPACTION_VALIDATION,
        ErrorScope.CONTEXT,
        ErrorType.USER,
        message,
        context,
        recovery
    );
}
/**
 * Build the error describing a compaction provider that needs an LLM service
 * when none was supplied.
 *
 * @param type - The provider type that requires the LLM service.
 * @returns A user-type `DextoRuntimeError` in the context scope.
 */
static compactionMissingLLM(type: string) {
    const message = `Compaction provider '${type}' requires LLM service but none provided`;
    const context = { type };
    const recovery = 'Ensure LLM service is initialized before creating this compaction provider';
    return new DextoRuntimeError(
        ContextErrorCode.COMPACTION_MISSING_LLM,
        ErrorScope.CONTEXT,
        ErrorType.USER,
        message,
        context,
        recovery
    );
}
/**
 * Build the error describing an attempt to register a compaction provider type twice.
 *
 * @param type - The provider type that is already registered.
 * @returns A user-type `DextoRuntimeError` in the context scope.
 */
static compactionProviderAlreadyRegistered(type: string) {
    const message = `Compaction provider '${type}' is already registered`;
    const context = { type };
    const recovery = 'Each provider type can only be registered once';
    return new DextoRuntimeError(
        ContextErrorCode.COMPACTION_PROVIDER_ALREADY_REGISTERED,
        ErrorScope.CONTEXT,
        ErrorType.USER,
        message,
        context,
        recovery
    );
}
// Message lookup errors
/**
 * Build the error describing a message ID that could not be found.
 *
 * @param messageId - The ID that was looked up; stored on the error context.
 * @returns A not-found-type `DextoRuntimeError` in the context scope (no recovery hint).
 */
static messageNotFound(messageId: string) {
    const message = `Message with ID ${messageId} not found`;
    const context = { messageId };
    return new DextoRuntimeError(
        ContextErrorCode.MESSAGE_NOT_FOUND,
        ErrorScope.CONTEXT,
        ErrorType.NOT_FOUND,
        message,
        context
    );
}
/**
 * Build the error describing a message that exists but is not an assistant message.
 *
 * @param messageId - The ID of the offending message; stored on the error context.
 * @returns A user-type `DextoRuntimeError` in the context scope (no recovery hint).
 */
static messageNotAssistant(messageId: string) {
    const message = `Message with ID ${messageId} is not an assistant message`;
    const context = { messageId };
    return new DextoRuntimeError(
        ContextErrorCode.MESSAGE_NOT_ASSISTANT,
        ErrorScope.CONTEXT,
        ErrorType.USER,
        message,
        context
    );
}
/**
 * Build the error describing an attempt to append text to assistant message
 * content that is not a string.
 *
 * @returns A user-type `DextoRuntimeError` in the context scope, with an empty
 * context object and no recovery hint.
 */
static assistantContentNotString() {
    const message = 'Cannot append text to non-string assistant message content';
    return new DextoRuntimeError(
        ContextErrorCode.ASSISTANT_CONTENT_NOT_STRING,
        ErrorScope.CONTEXT,
        ErrorType.USER,
        message,
        {}
    );
}
}

View File

@@ -0,0 +1,4 @@
export * from './manager.js';
export * from './types.js';
export { getFileMediaKind, getResourceKind } from './media-helpers.js';
export * from './compaction/index.js';

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,27 @@
/**
* Browser-safe media kind helpers.
* These functions have no dependencies and can be safely imported in browser environments.
*/
/**
 * Derive file media kind from MIME type.
 * This is the canonical way to determine media kind - use this instead of storing redundant fields.
 *
 * @param mimeType - MIME type string, or `undefined` when unknown.
 * @returns `'audio'` for `audio/*`, `'video'` for `video/*`, and `'binary'` for
 * everything else (including `undefined` and the empty string).
 */
export function getFileMediaKind(mimeType: string | undefined): 'audio' | 'video' | 'binary' {
    // MIME type names are case-insensitive (RFC 2045), so normalize before prefix matching;
    // otherwise e.g. 'Audio/MPEG' would be misclassified as 'binary'.
    const normalized = mimeType?.toLowerCase() ?? '';
    if (normalized.startsWith('audio/')) return 'audio';
    if (normalized.startsWith('video/')) return 'video';
    return 'binary';
}
/**
 * Derive resource kind from MIME type (includes images).
 * Use this to determine the kind of resource for display/rendering purposes.
 *
 * @param mimeType - MIME type string, or `undefined` when unknown.
 * @returns `'image'`, `'audio'`, or `'video'` when the MIME type has the matching
 * top-level type, and `'binary'` otherwise (including `undefined` and the empty string).
 */
export function getResourceKind(
    mimeType: string | undefined
): 'image' | 'audio' | 'video' | 'binary' {
    // MIME type names are case-insensitive (RFC 2045), so normalize before prefix matching;
    // otherwise e.g. 'IMAGE/PNG' would be misclassified as 'binary'.
    const normalized = mimeType?.toLowerCase() ?? '';
    if (normalized.startsWith('image/')) return 'image';
    if (normalized.startsWith('audio/')) return 'audio';
    if (normalized.startsWith('video/')) return 'video';
    return 'binary';
}

View File

@@ -0,0 +1,336 @@
import type { LLMProvider, TokenUsage } from '../llm/types.js';
import type { ToolDisplayData } from '../tools/display-types.js';
// =============================================================================
// Content Part Types
// =============================================================================
/**
* Base interface for image data.
* Supports multiple formats for flexibility across different use cases.
* Extended by ImagePart, which adds the `type: 'image'` discriminant.
*/
export interface ImageData {
/**
* Image payload: a string, raw bytes (Uint8Array, Buffer, ArrayBuffer), or a URL object.
* NOTE(review): the string form is presumably base64 or a URL/data URI - confirm against consumers.
*/
image: string | Uint8Array | Buffer | ArrayBuffer | URL;
/** MIME type of the image; optional. */
mimeType?: string;
}
/**
* Base interface for file data.
* Supports multiple formats for flexibility across different use cases.
* Extended by FilePart, which adds the `type: 'file'` discriminant.
*/
export interface FileData {
/**
* File payload: a string, raw bytes (Uint8Array, Buffer, ArrayBuffer), or a URL object.
* NOTE(review): the string form is presumably base64 or a URL/data URI - confirm against consumers.
*/
data: string | Uint8Array | Buffer | ArrayBuffer | URL;
/** MIME type of the file; required (unlike ImageData.mimeType, which is optional). */
mimeType: string;
/** Original file name; optional. */
filename?: string;
}
/**
* Text content part.
* Member of the ContentPart union, identified by `type: 'text'`.
*/
export interface TextPart {
/** Discriminant for the ContentPart union. */
type: 'text';
/** The text content. */
text: string;
}
/**
* Image content part.
* Inherits `image` and `mimeType` from ImageData and adds the union discriminant.
*/
export interface ImagePart extends ImageData {
/** Discriminant for the ContentPart union. */
type: 'image';
}
/**
* File content part.
* Inherits `data`, `mimeType`, and `filename` from FileData and adds the union discriminant.
*/
export interface FilePart extends FileData {
/** Discriminant for the ContentPart union. */
type: 'file';
}
/**
* UI Resource content part for MCP-UI interactive components.
* Enables MCP servers to return rich, interactive UI (live streams, dashboards, forms).
*
* Note: the constraints described on the fields below (the ui:// prefix, the allowed
* MIME types, supplying either `content` or `blob`) are documentation only - the type
* itself declares plain strings and makes both `content` and `blob` optional.
* @see https://mcpui.dev/ for MCP-UI specification
*/
export interface UIResourcePart {
/** Discriminant for the ContentPart union. */
type: 'ui-resource';
/** URI identifying the UI resource, must start with ui:// */
uri: string;
/** MIME type: text/html, text/uri-list, or application/vnd.mcp-ui.remote-dom */
mimeType: string;
/** Inline HTML content or URL (for text/html and text/uri-list) */
content?: string;
/** Base64-encoded content (alternative to content field) */
blob?: string;
/** Optional metadata for the UI resource */
metadata?: {
/** Display title for the UI resource */
title?: string;
/** Preferred rendering size in pixels */
preferredSize?: { width: number; height: number };
};
}
/**
* Union of all content part types.
* Discriminated by the `type` field ('text' | 'image' | 'file' | 'ui-resource').
* Narrow with isTextPart, isImagePart, isFilePart, or isUIResourcePart.
*/
export type ContentPart = TextPart | ImagePart | FilePart | UIResourcePart;
// =============================================================================
// Content Part Type Guards
// =============================================================================
/**
 * Narrow a content part to `TextPart`.
 *
 * @param part - Any content part.
 * @returns `true` exactly when the part's `type` discriminant is `'text'`.
 */
export function isTextPart(part: ContentPart): part is TextPart {
    const { type } = part;
    return type === 'text';
}
/**
 * Narrow a content part to `ImagePart`.
 *
 * @param part - Any content part.
 * @returns `true` exactly when the part's `type` discriminant is `'image'`.
 */
export function isImagePart(part: ContentPart): part is ImagePart {
    const { type } = part;
    return type === 'image';
}
/**
 * Narrow a content part to `FilePart`.
 *
 * @param part - Any content part.
 * @returns `true` exactly when the part's `type` discriminant is `'file'`.
 */
export function isFilePart(part: ContentPart): part is FilePart {
    const { type } = part;
    return type === 'file';
}
/**
 * Narrow a content part to `UIResourcePart`.
 *
 * @param part - Any content part.
 * @returns `true` exactly when the part's `type` discriminant is `'ui-resource'`.
 */
export function isUIResourcePart(part: ContentPart): part is UIResourcePart {
    const { type } = part;
    return type === 'ui-resource';
}
// =============================================================================
// Tool Result Types
// =============================================================================
/**
* Sanitized tool execution result with content parts and resource references.
*/
export interface SanitizedToolResult {
/** Ordered content parts ready for rendering or provider formatting */
content: ContentPart[];
/**
* Resource references created during sanitization (e.g. blob store URIs).
* Consumers can dereference these via ResourceManager APIs.
* Each entry carries the resource URI, a coarse kind, its MIME type, and an optional filename.
*/
resources?: Array<{
uri: string;
kind: 'image' | 'audio' | 'video' | 'binary';
mimeType: string;
filename?: string;
}>;
/** Identifying and status metadata for the tool call that produced this result. */
meta: {
/** Name of the tool that was executed. */
toolName: string;
/** ID of the tool call this result belongs to. */
toolCallId: string;
/** Whether the tool execution succeeded. Always set by sanitizeToolResult(). */
success: boolean;
/** Structured display data for tool-specific rendering (diffs, shell output, etc.) */
display?: ToolDisplayData;
};
}
// =============================================================================
// Shared Message Types
// =============================================================================
// TokenUsage imported from llm/types.ts (used by AssistantMessage)
/**
* Tool call request from an assistant message.
* Carried in AssistantMessage.toolCalls; a ToolMessage refers back to a call through
* its `toolCallId` field.
*/
export interface ToolCall {
/** Unique identifier for this tool call */
id: string;
/** The type of tool call (currently only 'function' is supported) */
type: 'function';
/** Function call details */
function: {
/** Name of the function to call */
name: string;
/** Arguments for the function in JSON string format (a serialized string, not a parsed object) */
arguments: string;
};
/**
* Provider-specific options (e.g., thought signatures for Gemini 3).
* These are opaque tokens passed through to maintain model state across tool calls.
* Not intended for display - purely for API round-tripping.
*/
providerOptions?: Record<string, unknown>;
}
/**
* Approval status for tool message executions.
* (Not to be confused with ApprovalStatus enum from approval module)
* Used as the type of ToolMessage.approvalStatus.
*/
export type ToolApprovalStatus = 'pending' | 'approved' | 'rejected';
// =============================================================================
// Message Types (Discriminated Union by 'role')
// =============================================================================
/**
* Base interface for all message types.
* Contains fields common to all messages.
* Not exported: it is only used as the base of the four concrete message interfaces.
* Every field is optional at the type level.
*/
interface MessageBase {
/**
* Unique message identifier (UUID).
* Auto-generated by ContextManager.addMessage() if not provided.
*/
id?: string;
/**
* Timestamp when the message was created (Unix timestamp in milliseconds).
* Auto-generated by ContextManager.addMessage() if not provided.
*/
timestamp?: number;
/**
* Optional metadata for the message.
* Used for tracking summary status, original message IDs, etc.
* Values are `unknown`, so readers must narrow before use.
*/
metadata?: Record<string, unknown>;
}
/**
* System message containing instructions or context for the LLM.
* Member of the InternalMessage union, identified by `role: 'system'`.
*/
export interface SystemMessage extends MessageBase {
/** Discriminant for the InternalMessage union. */
role: 'system';
/** System prompt content as array of content parts */
content: ContentPart[];
}
/**
* User message containing end-user input.
* Content can be text, images, files, or UI resources.
* Member of the InternalMessage union, identified by `role: 'user'`.
*/
export interface UserMessage extends MessageBase {
/** Discriminant for the InternalMessage union. */
role: 'user';
/** User input content as array of content parts */
content: ContentPart[];
}
/**
* Assistant message containing LLM response.
* May include text content, reasoning, and/or tool calls.
* Member of the InternalMessage union, identified by `role: 'assistant'`.
* This is the only message type whose `content` may be null.
*/
export interface AssistantMessage extends MessageBase {
/** Discriminant for the InternalMessage union. */
role: 'assistant';
/** Response content - null if message only contains tool calls */
content: ContentPart[] | null;
/**
* Model reasoning text associated with this response.
* Present when the provider supports reasoning and returns a final reasoning trace.
*/
reasoning?: string;
/**
* Provider-specific metadata for reasoning, used for round-tripping.
* Contains opaque tokens (e.g., OpenAI itemId, Gemini thought signatures)
* that must be passed back to the provider on subsequent requests.
*/
reasoningMetadata?: Record<string, unknown>;
/** Token usage accounting for this response */
tokenUsage?: TokenUsage;
/** Model identifier that generated this response */
model?: string;
/** Provider identifier for this response */
provider?: LLMProvider;
/**
* Tool calls requested by the assistant.
* Present when the LLM requests tool execution.
*/
toolCalls?: ToolCall[];
}
/**
* Tool message containing the result of a tool execution.
* Links back to the original tool call via toolCallId.
* Member of the InternalMessage union, identified by `role: 'tool'`.
*/
export interface ToolMessage extends MessageBase {
/** Discriminant for the InternalMessage union. */
role: 'tool';
/** Tool execution result as array of content parts */
content: ContentPart[];
/** ID of the tool call this message is responding to (REQUIRED) */
toolCallId: string;
/** Name of the tool that produced this result (REQUIRED) */
name: string;
/**
* Whether the tool execution was successful.
* Optional here, unlike SanitizedToolResult.meta.success which is always present.
*/
success?: boolean;
/** Whether this tool call required user approval before execution */
requireApproval?: boolean;
/** The approval status for this tool call */
approvalStatus?: ToolApprovalStatus;
/**
* Timestamp when the tool output was compacted/pruned.
* Present when the tool result has been summarized to save context space.
* NOTE(review): presumably Unix milliseconds like MessageBase.timestamp - confirm.
*/
compactedAt?: number;
/**
* Structured display data for tool-specific rendering (diffs, shell output, etc.)
* Persisted from SanitizedToolResult.meta.display for proper rendering on session resume.
*/
displayData?: ToolDisplayData;
}
/**
* Union of all message types.
* Discriminated by the `role` field ('system' | 'user' | 'assistant' | 'tool').
*
* Use type guards (isSystemMessage, isUserMessage, etc.) for type narrowing.
*/
export type InternalMessage = SystemMessage | UserMessage | AssistantMessage | ToolMessage;
// =============================================================================
// Message Type Guards
// =============================================================================
/**
 * Narrow a message to `SystemMessage`.
 *
 * @param msg - Any internal message.
 * @returns `true` exactly when the message's `role` discriminant is `'system'`.
 */
export function isSystemMessage(msg: InternalMessage): msg is SystemMessage {
    const { role } = msg;
    return role === 'system';
}
/**
 * Narrow a message to `UserMessage`.
 *
 * @param msg - Any internal message.
 * @returns `true` exactly when the message's `role` discriminant is `'user'`.
 */
export function isUserMessage(msg: InternalMessage): msg is UserMessage {
    const { role } = msg;
    return role === 'user';
}
/**
 * Narrow a message to `AssistantMessage`.
 *
 * @param msg - Any internal message.
 * @returns `true` exactly when the message's `role` discriminant is `'assistant'`.
 */
export function isAssistantMessage(msg: InternalMessage): msg is AssistantMessage {
    const { role } = msg;
    return role === 'assistant';
}
/**
 * Narrow a message to `ToolMessage`.
 *
 * @param msg - Any internal message.
 * @returns `true` exactly when the message's `role` discriminant is `'tool'`.
 */
export function isToolMessage(msg: InternalMessage): msg is ToolMessage {
    const { role } = msg;
    return role === 'tool';
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,22 @@
import { randomUUID } from 'crypto';
/**
 * Common root of every Dexto error class.
 *
 * Carries a trace ID (caller-supplied or freshly generated) and obliges each
 * subclass to provide its own JSON serialization.
 */
export abstract class DextoBaseError extends Error {
    /** Trace identifier for this error; a random UUID unless one was passed in. */
    public readonly traceId: string;

    /**
     * @param message - Human-readable message forwarded to `Error`.
     * @param traceId - Optional trace ID to reuse. Any falsy value (including `''`)
     * results in a newly generated UUID.
     */
    constructor(message: string, traceId?: string) {
        super(message);
        // Report the concrete subclass name instead of the generic "Error".
        this.name = this.constructor.name;
        this.traceId = traceId ? traceId : randomUUID();
    }

    /**
     * Serialize the error to a plain object.
     * Every concrete subclass must supply an implementation.
     */
    abstract toJSON(): Record<string, any>;
}

View File

@@ -0,0 +1,35 @@
import { DextoBaseError } from './DextoBaseError.js';
import { ErrorScope } from './types.js';
import { ErrorType } from './types.js';
import type { DextoErrorCode } from './types.js';
/**
 * Error describing a single runtime failure.
 * Bundles a code, the scope that produced it, an error type, an optional
 * context payload, and optional recovery guidance.
 *
 * @typeParam C - Shape of the optional `context` payload.
 */
export class DextoRuntimeError<C = unknown> extends DextoBaseError {
    /**
     * @param code - Error code; a known `DextoErrorCode` or any other string.
     * @param scope - Scope that produced the error; a known `ErrorScope` or any other string.
     * @param type - Error type classification.
     * @param message - Human-readable message.
     * @param context - Optional structured details about the failure.
     * @param recovery - Optional recovery hint, or a list of hints.
     * @param traceId - Optional trace ID forwarded to the base class.
     */
    constructor(
        public readonly code: DextoErrorCode | string,
        public readonly scope: ErrorScope | string,
        public readonly type: ErrorType,
        message: string,
        public readonly context?: C,
        public readonly recovery?: string | string[],
        traceId?: string
    ) {
        super(message, traceId);
        // Pin the name to this class, replacing whatever the base constructor derived.
        this.name = 'DextoRuntimeError';
    }

    /**
     * Serialize the error to a plain object containing code, message, scope, type,
     * context, recovery, and traceId (in that key order).
     */
    toJSON() {
        const serialized = {
            code: this.code,
            message: this.message,
            scope: this.scope,
            type: this.type,
            context: this.context,
            recovery: this.recovery,
            traceId: this.traceId,
        };
        return serialized;
    }
}

View File

@@ -0,0 +1,111 @@
import { DextoBaseError } from './DextoBaseError.js';
import type { Issue } from './types.js';
/**
 * Validation error class for handling multiple validation issues
 * Similar to ZodError, provides first-class access to all validation issues
 */
export class DextoValidationError extends DextoBaseError {
    /** Every issue this error was constructed with, errors and warnings alike. */
    public readonly issues: Issue[];

    /**
     * @param issues - Validation issues to wrap; also used to build the error message.
     */
    constructor(issues: Issue[]) {
        const message = DextoValidationError.formatMessage(issues);
        super(message);
        this.name = 'DextoValidationError';
        this.issues = issues;
    }

    /**
     * Format multiple issues into a readable error message.
     *
     * - no issues: `'Validation failed'`
     * - exactly one issue: that issue's own message
     * - several issues: a summary such as `'Validation failed with 2 errors and 1 warning'`
     *
     * @param issues - Issues to summarize.
     * @returns The message used for the error.
     */
    private static formatMessage(issues: Issue[]): string {
        const [first] = issues;
        if (first === undefined) {
            return 'Validation failed';
        }
        if (issues.length === 1) {
            return first.message;
        }
        const errorCount = issues.filter((i) => i.severity === 'error').length;
        const warningCount = issues.filter((i) => i.severity === 'warning').length;
        const parts: string[] = [];
        if (errorCount > 0) {
            parts.push(`${errorCount} error${errorCount > 1 ? 's' : ''}`);
        }
        if (warningCount > 0) {
            parts.push(`${warningCount} warning${warningCount > 1 ? 's' : ''}`);
        }
        if (parts.length === 0) {
            // Issues that reach us untyped (e.g. parsed JSON) may carry no recognized
            // severity; fall back to a plain count rather than a dangling "with ".
            return `Validation failed with ${issues.length} issues`;
        }
        return `Validation failed with ${parts.join(' and ')}`;
    }

    /**
     * Get only error-severity issues
     */
    get errors(): Issue[] {
        return this.issues.filter((i) => i.severity === 'error');
    }

    /**
     * Get only warning-severity issues
     */
    get warnings(): Issue[] {
        return this.issues.filter((i) => i.severity === 'warning');
    }

    /**
     * Check if there are any error-severity issues
     */
    hasErrors(): boolean {
        return this.errors.length > 0;
    }

    /**
     * Check if there are any warning-severity issues
     */
    hasWarnings(): boolean {
        return this.warnings.length > 0;
    }

    /**
     * Get the first error-severity issue (if any)
     * Useful for getting the primary error when multiple exist
     */
    get firstError(): Issue | undefined {
        return this.errors[0];
    }

    /**
     * Get the first warning-severity issue (if any)
     */
    get firstWarning(): Issue | undefined {
        return this.warnings[0];
    }

    /**
     * Format issues for display
     * Returns an object with categorized issues for easy logging;
     * each entry is rendered as `[code] message`.
     */
    format(): { errors: string[]; warnings: string[] } {
        return {
            errors: this.errors.map((e) => `[${e.code}] ${e.message}`),
            warnings: this.warnings.map((w) => `[${w.code}] ${w.message}`),
        };
    }

    /**
     * Convert to JSON representation
     * Includes the name, message, all issues, the trace ID, and error/warning counts.
     */
    toJSON(): Record<string, any> {
        return {
            name: this.name,
            message: this.message,
            issues: this.issues,
            traceId: this.traceId,
            errorCount: this.errors.length,
            warningCount: this.warnings.length,
        };
    }
}

View File

@@ -0,0 +1,11 @@
/**
* Main entry point for the error management system
* Exports core types and utilities for error handling
*/
export { DextoBaseError } from './DextoBaseError.js';
export { DextoRuntimeError } from './DextoRuntimeError.js';
export { DextoValidationError } from './DextoValidationError.js';
export { ErrorScope, ErrorType } from './types.js';
export type { Issue, Severity, DextoErrorCode } from './types.js';
export { ensureOk } from './result-bridge.js';

View File

@@ -0,0 +1,36 @@
import type { Result } from '../utils/result.js';
import { DextoValidationError } from './DextoValidationError.js';
import type { IDextoLogger } from '../logger/v2/types.js';
/**
 * Unwrap a validation `Result`, throwing when it carries a failure.
 * Intended for public API boundaries where validation flows switch from the
 * Result pattern to exceptions.
 *
 * Note: Runtime errors are thrown directly, not through Result pattern
 *
 * @param result - The Result to check (typically from validation functions)
 * @param logger - Logger that receives one error entry listing the issue messages
 * (joined with `'; '`) when the result is not ok
 * @returns The data if successful
 * @throws DextoValidationError if the result contains validation issues
 *
 * @example
 * ```typescript
 * // Validation flow
 * const result = validateInputForLLM(input, config);
 * const data = ensureOk(result, logger); // Throws DextoValidationError if validation failed
 *
 * // LLM config validation
 * const configResult = resolveAndValidateLLMConfig(current, updates);
 * const validatedConfig = ensureOk(configResult, logger);
 * ```
 */
export function ensureOk<T, C>(result: Result<T, C>, logger: IDextoLogger): T {
    if (!result.ok) {
        const summary = result.issues.map((issue) => issue.message).join('; ');
        logger.error(`ensureOk: validation failed - ${summary}`);
        // The Result pattern is reserved for validation, so surface a validation error.
        throw new DextoValidationError(result.issues);
    }
    return result.data;
}

View File

@@ -0,0 +1,90 @@
import type { AgentErrorCode } from '@core/agent/error-codes.js';
// ConfigErrorCode has been moved to @dexto/agent-management
// Import from there if needed for error type unions
import type { ContextErrorCode } from '@core/context/error-codes.js';
import type { LLMErrorCode } from '@core/llm/error-codes.js';
import type { MCPErrorCode } from '@core/mcp/error-codes.js';
import type { SessionErrorCode } from '@core/session/error-codes.js';
import type { StorageErrorCode } from '@core/storage/error-codes.js';
import type { SystemPromptErrorCode } from '@core/systemPrompt/error-codes.js';
import type { ToolErrorCode } from '@core/tools/error-codes.js';
import type { ResourceErrorCode } from '@core/resources/error-codes.js';
import type { PromptErrorCode } from '@core/prompts/error-codes.js';
import type { ApprovalErrorCode } from '@core/approval/error-codes.js';
import type { MemoryErrorCode } from '@core/memory/error-codes.js';
import type { PluginErrorCode } from '@core/plugins/error-codes.js';
import type { TelemetryErrorCode } from '@core/telemetry/error-codes.js';
/**
* Error scopes representing functional domains in the system
* Each scope owns its validation and error logic
*
* String enum: each member's runtime value is the lowercase string shown on its line.
* Fields that carry a scope (e.g. Issue.scope) are typed `ErrorScope | string`,
* so values outside this enum are also accepted there.
*/
export enum ErrorScope {
LLM = 'llm', // LLM operations, model compatibility, input validation for LLMs
AGENT = 'agent', // Agent lifecycle, configuration
CONFIG = 'config', // Configuration file operations, parsing, validation
CONTEXT = 'context', // Context management, message validation, token processing
SESSION = 'session', // Session lifecycle, management, and state
MCP = 'mcp', // MCP server connections and protocol
TOOLS = 'tools', // Tool execution and authorization
STORAGE = 'storage', // Storage backend operations
LOGGER = 'logger', // Logging system operations, transports, and configuration
SYSTEM_PROMPT = 'system_prompt', // System prompt contributors and file processing
RESOURCE = 'resource', // Resource management (MCP/internal) discovery and access
PROMPT = 'prompt', // Prompt management, resolution, and providers
MEMORY = 'memory', // Memory management and storage
PLUGIN = 'plugin', // Plugin loading, validation, and execution
TELEMETRY = 'telemetry', // Telemetry initialization and export operations
}
/**
* Error types that map directly to HTTP status codes
* Each type represents the nature of the error
*
* The status code noted on each line is the intended mapping; the translation to
* HTTP responses is not implemented in this enum. Both SYSTEM and UNKNOWN are
* annotated with 500.
*/
export enum ErrorType {
USER = 'user', // 400 - bad input, config errors, validation failures
PAYMENT_REQUIRED = 'payment_required', // 402 - insufficient credits, billing issue
FORBIDDEN = 'forbidden', // 403 - permission denied, unauthorized
NOT_FOUND = 'not_found', // 404 - resource doesn't exist (session, file, etc.)
TIMEOUT = 'timeout', // 408 - operation timed out
CONFLICT = 'conflict', // 409 - resource conflict, concurrent operation
RATE_LIMIT = 'rate_limit', // 429 - too many requests
SYSTEM = 'system', // 500 - bugs, internal failures, unexpected states
THIRD_PARTY = 'third_party', // 502 - upstream provider failures, API errors
UNKNOWN = 'unknown', // 500 - unclassified errors, fallback
}
/**
* Union type for all error codes across domains
* Provides type safety for error handling
* Note: ConfigErrorCode has been moved to @dexto/agent-management
*
* Each member is a per-domain error-code type imported (type-only) at the top of this file.
* Code-carrying fields such as Issue.code are typed `DextoErrorCode | string`, so codes
* outside this union (e.g. config codes) are still accepted there.
*/
export type DextoErrorCode =
| LLMErrorCode
| AgentErrorCode
| ContextErrorCode
| SessionErrorCode
| MCPErrorCode
| ToolErrorCode
| StorageErrorCode
| SystemPromptErrorCode
| ResourceErrorCode
| PromptErrorCode
| ApprovalErrorCode
| MemoryErrorCode
| PluginErrorCode
| TelemetryErrorCode;
/** Severity of an issue: either 'error' or 'warning'. Used as the type of Issue.severity. */
export type Severity = 'error' | 'warning';
/**
* Generic issue type for validation results.
* The type parameter C is the shape of the optional `context` payload (defaults to unknown).
*/
export interface Issue<C = unknown> {
/** Error code; a known DextoErrorCode or any other string. */
code: DextoErrorCode | string;
/** Human-readable description of the issue. */
message: string;
scope: ErrorScope | string; // Domain that generated this issue
type: ErrorType; // HTTP status mapping
/** Whether this issue is an error or a warning. */
severity: Severity;
/**
* Optional path segments (strings or numbers).
* NOTE(review): presumably the location of the offending value within the validated input - confirm.
*/
path?: Array<string | number>;
/** Optional structured details about the issue. */
context?: C;
}

View File

@@ -0,0 +1,113 @@
import { describe, it, expect, vi } from 'vitest';
import { AgentEventBus } from './index.js';
// Exercises AbortSignal-based listener removal on AgentEventBus.
// Every case registers listeners for 'session:reset', emits that event, and checks
// call counts; the order of emit/abort/off calls is what each case verifies.
describe('EventBus AbortController Support', () => {
it('should remove event listener when signal is aborted', () => {
const eventBus = new AgentEventBus();
const abortController = new AbortController();
const listener = vi.fn();
// Add listener with abort signal
eventBus.on('session:reset', listener, { signal: abortController.signal });
// Emit event - should be received
eventBus.emit('session:reset', { sessionId: 'test' });
expect(listener).toHaveBeenCalledTimes(1);
// Abort the signal
abortController.abort();
// Emit event again - should not be received
eventBus.emit('session:reset', { sessionId: 'test' });
expect(listener).toHaveBeenCalledTimes(1);
});
it('should not add listener if signal is already aborted', () => {
const eventBus = new AgentEventBus();
const abortController = new AbortController();
const listener = vi.fn();
// Abort signal first
abortController.abort();
// Try to add listener with aborted signal
eventBus.on('session:reset', listener, { signal: abortController.signal });
// Emit event - should not be received
eventBus.emit('session:reset', { sessionId: 'test' });
expect(listener).not.toHaveBeenCalled();
});
it('should work with once() and abort signal', () => {
const eventBus = new AgentEventBus();
const abortController = new AbortController();
const listener = vi.fn();
// Add once listener with abort signal
eventBus.once('session:reset', listener, { signal: abortController.signal });
// Abort the signal before emitting
abortController.abort();
// Emit event - should not be received
eventBus.emit('session:reset', { sessionId: 'test' });
expect(listener).not.toHaveBeenCalled();
});
it('should work without signal (backward compatibility)', () => {
const eventBus = new AgentEventBus();
const listener = vi.fn();
// Add listener without signal (old way)
eventBus.on('session:reset', listener);
// Emit event - should be received
eventBus.emit('session:reset', { sessionId: 'test' });
expect(listener).toHaveBeenCalledTimes(1);
// Remove manually
eventBus.off('session:reset', listener);
// Emit event again - should not be received
eventBus.emit('session:reset', { sessionId: 'test' });
expect(listener).toHaveBeenCalledTimes(1);
});
it('should handle multiple listeners with different signals', () => {
const eventBus = new AgentEventBus();
const controller1 = new AbortController();
const controller2 = new AbortController();
const listener1 = vi.fn();
const listener2 = vi.fn();
const listener3 = vi.fn();
// Add listeners with different signals
eventBus.on('session:reset', listener1, { signal: controller1.signal });
eventBus.on('session:reset', listener2, { signal: controller2.signal });
eventBus.on('session:reset', listener3); // No signal
// Emit event - all should receive
eventBus.emit('session:reset', { sessionId: 'test' });
expect(listener1).toHaveBeenCalledTimes(1);
expect(listener2).toHaveBeenCalledTimes(1);
expect(listener3).toHaveBeenCalledTimes(1);
// Abort first signal
controller1.abort();
// Emit event - only listener2 and listener3 should receive
eventBus.emit('session:reset', { sessionId: 'test' });
expect(listener1).toHaveBeenCalledTimes(1); // Still 1
expect(listener2).toHaveBeenCalledTimes(2);
expect(listener3).toHaveBeenCalledTimes(2);
// Abort second signal
controller2.abort();
// Emit event - only listener3 should receive
eventBus.emit('session:reset', { sessionId: 'test' });
expect(listener1).toHaveBeenCalledTimes(1); // Still 1
expect(listener2).toHaveBeenCalledTimes(2); // Still 2
expect(listener3).toHaveBeenCalledTimes(3);
});
});

View File

@@ -0,0 +1,902 @@
import { EventEmitter } from 'events';
import type { LLMProvider } from '../llm/types.js';
import { ValidatedAgentConfig } from '../agent/schemas.js';
import type { ApprovalRequest, ApprovalResponse } from '../approval/types.js';
import type { SanitizedToolResult } from '../context/types.js';
/**
* LLM finish reason - why the LLM stopped generating
*
* Superset of Vercel AI SDK's LanguageModelV3FinishReason with app-specific additions.
* The first seven members mirror the SDK values; 'cancelled' and 'max-steps' are the
* app-specific additions.
*/
export type LLMFinishReason =
// From Vercel AI SDK (LanguageModelV3FinishReason)
| 'stop' // Normal completion
| 'tool-calls' // Stopped to execute tool calls (more steps coming)
| 'length' // Hit token/length limit
| 'content-filter' // Content filter violation stopped the model
| 'error' // Error occurred
| 'other' // Other reason
| 'unknown' // Model has not transmitted a finish reason
// App-specific additions
| 'cancelled' // User cancelled
| 'max-steps'; // Hit max steps limit
/**
* Agent-level event names - events that occur at the agent/global level
*
* Declared `as const`, so the array is a readonly tuple of string literals.
* Note: 'llm:switched' is listed here and also in SESSION_EVENT_NAMES.
*/
export const AGENT_EVENT_NAMES = [
'session:reset',
'session:created',
'session:title-updated',
'session:override-set',
'session:override-cleared',
'mcp:server-connected',
'mcp:server-added',
'mcp:server-removed',
'mcp:server-restarted',
'mcp:server-updated',
'mcp:resource-updated',
'mcp:prompts-list-changed',
'mcp:tools-list-changed',
'tools:available-updated',
'llm:switched',
'state:changed',
'state:exported',
'state:reset',
'resource:cache-invalidated',
'approval:request',
'approval:response',
'run:invoke',
] as const;
/**
* Session-level event names - events that occur within individual sessions
*
* Declared `as const`, so the array is a readonly tuple of string literals.
* Note: 'llm:switched' is listed here and also in AGENT_EVENT_NAMES.
*/
export const SESSION_EVENT_NAMES = [
'llm:thinking',
'llm:chunk',
'llm:response',
'llm:tool-call',
'llm:tool-result',
'llm:error',
'llm:switched',
'llm:unsupported-input',
'tool:running',
'context:compacting',
'context:compacted',
'context:pruned',
'message:queued',
'message:dequeued',
'message:removed',
'run:complete',
] as const;
/**
* All event names combined for backward compatibility
*
* Built by spreading AGENT_EVENT_NAMES followed by SESSION_EVENT_NAMES, with no
* de-duplication: 'llm:switched' is present in both source arrays and therefore
* appears twice here. Code that iterates this array should account for that.
*/
export const EVENT_NAMES = [...AGENT_EVENT_NAMES, ...SESSION_EVENT_NAMES] as const;
/**
* Event Visibility Tiers
*
* These define which events are exposed through different APIs:
* - STREAMING_EVENTS: Exposed via DextoAgent.stream() for real-time chat UIs
* - INTEGRATION_EVENTS: Exposed via webhooks, A2A, and monitoring systems
* - Internal events: Only available via direct EventBus access
*/
/**
* Tier 1: Streaming Events
*
* Events exposed via DextoAgent.stream() for real-time streaming.
* These are the most commonly used events for building chat UIs and
* represent the core user-facing event stream.
*
* Differences from the name lists above:
* - 'service:event' is listed here but in neither AGENT_EVENT_NAMES nor SESSION_EVENT_NAMES.
* - 'message:removed' and the session-level 'llm:switched' are in SESSION_EVENT_NAMES
*   but not in this list.
*/
export const STREAMING_EVENTS = [
// LLM events (session-scoped, forwarded to agent bus with sessionId)
'llm:thinking',
'llm:chunk',
'llm:response',
'llm:tool-call',
'llm:tool-result',
'llm:error',
'llm:unsupported-input',
// Tool execution events
'tool:running',
// Context management events
'context:compacting',
'context:compacted',
'context:pruned',
// Message queue events (for mid-task user guidance)
'message:queued',
'message:dequeued',
// Run lifecycle events
'run:complete',
// Session metadata
'session:title-updated',
// Approval events (needed for tool confirmation in streaming UIs)
'approval:request',
'approval:response',
// Service events (extensible pattern for non-core services)
'service:event',
] as const;
/**
* Tier 2: Integration Events
*
* Events exposed via webhooks, A2A subscriptions, and monitoring systems.
* Includes all streaming events plus lifecycle and state management events
* useful for external integrations.
*
* Built by spreading STREAMING_EVENTS first, then appending the nine
* integration-only names below.
*/
export const INTEGRATION_EVENTS = [
...STREAMING_EVENTS,
// Session lifecycle
'session:created',
'session:reset',
// MCP lifecycle
'mcp:server-connected',
'mcp:server-restarted',
'mcp:tools-list-changed',
'mcp:prompts-list-changed',
// Tools
'tools:available-updated',
// LLM provider switching
'llm:switched',
// State management
'state:changed',
] as const;
/**
* Tier 3: Internal Events
*
* Events only exposed via direct AgentEventBus access for advanced use cases.
* These are implementation details that may change between versions.
*
* Internal events include:
* - resource:cache-invalidated
* - state:exported
* - state:reset
* - mcp:server-added
* - mcp:server-removed
* - mcp:server-updated
* - mcp:resource-updated
* - session:override-set
* - session:override-cleared
*
* The list above is illustrative; the authoritative definition is InternalEventName
* below (every AgentEventName that is not an IntegrationEventName).
* NOTE(review): 'run:invoke' is in AGENT_EVENT_NAMES but not in INTEGRATION_EVENTS, so it
* presumably belongs to this tier as well - confirm against the AgentEventName definition.
*/
/** Name of any event listed in STREAMING_EVENTS. */
export type StreamingEventName = (typeof STREAMING_EVENTS)[number];
/** Name of any event listed in INTEGRATION_EVENTS (which includes all streaming events). */
export type IntegrationEventName = (typeof INTEGRATION_EVENTS)[number];
/** Every AgentEventName that is not exposed as an integration event. */
export type InternalEventName = Exclude<AgentEventName, IntegrationEventName>;
/**
* Type helper to extract events by name from AgentEventMap
*
* Produces the payload type AgentEventMap[T] intersected with `{ name: T }`,
* i.e. the payload tagged with its event name.
*/
export type AgentEventByName<T extends AgentEventName> = {
name: T;
} & AgentEventMap[T];
/**
* Union type of all streaming events with their payloads
* Automatically derived from STREAMING_EVENTS const to stay in sync.
* Uses 'name' property (not 'type') to avoid collision with payload fields like ApprovalRequest.type
* These are the events that the message-stream API actually returns.
*
* Implementation: a mapped type builds `{ name: K } & AgentEventMap[K]` for each
* streaming event name K, then indexing by StreamingEventName collapses the map
* into a union of those members.
*/
export type StreamingEvent = {
[K in StreamingEventName]: { name: K } & AgentEventMap[K];
}[StreamingEventName];
/**
 * Union type of all integration events with their payloads.
 *
 * Derived from INTEGRATION_EVENTS (through IntegrationEventName) in the same way
 * StreamingEvent is derived from STREAMING_EVENTS, so the union cannot drift from
 * the runtime list. Because INTEGRATION_EVENTS spreads STREAMING_EVENTS, every
 * StreamingEvent member is included.
 *
 * Implementation: a mapped type builds `{ name: K } & AgentEventMap[K]` for each
 * integration event name K; indexing by IntegrationEventName collapses the map
 * into a union of those members.
 */
export type IntegrationEvent = {
    [K in IntegrationEventName]: { name: K } & AgentEventMap[K];
}[IntegrationEventName];
/**
* Combined event map for the agent bus - includes agent events and session events with sessionId
* This is what the global agent event bus uses to aggregate all events
*
* Each key is an event name and each value is the payload type emitted with it.
* Events that also exist in SessionEventMap carry the same fields here plus a
* session identifier (`sessionId`, or `sessionIds` for 'llm:switched').
*/
export interface AgentEventMap {
// Session events
/** Fired when session conversation is reset */
'session:reset': {
sessionId: string;
};
/** Fired when a new session is created and should become active */
'session:created': {
sessionId: string | null; // null means clear without creating (deferred creation)
switchTo: boolean; // Whether UI should switch to this session
};
/** Fired when a session's human-friendly title is updated */
'session:title-updated': {
sessionId: string;
title: string;
};
/** Fired when session override is set */
'session:override-set': {
sessionId: string;
override: any; // SessionOverride type
};
/** Fired when session override is cleared */
'session:override-cleared': {
sessionId: string;
};
// MCP events
/**
* Fired when MCP server connection succeeds or fails
*
* NOTE(review): this payload has its own `name` field, while IntegrationEvent tags
* the same payload with `name: 'mcp:server-connected'`. In that union `name` is
* therefore narrowed to the event-name literal - confirm consumers do not expect
* the server name there.
*/
'mcp:server-connected': {
name: string;
success: boolean;
/** Optional error text - presumably set when success=false; confirm against the emitter */
error?: string;
};
/** Fired when MCP server is added to runtime state */
'mcp:server-added': {
serverName: string;
config: any; // McpServerConfig type
};
/** Fired when MCP server is removed from runtime state */
'mcp:server-removed': {
serverName: string;
};
/** Fired when MCP server is restarted */
'mcp:server-restarted': {
serverName: string;
};
/** Fired when MCP server is updated in runtime state */
'mcp:server-updated': {
serverName: string;
config: any; // McpServerConfig type
};
/** Fired when MCP server resource is updated */
'mcp:resource-updated': {
serverName: string;
resourceUri: string;
};
/** Fired when MCP server prompts list changes */
'mcp:prompts-list-changed': {
serverName: string;
prompts: string[];
};
/** Fired when MCP server tools list changes */
'mcp:tools-list-changed': {
serverName: string;
tools: string[];
};
// Tools events
/** Fired when available tools list updates */
'tools:available-updated': {
tools: string[];
source: 'mcp' | 'builtin';
};
/**
* Agent run is being invoked externally (e.g., by scheduler, A2A, API).
* Fired BEFORE agent.stream()/run() is called.
* UI can use this to display the incoming prompt and set up streaming subscriptions.
*/
'run:invoke': {
/** The session this run will execute in */
sessionId: string;
/** The prompt/content being sent */
content: import('../context/types.js').ContentPart[];
/** Source of the invocation */
source: 'scheduler' | 'a2a' | 'api' | 'external';
/** Optional metadata about the invocation */
metadata?: Record<string, unknown>;
};
// LLM events (forwarded from session bus with sessionId added)
/** LLM service started thinking */
'llm:thinking': {
sessionId: string;
};
/** LLM service sent a streaming chunk */
'llm:chunk': {
chunkType: 'text' | 'reasoning';
content: string;
isComplete?: boolean;
sessionId: string;
};
/** LLM service final response */
'llm:response': {
content: string;
reasoning?: string;
provider?: LLMProvider;
model?: string;
tokenUsage?: {
inputTokens?: number;
outputTokens?: number;
reasoningTokens?: number;
totalTokens?: number;
cacheReadTokens?: number;
cacheWriteTokens?: number;
};
/** Estimated input tokens before LLM call (for analytics/calibration) */
estimatedInputTokens?: number;
/** Finish reason: 'tool-calls' means more steps coming, others indicate completion */
finishReason?: LLMFinishReason;
sessionId: string;
};
/** LLM service requested a tool call */
'llm:tool-call': {
toolName: string;
args: Record<string, any>;
callId?: string;
sessionId: string;
};
/** LLM service returned a tool result */
'llm:tool-result': {
toolName: string;
callId?: string;
success: boolean;
/** Sanitized result - present when success=true */
sanitized?: SanitizedToolResult;
rawResult?: unknown;
/** Error message - present when success=false */
error?: string;
/** Whether this tool required user approval */
requireApproval?: boolean;
/** The approval status (only present if requireApproval is true) */
approvalStatus?: 'approved' | 'rejected';
sessionId: string;
};
/** Tool execution actually started (after approval if needed) */
'tool:running': {
toolName: string;
toolCallId: string;
sessionId: string;
};
/** LLM service error */
'llm:error': {
error: Error;
context?: string;
recoverable?: boolean;
/** Tool call ID if error occurred during tool execution */
toolCallId?: string;
sessionId: string;
};
/** LLM service switched */
'llm:switched': {
newConfig: any; // LLMConfig type
historyRetained?: boolean;
sessionIds: string[]; // Array of affected session IDs
};
/** LLM service unsupported input */
'llm:unsupported-input': {
errors: string[];
provider: LLMProvider;
model?: string;
fileType?: string;
details?: any;
sessionId: string;
};
/** Context compaction is starting */
'context:compacting': {
/** Estimated tokens that triggered compaction */
estimatedTokens: number;
sessionId: string;
};
/** Context was compacted during multi-step tool calling */
'context:compacted': {
/** Actual input tokens from API that triggered compaction */
originalTokens: number;
/** Estimated tokens after compaction (simple length/4 heuristic) */
compactedTokens: number;
originalMessages: number;
compactedMessages: number;
strategy: string;
reason: 'overflow' | 'manual';
sessionId: string;
};
/** Old tool outputs were pruned (marked with compactedAt) to save tokens */
'context:pruned': {
prunedCount: number;
savedTokens: number;
sessionId: string;
};
/** Context was manually cleared via /clear command */
'context:cleared': {
sessionId: string;
};
/** User message was queued during agent execution */
'message:queued': {
position: number;
id: string;
sessionId: string;
};
/** Queued messages were dequeued and injected into context */
'message:dequeued': {
count: number;
ids: string[];
coalesced: boolean;
/** Combined content of all dequeued messages (for UI display) */
content: import('../context/types.js').ContentPart[];
sessionId: string;
};
/** Queued message was removed from queue */
'message:removed': {
id: string;
sessionId: string;
};
/** Agent run completed (all steps done, no queued messages remaining) */
'run:complete': {
/** How the run ended */
finishReason: LLMFinishReason;
/** Number of steps executed */
stepCount: number;
/** Total wall-clock duration of the run in milliseconds */
durationMs: number;
/** Error that caused termination (only if finishReason === 'error') */
error?: Error;
sessionId: string;
};
// State events
/** Fired when agent runtime state changes */
'state:changed': {
field: string; // keyof AgentRuntimeState
oldValue: any;
newValue: any;
sessionId?: string;
};
/** Fired when agent state is exported as config */
'state:exported': {
config: ValidatedAgentConfig;
};
/** Fired when agent state is reset to baseline */
'state:reset': {
toConfig: any; // AgentConfig type
};
// Resource events
/** Fired when resource cache should be invalidated */
'resource:cache-invalidated': {
resourceUri?: string;
serverName: string;
action: 'updated' | 'server_connected' | 'server_removed' | 'blob_stored';
};
// Approval events - use ApprovalRequest directly
// No transformation needed since we use 'name' (not 'type') as SSE discriminant
/** Fired when user approval is requested (generalized approval system) */
'approval:request': ApprovalRequest;
/** Fired when user approval response is received */
'approval:response': ApprovalResponse;
/**
* Extensible service event for non-core/additive services.
* Allows services like agent-spawner, process-tools, etc. to emit events
* without polluting the core event namespace.
*/
'service:event': {
/** Service identifier (e.g., 'agent-spawner', 'process-tools') */
service: string;
/** Event type within the service (e.g., 'progress', 'stdout') */
event: string;
/** Links this event to a parent tool call */
toolCallId?: string;
/** Session this event belongs to */
sessionId: string;
/** Arbitrary event data - service-specific payload */
data: Record<string, unknown>;
};
}
/**
* Session-level events - these occur within individual sessions without session context
* (since they're already scoped to a session)
*
* Each key is an event name and each value is its payload type. The payloads carry
* no session identifier; the same-named AgentEventMap entries add `sessionId`
* (or `sessionIds` for 'llm:switched').
*/
export interface SessionEventMap {
/** LLM service started thinking (no payload: emitted and received without arguments) */
'llm:thinking': void;
/** LLM service sent a streaming chunk */
'llm:chunk': {
chunkType: 'text' | 'reasoning';
content: string;
isComplete?: boolean;
};
/** LLM service final response */
'llm:response': {
content: string;
reasoning?: string;
provider?: LLMProvider;
model?: string;
tokenUsage?: {
inputTokens?: number;
outputTokens?: number;
reasoningTokens?: number;
totalTokens?: number;
cacheReadTokens?: number;
cacheWriteTokens?: number;
};
/** Estimated input tokens before LLM call (for analytics/calibration) */
estimatedInputTokens?: number;
/** Finish reason: 'tool-calls' means more steps coming, others indicate completion */
finishReason?: LLMFinishReason;
};
/** LLM service requested a tool call */
'llm:tool-call': {
toolName: string;
args: Record<string, any>;
callId?: string;
};
/** LLM service returned a tool result */
'llm:tool-result': {
toolName: string;
callId?: string;
success: boolean;
/** Sanitized result - present when success=true */
sanitized?: SanitizedToolResult;
rawResult?: unknown;
/** Error message - present when success=false */
error?: string;
/** Whether this tool required user approval */
requireApproval?: boolean;
/** The approval status (only present if requireApproval is true) */
approvalStatus?: 'approved' | 'rejected';
};
/** Tool execution actually started (after approval if needed) */
'tool:running': {
toolName: string;
toolCallId: string;
};
/** LLM service error */
'llm:error': {
error: Error;
context?: string;
recoverable?: boolean;
/** Tool call ID if error occurred during tool execution */
toolCallId?: string;
};
/** LLM service switched */
'llm:switched': {
newConfig: any; // LLMConfig type
historyRetained?: boolean;
};
/** LLM service unsupported input */
'llm:unsupported-input': {
errors: string[];
provider: LLMProvider;
model?: string;
fileType?: string;
details?: any;
};
/** Context compaction is starting */
'context:compacting': {
/** Estimated tokens that triggered compaction */
estimatedTokens: number;
};
/** Context was compacted during multi-step tool calling */
'context:compacted': {
/** Actual input tokens from API that triggered compaction */
originalTokens: number;
/** Estimated tokens after compaction (simple length/4 heuristic) */
compactedTokens: number;
originalMessages: number;
compactedMessages: number;
strategy: string;
reason: 'overflow' | 'manual';
};
/** Old tool outputs were pruned (marked with compactedAt) to save tokens */
'context:pruned': {
prunedCount: number;
savedTokens: number;
};
/** User message was queued during agent execution */
'message:queued': {
position: number;
id: string;
};
/** Queued messages were dequeued and injected into context */
'message:dequeued': {
count: number;
ids: string[];
coalesced: boolean;
/** Combined content of all dequeued messages (for UI display) */
content: import('../context/types.js').ContentPart[];
};
/** Queued message was removed from queue */
'message:removed': {
id: string;
};
/** Agent run completed (all steps done, no queued messages remaining) */
'run:complete': {
/** How the run ended */
finishReason: LLMFinishReason;
/** Number of steps executed */
stepCount: number;
/** Total wall-clock duration of the run in milliseconds */
durationMs: number;
/** Error that caused termination (only if finishReason === 'error') */
error?: Error;
};
}
/** Name of any event declared in AgentEventMap. */
export type AgentEventName = keyof AgentEventMap;
/** Name of any event declared in SessionEventMap. */
export type SessionEventName = keyof SessionEventMap;
/** Same type as AgentEventName (both are `keyof AgentEventMap`). */
export type EventName = keyof AgentEventMap;
/**
* Compile-time checks to ensure event name arrays and maps stay synchronized
*
* Each conditional type below resolves to `true` only when every name in the
* array is a key of the corresponding map; otherwise it resolves to `never`,
* and assigning `true` to the matching constant fails to compile.
* NOTE(review): the check is one-directional - a map key that is missing from
* the arrays is not detected here.
*/
type _AgentEventNamesInMap = (typeof AGENT_EVENT_NAMES)[number] extends keyof AgentEventMap
? true
: never;
type _SessionEventNamesInMap = (typeof SESSION_EVENT_NAMES)[number] extends SessionEventName
? true
: never;
type _EventNamesInMap = (typeof EVENT_NAMES)[number] extends EventName ? true : never;
// These assignments are the actual assertions: they only type-check when the types above are `true`.
const _checkAgentEventNames: _AgentEventNamesInMap = true;
const _checkSessionEventNames: _SessionEventNamesInMap = true;
const _checkEventNames: _EventNamesInMap = true;
// Explicitly mark compile-time checks as used to avoid linter warnings
void _checkAgentEventNames;
void _checkSessionEventNames;
void _checkEventNames;
/**
 * Frozen runtime copies of the event-name lists, for iteration, validation, etc.
 * Each export is a fresh array built from the corresponding *_EVENT_NAMES list
 * and then frozen, so consumers cannot mutate it.
 */
export const AgentEventNames: readonly AgentEventName[] = Object.freeze(
    Array.from(AGENT_EVENT_NAMES)
);
export const SessionEventNames: readonly SessionEventName[] = Object.freeze(
    Array.from(SESSION_EVENT_NAMES)
);
export const EventNames: readonly EventName[] = Object.freeze(Array.from(EVENT_NAMES));
/** Function shape handed to the wrapped EventEmitter; payload typing is enforced by the public methods. */
type TypedEmitterListener = (...args: any[]) => void;

/** Bookkeeping record for one live subscription created by `on()` or `once()`. */
interface TypedEmitterRegistration {
    /** The function the caller passed in; `off()` looks subscriptions up by it. */
    listener: TypedEmitterListener;
    /** The function actually attached to the wrapped emitter (a wrapper for `once()`). */
    handler: TypedEmitterListener;
    /** Signal that controls this subscription, if one was supplied. */
    signal: AbortSignal | undefined;
    /** Abort callback attached to `signal`; detached again when the subscription ends. */
    onAbort: (() => void) | undefined;
}

/**
 * Generic typed EventEmitter base class using composition instead of inheritance
 * This provides full compile-time type safety by not extending EventEmitter
 *
 * Exported for extension by packages like multi-agent-server that need custom event buses.
 *
 * Every subscription is tracked so that:
 * - `off(event, listener)` also removes a listener registered through `once()`
 *   (the wrapped emitter only knows the internal wrapper, not the caller's function);
 * - the 'abort' handler added to an AbortSignal is detached as soon as the
 *   subscription ends (via `off()`, or after a `once()` listener fired), so
 *   long-lived signals do not accumulate dead handlers.
 */
export class BaseTypedEventEmitter<TEventMap extends Record<string, any>> {
    // Wrapped EventEmitter instance
    private _emitter = new EventEmitter();
    // Live subscriptions: event name -> caller's listener -> registrations (oldest first)
    private _registrations = new Map<
        keyof TEventMap,
        Map<TypedEmitterListener, TypedEmitterRegistration[]>
    >();

    /**
     * Emit an event with type-safe payload
     *
     * @param event - Event name
     * @param args - The payload, or nothing for events whose payload type is `void`
     * @returns Whatever the wrapped emitter's `emit` returns (true if the event had listeners)
     */
    emit<K extends keyof TEventMap>(
        event: K,
        ...args: TEventMap[K] extends void ? [] : [TEventMap[K]]
    ): boolean {
        return this._emitter.emit(event as string, ...args);
    }

    /**
     * Subscribe to an event with type-safe listener
     *
     * @param event - Event name
     * @param listener - Called on every emit of `event`
     * @param options - Optional `signal`; aborting it removes the listener.
     *   If the signal is already aborted the listener is never added.
     * @returns `this`, for chaining
     */
    on<K extends keyof TEventMap>(
        event: K,
        listener: TEventMap[K] extends void ? () => void : (payload: TEventMap[K]) => void,
        options?: { signal?: AbortSignal }
    ): this {
        // If signal is already aborted, don't add the listener
        if (options?.signal?.aborted) {
            return this;
        }
        this._subscribe(event, listener as TypedEmitterListener, false, options?.signal);
        return this;
    }

    /**
     * Subscribe to an event once with type-safe listener
     *
     * @param event - Event name
     * @param listener - Called for the next emit of `event` only
     * @param options - Optional `signal`; aborting it before the event fires removes the listener.
     *   If the signal is already aborted the listener is never added.
     * @returns `this`, for chaining
     */
    once<K extends keyof TEventMap>(
        event: K,
        listener: TEventMap[K] extends void ? () => void : (payload: TEventMap[K]) => void,
        options?: { signal?: AbortSignal }
    ): this {
        // If signal is already aborted, don't add the listener
        if (options?.signal?.aborted) {
            return this;
        }
        this._subscribe(event, listener as TypedEmitterListener, true, options?.signal);
        return this;
    }

    /**
     * Unsubscribe from an event
     *
     * Removes the most recently added subscription of `listener` for `event`,
     * regardless of whether it was added with `on()` or `once()`.
     *
     * @returns `this`, for chaining
     */
    off<K extends keyof TEventMap>(
        event: K,
        listener: TEventMap[K] extends void ? () => void : (payload: TEventMap[K]) => void
    ): this {
        const fn = listener as TypedEmitterListener;
        const stack = this._registrations.get(event)?.get(fn);
        const latest = stack?.[stack.length - 1];
        if (latest) {
            this._unsubscribe(event, latest);
        } else {
            // Not tracked: defer to the emitter, which is a no-op for unknown listeners
            this._emitter.off(event as string, fn);
        }
        return this;
    }

    /** Attach `listener` to the wrapped emitter and record the subscription. */
    private _subscribe(
        event: keyof TEventMap,
        listener: TypedEmitterListener,
        once: boolean,
        signal: AbortSignal | undefined
    ): void {
        const registration: TypedEmitterRegistration = {
            listener,
            handler: listener,
            signal,
            onAbort: undefined,
        };

        if (once) {
            // The emitter's own `once` removes the wrapper before calling it; the wrapper
            // only has to drop our bookkeeping before handing over to the caller's listener.
            registration.handler = (...args: any[]) => {
                this._release(event, registration);
                listener(...args);
            };
            this._emitter.once(event as string, registration.handler);
        } else {
            this._emitter.on(event as string, listener);
        }

        if (signal) {
            const onAbort = () => this._unsubscribe(event, registration);
            registration.onAbort = onAbort;
            signal.addEventListener('abort', onAbort, { once: true });
        }

        let byListener = this._registrations.get(event);
        if (!byListener) {
            byListener = new Map();
            this._registrations.set(event, byListener);
        }
        const stack = byListener.get(listener);
        if (stack) {
            stack.push(registration);
        } else {
            byListener.set(listener, [registration]);
        }
    }

    /** Detach a subscription from the wrapped emitter and drop its bookkeeping. */
    private _unsubscribe(event: keyof TEventMap, registration: TypedEmitterRegistration): void {
        this._emitter.off(event as string, registration.handler);
        this._release(event, registration);
    }

    /** Drop the bookkeeping of a subscription: abort handler and registry entry. */
    private _release(event: keyof TEventMap, registration: TypedEmitterRegistration): void {
        if (registration.signal && registration.onAbort) {
            registration.signal.removeEventListener('abort', registration.onAbort);
        }
        const byListener = this._registrations.get(event);
        const stack = byListener?.get(registration.listener);
        if (!byListener || !stack) {
            return;
        }
        const index = stack.lastIndexOf(registration);
        if (index !== -1) {
            stack.splice(index, 1);
        }
        if (stack.length === 0) {
            byListener.delete(registration.listener);
        }
        if (byListener.size === 0) {
            this._registrations.delete(event);
        }
    }
}
/**
* Agent-level typed event emitter for global agent events
*
* Adds no members of its own; it fixes the event map of BaseTypedEventEmitter to AgentEventMap.
*/
export class AgentEventBus extends BaseTypedEventEmitter<AgentEventMap> {}
/**
* Session-level typed event emitter for session-scoped events
*
* Adds no members of its own; it fixes the event map of BaseTypedEventEmitter to SessionEventMap.
*/
export class SessionEventBus extends BaseTypedEventEmitter<SessionEventMap> {}
/**
* Combined typed event emitter for backward compatibility
*
* Uses the same event map (AgentEventMap) as AgentEventBus and adds no members of its own.
*/
export class TypedEventEmitter extends BaseTypedEventEmitter<AgentEventMap> {}
/**
* Global shared event bus (backward compatibility)
*
* A single TypedEventEmitter instance created when this module is loaded.
*/
export const eventBus = new TypedEventEmitter();

View File

@@ -0,0 +1,212 @@
/**
* Image definition helper
*
* Provides type-safe API for defining base images.
*/
import type { ImageDefinition } from './types.js';
/**
 * Define a Dexto base image.
 *
 * This function provides type checking and validation for image definitions.
 * Use this in your dexto.image.ts file.
 *
 * Only a light sanity check is done here: name, version and description must be
 * present, and every configured provider category must supply `providers` or
 * `register`. A definition without a `providers` object (for example one coming
 * from untyped JavaScript that only extends a base image) is accepted rather
 * than crashing with a TypeError.
 *
 * @example
 * ```typescript
 * // dexto.image.ts
 * import { defineImage } from '@dexto/core';
 * import { localBlobProvider } from './providers/blob.js';
 *
 * export default defineImage({
 *   name: 'local',
 *   version: '1.0.0',
 *   description: 'Local development base image',
 *   target: 'local-development',
 *
 *   providers: {
 *     blobStore: {
 *       providers: [localBlobProvider],
 *     },
 *   },
 *
 *   defaults: {
 *     storage: {
 *       blob: { type: 'local', storePath: './data/blobs' },
 *     },
 *   },
 *
 *   constraints: ['filesystem-required', 'offline-capable'],
 * });
 * ```
 *
 * @param definition - Image definition object
 * @returns The same definition (for type inference)
 * @throws Error if name, version or description is missing, or if a provider
 *   category has neither `providers` nor `register`
 */
export function defineImage(definition: ImageDefinition): ImageDefinition {
    // Validation
    if (!definition.name) {
        throw new Error('Image definition must have a name');
    }
    if (!definition.version) {
        throw new Error('Image definition must have a version');
    }
    if (!definition.description) {
        throw new Error('Image definition must have a description');
    }
    // Validate provider categories have at least one of: providers or register.
    // `providers` is required by the type but may be absent at runtime, so fall back
    // to an empty object instead of letting Object.entries throw on undefined.
    for (const [category, config] of Object.entries(definition.providers ?? {})) {
        if (!config) continue;
        if (!config.providers && !config.register) {
            throw new Error(
                `Provider category '${category}' must have either 'providers' array or 'register' function`
            );
        }
    }
    return definition;
}
/**
 * Build a provider category configuration, checking that it is usable.
 *
 * The configuration is returned unchanged when it supplies `providers`,
 * `register`, or both.
 *
 * @example
 * ```typescript
 * import { defineProviderCategory } from '@dexto/core';
 *
 * const blobStore = defineProviderCategory({
 *   providers: [localBlobProvider, s3BlobProvider],
 * });
 * ```
 *
 * @param config - Category configuration to check
 * @returns The same `config` object
 * @throws Error if neither `providers` nor `register` is set
 */
export function defineProviderCategory(config: {
    providers?: any[];
    register?: () => void | Promise<void>;
}) {
    const declaresProviders = Boolean(config.providers);
    const declaresRegister = Boolean(config.register);
    if (declaresProviders || declaresRegister) {
        return config;
    }
    throw new Error('Provider category must have either providers or register function');
}
/**
 * Validate an image definition.
 * Throws if the definition is invalid.
 *
 * Used by bundler to validate images before building.
 *
 * Checks, in order: name/version/description are non-empty strings, version
 * matches a basic `x.y.z[-prerelease]` pattern, `target` (if set) is a known
 * target, the image defines at least one provider category or extends a base
 * image, every provider category is well-formed, `constraints` (if set) are
 * known values, `utils` paths (if set) are strings starting with './', and
 * `extends` (if set) is a string.
 *
 * @param definition - Image definition to validate
 * @throws Error describing the first rule that is violated
 */
export function validateImageDefinition(definition: ImageDefinition): void {
    // Basic validation
    if (!definition.name || typeof definition.name !== 'string') {
        throw new Error('Image name must be a non-empty string');
    }
    if (!definition.version || typeof definition.version !== 'string') {
        throw new Error('Image version must be a non-empty string');
    }
    if (!definition.description || typeof definition.description !== 'string') {
        throw new Error('Image description must be a non-empty string');
    }
    // Validate version format (basic semver check)
    const versionRegex = /^\d+\.\d+\.\d+(-[a-zA-Z0-9.-]+)?$/;
    if (!versionRegex.test(definition.version)) {
        throw new Error(
            `Image version '${definition.version}' is not valid semver. Expected format: x.y.z`
        );
    }
    // Validate target if provided
    const validTargets = [
        'local-development',
        'cloud-production',
        'edge-serverless',
        'embedded-iot',
        'enterprise',
        'custom',
    ];
    if (definition.target && !validTargets.includes(definition.target)) {
        throw new Error(
            `Invalid target '${definition.target}'. Valid targets: ${validTargets.join(', ')}`
        );
    }
    // Validate provider categories
    // Allow empty providers if extending a base image (providers inherited from base).
    // `providers` may be missing entirely at runtime in that case, so normalise it once
    // here; iterating the raw value would throw a TypeError on undefined.
    const providerCategories = definition.providers ?? {};
    const hasProviders = Object.values(providerCategories).some((config) => config !== undefined);
    if (!hasProviders && !definition.extends) {
        throw new Error(
            'Image must either define at least one provider category or extend a base image'
        );
    }
    for (const [category, config] of Object.entries(providerCategories)) {
        if (!config) continue;
        if (!config.providers && !config.register) {
            throw new Error(
                `Provider category '${category}' must have either 'providers' array or 'register' function`
            );
        }
        if (config.providers && !Array.isArray(config.providers)) {
            throw new Error(`Provider category '${category}' providers must be an array`);
        }
        if (config.register && typeof config.register !== 'function') {
            throw new Error(`Provider category '${category}' register must be a function`);
        }
    }
    // Validate constraints if provided
    const validConstraints = [
        'filesystem-required',
        'network-required',
        'offline-capable',
        'serverless-compatible',
        'cold-start-optimized',
        'low-memory',
        'edge-compatible',
        'browser-compatible',
    ];
    if (definition.constraints) {
        if (!Array.isArray(definition.constraints)) {
            throw new Error('Image constraints must be an array');
        }
        for (const constraint of definition.constraints) {
            if (!validConstraints.includes(constraint)) {
                throw new Error(
                    `Invalid constraint '${constraint}'. Valid constraints: ${validConstraints.join(', ')}`
                );
            }
        }
    }
    // Validate utils if provided
    if (definition.utils) {
        for (const [name, path] of Object.entries(definition.utils)) {
            if (typeof path !== 'string') {
                throw new Error(`Utility '${name}' path must be a string`);
            }
            if (!path.startsWith('./')) {
                throw new Error(
                    `Utility '${name}' path must be relative (start with './'). Got: ${path}`
                );
            }
        }
    }
    // Validate extends if provided
    if (definition.extends) {
        if (typeof definition.extends !== 'string') {
            throw new Error('Image extends must be a string (parent image name)');
        }
    }
}

View File

@@ -0,0 +1,68 @@
/**
* Base Image Infrastructure
*
* Provides types and helpers for defining Dexto base images.
* Base images are pre-configured backend surfaces that bundle providers,
* utilities, and defaults for specific deployment targets.
*
* @example Creating a base image
* ```typescript
* // dexto.image.ts
* import { defineImage } from '@dexto/core';
*
* export default defineImage({
* name: 'local',
* version: '1.0.0',
* description: 'Local development base image',
* target: 'local-development',
*
* providers: {
* blobStore: {
* providers: [localBlobProvider],
* },
* database: {
* register: async () => {
* const { sqliteProvider } = await import('./providers/database.js');
* databaseRegistry.register(sqliteProvider);
* },
* },
* },
*
* defaults: {
* storage: {
* blob: { type: 'local', storePath: './data/blobs' },
* database: { type: 'sqlite', path: './data/agent.db' },
* },
* },
*
* constraints: ['filesystem-required', 'offline-capable'],
* });
* ```
*
* @example Using a base image
* ```typescript
* // my-app/src/index.ts
* import { createAgent, enrichConfigForLocal } from '@dexto/image-local';
*
* const config = enrichConfigForLocal(rawConfig);
* const agent = createAgent(config); // Providers already registered!
* ```
*/
// Core types
export type {
ImageProvider,
ProviderMetadata,
ProviderRegistrationFn,
ProviderCategoryConfig,
ImageDefinition,
ImageTarget,
ImageConstraint,
ImageDefaults,
ImageMetadata,
ImageBuildResult,
ImageBuildOptions,
} from './types.js';
// Definition helpers
export { defineImage, defineProviderCategory, validateImageDefinition } from './define-image.js';

View File

@@ -0,0 +1,278 @@
/**
* Dexto Base Image Definition
*
* Base images are pre-configured backend surfaces that bundle providers,
* utilities, and defaults for specific deployment targets.
*
* Like Alpine Linux or Ubuntu, but for AI agents.
*/
import type { z } from 'zod';
/**
* Generic provider interface that all provider types should extend.
* Provides common structure for type-safe provider registration.
*
* Note: This is a simplified interface for image definitions.
* Actual provider implementations should use the specific provider
* interfaces from their respective modules (e.g., BlobStoreProvider).
*
* `TType` narrows the `type` field to a string literal; it defaults to `string`.
*/
export interface ImageProvider<TType extends string = string> {
/** Unique type identifier for this provider (e.g., 'sqlite', 'local', 's3') */
type: TType;
/** Zod schema for validating provider configuration */
configSchema: z.ZodType<any>;
/**
* Factory function to create provider instance.
* `config`, `deps` and the return value are all `any` at this level, so
* callers get no type checking from this interface.
*/
create: (config: any, deps: any) => any;
/** Optional metadata about the provider */
metadata?: ProviderMetadata;
}
/**
* Metadata about a provider's characteristics and requirements
*
* Every field is optional.
*/
export interface ProviderMetadata {
/** Human-readable display name */
displayName?: string;
/** Brief description of what this provider does */
description?: string;
/** Whether this provider requires network connectivity */
requiresNetwork?: boolean;
/** Whether this provider requires filesystem access */
requiresFilesystem?: boolean;
/** Persistence level of storage providers */
persistenceLevel?: 'ephemeral' | 'persistent';
/** Platforms this provider is compatible with */
platforms?: ('node' | 'browser' | 'edge' | 'worker')[];
}
/**
* Registry function that registers providers on module initialization.
* Called automatically when the image is imported.
*
* Takes no arguments and may be synchronous or asynchronous
* (returns `void` or `Promise<void>`).
*/
export type ProviderRegistrationFn = () => void | Promise<void>;
/**
* Configuration for a single provider category in an image.
* Supports both direct provider objects and registration functions.
*
* Both fields are optional in the type, but the `defineImage` and
* `validateImageDefinition` helpers reject a category that sets neither.
*/
export interface ProviderCategoryConfig {
/** Direct provider objects to register */
providers?: ImageProvider[];
/** Registration function for complex initialization */
register?: ProviderRegistrationFn;
}
/**
* Complete image definition structure.
* This is what dexto.image.ts exports.
*
* `name`, `version`, `description` and `providers` are required; everything else is optional.
*/
export interface ImageDefinition {
/** Unique name for this image (e.g., 'local', 'cloud', 'edge') */
name: string;
/** Semantic version of this image (`validateImageDefinition` expects `x.y.z` with an optional `-prerelease` suffix) */
version: string;
/** Brief description of this image's purpose and target environment */
description: string;
/** Target deployment environment (for documentation and validation) */
target?: ImageTarget;
/**
* Provider categories to register.
* Each category can include direct providers or a registration function.
* The object itself is required, but every category inside it is optional.
*/
providers: {
/** Blob storage providers (e.g., local filesystem, S3, R2) */
blobStore?: ProviderCategoryConfig;
/** Database providers (e.g., SQLite, PostgreSQL, D1) */
database?: ProviderCategoryConfig;
/** Cache providers (e.g., in-memory, Redis, KV) */
cache?: ProviderCategoryConfig;
/** Custom tool providers (e.g., datetime helpers, API integrations) */
customTools?: ProviderCategoryConfig;
/** Plugin providers (e.g., audit logging, content filtering) */
plugins?: ProviderCategoryConfig;
/** Compression strategy providers (e.g., sliding window, summarization) */
compression?: ProviderCategoryConfig;
};
/**
* Default configuration values.
* Used when agent config doesn't specify values.
* Merged with agent config during agent creation.
*/
defaults?: ImageDefaults;
/**
* Runtime constraints this image requires.
* Used for validation and error messages.
*/
constraints?: ImageConstraint[];
/**
* Utilities exported by this image.
* Maps utility name to file path (relative to image root).
* `validateImageDefinition` requires every path to start with './'.
*
* Example:
* {
* configEnrichment: './utils/config.js',
* lifecycle: './utils/lifecycle.js'
* }
*/
utils?: Record<string, string>;
/**
* Selective named exports from packages.
* Allows re-exporting specific types and values from dependencies.
*
* Example:
* {
* '@dexto/core': ['logger', 'createAgentCard', 'type DextoAgent'],
* '@dexto/utils': ['formatDate', 'parseConfig']
* }
*/
exports?: Record<string, string[]>;
/**
* Parent image to extend (for image inheritance).
* Optional: enables creating specialized images from base images.
*/
extends?: string;
/**
* Bundled plugin paths.
* Absolute paths to plugin directories containing .dexto-plugin or .claude-plugin manifests.
* These plugins are automatically discovered alongside user/project plugins.
*
* Example:
* ```typescript
* import { PLUGIN_PATH as planToolsPluginPath } from '@dexto/tools-plan';
*
* bundledPlugins: [planToolsPluginPath]
* ```
*/
bundledPlugins?: string[];
}
/**
* Target deployment environments for images.
* Helps users choose the right image for their use case.
*
* Keep in sync with the `validTargets` list in `validateImageDefinition`,
* which repeats these literals for the runtime check.
*/
export type ImageTarget =
| 'local-development'
| 'cloud-production'
| 'edge-serverless'
| 'embedded-iot'
| 'enterprise'
| 'custom';
/**
* Runtime constraints that an image requires.
* Used for validation and helpful error messages.
*
* Keep in sync with the `validConstraints` list in `validateImageDefinition`,
* which repeats these literals for the runtime check.
*/
export type ImageConstraint =
| 'filesystem-required'
| 'network-required'
| 'offline-capable'
| 'serverless-compatible'
| 'cold-start-optimized'
| 'low-memory'
| 'edge-compatible'
| 'browser-compatible';
/**
* Default configuration values provided by an image.
* These are used when agent config doesn't specify values.
*
* The index signatures (`[key: string]: any`) make this type open-ended: any
* extra key is accepted at the top level and inside each section that has one.
*/
export interface ImageDefaults {
/** Default storage configuration */
storage?: {
/** Database defaults; `type` is required, any other keys are allowed */
database?: {
type: string;
[key: string]: any;
};
/** Blob store defaults; `type` is required, any other keys are allowed */
blob?: {
type: string;
[key: string]: any;
};
/** Cache defaults; `type` is required, any other keys are allowed */
cache?: {
type: string;
[key: string]: any;
};
};
/** Default logging configuration */
logging?: {
level?: 'debug' | 'info' | 'warn' | 'error';
fileLogging?: boolean;
[key: string]: any;
};
/** Default LLM configuration */
llm?: {
provider?: string;
model?: string;
[key: string]: any;
};
/** Default tool configuration */
tools?: {
internalTools?: string[];
[key: string]: any;
};
/** Other default values */
[key: string]: any;
}
/**
* Metadata about a built image (generated by bundler).
* Included in the compiled image output.
*
* Unlike ImageDefinition, `constraints` is required here.
*/
export interface ImageMetadata {
/** Image name */
name: string;
/** Image version */
version: string;
/** Description */
description: string;
/** Target environment */
target?: ImageTarget;
/** Runtime constraints */
constraints: ImageConstraint[];
/** Build timestamp (a string; presumably ISO 8601 - confirm against the bundler) */
builtAt: string;
/** Core version this image was built for */
coreVersion: string;
/** Base image this extends (if any) */
extends?: string;
/** Bundled plugin paths (absolute paths to plugin directories) */
bundledPlugins?: string[];
}
/**
* Result of building an image.
* Contains the generated code and metadata.
*
* `code` and `types` hold the generated source text as strings.
*/
export interface ImageBuildResult {
/** Generated JavaScript code for the image entry point */
code: string;
/** Generated TypeScript definitions */
types: string;
/** Image metadata */
metadata: ImageMetadata;
/** Warnings encountered during build */
warnings?: string[];
}
/**
* Options for building an image.
*
* `imagePath` and `outDir` are required; the boolean flags are optional.
*/
export interface ImageBuildOptions {
/** Path to dexto.image.ts file */
imagePath: string;
/** Output directory for built image */
outDir: string;
/** Whether to generate source maps */
sourcemap?: boolean;
/** Whether to minify output */
minify?: boolean;
/** Additional validation rules */
strict?: boolean;
}

View File

@@ -0,0 +1,108 @@
// Browser-safe root exports for @dexto/core
// Export only what's actually used by client packages (webui, cli, client-sdk)
// Runtime utilities (actually used by client packages)
export { toError } from './utils/error-conversion.js'; // Used by webui package
export { zodToIssues } from './utils/result.js'; // Used by client-sdk package
export { ErrorScope, ErrorType } from './errors/types.js'; // Used by client-sdk package
// Type-only exports (used as types, no runtime overhead)
export type { Issue, Severity, DextoErrorCode } from './errors/types.js';
// Context/message types (used by webui package)
export type {
InternalMessage,
SystemMessage,
UserMessage,
AssistantMessage,
ToolMessage,
TextPart,
FilePart,
ImageData,
FileData,
UIResourcePart,
ContentPart,
ToolCall,
ToolApprovalStatus,
} from './context/types.js';
// Note: ImagePart not exported - only used internally in core package
// Message type guards (used by CLI and webui packages)
export {
isSystemMessage,
isUserMessage,
isAssistantMessage,
isToolMessage,
isTextPart,
isImagePart,
isFilePart,
isUIResourcePart,
} from './context/types.js';
// Context utilities (used by webui package for media kind detection)
export { getFileMediaKind, getResourceKind } from './context/media-helpers.js';
// LLM types (used by client packages)
export type { LLMProvider } from './llm/types.js';
export { LLM_PROVIDERS } from './llm/types.js';
// MCP types and constants (used by webui)
export type { McpServerType, McpConnectionMode } from './mcp/schemas.js';
export {
MCP_SERVER_TYPES,
MCP_CONNECTION_MODES,
DEFAULT_MCP_CONNECTION_MODE,
} from './mcp/schemas.js';
// Storage types and constants (used by webui)
export type { CacheType, DatabaseType } from './storage/schemas.js';
export { CACHE_TYPES, DATABASE_TYPES } from './storage/schemas.js';
// Tool confirmation types and constants (used by webui)
export type { ToolConfirmationMode, AllowedToolsStorageType } from './tools/schemas.js';
export {
TOOL_CONFIRMATION_MODES,
ALLOWED_TOOLS_STORAGE_TYPES,
DEFAULT_TOOL_CONFIRMATION_MODE,
DEFAULT_ALLOWED_TOOLS_STORAGE,
} from './tools/schemas.js';
// Approval types and constants (used by webui)
export { ApprovalStatus, ApprovalType, DenialReason } from './approval/types.js';
export type { ApprovalRequest, ApprovalResponse } from './approval/types.js';
// Session types (used by CLI package)
export type { SessionMetadata } from './session/session-manager.js';
// Agent types (used by webui for form configuration)
export type { AgentConfig, ValidatedAgentConfig } from './agent/schemas.js';
// System prompt types and constants (used by webui)
export { PROMPT_GENERATOR_SOURCES } from './systemPrompt/registry.js';
export type { ContributorConfig, SystemPromptConfig } from './systemPrompt/schemas.js';
// Search types (used by client-sdk package)
export type {
SearchOptions,
SearchResult,
SessionSearchResult,
SearchResponse,
SessionSearchResponse,
} from './search/types.js';
// Event types (used by client-sdk package)
export type { AgentEventMap, SessionEventMap } from './events/index.js';
// LLM registry types (used by client-sdk package)
export type { ModelInfo, ProviderInfo } from './llm/registry.js';
export type { SupportedFileType } from './llm/types.js';
// Resource types and utilities (used by webui package)
// Note: Only export browser-safe reference parsing functions, NOT ResourceManager
// (ResourceManager requires logger which has Node.js dependencies)
export type { ResourceMetadata } from './resources/types.js';
export type { ResourceReference } from './resources/reference-parser.js';
export {
parseResourceReferences,
resolveResourceReferences,
} from './resources/reference-parser.js';

View File

@@ -0,0 +1,95 @@
/**
* @dexto/core - Main entry point
*
* This package is designed for server-side use (Node.js).
* For browser/client usage, use server components/actions or the API.
*
* The package.json conditional exports handle environment routing:
* - Browser: Routes to index.browser.ts (minimal safe exports)
* - Node: Routes to this file (full exports)
*
* TODO: Break down into subpath exports for better tree-shaking
* Consider adding exports like:
* - @dexto/core/telemetry - Telemetry utilities
* - @dexto/core/llm - LLM services and factories
* - @dexto/core/session - Session management (currently internal)
* - @dexto/core/tools - Tool system
* This would allow:
* 1. Better tree-shaking (only import what you need)
* 2. Cleaner public API boundaries
* 3. Reduced bundle sizes for packages that only need specific functionality
* 4. Avoid pulling in OpenTelemetry decorators for packages that don't need instrumentation
*/
// Core Agent
export * from './agent/index.js';
// Configuration
// Config loading has been moved to @dexto/agent-management
// Import from '@dexto/agent-management' instead:
// - loadAgentConfig
// - ConfigError
// - ConfigErrorCode
// Errors
export * from './errors/index.js';
// Events
export * from './events/index.js';
// LLM
export * from './llm/index.js';
// Search
export * from './search/index.js';
// Logger
export * from './logger/index.js';
// MCP
export * from './mcp/index.js';
// Session
export * from './session/index.js';
// Storage
export * from './storage/index.js';
// System Prompt
export * from './systemPrompt/index.js';
// Tools
export * from './tools/index.js';
// Context
export * from './context/index.js';
export { getFileMediaKind, getResourceKind } from './context/index.js';
// Prompts
export * from './prompts/index.js';
// Utils
export * from './utils/index.js';
// Resources
export * from './resources/index.js';
// Approval (User Approval System)
export * from './approval/index.js';
// Memory
export * from './memory/index.js';
// Plugins
export * from './plugins/index.js';
// Telemetry
export * from './telemetry/index.js';
// Providers
export * from './providers/index.js';
// Base Image Infrastructure
export * from './image/index.js';
// Note: Blob types, schemas, and errors are exported from './storage/index.js'

View File

@@ -0,0 +1,38 @@
/**
 * LLM-specific error codes.
 * Includes configuration, validation, and runtime errors for LLM operations.
 *
 * Every member is a string enum whose value carries the `llm_` prefix. The
 * string values are runtime data (they are attached to errors), so treat them
 * as part of the public contract when renaming members.
 */
export enum LLMErrorCode {
// Configuration errors
API_KEY_MISSING = 'llm_api_key_missing',
API_KEY_INVALID = 'llm_api_key_invalid', // Too short, wrong format
API_KEY_CANDIDATE_MISSING = 'llm_api_key_candidate_missing',
BASE_URL_MISSING = 'llm_base_url_missing',
BASE_URL_INVALID = 'llm_base_url_invalid',
CONFIG_MISSING = 'llm_config_missing', // Required config (e.g., GOOGLE_VERTEX_PROJECT)
// Model/Provider compatibility
MODEL_INCOMPATIBLE = 'llm_model_incompatible',
MODEL_UNKNOWN = 'llm_model_unknown',
PROVIDER_UNSUPPORTED = 'llm_provider_unsupported',
// Input validation for message content (formerly generic "validation")
INPUT_FILE_UNSUPPORTED = 'llm_input_file_unsupported',
INPUT_IMAGE_UNSUPPORTED = 'llm_input_image_unsupported',
INPUT_TEXT_INVALID = 'llm_input_text_invalid',
// Limits (token budget, provider rate limits, account credits)
TOKENS_EXCEEDED = 'llm_tokens_exceeded',
RATE_LIMIT_EXCEEDED = 'llm_rate_limit_exceeded',
INSUFFICIENT_CREDITS = 'llm_insufficient_credits',
// Operations
SWITCH_FAILED = 'llm_switch_failed',
GENERATION_FAILED = 'llm_generation_failed',
// Input validation for the LLM switch operation (moved from agent)
SWITCH_INPUT_MISSING = 'llm_switch_input_missing', // At least model or provider must be specified
// Schema validation
REQUEST_INVALID_SCHEMA = 'llm_request_invalid_schema',
}

View File

@@ -0,0 +1,142 @@
import { DextoRuntimeError } from '../errors/DextoRuntimeError.js';
import { ErrorScope } from '@core/errors/types.js';
import { ErrorType } from '../errors/types.js';
import { LLMErrorCode } from './error-codes.js';
// Use types solely from types.ts to avoid duplication
import { getSupportedProviders } from './registry.js';
import type { LLMProvider } from './types.js';
/**
 * LLM runtime error factory methods.
 * Creates properly typed errors for LLM runtime operations.
 *
 * Every factory returns a `DextoRuntimeError` scoped to `ErrorScope.LLM`. The
 * arguments follow the same order throughout: error code, scope, error type,
 * message, context object and, where a hint exists, a recovery string.
 *
 * Note: Validation errors (missing API keys, invalid models, etc.) are handled
 * by DextoValidationError through Zod schema validation.
 */
export class LLMError {
    // Runtime model/provider lookup errors

    /**
     * The given model is not known for the given provider.
     *
     * @param provider Provider the model was looked up under
     * @param model Model name that could not be resolved
     */
    static unknownModel(provider: LLMProvider, model: string) {
        return new DextoRuntimeError(
            LLMErrorCode.MODEL_UNKNOWN,
            ErrorScope.LLM,
            ErrorType.USER,
            `Unknown model '${model}' for provider '${provider}'`,
            { provider, model }
        );
    }

    /**
     * The provider needs a base URL and none was configured.
     *
     * @param provider Provider that requires a baseURL
     */
    static baseUrlMissing(provider: LLMProvider) {
        return new DextoRuntimeError(
            LLMErrorCode.BASE_URL_MISSING,
            ErrorScope.LLM,
            ErrorType.USER,
            `Provider '${provider}' requires a baseURL (set config.baseURL or OPENAI_BASE_URL environment variable)`,
            { provider }
        );
    }

    /**
     * A provider-specific configuration value is missing.
     *
     * @param provider Provider that requires the configuration
     * @param configName Name of the missing configuration value (used verbatim in the message)
     */
    static missingConfig(provider: LLMProvider, configName: string) {
        return new DextoRuntimeError(
            LLMErrorCode.CONFIG_MISSING,
            ErrorScope.LLM,
            ErrorType.USER,
            `Provider '${provider}' requires ${configName}`,
            { provider, configName }
        );
    }

    /**
     * The requested provider is not supported. The message and context list
     * the providers returned by `getSupportedProviders()`.
     *
     * @param provider Provider name as supplied by the caller
     */
    static unsupportedProvider(provider: string) {
        const availableProviders = getSupportedProviders();
        return new DextoRuntimeError(
            LLMErrorCode.PROVIDER_UNSUPPORTED,
            ErrorScope.LLM,
            ErrorType.USER,
            `Provider '${provider}' is not supported. Available providers: ${availableProviders.join(', ')}`,
            { provider, availableProviders }
        );
    }

    /**
     * Runtime error when API key is missing for a provider that requires it.
     * This occurs when relaxed validation allowed the app to start without an API key,
     * and the user then tries to use the LLM functionality.
     *
     * @param provider Provider that requires the key
     * @param envVar Name of the environment variable that should hold the key
     */
    static apiKeyMissing(provider: LLMProvider, envVar: string) {
        return new DextoRuntimeError(
            LLMErrorCode.API_KEY_MISSING,
            ErrorScope.LLM,
            ErrorType.USER,
            `API key required for provider '${provider}'`,
            { provider, envVar },
            `Set the ${envVar} environment variable or configure it in Settings`
        );
    }

    /**
     * No provider could be inferred from the model name alone.
     *
     * @param model Model name that could not be mapped to a provider
     */
    static modelProviderUnknown(model: string) {
        const availableProviders = getSupportedProviders();
        return new DextoRuntimeError(
            LLMErrorCode.MODEL_UNKNOWN,
            ErrorScope.LLM,
            ErrorType.USER,
            `Unknown model '${model}' - could not infer provider. Available providers: ${availableProviders.join(', ')}`,
            { model, availableProviders },
            'Specify the provider explicitly or use a recognized model name'
        );
    }

    // Runtime service errors

    /**
     * The provider rejected the request because of rate limiting.
     *
     * The recovery hint is passed as the dedicated recovery argument, like
     * every other factory in this class does. The context object keeps its
     * previous shape (`details` plus `recovery`) so existing readers of the
     * context are unaffected.
     *
     * @param provider Provider that reported the rate limit
     * @param retryAfter Optional number of seconds to wait; a falsy value (including 0) yields the generic hint
     */
    static rateLimitExceeded(provider: LLMProvider, retryAfter?: number) {
        const recovery = retryAfter
            ? `Wait ${retryAfter} seconds before retrying`
            : 'Wait before retrying or upgrade your plan';
        return new DextoRuntimeError(
            LLMErrorCode.RATE_LIMIT_EXCEEDED,
            ErrorScope.LLM,
            ErrorType.RATE_LIMIT,
            `Rate limit exceeded for ${provider}`,
            {
                details: { provider, retryAfter },
                recovery,
            },
            recovery
        );
    }

    /**
     * Error when Dexto account has insufficient credits.
     * Returned as 402 from the gateway with code INSUFFICIENT_CREDITS.
     *
     * @param balance Optional remaining balance; rendered as `$x.xx`, or `low` when omitted
     */
    static insufficientCredits(balance?: number) {
        const balanceStr = balance !== undefined ? `$${balance.toFixed(2)}` : 'low';
        return new DextoRuntimeError(
            LLMErrorCode.INSUFFICIENT_CREDITS,
            ErrorScope.LLM,
            ErrorType.FORBIDDEN,
            `Insufficient Dexto credits. Balance: ${balanceStr}`,
            { balance },
            'Run `dexto billing` to check your balance'
        );
    }

    // Runtime operation errors

    /**
     * Generation failed inside the provider (classified as a third-party error).
     *
     * @param error Description of the underlying failure
     * @param provider Provider that was being called
     * @param model Model that was being called
     */
    static generationFailed(error: string, provider: LLMProvider, model: string) {
        return new DextoRuntimeError(
            LLMErrorCode.GENERATION_FAILED,
            ErrorScope.LLM,
            ErrorType.THIRD_PARTY,
            `Generation failed: ${error}`,
            { details: { error, provider, model } }
        );
    }

    // Switch operation errors (runtime checks not covered by Zod)

    /** An LLM switch was requested without a model and without a provider. */
    static switchInputMissing() {
        return new DextoRuntimeError(
            LLMErrorCode.SWITCH_INPUT_MISSING,
            ErrorScope.LLM,
            ErrorType.USER,
            'At least model or provider must be specified for LLM switch',
            {},
            'Provide either a model name, provider, or both'
        );
    }
}

View File

@@ -0,0 +1,131 @@
/**
* Provider-specific options builder for Vercel AI SDK's streamText/generateText.
*
* Centralizes provider-specific configuration that requires explicit opt-in:
* - Anthropic: cacheControl for prompt caching, sendReasoning for extended thinking
* - Bedrock/Vertex Claude: Same as Anthropic (Claude models on these platforms)
* - Google: thinkingConfig for Gemini thinking models
* - OpenAI: reasoningEffort for o1/o3/codex/gpt-5 models
*
* Caching notes:
* - Anthropic: Requires explicit cacheControl option (we enable it)
* - OpenAI: Automatic for prompts ≥1024 tokens (no config needed)
* - Google: Implicit caching automatic for Gemini 2.5+ (≥1024 tokens for Flash,
* ≥2048 for Pro). Explicit caching requires pre-created cachedContent IDs.
* All providers return cached token counts in the response (cachedInputTokens).
*/
import type { LLMProvider } from '../types.js';
import { isReasoningCapableModel } from '../registry.js';
/**
 * Reasoning effort levels accepted in {@link ProviderOptionsConfig}.
 * `buildProviderOptions` forwards this value only for the `openai` provider.
 */
export type ReasoningEffort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';
/** Input to `buildProviderOptions`. */
export interface ProviderOptionsConfig {
/** LLM provider the request is sent to; selects which option set is built */
provider: LLMProvider;
/** Model name; checked case-insensitively for "claude" on bedrock/vertex */
model: string;
/** Explicit reasoning effort; when undefined, a per-model default is looked up for OpenAI */
reasoningEffort?: ReasoningEffort | undefined;
}
/**
 * Build provider-specific options for streamText/generateText.
 *
 * Option sets by provider:
 * - `anthropic`: prompt caching (`cacheControl`) and reasoning streaming (`sendReasoning`)
 * - `bedrock` / `vertex` with a model name containing "claude" (case-insensitive):
 *   the same two settings, keyed `bedrock` / `vertex-anthropic` respectively
 * - `google`, or `vertex` with a non-Claude model: `thinkingConfig.includeThoughts`
 * - `openai`: `reasoningEffort`, taken from the config or, when absent, from
 *   `getDefaultReasoningEffort(model)`; omitted when neither yields a value
 *
 * @param config Provider, model, and optional reasoning effort configuration
 * @returns Provider options object or undefined if no special options needed
 */
export function buildProviderOptions(
    config: ProviderOptionsConfig
): Record<string, Record<string, unknown>> | undefined {
    const { provider, model, reasoningEffort } = config;
    const isClaudeModel = model.toLowerCase().includes('claude');

    // Claude models get the same settings on every platform: prompt caching
    // (saves money, improves latency) plus streamed reasoning/thinking content.
    const claudeSettings = (): Record<string, unknown> => ({
        cacheControl: { type: 'ephemeral' },
        sendReasoning: true,
    });

    // Google enables thinking automatically for thinking models; includeThoughts
    // is set explicitly so the reasoning is included in the response.
    const geminiSettings = (): Record<string, Record<string, unknown>> => ({
        google: {
            thinkingConfig: {
                includeThoughts: true,
            },
        },
    });

    switch (provider) {
        case 'anthropic':
            return { anthropic: claudeSettings() };

        case 'bedrock':
            return isClaudeModel ? { bedrock: claudeSettings() } : undefined;

        case 'vertex':
            return isClaudeModel ? { 'vertex-anthropic': claudeSettings() } : geminiSettings();

        case 'google':
            return geminiSettings();

        case 'openai': {
            // An explicit config value wins; otherwise auto-detect from the model.
            const effort = reasoningEffort ?? getDefaultReasoningEffort(model);
            return effort ? { openai: { reasoningEffort: effort } } : undefined;
        }

        default:
            return undefined;
    }
}
/**
 * Pick the reasoning effort to use for an OpenAI model when the caller did not
 * configure one.
 *
 * The decision is delegated to the centralized registry check
 * `isReasoningCapableModel`:
 * - reasoning-capable models (codex, o1, o3, gpt-5) get `'medium'`, OpenAI's
 *   recommended "daily driver" level
 * - every other model gets `undefined`, i.e. no reasoning effort is sent
 *
 * For reference, the levels this file describes, from lowest to highest:
 * `'none'` (no reasoning, fastest), `'low'`, `'medium'`, `'high'`, and `'xhigh'`
 * (quality-critical, non-latency-sensitive work). The `ReasoningEffort` type
 * also contains `'minimal'`. This function never returns `'none'`.
 *
 * @param model The model name
 * @returns Reasoning effort level or undefined if not applicable
 */
export function getDefaultReasoningEffort(
    model: string
): Exclude<ReasoningEffort, 'none'> | undefined {
    return isReasoningCapableModel(model) ? 'medium' : undefined;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,647 @@
import { StreamTextResult, ToolSet as VercelToolSet } from 'ai';
import { ContextManager } from '../../context/manager.js';
import { SessionEventBus, LLMFinishReason } from '../../events/index.js';
import { ResourceManager } from '../../resources/index.js';
import { truncateToolResult } from './tool-output-truncator.js';
import { StreamProcessorResult } from './types.js';
import { sanitizeToolResult } from '../../context/utils.js';
import type { SanitizedToolResult } from '../../context/types.js';
import { IDextoLogger } from '../../logger/v2/types.js';
import { DextoLogComponent } from '../../logger/v2/types.js';
import { LLMProvider, TokenUsage } from '../types.js';
/**
 * Structural shape of the usage object that `normalizeUsage` reads.
 * Every field is optional; missing counters are treated as 0 (or left
 * undefined for `reasoningTokens`) during normalization.
 */
type UsageLike = {
inputTokens?: number | undefined;
outputTokens?: number | undefined;
totalTokens?: number | undefined;
reasoningTokens?: number | undefined;
// Used as the cache-read count when inputTokenDetails.cacheReadTokens is absent
cachedInputTokens?: number | undefined;
// Per-category input breakdown; when present its values take precedence
inputTokenDetails?: {
noCacheTokens?: number | undefined;
cacheReadTokens?: number | undefined;
cacheWriteTokens?: number | undefined;
};
};
/** Provider/model information attached to the events a StreamProcessor emits. */
export interface StreamProcessorConfig {
/** Provider serving the stream; also decides whether tool-call provider metadata is persisted */
provider: LLMProvider;
/** Model name, reported in `llm:response` events and logs */
model: string;
/** Estimated input tokens before LLM call (for analytics/calibration) */
estimatedInputTokens?: number;
}
/**
 * Consumes a Vercel AI SDK `fullStream`, persisting messages and tool results
 * through the context manager and emitting `llm:*` events on the session bus.
 */
export class StreamProcessor {
// ID of the assistant message being built; created lazily on first text delta or tool call
private assistantMessageId: string | null = null;
// Token usage accumulated across finish-step events and finalized on finish
private actualTokens: TokenUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
// Last known finish reason; stays 'unknown' until a finish or abort is seen
private finishReason: LLMFinishReason = 'unknown';
// Concatenated reasoning deltas
private reasoningText: string = '';
// Most recent provider metadata seen on a reasoning delta (kept for round-tripping)
private reasoningMetadata: Record<string, unknown> | undefined;
// Concatenated text deltas; returned as the result text
private accumulatedText: string = '';
// Child logger scoped to the executor component (set in the constructor)
private logger: IDextoLogger;
// True once at least one finish-step event carried usage
private hasStepUsage = false;
/**
 * Track pending tool calls (added to context but no result yet).
 * On cancel/abort, we add synthetic "cancelled" results to maintain tool_use/tool_result pairing.
 */
private pendingToolCalls: Map<string, { toolName: string }> = new Map();
/**
 * Create a processor for a single streamed LLM call.
 *
 * @param contextManager Context manager for message persistence
 * @param eventBus Event bus for emitting events
 * @param resourceManager Resource manager for blob storage (its blob store is used when sanitizing tool results)
 * @param abortSignal Abort signal for cancellation; consulted when a stream error is classified
 * @param config Provider/model configuration
 * @param logger Logger instance; a child logger for the EXECUTOR component is derived from it
 * @param streaming If true, emits llm:chunk events. Default true.
 * @param approvalMetadata Map of tool call IDs to approval metadata; an entry is read and then deleted when that call's tool result is processed
 */
constructor(
private contextManager: ContextManager,
private eventBus: SessionEventBus,
private resourceManager: ResourceManager,
private abortSignal: AbortSignal,
private config: StreamProcessorConfig,
logger: IDextoLogger,
private streaming: boolean = true,
private approvalMetadata?: Map<
string,
{ requireApproval: boolean; approvalStatus?: 'approved' | 'rejected' }
>
) {
this.logger = logger.createChild(DextoLogComponent.EXECUTOR);
}
/**
 * Consume the stream produced by `streamFn` and translate it into persisted
 * history plus session events.
 *
 * Per event type:
 * - `text-delta` / `reasoning-delta`: accumulate text, append to the assistant
 *   message, and emit `llm:chunk` when streaming is enabled
 * - `tool-call`: record the call on the assistant message and mark it pending
 * - `tool-result` / `tool-error`: persist the (sanitized or error) result,
 *   emit `llm:tool-result`, and clear the pending entry
 * - `finish-step`: accumulate token usage across steps
 * - `finish`: finalize usage, update the assistant message, emit `llm:response`
 * - `error`: log and emit `llm:error` (the loop keeps reading)
 * - `abort`: persist cancelled tool results and return a `cancelled` result
 *
 * @param streamFn Factory that starts the stream; it is invoked exactly once
 * @returns Accumulated text, finish reason, and token usage
 * @throws Rethrows any non-abort error raised while reading the stream, after
 *         emitting `llm:error` with `recoverable: false`
 */
async process(
    streamFn: () => StreamTextResult<VercelToolSet, unknown>
): Promise<StreamProcessorResult> {
    const stream = streamFn();
    try {
        for await (const event of stream.fullStream) {
            // Don't call throwIfAborted() here - let Vercel SDK handle abort gracefully
            // and emit 'abort' event which we handle below in the switch
            switch (event.type) {
                case 'text-delta':
                    if (!this.assistantMessageId) {
                        // Create assistant message on first text delta if not exists
                        this.assistantMessageId = await this.createAssistantMessage();
                    }
                    await this.contextManager.appendAssistantText(
                        this.assistantMessageId!,
                        event.text
                    );
                    // Accumulate text for return value
                    this.accumulatedText += event.text;
                    // Only emit chunks in streaming mode
                    if (this.streaming) {
                        this.eventBus.emit('llm:chunk', {
                            chunkType: 'text',
                            content: event.text,
                        });
                    }
                    break;
                case 'reasoning-delta':
                    // Handle reasoning delta (extended thinking from Claude, etc.)
                    this.reasoningText += event.text;
                    // Capture provider metadata for round-tripping (e.g., OpenAI itemId, Gemini thought signatures)
                    // This must be passed back to the provider on subsequent requests
                    if (event.providerMetadata) {
                        this.reasoningMetadata = event.providerMetadata;
                    }
                    // Only emit chunks in streaming mode
                    if (this.streaming) {
                        this.eventBus.emit('llm:chunk', {
                            chunkType: 'reasoning',
                            content: event.text,
                        });
                    }
                    break;
                case 'tool-call': {
                    // Create tool call record
                    if (!this.assistantMessageId) {
                        this.assistantMessageId = await this.createAssistantMessage();
                    }
                    // Extract providerMetadata for round-tripping (e.g., Gemini 3 thought signatures)
                    // These are opaque tokens that must be passed back to maintain model state
                    const toolCall: Parameters<typeof this.contextManager.addToolCall>[1] = {
                        id: event.toolCallId,
                        type: 'function',
                        function: {
                            name: event.toolName,
                            arguments: JSON.stringify(event.input),
                        },
                    };
                    // IMPORTANT: Only persist providerMetadata for providers that require round-tripping
                    // (e.g., Gemini thought signatures). OpenAI Responses metadata can cause invalid
                    // follow-up requests (function_call item references missing required reasoning items).
                    const shouldPersistProviderMetadata =
                        this.config.provider === 'google' ||
                        (this.config.provider as string) === 'vertex';
                    if (shouldPersistProviderMetadata && event.providerMetadata) {
                        toolCall.providerOptions = {
                            ...event.providerMetadata,
                        } as Record<string, unknown>;
                    }
                    await this.contextManager.addToolCall(this.assistantMessageId!, toolCall);
                    // Track pending tool call for abort handling
                    this.pendingToolCalls.set(event.toolCallId, {
                        toolName: event.toolName,
                    });
                    // NOTE: llm:tool-call is now emitted from ToolManager.executeTool() instead.
                    // This ensures correct event ordering - llm:tool-call arrives before approval:request.
                    // See tool-manager.ts for detailed explanation of the timing issue.
                    break;
                }
                case 'tool-result': {
                    // PERSISTENCE HAPPENS HERE
                    const rawResult = event.output;
                    // Log raw tool output for debugging
                    this.logger.debug('Tool result received', {
                        toolName: event.toolName,
                        toolCallId: event.toolCallId,
                        rawResult,
                    });
                    // Sanitize
                    const sanitized = await sanitizeToolResult(
                        rawResult,
                        {
                            blobStore: this.resourceManager.getBlobStore(),
                            toolName: event.toolName,
                            toolCallId: event.toolCallId,
                            success: true,
                        },
                        this.logger
                    );
                    // Truncate
                    const truncated = truncateToolResult(sanitized);
                    // Get approval metadata for this tool call
                    const approval = this.approvalMetadata?.get(event.toolCallId);
                    // Persist to history (success status comes from truncated.meta.success)
                    await this.contextManager.addToolResult(
                        event.toolCallId,
                        event.toolName,
                        truncated, // Includes meta.success from sanitization
                        approval // Only approval metadata if present
                    );
                    this.eventBus.emit('llm:tool-result', {
                        toolName: event.toolName,
                        callId: event.toolCallId,
                        success: true,
                        sanitized: truncated,
                        rawResult: rawResult,
                        ...(approval?.requireApproval !== undefined && {
                            requireApproval: approval.requireApproval,
                        }),
                        ...(approval?.approvalStatus !== undefined && {
                            approvalStatus: approval.approvalStatus,
                        }),
                    });
                    // Clean up approval metadata after use
                    this.approvalMetadata?.delete(event.toolCallId);
                    // Remove from pending (tool completed successfully)
                    this.pendingToolCalls.delete(event.toolCallId);
                    break;
                }
                case 'finish-step':
                    // Track token usage from completed steps for partial runs
                    // TODO: Token usage for cancelled mid-step responses is unavailable.
                    // LLM providers only send token counts in their final response chunk.
                    // If we abort mid-stream, that chunk never arrives. The tokens are
                    // still billed by the provider, but we can't report them.
                    if (event.usage) {
                        const providerMetadata = this.getProviderMetadata(event);
                        const stepUsage = this.normalizeUsage(event.usage, providerMetadata);
                        // Accumulate usage across steps
                        this.actualTokens = {
                            inputTokens:
                                (this.actualTokens.inputTokens ?? 0) +
                                (stepUsage.inputTokens ?? 0),
                            outputTokens:
                                (this.actualTokens.outputTokens ?? 0) +
                                (stepUsage.outputTokens ?? 0),
                            totalTokens:
                                (this.actualTokens.totalTokens ?? 0) +
                                (stepUsage.totalTokens ?? 0),
                            ...(stepUsage.reasoningTokens !== undefined && {
                                reasoningTokens:
                                    (this.actualTokens.reasoningTokens ?? 0) +
                                    stepUsage.reasoningTokens,
                            }),
                            // Cache tokens
                            cacheReadTokens:
                                (this.actualTokens.cacheReadTokens ?? 0) +
                                (stepUsage.cacheReadTokens ?? 0),
                            cacheWriteTokens:
                                (this.actualTokens.cacheWriteTokens ?? 0) +
                                (stepUsage.cacheWriteTokens ?? 0),
                        };
                        this.hasStepUsage = true;
                    }
                    break;
                case 'finish': {
                    this.finishReason = event.finishReason;
                    const providerMetadata = this.getProviderMetadata(event);
                    const fallbackUsage = this.normalizeUsage(
                        event.totalUsage,
                        providerMetadata
                    );
                    const usage = this.hasStepUsage ? { ...this.actualTokens } : fallbackUsage;
                    // Backfill usage fields from fallback when step usage reported zeros/undefined.
                    // This handles edge cases where providers send partial usage in finish-step
                    // events but complete usage in the final finish event (e.g., Anthropic sends
                    // cache tokens in providerMetadata rather than usage object).
                    if (this.hasStepUsage) {
                        // Backfill input/output tokens if step usage was zero but fallback has values.
                        // This is defensive - most providers report these consistently, but we log
                        // when backfill occurs to detect any providers with this edge case.
                        const fallbackInput = fallbackUsage.inputTokens ?? 0;
                        if ((usage.inputTokens ?? 0) === 0 && fallbackInput > 0) {
                            this.logger.debug(
                                'Backfilling inputTokens from fallback usage (step reported 0)',
                                { stepValue: usage.inputTokens, fallbackValue: fallbackInput }
                            );
                            usage.inputTokens = fallbackInput;
                        }
                        const fallbackOutput = fallbackUsage.outputTokens ?? 0;
                        if ((usage.outputTokens ?? 0) === 0 && fallbackOutput > 0) {
                            this.logger.debug(
                                'Backfilling outputTokens from fallback usage (step reported 0)',
                                { stepValue: usage.outputTokens, fallbackValue: fallbackOutput }
                            );
                            usage.outputTokens = fallbackOutput;
                        }
                        const fallbackCacheRead = fallbackUsage.cacheReadTokens ?? 0;
                        if ((usage.cacheReadTokens ?? 0) === 0 && fallbackCacheRead > 0) {
                            usage.cacheReadTokens = fallbackCacheRead;
                        }
                        const fallbackCacheWrite = fallbackUsage.cacheWriteTokens ?? 0;
                        if ((usage.cacheWriteTokens ?? 0) === 0 && fallbackCacheWrite > 0) {
                            usage.cacheWriteTokens = fallbackCacheWrite;
                        }
                        const fallbackTotalTokens = fallbackUsage.totalTokens ?? 0;
                        if ((usage.totalTokens ?? 0) === 0 && fallbackTotalTokens > 0) {
                            usage.totalTokens = fallbackTotalTokens;
                        }
                        if (
                            usage.reasoningTokens === undefined &&
                            fallbackUsage.reasoningTokens !== undefined
                        ) {
                            usage.reasoningTokens = fallbackUsage.reasoningTokens;
                        }
                    }
                    this.actualTokens = usage;
                    // Log complete LLM response for debugging
                    this.logger.info('LLM response complete', {
                        finishReason: event.finishReason,
                        contentLength: this.accumulatedText.length,
                        content: this.accumulatedText,
                        ...(this.reasoningText && {
                            reasoningLength: this.reasoningText.length,
                            reasoning: this.reasoningText,
                        }),
                        usage,
                        provider: this.config.provider,
                        model: this.config.model,
                    });
                    // Finalize assistant message with token usage and reasoning
                    if (this.assistantMessageId) {
                        await this.contextManager.updateAssistantMessage(
                            this.assistantMessageId,
                            {
                                tokenUsage: usage,
                                // Persist reasoning text and metadata for round-tripping
                                ...(this.reasoningText && { reasoning: this.reasoningText }),
                                ...(this.reasoningMetadata && {
                                    reasoningMetadata: this.reasoningMetadata,
                                }),
                            }
                        );
                    }
                    // Skip empty responses when tools are being called
                    // The meaningful response will come after tool execution completes
                    const hasContent = this.accumulatedText || this.reasoningText;
                    if (this.finishReason !== 'tool-calls' || hasContent) {
                        this.eventBus.emit('llm:response', {
                            content: this.accumulatedText,
                            ...(this.reasoningText && { reasoning: this.reasoningText }),
                            provider: this.config.provider,
                            model: this.config.model,
                            tokenUsage: usage,
                            ...(this.config.estimatedInputTokens !== undefined && {
                                estimatedInputTokens: this.config.estimatedInputTokens,
                            }),
                            finishReason: this.finishReason,
                        });
                    }
                    break;
                }
                case 'tool-error': {
                    // Tool execution failed - emit error event with tool context
                    this.logger.error('Tool execution failed', {
                        toolName: event.toolName,
                        toolCallId: event.toolCallId,
                        error: event.error,
                    });
                    const errorMessage =
                        event.error instanceof Error
                            ? event.error.message
                            : String(event.error);
                    // CRITICAL: Must persist error result to history to maintain tool_use/tool_result pairing
                    // Without this, the conversation history has tool_use without tool_result,
                    // causing "tool_use ids were found without tool_result blocks" API errors
                    const errorResult: SanitizedToolResult = {
                        content: [{ type: 'text', text: `Error: ${errorMessage}` }],
                        meta: {
                            toolName: event.toolName,
                            toolCallId: event.toolCallId,
                            success: false,
                        },
                    };
                    await this.contextManager.addToolResult(
                        event.toolCallId,
                        event.toolName,
                        errorResult,
                        undefined // No approval metadata for errors
                    );
                    this.eventBus.emit('llm:tool-result', {
                        toolName: event.toolName,
                        callId: event.toolCallId,
                        success: false,
                        error: errorMessage,
                    });
                    this.eventBus.emit('llm:error', {
                        error:
                            event.error instanceof Error
                                ? event.error
                                : new Error(String(event.error)),
                        context: `Tool execution failed: ${event.toolName}`,
                        toolCallId: event.toolCallId,
                        recoverable: true, // Tool errors are typically recoverable
                    });
                    // Remove from pending (tool failed but result was persisted)
                    this.pendingToolCalls.delete(event.toolCallId);
                    break;
                }
                case 'error': {
                    const err =
                        event.error instanceof Error
                            ? event.error
                            : new Error(String(event.error));
                    this.logger.error(`LLM error: ${err.toString()}`);
                    this.eventBus.emit('llm:error', {
                        error: err,
                    });
                    break;
                }
                case 'abort':
                    // Vercel SDK emits 'abort' when the stream is cancelled.
                    // Return immediately - stream will close after abort event.
                    // `await` keeps a failure inside this try block, as before.
                    return await this.completeCancelledStream(
                        'Stream aborted, emitting partial response'
                    );
            }
        }
    } catch (error) {
        // Check if this is an abort error (intentional cancellation)
        // Note: DOMException extends Error in Node.js 17+, so the first check covers it
        const isAbortError =
            (error instanceof Error && error.name === 'AbortError') || this.abortSignal.aborted;
        if (isAbortError) {
            // Don't throw - cancellation is intentional, not an error
            return await this.completeCancelledStream(
                'Stream cancelled, emitting partial response'
            );
        }
        // Non-abort errors are real failures
        this.logger.error('Stream processing failed', { error });
        // Emit error event so UI knows about the failure
        this.eventBus.emit('llm:error', {
            error: error instanceof Error ? error : new Error(String(error)),
            context: 'StreamProcessor',
            recoverable: false,
        });
        throw error;
    }
    return {
        text: this.accumulatedText,
        finishReason: this.finishReason,
        usage: this.actualTokens,
    };
}

/**
 * Shared cancellation path for `process`: marks the run as cancelled, persists
 * synthetic results for pending tool calls, emits the partial `llm:response`,
 * and builds the result to return.
 *
 * @param logMessage Debug message identifying which cancellation path was taken
 * @returns Result carrying the accumulated text, `cancelled`, and usage so far
 */
private async completeCancelledStream(logMessage: string): Promise<StreamProcessorResult> {
    this.logger.debug(logMessage);
    this.finishReason = 'cancelled';
    // Persist cancelled results for any pending tool calls
    await this.persistCancelledToolResults();
    this.eventBus.emit('llm:response', {
        content: this.accumulatedText,
        ...(this.reasoningText && { reasoning: this.reasoningText }),
        provider: this.config.provider,
        model: this.config.model,
        tokenUsage: this.actualTokens,
        ...(this.config.estimatedInputTokens !== undefined && {
            estimatedInputTokens: this.config.estimatedInputTokens,
        }),
        finishReason: 'cancelled',
    });
    return {
        text: this.accumulatedText,
        finishReason: 'cancelled',
        usage: this.actualTokens,
    };
}
/**
 * Read cache token counts out of provider metadata.
 *
 * Looks first at the `anthropic` entry (`cacheCreationInputTokens` /
 * `cacheReadInputTokens`), then at `bedrock.usage` (`cacheWriteInputTokens` /
 * `cacheReadInputTokens`), and falls back to 0 when neither has a value.
 *
 * @param providerMetadata Provider metadata from a stream event, if any
 * @returns Cache read and cache write token counts (0 when not reported)
 */
private getCacheTokensFromProviderMetadata(
    providerMetadata: Record<string, unknown> | undefined
): { cacheReadTokens: number; cacheWriteTokens: number } {
    const anthropic = providerMetadata?.['anthropic'] as Record<string, number> | undefined;
    const bedrock = providerMetadata?.['bedrock'] as
        | { usage?: Record<string, number> }
        | undefined;

    // Anthropic value wins; Bedrock usage is consulted only when it is nullish.
    const lookup = (anthropicKey: string, bedrockKey: string): number =>
        anthropic?.[anthropicKey] ?? bedrock?.usage?.[bedrockKey] ?? 0;

    const cacheWriteTokens = lookup('cacheCreationInputTokens', 'cacheWriteInputTokens');
    const cacheReadTokens = lookup('cacheReadInputTokens', 'cacheReadInputTokens');
    return { cacheReadTokens, cacheWriteTokens };
}
/**
 * Convert an SDK usage object (plus optional provider metadata) into a
 * `TokenUsage` whose `inputTokens` counts only non-cached input.
 *
 * Precedence for each figure:
 * - cache reads: `inputTokenDetails.cacheReadTokens`, then `cachedInputTokens`,
 *   then the provider-metadata value
 * - cache writes: `inputTokenDetails.cacheWriteTokens`, then the
 *   provider-metadata value
 * - non-cached input: `inputTokenDetails.noCacheTokens`; otherwise the raw input
 *   count minus `cachedInputTokens` (when present), and additionally minus the
 *   provider-reported cache writes when no `inputTokenDetails` exist. The result
 *   is clamped at 0.
 *
 * @param usage Usage reported by the SDK, possibly undefined
 * @param providerMetadata Provider metadata accompanying the same event
 * @returns Normalized usage; `reasoningTokens` is present only when reported
 */
private normalizeUsage(
    usage: UsageLike | undefined,
    providerMetadata?: Record<string, unknown>
): TokenUsage {
    const rawInput = usage?.inputTokens ?? 0;
    const reasoning = usage?.reasoningTokens;
    const cached = usage?.cachedInputTokens;
    const details = usage?.inputTokenDetails;
    const fromProvider = this.getCacheTokensFromProviderMetadata(providerMetadata);

    // Derive the non-cached input count for when the SDK gives no breakdown.
    let derivedNoCache = rawInput;
    if (cached !== undefined) {
        derivedNoCache -= cached;
        // Without a breakdown, provider-reported cache writes are subtracted too.
        if (details === undefined && fromProvider.cacheWriteTokens > 0) {
            derivedNoCache -= fromProvider.cacheWriteTokens;
        }
    }
    const noCache = details?.noCacheTokens ?? derivedNoCache;

    const normalized: TokenUsage = {
        inputTokens: Math.max(0, noCache),
        outputTokens: usage?.outputTokens ?? 0,
        totalTokens: usage?.totalTokens ?? 0,
    };
    if (reasoning !== undefined) {
        normalized.reasoningTokens = reasoning;
    }
    normalized.cacheReadTokens = details?.cacheReadTokens ?? cached ?? fromProvider.cacheReadTokens;
    normalized.cacheWriteTokens = details?.cacheWriteTokens ?? fromProvider.cacheWriteTokens;
    return normalized;
}
/**
 * Pulls the `providerMetadata` object off a stream event, if it has one.
 *
 * @param event Stream event treated as a plain key/value record.
 * @returns The metadata when the property exists and is a truthy object,
 *          otherwise `undefined`.
 */
private getProviderMetadata(
    event: Record<string, unknown>
): Record<string, unknown> | undefined {
    if (!('providerMetadata' in event)) {
        return undefined;
    }
    const { providerMetadata } = event as { providerMetadata?: Record<string, unknown> };
    // Rejects null/undefined and any non-object value.
    return providerMetadata && typeof providerMetadata === 'object'
        ? providerMetadata
        : undefined;
}
/**
 * Adds an assistant message with empty arguments ('' / [] / {}) to the
 * context and returns the id of the last message in history.
 *
 * @returns The id reported by `getLastMessageId()` after the insert.
 */
private async createAssistantMessage(): Promise<string> {
    await this.contextManager.addAssistantMessage('', [], {});
    const messageId = await this.getLastMessageId();
    return messageId;
}
/**
 * Returns the id of the final entry in the context history.
 *
 * @throws Error when the history is empty or its last entry has no id.
 */
private async getLastMessageId(): Promise<string> {
    const messages = await this.contextManager.getHistory();
    const lastId = messages[messages.length - 1]?.id;
    if (!lastId) {
        throw new Error('Failed to get last message ID');
    }
    return lastId;
}
/**
 * Writes a synthetic "Cancelled by user" result for every tool call that is
 * still pending, then empties the pending set.
 *
 * LLM APIs require each tool_use to be paired with a tool_result; calling
 * this on abort/cancel avoids "tool_use ids were found without tool_result"
 * errors on the next request.
 */
private async persistCancelledToolResults(): Promise<void> {
    const pendingCount = this.pendingToolCalls.size;
    if (pendingCount === 0) {
        return;
    }

    this.logger.debug(`Persisting cancelled results for ${pendingCount} pending tool call(s)`);

    for (const [toolCallId, pending] of this.pendingToolCalls) {
        const { toolName } = pending;
        const cancelledResult: SanitizedToolResult = {
            content: [{ type: 'text', text: 'Cancelled by user' }],
            meta: { toolName, toolCallId, success: false },
        };

        // Cancelled tools carry no approval metadata, hence the explicit undefined.
        await this.contextManager.addToolResult(
            toolCallId,
            toolName,
            cancelledResult,
            undefined
        );

        // Let CLI/WebUI listeners mark the tool call as finished.
        this.eventBus.emit('llm:tool-result', {
            toolName,
            callId: toolCallId,
            success: false,
            error: 'Cancelled by user',
        });
    }

    this.pendingToolCalls.clear();
}
}

View File

@@ -0,0 +1,76 @@
import { describe, it, expect } from 'vitest';
import { truncateStringOutput, truncateToolResult } from './tool-output-truncator.js';
import { SanitizedToolResult } from '../../context/types.js';
// Unit tests for the tool output truncation helpers: truncateStringOutput
// (plain strings) and truncateToolResult (text parts of a SanitizedToolResult).
describe('tool-output-truncator', () => {
describe('truncateStringOutput', () => {
it('should not truncate if output is within limit', () => {
const output = 'short output';
const result = truncateStringOutput(output, { maxChars: 100 });
expect(result.truncated).toBe(false);
expect(result.output).toBe(output);
expect(result.originalLength).toBe(output.length);
});
it('should truncate if output exceeds limit', () => {
const output = 'long output that exceeds the limit';
const maxChars = 10;
const result = truncateStringOutput(output, { maxChars });
expect(result.truncated).toBe(true);
expect(result.output).toContain('[Output truncated');
// The first maxChars characters of the input are kept verbatim.
expect(result.output.startsWith('long outpu')).toBe(true);
// originalLength reports the input length, not the truncated length.
expect(result.originalLength).toBe(output.length);
});
it('should use default limit if not provided', () => {
// Use input significantly larger than default limit (120,000)
// so the truncation saves more chars than the appended message adds
const output = 'a'.repeat(150000);
const result = truncateStringOutput(output);
expect(result.truncated).toBe(true);
// Output should be ~120,000 + truncation message (~104 chars) = ~120,104
// which is less than original 150,000
expect(result.output.length).toBeLessThan(output.length);
expect(result.output).toContain('[Output truncated');
});
});
describe('truncateToolResult', () => {
it('should truncate text parts in SanitizedToolResult', () => {
const longText = 'a'.repeat(200);
const toolResult: SanitizedToolResult = {
content: [
{ type: 'text', text: longText },
{ type: 'image', image: 'data:image/png;base64,...' },
],
meta: { toolName: 'test', toolCallId: '123', success: true },
};
const result = truncateToolResult(toolResult, { maxChars: 100 });
const firstPart = result.content[0];
expect(firstPart).toBeDefined();
// The guards below exist for type narrowing; the preceding toBeDefined /
// type assertions are what fail the test if the part is missing or not text.
if (firstPart) {
expect(firstPart.type).toBe('text');
}
if (firstPart && firstPart.type === 'text') {
expect(firstPart.text).toContain('[Output truncated');
expect(firstPart.text.length).toBeLessThan(longText.length);
}
// Should preserve other parts
expect(result.content[1]).toEqual(toolResult.content[1]);
});
it('should not modify result if no truncation needed', () => {
const toolResult: SanitizedToolResult = {
content: [{ type: 'text', text: 'short' }],
meta: { toolName: 'test', toolCallId: '123', success: true },
};
const result = truncateToolResult(toolResult, { maxChars: 100 });
// Structural equality only; this does not assert the same object identity.
expect(result).toEqual(toolResult);
});
});
});

View File

@@ -0,0 +1,77 @@
import { SanitizedToolResult } from '../../context/types.js';
// Constants - configurable per agent
/** Character cap used by truncateStringOutput when no `maxChars` option is given. */
export const DEFAULT_MAX_TOOL_OUTPUT_CHARS = 120_000; // ~30K tokens
// NOTE(review): the two limits below are not referenced anywhere in this module;
// presumably they are consumed by file-reading tools elsewhere - confirm.
export const DEFAULT_MAX_FILE_LINES = 2000;
export const DEFAULT_MAX_LINE_LENGTH = 2000;
/** Options accepted by the truncation helpers in this module. */
export interface TruncationOptions {
/** Maximum number of characters to keep; falls back to DEFAULT_MAX_TOOL_OUTPUT_CHARS. */
maxChars?: number;
}
/** Outcome of truncating a single string. */
export interface TruncationResult {
/** The input unchanged, or its leading part followed by a truncation notice. */
output: string;
/** True when the input exceeded the limit and was cut. */
truncated: boolean;
/** Length of the input string before any truncation. */
originalLength: number;
}
/**
 * Truncates a string tool output to prevent context overflow.
 * Appends a warning message if truncated.
 *
 * The cut never lands inside a UTF-16 surrogate pair: if the limit would
 * separate the two halves of an astral character (e.g. an emoji), the whole
 * character is dropped so the result stays well-formed text. A negative
 * limit is treated as 0 (keep nothing) instead of being handed to `slice`,
 * where it would count from the end of the string.
 *
 * @param output Raw tool output.
 * @param options Optional limit; defaults to DEFAULT_MAX_TOOL_OUTPUT_CHARS.
 * @returns The (possibly truncated) text, a `truncated` flag, and the
 *          length of the original input.
 */
export function truncateStringOutput(
    output: string,
    options: TruncationOptions = {}
): TruncationResult {
    const maxChars = Math.max(0, options.maxChars ?? DEFAULT_MAX_TOOL_OUTPUT_CHARS);
    const originalLength = output.length;

    if (originalLength <= maxChars) {
        return { output, truncated: false, originalLength };
    }

    const isHighSurrogate = (code: number): boolean => code >= 0xd800 && code <= 0xdbff;
    const isLowSurrogate = (code: number): boolean => code >= 0xdc00 && code <= 0xdfff;

    // Back off by one code unit when the limit falls between a high surrogate
    // and its low surrogate, so no lone surrogate ends up in the output.
    let cutIndex = maxChars;
    if (
        cutIndex > 0 &&
        isHighSurrogate(output.charCodeAt(cutIndex - 1)) &&
        isLowSurrogate(output.charCodeAt(cutIndex))
    ) {
        cutIndex -= 1;
    }

    const truncatedOutput =
        output.slice(0, cutIndex) +
        `\n\n[Output truncated - exceeded maximum length of ${maxChars} characters. Total length was ${originalLength} characters.]`;

    return {
        output: truncatedOutput,
        truncated: true,
        originalLength,
    };
}
/**
 * Applies string truncation to every text part of a SanitizedToolResult.
 * Non-text parts, and text parts already within the limit, are passed
 * through as-is. A new result object (with a shallow copy of `meta`) is
 * always returned.
 *
 * @param result The sanitized tool result to truncate
 * @param options Truncation options
 * @returns The truncated result
 */
export function truncateToolResult(
    result: SanitizedToolResult,
    options: TruncationOptions = {}
): SanitizedToolResult {
    const content = result.content.map((part) => {
        if (part.type !== 'text') {
            return part;
        }
        const truncation = truncateStringOutput(part.text, options);
        return truncation.truncated ? { ...part, text: truncation.output } : part;
    });

    // meta is copied rather than shared; a truncation flag could be added here later.
    const meta = { ...result.meta };

    return { ...result, content, meta };
}

View File

@@ -0,0 +1,901 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { TurnExecutor } from './turn-executor.js';
import { ContextManager } from '../../context/manager.js';
import { ToolManager } from '../../tools/tool-manager.js';
import { SessionEventBus, AgentEventBus } from '../../events/index.js';
import { ResourceManager } from '../../resources/index.js';
import { MessageQueueService } from '../../session/message-queue.js';
import { SystemPromptManager } from '../../systemPrompt/manager.js';
import { VercelMessageFormatter } from '../formatters/vercel.js';
import { MemoryHistoryProvider } from '../../session/history/memory.js';
import { MCPManager } from '../../mcp/manager.js';
import { ApprovalManager } from '../../approval/manager.js';
import { createLogger } from '../../logger/factory.js';
import { createStorageManager, StorageManager } from '../../storage/storage-manager.js';
import { MemoryManager } from '../../memory/index.js';
import { SystemPromptConfigSchema } from '../../systemPrompt/schemas.js';
import type { LanguageModel, ModelMessage } from 'ai';
import type { LLMContext } from '../types.js';
import type { ValidatedLLMConfig } from '../schemas.js';
import type { ValidatedStorageConfig } from '../../storage/schemas.js';
import type { IDextoLogger } from '../../logger/v2/types.js';
// Only mock the AI SDK's streamText/generateText - everything else is real
vi.mock('ai', async (importOriginal) => {
const actual = await importOriginal<typeof import('ai')>();
return {
...actual,
streamText: vi.fn(),
generateText: vi.fn(),
};
});
// Keep the real OpenTelemetry API but make trace.getActiveSpan() always return null.
vi.mock('@opentelemetry/api', async (importOriginal) => {
const actual = await importOriginal<typeof import('@opentelemetry/api')>();
return {
...actual,
trace: {
...actual.trace,
getActiveSpan: vi.fn(() => null),
},
};
});
import { streamText, generateText } from 'ai';
/**
 * Builds a fake stream result shaped like a Vercel AI SDK response.
 *
 * The returned `fullStream` replays, in order: one `reasoning-delta` per
 * character of `reasoning`, one `text-delta` per character of `text`, one
 * `tool-call` per entry in `toolCalls`, and a closing `finish` event.
 * `finishReason` defaults to 'stop' and usage to 100/50/150 tokens;
 * `providerMetadata` is attached to the finish event only when supplied.
 */
function createMockStream(options: {
    text?: string;
    finishReason?: string;
    usage?: { inputTokens: number; outputTokens: number; totalTokens: number };
    providerMetadata?: Record<string, unknown>;
    toolCalls?: Array<{ toolCallId: string; toolName: string; args: Record<string, unknown> }>;
    reasoning?: string;
}) {
    type MockEvent = { type: string; [key: string]: unknown };
    const { reasoning, text, toolCalls, providerMetadata } = options;

    // One delta event per character; an absent or empty string yields none.
    const toDeltas = (type: string, source: string | undefined): MockEvent[] =>
        Array.from(source ?? '', (char) => ({ type, text: char }));

    const toolCallEvents: MockEvent[] = (toolCalls ?? []).map(
        ({ toolCallId, toolName, args }) => ({ type: 'tool-call', toolCallId, toolName, args })
    );

    const finishEvent: MockEvent = {
        type: 'finish',
        finishReason: options.finishReason ?? 'stop',
        totalUsage: options.usage ?? { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
    };
    if (providerMetadata) {
        finishEvent.providerMetadata = providerMetadata;
    }

    const events: MockEvent[] = [
        ...toDeltas('reasoning-delta', reasoning),
        ...toDeltas('text-delta', text),
        ...toolCallEvents,
        finishEvent,
    ];

    async function* replay() {
        for (const event of events) {
            yield event;
        }
    }

    return { fullStream: replay() };
}
/**
 * Returns a stand-in LanguageModel whose doStream/doGenerate are bare mocks.
 * The tests drive behaviour through the mocked `streamText`/`generateText`,
 * so this object only has to satisfy the type.
 */
function createMockModel(): LanguageModel {
    const stub = {
        modelId: 'test-model',
        provider: 'test-provider',
        specificationVersion: 'v1',
        doStream: vi.fn(),
        doGenerate: vi.fn(),
    };
    return stub as unknown as LanguageModel;
}
describe('TurnExecutor Integration Tests', () => {
// Shared fixtures. Every `let` below is reassigned in beforeEach, so each test
// runs against freshly constructed instances.
let executor: TurnExecutor;
let contextManager: ContextManager<ModelMessage>;
let toolManager: ToolManager;
let sessionEventBus: SessionEventBus;
let agentEventBus: AgentEventBus;
let resourceManager: ResourceManager;
let messageQueue: MessageQueueService;
let logger: IDextoLogger;
let historyProvider: MemoryHistoryProvider;
let mcpManager: MCPManager;
let approvalManager: ApprovalManager;
let storageManager: StorageManager;
// Constants reused when constructing executors inside individual tests.
const sessionId = 'test-session';
const llmContext: LLMContext = { provider: 'openai', model: 'gpt-4' };
// Builds the full object graph with real components (logger, event buses,
// in-memory storage, MCP/resource/context/approval/tool managers, message queue)
// and installs a default streamText mock before constructing the executor.
beforeEach(async () => {
vi.clearAllMocks();
// Create real logger
logger = createLogger({
config: {
level: 'warn', // Use warn to reduce noise in tests
transports: [{ type: 'console', colorize: false }],
},
agentId: 'test-agent',
});
// Create real event buses
agentEventBus = new AgentEventBus();
sessionEventBus = new SessionEventBus();
// Create real storage manager with in-memory backends
// Cast to ValidatedStorageConfig since we know test data is valid (avoids schema parsing overhead)
const storageConfig = {
cache: { type: 'in-memory' },
database: { type: 'in-memory' },
blob: {
type: 'in-memory',
maxBlobSize: 10 * 1024 * 1024,
maxTotalSize: 100 * 1024 * 1024,
},
} as unknown as ValidatedStorageConfig;
storageManager = await createStorageManager(storageConfig, logger);
// Create real MCP manager
mcpManager = new MCPManager(logger);
// Create real resource manager with proper wiring
resourceManager = new ResourceManager(
mcpManager,
{
internalResourcesConfig: { enabled: false, resources: [] },
blobStore: storageManager.getBlobStore(),
},
logger
);
await resourceManager.initialize();
// Create real history provider
historyProvider = new MemoryHistoryProvider(logger);
// Create real memory manager and system prompt manager
const memoryManager = new MemoryManager(storageManager.getDatabase(), logger);
const systemPromptConfig = SystemPromptConfigSchema.parse('You are a helpful assistant.');
const systemPromptManager = new SystemPromptManager(
systemPromptConfig,
'/tmp', // configDir - not used with inline prompts
memoryManager,
undefined, // memoriesConfig
logger
);
// Create real context manager with Vercel formatter
const formatter = new VercelMessageFormatter(logger);
// Cast to ValidatedLLMConfig since we know test data is valid
const llmConfig = {
provider: 'openai',
model: 'gpt-4',
apiKey: 'test-api-key',
maxInputTokens: 100000,
maxOutputTokens: 4096,
temperature: 0.7,
maxIterations: 10,
} as unknown as ValidatedLLMConfig;
contextManager = new ContextManager<ModelMessage>(
llmConfig,
formatter,
systemPromptManager,
100000,
historyProvider,
sessionId,
resourceManager,
logger
);
// Create real approval manager
approvalManager = new ApprovalManager(
{
toolConfirmation: { mode: 'auto-approve', timeout: 120000 },
elicitation: { enabled: false, timeout: 120000 },
},
logger
);
// Create real tool manager (minimal setup - no internal tools)
// The allowed-tools provider is the only stubbed collaborator here.
const mockAllowedToolsProvider = {
isToolAllowed: vi.fn().mockResolvedValue(false),
allowTool: vi.fn(),
disallowTool: vi.fn(),
};
toolManager = new ToolManager(
mcpManager,
approvalManager,
mockAllowedToolsProvider,
'auto-approve',
agentEventBus,
{ alwaysAllow: [], alwaysDeny: [] },
{ internalToolsServices: {}, internalToolsConfig: [] },
logger
);
await toolManager.initialize();
// Create real message queue
messageQueue = new MessageQueueService(sessionEventBus, logger);
// Default streamText mock - simple text response
// (usage is not specified, so createMockStream's 100/50/150 default applies)
vi.mocked(streamText).mockImplementation(
() =>
createMockStream({ text: 'Hello!', finishReason: 'stop' }) as unknown as ReturnType<
typeof streamText
>
);
// Create executor with real components
executor = new TurnExecutor(
createMockModel(),
toolManager,
contextManager,
sessionEventBus,
resourceManager,
sessionId,
{ maxSteps: 10, maxOutputTokens: 4096, temperature: 0.7 },
llmContext,
logger,
messageQueue
);
});
// Per-test teardown: restores mocks and destroys the logger.
// NOTE(review): the storage/resource/tool managers created in beforeEach are not
// torn down here - confirm whether they hold resources that need explicit cleanup.
afterEach(async () => {
vi.restoreAllMocks();
logger.destroy();
});
// Single-turn runs against the default mock stream ('Hello!' / 'stop').
describe('Basic Execution Flow', () => {
it('should execute and return result with real context manager', async () => {
await contextManager.addUserMessage([{ type: 'text', text: 'Hello' }]);
const result = await executor.execute({ mcpManager }, true);
expect(result.finishReason).toBe('stop');
expect(result.text).toBe('Hello!');
// Default mock usage (100/50/150) with both cache counters reported as 0.
expect(result.usage).toEqual({
inputTokens: 100,
outputTokens: 50,
totalTokens: 150,
cacheReadTokens: 0,
cacheWriteTokens: 0,
});
});
it('should persist assistant response to history', async () => {
await contextManager.addUserMessage([{ type: 'text', text: 'Hello' }]);
await executor.execute({ mcpManager }, true);
const history = await contextManager.getHistory();
// At least the user message plus one assistant message.
expect(history.length).toBeGreaterThanOrEqual(2);
const assistantMessages = history.filter((m) => m.role === 'assistant');
expect(assistantMessages.length).toBeGreaterThan(0);
});
it('should emit events through real event bus', async () => {
const thinkingHandler = vi.fn();
const responseHandler = vi.fn();
const runCompleteHandler = vi.fn();
sessionEventBus.on('llm:thinking', thinkingHandler);
sessionEventBus.on('llm:response', responseHandler);
sessionEventBus.on('run:complete', runCompleteHandler);
await contextManager.addUserMessage([{ type: 'text', text: 'Hello' }]);
await executor.execute({ mcpManager }, true);
expect(thinkingHandler).toHaveBeenCalled();
expect(responseHandler).toHaveBeenCalled();
// A run that ends on the first 'stop' response is expected to report stepCount 0.
expect(runCompleteHandler).toHaveBeenCalledWith(
expect.objectContaining({
finishReason: 'stop',
stepCount: 0,
})
);
});
// The second argument to execute() toggles streaming; chunk events are
// expected only when it is true.
it('should emit chunk events when streaming', async () => {
const chunkHandler = vi.fn();
sessionEventBus.on('llm:chunk', chunkHandler);
await contextManager.addUserMessage([{ type: 'text', text: 'Hello' }]);
await executor.execute({ mcpManager }, true);
expect(chunkHandler).toHaveBeenCalled();
expect(chunkHandler.mock.calls.some((call) => call[0].chunkType === 'text')).toBe(true);
});
it('should not emit chunk events when not streaming', async () => {
const chunkHandler = vi.fn();
sessionEventBus.on('llm:chunk', chunkHandler);
await contextManager.addUserMessage([{ type: 'text', text: 'Hello' }]);
await executor.execute({ mcpManager }, false);
expect(chunkHandler).not.toHaveBeenCalled();
});
});
// Verifies the executor keeps calling the model while it finishes with
// 'tool-calls', and that the loop is bounded by maxSteps.
describe('Multi-Step Tool Execution', () => {
it('should continue looping on tool-calls finish reason', async () => {
let callCount = 0;
// Calls 1 and 2 finish with 'tool-calls'; call 3 finishes with 'stop'.
vi.mocked(streamText).mockImplementation(() => {
callCount++;
if (callCount < 3) {
return createMockStream({
text: `Step ${callCount}`,
finishReason: 'tool-calls',
toolCalls: [
{ toolCallId: `call-${callCount}`, toolName: 'test_tool', args: {} },
],
}) as unknown as ReturnType<typeof streamText>;
}
return createMockStream({
text: 'Final response',
finishReason: 'stop',
}) as unknown as ReturnType<typeof streamText>;
});
await contextManager.addUserMessage([{ type: 'text', text: 'Do something' }]);
const result = await executor.execute({ mcpManager }, true);
expect(result.finishReason).toBe('stop');
// Two tool-call rounds are counted as steps; the final 'stop' call is not.
expect(result.stepCount).toBe(2);
expect(callCount).toBe(3);
});
it('should stop at maxSteps limit', async () => {
// The model never stops asking for tools, so only maxSteps can end the run.
vi.mocked(streamText).mockImplementation(
() =>
createMockStream({
text: 'Tool step',
finishReason: 'tool-calls',
toolCalls: [{ toolCallId: 'call-1', toolName: 'test', args: {} }],
}) as unknown as ReturnType<typeof streamText>
);
const limitedExecutor = new TurnExecutor(
createMockModel(),
toolManager,
contextManager,
sessionEventBus,
resourceManager,
sessionId,
{ maxSteps: 3, maxOutputTokens: 4096, temperature: 0.7 },
llmContext,
logger,
messageQueue
);
await contextManager.addUserMessage([{ type: 'text', text: 'Keep going' }]);
const result = await limitedExecutor.execute({ mcpManager }, true);
expect(result.finishReason).toBe('max-steps');
expect(result.stepCount).toBe(3);
});
});
// Messages placed on the MessageQueueService are expected to be folded into the
// conversation as user messages, and to keep the run going if they arrive
// while the model is finishing.
describe('Message Queue Injection', () => {
it('should inject queued messages into context', async () => {
// Enqueued before execute(), so it should appear alongside the initial request.
messageQueue.enqueue({
content: [{ type: 'text', text: 'User guidance: focus on performance' }],
});
await contextManager.addUserMessage([{ type: 'text', text: 'Initial request' }]);
await executor.execute({ mcpManager }, true);
const history = await contextManager.getHistory();
const userMessages = history.filter((m) => m.role === 'user');
expect(userMessages.length).toBe(2);
const injectedMsg = userMessages.find((m) => {
const content = Array.isArray(m.content) ? m.content : [];
return content.some((p) => p.type === 'text' && p.text.includes('User guidance'));
});
expect(injectedMsg).toBeDefined();
});
it('should continue processing when queue has messages on termination', async () => {
let callCount = 0;
vi.mocked(streamText).mockImplementation(() => {
callCount++;
if (callCount === 1) {
// Simulates a follow-up arriving during the first model call.
messageQueue.enqueue({
content: [{ type: 'text', text: 'Follow-up question' }],
});
return createMockStream({
text: 'First response',
finishReason: 'stop',
}) as unknown as ReturnType<typeof streamText>;
}
return createMockStream({
text: 'Second response',
finishReason: 'stop',
}) as unknown as ReturnType<typeof streamText>;
});
await contextManager.addUserMessage([{ type: 'text', text: 'Initial' }]);
const result = await executor.execute({ mcpManager }, true);
// Even though call 1 finished with 'stop', the queued message triggers a second call.
expect(callCount).toBe(2);
expect(result.text).toBe('Second response');
});
});
// These tests use calls to the mocked generateText as the signal that a
// tool-support probe ran. A rejected probe is expected to result in streamText
// being called with an empty tools object.
describe('Tool Support Validation', () => {
it('should skip validation for providers without baseURL', async () => {
// The default executor is built for provider 'openai' with no baseURL.
await contextManager.addUserMessage([{ type: 'text', text: 'Hello' }]);
await executor.execute({ mcpManager }, true);
expect(generateText).not.toHaveBeenCalled();
});
it('should validate and cache tool support for custom baseURL', async () => {
vi.mocked(generateText).mockResolvedValue(
{} as Awaited<ReturnType<typeof generateText>>
);
const executor1 = new TurnExecutor(
createMockModel(),
toolManager,
contextManager,
sessionEventBus,
resourceManager,
sessionId,
{ maxSteps: 10, baseURL: 'https://custom.api.com' },
llmContext,
logger,
messageQueue
);
await contextManager.addUserMessage([{ type: 'text', text: 'Hello' }]);
await executor1.execute({ mcpManager }, true);
expect(generateText).toHaveBeenCalledTimes(1);
// Second executor with same baseURL should use cache
// NOTE(review): executor2 is a fresh instance, so this assertion implies the
// cache lives outside the instance. If it is process-wide, tests that share a
// baseURL can influence each other - confirm.
const newMessageQueue = new MessageQueueService(sessionEventBus, logger);
const executor2 = new TurnExecutor(
createMockModel(),
toolManager,
contextManager,
sessionEventBus,
resourceManager,
'session-2',
{ maxSteps: 10, baseURL: 'https://custom.api.com' },
llmContext,
logger,
newMessageQueue
);
await executor2.execute({ mcpManager }, true);
expect(generateText).toHaveBeenCalledTimes(1);
});
it('should use empty tools when model does not support them', async () => {
vi.mocked(generateText).mockRejectedValue(new Error('Model does not support tools'));
const executorWithBaseURL = new TurnExecutor(
createMockModel(),
toolManager,
contextManager,
sessionEventBus,
resourceManager,
sessionId,
{ maxSteps: 10, baseURL: 'https://no-tools.api.com' },
llmContext,
logger,
messageQueue
);
await contextManager.addUserMessage([{ type: 'text', text: 'Hello' }]);
await executorWithBaseURL.execute({ mcpManager }, true);
expect(streamText).toHaveBeenCalledWith(
expect.objectContaining({
tools: {},
})
);
});
it('should validate tool support for local providers even without custom baseURL', async () => {
vi.mocked(generateText).mockRejectedValue(new Error('Model does not support tools'));
// Same setup as above but keyed on the provider ('ollama') instead of a baseURL.
const ollamaLlmContext = {
provider: 'ollama' as const,
model: 'gemma3n:e2b',
};
const ollamaExecutor = new TurnExecutor(
createMockModel(),
toolManager,
contextManager,
sessionEventBus,
resourceManager,
sessionId,
{ maxSteps: 10 }, // No baseURL
ollamaLlmContext,
logger,
messageQueue
);
await contextManager.addUserMessage([{ type: 'text', text: 'Hello' }]);
await ollamaExecutor.execute({ mcpManager }, true);
// Should call generateText for validation even without baseURL
expect(generateText).toHaveBeenCalledTimes(1);
// Should use empty tools in actual execution
expect(streamText).toHaveBeenCalledWith(
expect.objectContaining({
tools: {},
})
);
});
it('should emit llm:unsupported-input warning when model does not support tools', async () => {
vi.mocked(generateText).mockRejectedValue(new Error('Model does not support tools'));
const warningHandler = vi.fn();
sessionEventBus.on('llm:unsupported-input', warningHandler);
// Reuses the 'https://no-tools.api.com' baseURL from an earlier test in this group.
const executorWithBaseURL = new TurnExecutor(
createMockModel(),
toolManager,
contextManager,
sessionEventBus,
resourceManager,
sessionId,
{ maxSteps: 10, baseURL: 'https://no-tools.api.com' },
llmContext,
logger,
messageQueue
);
await contextManager.addUserMessage([{ type: 'text', text: 'Hello' }]);
await executorWithBaseURL.execute({ mcpManager }, true);
// The warning payload must name the provider/model and flag tool-calling as unsupported.
expect(warningHandler).toHaveBeenCalledWith(
expect.objectContaining({
errors: expect.arrayContaining([
expect.stringContaining('does not support tool calling'),
expect.stringContaining('You can still chat'),
]),
provider: llmContext.provider,
model: llmContext.model,
details: expect.objectContaining({
feature: 'tool-calling',
supported: false,
}),
})
);
});
});
// Failure paths: streamText is made to throw synchronously and the tests check
// both the emitted events and the shape of the rejected error.
describe('Error Handling', () => {
it('should emit llm:error and run:complete on failure', async () => {
vi.mocked(streamText).mockImplementation(() => {
throw new Error('Stream failed');
});
const errorHandler = vi.fn();
const completeHandler = vi.fn();
sessionEventBus.on('llm:error', errorHandler);
sessionEventBus.on('run:complete', completeHandler);
await contextManager.addUserMessage([{ type: 'text', text: 'Hello' }]);
await expect(executor.execute({ mcpManager }, true)).rejects.toThrow();
// The error event is expected to be attributed to the executor and marked non-recoverable.
expect(errorHandler).toHaveBeenCalledWith(
expect.objectContaining({
context: 'TurnExecutor',
recoverable: false,
})
);
// run:complete must still fire, with finishReason 'error'.
expect(completeHandler).toHaveBeenCalledWith(
expect.objectContaining({
finishReason: 'error',
})
);
});
it('should map rate limit errors correctly', async () => {
// Imported here so the class comes from the (partially mocked) 'ai' module.
const { APICallError } = await import('ai');
// Create a real APICallError instance
const rateLimitError = new APICallError({
message: 'Rate limit exceeded',
statusCode: 429,
responseHeaders: { 'retry-after': '60' },
responseBody: 'Rate limit exceeded',
url: 'https://api.openai.com/v1/chat/completions',
requestBodyValues: {},
isRetryable: true,
});
vi.mocked(streamText).mockImplementation(() => {
throw rateLimitError;
});
await contextManager.addUserMessage([{ type: 'text', text: 'Hello' }]);
// A 429 APICallError is expected to surface as a typed rate-limit error.
await expect(executor.execute({ mcpManager }, true)).rejects.toMatchObject({
code: 'llm_rate_limit_exceeded',
type: 'rate_limit',
});
});
});
// After a run (successful or failed) the message queue must be empty;
// dequeueAll() returning null is used as the "nothing queued" signal.
describe('Cleanup and Resource Management', () => {
it('should clear message queue on normal completion', async () => {
messageQueue.enqueue({ content: [{ type: 'text', text: 'Pending' }] });
await contextManager.addUserMessage([{ type: 'text', text: 'Hello' }]);
await executor.execute({ mcpManager }, true);
expect(messageQueue.dequeueAll()).toBeNull();
});
it('should clear message queue on error', async () => {
messageQueue.enqueue({ content: [{ type: 'text', text: 'Pending' }] });
// Force the run to fail on the first model call.
vi.mocked(streamText).mockImplementation(() => {
throw new Error('Failed');
});
await contextManager.addUserMessage([{ type: 'text', text: 'Hello' }]);
await expect(executor.execute({ mcpManager }, true)).rejects.toThrow();
expect(messageQueue.dequeueAll()).toBeNull();
});
});
// An AbortSignal passed to the TurnExecutor constructor should end the run
// with finishReason 'cancelled' instead of throwing.
describe('External Abort Signal', () => {
it('should handle external abort signal', async () => {
const abortController = new AbortController();
let callCount = 0;
vi.mocked(streamText).mockImplementation(() => {
callCount++;
if (callCount === 1) {
// Abort from inside the first model call, i.e. mid-run, while the
// response still asks for a tool call.
abortController.abort();
return createMockStream({
finishReason: 'tool-calls',
toolCalls: [{ toolCallId: 'call-1', toolName: 'test', args: {} }],
}) as unknown as ReturnType<typeof streamText>;
}
return createMockStream({ finishReason: 'stop' }) as unknown as ReturnType<
typeof streamText
>;
});
// The signal is the 12th constructor argument; the 11th is left undefined.
const executorWithSignal = new TurnExecutor(
createMockModel(),
toolManager,
contextManager,
sessionEventBus,
resourceManager,
sessionId,
{ maxSteps: 10 },
llmContext,
logger,
messageQueue,
undefined,
abortController.signal
);
await contextManager.addUserMessage([{ type: 'text', text: 'Hello' }]);
const result = await executorWithSignal.execute({ mcpManager }, true);
expect(result.finishReason).toBe('cancelled');
});
});
// Runs a response that includes reasoning deltas and checks the usage totals.
// NOTE(review): the mocked usage has no reasoningTokens field, so this only
// verifies that input/output/total pass through when reasoning text is
// streamed - it does not assert on a reasoning token count.
describe('Reasoning Token Support', () => {
it('should handle reasoning tokens in usage', async () => {
vi.mocked(streamText).mockImplementation(
() =>
createMockStream({
text: 'Response',
reasoning: 'Let me think...',
finishReason: 'stop',
usage: {
inputTokens: 100,
outputTokens: 50,
totalTokens: 170,
},
}) as unknown as ReturnType<typeof streamText>
);
const responseHandler = vi.fn();
sessionEventBus.on('llm:response', responseHandler);
await contextManager.addUserMessage([{ type: 'text', text: 'Think about this' }]);
const result = await executor.execute({ mcpManager }, true);
// toMatchObject: extra fields on result.usage are tolerated.
expect(result.usage).toMatchObject({
inputTokens: 100,
outputTokens: 50,
totalTokens: 170,
});
expect(responseHandler).toHaveBeenCalled();
});
});
// Inspects the arguments handed to the mocked streamText to check what the
// context manager / formatter produced.
describe('Context Formatting', () => {
it('should format messages correctly for LLM', async () => {
await contextManager.addUserMessage([{ type: 'text', text: 'Hello' }]);
await executor.execute({ mcpManager }, true);
expect(streamText).toHaveBeenCalledWith(
expect.objectContaining({
messages: expect.arrayContaining([
expect.objectContaining({
role: 'user',
}),
]),
})
);
});
it('should include system prompt in formatted messages', async () => {
await contextManager.addUserMessage([{ type: 'text', text: 'Hello' }]);
await executor.execute({ mcpManager }, true);
// First argument of the first streamText call.
const call = vi.mocked(streamText).mock.calls[0]?.[0];
expect(call).toBeDefined();
expect(call?.messages).toBeDefined();
const messages = call?.messages as ModelMessage[];
// Accepts either a dedicated 'system' message or the prompt text
// ('helpful assistant') embedded in a user message's text part.
const hasSystemContent = messages.some(
(m) =>
m.role === 'system' ||
(m.role === 'user' &&
Array.isArray(m.content) &&
m.content.some(
(c) =>
typeof c === 'object' &&
'text' in c &&
c.text.includes('helpful assistant')
))
);
expect(hasSystemContent).toBe(true);
});
});
// Checks that the input token count reported by the model is recorded on the
// ContextManager and exposed through getLastActualInputTokens() /
// getContextTokenEstimate().
describe('Context Token Tracking', () => {
it('should store actual input tokens from LLM response in ContextManager', async () => {
const expectedInputTokens = 1234;
vi.mocked(streamText).mockImplementation(
() =>
createMockStream({
text: 'Response',
finishReason: 'stop',
usage: {
inputTokens: expectedInputTokens,
outputTokens: 50,
totalTokens: expectedInputTokens + 50,
},
}) as unknown as ReturnType<typeof streamText>
);
// Before LLM call, should be null
expect(contextManager.getLastActualInputTokens()).toBeNull();
await contextManager.addUserMessage([{ type: 'text', text: 'Hello' }]);
await executor.execute({ mcpManager }, true);
// After LLM call, should have the actual token count
expect(contextManager.getLastActualInputTokens()).toBe(expectedInputTokens);
});
it('should update actual tokens on each LLM call', async () => {
// First call
vi.mocked(streamText).mockImplementationOnce(
() =>
createMockStream({
text: 'First response',
finishReason: 'stop',
usage: { inputTokens: 100, outputTokens: 20, totalTokens: 120 },
}) as unknown as ReturnType<typeof streamText>
);
await contextManager.addUserMessage([{ type: 'text', text: 'First message' }]);
await executor.execute({ mcpManager }, true);
expect(contextManager.getLastActualInputTokens()).toBe(100);
// Second call with different token count
vi.mocked(streamText).mockImplementationOnce(
() =>
createMockStream({
text: 'Second response',
finishReason: 'stop',
usage: { inputTokens: 250, outputTokens: 30, totalTokens: 280 },
}) as unknown as ReturnType<typeof streamText>
);
await contextManager.addUserMessage([{ type: 'text', text: 'Second message' }]);
await executor.execute({ mcpManager }, true);
// The stored value is replaced by the latest call, not accumulated.
expect(contextManager.getLastActualInputTokens()).toBe(250);
});
it('should make actual tokens available via getContextTokenEstimate', async () => {
const expectedInputTokens = 5000;
vi.mocked(streamText).mockImplementation(
() =>
createMockStream({
text: 'Response',
finishReason: 'stop',
usage: {
inputTokens: expectedInputTokens,
outputTokens: 100,
totalTokens: expectedInputTokens + 100,
},
}) as unknown as ReturnType<typeof streamText>
);
await contextManager.addUserMessage([{ type: 'text', text: 'Hello' }]);
await executor.execute({ mcpManager }, true);
// getContextTokenEstimate should return the actual value
const estimate = await contextManager.getContextTokenEstimate({ mcpManager }, {});
expect(estimate.actual).toBe(expectedInputTokens);
});
it('should include cached tokens in actual context input tracking', async () => {
const noCacheTokens = 200;
const cacheReadTokens = 800;
// usage.inputTokens carries only the non-cached part; the cache read count
// arrives separately through Anthropic-style provider metadata.
vi.mocked(streamText).mockImplementation(
() =>
createMockStream({
text: 'Response',
finishReason: 'stop',
usage: {
inputTokens: noCacheTokens,
outputTokens: 10,
totalTokens: noCacheTokens + 10,
},
providerMetadata: {
anthropic: {
cacheReadInputTokens: cacheReadTokens,
cacheCreationInputTokens: 0,
},
},
}) as unknown as ReturnType<typeof streamText>
);
await contextManager.addUserMessage([{ type: 'text', text: 'Hello' }]);
await executor.execute({ mcpManager }, true);
// Tracked context size = non-cached input + cache reads (200 + 800).
expect(contextManager.getLastActualInputTokens()).toBe(noCacheTokens + cacheReadTokens);
});
});
});

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,28 @@
import { TokenUsage } from '../types.js';
import { LLMFinishReason } from '../../events/index.js';
/**
 * Summary of a completed executor run.
 * NOTE(review): presumably the value resolved by TurnExecutor.execute() - confirm.
 */
export interface ExecutorResult {
/**
* The accumulated text from assistant responses.
* TODO: Some LLMs are multimodal and can generate non-text content (images, audio, etc.).
* Consider extending this to support multimodal output in the future.
*/
text: string;
/** Number of steps executed */
stepCount: number;
/** Token usage from the last step */
usage: TokenUsage | null;
/** Reason the execution finished */
finishReason: LLMFinishReason;
}
/**
 * Result of processing a single stream: accumulated text, finish reason and
 * token usage.
 */
export interface StreamProcessorResult {
/**
* The accumulated text from text-delta events.
* TODO: Some LLMs are multimodal and can generate non-text content (images, audio, etc.).
* Consider extending this to support multimodal output in the future.
*/
text: string;
/** Reason the stream finished */
finishReason: LLMFinishReason;
/** Token usage for the stream; always present (non-nullable) */
usage: TokenUsage;
}

View File

@@ -0,0 +1,290 @@
import { describe, test, expect, vi } from 'vitest';
import { VercelMessageFormatter } from './vercel.js';
import { createMockLogger } from '../../logger/v2/test-utils.js';
import type { InternalMessage } from '../../context/types.js';
import * as registry from '../registry.js';
// Mock the registry to allow all file types
vi.mock('../registry.js');
const mockValidateModelFileSupport = vi.mocked(registry.validateModelFileSupport);
// Every validateModelFileSupport call reports the file as a supported 'pdf'.
mockValidateModelFileSupport.mockReturnValue({ isSupported: true, fileType: 'pdf' });
// Single mock logger shared by every formatter constructed in this file.
const mockLogger = createMockLogger();
describe('VercelMessageFormatter', () => {
    type FormatContext = Parameters<VercelMessageFormatter['format']>[1];
    type FormattedMessage = ReturnType<VercelMessageFormatter['format']>[number];

    const openAiContext: FormatContext = { provider: 'openai', model: 'gpt-4o' };
    const claudeContext: FormatContext = {
        provider: 'anthropic',
        model: 'claude-3-5-sonnet-20241022',
    };

    /**
     * Formats `messages` with a fresh formatter (system prompt 'You are helpful')
     * and returns the first formatted message carrying the requested role.
     */
    function formatAndPick(
        messages: InternalMessage[],
        context: FormatContext,
        role: FormattedMessage['role']
    ): FormattedMessage | undefined {
        const formatter = new VercelMessageFormatter(mockLogger);
        const formatted = formatter.format(messages, context, 'You are helpful');
        return formatted.find((m) => m.role === role);
    }

    describe('URL string auto-detection', () => {
        test('should convert image URL string to URL object', () => {
            const userMessage = formatAndPick(
                [
                    {
                        role: 'user',
                        content: [
                            { type: 'text', text: 'Describe this image' },
                            {
                                type: 'image',
                                image: 'https://example.com/image.png',
                                mimeType: 'image/png',
                            },
                        ],
                    },
                ],
                openAiContext,
                'user'
            );
            expect(userMessage).toBeDefined();

            const parts = userMessage!.content as Array<{ type: string; image?: URL | string }>;
            const imagePart = parts.find((p) => p.type === 'image');
            expect(imagePart).toBeDefined();
            expect(imagePart!.image).toBeInstanceOf(URL);
            expect((imagePart!.image as URL).href).toBe('https://example.com/image.png');
        });

        test('should convert file URL string to URL object', () => {
            const userMessage = formatAndPick(
                [
                    {
                        role: 'user',
                        content: [
                            { type: 'text', text: 'Summarize this document' },
                            {
                                type: 'file',
                                data: 'https://example.com/document.pdf',
                                mimeType: 'application/pdf',
                            },
                        ],
                    },
                ],
                openAiContext,
                'user'
            );
            expect(userMessage).toBeDefined();

            const parts = userMessage!.content as Array<{ type: string; data?: URL | string }>;
            const filePart = parts.find((p) => p.type === 'file');
            expect(filePart).toBeDefined();
            expect(filePart!.data).toBeInstanceOf(URL);
            expect((filePart!.data as URL).href).toBe('https://example.com/document.pdf');
        });

        test('should preserve base64 strings as-is', () => {
            const base64Image =
                'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
            const userMessage = formatAndPick(
                [
                    {
                        role: 'user',
                        content: [{ type: 'image', image: base64Image, mimeType: 'image/png' }],
                    },
                ],
                openAiContext,
                'user'
            );

            const parts = userMessage!.content as Array<{ type: string; image?: string }>;
            const imagePart = parts.find((p) => p.type === 'image');
            expect(imagePart!.image).toBe(base64Image);
            expect(typeof imagePart!.image).toBe('string');
        });

        test('should preserve data URI strings as-is', () => {
            const dataUri =
                'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
            const userMessage = formatAndPick(
                [
                    {
                        role: 'user',
                        content: [{ type: 'image', image: dataUri, mimeType: 'image/png' }],
                    },
                ],
                openAiContext,
                'user'
            );

            const parts = userMessage!.content as Array<{ type: string; image?: string }>;
            const imagePart = parts.find((p) => p.type === 'image');
            expect(imagePart!.image).toBe(dataUri);
            expect(typeof imagePart!.image).toBe('string');
        });

        test('should handle http:// URLs (not just https://)', () => {
            const userMessage = formatAndPick(
                [
                    {
                        role: 'user',
                        content: [
                            {
                                type: 'image',
                                image: 'http://example.com/image.png',
                                mimeType: 'image/png',
                            },
                        ],
                    },
                ],
                openAiContext,
                'user'
            );

            const parts = userMessage!.content as Array<{ type: string; image?: URL }>;
            const imagePart = parts.find((p) => p.type === 'image');
            expect(imagePart!.image).toBeInstanceOf(URL);
        });

        test('should preserve URL objects as-is', () => {
            const urlObj = new URL('https://example.com/image.png');
            const userMessage = formatAndPick(
                [
                    {
                        role: 'user',
                        content: [
                            {
                                type: 'image',
                                image: urlObj as unknown as string,
                                mimeType: 'image/png',
                            },
                        ],
                    },
                ],
                openAiContext,
                'user'
            );

            const parts = userMessage!.content as Array<{ type: string; image?: URL }>;
            const imagePart = parts.find((p) => p.type === 'image');
            // URL object should be preserved (or converted back to URL)
            expect(imagePart!.image).toBeInstanceOf(URL);
        });
    });

    describe('Reasoning round-trip', () => {
        test('should include reasoning part in assistant message when reasoning is present', () => {
            const assistantMessage = formatAndPick(
                [
                    {
                        role: 'assistant',
                        content: [{ type: 'text', text: 'Here is my answer' }],
                        reasoning: 'Let me think about this carefully...',
                    },
                ],
                claudeContext,
                'assistant'
            );
            expect(assistantMessage).toBeDefined();

            const parts = assistantMessage!.content as Array<{ type: string; text?: string }>;
            const reasoningPart = parts.find((p) => p.type === 'reasoning');
            expect(reasoningPart).toBeDefined();
            expect(reasoningPart!.text).toBe('Let me think about this carefully...');
        });

        test('should include providerOptions in reasoning part when reasoningMetadata is present', () => {
            const reasoningMetadata = { anthropic: { cacheId: 'cache-123' } };
            const assistantMessage = formatAndPick(
                [
                    {
                        role: 'assistant',
                        content: [{ type: 'text', text: 'Answer' }],
                        reasoning: 'Thinking...',
                        reasoningMetadata,
                    },
                ],
                claudeContext,
                'assistant'
            );

            const parts = assistantMessage!.content as Array<{
                type: string;
                providerOptions?: Record<string, unknown>;
            }>;
            const reasoningPart = parts.find((p) => p.type === 'reasoning');
            expect(reasoningPart).toBeDefined();
            expect(reasoningPart!.providerOptions).toEqual(reasoningMetadata);
        });

        test('should place reasoning part before text content', () => {
            const assistantMessage = formatAndPick(
                [
                    {
                        role: 'assistant',
                        content: [{ type: 'text', text: 'Final answer' }],
                        reasoning: 'Step by step reasoning...',
                    },
                ],
                claudeContext,
                'assistant'
            );

            const parts = assistantMessage!.content as Array<{ type: string }>;
            // Reasoning should come before text
            const reasoningIndex = parts.findIndex((p) => p.type === 'reasoning');
            const textIndex = parts.findIndex((p) => p.type === 'text');
            expect(reasoningIndex).toBeLessThan(textIndex);
        });

        test('should not include reasoning part when reasoning is not present', () => {
            const assistantMessage = formatAndPick(
                [
                    {
                        role: 'assistant',
                        content: [{ type: 'text', text: 'Simple answer' }],
                        // No reasoning field
                    },
                ],
                openAiContext,
                'assistant'
            );

            const parts = assistantMessage!.content as Array<{ type: string }>;
            const reasoningPart = parts.find((p) => p.type === 'reasoning');
            expect(reasoningPart).toBeUndefined();
        });
    });
});

View File

@@ -0,0 +1,372 @@
import type { ModelMessage, AssistantContent, ToolContent, ToolResultPart } from 'ai';
import { LLMContext } from '../types.js';
import type { InternalMessage, AssistantMessage, ToolMessage } from '@core/context/types.js';
import { getImageData, getFileData, filterMessagesByLLMCapabilities } from '@core/context/utils.js';
import type { IDextoLogger } from '@core/logger/v2/types.js';
import { DextoLogComponent } from '@core/logger/v2/types.js';
/**
 * Upgrades an http(s) URL string to a `URL` object.
 *
 * Any value that is not a string starting with `http://` or `https://`
 * (case-insensitive) is returned untouched, as is a string that matches the
 * prefix but fails to parse as a URL.
 *
 * @param value Candidate value (string, URL object, binary data, ...)
 * @returns A `URL` for parseable http(s) strings, otherwise `value` itself
 */
function toUrlIfString<T>(value: T): T | URL {
    // Guard: only http(s)-prefixed strings are candidates for conversion.
    if (typeof value !== 'string' || !/^https?:\/\//i.test(value)) {
        return value;
    }
    try {
        return new URL(value);
    } catch {
        // Prefix matched but the string is not a valid URL; keep the original.
        return value;
    }
}
/**
* Message formatter for Vercel AI SDK.
*
* Converts the internal message format to Vercel's specific structure:
* - System prompt is included in the messages array
* - Tool calls are emitted as 'tool-call' content parts; a legacy function_call field additionally mirrors the first call
* - Tool results use the 'tool' role (SDK v5)
*
* Note: Vercel's implementation is different from OpenAI's standard,
* particularly in its handling of function calls and responses.
*/
export class VercelMessageFormatter {
private logger: IDextoLogger;
// Keeps a child logger created from the given logger with the LLM component tag;
// all formatter logging goes through it.
constructor(logger: IDextoLogger) {
this.logger = logger.createChild(DextoLogComponent.LLM);
}
/**
 * Formats internal messages into Vercel AI SDK format.
 *
 * Steps:
 * 1. Filter the history by model capabilities (falls back to the unfiltered
 *    history if filtering throws).
 * 2. Prepend the system prompt, adding Anthropic cache control for Claude models.
 * 3. Convert each message, dropping tool results that have no matching tool call.
 * 4. Insert a synthetic error result for every tool call that never got a result.
 *    When history contains no result for the call at all, the synthetic result is
 *    placed directly after the assistant message that issued it, so that later
 *    messages (e.g. the user's next prompt after a crash) never end up between a
 *    tool call and its result.
 *
 * @param history Array of internal messages to format
 * @param context Provider/model the messages are formatted for
 * @param systemPrompt System prompt to include at the beginning of messages
 * @returns Array of messages formatted for Vercel's API
 */
format(
    history: Readonly<InternalMessage[]>,
    context: LLMContext,
    systemPrompt: string | null
): ModelMessage[] {
    // Returns Vercel-specific type
    const formatted: ModelMessage[] = [];

    // Apply model-aware capability filtering for Vercel
    let filteredHistory: InternalMessage[];
    try {
        filteredHistory = filterMessagesByLLMCapabilities([...history], context, this.logger);
        const modelInfo = `${context.provider}/${context.model}`;
        this.logger.debug(`Applied Vercel filtering for ${modelInfo}`);
    } catch (error) {
        this.logger.warn(
            `Failed to apply capability filtering, using original history: ${error}`
        );
        filteredHistory = [...history];
    }

    // Add system message if present
    if (systemPrompt) {
        // For Anthropic/Bedrock/Vertex Claude, add cacheControl to enable prompt caching
        // This marks the system prompt as cacheable (ephemeral = cached for session duration)
        const modelLower = context.model.toLowerCase();
        const isClaudeModel = modelLower.includes('claude');
        const isAnthropicProvider =
            context.provider === 'anthropic' ||
            (context.provider === 'bedrock' && isClaudeModel) ||
            (context.provider === 'vertex' && isClaudeModel);
        formatted.push({
            role: 'system',
            content: systemPrompt,
            ...(isAnthropicProvider && {
                providerOptions: {
                    anthropic: { cacheControl: { type: 'ephemeral' } },
                },
            }),
        });
    }

    // Pre-scan: which tool call IDs have a result anywhere in the history?
    // Calls without one can be answered synthetically right where they occur.
    const toolCallIdsWithResult = new Set<string>();
    for (const msg of filteredHistory) {
        if (msg.role === 'tool' && msg.toolCallId) {
            toolCallIdsWithResult.add(msg.toolCallId);
        }
    }

    // Track pending tool calls to detect orphans (tool calls without results)
    // Map stores toolCallId -> toolName for proper synthetic result generation
    const pendingToolCalls = new Map<string, string>();

    for (const msg of filteredHistory) {
        switch (msg.role) {
            case 'user':
                // Images (and text) in user content arrays are handled natively
                // by the Vercel SDK. We can forward the array of TextPart/ImagePart directly.
                if (msg.content !== null) {
                    // Convert internal content types to AI SDK types
                    // Filter out UIResourcePart - these are for UI rendering, not LLM processing
                    const content =
                        typeof msg.content === 'string'
                            ? msg.content
                            : msg.content
                                  .filter((part) => part.type !== 'ui-resource')
                                  .map((part) => {
                                      if (part.type === 'file') {
                                          return {
                                              type: 'file' as const,
                                              data: toUrlIfString(part.data),
                                              mediaType: part.mimeType, // Convert mimeType -> mediaType
                                              ...(part.filename && { filename: part.filename }),
                                          };
                                      } else if (part.type === 'image') {
                                          return {
                                              type: 'image' as const,
                                              image: toUrlIfString(part.image),
                                              ...(part.mimeType && {
                                                  mediaType: part.mimeType,
                                              }), // Convert mimeType -> mediaType
                                          };
                                      }
                                      return part; // TextPart doesn't need conversion
                                  });
                    formatted.push({
                        role: 'user',
                        content,
                    });
                }
                break;
            case 'system':
                // System messages
                if (msg.content !== null) {
                    formatted.push({
                        role: 'system',
                        content: String(msg.content),
                    });
                }
                break;
            case 'assistant':
                formatted.push({ role: 'assistant', ...this.formatAssistantMessage(msg) });
                if (msg.toolCalls && msg.toolCalls.length > 0) {
                    for (const toolCall of msg.toolCalls) {
                        if (toolCallIdsWithResult.has(toolCall.id)) {
                            // A result exists in history: wait for it.
                            pendingToolCalls.set(toolCall.id, toolCall.function.name);
                        } else {
                            // No result anywhere in history (e.g. the CLI crashed before the
                            // tool finished). Answer immediately so the result directly
                            // follows the call instead of trailing later messages.
                            formatted.push(
                                this.createInterruptedToolResult(
                                    toolCall.id,
                                    toolCall.function.name
                                )
                            );
                            this.logger.warn(
                                `Tool call ${toolCall.id} (${toolCall.function.name}) had no matching tool result - added synthetic error result to prevent API errors`
                            );
                        }
                    }
                }
                break;
            case 'tool':
                // Only add if we've seen the corresponding tool call
                if (msg.toolCallId && pendingToolCalls.has(msg.toolCallId)) {
                    formatted.push({ role: 'tool', ...this.formatToolMessage(msg) });
                    // Remove from pending since we found its result
                    pendingToolCalls.delete(msg.toolCallId);
                } else {
                    // Orphaned tool result (result without matching call)
                    // Skip it to prevent API errors - can't send result without corresponding call
                    this.logger.warn(
                        `Skipping orphaned tool result ${msg.toolCallId} (no matching tool call found) - cannot send to Vercel AI SDK without corresponding tool-call`
                    );
                }
                break;
        }
    }

    // Fallback: calls whose only result appeared BEFORE the call (and was therefore
    // skipped above) are still pending here; answer them at the end of the history.
    for (const [toolCallId, toolName] of pendingToolCalls.entries()) {
        formatted.push(this.createInterruptedToolResult(toolCallId, toolName));
        this.logger.warn(
            `Tool call ${toolCallId} (${toolName}) had no matching tool result - added synthetic error result to prevent API errors`
        );
    }

    return formatted;
}

// Builds the synthetic error tool message sent for a tool call that never produced a result.
private createInterruptedToolResult(toolCallId: string, toolName: string): ModelMessage {
    // Vercel AI SDK uses tool-result content parts with output property
    return {
        role: 'tool',
        content: [
            {
                type: 'tool-result',
                toolCallId: toolCallId,
                toolName: toolName,
                output: {
                    type: 'text',
                    value: 'Error: Tool execution was interrupted (session crashed or cancelled before completion)',
                },
                isError: true,
            } as ToolResultPart,
        ],
    };
}
/**
* Vercel handles system prompts in the messages array
* This method returns null since the system prompt is already
* included directly in the formatted messages (format() pushes it as the
* first 'system' message when one is supplied).
*
* @returns null as Vercel doesn't need a separate system prompt
*/
formatSystemPrompt(): null {
return null;
}
// Converts an internal assistant message into AI SDK assistant content.
// Part order: reasoning (if any), combined text (if any), then one tool-call
// part per tool call. When tool calls exist, a `function_call` field mirroring
// the FIRST call is returned alongside the content.
private formatAssistantMessage(msg: AssistantMessage): {
    content: AssistantContent;
    function_call?: { name: string; arguments: string };
} {
    type Part = Exclude<AssistantContent, string>[number];
    const parts: Part[] = [];

    // Reasoning goes first so extended thinking round-trips ahead of the answer.
    if (msg.reasoning) {
        // providerOptions is Record<string, JSONObject> in the SDK, which is
        // compatible with our Record<string, unknown> storage - hence the cast.
        const reasoningPart = {
            type: 'reasoning' as const,
            text: msg.reasoning,
            ...(msg.reasoningMetadata && { providerOptions: msg.reasoningMetadata }),
        };
        parts.push(reasoningPart as Part);
    }

    // Text: array content collapses to a single newline-joined text part
    // (skipped when empty); string content is forwarded as-is.
    if (Array.isArray(msg.content)) {
        const texts: string[] = [];
        for (const part of msg.content) {
            if (part.type === 'text' && part.text) {
                texts.push(part.text);
            }
        }
        const combined = texts.join('\n');
        if (combined) {
            parts.push({ type: 'text', text: combined });
        }
    } else if (typeof msg.content === 'string') {
        parts.push({ type: 'text', text: msg.content });
    }

    const toolCalls = msg.toolCalls;
    if (!toolCalls || toolCalls.length === 0) {
        return { content: parts };
    }

    // Tool arguments may be stored as a JSON string or as an already-parsed value.
    const parseArgs = (toolName: string, rawArgs: unknown): unknown => {
        if (typeof rawArgs !== 'string') {
            return rawArgs ?? {};
        }
        try {
            return JSON.parse(rawArgs);
        } catch {
            this.logger.warn(`Vercel formatter: invalid tool args JSON for ${toolName}`);
            return {};
        }
    };

    for (const toolCall of toolCalls) {
        // AI SDK v5 expects 'input' for tool-call arguments (not 'args').
        const toolCallPart: Part = {
            type: 'tool-call',
            toolCallId: toolCall.id,
            toolName: toolCall.function.name,
            input: parseArgs(toolCall.function.name, toolCall.function.arguments),
        };
        // Pass through providerOptions for round-tripping (thought signatures, etc.)
        if (toolCall.providerOptions) {
            (toolCallPart as { providerOptions?: unknown }).providerOptions =
                toolCall.providerOptions;
        }
        parts.push(toolCallPart);
    }

    // function_call.arguments: string arguments pass through unchanged; anything
    // else is serialized, with '{}' as the fallback when serialization throws.
    const firstToolCall = toolCalls[0]!;
    const firstArgs = firstToolCall.function.arguments;
    let argString: string;
    if (typeof firstArgs === 'string') {
        argString = firstArgs;
    } else {
        try {
            argString = JSON.stringify(firstArgs ?? {});
        } catch {
            argString = '{}';
        }
    }

    return {
        content: parts,
        function_call: {
            name: firstToolCall.function.name,
            arguments: argString,
        },
    };
}
// Converts an internal tool message into a single AI SDK tool-result part.
// The output shape is chosen from the FIRST content part only:
//   image -> media output (mime type defaults to 'image/jpeg')
//   file  -> media output
//   else  -> text output (text parts verbatim, other parts JSON-stringified)
// Non-array content is stringified into a text output.
private formatToolMessage(msg: ToolMessage): { content: ToolContent } {
    // Wraps an output payload in the tool-result envelope shared by every branch.
    const wrap = (output: ToolResultPart['output']): { content: ToolContent } => {
        const toolResultPart: ToolResultPart = {
            type: 'tool-result',
            toolCallId: msg.toolCallId,
            toolName: msg.name,
            output,
        };
        return { content: [toolResultPart] };
    };

    if (!Array.isArray(msg.content)) {
        return wrap({ type: 'text', value: String(msg.content || '') });
    }

    const first = msg.content[0];

    if (first?.type === 'image') {
        const imageDataBase64 = getImageData(first, this.logger);
        return wrap({
            type: 'content',
            value: [
                {
                    type: 'media',
                    data: imageDataBase64,
                    mediaType: first.mimeType || 'image/jpeg',
                },
            ],
        });
    }

    if (first?.type === 'file') {
        const fileDataBase64 = getFileData(first, this.logger);
        return wrap({
            type: 'content',
            value: [
                {
                    type: 'media',
                    data: fileDataBase64,
                    mediaType: first.mimeType,
                },
            ],
        });
    }

    const textContent = msg.content
        .map((part) => (part.type === 'text' ? part.text : JSON.stringify(part)))
        .join('\n');
    return wrap({ type: 'text', value: textContent });
}
}

View File

@@ -0,0 +1,18 @@
export * from './errors.js';
export * from './error-codes.js';
export * from './registry.js';
export * from './validation.js';
export * from './types.js';
export * from './services/index.js';
export * from './schemas.js';
export {
lookupOpenRouterModel,
refreshOpenRouterModelCache,
getOpenRouterModelContextLength,
getOpenRouterModelInfo,
type LookupStatus,
type OpenRouterModelInfo,
} from './providers/openrouter-model-registry.js';
// Local model providers
export * from './providers/local/index.js';

View File

@@ -0,0 +1,466 @@
/**
* Vercel AI SDK adapter for node-llama-cpp.
*
* This module creates a LanguageModelV2 implementation that wraps node-llama-cpp,
* allowing local GGUF models to be used with the Vercel AI SDK.
*/
/* global ReadableStream, ReadableStreamDefaultController */
import type {
LanguageModelV2,
LanguageModelV2CallOptions,
LanguageModelV2StreamPart,
LanguageModelV2Content,
LanguageModelV2FinishReason,
LanguageModelV2Usage,
LanguageModelV2CallWarning,
} from '@ai-sdk/provider';
import {
loadModel,
isNodeLlamaCppInstalled,
type ModelSession,
type LoadedModel,
} from './node-llama-provider.js';
import { LocalModelError } from './errors.js';
import { getLocalModelById } from './registry.js';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
/**
* Configuration for the local model AI SDK adapter.
*/
export interface LocalModelAdapterConfig {
/** Model ID from the local registry; also exposed as the language model's `modelId` */
modelId: string;
/** Direct path to model file (optional, overrides modelId lookup) */
modelPath?: string;
/**
* Context window size. When omitted, no value is passed to the model loader,
* which then applies its own default.
*/
contextSize?: number;
/** Number of GPU layers to offload (-1 = all, 0 = CPU only). Defaults to -1 when omitted. */
gpuLayers?: number;
/** Number of CPU threads. When omitted, no value is passed to the model loader. */
threads?: number;
}
/**
* Installed model info structure (matches agent-management schema)
*/
interface InstalledModelInfo {
/** Model identifier (presumably the same ID used as the key in ModelState.installed - confirm) */
id: string;
/** Path of the model file; used as the model path when the model is loaded */
filePath: string;
/** File size in bytes */
sizeBytes: number;
/** Download timestamp as a string (format not visible here - presumably ISO 8601, confirm) */
downloadedAt: string;
}
/**
* Model state structure (matches agent-management schema)
* This is the shape expected when parsing `state.json` in the models directory.
*/
interface ModelState {
/** Schema version string of the state file */
version: string;
/** Installed models, looked up by model ID */
installed: Record<string, InstalledModelInfo>;
/** Optional active model ID (not read by the code in this file) */
activeModelId?: string;
}
/**
 * Resolves the directory that holds local model files and their state:
 * `<home>/.dexto/models`.
 *
 * @returns Path of the models directory under the current user's home
 */
function getModelsDirectory(): string {
    const dextoHome = path.join(os.homedir(), '.dexto');
    return path.join(dextoHome, 'models');
}
/**
 * Read installed model info for `modelId` from the state file (`state.json` in
 * the models directory).
 * This is a standalone implementation that doesn't depend on agent-management.
 *
 * The lookup is best-effort: a missing file, unreadable/invalid JSON, or an
 * unexpected shape all yield `null` rather than throwing.
 *
 * @param modelId ID of the installed model to look up
 * @returns The stored entry, or `null` when no usable entry exists
 */
function getInstalledModelInfo(modelId: string): InstalledModelInfo | null {
    const stateFile = path.join(getModelsDirectory(), 'state.json');
    try {
        if (!fs.existsSync(stateFile)) {
            return null;
        }
        const parsed: unknown = JSON.parse(fs.readFileSync(stateFile, 'utf-8'));
        const installed = (parsed as Partial<ModelState> | null)?.installed;
        if (typeof installed !== 'object' || installed === null) {
            return null;
        }
        // Own-property check: IDs such as 'constructor' or 'toString' must not
        // resolve to members inherited from Object.prototype.
        if (!Object.prototype.hasOwnProperty.call(installed, modelId)) {
            return null;
        }
        const entry = installed[modelId];
        // The caller uses entry.filePath as the model path, so an entry without a
        // usable path is treated as "not installed".
        if (!entry || typeof entry.filePath !== 'string' || entry.filePath === '') {
            return null;
        }
        return entry;
    } catch {
        // Best-effort read: treat any I/O or parse failure as "not installed".
        return null;
    }
}
/**
* Custom model info structure (matches agent-management schema)
*/
interface CustomModelInfo {
/** Model name; compared against the requested model ID when resolving a custom path */
name: string;
/** Provider name; only entries with provider 'local' are considered in this file */
provider: string;
/** Path to the model file; entries without it are ignored during path resolution */
filePath?: string;
/** Optional display name (not read by the code in this file) */
displayName?: string;
/** Optional input token limit (not read by the code in this file) */
maxInputTokens?: number;
}
/**
* Custom models storage structure
* This is the shape expected when parsing `custom-models.json` in the models directory.
*/
interface CustomModelsStorage {
/** Schema version of the storage file */
version: number;
/** All stored custom model entries */
models: CustomModelInfo[];
}
/**
 * Resolve the GGUF file path of a user-registered local model from
 * `custom-models.json` in the models directory.
 * This is a standalone implementation that doesn't depend on agent-management.
 *
 * An entry matches when its `name` equals `modelId`, its `provider` is
 * `'local'`, and it carries a non-empty `filePath`. Any failure to read or
 * parse the file is treated as "no match".
 *
 * @param modelId Name of the custom model to look up
 * @returns The matching entry's file path, or `null` if none is found
 */
function getCustomModelFilePath(modelId: string): string | null {
    const customModelsFile = path.join(getModelsDirectory(), 'custom-models.json');
    const isMatchingLocalModel = (candidate: CustomModelInfo): boolean =>
        candidate.name === modelId &&
        candidate.provider === 'local' &&
        Boolean(candidate.filePath);

    try {
        if (fs.existsSync(customModelsFile)) {
            const raw = fs.readFileSync(customModelsFile, 'utf-8');
            const storage: CustomModelsStorage = JSON.parse(raw);
            const match = storage.models.find(isMatchingLocalModel);
            return match?.filePath ?? null;
        }
    } catch {
        // Unreadable or malformed file: fall through to "not found".
    }
    return null;
}
/**
* Create a Vercel AI SDK compatible LanguageModelV2 from a local GGUF model.
* This is a synchronous function that returns a LanguageModel with lazy initialization.
* The actual model loading happens on first use.
*
* @param config Adapter configuration (model ID or direct path, plus load options)
* @returns A new LocalLanguageModel instance wrapping the given config
*/
export function createLocalLanguageModel(config: LocalModelAdapterConfig): LanguageModelV2 {
return new LocalLanguageModel(config);
}
/**
* LanguageModelV2 implementation for local GGUF models.
* Uses lazy initialization - model is loaded on first use.
*/
class LocalLanguageModel implements LanguageModelV2 {
readonly specificationVersion = 'v2' as const;
readonly provider = 'local';
readonly modelId: string;
// Local models don't support URL-based content natively
readonly supportedUrls: Record<string, RegExp[]> = {};
private config: LocalModelAdapterConfig;
private session: ModelSession | null = null;
private loadedModel: LoadedModel | null = null;
private initPromise: Promise<void> | null = null;
private deviceName: string = 'Local';
// Only records the config and model ID; no model is loaded here.
// Loading is deferred to ensureInitialized().
constructor(config: LocalModelAdapterConfig) {
this.modelId = config.modelId;
this.config = config;
}
/**
 * Initialize the model lazily on first use.
 *
 * Concurrent callers share a single in-flight initialization. If that
 * initialization fails, the cached promise is cleared so a later call can
 * retry (for example after the model has been downloaded) instead of
 * re-throwing the stale error forever.
 *
 * @returns Resolves once a session is available
 * @throws Whatever `initialize()` rejects with
 */
private async ensureInitialized(): Promise<void> {
    if (this.session) {
        return;
    }
    if (!this.initPromise) {
        this.initPromise = this.initialize().catch((error: unknown) => {
            // Forget the failed attempt; all current waiters still see the rejection.
            this.initPromise = null;
            throw error;
        });
    }
    return this.initPromise;
}
// Loads the model and opens a session.
// Order: verify node-llama-cpp is installed -> resolve the model file path ->
// load the model -> record the device name -> create the session.
private async initialize(): Promise<void> {
    const {
        modelId,
        modelPath: directPath,
        contextSize, // Let node-llama-cpp default to "auto" if not specified
        gpuLayers = -1,
        threads,
    } = this.config;

    // node-llama-cpp must be installed before anything else is attempted.
    if (!(await isNodeLlamaCppInstalled())) {
        throw LocalModelError.nodeLlamaNotInstalled();
    }

    // Path resolution precedence: explicit path, downloaded model (state.json),
    // custom model (custom-models.json). Otherwise fail with the most specific error.
    const resolveModelPath = (): string => {
        if (directPath) {
            return directPath;
        }
        const installedModel = getInstalledModelInfo(modelId);
        if (installedModel) {
            return installedModel.filePath;
        }
        const customPath = getCustomModelFilePath(modelId);
        if (customPath) {
            return customPath;
        }
        // Consult the registry only to pick the better error message.
        const registryModel = getLocalModelById(modelId);
        if (!registryModel) {
            throw LocalModelError.modelNotFound(modelId);
        }
        throw LocalModelError.modelNotDownloaded(modelId);
    };
    const modelPath = resolveModelPath();

    // Optional fields are only set when defined so the loader can apply its defaults.
    const loadConfig: {
        modelPath: string;
        contextSize?: number;
        gpuLayers: number;
        threads?: number;
    } = { modelPath, gpuLayers };
    if (contextSize !== undefined) {
        loadConfig.contextSize = contextSize;
    }
    if (threads !== undefined) {
        loadConfig.threads = threads;
    }

    const loaded = await loadModel(loadConfig);
    this.loadedModel = loaded;
    this.deviceName = loaded.gpuInfo.deviceName || 'Local';

    // Setting the session is what marks this instance as initialized.
    this.session = await loaded.createSession();
}
/**
 * Non-streaming text generation (V2 interface).
 *
 * Flattens the prompt to plain text, runs it through the session and returns
 * the whole response as a single text part. Token counts are estimates
 * (one token per four characters, rounded up); the finish reason is always
 * 'stop'.
 *
 * @param options AI SDK call options (prompt, maxOutputTokens, temperature, abortSignal)
 * @returns Content, finish reason, estimated usage, provider metadata and warnings
 */
async doGenerate(options: LanguageModelV2CallOptions) {
    await this.ensureInitialized();

    // Rough approximation: ~4 characters per token.
    const estimateTokens = (text: string): number => Math.ceil(text.length / 4);

    const prompt = this.formatPrompt(options);

    // signal is only attached when the caller supplied one.
    const promptOptions: {
        maxTokens: number;
        temperature: number;
        signal?: AbortSignal;
    } = {
        maxTokens: options.maxOutputTokens ?? 1024,
        temperature: options.temperature ?? 0.7,
    };
    if (options.abortSignal) {
        promptOptions.signal = options.abortSignal;
    }

    const response = await this.session!.prompt(prompt, promptOptions);

    const inputTokens = estimateTokens(prompt);
    const outputTokens = estimateTokens(response);
    const usage: LanguageModelV2Usage = {
        inputTokens,
        outputTokens,
        totalTokens: inputTokens + outputTokens,
    };
    const content: LanguageModelV2Content[] = [{ type: 'text', text: response }];
    const finishReason: LanguageModelV2FinishReason = 'stop';
    const warnings: LanguageModelV2CallWarning[] = [];

    return {
        content,
        finishReason,
        usage,
        providerMetadata: {
            local: {
                device: this.deviceName,
            },
        },
        warnings,
    };
}
/**
 * Streaming text generation (V2 interface).
 *
 * Emits, in order: `stream-start`, `text-start`, one `text-delta` per token
 * callback, `text-end`, `finish`, then closes. An AbortError ends the stream
 * the same way as a normal completion (text-end + finish with reason 'stop');
 * any other error is emitted as an `error` part before closing.
 *
 * Input tokens are estimated from the prompt length (one per four characters);
 * output tokens count the token callbacks received.
 *
 * @param options AI SDK call options (prompt, maxOutputTokens, temperature, abortSignal)
 * @returns An object holding the readable stream of stream parts
 */
async doStream(options: LanguageModelV2CallOptions) {
    await this.ensureInitialized();

    const session = this.session!;
    const prompt = this.formatPrompt(options);
    const maxTokens = options.maxOutputTokens ?? 1024;
    const temperature = options.temperature ?? 0.7;
    const abortSignal = options.abortSignal;
    const inputTokens = Math.ceil(prompt.length / 4);
    const textId = 'text-0';
    let outputTokens = 0;

    const stream = new ReadableStream<LanguageModelV2StreamPart>({
        async start(controller) {
            // Closes the text block, reports usage as of now, and ends the stream.
            const finishWithStop = (): void => {
                controller.enqueue({ type: 'text-end', id: textId });
                controller.enqueue({
                    type: 'finish',
                    finishReason: 'stop',
                    usage: {
                        inputTokens,
                        outputTokens,
                        totalTokens: inputTokens + outputTokens,
                    },
                });
                controller.close();
            };

            controller.enqueue({ type: 'stream-start', warnings: [] });
            controller.enqueue({ type: 'text-start', id: textId });

            try {
                // Each token callback becomes one text-delta part.
                const promptOptions: {
                    maxTokens: number;
                    temperature: number;
                    signal?: AbortSignal;
                    onToken: (token: string) => void;
                } = {
                    maxTokens,
                    temperature,
                    onToken: (token: string) => {
                        outputTokens += 1;
                        controller.enqueue({
                            type: 'text-delta',
                            id: textId,
                            delta: token,
                        });
                    },
                };
                if (abortSignal) {
                    promptOptions.signal = abortSignal;
                }

                await session.prompt(prompt, promptOptions);
                finishWithStop();
            } catch (error) {
                if (error instanceof Error && error.name === 'AbortError') {
                    // Aborts end the stream gracefully with whatever was produced so far.
                    finishWithStop();
                } else {
                    controller.enqueue({ type: 'error', error });
                    controller.close();
                }
            }
        },
    });

    return {
        stream,
    };
}
/**
 * Format the prompt from AI SDK message format.
 *
 * Produces a plain-text transcript: one `System:` / `User:` / `Assistant:`
 * segment per message, separated by blank lines and terminated by a bare
 * `Assistant:` cue. Only text parts of user/assistant messages are used;
 * messages of other roles, and messages without text parts, are skipped.
 *
 * @param options AI SDK call options carrying the prompt messages
 * @returns The flattened prompt string
 */
private formatPrompt(options: LanguageModelV2CallOptions): string {
    // Newline-joins the text parts of a message; null when it has none.
    const joinTextParts = (content: ReadonlyArray<{ type: string }>): string | null => {
        const texts: string[] = [];
        for (const part of content) {
            if (part.type === 'text') {
                texts.push((part as { type: 'text'; text: string }).text);
            }
        }
        return texts.length > 0 ? texts.join('\n') : null;
    };

    const segments: string[] = [];
    const messages = options.prompt && Array.isArray(options.prompt) ? options.prompt : [];

    for (const message of messages) {
        switch (message.role) {
            case 'system':
                // System message content is a string
                segments.push(`System: ${message.content}`);
                break;
            case 'user':
            case 'assistant': {
                // User/assistant content is an array of parts
                if (!Array.isArray(message.content)) {
                    break;
                }
                const text = joinTextParts(message.content);
                if (text !== null) {
                    const speaker = message.role === 'user' ? 'User' : 'Assistant';
                    segments.push(`${speaker}: ${text}`);
                }
                break;
            }
            default:
                // Other roles contribute nothing to the prompt.
                break;
        }
    }

    // Trailing cue for the model's reply.
    segments.push('Assistant:');
    return segments.join('\n\n');
}
}

View File

@@ -0,0 +1,436 @@
/**
* Model downloader for local GGUF models.
*
* Downloads models from HuggingFace with:
* - Progress tracking via events
* - Resume support for interrupted downloads
* - Hash verification after download
*/
import { createWriteStream, promises as fs, existsSync, createReadStream } from 'fs';
import { createHash } from 'crypto';
import * as path from 'path';
import type { ModelDownloadProgress, ModelDownloadStatus } from './types.js';
import { LocalModelError } from './errors.js';
import { getLocalModelById } from './registry.js';
/**
* Event emitter interface for download progress.
* All callbacks are optional.
*/
export interface DownloadEvents {
/** Receives a progress snapshot (status, bytes downloaded, total bytes, percentage, ...) */
onProgress?: (progress: ModelDownloadProgress) => void;
/** Receives the model ID and the path of the downloaded file */
onComplete?: (modelId: string, filePath: string) => void;
/** Receives the model ID and the error that occurred */
onError?: (modelId: string, error: Error) => void;
}
/**
* Download options.
*/
export interface DownloadOptions {
/** Directory to save the model */
targetDir: string;
/** Events for progress tracking */
events?: DownloadEvents;
/** HuggingFace token for gated models; sent as a Bearer Authorization header */
hfToken?: string;
/** Whether to verify hash after download */
verifyHash?: boolean;
/** Abort signal for cancellation; forwarded to the underlying fetch request */
signal?: AbortSignal;
/** Expected SHA-256 hash for verification (presumably only consulted when verifyHash is set - confirm) */
expectedHash?: string;
}
/**
* Download result.
*/
export interface DownloadResult {
/** Whether download succeeded */
success: boolean;
/** Full path to downloaded file */
filePath: string;
/** File size in bytes */
sizeBytes: number;
/** SHA-256 hash of the file; optional, so callers must handle its absence */
sha256?: string;
/** Whether download was resumed from partial (a non-empty `.download` temp file existed) */
resumed: boolean;
}
/**
 * Build the HuggingFace download URL for a model file.
 *
 * URL format: https://huggingface.co/{repo}/resolve/main/{filename}
 *
 * Each path segment of the repo ID and filename is percent-encoded so that
 * characters such as spaces, `#`, `?` or `%` cannot truncate or alter the URL.
 * `/` separators (the `org/name` repo form, files in subfolders) are kept.
 * Typical IDs and filenames (letters, digits, `-`, `_`, `.`) are unchanged.
 *
 * @param huggingfaceId Repository ID, e.g. `org/model-name`
 * @param filename File path inside the repository
 * @returns Absolute download URL for the file on the `main` revision
 */
function buildHuggingFaceUrl(huggingfaceId: string, filename: string): string {
    const encodePath = (value: string): string =>
        value
            .split('/')
            .map((segment) => encodeURIComponent(segment))
            .join('/');
    return `https://huggingface.co/${encodePath(huggingfaceId)}/resolve/main/${encodePath(filename)}`;
}
/**
 * Get the size of a partial download file.
 *
 * @param filePath Path of the (possibly missing) partial file
 * @returns The file size in bytes, or 0 when the file cannot be stat'ed
 */
async function getPartialSize(filePath: string): Promise<number> {
    return fs.stat(filePath).then(
        (stats) => stats.size,
        // Missing or inaccessible file: nothing has been downloaded yet.
        () => 0
    );
}
/**
 * Compute the SHA-256 digest of a file by streaming its contents.
 *
 * @param filePath - File to hash.
 * @returns Lower-case hexadecimal digest.
 * @throws Rejects with the underlying stream error if the file cannot be read.
 */
export async function calculateFileHash(filePath: string): Promise<string> {
    const digest = createHash('sha256');
    // Feed the file through the hash chunk by chunk so large models are never
    // held in memory at once.
    for await (const chunk of createReadStream(filePath)) {
        digest.update(chunk);
    }
    return digest.digest('hex');
}
/**
 * Assemble a progress snapshot for a download.
 *
 * `percentage` is derived from the byte counts and is 0 when `totalBytes` is not
 * positive. The optional fields (`speed`, `eta`, `error`) are only present on the
 * returned object when a value other than `undefined` was supplied.
 *
 * @param modelId - Model the snapshot belongs to.
 * @param status - Current download status.
 * @param bytesDownloaded - Bytes on disk so far.
 * @param totalBytes - Expected total size, or 0 when unknown.
 * @param speed - Optional transfer speed.
 * @param eta - Optional estimated time remaining.
 * @param error - Optional error description.
 */
function createProgressEvent(
    modelId: string,
    status: ModelDownloadStatus,
    bytesDownloaded: number,
    totalBytes: number,
    speed?: number,
    eta?: number,
    error?: string
): ModelDownloadProgress {
    const percentage = totalBytes > 0 ? (bytesDownloaded / totalBytes) * 100 : 0;
    return {
        modelId,
        status,
        bytesDownloaded,
        totalBytes,
        percentage,
        ...(speed !== undefined ? { speed } : {}),
        ...(eta !== undefined ? { eta } : {}),
        ...(error !== undefined ? { error } : {}),
    };
}
/**
 * Resolve once `stream` can accept more data, or reject if it errors first.
 */
function waitForDrain(stream: ReturnType<typeof createWriteStream>): Promise<void> {
    return new Promise<void>((resolve, reject) => {
        const onDrain = (): void => {
            stream.off('error', onError);
            resolve();
        };
        const onError = (err: Error): void => {
            stream.off('drain', onDrain);
            reject(err);
        };
        stream.once('drain', onDrain);
        stream.once('error', onError);
    });
}

/**
 * Stream a file from `url` into `targetPath`, resuming a previous attempt when possible.
 *
 * Data is written to `${targetPath}.download` and renamed to `targetPath` only after
 * the body has been fully written and the file handle is closed. If a partial temp
 * file exists, a `Range` request is sent; the partial data is kept only when the
 * server actually answers `206 Partial Content`. A `200` answer restarts the file
 * from scratch, and a `416` answer discards the temp file and retries once without
 * a range.
 *
 * @param url - Source URL.
 * @param targetPath - Final location of the downloaded file.
 * @param options - Download options; `events`, `hfToken` and `signal` are used here.
 * @param modelId - Identifier used in progress events and error messages.
 * @param expectedSize - Size used for progress when the response has no usable
 *                       `content-length` (0 when unknown).
 * @returns Result describing the final file; `resumed` is true only when the
 *          server honoured the range request.
 * @throws `LocalModelError.hfAuthRequired` on HTTP 401/403,
 *         `LocalModelError.downloadFailed` on other non-success responses or a
 *         missing body, `LocalModelError.downloadInterrupted` when aborted, and
 *         any filesystem error raised while writing.
 */
async function downloadFromHuggingFace(
    url: string,
    targetPath: string,
    options: DownloadOptions,
    modelId: string,
    expectedSize: number
): Promise<DownloadResult> {
    const { events, hfToken, signal } = options;

    // A leftover temp file means an earlier attempt was interrupted.
    const tempPath = `${targetPath}.download`;
    const partialSize = await getPartialSize(tempPath);

    const headers: Record<string, string> = {
        'User-Agent': 'Dexto/1.0',
    };
    // Add auth token for gated models
    if (hfToken) {
        headers['Authorization'] = `Bearer ${hfToken}`;
    }
    // Ask the server to continue where the partial file ends.
    if (partialSize > 0) {
        headers['Range'] = `bytes=${partialSize}-`;
    }

    try {
        // Build fetch options - only include signal if provided
        const fetchOptions: RequestInit = { headers };
        if (signal) {
            fetchOptions.signal = signal;
        }
        const response = await fetch(url, fetchOptions);

        // Check for auth errors (gated models)
        if (response.status === 401 || response.status === 403) {
            throw LocalModelError.hfAuthRequired(modelId);
        }

        // 416 means the requested range starts at or beyond the end of the remote
        // file, so the temp file is stale or already complete. Without this branch
        // every retry would fail the same way until the file was removed by hand.
        if (response.status === 416 && partialSize > 0) {
            await response.body?.cancel().catch(() => undefined);
            await fs.rm(tempPath, { force: true });
            return await downloadFromHuggingFace(url, targetPath, options, modelId, expectedSize);
        }

        if (!response.ok) {
            throw LocalModelError.downloadFailed(
                modelId,
                `HTTP ${response.status}: ${response.statusText}`
            );
        }

        // Only a 206 proves the body is the remainder of the file. A 200 carries
        // the whole file, so appending it to the partial data would corrupt it.
        const resumed = partialSize > 0 && response.status === 206;
        const startOffset = resumed ? partialSize : 0;

        // content-length covers just this response body; fall back to the caller's
        // expected size when the header is missing or unparsable.
        const contentLength = Number.parseInt(response.headers.get('content-length') ?? '', 10);
        const totalBytes =
            Number.isFinite(contentLength) && contentLength > 0
                ? startOffset + contentLength
                : expectedSize;

        // Check for a body before touching the temp file so a bad response cannot
        // truncate previously downloaded data.
        const reader = response.body?.getReader();
        if (!reader) {
            throw LocalModelError.downloadFailed(modelId, 'No response body');
        }

        // Ensure target directory exists
        await fs.mkdir(path.dirname(tempPath), { recursive: true });

        // Open file for writing (append only when genuinely resuming)
        const writeStream = createWriteStream(tempPath, {
            flags: resumed ? 'a' : 'w',
        });
        // Without a listener a write failure (e.g. disk full) would surface as an
        // uncaught 'error' event instead of a rejected promise.
        const streamState: { error?: Error } = {};
        writeStream.on('error', (err: Error) => {
            streamState.error = err;
        });

        let bytesDownloaded = startOffset;
        const startTime = Date.now();
        let lastProgressUpdate = startTime;

        try {
            while (true) {
                const { done, value } = await reader.read();
                if (done) break;
                if (streamState.error) throw streamState.error;

                // Respect backpressure so a fast network cannot buffer gigabytes
                // in memory while the disk catches up.
                if (!writeStream.write(value)) {
                    await waitForDrain(writeStream);
                }
                bytesDownloaded += value.length;

                // Emit progress at most every 100ms
                const now = Date.now();
                if (now - lastProgressUpdate > 100) {
                    lastProgressUpdate = now;
                    const elapsedSeconds = (now - startTime) / 1000;
                    const speed =
                        elapsedSeconds > 0 ? (bytesDownloaded - startOffset) / elapsedSeconds : 0;
                    const remainingBytes = Math.max(0, totalBytes - bytesDownloaded);
                    const eta = speed > 0 ? remainingBytes / speed : 0;
                    events?.onProgress?.(
                        createProgressEvent(
                            modelId,
                            'downloading',
                            bytesDownloaded,
                            totalBytes,
                            speed,
                            eta
                        )
                    );
                }
            }

            // Flush and close the file; wait for 'close' so the handle is released
            // before the rename below.
            if (streamState.error) throw streamState.error;
            await new Promise<void>((resolve, reject) => {
                writeStream.once('close', () => {
                    if (streamState.error) reject(streamState.error);
                    else resolve();
                });
                writeStream.end();
            });
        } catch (error) {
            writeStream.destroy();
            // Release the HTTP connection; failures here are irrelevant to the caller.
            void reader.cancel().catch(() => undefined);
            throw error;
        }

        // Emit verifying status
        events?.onProgress?.(
            createProgressEvent(
                modelId,
                'verifying',
                bytesDownloaded,
                totalBytes > 0 ? totalBytes : bytesDownloaded
            )
        );

        // Rename temp file to final path
        await fs.rename(tempPath, targetPath);

        // Get final file size
        const stats = await fs.stat(targetPath);

        // Emit complete status
        events?.onProgress?.(createProgressEvent(modelId, 'complete', stats.size, stats.size));

        return {
            success: true,
            filePath: targetPath,
            sizeBytes: stats.size,
            resumed,
        };
    } catch (error) {
        if (error instanceof Error && error.name === 'AbortError') {
            throw LocalModelError.downloadInterrupted(modelId);
        }
        throw error;
    }
}
/**
 * Download a registry model by ID into `options.targetDir`.
 *
 * If the target file already exists with the registry's expected size it is
 * returned as-is without contacting the network; a file of any other size is
 * deleted and downloaded again. When both `options.verifyHash` and
 * `options.expectedHash` are set, the SHA-256 of the downloaded file is compared
 * against the expected value (ignoring letter case and surrounding whitespace)
 * and the file is removed on mismatch.
 *
 * @param modelId - ID of a model in the local model registry.
 * @param options - Target directory, callbacks, auth token and verification settings.
 * @returns Result describing the downloaded (or already present) file.
 * @throws `LocalModelError.modelNotFound` when the ID is not in the registry,
 *         `LocalModelError.hashMismatch` when verification fails, and any error
 *         raised by the underlying download.
 */
export async function downloadModel(
    modelId: string,
    options: DownloadOptions
): Promise<DownloadResult> {
    const modelInfo = getLocalModelById(modelId);
    if (!modelInfo) {
        throw LocalModelError.modelNotFound(modelId);
    }

    const targetPath = path.join(options.targetDir, modelInfo.filename);
    const url = buildHuggingFaceUrl(modelInfo.huggingfaceId, modelInfo.filename);

    // Check if file already exists
    if (existsSync(targetPath)) {
        const stats = await fs.stat(targetPath);
        // A size match is taken as proof of a complete earlier download.
        if (stats.size === modelInfo.sizeBytes) {
            return {
                success: true,
                filePath: targetPath,
                sizeBytes: stats.size,
                resumed: false,
            };
        }
        // Delete partial/corrupt file
        await fs.unlink(targetPath);
    }

    try {
        // Emit pending status
        options.events?.onProgress?.(
            createProgressEvent(modelId, 'pending', 0, modelInfo.sizeBytes)
        );

        const result = await downloadFromHuggingFace(
            url,
            targetPath,
            options,
            modelId,
            modelInfo.sizeBytes
        );

        // Verify hash if requested and expected hash is provided
        if (options.verifyHash && options.expectedHash) {
            const actualHash = await calculateFileHash(targetPath);
            // Hex digests are case-insensitive; some tools print them upper-case.
            // Comparing raw strings would delete a perfectly valid download.
            const expectedNormalized = options.expectedHash.trim().toLowerCase();
            if (actualHash.toLowerCase() !== expectedNormalized) {
                // Delete corrupted file. A failure to delete must not hide the
                // mismatch, which is the error the caller needs to see.
                try {
                    await fs.unlink(targetPath);
                } catch {
                    // Best effort only.
                }
                throw LocalModelError.hashMismatch(modelId, options.expectedHash, actualHash);
            }
            result.sha256 = actualHash;
        }

        options.events?.onComplete?.(modelId, targetPath);
        return result;
    } catch (error) {
        // onError is typed to receive an Error; wrap anything else that was thrown.
        options.events?.onError?.(
            modelId,
            error instanceof Error ? error : new Error(String(error))
        );
        throw error;
    }
}
/**
 * Download a model directly from a URL (for custom models not in the registry).
 *
 * Honours the same verification options as `downloadModel`: when both
 * `options.verifyHash` and `options.expectedHash` are set, the downloaded file's
 * SHA-256 is compared (ignoring letter case and surrounding whitespace) and the
 * file is removed on mismatch.
 *
 * NOTE(review): `options.hfToken`, if set, is sent as a Bearer token to whatever
 * host `url` points at, and `filename` is joined onto `targetDir` unsanitised.
 * Callers should only pass trusted values for both - confirm against call sites.
 *
 * @param modelId - Identifier used in events and error messages.
 * @param url - Source URL.
 * @param filename - File name to store the model under inside `options.targetDir`.
 * @param options - Target directory, callbacks, auth token and verification settings.
 * @returns Result describing the downloaded file.
 * @throws `LocalModelError.hashMismatch` when verification fails, and any error
 *         raised by the underlying download.
 */
export async function downloadModelFromUrl(
    modelId: string,
    url: string,
    filename: string,
    options: DownloadOptions
): Promise<DownloadResult> {
    const targetPath = path.join(options.targetDir, filename);

    try {
        // Emit pending status (total size is unknown for custom URLs)
        options.events?.onProgress?.(createProgressEvent(modelId, 'pending', 0, 0));

        const result = await downloadFromHuggingFace(url, targetPath, options, modelId, 0);

        // Verify hash if requested and expected hash is provided
        if (options.verifyHash && options.expectedHash) {
            const actualHash = await calculateFileHash(targetPath);
            const expectedNormalized = options.expectedHash.trim().toLowerCase();
            if (actualHash.toLowerCase() !== expectedNormalized) {
                // Remove the bad file, but never let a failed delete hide the mismatch.
                try {
                    await fs.unlink(targetPath);
                } catch {
                    // Best effort only.
                }
                throw LocalModelError.hashMismatch(modelId, options.expectedHash, actualHash);
            }
            result.sha256 = actualHash;
        }

        options.events?.onComplete?.(modelId, targetPath);
        return result;
    } catch (error) {
        // onError is typed to receive an Error; wrap anything else that was thrown.
        options.events?.onError?.(
            modelId,
            error instanceof Error ? error : new Error(String(error))
        );
        throw error;
    }
}
/**
 * Report the free disk space, in bytes, available at `targetDir`.
 *
 * The directory is created (recursively) if it does not exist. When the runtime
 * provides `fs.promises.statfs`, the result is the number of bytes available to
 * the current user on that filesystem. On runtimes without `statfs`, or if the
 * query fails, the function falls back to `Number.MAX_SAFE_INTEGER` and leaves
 * out-of-space conditions to surface as write errors.
 *
 * @param targetDir - Directory the download will be written to.
 * @returns Free bytes (capped at `Number.MAX_SAFE_INTEGER`), or 0 when the
 *          directory neither exists nor can be created.
 */
export async function checkDiskSpace(targetDir: string): Promise<number> {
    try {
        await fs.access(targetDir);
    } catch {
        // Directory doesn't exist, try to create it to check permissions
        try {
            await fs.mkdir(targetDir, { recursive: true });
        } catch {
            return 0;
        }
    }

    // statfs is only present on newer Node.js releases (and newer type
    // definitions), so look it up dynamically instead of referencing it statically.
    type StatFsLike = { bavail: number | bigint; bsize: number | bigint };
    const statfs: unknown = Reflect.get(fs, 'statfs');
    if (typeof statfs === 'function') {
        try {
            const stats = (await statfs.call(fs, targetDir)) as StatFsLike;
            // bavail = blocks available to unprivileged users, bsize = block size.
            const freeBytes = Number(stats.bavail) * Number(stats.bsize);
            if (Number.isFinite(freeBytes) && freeBytes >= 0) {
                return Math.min(freeBytes, Number.MAX_SAFE_INTEGER);
            }
        } catch {
            // Fall through to the permissive default below.
        }
    }

    return Number.MAX_SAFE_INTEGER;
}
/**
 * Guard that rejects when `targetDir` does not have room for a model.
 *
 * @param modelId - Model being downloaded; used in the error.
 * @param requiredBytes - Space the model needs.
 * @param targetDir - Directory the model will be written to.
 * @throws `LocalModelError.insufficientDiskSpace` when the space reported by
 *         `checkDiskSpace` is smaller than `requiredBytes`.
 */
export async function validateDiskSpace(
    modelId: string,
    requiredBytes: number,
    targetDir: string
): Promise<void> {
    const freeBytes = await checkDiskSpace(targetDir);
    const isShort = freeBytes < requiredBytes;
    if (isShort) {
        throw LocalModelError.insufficientDiskSpace(modelId, requiredBytes, freeBytes);
    }
}
/**
 * Remove the `.download` temp file left behind by an unfinished download.
 *
 * Best effort: any failure to delete (including the file not existing) is ignored.
 *
 * @param targetDir - Directory the model was being downloaded into.
 * @param filename - Final file name of the model (without the `.download` suffix).
 */
export async function cleanupPartialDownload(targetDir: string, filename: string): Promise<void> {
    const partialFile = path.join(targetDir, filename + '.download');
    await fs.unlink(partialFile).catch(() => undefined);
}
/**
 * Tell whether a `.download` temp file exists for the given model file.
 *
 * The check is purely filesystem based: it reports whether the temp file is
 * accessible, not whether a download is actively running.
 *
 * @param targetDir - Directory the model is downloaded into.
 * @param filename - Final file name of the model (without the `.download` suffix).
 */
export async function isDownloadInProgress(targetDir: string, filename: string): Promise<boolean> {
    const partialFile = path.join(targetDir, filename + '.download');
    return fs.access(partialFile).then(
        () => true,
        () => false
    );
}
/**
 * Describe how far a previously started download got, based on its temp file.
 *
 * @param modelId - Model the temp file belongs to.
 * @param targetDir - Directory the model is downloaded into.
 * @param filename - Final file name of the model (without the `.download` suffix).
 * @param totalBytes - Full size of the model, used to compute the percentage.
 * @returns A `'downloading'` progress snapshot sized from the temp file, or
 *          `null` when the temp file cannot be stat'ed.
 */
export async function getPartialDownloadProgress(
    modelId: string,
    targetDir: string,
    filename: string,
    totalBytes: number
): Promise<ModelDownloadProgress | null> {
    const partialFile = path.join(targetDir, filename + '.download');
    const partialStats = await fs.stat(partialFile).catch(() => null);
    if (partialStats === null) {
        return null;
    }
    return createProgressEvent(modelId, 'downloading', partialStats.size, totalBytes);
}

View File

@@ -0,0 +1,74 @@
/**
* Error codes for local model operations.
*
* Each member's string value is what is passed as the error code when the
* `LocalModelError` factory constructs an error.
*
* Format: LOCAL_XXX where XXX groups by category:
* - 001-009: Installation errors
* - 010-019: Download errors
* - 020-029: Model errors
* - 030-039: GPU errors
* - 040-049: Ollama errors
*
* Not every number in a range is assigned; the gaps leave room for new codes
* within the same category.
*/
export enum LocalModelErrorCode {
// Installation errors (001-009)
/** node-llama-cpp package is not installed */
NODE_LLAMA_NOT_INSTALLED = 'LOCAL_001',
/** Failed to install node-llama-cpp */
NODE_LLAMA_INSTALL_FAILED = 'LOCAL_002',
/** CMake not found (required for building from source) */
CMAKE_NOT_FOUND = 'LOCAL_003',
/** Build from source failed */
BUILD_FAILED = 'LOCAL_004',
// Download errors (010-019)
/** Model download failed */
DOWNLOAD_FAILED = 'LOCAL_010',
/** Download was interrupted */
DOWNLOAD_INTERRUPTED = 'LOCAL_011',
/** Downloaded file hash doesn't match expected */
DOWNLOAD_HASH_MISMATCH = 'LOCAL_012',
/** Insufficient disk space for download */
INSUFFICIENT_DISK_SPACE = 'LOCAL_013',
/** HuggingFace authentication required for gated model */
HF_AUTH_REQUIRED = 'LOCAL_014',
/** Network error during download */
NETWORK_ERROR = 'LOCAL_015',
// Model errors (020-029)
/** Model not found in registry */
MODEL_NOT_FOUND = 'LOCAL_020',
/** Model not downloaded locally */
MODEL_NOT_DOWNLOADED = 'LOCAL_021',
/** Failed to load model */
MODEL_LOAD_FAILED = 'LOCAL_022',
/** Model file is corrupted */
MODEL_CORRUPT = 'LOCAL_023',
/** Invalid GGUF format */
INVALID_GGUF = 'LOCAL_024',
/** Model context too large for available memory */
CONTEXT_TOO_LARGE = 'LOCAL_025',
// GPU errors (030-039)
/** No GPU acceleration available */
GPU_NOT_AVAILABLE = 'LOCAL_030',
/** Insufficient VRAM for model */
INSUFFICIENT_VRAM = 'LOCAL_031',
/** GPU driver error */
GPU_DRIVER_ERROR = 'LOCAL_032',
/** Metal not available (macOS only) */
METAL_NOT_AVAILABLE = 'LOCAL_033',
/** CUDA not available */
CUDA_NOT_AVAILABLE = 'LOCAL_034',
/** Vulkan not available */
VULKAN_NOT_AVAILABLE = 'LOCAL_035',
// Ollama errors (040-049)
/** Ollama server is not running */
OLLAMA_NOT_RUNNING = 'LOCAL_040',
/** Model not found on Ollama server */
OLLAMA_MODEL_NOT_FOUND = 'LOCAL_041',
/** Failed to pull model from Ollama */
OLLAMA_PULL_FAILED = 'LOCAL_042',
/** Ollama API error */
OLLAMA_API_ERROR = 'LOCAL_043',
/** Ollama version incompatible */
OLLAMA_VERSION_INCOMPATIBLE = 'LOCAL_044',
}

View File

@@ -0,0 +1,242 @@
/**
* Error factory for local model errors.
* Follows the project's error factory pattern.
*/
import { DextoRuntimeError } from '../../../errors/DextoRuntimeError.js';
import { ErrorType } from '../../../errors/types.js';
import { LocalModelErrorCode } from './error-codes.js';
const SCOPE = 'local-models';
/**
 * Factory functions that build the `DextoRuntimeError` for each local-model failure.
 *
 * Every factory returns (never throws) an error carrying a `LocalModelErrorCode`,
 * the module scope, an `ErrorType`, a human-readable message, a context object
 * with the factory's arguments, and a recovery hint.
 */
export const LocalModelError = {
    // ---- Installation ----

    /** node-llama-cpp could not be resolved. */
    nodeLlamaNotInstalled: (): DextoRuntimeError =>
        new DextoRuntimeError(
            LocalModelErrorCode.NODE_LLAMA_NOT_INSTALLED,
            SCOPE,
            ErrorType.NOT_FOUND,
            'node-llama-cpp is not installed. Run `dexto setup` and select "local" provider to install it.',
            {},
            'Run `dexto setup` and select "local" provider to install local model support'
        ),

    /** Installing node-llama-cpp failed; `error` is the underlying message. */
    nodeLlamaInstallFailed: (error: string): DextoRuntimeError =>
        new DextoRuntimeError(
            LocalModelErrorCode.NODE_LLAMA_INSTALL_FAILED,
            SCOPE,
            ErrorType.THIRD_PARTY,
            `Failed to install node-llama-cpp: ${error}`,
            { error },
            'Check your Node.js version and try again. CMake may be required for your platform.'
        ),

    /** CMake is needed for a source build but is missing. */
    cmakeNotFound: (): DextoRuntimeError =>
        new DextoRuntimeError(
            LocalModelErrorCode.CMAKE_NOT_FOUND,
            SCOPE,
            ErrorType.NOT_FOUND,
            'CMake is required to build node-llama-cpp from source but was not found.',
            {},
            'Install CMake: brew install cmake (macOS), apt install cmake (Linux), or download from cmake.org (Windows)'
        ),

    // ---- Download ----

    /** A download failed; `error` is the underlying message. */
    downloadFailed: (modelId: string, error: string): DextoRuntimeError =>
        new DextoRuntimeError(
            LocalModelErrorCode.DOWNLOAD_FAILED,
            SCOPE,
            ErrorType.THIRD_PARTY,
            `Failed to download model '${modelId}': ${error}`,
            { modelId, error },
            'Check your internet connection and try again'
        ),

    /** A download stopped before completing. */
    downloadInterrupted: (modelId: string): DextoRuntimeError =>
        new DextoRuntimeError(
            LocalModelErrorCode.DOWNLOAD_INTERRUPTED,
            SCOPE,
            ErrorType.THIRD_PARTY,
            `Download of model '${modelId}' was interrupted`,
            { modelId },
            'Run the download command again to resume'
        ),

    /** The downloaded file's hash differs from the expected one. */
    hashMismatch: (modelId: string, expected: string, actual: string): DextoRuntimeError =>
        new DextoRuntimeError(
            LocalModelErrorCode.DOWNLOAD_HASH_MISMATCH,
            SCOPE,
            ErrorType.USER,
            `Downloaded model '${modelId}' has invalid hash. Expected: ${expected}, Got: ${actual}`,
            { modelId, expected, actual },
            'Delete the file and download again'
        ),

    /** Not enough free disk space; both sizes are given in bytes and shown in GB. */
    insufficientDiskSpace: (
        modelId: string,
        required: number,
        available: number
    ): DextoRuntimeError => {
        const bytesPerGB = 1024 * 1024 * 1024;
        const requiredGB = (required / bytesPerGB).toFixed(1);
        const availableGB = (available / bytesPerGB).toFixed(1);
        return new DextoRuntimeError(
            LocalModelErrorCode.INSUFFICIENT_DISK_SPACE,
            SCOPE,
            ErrorType.USER,
            `Insufficient disk space to download '${modelId}'. Required: ${requiredGB}GB, Available: ${availableGB}GB`,
            { modelId, required, available },
            'Free up disk space or choose a smaller model'
        );
    },

    /** The model is gated and needs HuggingFace credentials. */
    hfAuthRequired: (modelId: string): DextoRuntimeError =>
        new DextoRuntimeError(
            LocalModelErrorCode.HF_AUTH_REQUIRED,
            SCOPE,
            ErrorType.FORBIDDEN,
            `Model '${modelId}' is a gated model and requires HuggingFace authentication`,
            { modelId },
            'Set HF_TOKEN environment variable or run `huggingface-cli login`'
        ),

    // ---- Model ----

    /** The ID is not present in the local model registry. */
    modelNotFound: (modelId: string): DextoRuntimeError =>
        new DextoRuntimeError(
            LocalModelErrorCode.MODEL_NOT_FOUND,
            SCOPE,
            ErrorType.NOT_FOUND,
            `Model '${modelId}' not found in local model registry`,
            { modelId },
            'Run `dexto setup` and select "local" to see available models'
        ),

    /** The model is known but has not been downloaded. */
    modelNotDownloaded: (modelId: string): DextoRuntimeError =>
        new DextoRuntimeError(
            LocalModelErrorCode.MODEL_NOT_DOWNLOADED,
            SCOPE,
            ErrorType.NOT_FOUND,
            `Model '${modelId}' is not downloaded. Download it first.`,
            { modelId },
            'Run `dexto setup` and select "local" to download models'
        ),

    /** Loading the model failed; `error` is the underlying message. */
    modelLoadFailed: (modelId: string, error: string): DextoRuntimeError =>
        new DextoRuntimeError(
            LocalModelErrorCode.MODEL_LOAD_FAILED,
            SCOPE,
            ErrorType.THIRD_PARTY,
            `Failed to load model '${modelId}': ${error}`,
            { modelId, error },
            'The model file may be corrupted. Try re-downloading it.'
        ),

    /** The model file at `filePath` looks corrupted. */
    modelCorrupt: (modelId: string, filePath: string): DextoRuntimeError =>
        new DextoRuntimeError(
            LocalModelErrorCode.MODEL_CORRUPT,
            SCOPE,
            ErrorType.USER,
            `Model file for '${modelId}' appears to be corrupted`,
            { modelId, filePath },
            `Delete ${filePath} and download the model again`
        ),

    /** The requested context size exceeds what the model supports. */
    contextTooLarge: (
        modelId: string,
        requested: number,
        maxSupported: number
    ): DextoRuntimeError =>
        new DextoRuntimeError(
            LocalModelErrorCode.CONTEXT_TOO_LARGE,
            SCOPE,
            ErrorType.USER,
            `Requested context size ${requested} exceeds model's maximum of ${maxSupported}`,
            { modelId, requested, maxSupported },
            `Use a context size of ${maxSupported} or less`
        ),

    // ---- GPU ----

    /** No GPU backend was detected. */
    gpuNotAvailable: (): DextoRuntimeError =>
        new DextoRuntimeError(
            LocalModelErrorCode.GPU_NOT_AVAILABLE,
            SCOPE,
            ErrorType.NOT_FOUND,
            'No GPU acceleration available. Running on CPU.',
            {},
            'For better performance, ensure GPU drivers are installed'
        ),

    /** The model needs more VRAM than is available; both values are shown as GB. */
    insufficientVRAM: (modelId: string, required: number, available: number): DextoRuntimeError =>
        new DextoRuntimeError(
            LocalModelErrorCode.INSUFFICIENT_VRAM,
            SCOPE,
            ErrorType.USER,
            `Model '${modelId}' requires ${required}GB VRAM but only ${available}GB available`,
            { modelId, required, available },
            'Use a smaller quantization or reduce GPU layers'
        ),

    /** The GPU driver reported a failure; `error` is the underlying message. */
    gpuDriverError: (error: string): DextoRuntimeError =>
        new DextoRuntimeError(
            LocalModelErrorCode.GPU_DRIVER_ERROR,
            SCOPE,
            ErrorType.THIRD_PARTY,
            `GPU driver error: ${error}`,
            { error },
            'Update your GPU drivers'
        ),

    // ---- Ollama ----

    /** No Ollama server answered at `url`. */
    ollamaNotRunning: (url: string): DextoRuntimeError =>
        new DextoRuntimeError(
            LocalModelErrorCode.OLLAMA_NOT_RUNNING,
            SCOPE,
            ErrorType.THIRD_PARTY,
            `Ollama server is not running at ${url}`,
            { url },
            'Start Ollama with `ollama serve` or ensure it is running'
        ),

    /** The Ollama server does not have the named model. */
    ollamaModelNotFound: (modelName: string): DextoRuntimeError =>
        new DextoRuntimeError(
            LocalModelErrorCode.OLLAMA_MODEL_NOT_FOUND,
            SCOPE,
            ErrorType.NOT_FOUND,
            `Model '${modelName}' not found on Ollama server`,
            { modelName },
            `Pull the model with \`ollama pull ${modelName}\``
        ),

    /** Pulling a model through Ollama failed; `error` is the underlying message. */
    ollamaPullFailed: (modelName: string, error: string): DextoRuntimeError =>
        new DextoRuntimeError(
            LocalModelErrorCode.OLLAMA_PULL_FAILED,
            SCOPE,
            ErrorType.THIRD_PARTY,
            `Failed to pull model '${modelName}' from Ollama: ${error}`,
            { modelName, error },
            'Check your internet connection and Ollama server status'
        ),

    /** The Ollama API returned an error; `error` is the underlying message. */
    ollamaApiError: (error: string): DextoRuntimeError =>
        new DextoRuntimeError(
            LocalModelErrorCode.OLLAMA_API_ERROR,
            SCOPE,
            ErrorType.THIRD_PARTY,
            `Ollama API error: ${error}`,
            { error },
            'Check Ollama server logs for details'
        ),
};

View File

@@ -0,0 +1,266 @@
/**
* GPU detection for local model acceleration.
*
* Detects available GPU backends:
* - Metal: Apple Silicon (M1/M2/M3/M4 series)
* - CUDA: NVIDIA GPUs on Linux/Windows
* - Vulkan: Cross-platform fallback for AMD/Intel GPUs
* - CPU: Fallback when no GPU is available
*/
import { exec } from 'child_process';
import { promisify } from 'util';
import * as os from 'os';
import type { GPUBackend, GPUInfo } from './types.js';
const execAsync = promisify(exec);
/**
 * Pick the GPU backend to use on this machine.
 *
 * Probes are tried in platform-specific order and the first available one wins:
 * Metal on macOS; CUDA, then Vulkan, on Linux and Windows. When nothing is
 * available (or on any other platform) a CPU entry is returned instead.
 *
 * @returns Info for the selected backend; always has `available: true`.
 */
export async function detectGPU(): Promise<GPUInfo> {
    const platform = os.platform();

    // Ordered list of probes that make sense for this OS.
    const probes: Array<() => Promise<GPUInfo>> = [];
    if (platform === 'darwin') {
        probes.push(detectMetal);
    }
    if (platform === 'linux' || platform === 'win32') {
        probes.push(detectCUDA, detectVulkan);
    }

    for (const probe of probes) {
        const info = await probe();
        if (info.available) {
            return info;
        }
    }

    // Nothing usable was found: fall back to the CPU, labelled with its model name.
    const cpuModel = os.cpus()[0]?.model ?? 'Unknown CPU';
    return {
        backend: 'cpu',
        available: true,
        deviceName: `${cpuModel}`,
    };
}
/**
 * Probe for a Metal-capable GPU by querying `system_profiler` (macOS).
 *
 * Only the first display adapter reported is considered. For chips whose name
 * looks like Apple Silicon the VRAM is reported as total system memory (unified
 * memory); otherwise the adapter's `sppci_vram` string (e.g. "8 GB") is parsed
 * when present.
 *
 * @returns Metal info with `available: true` when an adapter is found; otherwise
 *          (including when the command fails or its output cannot be parsed)
 *          `{ backend: 'metal', available: false }`.
 */
async function detectMetal(): Promise<GPUInfo> {
    try {
        const { stdout } = await execAsync('system_profiler SPDisplaysDataType -json 2>/dev/null');
        const adapter = JSON.parse(stdout)?.SPDisplaysDataType?.[0];

        if (adapter) {
            const chipName = adapter.sppci_model ?? adapter._name ?? 'Apple GPU';
            const lowered = chipName.toLowerCase();
            const appleMarkers = ['apple', 'm1', 'm2', 'm3', 'm4'];
            const isAppleSilicon = appleMarkers.some((marker) => lowered.includes(marker));

            const info: GPUInfo = {
                backend: 'metal',
                available: true,
                deviceName: chipName,
            };

            if (isAppleSilicon) {
                // Unified memory: the GPU can address all of system RAM.
                info.vramMB = Math.round(os.totalmem() / (1024 * 1024));
            } else if (adapter.sppci_vram) {
                // Discrete GPU: parse a string such as "8 GB" or "512 MB".
                const vramMatch = adapter.sppci_vram.match(/(\d+)\s*(GB|MB)/i);
                if (vramMatch) {
                    const unitFactor = vramMatch[2]!.toUpperCase() === 'GB' ? 1024 : 1;
                    info.vramMB = parseInt(vramMatch[1]!) * unitFactor;
                }
            }
            return info;
        }
    } catch {
        // Command missing, non-macOS host, or unparsable output: treat as no Metal.
    }

    return {
        backend: 'metal',
        available: false,
    };
}
/**
 * Probe for an NVIDIA GPU by querying `nvidia-smi`.
 *
 * Only the first GPU listed is reported. The command is run without shell
 * redirection: `detectGPU` also calls this on Windows, where the default shell
 * is `cmd.exe` and a `2>/dev/null` suffix makes the whole command fail. stderr
 * is captured by `exec`, so nothing needs to be suppressed.
 *
 * @returns CUDA info with `available: true` when `nvidia-smi` lists a GPU;
 *          `vramMB` is only set when the reported memory is numeric. Otherwise
 *          `{ backend: 'cuda', available: false }`.
 */
async function detectCUDA(): Promise<GPUInfo> {
    try {
        const { stdout } = await execAsync(
            'nvidia-smi --query-gpu=name,memory.total,driver_version --format=csv,noheader,nounits'
        );

        // One CSV line per GPU; take the first non-empty one.
        const firstGpu = stdout.split(/\r?\n/).find((line) => line.trim().length > 0);
        if (firstGpu) {
            const [name, memoryMB, driverVersion] = firstGpu.split(',').map((s) => s.trim());

            const result: GPUInfo = {
                backend: 'cuda',
                available: true,
            };
            if (name) {
                result.deviceName = name;
            }
            // nvidia-smi prints placeholders such as "[N/A]" for unsupported
            // fields; skip those instead of storing NaN.
            const vramMB = Number.parseInt(memoryMB ?? '', 10);
            if (Number.isFinite(vramMB)) {
                result.vramMB = vramMB;
            }
            if (driverVersion) {
                result.driverVersion = driverVersion;
            }
            return result;
        }
    } catch {
        // nvidia-smi not available or no NVIDIA GPU
    }

    return {
        backend: 'cuda',
        available: false,
    };
}
/**
 * Probe for Vulkan support.
 *
 * First tries `vulkaninfo --summary` and reads the device name (and heap size,
 * when printed) from its output. The command is run without shell redirection
 * because `detectGPU` also calls this on Windows, where `2>/dev/null` makes the
 * command fail under `cmd.exe`. If `vulkaninfo` is unavailable on Linux, falls
 * back to scanning `lspci` for an AMD/Intel/Radeon display adapter.
 *
 * NOTE(review): the lspci fallback reports Vulkan as available purely because a
 * matching adapter exists; it does not prove a Vulkan driver is installed.
 *
 * @returns Vulkan info with `available: true` when either probe succeeds,
 *          otherwise `{ backend: 'vulkan', available: false }`.
 */
async function detectVulkan(): Promise<GPUInfo> {
    try {
        // Try vulkaninfo command (available when Vulkan SDK is installed)
        const { stdout } = await execAsync('vulkaninfo --summary');

        const deviceName = stdout.match(/deviceName\s*=\s*(.+)/)?.[1]?.trim() ?? 'Vulkan GPU';
        const result: GPUInfo = {
            backend: 'vulkan',
            available: true,
            deviceName,
        };

        // Parse VRAM if available
        const heapBytes = Number.parseInt(stdout.match(/heapSize\s*=\s*(\d+)/)?.[1] ?? '', 10);
        if (Number.isFinite(heapBytes)) {
            result.vramMB = Math.round(heapBytes / (1024 * 1024));
        }
        return result;
    } catch {
        // vulkaninfo not available
    }

    // Fallback: Check for common AMD/Intel GPU indicators on Linux
    if (os.platform() === 'linux') {
        try {
            const { stdout } = await execAsync('lspci | grep -i "vga\\|3d\\|display" 2>/dev/null');
            const vendors = ['AMD', 'Intel', 'Radeon'];
            // Name the adapter that actually matched, not whichever line came
            // first (which may be an unrelated GPU from another vendor).
            const adapterLine = stdout
                .split('\n')
                .find((line) => vendors.some((vendor) => line.includes(vendor)));
            if (adapterLine) {
                return {
                    backend: 'vulkan',
                    available: true,
                    deviceName: adapterLine.match(/: (.+)/)?.[1]?.trim() ?? 'GPU (Vulkan)',
                };
            }
        } catch {
            // lspci not available
        }
    }

    return {
        backend: 'vulkan',
        available: false,
    };
}
/**
 * Render GPU detection results as a single display line.
 *
 * Unavailable backends render as `"<BACKEND> not available"`. Available ones
 * render the device name (or the upper-cased backend when no name is known),
 * followed by the VRAM in GB and the driver version when present, joined by
 * ` • `.
 *
 * @param info - Detection result to describe.
 */
export function formatGPUInfo(info: GPUInfo): string {
    const backendLabel = info.backend.toUpperCase();
    if (!info.available) {
        return `${backendLabel} not available`;
    }

    const segments: string[] = [info.deviceName ?? backendLabel];
    if (info.vramMB) {
        segments.push(`${(info.vramMB / 1024).toFixed(1)}GB`);
    }
    if (info.driverVersion) {
        segments.push(`Driver: ${info.driverVersion}`);
    }
    return segments.join(' • ');
}
/**
 * Report whether one specific backend can be used on this machine.
 *
 * `'cpu'` is always available; GPU backends are answered by running the
 * corresponding probe. Any unrecognised value yields `false`.
 *
 * @param backend - Backend to check.
 */
export async function isBackendAvailable(backend: GPUBackend): Promise<boolean> {
    if (backend === 'cpu') {
        return true;
    }
    if (backend === 'metal') {
        const metal = await detectMetal();
        return metal.available;
    }
    if (backend === 'cuda') {
        const cuda = await detectCUDA();
        return cuda.available;
    }
    if (backend === 'vulkan') {
        const vulkan = await detectVulkan();
        return vulkan.available;
    }
    return false;
}
/**
 * List every backend usable on this machine, in probe order.
 *
 * Metal is probed on macOS; CUDA and then Vulkan on Linux and Windows. `'cpu'`
 * is always appended last.
 *
 * @returns Available backends, ending with `'cpu'`.
 */
export async function getAvailableBackends(): Promise<GPUBackend[]> {
    const platform = os.platform();

    // Pair each candidate backend with the probe that answers for it.
    const candidates: Array<[GPUBackend, () => Promise<GPUInfo>]> = [];
    if (platform === 'darwin') {
        candidates.push(['metal', detectMetal]);
    }
    if (platform === 'linux' || platform === 'win32') {
        candidates.push(['cuda', detectCUDA], ['vulkan', detectVulkan]);
    }

    const backends: GPUBackend[] = [];
    for (const [backend, probe] of candidates) {
        const info = await probe();
        if (info.available) {
            backends.push(backend);
        }
    }

    // CPU is always available
    backends.push('cpu');
    return backends;
}

View File

@@ -0,0 +1,103 @@
/**
* Native local model support via node-llama-cpp and Ollama.
*
* This module provides:
* - Local model registry with curated GGUF models
* - GPU detection (Metal/CUDA/Vulkan)
* - Model downloading with progress
* - node-llama-cpp provider for native GGUF execution
* - Ollama provider for Ollama server integration
*/
// Types
export * from './types.js';
// Error codes and factory
export { LocalModelErrorCode } from './error-codes.js';
export { LocalModelError } from './errors.js';
// Schemas
export {
GPUBackendSchema,
QuantizationTypeSchema,
LocalModelCategorySchema,
ModelSourceSchema,
ModelDownloadStatusSchema,
LocalModelInfoSchema,
ModelDownloadProgressSchema,
GPUInfoSchema,
LocalLLMConfigSchema,
InstalledModelSchema,
ModelStateSchema,
ModelDownloadOptionsSchema,
OllamaModelInfoSchema,
OllamaStatusSchema,
} from './schemas.js';
// Registry
export {
LOCAL_MODEL_REGISTRY,
getAllLocalModels,
getLocalModelById,
getLocalModelsByCategory,
getRecommendedLocalModels,
getModelsForVRAM,
getModelsForRAM,
searchLocalModels,
getDefaultLocalModelId,
} from './registry.js';
// GPU Detection
export {
detectGPU,
formatGPUInfo,
isBackendAvailable,
getAvailableBackends,
} from './gpu-detector.js';
// Downloader
export {
downloadModel,
downloadModelFromUrl,
calculateFileHash,
checkDiskSpace,
validateDiskSpace,
cleanupPartialDownload,
isDownloadInProgress,
getPartialDownloadProgress,
type DownloadEvents,
type DownloadOptions,
type DownloadResult,
} from './downloader.js';
// Ollama Provider
export {
DEFAULT_OLLAMA_URL,
checkOllamaStatus,
listOllamaModels,
isOllamaModelAvailable,
pullOllamaModel,
createOllamaModel,
createValidatedOllamaModel,
getOllamaModelInfo,
deleteOllamaModel,
generateOllamaEmbeddings,
type OllamaConfig,
} from './ollama-provider.js';
// node-llama-cpp Provider
export {
isNodeLlamaCppInstalled,
requireNodeLlamaCpp,
loadModel,
unloadModel,
unloadAllModels,
isModelLoaded,
getLoadedModelCount,
type NodeLlamaConfig,
type ModelSession,
type LoadedModel,
} from './node-llama-provider.js';
// AI SDK Adapter
export { createLocalLanguageModel, type LocalModelAdapterConfig } from './ai-sdk-adapter.js';

View File

@@ -0,0 +1,353 @@
/**
* node-llama-cpp provider for native local model execution.
*
* This module provides utilities for loading and using GGUF models via node-llama-cpp.
* Since node-llama-cpp is an optional dependency, all functions handle the case
* where it's not installed gracefully.
*
* For Vercel AI SDK integration, we recommend using Ollama which provides
* an OpenAI-compatible API that works seamlessly with the SDK.
*/
import type { GPUInfo } from './types.js';
import { LocalModelError } from './errors.js';
import { detectGPU } from './gpu-detector.js';
import { getDextoGlobalPath } from '../../../utils/path.js';
import { createRequire } from 'module';
import * as path from 'path';
import { pathToFileURL } from 'url';
/**
 * Locate the package directory where a globally installed node-llama-cpp would live.
 *
 * @returns `<dexto global "deps" path>/node_modules/node-llama-cpp`. The path is
 *          only computed; its existence is not checked.
 */
function getGlobalNodeLlamaCppPath(): string {
    const depsRoot = getDextoGlobalPath('deps');
    return path.join(depsRoot, 'node_modules', 'node-llama-cpp');
}
/**
 * Report whether node-llama-cpp can be found.
 *
 * Two locations are tried in order: normal Node module resolution (covers dev
 * mode and projects with a local install), then the global deps directory
 * returned by `getGlobalNodeLlamaCppPath`.
 *
 * @returns `true` as soon as either lookup succeeds, otherwise `false`.
 */
export async function isNodeLlamaCppInstalled(): Promise<boolean> {
    let installed = false;

    // Attempt 1: standard node resolution
    try {
        // @ts-ignore - Optional dependency may not be installed (TS2307 in CI)
        await import('node-llama-cpp');
        installed = true;
    } catch {
        installed = false;
    }

    // Attempt 2: global deps location (~/.dexto/deps/node_modules/node-llama-cpp)
    if (!installed) {
        try {
            const requireFromHere = createRequire(import.meta.url);
            requireFromHere.resolve(getGlobalNodeLlamaCppPath());
            installed = true;
        } catch {
            installed = false;
        }
    }

    return installed;
}
/**
 * Dynamically import node-llama-cpp.
 *
 * Tries standard Node resolution first, then the package's `dist/index.js`
 * inside the global deps directory (~/.dexto/deps). The global entry point is
 * imported through a `file://` URL: `import()` treats its argument as a URL
 * specifier, so a raw absolute Windows path such as `C:\...` is rejected (the
 * drive letter is parsed as a URL scheme).
 *
 * The result is typed as a plain record because node-llama-cpp is an optional
 * dependency and cannot be typed at compile time.
 *
 * @returns The module namespace, or `null` when neither location can be imported.
 */
async function importNodeLlamaCpp(): Promise<Record<string, unknown> | null> {
    // Try 1: Standard node resolution (works in dev mode, dexto-project with local install)
    try {
        // @ts-ignore - Optional dependency may not be installed (TS2307 in CI)
        return await import('node-llama-cpp');
    } catch {
        // Continue to fallback
    }

    // Try 2: Global deps location (~/.dexto/deps/node_modules/node-llama-cpp)
    try {
        // ES modules don't support directory imports, so point at the entry file.
        const entryPoint = path.join(getGlobalNodeLlamaCppPath(), 'dist', 'index.js');
        // @ts-ignore - Dynamic path import
        return await import(pathToFileURL(entryPoint).href);
    } catch {
        return null;
    }
}
/**
 * Always throws the "node-llama-cpp is not installed" error.
 *
 * Typed as `never` so callers can use it in code paths that must not continue.
 *
 * @throws The error produced by `LocalModelError.nodeLlamaNotInstalled()`.
 */
export function requireNodeLlamaCpp(): never {
    const notInstalled = LocalModelError.nodeLlamaNotInstalled();
    throw notInstalled;
}
/**
* Configuration accepted by `loadModel` for a node-llama-cpp model.
*/
export interface NodeLlamaConfig {
/** Path to the .gguf model file */
modelPath: string;
/**
* Number of GPU layers to offload (0 = CPU only).
* `loadModel` defaults this to -1 and passes -1 to node-llama-cpp as `'auto'`.
*/
gpuLayers?: number;
/** Context window size; when omitted, `loadModel` leaves the choice to node-llama-cpp. */
contextSize?: number;
/** Number of CPU threads */
threads?: number;
/** Batch size for inference; `loadModel` defaults this to 512. */
batchSize?: number;
/** Whether to use Flash Attention (if available) */
flashAttention?: boolean;
}
/**
* Model session interface for node-llama-cpp.
* This provides a simplified interface for text generation.
*/
export interface ModelSession {
/**
* Generate a response from a prompt.
*
* All options are optional. `signal` cancels generation and `onToken`
* receives text chunks as they are produced. The sessions created by
* `loadModel` default to maxTokens 1024, temperature 0.7 and topP 0.9.
*/
prompt(
text: string,
options?: {
maxTokens?: number;
temperature?: number;
topP?: number;
signal?: AbortSignal;
onToken?: (token: string) => void;
}
): Promise<string>;
/** Dispose the session and free resources */
dispose(): Promise<void>;
}
/**
* Handle to a model that has been loaded by `loadModel`.
*/
export interface LoadedModel {
/** Model file path */
modelPath: string;
/** GPU info detected when the model was loaded */
gpuInfo: GPUInfo;
/** Create a new chat session backed by this model's context */
createSession(): Promise<ModelSession>;
/** Dispose the model and free resources; also removes it from the load cache */
dispose(): Promise<void>;
}
// Cache of in-flight and completed loads, keyed by "modelPath:gpuLayers:contextSize".
// Storing the promise (rather than the resolved model) lets concurrent callers
// share a single load.
const modelCache = new Map<string, Promise<LoadedModel>>();

/**
 * Call `resource.dispose()` if such a method exists, swallowing any failure.
 * Used for node-llama-cpp objects whose exact shape is not known at compile time.
 */
async function disposeIfSupported(resource: unknown): Promise<void> {
    if (typeof resource !== 'object' || resource === null || !('dispose' in resource)) {
        return;
    }
    const { dispose } = resource as { dispose: unknown };
    if (typeof dispose !== 'function') {
        return;
    }
    try {
        await dispose.call(resource);
    } catch {
        // Best-effort cleanup only.
    }
}

/**
 * Load a GGUF model using node-llama-cpp.
 *
 * Loads are cached per `modelPath` / `gpuLayers` / `contextSize`; a second call
 * with the same three values returns the same promise. Other options (`threads`,
 * `batchSize`, `flashAttention`) are not part of the cache key, so they only
 * take effect on the first load for a given key. Failed loads are evicted from
 * the cache so a later call can retry.
 *
 * @param config - Model path and runtime options. `gpuLayers` defaults to -1
 *                 (passed to node-llama-cpp as `'auto'`) and `batchSize` to 512.
 * @returns A handle that creates chat sessions and disposes the model.
 * @throws {DextoRuntimeError} `nodeLlamaNotInstalled` when node-llama-cpp cannot
 *         be imported, or `modelLoadFailed` wrapping any other load failure.
 */
export async function loadModel(config: NodeLlamaConfig): Promise<LoadedModel> {
    const {
        modelPath,
        gpuLayers = -1,
        contextSize,
        threads,
        batchSize = 512,
        flashAttention,
    } = config;

    // Check cache first
    const cacheKey = `${modelPath}:${gpuLayers}:${contextSize}`;
    const cached = modelCache.get(cacheKey);
    if (cached) {
        return cached;
    }

    const loadPromise = (async (): Promise<LoadedModel> => {
        const nodeLlama = await importNodeLlamaCpp();
        if (!nodeLlama) {
            throw LocalModelError.nodeLlamaNotInstalled();
        }

        try {
            // Detect GPU for optimal configuration
            const gpuInfo = await detectGPU();

            // The module is untyped (optional dependency), so describe just the
            // parts of its API that are used here.
            const getLlama = nodeLlama['getLlama'] as (config: {
                logLevel: unknown;
                gpu: boolean | string;
            }) => Promise<{
                loadModel: (config: { modelPath: string; gpuLayers: number | string }) => Promise<{
                    createContext: (options: Record<string, unknown>) => Promise<{
                        getSequence: () => unknown;
                        dispose: () => Promise<void>;
                    }>;
                    dispose: () => Promise<void>;
                }>;
            }>;
            const LlamaLogLevel = nodeLlama['LlamaLogLevel'] as { warn: unknown };
            const LlamaChatSession = nodeLlama['LlamaChatSession'] as new (options: {
                contextSequence: unknown;
            }) => {
                prompt: (
                    text: string,
                    options: {
                        maxTokens: number;
                        temperature: number;
                        topP: number;
                        signal?: AbortSignal;
                        stopOnAbortSignal: boolean;
                        trimWhitespaceSuffix: boolean;
                        onTextChunk?: (text: string) => void;
                    }
                ) => Promise<string>;
            };

            // Initialize llama.cpp runtime
            const llama = await getLlama({
                logLevel: LlamaLogLevel.warn,
                gpu: gpuInfo.backend === 'cpu' ? false : 'auto',
            });

            // Load the model
            const model = await llama.loadModel({
                modelPath,
                gpuLayers: gpuLayers === -1 ? 'auto' : gpuLayers,
            });

            // Create context with specified options
            // contextSize defaults to "auto" in node-llama-cpp, which uses the model's
            // training context and auto-retries with smaller sizes on failure
            const contextOptions: Record<string, unknown> = {
                batchSize,
            };
            if (contextSize !== undefined) {
                contextOptions.contextSize = contextSize;
            }
            if (threads !== undefined) {
                contextOptions.threads = threads;
            }
            if (flashAttention !== undefined) {
                contextOptions.flashAttention = flashAttention;
            }

            let context: Awaited<ReturnType<typeof model.createContext>>;
            try {
                context = await model.createContext(contextOptions);
            } catch (contextError) {
                // The model is already resident; release it before reporting the failure.
                await disposeIfSupported(model);
                throw contextError;
            }

            return {
                modelPath,
                gpuInfo,
                async createSession(): Promise<ModelSession> {
                    const sequence = context.getSequence();
                    const session = new LlamaChatSession({
                        contextSequence: sequence,
                    });
                    let sessionDisposed = false;

                    return {
                        async prompt(text, options = {}): Promise<string> {
                            const {
                                maxTokens = 1024,
                                temperature = 0.7,
                                topP = 0.9,
                                signal,
                                onToken,
                            } = options;

                            // Build options object, only including optional properties if defined
                            const promptOptions: {
                                maxTokens: number;
                                temperature: number;
                                topP: number;
                                stopOnAbortSignal: boolean;
                                trimWhitespaceSuffix: boolean;
                                signal?: AbortSignal;
                                onTextChunk?: (text: string) => void;
                            } = {
                                maxTokens,
                                temperature,
                                topP,
                                stopOnAbortSignal: true,
                                trimWhitespaceSuffix: true,
                            };
                            if (signal) {
                                promptOptions.signal = signal;
                            }
                            if (onToken) {
                                promptOptions.onTextChunk = onToken;
                            }

                            return session.prompt(text, promptOptions);
                        },
                        async dispose(): Promise<void> {
                            if (sessionDisposed) {
                                return;
                            }
                            sessionDisposed = true;
                            // Hand the sequence back to the context so later
                            // createSession() calls can obtain one.
                            // NOTE(review): relies on node-llama-cpp exposing
                            // dispose() on chat sessions and context sequences;
                            // it is a no-op if they do not.
                            await disposeIfSupported(session);
                            await disposeIfSupported(sequence);
                        },
                    };
                },
                async dispose(): Promise<void> {
                    await context.dispose();
                    await model.dispose();
                    modelCache.delete(cacheKey);
                },
            };
        } catch (error) {
            // NOTE(review): `'code' in error` is meant to pass DextoRuntimeError
            // through, but Node system errors also carry `code` and are rethrown
            // unwrapped too.
            if (error instanceof Error && 'code' in error) {
                throw error;
            }
            throw LocalModelError.modelLoadFailed(
                modelPath,
                error instanceof Error ? error.message : String(error)
            );
        }
    })();

    modelCache.set(cacheKey, loadPromise);
    // Evict on any failure, including "not installed", so the rejection is not
    // served from the cache forever. The identity check avoids removing a newer entry.
    void loadPromise.catch(() => {
        if (modelCache.get(cacheKey) === loadPromise) {
            modelCache.delete(cacheKey);
        }
    });
    return loadPromise;
}
/**
 * Unload a model and free resources.
 *
 * Disposes every cached instance that was loaded from `modelPath`, whatever
 * `gpuLayers`/`contextSize` it was loaded with, and removes the matching cache
 * entries. Errors raised while awaiting or disposing an instance are ignored
 * so that one failing instance does not prevent the others from being unloaded.
 *
 * @param modelPath - Model file path; compared for exact equality with the
 *   path embedded in each cache key.
 */
export async function unloadModel(modelPath: string): Promise<void> {
    for (const [key, loadPromise] of modelCache.entries()) {
        // Cache key format is "modelPath:gpuLayers:contextSize". The path itself
        // may contain ':' (for example a Windows drive prefix such as C:\models),
        // so drop the last two segments instead of taking the text before the
        // first colon.
        const keyModelPath = key.split(':').slice(0, -2).join(':');
        if (keyModelPath !== modelPath) {
            continue;
        }
        try {
            const loaded = await loadPromise;
            await loaded.dispose();
        } catch {
            // Best-effort unload: a failed load or dispose still drops the entry below.
        }
        modelCache.delete(key);
    }
}
/**
 * Dispose every cached model and remove its cache entry.
 *
 * Each entry is awaited and disposed in turn; a failure while loading or
 * disposing one model is ignored and its entry is removed regardless.
 */
export async function unloadAllModels(): Promise<void> {
    for (const [cacheKey, pendingModel] of modelCache.entries()) {
        try {
            const model = await pendingModel;
            await model.dispose();
        } catch {
            // Ignore errors during unload
        } finally {
            modelCache.delete(cacheKey);
        }
    }
}
/**
 * Check if a model is currently loaded.
 *
 * A model counts as loaded when the cache holds at least one entry for
 * `modelPath`, regardless of the GPU-layer or context-size settings used.
 *
 * @param modelPath - Model file path; compared for exact equality with the
 *   path embedded in each cache key.
 * @returns `true` if any cache entry belongs to `modelPath`, otherwise `false`.
 */
export function isModelLoaded(modelPath: string): boolean {
    for (const key of modelCache.keys()) {
        // Cache key format is "modelPath:gpuLayers:contextSize". The path itself
        // may contain ':' (for example a Windows drive prefix such as C:\models),
        // so drop the last two segments instead of taking the text before the
        // first colon.
        const keyModelPath = key.split(':').slice(0, -2).join(':');
        if (keyModelPath === modelPath) {
            return true;
        }
    }
    return false;
}
/**
 * Report how many entries the model cache currently holds.
 */
export function getLoadedModelCount(): number {
    const { size } = modelCache;
    return size;
}

View File

@@ -0,0 +1,346 @@
/* global TextDecoder */
/**
* Ollama provider for local model inference.
*
* Uses Ollama's OpenAI-compatible API for seamless integration
* with the Vercel AI SDK.
*/
import { createOpenAI } from '@ai-sdk/openai';
import type { LanguageModel } from 'ai';
import type { OllamaModelInfo, OllamaStatus } from './types.js';
import { LocalModelError } from './errors.js';
/**
 * Default Ollama server URL.
 *
 * Used as the fallback `baseURL` by the functions in this module, which append
 * paths such as `/api/version`, `/api/tags` or `/v1` directly to it; the value
 * therefore carries no trailing slash.
 */
export const DEFAULT_OLLAMA_URL = 'http://localhost:11434';
/**
 * Ollama configuration options.
 *
 * Accepted by `createOllamaModel` and `createValidatedOllamaModel`.
 */
export interface OllamaConfig {
/**
 * Ollama server base URL (default: http://localhost:11434).
 * Request paths are appended to it verbatim (e.g. `${baseURL}/v1`).
 */
baseURL?: string;
}
/**
 * Check if the Ollama server is running.
 *
 * Requests `${baseURL}/api/version` with a 5 second timeout and, when the
 * server answers successfully, also fetches the list of installed models.
 * Failures are reported through the returned status object (`running: false`
 * plus an `error` message) rather than thrown.
 *
 * @param baseURL - Ollama server base URL.
 * @returns Server status; `models` and `version` are only set when the server responded.
 */
export async function checkOllamaStatus(
    baseURL: string = DEFAULT_OLLAMA_URL
): Promise<OllamaStatus> {
    try {
        const controller = new AbortController();
        const timeoutId = setTimeout(() => controller.abort(), 5000);
        // Clear the timer whether the request resolves or rejects; otherwise a
        // refused connection leaves a pending 5s timer behind.
        const response = await fetch(`${baseURL}/api/version`, {
            signal: controller.signal,
        }).finally(() => clearTimeout(timeoutId));
        if (!response.ok) {
            return {
                running: false,
                url: baseURL,
                error: `HTTP ${response.status}: ${response.statusText}`,
            };
        }
        const data = (await response.json()) as { version?: string };
        // Fetch available models
        const models = await listOllamaModels(baseURL);
        const status: OllamaStatus = {
            running: true,
            url: baseURL,
            models,
        };
        // Only attach the version when the server actually reported one.
        if (data.version) {
            status.version = data.version;
        }
        return status;
    } catch (error) {
        // An AbortError here means our own timeout fired.
        const errorMessage =
            error instanceof Error
                ? error.name === 'AbortError'
                    ? 'Connection timed out'
                    : error.message
                : 'Unknown error';
        return {
            running: false,
            url: baseURL,
            error: errorMessage,
        };
    }
}
/**
 * Fetch the models installed on an Ollama server via `${baseURL}/api/tags`.
 *
 * Best-effort: a non-OK response or any error while fetching or decoding
 * yields an empty list instead of throwing.
 *
 * @param baseURL - Ollama server base URL.
 * @returns The server's models converted to `OllamaModelInfo`.
 */
export async function listOllamaModels(
    baseURL: string = DEFAULT_OLLAMA_URL
): Promise<OllamaModelInfo[]> {
    // Shape of one entry in the /api/tags response (snake_case as sent by the server).
    type RawModel = {
        name: string;
        size: number;
        digest: string;
        modified_at: string;
        details?: {
            family?: string;
            parameter_size?: string;
            quantization_level?: string;
        };
    };

    // Convert one raw entry to the camelCase shape, copying only non-empty detail fields.
    const toModelInfo = (raw: RawModel): OllamaModelInfo => {
        const info: OllamaModelInfo = {
            name: raw.name,
            size: raw.size,
            digest: raw.digest,
            modifiedAt: raw.modified_at,
        };
        const rawDetails = raw.details;
        if (!rawDetails) {
            return info;
        }
        const details: NonNullable<OllamaModelInfo['details']> = {};
        if (rawDetails.family) {
            details.family = rawDetails.family;
        }
        if (rawDetails.parameter_size) {
            details.parameterSize = rawDetails.parameter_size;
        }
        if (rawDetails.quantization_level) {
            details.quantizationLevel = rawDetails.quantization_level;
        }
        // Leave `details` unset when nothing was copied.
        if (Object.keys(details).length > 0) {
            info.details = details;
        }
        return info;
    };

    try {
        const response = await fetch(`${baseURL}/api/tags`);
        if (!response.ok) {
            return [];
        }
        const payload = (await response.json()) as { models?: RawModel[] };
        const rawModels = payload.models ?? [];
        return rawModels.map((raw) => toModelInfo(raw));
    } catch {
        return [];
    }
}
/**
 * Report whether the Ollama server lists a model matching `modelName`.
 *
 * A listed model matches when its name equals `modelName`, or when either
 * name is a `:`-delimited prefix of the other (e.g. a tagged variant).
 *
 * @param modelName - Model name to look for.
 * @param baseURL - Ollama server base URL.
 */
export async function isOllamaModelAvailable(
    modelName: string,
    baseURL: string = DEFAULT_OLLAMA_URL
): Promise<boolean> {
    const installed = await listOllamaModels(baseURL);
    for (const { name } of installed) {
        if (name === modelName) {
            return true;
        }
        if (name.startsWith(`${modelName}:`)) {
            return true;
        }
        if (modelName.startsWith(`${name}:`)) {
            return true;
        }
    }
    return false;
}
/**
 * Pull a model from the Ollama registry.
 *
 * Posts to `${baseURL}/api/pull` and consumes the newline-delimited JSON
 * progress stream until it ends. Lines that are not valid JSON objects are
 * skipped. A record carrying an `error` field aborts the pull.
 *
 * @param modelName - Name of the model to pull
 * @param baseURL - Ollama server URL (default: http://localhost:11434)
 * @param onProgress - Optional callback for progress updates; exceptions it
 *   throws are ignored so that reporting problems do not abort the pull
 * @param signal - Optional AbortSignal for cancellation
 * @throws LocalModelError.ollamaPullFailed on a non-OK response, a missing
 *   response body, or an error record in the progress stream
 * @throws LocalModelError.ollamaNotRunning when the connection is refused
 */
export async function pullOllamaModel(
    modelName: string,
    baseURL: string = DEFAULT_OLLAMA_URL,
    onProgress?: (progress: { status: string; completed?: number; total?: number }) => void,
    signal?: AbortSignal
): Promise<void> {
    // Handle one line of the progress stream.
    const handleLine = (line: string): void => {
        if (!line.trim()) return;
        let parsed: unknown;
        try {
            parsed = JSON.parse(line);
        } catch {
            // Ignore JSON parse errors
            return;
        }
        if (typeof parsed !== 'object' || parsed === null) return;
        const progress = parsed as {
            status: string;
            completed?: number;
            total?: number;
            error?: string;
        };
        // Raised outside the parse try/catch so a server-reported failure can
        // never be mistaken for a malformed line and swallowed.
        if (progress.error) {
            throw LocalModelError.ollamaPullFailed(modelName, progress.error);
        }
        try {
            onProgress?.(progress);
        } catch {
            // Progress reporting is best-effort; keep pulling.
        }
    };

    // Node's fetch reports a refused connection as "fetch failed" with the
    // ECONNREFUSED code on `error.cause`, so inspect both levels.
    const isConnectionRefused = (err: unknown): boolean => {
        if (!(err instanceof Error)) return false;
        if (err.message.includes('ECONNREFUSED')) return true;
        const cause: unknown = (err as Error & { cause?: unknown }).cause;
        if (typeof cause !== 'object' || cause === null) return false;
        const { code, message } = cause as { code?: unknown; message?: unknown };
        return (
            code === 'ECONNREFUSED' ||
            (typeof message === 'string' && message.includes('ECONNREFUSED'))
        );
    };

    try {
        const fetchOptions: RequestInit = {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ name: modelName }),
        };
        if (signal) {
            fetchOptions.signal = signal;
        }
        const response = await fetch(`${baseURL}/api/pull`, fetchOptions);
        if (!response.ok) {
            throw LocalModelError.ollamaPullFailed(
                modelName,
                `HTTP ${response.status}: ${response.statusText}`
            );
        }
        const reader = response.body?.getReader();
        if (!reader) {
            throw LocalModelError.ollamaPullFailed(modelName, 'No response body');
        }
        const decoder = new TextDecoder();
        let buffer = '';
        let finished = false;
        try {
            while (true) {
                const { done, value } = await reader.read();
                if (done) break;
                buffer += decoder.decode(value, { stream: true });
                // Ollama sends newline-delimited JSON; keep the trailing partial line buffered
                const lines = buffer.split('\n');
                buffer = lines.pop() ?? '';
                for (const line of lines) {
                    handleLine(line);
                }
            }
            // Flush the decoder and process a final record that had no trailing newline.
            buffer += decoder.decode();
            handleLine(buffer);
            finished = true;
        } finally {
            if (!finished) {
                // Stop the download and release the connection when bailing out early.
                try {
                    await reader.cancel();
                } catch {
                    // The stream is already errored or closed; nothing left to release.
                }
            }
        }
    } catch (error) {
        if (isConnectionRefused(error)) {
            throw LocalModelError.ollamaNotRunning(baseURL);
        }
        throw error;
    }
}
/**
 * Build a language model backed by Ollama's OpenAI-compatible endpoint.
 *
 * @param modelName - Ollama model name passed to the provider.
 * @param config - Optional settings; `baseURL` falls back to `DEFAULT_OLLAMA_URL`.
 */
export function createOllamaModel(modelName: string, config: OllamaConfig = {}): LanguageModel {
    const serverURL = config.baseURL === undefined ? DEFAULT_OLLAMA_URL : config.baseURL;
    const provider = createOpenAI({
        // The OpenAI-compatible API is served under /v1
        baseURL: `${serverURL}/v1`,
        // Ollama ignores the key, but the SDK insists on a non-empty string
        apiKey: 'ollama',
    });
    return provider(modelName);
}
/**
 * Create an Ollama model after confirming the server and model are usable.
 *
 * @param modelName - Ollama model name.
 * @param config - Optional settings; `baseURL` falls back to `DEFAULT_OLLAMA_URL`.
 * @throws LocalModelError.ollamaNotRunning when the status check reports the server as down
 * @throws LocalModelError.ollamaModelNotFound when no matching model is listed
 */
export async function createValidatedOllamaModel(
    modelName: string,
    config: OllamaConfig = {}
): Promise<LanguageModel> {
    const serverURL = config.baseURL === undefined ? DEFAULT_OLLAMA_URL : config.baseURL;

    // Step 1: the server must be reachable
    const { running } = await checkOllamaStatus(serverURL);
    if (!running) {
        throw LocalModelError.ollamaNotRunning(serverURL);
    }

    // Step 2: the requested model must be installed
    const installed = await isOllamaModelAvailable(modelName, serverURL);
    if (!installed) {
        throw LocalModelError.ollamaModelNotFound(modelName);
    }

    return createOllamaModel(modelName, config);
}
/**
 * Look up the first model on the Ollama server that matches `modelName`.
 *
 * A listed model matches when its name equals `modelName`, or when either
 * name is a `:`-delimited prefix of the other.
 *
 * @param modelName - Model name to look for.
 * @param baseURL - Ollama server base URL.
 * @returns The matching model info, or `null` when none matches.
 */
export async function getOllamaModelInfo(
    modelName: string,
    baseURL: string = DEFAULT_OLLAMA_URL
): Promise<OllamaModelInfo | null> {
    const candidates = await listOllamaModels(baseURL);
    for (const candidate of candidates) {
        const installedName = candidate.name;
        const isMatch =
            installedName === modelName ||
            installedName.startsWith(`${modelName}:`) ||
            modelName.startsWith(`${installedName}:`);
        if (isMatch) {
            return candidate;
        }
    }
    return null;
}
/**
 * Ask the Ollama server to delete a model.
 *
 * @param modelName - Name of the model to delete.
 * @param baseURL - Ollama server base URL.
 * @returns `true` when the server answered with an OK status; `false` on a
 *   non-OK status or when the request itself failed.
 */
export async function deleteOllamaModel(
    modelName: string,
    baseURL: string = DEFAULT_OLLAMA_URL
): Promise<boolean> {
    try {
        const request: RequestInit = {
            method: 'DELETE',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ name: modelName }),
        };
        const { ok } = await fetch(`${baseURL}/api/delete`, request);
        return ok;
    } catch {
        return false;
    }
}
/**
 * Generate embeddings using Ollama.
 * Uses the /api/embed endpoint which supports batch processing.
 *
 * Note: Reserved for future RAG/vector search functionality.
 *
 * @param modelName - Embedding model name sent as `model`.
 * @param input - One string or a batch of strings; a single string is wrapped in an array.
 * @param baseURL - Ollama server base URL.
 * @returns The `embeddings` array from the response.
 * @throws LocalModelError.ollamaApiError on a non-OK response or when the
 *   response body does not contain an array of embedding vectors
 */
export async function generateOllamaEmbeddings(
    modelName: string,
    input: string | string[],
    baseURL: string = DEFAULT_OLLAMA_URL
): Promise<number[][]> {
    const inputs = Array.isArray(input) ? input : [input];
    // Use /api/embed endpoint which accepts arrays for batch processing
    const response = await fetch(`${baseURL}/api/embed`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
            model: modelName,
            input: inputs,
        }),
    });
    if (!response.ok) {
        throw LocalModelError.ollamaApiError(`HTTP ${response.status}: ${response.statusText}`);
    }
    // The body is external data: check its shape instead of trusting a cast,
    // so callers never receive `undefined` typed as number[][].
    const data = (await response.json()) as { embeddings?: unknown } | null;
    const embeddings = data?.embeddings;
    if (!Array.isArray(embeddings) || !embeddings.every((row) => Array.isArray(row))) {
        throw LocalModelError.ollamaApiError('Response did not contain an embeddings array');
    }
    return embeddings;
}

View File

@@ -0,0 +1,346 @@
/**
* Curated registry of local GGUF models.
*
* This registry contains vetted models from HuggingFace that are known to work
* well with node-llama-cpp. Models are organized by size and use case.
*
* Model selection criteria:
* - Well-maintained quantizations (bartowski, TheBloke, official repos)
* - Good performance/size trade-off (Q4_K_M as default)
* - Clear licensing for commercial use where possible
* - Tested with node-llama-cpp
*/
import type { LocalModelInfo } from './types.js';
/**
* Curated list of recommended local models.
* Sorted by category and size for easy selection.
*/
export const LOCAL_MODEL_REGISTRY: LocalModelInfo[] = [
// ============================================
// RECOMMENDED: Best balance of quality and size
// ============================================
{
id: 'llama-3.3-8b-q4',
name: 'Llama 3.3 8B Instruct',
description:
"Meta's latest 8B model. Excellent general-purpose performance with 128K context.",
huggingfaceId: 'bartowski/Llama-3.3-8B-Instruct-GGUF',
filename: 'Llama-3.3-8B-Instruct-Q4_K_M.gguf',
quantization: 'Q4_K_M',
sizeBytes: 5_020_000_000, // ~5GB
contextLength: 131072,
categories: ['general', 'coding'],
minVRAM: 6,
minRAM: 8,
recommended: true,
author: 'Meta',
license: 'llama3.3',
supportsTools: true,
},
{
id: 'qwen-2.5-coder-7b-q4',
name: 'Qwen 2.5 Coder 7B Instruct',
description: "Alibaba's coding-focused model. Excellent for code generation and review.",
huggingfaceId: 'Qwen/Qwen2.5-Coder-7B-Instruct-GGUF',
filename: 'qwen2.5-coder-7b-instruct-q4_k_m.gguf',
quantization: 'Q4_K_M',
sizeBytes: 4_680_000_000, // ~4.7GB
contextLength: 131072,
categories: ['coding'],
minVRAM: 6,
minRAM: 8,
recommended: true,
author: 'Alibaba',
license: 'apache-2.0',
supportsTools: true,
},
// ============================================
// SMALL: Fast models for quick tasks (< 4GB)
// ============================================
{
id: 'phi-3.5-mini-q4',
name: 'Phi 3.5 Mini Instruct',
description: "Microsoft's compact model. Great for simple tasks with minimal resources.",
huggingfaceId: 'bartowski/Phi-3.5-mini-instruct-GGUF',
filename: 'Phi-3.5-mini-instruct-Q4_K_M.gguf',
quantization: 'Q4_K_M',
sizeBytes: 2_390_000_000, // ~2.4GB
contextLength: 131072,
categories: ['small', 'general'],
minVRAM: 4,
minRAM: 4,
recommended: true,
author: 'Microsoft',
license: 'mit',
},
{
id: 'qwen-2.5-3b-q4',
name: 'Qwen 2.5 3B Instruct',
description: 'Compact but capable. Good for basic chat and simple tasks.',
huggingfaceId: 'Qwen/Qwen2.5-3B-Instruct-GGUF',
filename: 'qwen2.5-3b-instruct-q4_k_m.gguf',
quantization: 'Q4_K_M',
sizeBytes: 2_050_000_000, // ~2GB
contextLength: 32768,
categories: ['small', 'general'],
minVRAM: 3,
minRAM: 4,
author: 'Alibaba',
license: 'apache-2.0',
},
{
id: 'gemma-2-2b-q4',
name: 'Gemma 2 2B Instruct',
description: "Google's efficient small model. Good balance of speed and capability.",
huggingfaceId: 'bartowski/gemma-2-2b-it-GGUF',
filename: 'gemma-2-2b-it-Q4_K_M.gguf',
quantization: 'Q4_K_M',
sizeBytes: 1_790_000_000, // ~1.8GB
contextLength: 8192,
categories: ['small', 'general'],
minVRAM: 3,
minRAM: 4,
author: 'Google',
license: 'gemma',
},
// ============================================
// CODING: Optimized for code generation
// ============================================
{
id: 'qwen-2.5-coder-14b-q4',
name: 'Qwen 2.5 Coder 14B Instruct',
description: 'Larger coding model for complex tasks. Better code understanding.',
huggingfaceId: 'Qwen/Qwen2.5-Coder-14B-Instruct-GGUF',
filename: 'qwen2.5-coder-14b-instruct-q4_k_m.gguf',
quantization: 'Q4_K_M',
sizeBytes: 8_900_000_000, // ~8.9GB
contextLength: 131072,
categories: ['coding'],
minVRAM: 10,
minRAM: 12,
author: 'Alibaba',
license: 'apache-2.0',
supportsTools: true,
},
{
id: 'deepseek-coder-v2-lite-q4',
name: 'DeepSeek Coder V2 Lite',
description: "DeepSeek's efficient coding model. Great for code completion.",
huggingfaceId: 'bartowski/DeepSeek-Coder-V2-Lite-Instruct-GGUF',
filename: 'DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf',
quantization: 'Q4_K_M',
sizeBytes: 9_200_000_000, // ~9.2GB
contextLength: 131072,
categories: ['coding'],
minVRAM: 12,
minRAM: 16,
author: 'DeepSeek',
license: 'deepseek',
},
{
id: 'codestral-22b-q4',
name: 'Codestral 22B',
description: "Mistral's dedicated coding model. Supports 80+ languages.",
huggingfaceId: 'bartowski/Codestral-22B-v0.1-GGUF',
filename: 'Codestral-22B-v0.1-Q4_K_M.gguf',
quantization: 'Q4_K_M',
sizeBytes: 13_500_000_000, // ~13.5GB
contextLength: 32768,
categories: ['coding'],
minVRAM: 16,
minRAM: 20,
author: 'Mistral AI',
license: 'mnpl',
},
// ============================================
// GENERAL: Versatile all-purpose models
// ============================================
{
id: 'mistral-7b-q4',
name: 'Mistral 7B Instruct v0.3',
description: "Mistral's efficient 7B model. Good balance of speed and quality.",
huggingfaceId: 'bartowski/Mistral-7B-Instruct-v0.3-GGUF',
filename: 'Mistral-7B-Instruct-v0.3-Q4_K_M.gguf',
quantization: 'Q4_K_M',
sizeBytes: 4_370_000_000, // ~4.4GB
contextLength: 32768,
categories: ['general'],
minVRAM: 6,
minRAM: 8,
author: 'Mistral AI',
license: 'apache-2.0',
supportsTools: true,
},
{
id: 'gemma-2-9b-q4',
name: 'Gemma 2 9B Instruct',
description: "Google's capable 9B model. Strong reasoning and instruction following.",
huggingfaceId: 'bartowski/gemma-2-9b-it-GGUF',
filename: 'gemma-2-9b-it-Q4_K_M.gguf',
quantization: 'Q4_K_M',
sizeBytes: 5_760_000_000, // ~5.8GB
contextLength: 8192,
categories: ['general'],
minVRAM: 8,
minRAM: 10,
author: 'Google',
license: 'gemma',
},
{
id: 'llama-3.1-8b-q4',
name: 'Llama 3.1 8B Instruct',
description: "Meta's Llama 3.1. Solid general-purpose performance.",
huggingfaceId: 'bartowski/Meta-Llama-3.1-8B-Instruct-GGUF',
filename: 'Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf',
quantization: 'Q4_K_M',
sizeBytes: 4_920_000_000, // ~4.9GB
contextLength: 131072,
categories: ['general'],
minVRAM: 6,
minRAM: 8,
author: 'Meta',
license: 'llama3.1',
supportsTools: true,
},
// ============================================
// REASONING: Strong reasoning capabilities
// ============================================
{
id: 'qwen-2.5-14b-q4',
name: 'Qwen 2.5 14B Instruct',
description: "Alibaba's mid-size model. Strong reasoning and long context.",
huggingfaceId: 'Qwen/Qwen2.5-14B-Instruct-GGUF',
filename: 'qwen2.5-14b-instruct-q4_k_m.gguf',
quantization: 'Q4_K_M',
sizeBytes: 8_700_000_000, // ~8.7GB
contextLength: 131072,
categories: ['reasoning', 'general'],
minVRAM: 10,
minRAM: 12,
author: 'Alibaba',
license: 'apache-2.0',
supportsTools: true,
},
{
id: 'qwen-2.5-32b-q4',
name: 'Qwen 2.5 32B Instruct',
description: "Alibaba's large model. Excellent reasoning and complex tasks.",
huggingfaceId: 'Qwen/Qwen2.5-32B-Instruct-GGUF',
filename: 'qwen2.5-32b-instruct-q4_k_m.gguf',
quantization: 'Q4_K_M',
sizeBytes: 19_300_000_000, // ~19.3GB
contextLength: 131072,
categories: ['reasoning', 'general'],
minVRAM: 24,
minRAM: 32,
author: 'Alibaba',
license: 'apache-2.0',
supportsTools: true,
},
// ============================================
// VISION: Multimodal models with image support
// ============================================
{
id: 'llava-v1.6-mistral-7b-q4',
name: 'LLaVA v1.6 Mistral 7B',
description: 'Vision-language model. Can understand and discuss images.',
huggingfaceId: 'cjpais/llava-v1.6-mistral-7b-gguf',
filename: 'llava-v1.6-mistral-7b.Q4_K_M.gguf',
quantization: 'Q4_K_M',
sizeBytes: 4_500_000_000, // ~4.5GB
contextLength: 4096,
categories: ['vision', 'general'],
minVRAM: 8,
minRAM: 10,
author: 'Microsoft/LLaVA',
license: 'llama2',
supportsVision: true,
},
{
id: 'qwen-2-vl-7b-q4',
name: 'Qwen2 VL 7B Instruct',
description: "Alibaba's vision-language model. High-quality image understanding.",
huggingfaceId: 'Qwen/Qwen2-VL-7B-Instruct-GGUF',
filename: 'qwen2-vl-7b-instruct-q4_k_m.gguf',
quantization: 'Q4_K_M',
sizeBytes: 5_100_000_000, // ~5.1GB
contextLength: 32768,
categories: ['vision', 'general'],
minVRAM: 8,
minRAM: 10,
author: 'Alibaba',
license: 'apache-2.0',
supportsVision: true,
},
];
/**
 * Return every registry entry in a new array.
 * The array is a shallow copy; the entries themselves are shared with the registry.
 */
export function getAllLocalModels(): LocalModelInfo[] {
    return LOCAL_MODEL_REGISTRY.slice();
}
/**
 * Find the registry entry whose `id` equals the given one.
 *
 * @returns The first matching entry, or `undefined` when there is none.
 */
export function getLocalModelById(id: string): LocalModelInfo | undefined {
    for (const entry of LOCAL_MODEL_REGISTRY) {
        if (entry.id === id) {
            return entry;
        }
    }
    return undefined;
}
/**
 * Get models by category.
 *
 * @param category - Category name to filter on. Any string is accepted; a
 *   value that is not a known category simply matches nothing.
 * @returns Registry entries whose `categories` contain `category`.
 */
export function getLocalModelsByCategory(category: string): LocalModelInfo[] {
    // Compare element-wise so an arbitrary string can be checked against the
    // category union without an `as any` escape hatch.
    return LOCAL_MODEL_REGISTRY.filter((m) => m.categories.some((c) => c === category));
}
/**
 * List the registry entries flagged as recommended (featured in UI),
 * in registry order.
 */
export function getRecommendedLocalModels(): LocalModelInfo[] {
    return LOCAL_MODEL_REGISTRY.filter(({ recommended }) => Boolean(recommended));
}
/**
 * List the models whose VRAM requirement fits the given budget.
 *
 * @param vramGB - Available VRAM in GB.
 * @returns Entries with no `minVRAM` set, or with `minVRAM` at or below `vramGB`.
 */
export function getModelsForVRAM(vramGB: number): LocalModelInfo[] {
    return LOCAL_MODEL_REGISTRY.filter(({ minVRAM }) => {
        // No stated requirement: always fits
        if (!minVRAM) {
            return true;
        }
        return minVRAM <= vramGB;
    });
}
/**
 * List the models whose RAM requirement (CPU inference) fits the given budget.
 *
 * @param ramGB - Available RAM in GB.
 * @returns Entries with no `minRAM` set, or with `minRAM` at or below `ramGB`.
 */
export function getModelsForRAM(ramGB: number): LocalModelInfo[] {
    return LOCAL_MODEL_REGISTRY.filter(({ minRAM }) => {
        // No stated requirement: always fits
        if (!minRAM) {
            return true;
        }
        return minRAM <= ramGB;
    });
}
/**
 * Case-insensitive substring search over each entry's id, name and description.
 *
 * @param query - Text to look for.
 * @returns Registry entries in which any of the three fields contains `query`.
 */
export function searchLocalModels(query: string): LocalModelInfo[] {
    const needle = query.toLowerCase();
    const contains = (text: string): boolean => text.toLowerCase().includes(needle);
    return LOCAL_MODEL_REGISTRY.filter(
        (entry) => contains(entry.id) || contains(entry.name) || contains(entry.description)
    );
}
/**
 * Pick the model ID offered by default during first-time setup.
 *
 * @returns The id of the first recommended model, or 'llama-3.3-8b-q4' when
 *   no model is marked as recommended.
 */
export function getDefaultLocalModelId(): string {
    // The first recommended entry wins
    const [firstRecommended] = getRecommendedLocalModels();
    return firstRecommended?.id ?? 'llama-3.3-8b-q4';
}

View File

@@ -0,0 +1,204 @@
/**
* Zod schemas for local model configuration validation.
*/
import { z } from 'zod';
/**
* GPU backend options.
*/
export const GPUBackendSchema = z.enum(['metal', 'cuda', 'vulkan', 'cpu']);
/**
* Quantization type options.
*/
export const QuantizationTypeSchema = z.enum([
'Q2_K',
'Q3_K_S',
'Q3_K_M',
'Q3_K_L',
'Q4_0',
'Q4_K_S',
'Q4_K_M',
'Q5_0',
'Q5_K_S',
'Q5_K_M',
'Q6_K',
'Q8_0',
'F16',
'F32',
]);
/**
* Local model category options.
*/
export const LocalModelCategorySchema = z.enum([
'general',
'coding',
'reasoning',
'small',
'vision',
]);
/**
* Model source options.
*/
export const ModelSourceSchema = z.enum(['huggingface', 'ollama']);
/**
* Model download status.
*/
export const ModelDownloadStatusSchema = z.enum([
'pending',
'downloading',
'verifying',
'complete',
'error',
]);
/**
* Schema for local model info (registry entry).
*/
export const LocalModelInfoSchema = z
.object({
id: z.string().min(1).describe('Unique model identifier'),
name: z.string().min(1).describe('Human-readable display name'),
description: z.string().describe('Short description of model capabilities'),
huggingfaceId: z.string().min(1).describe('HuggingFace repository ID'),
filename: z.string().min(1).describe('GGUF filename to download'),
quantization: QuantizationTypeSchema.describe('Quantization level'),
sizeBytes: z.number().int().positive().describe('File size in bytes'),
contextLength: z.number().int().positive().describe('Maximum context window'),
categories: z.array(LocalModelCategorySchema).describe('Model categories'),
minVRAM: z.number().positive().optional().describe('Minimum VRAM in GB'),
minRAM: z.number().positive().optional().describe('Minimum RAM in GB'),
recommended: z.boolean().optional().describe('Whether model is featured'),
author: z.string().optional().describe('Model author/organization'),
license: z.string().optional().describe('License type'),
supportsVision: z.boolean().optional().describe('Whether model supports images'),
supportsTools: z.boolean().optional().describe('Whether model supports function calling'),
})
.strict();
/**
* Schema for model download progress.
*/
export const ModelDownloadProgressSchema = z
.object({
modelId: z.string().min(1),
status: ModelDownloadStatusSchema,
bytesDownloaded: z.number().int().nonnegative(),
totalBytes: z.number().int().nonnegative(),
percentage: z.number().min(0).max(100),
speed: z.number().nonnegative().optional(),
eta: z.number().nonnegative().optional(),
error: z.string().optional(),
})
.strict();
/**
* Schema for GPU info.
*/
export const GPUInfoSchema = z
.object({
backend: GPUBackendSchema,
available: z.boolean(),
deviceName: z.string().optional(),
vramMB: z.number().int().nonnegative().optional(),
driverVersion: z.string().optional(),
})
.strict();
/**
 * Schema for local LLM configuration.
 *
 * Only `provider` and `model` are required; every tuning field is optional.
 * The object is declared `.strict()`, so keys not listed here are presumably
 * rejected during validation (standard zod behavior — confirm for the zod
 * version in use).
 */
export const LocalLLMConfigSchema = z
.object({
provider: z.enum(['local', 'ollama']),
model: z.string().min(1).describe('Model ID or GGUF path'),
// NOTE(review): any integer passes here; values below -1 are not rejected by the schema.
gpuLayers: z.number().int().optional().describe('GPU layers (-1=auto, 0=CPU)'),
contextSize: z.number().int().positive().optional().describe('Override context size'),
threads: z.number().int().positive().optional().describe('CPU threads'),
batchSize: z.number().int().positive().optional().describe('Inference batch size'),
modelPath: z.string().optional().describe('Resolved path to model file'),
})
.strict();
/**
* Schema for installed model metadata.
*/
export const InstalledModelSchema = z
.object({
id: z.string().min(1),
filePath: z.string().min(1),
sizeBytes: z.number().int().positive(),
downloadedAt: z.string().datetime(),
lastUsedAt: z.string().datetime().optional(),
sha256: z.string().optional(),
source: ModelSourceSchema,
})
.strict();
/**
* Schema for model state (persisted state file).
*/
export const ModelStateSchema = z
.object({
version: z.string().default('1.0'),
installed: z.record(z.string(), InstalledModelSchema).default({}),
activeModelId: z.string().optional(),
downloadQueue: z.array(z.string()).default([]),
})
.strict();
/**
 * Schema for model download options.
 *
 * Covers `modelId`, `outputDir`, `showProgress` (defaults to true) and
 * `hfToken`. No `onProgress` key is declared here.
 * NOTE(review): the schema is `.strict()`, so an options object that carries
 * an extra key such as an `onProgress` callback would presumably fail
 * validation — confirm callers strip it before parsing.
 */
export const ModelDownloadOptionsSchema = z
.object({
modelId: z.string().min(1),
outputDir: z.string().optional(),
showProgress: z.boolean().default(true),
hfToken: z.string().optional(),
})
.strict();
/**
* Schema for Ollama model info (from API).
*/
export const OllamaModelInfoSchema = z
.object({
name: z.string().min(1),
size: z.number().int().nonnegative(),
digest: z.string(),
modifiedAt: z.string(),
details: z
.object({
family: z.string().optional(),
parameterSize: z.string().optional(),
quantizationLevel: z.string().optional(),
})
.optional(),
})
.strict();
/**
* Schema for Ollama server status.
*/
export const OllamaStatusSchema = z
.object({
running: z.boolean(),
url: z.string().url(),
version: z.string().optional(),
models: z.array(OllamaModelInfoSchema).optional(),
error: z.string().optional(),
})
.strict();
// Export inferred types for convenience
export type LocalModelInfoInput = z.input<typeof LocalModelInfoSchema>;
export type ModelDownloadProgressInput = z.input<typeof ModelDownloadProgressSchema>;
export type GPUInfoInput = z.input<typeof GPUInfoSchema>;
export type LocalLLMConfigInput = z.input<typeof LocalLLMConfigSchema>;
export type InstalledModelInput = z.input<typeof InstalledModelSchema>;
export type ModelStateInput = z.input<typeof ModelStateSchema>;

View File

@@ -0,0 +1,303 @@
/**
* Types for native local model support via node-llama-cpp and Ollama.
*/
/**
* GPU acceleration backends supported by node-llama-cpp.
* - metal: Apple Silicon (M1/M2/M3) via Metal API
* - cuda: NVIDIA GPUs via CUDA
* - vulkan: Cross-platform GPU acceleration
* - cpu: CPU-only execution (fallback)
*/
export type GPUBackend = 'metal' | 'cuda' | 'vulkan' | 'cpu';
/**
* Common GGUF quantization types.
* Lower quantization = smaller file size, slightly lower quality.
* Q4_K_M is a good balance for most use cases.
*/
export type QuantizationType =
| 'Q2_K'
| 'Q3_K_S'
| 'Q3_K_M'
| 'Q3_K_L'
| 'Q4_0'
| 'Q4_K_S'
| 'Q4_K_M'
| 'Q5_0'
| 'Q5_K_S'
| 'Q5_K_M'
| 'Q6_K'
| 'Q8_0'
| 'F16'
| 'F32';
/**
* Categories for organizing local models in the UI.
*/
export type LocalModelCategory = 'general' | 'coding' | 'reasoning' | 'small' | 'vision';
/**
* Model source - where the model can be downloaded from.
*/
export type ModelSource = 'huggingface' | 'ollama';
/**
* Curated local model entry from the registry.
* These are pre-vetted models with known configurations.
*/
export interface LocalModelInfo {
/** Unique identifier (e.g., 'llama-3.3-8b-q4') */
id: string;
/** Human-readable display name */
name: string;
/** Short description of the model's capabilities */
description: string;
/** HuggingFace repository ID (e.g., 'bartowski/Llama-3.3-8B-Instruct-GGUF') */
huggingfaceId: string;
/** Filename of the GGUF file to download */
filename: string;
/** Quantization level */
quantization: QuantizationType;
/** Expected file size in bytes (for progress estimation) */
sizeBytes: number;
/** Maximum context window size in tokens */
contextLength: number;
/** Model categories for filtering */
categories: LocalModelCategory[];
/** Minimum VRAM required in GB (for GPU inference) */
minVRAM?: number;
/** Minimum RAM required in GB (for CPU inference) */
minRAM?: number;
/** Whether this model is recommended (featured in UI) */
recommended?: boolean;
/** Model author/organization */
author?: string;
/** License type (e.g., 'llama3.3', 'apache-2.0', 'mit') */
license?: string;
/** Whether model supports vision/images */
supportsVision?: boolean;
/** Whether model supports tool/function calling */
supportsTools?: boolean;
}
/**
* State of a model download.
*/
export type ModelDownloadStatus = 'pending' | 'downloading' | 'verifying' | 'complete' | 'error';
/**
* Progress information for a model download.
* Emitted via events during download.
*/
export interface ModelDownloadProgress {
/** Model ID being downloaded */
modelId: string;
/** Current download status */
status: ModelDownloadStatus;
/** Bytes downloaded so far */
bytesDownloaded: number;
/** Total file size in bytes */
totalBytes: number;
/** Download progress as percentage (0-100) */
percentage: number;
/** Download speed in bytes per second */
speed?: number;
/** Estimated time remaining in seconds */
eta?: number;
/** Error message if status is 'error' */
error?: string;
}
/**
* GPU detection result.
*/
export interface GPUInfo {
/** Detected GPU backend */
backend: GPUBackend;
/** Whether GPU acceleration is available */
available: boolean;
/** GPU device name (e.g., 'Apple M2 Pro', 'NVIDIA RTX 4090') */
deviceName?: string;
/** Available VRAM in megabytes */
vramMB?: number;
/** GPU driver version */
driverVersion?: string;
}
/**
* Extended LLM configuration for local models.
* Extends the base config with local-specific options.
*/
export interface LocalLLMConfig {
/** Provider type */
provider: 'local' | 'ollama';
/** Model ID from local registry or custom GGUF path */
model: string;
/** Number of layers to offload to GPU (-1 = auto, 0 = CPU only) */
gpuLayers?: number;
/** Override context size (tokens) */
contextSize?: number;
/** Number of CPU threads to use */
threads?: number;
/** Inference batch size */
batchSize?: number;
/** Path to model file (resolved from model ID) */
modelPath?: string;
}
/**
* Installed model metadata (persisted to state file).
*/
export interface InstalledModel {
/** Model ID from registry */
id: string;
/** Absolute path to the .gguf file */
filePath: string;
/** File size in bytes */
sizeBytes: number;
/** When the model was downloaded (ISO timestamp) */
downloadedAt: string;
/** When the model was last used (ISO timestamp) */
lastUsedAt?: string;
/** SHA-256 hash of the file for integrity verification */
sha256?: string;
/** Source of the download */
source: ModelSource;
}
/**
* Model state manager state (persisted to ~/.dexto/models/state.json).
*/
export interface ModelState {
/** Schema version for migrations */
version: string;
/** Map of model ID to installed model info */
installed: Record<string, InstalledModel>;
/** Currently active/selected model ID */
activeModelId?: string;
/** Queue of model IDs pending download */
downloadQueue: string[];
}
/**
* Options for downloading a model.
*/
export interface ModelDownloadOptions {
/** Model ID to download */
modelId: string;
/** Directory to save the model (default: ~/.dexto/models/) */
outputDir?: string;
/** Whether to show CLI progress (default: true) */
showProgress?: boolean;
/** Callback for progress updates */
onProgress?: (progress: ModelDownloadProgress) => void;
/** HuggingFace token for gated models */
hfToken?: string;
}
/**
* Result of a model download operation.
*/
export interface ModelDownloadResult {
/** Whether download was successful */
success: boolean;
/** Path to the downloaded model file */
filePath?: string;
/** SHA-256 hash of the downloaded file */
sha256?: string;
/** Error message if download failed */
error?: string;
}
/**
* Ollama model info (from Ollama API /api/tags).
*/
export interface OllamaModelInfo {
/** Model name (e.g., 'llama3.3:8b') */
name: string;
/** Model size in bytes */
size: number;
/** Model digest/hash */
digest: string;
/** When the model was last modified */
modifiedAt: string;
/** Model details (parameters, family, etc.) */
details?: {
family?: string;
parameterSize?: string;
quantizationLevel?: string;
};
}
/**
* Ollama server status.
*/
export interface OllamaStatus {
/** Whether Ollama server is running */
running: boolean;
/** Ollama server URL */
url: string;
/** Ollama version */
version?: string;
/** Available models on the server */
models?: OllamaModelInfo[];
/** Error message if not running */
error?: string;
}

View File

@@ -0,0 +1,441 @@
/**
* OpenRouter Model Registry
*
* Provides dynamic model validation against OpenRouter's catalog of 100+ models.
* Fetches and caches the model list from OpenRouter's API with a 24-hour TTL.
*
* Features:
* - Lazy loading: Cache is populated on first lookup
* - Background refresh: Non-blocking cache updates
* - Graceful degradation: Returns 'unknown' when the cache is empty or stale, so config validation can proceed
* - Throttled requests: Max 1 refresh per 5 minutes to avoid rate limits
*/
import { promises as fs } from 'node:fs';
import { existsSync, readFileSync } from 'node:fs';
import path from 'node:path';
import { getDextoGlobalPath } from '../../utils/path.js';
import { logger } from '../../logger/logger.js';
/** OpenRouter endpoint that lists the available models */
const OPENROUTER_MODELS_ENDPOINT = 'https://openrouter.ai/api/v1/models';
/** File name of the on-disk model cache */
const CACHE_FILENAME = 'openrouter-models.json';
/** Sub-directory (under the global dexto path) that holds the cache file */
const CACHE_SUBDIR = 'cache';
/** Age after which the cached model list is no longer considered fresh */
const CACHE_TTL_MS = 1000 * 60 * 60 * 24; // 24 hours
/** Minimum spacing between non-forced refresh attempts */
const MIN_REFRESH_INTERVAL_MS = 1000 * 60 * 5; // 5 minutes throttle between refresh attempts
/**
* Result of a catalog lookup: 'valid' (model is in a fresh cache), 'invalid' (fresh cache
* does not contain it), or 'unknown' (no usable answer: empty/stale cache or empty model ID).
*/
export type LookupStatus = 'valid' | 'invalid' | 'unknown';
/** Model info stored in the cache (both in memory and in the cache file) */
export interface OpenRouterModelInfo {
/** Model ID as returned by the OpenRouter API */
id: string;
/** Context window size; falls back to a default when the API does not provide one */
contextLength: number;
}
/** Shape of the JSON cache file written to disk */
interface CacheFile {
/** ISO-8601 timestamp of when the model list was fetched */
fetchedAt: string;
/** Deduplicated model entries, sorted by ID */
models: OpenRouterModelInfo[];
}
/** Options accepted by the refresh entry points */
interface RefreshOptions {
/** API key sent as a Bearer token; when omitted, the last key that was supplied is reused */
apiKey?: string;
/** When true, bypass the minimum-interval throttle between refresh attempts */
force?: boolean;
}
/**
* Context length recorded for a model when the API payload carries neither a numeric
* `context_length` nor a numeric `top_provider.context_length`.
*/
const DEFAULT_CONTEXT_LENGTH = 128000;
/**
 * Registry of OpenRouter model IDs backed by an on-disk JSON cache.
 *
 * The constructor synchronously loads an existing cache file, if any. Lookups never block:
 * when the cache is empty or older than `CACHE_TTL_MS` they answer 'unknown' / null and
 * start a background refresh. `refresh()` is the awaitable variant.
 *
 * Only one refresh promise is tracked at a time, and it is cleared solely by the refresh
 * that owns it. A forced refresh started while an older one is still in flight therefore
 * stays tracked after the older one settles.
 */
class OpenRouterModelRegistry {
    /** Map from normalized (trimmed, lower-cased) model ID to model info */
    private models: Map<string, OpenRouterModelInfo> | null = null;
    /** Epoch milliseconds of the last successful fetch (or of the loaded cache file) */
    private lastFetchedAt: number | null = null;
    /** The refresh currently being tracked, if any */
    private refreshPromise: Promise<void> | null = null;
    /** Epoch milliseconds of the last refresh attempt, used for throttling */
    private lastRefreshAttemptAt: number | null = null;
    /** Most recent non-empty API key; reused when a later call omits one */
    private lastUsedApiKey?: string;

    constructor(private readonly cachePath: string) {
        this.loadCacheFromDisk();
    }

    /**
     * Look up a model ID against the OpenRouter catalog.
     * @returns 'valid' if model exists, 'invalid' if not found, 'unknown' if cache is stale/empty
     */
    lookup(modelId: string): LookupStatus {
        const normalized = this.normalizeModelId(modelId);
        if (!normalized) {
            return 'unknown';
        }
        if (!this.models || this.models.size === 0) {
            // No cache yet - kick off a background refresh and allow for now
            this.scheduleRefresh({ force: true });
            return 'unknown';
        }
        if (!this.isCacheFresh()) {
            // Don't rely on stale data - refresh in background and treat as unknown
            this.scheduleRefresh();
            return 'unknown';
        }
        return this.models.has(normalized) ? 'valid' : 'invalid';
    }

    /**
     * Get context length for a model ID.
     * @returns context length if model is in a fresh cache, null if not found or cache is empty/stale
     */
    getContextLength(modelId: string): number | null {
        return this.getModelInfo(modelId)?.contextLength ?? null;
    }

    /**
     * Get model info for a model ID.
     * @returns model info if found in a fresh cache, null if not found or cache is empty/stale
     */
    getModelInfo(modelId: string): OpenRouterModelInfo | null {
        const normalized = this.normalizeModelId(modelId);
        if (!normalized) {
            return null;
        }
        if (!this.models || this.models.size === 0 || !this.isCacheFresh()) {
            return null;
        }
        return this.models.get(normalized) ?? null;
    }

    /**
     * Schedule a non-blocking background refresh of the model cache.
     * Does nothing when a refresh is already in flight, or (unless `force` is set) when the
     * previous attempt was less than `MIN_REFRESH_INTERVAL_MS` ago. Failures are logged.
     */
    scheduleRefresh(options?: RefreshOptions): void {
        const apiKey = this.resolveApiKey(options);
        if (this.refreshPromise) {
            return; // Refresh already in-flight
        }
        const now = Date.now();
        if (!options?.force && this.isThrottled(now)) {
            return; // Throttle refresh attempts
        }
        this.lastRefreshAttemptAt = now;
        // There is no caller to receive a rejection here, so failures are logged instead.
        void this.trackRefresh(
            this.refreshInternal(apiKey).catch((error: unknown) => {
                logger.warn(
                    `Failed to refresh OpenRouter model registry: ${error instanceof Error ? error.message : String(error)}`
                );
            })
        );
    }

    /**
     * Blocking refresh of the model cache.
     *
     * Without `force`: waits for an in-flight refresh if there is one, and returns without
     * fetching when throttled. With `force`: always starts a new fetch.
     * @throws if the fetch fails or the response contains no models
     */
    async refresh(options?: RefreshOptions): Promise<void> {
        const apiKey = this.resolveApiKey(options);
        const now = Date.now();
        if (!options?.force) {
            if (this.refreshPromise) {
                await this.refreshPromise;
                return;
            }
            if (this.isThrottled(now)) {
                return;
            }
        }
        this.lastRefreshAttemptAt = now;
        await this.trackRefresh(this.refreshInternal(apiKey));
    }

    /**
     * Get all cached model IDs (or null if cache is empty).
     * IDs are returned in normalized (lower-cased) form, regardless of cache freshness.
     */
    getCachedModels(): string[] | null {
        if (!this.models || this.models.size === 0) {
            return null;
        }
        return Array.from(this.models.keys());
    }

    /**
     * Get all cached model info (or null if cache is empty), regardless of cache freshness.
     */
    getCachedModelsWithInfo(): OpenRouterModelInfo[] | null {
        if (!this.models || this.models.size === 0) {
            return null;
        }
        return Array.from(this.models.values());
    }

    /**
     * Get cache metadata for debugging/monitoring.
     */
    getCacheMetadata(): { lastFetchedAt: Date | null; modelCount: number; isFresh: boolean } {
        return {
            lastFetchedAt: this.lastFetchedAt ? new Date(this.lastFetchedAt) : null,
            modelCount: this.models ? this.models.size : 0,
            isFresh: this.isCacheFresh(),
        };
    }

    /** Picks the API key for this call and remembers it for later calls that omit one. */
    private resolveApiKey(options?: RefreshOptions): string | undefined {
        const apiKey = options?.apiKey ?? this.lastUsedApiKey;
        if (apiKey) {
            this.lastUsedApiKey = apiKey;
        }
        return apiKey;
    }

    /** True when the previous refresh attempt is too recent for a non-forced refresh. */
    private isThrottled(now: number): boolean {
        if (!this.lastRefreshAttemptAt) {
            return false;
        }
        return now - this.lastRefreshAttemptAt < MIN_REFRESH_INTERVAL_MS;
    }

    /**
     * Registers `work` as the in-flight refresh and clears the registration once it settles.
     * The slot is cleared only if it still points at this refresh, so an older refresh that
     * finishes late cannot erase the record of a newer one.
     */
    private trackRefresh(work: Promise<void>): Promise<void> {
        const tracked: Promise<void> = work.finally(() => {
            if (this.refreshPromise === tracked) {
                this.refreshPromise = null;
            }
        });
        this.refreshPromise = tracked;
        return tracked;
    }

    /**
     * Fetches the model list from OpenRouter and stores it.
     * NOTE(review): the request has no timeout, so an awaited `refresh()` can wait as long
     * as the network stack allows.
     * @throws on a non-2xx response, or when no model IDs can be extracted from the payload
     */
    private async refreshInternal(apiKey?: string): Promise<void> {
        try {
            const headers: Record<string, string> = {
                Accept: 'application/json',
            };
            if (apiKey) {
                headers.Authorization = `Bearer ${apiKey}`;
            }
            logger.debug('Refreshing OpenRouter model registry from remote source');
            const response = await fetch(OPENROUTER_MODELS_ENDPOINT, { headers });
            if (!response.ok) {
                const body = await response.text();
                throw new Error(`HTTP ${response.status}: ${body}`);
            }
            const payload: unknown = await response.json();
            const models = this.extractModels(payload);
            if (models.length === 0) {
                throw new Error('No model identifiers returned by OpenRouter');
            }
            await this.writeCache(models);
            logger.info(`OpenRouter model registry refreshed with ${models.length} models`);
        } catch (error) {
            // Callers always receive an Error instance, whatever was thrown.
            throw error instanceof Error ? error : new Error(String(error));
        }
    }

    /**
     * Synchronously loads the cache file into memory, if it exists and is well-formed.
     * Malformed entries are skipped. In-memory state is replaced only after the whole file
     * has been processed, so a failure part-way through leaves no half-built map behind.
     */
    private loadCacheFromDisk(): void {
        if (!existsSync(this.cachePath)) {
            return;
        }
        try {
            const raw = readFileSync(this.cachePath, 'utf-8');
            const parsed: unknown = JSON.parse(raw);
            const file =
                parsed && typeof parsed === 'object'
                    ? (parsed as Partial<Record<keyof CacheFile, unknown>>)
                    : {};
            const { models: entries, fetchedAt } = file;
            if (!Array.isArray(entries) || typeof fetchedAt !== 'string') {
                logger.warn(`Invalid OpenRouter model cache structure at ${this.cachePath}`);
                return;
            }
            // Build map from model ID to info
            const loaded = new Map<string, OpenRouterModelInfo>();
            for (const entry of entries as unknown[]) {
                // `typeof null === 'object'`, so null entries are rejected explicitly
                if (!entry || typeof entry !== 'object') {
                    continue;
                }
                const { id, contextLength } = entry as Record<string, unknown>;
                if (typeof id !== 'string' || typeof contextLength !== 'number') {
                    continue;
                }
                const key = this.normalizeModelId(id);
                if (key) {
                    loaded.set(key, { id, contextLength });
                }
            }
            const timestamp = Date.parse(fetchedAt);
            this.models = loaded;
            this.lastFetchedAt = Number.isNaN(timestamp) ? null : timestamp;
            logger.debug(
                `Loaded ${loaded.size} OpenRouter models from cache (fetched at ${fetchedAt})`
            );
        } catch (error) {
            logger.warn(
                `Failed to load OpenRouter model cache: ${error instanceof Error ? error.message : String(error)}`
            );
        }
    }

    /** Trims and lower-cases a model ID; returns null for empty or whitespace-only input. */
    private normalizeModelId(modelId: string): string | null {
        if (!modelId) {
            return null;
        }
        const normalized = modelId.trim().toLowerCase();
        return normalized ? normalized : null;
    }

    /** True when a fetch timestamp exists and is younger than `CACHE_TTL_MS`. */
    private isCacheFresh(): boolean {
        if (!this.lastFetchedAt) {
            return false;
        }
        return Date.now() - this.lastFetchedAt < CACHE_TTL_MS;
    }

    /**
     * Stores a freshly fetched model list in memory and then persists it to disk.
     * The in-memory cache is updated first, and a failed disk write is logged rather than
     * thrown, so a read-only or full disk does not discard a successful fetch.
     */
    private async writeCache(models: OpenRouterModelInfo[]): Promise<void> {
        // Deduplicate by normalized ID (same normalization as lookups) and sort by ID
        const byKey = new Map<string, OpenRouterModelInfo>();
        for (const model of models) {
            const key = this.normalizeModelId(model.id);
            if (key) {
                byKey.set(key, model);
            }
        }
        const sortedEntries = Array.from(byKey.entries()).sort(([, a], [, b]) =>
            a.id.localeCompare(b.id)
        );
        const now = new Date();
        this.models = new Map(sortedEntries);
        this.lastFetchedAt = now.getTime();

        const cachePayload: CacheFile = {
            fetchedAt: now.toISOString(),
            models: sortedEntries.map(([, model]) => model),
        };
        try {
            await fs.mkdir(path.dirname(this.cachePath), { recursive: true });
            await fs.writeFile(this.cachePath, JSON.stringify(cachePayload, null, 2), 'utf-8');
        } catch (error) {
            logger.warn(
                `Failed to persist OpenRouter model cache to ${this.cachePath}: ${error instanceof Error ? error.message : String(error)}`
            );
        }
    }

    /**
     * Extracts model entries from an API payload.
     * Accepts `{ data: [...] }`, `{ models: [...] }`, or a bare array. Items without a
     * usable string identifier (`id`, `model`, or `name`) are skipped.
     */
    private extractModels(payload: unknown): OpenRouterModelInfo[] {
        if (!payload) {
            return [];
        }
        const raw =
            (payload as { data?: unknown; models?: unknown }).data ??
            (payload as { data?: unknown; models?: unknown }).models ??
            payload;
        if (!Array.isArray(raw)) {
            return [];
        }
        const models: OpenRouterModelInfo[] = [];
        for (const item of raw as unknown[]) {
            if (!item || typeof item !== 'object') {
                continue;
            }
            const record = item as Record<string, unknown>;
            const id = this.firstString([record.id, record.model, record.name]);
            if (!id) {
                continue;
            }
            // Get context_length from item or top_provider
            let contextLength = DEFAULT_CONTEXT_LENGTH;
            if (typeof record.context_length === 'number') {
                contextLength = record.context_length;
            } else if (record.top_provider && typeof record.top_provider === 'object') {
                const providerLength = (record.top_provider as Record<string, unknown>)
                    .context_length;
                if (typeof providerLength === 'number') {
                    contextLength = providerLength;
                }
            }
            models.push({ id, contextLength });
        }
        return models;
    }

    /** Returns the first non-blank string among `values`, trimmed, or null if there is none. */
    private firstString(values: Array<unknown>): string | null {
        for (const value of values) {
            if (typeof value === 'string' && value.trim().length > 0) {
                return value.trim();
            }
        }
        return null;
    }
}
// Singleton instance with global cache path.
// Constructing it reads the cache file synchronously (if present) when this module is loaded.
const cachePath = getDextoGlobalPath(CACHE_SUBDIR, CACHE_FILENAME);
export const openRouterModelRegistry = new OpenRouterModelRegistry(cachePath);
/**
* Look up a model ID against the OpenRouter catalog using the shared registry.
* May start a background refresh when the cache is empty or stale.
* @returns 'valid' if model exists, 'invalid' if not found, 'unknown' if cache is stale/empty
*/
export function lookupOpenRouterModel(modelId: string): LookupStatus {
return openRouterModelRegistry.lookup(modelId);
}
/**
* Schedule a non-blocking background refresh of the OpenRouter model cache.
* Returns immediately; refresh failures are logged by the registry rather than thrown.
*/
export function scheduleOpenRouterModelRefresh(options?: RefreshOptions): void {
openRouterModelRegistry.scheduleRefresh(options);
}
/**
* Perform a blocking refresh of the OpenRouter model cache.
* Without `force`, this may resolve without fetching when a recent attempt was already made.
* Rejects if the fetch itself fails.
*/
export async function refreshOpenRouterModelCache(options?: RefreshOptions): Promise<void> {
await openRouterModelRegistry.refresh(options);
}
/**
* Get all cached OpenRouter model IDs (or null if cache is empty).
* IDs are the registry's map keys (lower-cased); cache freshness is not checked.
*/
export function getCachedOpenRouterModels(): string[] | null {
return openRouterModelRegistry.getCachedModels();
}
/**
* Get context length for an OpenRouter model.
* @returns context length if model is in a fresh cache, null if not found or cache is empty/stale
*/
export function getOpenRouterModelContextLength(modelId: string): number | null {
return openRouterModelRegistry.getContextLength(modelId);
}
/**
* Get model info for an OpenRouter model.
* @returns model info if found in a fresh cache, null if not found or cache is empty/stale
*/
export function getOpenRouterModelInfo(modelId: string): OpenRouterModelInfo | null {
return openRouterModelRegistry.getModelInfo(modelId);
}
/**
* Get cache metadata for debugging/monitoring.
* @returns time of the last successful fetch (null if none), number of cached models, and
* whether the cache is still within its TTL
*/
export function getOpenRouterModelCacheInfo(): {
lastFetchedAt: Date | null;
modelCount: number;
isFresh: boolean;
} {
return openRouterModelRegistry.getCacheMetadata();
}
// Export internal constants for testing purposes.
// Exposes the resolved cache file path and the cache TTL; not intended for production callers.
export const __TEST_ONLY__ = {
cachePath,
CACHE_TTL_MS,
};

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,271 @@
import { Result, hasErrors, splitIssues, ok, fail, zodToIssues } from '../utils/result.js';
import { Issue, ErrorScope, ErrorType } from '@core/errors/types.js';
import { LLMErrorCode } from './error-codes.js';
import { type ValidatedLLMConfig, type LLMUpdates, type LLMConfig } from './schemas.js';
import { LLMConfigSchema } from './schemas.js';
import {
getDefaultModelForProvider,
acceptsAnyModel,
getProviderFromModel,
isValidProviderModel,
getEffectiveMaxInputTokens,
supportsBaseURL,
supportsCustomModels,
hasAllRegistryModelsSupport,
transformModelNameForProvider,
} from './registry.js';
import {
lookupOpenRouterModel,
refreshOpenRouterModelCache,
} from './providers/openrouter-model-registry.js';
import type { LLMUpdateContext } from './types.js';
import { resolveApiKeyForProvider } from '@core/utils/api-key-resolver.js';
import type { IDextoLogger } from '@core/logger/v2/types.js';
// TODO: Consider consolidating validation into async Zod schema (superRefine supports async).
// Currently OpenRouter validation is here to avoid network calls during startup/serverless.
// If startup validation is desired, move to schema with safeParseAsync() and handle serverless separately.
/**
 * Resolves `updates` against `previous` and, if resolution produced no errors, runs the
 * resulting candidate through schema validation.
 *
 * @param previous - The currently active, validated LLM config
 * @param updates - Partial changes to apply
 * @param logger - Logger handed to the resolver
 * @returns A failed result carrying only the resolver's errors, or the validation result
 */
export async function resolveAndValidateLLMConfig(
    previous: ValidatedLLMConfig,
    updates: LLMUpdates,
    logger: IDextoLogger
): Promise<Result<ValidatedLLMConfig, LLMUpdateContext>> {
    const resolved = await resolveLLMConfig(previous, updates, logger);

    // A candidate that already has resolver errors is broken; validating it would only add noise.
    if (hasErrors(resolved.warnings)) {
        return fail<ValidatedLLMConfig, LLMUpdateContext>(splitIssues(resolved.warnings).errors);
    }

    return validateLLMConfig(resolved.candidate, resolved.warnings);
}
/**
 * Infers a candidate LLM config by layering `updates` on top of `previous`.
 *
 * Steps, in order: provider inference, API key resolution, model fallback when the provider
 * changed, gateway model-name transformation, token/baseURL resolution, and provider-specific
 * checks (Vertex, Bedrock, OpenRouter). Problems are reported as issues in `warnings`
 * (severity 'warning' or 'error'); this function does not throw for them.
 *
 * @param previous - The previous LLM config
 * @param updates - The updates to the LLM config
 * @param logger - Logger used for debug output
 * @returns The unvalidated candidate config and the issues collected while resolving it
 */
export async function resolveLLMConfig(
    previous: ValidatedLLMConfig,
    updates: LLMUpdates,
    logger: IDextoLogger
): Promise<{ candidate: LLMConfig; warnings: Issue<LLMUpdateContext>[] }> {
    const warnings: Issue<LLMUpdateContext>[] = [];

    // Provider inference (if not provided, infer from model or previous provider)
    let provider = updates.provider ?? previous.provider;
    if (updates.provider == null && updates.model) {
        try {
            provider = getProviderFromModel(updates.model);
        } catch {
            // Model is not in the registry - keep the previous provider
        }
    }

    // API key resolution
    // (if not provided, reuse the previous API key when the provider is unchanged)
    // (if not provided and the provider changed, fall back to the environment key;
    //  a missing key is reported as a warning, not thrown)
    const envKey = resolveApiKeyForProvider(provider);
    const apiKey =
        updates.apiKey ?? (provider !== previous.provider ? envKey : previous.apiKey) ?? '';
    if (!apiKey) {
        warnings.push({
            code: LLMErrorCode.API_KEY_CANDIDATE_MISSING,
            message: 'API key not provided or found in environment',
            severity: 'warning',
            scope: ErrorScope.LLM,
            type: ErrorType.USER,
            context: { provider },
        });
    } else if (typeof apiKey === 'string' && apiKey.length < 10) {
        warnings.push({
            code: LLMErrorCode.API_KEY_INVALID,
            message: 'API key looks unusually short',
            severity: 'warning',
            scope: ErrorScope.LLM,
            type: ErrorType.USER,
            context: { provider },
        });
    }

    // Model fallback
    // if new provider doesn't support the new model, use the default model
    // Skip fallback for providers that support custom models (they allow arbitrary model IDs)
    let model = updates.model ?? previous.model;
    if (
        provider !== previous.provider &&
        !acceptsAnyModel(provider) &&
        !supportsCustomModels(provider) &&
        !isValidProviderModel(provider, model)
    ) {
        model = getDefaultModelForProvider(provider) ?? previous.model;
        warnings.push({
            code: LLMErrorCode.MODEL_INCOMPATIBLE,
            message: `Model set to default '${model}' for provider '${provider}'`,
            severity: 'warning',
            scope: ErrorScope.LLM,
            type: ErrorType.USER,
            context: { provider, model },
        });
    }

    // Gateway model transformation
    // When targeting a gateway provider (dexto/openrouter), transform native model names
    // to OpenRouter format (e.g., "claude-sonnet-4-5-20250929" -> "anthropic/claude-sonnet-4.5")
    if (hasAllRegistryModelsSupport(provider) && !model.includes('/')) {
        // Capture the name actually being transformed; it may already be the fallback default
        const untransformedModel = model;
        try {
            const originalProvider = getProviderFromModel(model);
            model = transformModelNameForProvider(model, originalProvider, provider);
            logger.debug(`Transformed model for ${provider}: ${untransformedModel} -> ${model}`);
        } catch {
            // Model not in registry - pass through as-is, gateway may accept custom model IDs
            logger.debug(
                `Model '${model}' not in registry, passing through to ${provider} without transformation`
            );
        }
    }

    // Token defaults - always use model's effective max unless explicitly provided
    // NOTE(review): this runs before the OpenRouter cache refresh below; if the effective
    // max depends on that cache, it is computed from pre-refresh data - confirm intent.
    const maxInputTokens =
        updates.maxInputTokens ??
        getEffectiveMaxInputTokens({ provider, model, apiKey: apiKey || previous.apiKey }, logger);

    // BaseURL resolution
    // Note: OpenRouter baseURL is handled by the factory (fixed endpoint, no user override)
    let baseURL: string | undefined;
    if (updates.baseURL) {
        baseURL = updates.baseURL;
    } else if (supportsBaseURL(provider)) {
        baseURL = previous.baseURL;
    }

    // Provider-specific environment checks (Vertex AI, Amazon Bedrock)
    warnings.push(...collectProviderEnvIssues(provider, model));

    // OpenRouter model validation with cache refresh
    // 'unknown' after a failed refresh = allow (network issue, graceful degradation)
    if (provider === 'openrouter' && (await isOpenRouterModelInvalid(model, apiKey, logger))) {
        // Model definitively not found in fresh cache - this is an error
        warnings.push({
            code: LLMErrorCode.MODEL_INCOMPATIBLE,
            message: `Model '${model}' not found in OpenRouter catalog. Check model ID at https://openrouter.ai/models`,
            severity: 'error',
            scope: ErrorScope.LLM,
            type: ErrorType.USER,
            context: { provider, model },
        });
    }

    return {
        candidate: {
            provider,
            model,
            apiKey,
            baseURL,
            maxIterations: updates.maxIterations ?? previous.maxIterations,
            maxInputTokens,
            maxOutputTokens: updates.maxOutputTokens ?? previous.maxOutputTokens,
            temperature: updates.temperature ?? previous.temperature,
        },
        warnings,
    };
}

/**
 * Reports missing environment configuration for providers that authenticate via the
 * environment. Checking up front gives immediate feedback instead of a failure at the first
 * API call.
 */
function collectProviderEnvIssues(
    provider: ValidatedLLMConfig['provider'],
    model: string
): Issue<LLMUpdateContext>[] {
    const issues: Issue<LLMUpdateContext>[] = [];

    // Vertex AI - requires GOOGLE_VERTEX_PROJECT for ADC authentication
    if (provider === 'vertex') {
        const projectId = process.env.GOOGLE_VERTEX_PROJECT;
        if (!projectId || !projectId.trim()) {
            issues.push({
                code: LLMErrorCode.CONFIG_MISSING,
                message:
                    'GOOGLE_VERTEX_PROJECT environment variable is required for Vertex AI. ' +
                    'Set it to your GCP project ID and ensure ADC is configured via `gcloud auth application-default login`',
                severity: 'error',
                scope: ErrorScope.LLM,
                type: ErrorType.USER,
                context: { provider, model },
            });
        }
    }

    // Amazon Bedrock - requires AWS_REGION for the endpoint URL
    // Auth can be either:
    // 1. AWS_BEARER_TOKEN_BEDROCK (API key - simplest)
    // 2. AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY (IAM credentials)
    if (provider === 'bedrock') {
        const region = process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION;
        if (!region || !region.trim()) {
            issues.push({
                code: LLMErrorCode.CONFIG_MISSING,
                message:
                    'AWS_REGION environment variable is required for Amazon Bedrock. ' +
                    'Also set either AWS_BEARER_TOKEN_BEDROCK (API key) or ' +
                    'AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY (IAM credentials).',
                severity: 'error',
                scope: ErrorScope.LLM,
                type: ErrorType.USER,
                context: { provider, model },
            });
        }
    }

    return issues;
}

/**
 * Checks a model against the OpenRouter catalog, refreshing the cache once when the first
 * lookup is inconclusive. Returns true only for a definitive 'invalid' answer.
 */
async function isOpenRouterModelInvalid(
    model: string,
    apiKey: string,
    logger: IDextoLogger
): Promise<boolean> {
    let lookupStatus = lookupOpenRouterModel(model);
    if (lookupStatus === 'unknown') {
        // Cache stale/empty - try to refresh before validating
        try {
            await refreshOpenRouterModelCache({ apiKey });
            lookupStatus = lookupOpenRouterModel(model);
        } catch {
            // Network failed - keep 'unknown' status, allow gracefully
            logger.debug(
                `OpenRouter model cache refresh failed, allowing model '${model}' without validation`
            );
        }
    }
    return lookupStatus === 'invalid';
}
/**
 * Passes the input candidate through the schema and returns a result.
 *
 * @param candidate - Unvalidated LLM config to check
 * @param warnings - Issues collected so far; they are carried into a successful result.
 *   The array itself is not modified.
 * @returns A failed result with the schema issues, or a successful result holding the parsed
 *   config plus the warnings (with a short-API-key warning added if none is present yet)
 */
export function validateLLMConfig(
    candidate: LLMConfig,
    warnings: Issue<LLMUpdateContext>[]
): Result<ValidatedLLMConfig, LLMUpdateContext> {
    // Final validation (business rules + shape)
    const parsed = LLMConfigSchema.safeParse(candidate);
    if (!parsed.success) {
        return fail<ValidatedLLMConfig, LLMUpdateContext>(zodToIssues(parsed.error, 'error'));
    }

    // Work on a copy so the caller's array is left untouched
    const collected = [...warnings];

    // Schema validation now handles apiKey non-empty validation
    // Check for short API key (warning). Skipped when a caller already reported it, so the
    // same condition is not flagged twice.
    const alreadyFlagged = collected.some((issue) => issue.code === LLMErrorCode.API_KEY_INVALID);
    if (!alreadyFlagged && parsed.data.apiKey && parsed.data.apiKey.length < 10) {
        collected.push({
            code: LLMErrorCode.API_KEY_INVALID,
            message: 'API key seems too short - please verify it is correct',
            path: ['apiKey'],
            severity: 'warning',
            scope: ErrorScope.LLM,
            type: ErrorType.USER,
            context: {
                provider: candidate.provider,
                model: candidate.model,
            },
        });
    }

    return ok<ValidatedLLMConfig, LLMUpdateContext>(parsed.data, collected);
}

View File

@@ -0,0 +1,488 @@
import { describe, it, expect, vi } from 'vitest';
// Mock logger to prevent initialization issues.
// The shared logger module is replaced with no-op spies exposing the four log-level methods.
// NOTE(review): relies on vitest hoisting vi.mock() above the imports below - confirm.
vi.mock('@core/logger/index.js', () => ({
logger: {
debug: vi.fn(),
info: vi.fn(),
warn: vi.fn(),
error: vi.fn(),
},
}));
import { z } from 'zod';
import { LLMErrorCode } from './error-codes.js';
import {
LLMConfigSchema,
LLMUpdatesSchema,
type LLMConfig,
type ValidatedLLMConfig,
} from './schemas.js';
import { LLM_PROVIDERS } from './types.js';
import {
getSupportedModels,
getMaxInputTokensForModel,
requiresBaseURL,
supportsBaseURL,
getDefaultModelForProvider,
acceptsAnyModel,
} from './registry.js';
import type { LLMProvider } from './types.js';
// Test helpers
/** Builders and lookups shared by the schema tests; everything is derived from the registry. */
class LLMTestHelpers {
    /**
     * Builds a config expected to pass schema validation for `provider`: its default model
     * (else its first supported model, else 'custom-model'), a dummy API key, and a baseURL
     * only when the provider requires one.
     */
    static getValidConfigForProvider(provider: LLMProvider): LLMConfig {
        const supported = getSupportedModels(provider);
        const model = getDefaultModelForProvider(provider) || supported[0] || 'custom-model';
        const withoutBaseURL = { provider, model, apiKey: 'test-key' };
        return requiresBaseURL(provider)
            ? { ...withoutBaseURL, baseURL: 'https://api.test.com/v1' }
            : withoutBaseURL;
    }

    /** First registry provider that requires a baseURL, or null when there is none. */
    static getProviderRequiringBaseURL(): LLMProvider | null {
        const match = LLM_PROVIDERS.find((candidate) => requiresBaseURL(candidate));
        return match ? match : null;
    }

    /** First registry provider that does not support a baseURL, or null when all do. */
    static getProviderNotSupportingBaseURL(): LLMProvider | null {
        const match = LLM_PROVIDERS.find((candidate) => !supportsBaseURL(candidate));
        return match ? match : null;
    }
}
describe('LLMConfigSchema', () => {
describe('Basic Structure Validation', () => {
it('should accept valid minimal config', () => {
const config = LLMTestHelpers.getValidConfigForProvider('openai');
const result = LLMConfigSchema.parse(config);
expect(result.provider).toBe('openai');
expect(result.model).toBeTruthy();
expect(result.apiKey).toBe('test-key');
});
it('should apply default values', () => {
const config = LLMTestHelpers.getValidConfigForProvider('openai');
const result = LLMConfigSchema.parse(config);
expect(result.maxIterations).toBeUndefined();
});
it('should preserve explicit optional values', () => {
const config: LLMConfig = {
provider: 'openai',
model: 'gpt-5',
apiKey: 'test-key',
maxIterations: 25,
temperature: 0.7,
maxOutputTokens: 4000,
};
const result = LLMConfigSchema.parse(config);
expect(result.maxIterations).toBe(25);
expect(result.temperature).toBe(0.7);
expect(result.maxOutputTokens).toBe(4000);
});
});
describe('Required Fields Validation', () => {
it('should require provider field', () => {
const config = {
model: 'gpt-5',
apiKey: 'test-key',
};
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(false);
expect(result.error?.issues[0]?.path).toEqual(['provider']);
});
it('should require model field', () => {
const config = {
provider: 'openai',
apiKey: 'test-key',
};
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(false);
expect(result.error?.issues[0]?.path).toEqual(['model']);
});
it('should require apiKey field', () => {
const config = {
provider: 'openai',
model: 'gpt-5',
};
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(false);
expect(result.error?.issues[0]?.path).toEqual(['apiKey']);
});
});
describe('Provider Validation', () => {
it('should accept all registry providers', () => {
for (const provider of LLM_PROVIDERS) {
const config = LLMTestHelpers.getValidConfigForProvider(provider);
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(true);
if (result.success) {
expect(result.data.provider).toBe(provider);
}
}
});
it('should reject invalid providers', () => {
const config = {
provider: 'invalid-provider',
model: 'test-model',
apiKey: 'test-key',
};
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(false);
expect(result.error?.issues[0]?.code).toBe(z.ZodIssueCode.invalid_enum_value);
expect(result.error?.issues[0]?.path).toEqual(['provider']);
});
it('should be case sensitive for providers', () => {
const config = {
provider: 'OpenAI', // Should be 'openai'
model: 'gpt-5',
apiKey: 'test-key',
};
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(false);
expect(result.error?.issues[0]?.code).toBe(z.ZodIssueCode.invalid_enum_value);
expect(result.error?.issues[0]?.path).toEqual(['provider']);
});
});
describe('Model Validation', () => {
it('should accept known models for each provider', () => {
for (const provider of LLM_PROVIDERS) {
const models = getSupportedModels(provider);
if (models.length === 0) continue; // Skip providers that accept any model
// Test first few models to avoid excessive test runs
const modelsToTest = models.slice(0, 3);
for (const model of modelsToTest) {
const config: LLMConfig = {
provider,
model,
apiKey: 'test-key',
...(requiresBaseURL(provider) && { baseURL: 'https://api.test.com/v1' }),
};
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(true);
}
}
});
it('should reject unknown models for providers with restricted models', () => {
// Find a provider that has specific model restrictions
const provider = LLM_PROVIDERS.find((p) => !acceptsAnyModel(p));
if (!provider) return; // Skip if no providers have model restrictions
const config: LLMConfig = {
provider,
model: 'unknown-model-xyz-123',
apiKey: 'test-key',
...(requiresBaseURL(provider) && { baseURL: 'https://api.test.com/v1' }),
};
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(false);
expect(result.error?.issues[0]?.path).toEqual(['model']);
expect((result.error?.issues[0] as any).params?.code).toBe(
LLMErrorCode.MODEL_INCOMPATIBLE
);
});
});
describe('Temperature Validation', () => {
it('should accept valid temperature values', () => {
const validTemperatures = [0, 0.1, 0.5, 0.7, 1.0];
for (const temperature of validTemperatures) {
const config: LLMConfig = {
...LLMTestHelpers.getValidConfigForProvider('openai'),
temperature,
};
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(true);
if (result.success) {
expect(result.data.temperature).toBe(temperature);
}
}
});
it('should reject invalid temperature values', () => {
const invalidTemperatures = [-0.1, -1, 1.1, 2];
for (const temperature of invalidTemperatures) {
const config: LLMConfig = {
...LLMTestHelpers.getValidConfigForProvider('openai'),
temperature,
};
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(false);
expect(result.error?.issues[0]?.path).toEqual(['temperature']);
}
});
});
describe('BaseURL Validation', () => {
it('should require baseURL for providers that need it', () => {
const provider = LLMTestHelpers.getProviderRequiringBaseURL();
if (!provider) return; // Skip if no providers require baseURL
const config = {
provider,
model: 'custom-model',
apiKey: 'test-key',
// Missing baseURL
};
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(false);
expect(result.error?.issues[0]?.path).toEqual(['baseURL']);
expect((result.error?.issues[0] as any).params?.code).toBe(
LLMErrorCode.BASE_URL_MISSING
);
});
it('should accept baseURL for providers that require it', () => {
const provider = LLMTestHelpers.getProviderRequiringBaseURL();
if (!provider) return;
const config: LLMConfig = {
provider,
model: 'custom-model',
apiKey: 'test-key',
baseURL: 'https://api.custom.com/v1',
};
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(true);
});
it('should reject baseURL for providers that do not support it', () => {
const provider = LLMTestHelpers.getProviderNotSupportingBaseURL();
if (!provider) return; // Skip if all providers support baseURL
const config: LLMConfig = {
...LLMTestHelpers.getValidConfigForProvider(provider),
baseURL: 'https://api.custom.com/v1',
};
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(false);
expect(result.error?.issues[0]?.path).toEqual(['provider']);
expect((result.error?.issues[0] as any).params?.code).toBe(
LLMErrorCode.BASE_URL_INVALID
);
});
});
describe('MaxInputTokens Validation', () => {
it('should accept valid maxInputTokens within model limits', () => {
// Find a provider with specific models to test token limits
const provider = LLM_PROVIDERS.find((p) => !acceptsAnyModel(p));
if (!provider) return;
const models = getSupportedModels(provider);
const model = models[0]!;
const maxTokens = getMaxInputTokensForModel(provider, model);
const config: LLMConfig = {
provider,
model,
apiKey: 'test-key',
maxInputTokens: Math.floor(maxTokens / 2), // Well within limit
...(requiresBaseURL(provider) && { baseURL: 'https://api.test.com/v1' }),
};
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(true);
});
it('should reject maxInputTokens exceeding model limits', () => {
const provider = LLM_PROVIDERS.find((p) => !acceptsAnyModel(p));
if (!provider) return;
const models = getSupportedModels(provider);
const model = models[0]!;
const maxTokens = getMaxInputTokensForModel(provider, model);
const config: LLMConfig = {
provider,
model,
apiKey: 'test-key',
maxInputTokens: maxTokens + 1000, // Exceed limit
...(requiresBaseURL(provider) && { baseURL: 'https://api.test.com/v1' }),
};
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(false);
expect(result.error?.issues[0]?.path).toEqual(['maxInputTokens']);
expect((result.error?.issues[0] as any).params?.code).toBe(
LLMErrorCode.TOKENS_EXCEEDED
);
});
it('should allow maxInputTokens for providers that accept any model', () => {
const provider = LLMTestHelpers.getProviderRequiringBaseURL();
if (!provider || !acceptsAnyModel(provider)) return;
const config: LLMConfig = {
provider,
model: 'custom-model',
apiKey: 'test-key',
baseURL: 'https://api.custom.com/v1',
maxInputTokens: 50000,
};
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(true);
});
});
describe('Edge Cases', () => {
it('should reject empty string values', () => {
const testCases = [
{ provider: '', model: 'gpt-5', apiKey: 'key' },
{ provider: 'openai', model: '', apiKey: 'key' },
{ provider: 'openai', model: 'gpt-5', apiKey: '' },
];
for (const config of testCases) {
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(false);
}
});
it('should reject whitespace-only values', () => {
const testCases = [
{ provider: ' ', model: 'gpt-5', apiKey: 'key' },
{ provider: 'openai', model: ' ', apiKey: 'key' },
{ provider: 'openai', model: 'gpt-5', apiKey: ' ' },
];
for (const config of testCases) {
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(false);
}
});
it('should handle type coercion for numeric fields', () => {
const config: any = {
...LLMTestHelpers.getValidConfigForProvider('openai'),
maxIterations: '25', // String that should coerce to number
temperature: '0.7', // String that should coerce to number
};
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(true);
if (result.success) {
expect(result.data.maxIterations).toBe(25);
expect(result.data.temperature).toBe(0.7);
}
});
it('should reject invalid numeric coercion', () => {
const config: any = {
...LLMTestHelpers.getValidConfigForProvider('openai'),
maxIterations: 'not-a-number',
};
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(false);
if (!result.success) {
expect(result.error.issues[0]?.path).toEqual(['maxIterations']);
}
});
});
describe('Strict Validation', () => {
it('should reject unknown fields', () => {
const config: any = {
...LLMTestHelpers.getValidConfigForProvider('openai'),
unknownField: 'should-fail',
};
const result = LLMConfigSchema.safeParse(config);
expect(result.success).toBe(false);
expect(result.error?.issues[0]?.code).toBe(z.ZodIssueCode.unrecognized_keys);
});
});
describe('Type Safety', () => {
    // The explicit annotations are part of the test: they fail compilation if the
    // schema's input/output types stop matching LLMConfig / ValidatedLLMConfig.
    it('should handle input and output types correctly', () => {
        const rawConfig: LLMConfig = LLMTestHelpers.getValidConfigForProvider('openai');
        const validated: ValidatedLLMConfig = LLMConfigSchema.parse(rawConfig);

        // No default is applied for maxIterations: it stays undefined after parsing
        expect(validated.maxIterations).toBeUndefined();

        // Should preserve input values
        for (const key of ['provider', 'model', 'apiKey'] as const) {
            expect(validated[key]).toBe(rawConfig[key]);
        }
    });

    it('should maintain type consistency', () => {
        const validated = LLMConfigSchema.parse(
            LLMTestHelpers.getValidConfigForProvider('anthropic')
        );
        const { provider, model, apiKey, maxIterations } = validated;

        // TypeScript should infer correct types
        expect(typeof provider).toBe('string');
        expect(typeof model).toBe('string');
        expect(typeof apiKey).toBe('string');
        expect(maxIterations).toBeUndefined();
    });
});
describe('LLMUpdatesSchema', () => {
    describe('Update Requirements', () => {
        // Wraps a parse call in a thunk so it can be handed to expect(...).toThrow()
        const parsing = (updates: unknown) => () => LLMUpdatesSchema.parse(updates);

        it('should pass validation when model is provided', () => {
            expect(parsing({ model: 'gpt-5' })).not.toThrow();
        });

        it('should pass validation when provider is provided', () => {
            expect(parsing({ provider: 'openai' })).not.toThrow();
        });

        it('should pass validation when both model and provider are provided', () => {
            expect(parsing({ model: 'gpt-5', provider: 'openai' })).not.toThrow();
        });

        it('should reject empty updates object', () => {
            expect(parsing({})).toThrow();
        });

        it('should reject updates with only non-key fields (no model/provider)', () => {
            expect(parsing({ maxIterations: 10 })).toThrow();
        });

        it('should pass validation when model/provider with other fields', () => {
            expect(parsing({ model: 'gpt-5', maxIterations: 10 })).not.toThrow();
        });
    });
});
});

View File

@@ -0,0 +1,314 @@
import { LLMErrorCode } from './error-codes.js';
import { ErrorScope, ErrorType } from '@core/errors/types.js';
import { DextoRuntimeError } from '@core/errors/index.js';
import { NonEmptyTrimmed, EnvExpandedString, OptionalURL } from '@core/utils/result.js';
import { getPrimaryApiKeyEnvVar } from '@core/utils/api-key-resolver.js';
import { z } from 'zod';
import {
supportsBaseURL,
requiresBaseURL,
acceptsAnyModel,
supportsCustomModels,
getSupportedModels,
isValidProviderModel,
getMaxInputTokensForModel,
requiresApiKey,
} from './registry.js';
import { LLM_PROVIDERS } from './types.js';
/**
* Options for LLM config validation.
*
* Consumed by createLLMConfigSchema; omitting the options object is the same as `{ strict: true }`.
*/
export interface LLMValidationOptions {
/**
* When true, enforces API key and baseURL requirements.
* When false (relaxed mode), allows missing API keys/baseURLs for interactive configuration.
*
* Only the "required credential / required baseURL is missing" checks are relaxed.
* Rejecting a baseURL on a provider that does not support one, model compatibility
* checks and max-input-token caps run in both modes.
*
* Use strict mode for:
* - Server/API mode (headless, needs full config)
* - MCP mode (headless)
*
* Use relaxed mode for:
* - Web UI (user can configure via settings)
* - CLI (user can configure interactively)
*
* @default true
*/
strict?: boolean;
}
/**
* Default-free field definitions for LLM configuration.
* Shared by the full config schema (LLMConfigBaseSchema) and the updates schema
* (LLMUpdatesSchema), so each field's rules and description are declared once.
*
* Cross-field rules (API key required for provider, baseURL support, model/token
* compatibility) are NOT expressed here; they live in createLLMConfigSchema.
*/
const LLMConfigFields = {
provider: z
.enum(LLM_PROVIDERS)
.describe("LLM provider (e.g., 'openai', 'anthropic', 'google', 'groq')"),
model: NonEmptyTrimmed.describe('Specific model name for the selected provider'),
// Expand $ENV refs and trim; final validation happens with provider context
// Optional for providers that don't need API keys (Ollama, vLLM, etc.)
apiKey: EnvExpandedString()
.optional()
.describe('API key for provider; can be given directly or via $ENV reference'),
// Required in this definition. LLMConfigBaseSchema declares its own optional variant,
// and LLMUpdatesSchema makes every field optional through .partial().
// z.coerce also accepts numeric strings such as '25'; non-numeric strings are rejected.
maxIterations: z.coerce.number().int().positive().describe('Max iterations for agentic loops'),
baseURL: OptionalURL.describe(
'Base URL for provider (e.g., https://api.openai.com/v1). Only certain providers support this.'
),
maxInputTokens: z.coerce
.number()
.int()
.positive()
.optional()
.describe('Max input tokens for history; required for unknown models'),
maxOutputTokens: z.coerce
.number()
.int()
.positive()
.optional()
.describe('Max tokens for model output'),
// Accepted range is the closed interval [0, 1]; values outside it fail validation
temperature: z.coerce
.number()
.min(0)
.max(1)
.optional()
.describe('Randomness: 0 deterministic, 1 creative'),
allowedMediaTypes: z
.array(z.string())
.optional()
.describe(
'MIME type patterns for media expansion (e.g., "image/*", "application/pdf"). ' +
'If omitted, uses model capabilities from registry. Supports wildcards.'
),
// Provider-specific options
/**
* OpenAI reasoning effort level for reasoning-capable models (o1, o3, codex, gpt-5.x).
* Controls how many reasoning tokens the model generates before producing a response.
* - 'none': No reasoning, fastest responses
* - 'minimal': Barely any reasoning, very fast responses
* - 'low': Light reasoning, fast responses
* - 'medium': Balanced reasoning (OpenAI's recommended daily driver)
* - 'high': Thorough reasoning for complex tasks
* - 'xhigh': Extra high reasoning for quality-critical, non-latency-sensitive tasks
*/
reasoningEffort: z
.enum(['none', 'minimal', 'low', 'medium', 'high', 'xhigh'])
.optional()
.describe(
'OpenAI reasoning effort level for reasoning models (o1, o3, codex). ' +
"Options: 'none', 'minimal', 'low', 'medium' (recommended), 'high', 'xhigh'"
),
} as const;
/**
 * Base LLM config object schema (before cross-field validation and branding).
 *
 * Exported so other schemas can extend it. It validates each field on its own and,
 * being `.strict()`, rejects keys that are not declared here. Provider-aware business
 * rules (API key / baseURL requirements, model and token compatibility) are layered
 * on top by createLLMConfigSchema.
 */
export const LLMConfigBaseSchema = z
    .object({
        provider: LLMConfigFields.provider,
        model: LLMConfigFields.model,
        // apiKey is optional at schema level - validated based on provider in superRefine
        apiKey: LLMConfigFields.apiKey,
        // Optional and without a default: an omitted value stays undefined after parsing.
        // Reuses the shared field definition so the coercion rules and description cannot drift.
        maxIterations: LLMConfigFields.maxIterations.optional(),
        baseURL: LLMConfigFields.baseURL,
        maxInputTokens: LLMConfigFields.maxInputTokens,
        maxOutputTokens: LLMConfigFields.maxOutputTokens,
        temperature: LLMConfigFields.temperature,
        allowedMediaTypes: LLMConfigFields.allowedMediaTypes,
        // Provider-specific options
        reasoningEffort: LLMConfigFields.reasoningEffort,
    })
    .strict();
/**
 * Builds the branded LLM config schema: LLMConfigBaseSchema plus provider-aware
 * cross-field checks.
 *
 * Checks performed, in order:
 *  1. (strict only) the provider requires an API key and none was given;
 *  2. a baseURL was given to a provider that does not support one, or
 *     (strict only) a provider that requires a baseURL did not get one;
 *  3. the model is known for the provider (skipped for providers that accept any
 *     model or support custom models);
 *  4. maxInputTokens does not exceed the registry cap for the model (same skip rule).
 * Checks 3 and 4 are skipped entirely when a baseURL was set on a provider that
 * does not support it.
 *
 * @param options.strict - When true (default), enforces API key and baseURL requirements.
 *                         When false, allows missing credentials for interactive configuration.
 * @returns A zod schema whose output type is branded 'ValidatedLLMConfig'.
 */
export function createLLMConfigSchema(options: LLMValidationOptions = {}) {
    const strict = options.strict === undefined ? true : options.strict;

    return LLMConfigBaseSchema.superRefine((config, ctx) => {
        const { provider, model } = config;
        const hasBaseURL = config.baseURL != null && config.baseURL.trim() !== '';
        const requestedMaxInput = config.maxInputTokens;

        // Every problem found here is a custom zod issue carrying Dexto error metadata
        const report = (
            path: (string | number)[],
            message: string,
            params: Record<string, unknown>
        ): void => {
            ctx.addIssue({ code: z.ZodIssueCode.custom, path, message, params });
        };

        // API key validation with provider context.
        // Relaxed mode skips it so the app can launch for interactive configuration.
        // requiresApiKey() is false for providers that authenticate differently:
        // - openai-compatible: local providers like Ollama, vLLM, LocalAI
        // - litellm: self-hosted proxy handles auth internally
        // - vertex: uses Google Cloud ADC
        // - bedrock: uses AWS credentials
        if (strict && requiresApiKey(provider) && !config.apiKey?.trim()) {
            const envVar = getPrimaryApiKeyEnvVar(provider);
            report(['apiKey'], `Missing API key for provider '${provider}' set $${envVar}`, {
                code: LLMErrorCode.API_KEY_MISSING,
                scope: ErrorScope.LLM,
                type: ErrorType.USER,
                provider,
                envVar,
            });
        }

        if (hasBaseURL && !supportsBaseURL(provider)) {
            report(
                ['provider'],
                `Provider '${provider}' does not support baseURL. ` +
                    `Use an 'openai-compatible' provider if you need a custom base URL.`,
                {
                    code: LLMErrorCode.BASE_URL_INVALID,
                    scope: ErrorScope.LLM,
                    type: ErrorType.USER,
                }
            );
        } else if (!hasBaseURL && strict && requiresBaseURL(provider)) {
            // In relaxed mode, the baseURL requirement is not enforced
            report(['baseURL'], `Provider '${provider}' requires a 'baseURL'.`, {
                code: LLMErrorCode.BASE_URL_MISSING,
                scope: ErrorScope.LLM,
                type: ErrorType.USER,
            });
        }

        // Model and token validation always runs (not affected by strict mode), except
        // when a baseURL was supplied to a provider that cannot use one.
        if (hasBaseURL && !supportsBaseURL(provider)) {
            return;
        }

        // Skip model validation for providers that accept any model OR support custom models
        if (!acceptsAnyModel(provider) && !supportsCustomModels(provider)) {
            const supported = getSupportedModels(provider);
            if (!isValidProviderModel(provider, model)) {
                report(
                    ['model'],
                    `Model '${model}' is not supported for provider '${provider}'. ` +
                        `Supported: ${supported.join(', ')}`,
                    {
                        code: LLMErrorCode.MODEL_INCOMPATIBLE,
                        scope: ErrorScope.LLM,
                        type: ErrorType.USER,
                    }
                );
            }
        }

        // Skip token cap validation when no cap was requested, or for providers that
        // accept any model OR support custom models.
        // Note: OpenRouter model validation happens in resolver.ts during switchLLM only
        // to avoid network calls during startup/serverless cold starts
        if (
            requestedMaxInput == null ||
            acceptsAnyModel(provider) ||
            supportsCustomModels(provider)
        ) {
            return;
        }

        try {
            const cap = getMaxInputTokensForModel(provider, model);
            if (requestedMaxInput > cap) {
                report(
                    ['maxInputTokens'],
                    `Max input tokens for model '${model}' is ${cap}. ` +
                        `You provided ${requestedMaxInput}`,
                    {
                        code: LLMErrorCode.TOKENS_EXCEEDED,
                        scope: ErrorScope.LLM,
                        type: ErrorType.USER,
                    }
                );
            }
        } catch (error: unknown) {
            if (
                !(error instanceof DextoRuntimeError) ||
                error.code !== LLMErrorCode.MODEL_UNKNOWN
            ) {
                // Unexpected error: surface it as a system-level schema problem
                const message = error instanceof Error ? error.message : 'Unknown error occurred';
                report(['model'], message, {
                    code: LLMErrorCode.REQUEST_INVALID_SCHEMA,
                    scope: ErrorScope.LLM,
                    type: ErrorType.SYSTEM,
                });
                return;
            }
            // Model not found in registry: forward the registry error's own metadata
            report(['model'], error.message, {
                code: error.code,
                scope: error.scope,
                type: error.type,
            });
        }
    }) // Brand the validated type so it can be distinguished at compile time
        .brand<'ValidatedLLMConfig'>();
}
/**
* Default LLM config schema with strict validation (backwards compatible).
* Use createLLMConfigSchema({ strict: false }) for relaxed validation.
*/
export const LLMConfigSchema = createLLMConfigSchema({ strict: true });
/**
* Relaxed LLM config schema that allows missing API keys and baseURLs.
* Use this for interactive modes (CLI, WebUI) where users can configure later.
* Model compatibility and max-input-token checks still apply in this mode.
*/
export const LLMConfigSchemaRelaxed = createLLMConfigSchema({ strict: false });
// Input type and output types for the zod schema.
// LLMConfig is what callers may pass in (before coercion and validation).
export type LLMConfig = z.input<typeof LLMConfigSchema>;
// ValidatedLLMConfig is the parsed output and carries the 'ValidatedLLMConfig' brand,
// so it is only produced by running a config through the schema.
export type ValidatedLLMConfig = z.output<typeof LLMConfigSchema>;
// PATCH-like schema for updates (switch flows).
// Every config field becomes optional, but an update must name a model and/or a provider.
// Unlike the full config schema this object is not strict, so unknown keys are not rejected.
// TODO: when moving to zod v4 we might be able to set this as strict
export const LLMUpdatesSchema = z
    .object({ ...LLMConfigFields })
    .partial()
    .superRefine((updates, refinement) => {
        // Require at least one meaningful change field: model or provider
        const namesTarget = Boolean(updates.model) || Boolean(updates.provider);
        if (namesTarget) {
            return;
        }
        refinement.addIssue({
            code: z.ZodIssueCode.custom,
            message: 'At least model or provider must be specified for LLM switch',
            path: [],
        });
    });
// Shape callers may pass to LLMUpdatesSchema (before coercion and validation)
export type LLMUpdates = z.input<typeof LLMUpdatesSchema>;
// Re-export context type from llm module
export type { LLMUpdateContext } from '../llm/types.js';

View File

@@ -0,0 +1,267 @@
import { ToolManager } from '../../tools/tool-manager.js';
import { ValidatedLLMConfig } from '../schemas.js';
import { LLMError } from '../errors.js';
import { createOpenAI } from '@ai-sdk/openai';
import { createGoogleGenerativeAI } from '@ai-sdk/google';
import { createAnthropic } from '@ai-sdk/anthropic';
import { createGroq } from '@ai-sdk/groq';
import { createXai } from '@ai-sdk/xai';
import { createVertex } from '@ai-sdk/google-vertex';
import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';
import { VercelLLMService } from './vercel.js';
import { LanguageModel } from 'ai';
import { SessionEventBus } from '../../events/index.js';
import { createCohere } from '@ai-sdk/cohere';
import { createLocalLanguageModel } from '../providers/local/ai-sdk-adapter.js';
import type { IConversationHistoryProvider } from '../../session/history/types.js';
import type { SystemPromptManager } from '../../systemPrompt/manager.js';
import type { IDextoLogger } from '../../logger/v2/types.js';
import { requiresApiKey } from '../registry.js';
import { getPrimaryApiKeyEnvVar, resolveApiKeyForProvider } from '../../utils/api-key-resolver.js';
import type { CompactionConfigInput } from '../../context/compaction/schemas.js';
// Dexto Gateway headers for usage tracking.
// Only attached to requests built by the 'dexto' provider branch of createVercelModel.
const DEXTO_GATEWAY_HEADERS = {
// Sent only when the caller supplies a session ID in the provider context
SESSION_ID: 'X-Dexto-Session-ID',
// Always sent; falls back to 'cli' when the context gives no client source
CLIENT_SOURCE: 'X-Dexto-Source',
// Sent only when the DEXTO_CLI_VERSION environment variable is set
CLIENT_VERSION: 'X-Dexto-Version',
} as const;
/**
* Context for model creation, including session info for usage tracking.
*
* In createVercelModel these values are only read by the 'dexto' provider branch,
* where they become request headers.
*/
export interface DextoProviderContext {
/** Session ID for usage tracking; when omitted, no session header is sent */
sessionId?: string;
/** Client source for usage attribution (cli, web, sdk); treated as 'cli' when omitted */
clientSource?: 'cli' | 'web' | 'sdk';
}
/**
 * Create a Vercel AI SDK LanguageModel from config.
 *
 * With explicit providers, the config's provider field directly determines
 * where requests go. No auth-dependent routing - what you configure is what runs.
 *
 * The API key comes from the config when present, otherwise from
 * resolveApiKeyForProvider(provider).
 *
 * @param llmConfig - LLM configuration from agent config
 * @param context - Optional context for usage tracking (session ID, etc.)
 * @returns Vercel AI SDK LanguageModel instance
 * @throws LLMError.apiKeyMissing when the provider requires an API key and none resolves
 * @throws LLMError.baseUrlMissing for 'openai-compatible' / 'litellm' without a base URL
 * @throws LLMError.missingConfig for 'vertex' without GOOGLE_VERTEX_PROJECT, or
 *         'bedrock' without AWS_REGION / AWS_DEFAULT_REGION
 * @throws LLMError.unsupportedProvider for a provider with no branch below
 */
export function createVercelModel(
    llmConfig: ValidatedLLMConfig,
    context?: DextoProviderContext
): LanguageModel {
    const { provider, model, baseURL } = llmConfig;
    const apiKey = llmConfig.apiKey || resolveApiKeyForProvider(provider);

    // Runtime check: if provider requires API key but none is configured, fail with helpful message
    if (requiresApiKey(provider) && !apiKey?.trim()) {
        const envVar = getPrimaryApiKeyEnvVar(provider);
        throw LLMError.apiKeyMissing(provider, envVar);
    }

    switch (provider.toLowerCase()) {
        case 'openai': {
            // Regular OpenAI - strict compatibility, no baseURL
            return createOpenAI({ apiKey: apiKey ?? '' })(model);
        }
        case 'openai-compatible': {
            // OpenAI-compatible - requires baseURL, uses chat completions endpoint
            // Must use .chat() as most compatible endpoints (like Ollama) don't support Responses API
            // A single trailing slash is stripped; OPENAI_BASE_URL is the fallback
            const compatibleBaseURL =
                baseURL?.replace(/\/$/, '') || process.env.OPENAI_BASE_URL?.replace(/\/$/, '');
            if (!compatibleBaseURL) {
                throw LLMError.baseUrlMissing('openai-compatible');
            }
            return createOpenAI({ apiKey: apiKey ?? '', baseURL: compatibleBaseURL }).chat(model);
        }
        case 'openrouter': {
            // OpenRouter - unified API gateway for 100+ models (BYOK)
            // Model IDs are in OpenRouter format (e.g., 'anthropic/claude-sonnet-4-5-20250929')
            const orBaseURL = baseURL || 'https://openrouter.ai/api/v1';
            return createOpenAI({ apiKey: apiKey ?? '', baseURL: orBaseURL }).chat(model);
        }
        case 'litellm': {
            // LiteLLM - OpenAI-compatible proxy for 100+ LLM providers
            // User must provide their own LiteLLM proxy URL
            if (!baseURL) {
                throw LLMError.baseUrlMissing('litellm');
            }
            return createOpenAI({ apiKey: apiKey ?? '', baseURL }).chat(model);
        }
        case 'glama': {
            // Glama - OpenAI-compatible gateway for multiple LLM providers
            // Fixed endpoint, no user configuration needed
            const glamaBaseURL = 'https://glama.ai/api/gateway/openai/v1';
            return createOpenAI({ apiKey: apiKey ?? '', baseURL: glamaBaseURL }).chat(model);
        }
        case 'dexto': {
            // Dexto Gateway - OpenAI-compatible proxy with per-request billing
            // Routes through api.dexto.ai to OpenRouter, deducts from user balance
            // Requires DEXTO_API_KEY from `dexto login`
            //
            // Model IDs are in OpenRouter format (e.g., 'anthropic/claude-sonnet-4-5-20250929')
            // Users explicitly choose `provider: dexto` in their config
            //
            // Note: 402 "insufficient credits" errors are handled in turn-executor.ts mapProviderError()
            const dextoBaseURL = 'https://api.dexto.ai/v1';

            // Build headers for usage tracking
            const headers: Record<string, string> = {
                [DEXTO_GATEWAY_HEADERS.CLIENT_SOURCE]: context?.clientSource ?? 'cli',
            };
            if (context?.sessionId) {
                headers[DEXTO_GATEWAY_HEADERS.SESSION_ID] = context.sessionId;
            }
            if (process.env.DEXTO_CLI_VERSION) {
                headers[DEXTO_GATEWAY_HEADERS.CLIENT_VERSION] = process.env.DEXTO_CLI_VERSION;
            }

            // Model is already in OpenRouter format - pass through directly
            return createOpenAI({ apiKey: apiKey ?? '', baseURL: dextoBaseURL, headers }).chat(
                model
            );
        }
        case 'vertex': {
            // Google Vertex AI - supports both Gemini and Claude models
            // Auth via Application Default Credentials (ADC)
            //
            // TODO: Integrate with agent config (llmConfig.vertex?.projectId) as primary,
            // falling back to env vars. This would allow per-agent Vertex configuration.
            const projectId = process.env.GOOGLE_VERTEX_PROJECT;
            if (!projectId) {
                throw LLMError.missingConfig(
                    'vertex',
                    'GOOGLE_VERTEX_PROJECT environment variable'
                );
            }
            const location = process.env.GOOGLE_VERTEX_LOCATION;

            // Route based on model type: Claude models use /anthropic subpath
            if (model.includes('claude')) {
                // Claude models on Vertex use the /anthropic subpath export
                // Default to us-east5 for Claude (limited region availability)
                return createVertexAnthropic({
                    project: projectId,
                    location: location || 'us-east5',
                })(model);
            }

            // Gemini models use the main export
            // Default to us-central1 for Gemini (widely available)
            return createVertex({
                project: projectId,
                location: location || 'us-central1',
            })(model);
        }
        case 'bedrock': {
            // Amazon Bedrock - AWS-hosted gateway for Claude, Nova, Llama, Mistral
            // Auth via AWS credentials (env vars or credential provider)
            //
            // TODO: Add credentialProvider support for:
            // - ~/.aws/credentials file profiles (fromIni)
            // - AWS SSO sessions (fromSSO)
            // - IAM roles on EC2/Lambda (fromNodeProviderChain)
            // This would require adding @aws-sdk/credential-providers dependency
            // and exposing a config option like llmConfig.bedrock?.credentialProvider
            //
            // Current implementation: SDK reads directly from env vars:
            // - AWS_REGION (required)
            // - AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY (required)
            // - AWS_SESSION_TOKEN (optional, for temporary credentials)
            const region = process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION;
            if (!region) {
                throw LLMError.missingConfig(
                    'bedrock',
                    'AWS_REGION or AWS_DEFAULT_REGION environment variable'
                );
            }

            // Auto-detect cross-region inference profile prefix based on user's region
            // Users can override by explicitly using prefixed model IDs (e.g., eu.anthropic.claude...)
            //
            // Every geography prefix must be recognised here: an explicitly prefixed ID that
            // is missed (e.g. 'apac.anthropic...') would otherwise get a second prefix
            // ('us.apac.anthropic...') and no longer name a valid inference profile.
            const inferenceProfilePrefixes = [
                'us.',
                'eu.',
                'apac.',
                'jp.',
                'au.',
                'us-gov.',
                'global.',
            ];
            let modelId = model;
            const hasRegionPrefix = inferenceProfilePrefixes.some((geoPrefix) =>
                model.startsWith(geoPrefix)
            );
            if (!hasRegionPrefix) {
                // Unprefixed IDs default to the EU profile in eu-* regions and the US profile elsewhere
                const prefix = region.startsWith('eu-') ? 'eu.' : 'us.';
                modelId = `${prefix}${model}`;
            }

            // SDK automatically reads AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN
            return createAmazonBedrock({ region })(modelId);
        }
        case 'anthropic':
            return createAnthropic({ apiKey: apiKey ?? '' })(model);
        case 'google':
            return createGoogleGenerativeAI({ apiKey: apiKey ?? '' })(model);
        case 'groq':
            return createGroq({ apiKey: apiKey ?? '' })(model);
        case 'xai':
            return createXai({ apiKey: apiKey ?? '' })(model);
        case 'cohere':
            return createCohere({ apiKey: apiKey ?? '' })(model);
        case 'ollama': {
            // Ollama - local model server with OpenAI-compatible API
            // Uses the /v1 endpoint for AI SDK compatibility
            // Default URL: http://localhost:11434
            const ollamaBaseURL = baseURL || 'http://localhost:11434/v1';
            // Ollama doesn't require an API key, but the SDK needs a non-empty string
            return createOpenAI({ apiKey: 'ollama', baseURL: ollamaBaseURL }).chat(model);
        }
        case 'local': {
            // Native node-llama-cpp execution via AI SDK adapter.
            // Model is loaded lazily on first use.
            return createLocalLanguageModel({
                modelId: model,
            });
        }
        default:
            throw LLMError.unsupportedProvider(provider);
    }
}
/**
* Create an LLM service instance using the Vercel AI SDK.
* All providers are routed through the unified Vercel service.
*
* The language model is built with createVercelModel(config, { sessionId }), so any
* error it throws (missing API key, missing base URL, unsupported provider, ...)
* propagates to the caller. No client source is passed in that context.
*
* @param config LLM configuration from the config file
* @param toolManager Unified tool manager instance
* @param systemPromptManager Prompt manager for system prompts
* @param historyProvider History provider for conversation persistence
* @param sessionEventBus Session-level event bus for emitting LLM events
* @param sessionId Session ID
* @param resourceManager Resource manager for blob storage and resource access
* @param logger Logger instance for dependency injection
* @param compactionStrategy Optional compaction strategy for context management
* @param compactionConfig Optional compaction configuration for thresholds
* @returns VercelLLMService instance
*/
export function createLLMService(
config: ValidatedLLMConfig,
toolManager: ToolManager,
systemPromptManager: SystemPromptManager,
historyProvider: IConversationHistoryProvider,
sessionEventBus: SessionEventBus,
sessionId: string,
resourceManager: import('../../resources/index.js').ResourceManager,
logger: IDextoLogger,
compactionStrategy?: import('../../context/compaction/types.js').ICompactionStrategy | null,
compactionConfig?: CompactionConfigInput
): VercelLLMService {
const model = createVercelModel(config, { sessionId });
// Note: the service constructor takes the arguments in a different order than this factory
return new VercelLLMService(
toolManager,
model,
systemPromptManager,
historyProvider,
sessionEventBus,
config,
sessionId,
resourceManager,
logger,
compactionStrategy,
compactionConfig
);
}

View File

@@ -0,0 +1 @@
export * from './types.js';

View File

@@ -0,0 +1,234 @@
import { DextoAgent } from '../../agent/DextoAgent.js';
import {
resolveApiKeyForProvider,
getPrimaryApiKeyEnvVar,
PROVIDER_API_KEY_MAP,
} from '../../utils/api-key-resolver.js';
import type { LLMProvider } from '../types.js';
import type { AgentConfig } from '../../agent/schemas.js';
/**
* Shared utilities for LLM service integration tests
*/
/** Handle returned by createTestEnvironment: a started agent plus its teardown. */
export interface TestEnvironment {
// Agent that has already been started by createTestEnvironment
agent: DextoAgent;
// Session ID the test should pass to the agent (createTestEnvironment defaults it to 'test-session')
sessionId: string;
// Stops the agent if it is still started; await it in a `finally` block
cleanup: () => Promise<void>;
}
/**
 * Creates a test environment with real dependencies (no mocks).
 * Construction and startup are delegated to DextoAgent so the complex
 * initialization is handled in one place.
 *
 * @param config Full agent configuration for the test
 * @param sessionId Session ID handed back to the test; defaults to 'test-session'
 * @returns The started agent, the session ID, and a cleanup callback
 */
export async function createTestEnvironment(
    config: AgentConfig,
    sessionId: string = 'test-session'
): Promise<TestEnvironment> {
    const agent = new DextoAgent(config);
    await agent.start();

    // Teardown waits for agent.stop() to finish, and skips the call entirely
    // when the agent reports that it is no longer started.
    const cleanup = async (): Promise<void> => {
        if (!agent.isStarted()) {
            return;
        }
        await agent.stop();
    };

    return { agent, sessionId, cleanup };
}
// Standard test cases have been moved inline to each test file
// This reduces complexity and makes tests more explicit
/**
 * Wraps the given LLM settings in the agent configuration shared by every
 * integration test: in-memory cache/database, local blob store, console logging,
 * auto-approved tools and elicitation disabled. A fresh object is built per call.
 */
function buildTestAgentConfig(llm: AgentConfig['llm']): AgentConfig {
    return {
        systemPrompt: 'You are a helpful assistant for testing purposes.',
        llm,
        mcpServers: {},
        storage: {
            cache: { type: 'in-memory' },
            database: { type: 'in-memory' },
            blob: { type: 'local', storePath: '/tmp/test-blobs' },
        },
        sessions: {
            maxSessions: 10,
            sessionTTL: 60000, // 60s for tests
        },
        logger: {
            level: 'info',
            transports: [{ type: 'console' }],
        },
        toolConfirmation: {
            mode: 'auto-approve', // Tests don't have interactive approval
            timeout: 120000,
        },
        elicitation: {
            enabled: false, // Tests don't handle elicitation
            timeout: 120000,
        },
    };
}

/**
 * Resolves the API key for `provider`, or throws an error naming the primary
 * environment variable and the test suite (`suiteLabel`) that needs it.
 */
function requireTestApiKey(provider: LLMProvider, suiteLabel: string) {
    const apiKey = resolveApiKeyForProvider(provider);
    if (!apiKey) {
        throw new Error(
            `${getPrimaryApiKeyEnvVar(provider)} environment variable is required for ${suiteLabel}`
        );
    }
    return apiKey;
}

/**
 * Test configuration helpers that create full AgentConfig objects
 */
export const TestConfigs = {
    /**
     * Creates OpenAI test config
     *
     * @throws Error when no OpenAI API key can be resolved
     */
    createOpenAIConfig(): AgentConfig {
        const provider: LLMProvider = 'openai';
        const apiKey = requireTestApiKey(provider, 'OpenAI integration tests');
        return buildTestAgentConfig({
            provider,
            model: 'gpt-4o-mini', // Use cheapest non-reasoning model for testing
            apiKey,
            maxOutputTokens: 1000, // Enough for reasoning models (reasoning + answer)
            temperature: 0, // Deterministic responses
            maxIterations: 1, // Minimal tool iterations
        });
    },
    /**
     * Creates Anthropic test config
     *
     * @throws Error when no Anthropic API key can be resolved
     */
    createAnthropicConfig(): AgentConfig {
        const provider: LLMProvider = 'anthropic';
        const apiKey = requireTestApiKey(provider, 'Anthropic integration tests');
        return buildTestAgentConfig({
            provider,
            model: 'claude-haiku-4-5-20251001', // Use cheapest model for testing
            apiKey,
            maxOutputTokens: 1000, // Enough for reasoning models (reasoning + answer)
            temperature: 0,
            maxIterations: 1,
        });
    },
    /**
     * Creates Vercel test config - parametric for different providers/models
     *
     * @param provider Provider to target; defaults to 'openai'
     * @param model Model override; falls back to a per-provider default when omitted or empty
     * @throws Error when the provider requires an API key and none can be resolved
     */
    createVercelConfig(provider: LLMProvider = 'openai', model?: string): AgentConfig {
        const apiKey = resolveApiKeyForProvider(provider);
        // Only enforce API key check for providers that require it (exclude local, ollama, vertex with empty key maps)
        if (!apiKey && providerRequiresApiKey(provider)) {
            throw new Error(
                `${getPrimaryApiKeyEnvVar(provider)} environment variable is required for Vercel integration tests with ${provider}`
            );
        }
        // Default models for common providers
        const defaultModels: Record<LLMProvider, string> = {
            openai: 'gpt-4o-mini',
            anthropic: 'claude-haiku-4-5-20251001',
            google: 'gemini-2.0-flash',
            groq: 'llama-3.1-8b-instant',
            xai: 'grok-beta',
            cohere: 'command-r',
            'openai-compatible': 'gpt-5-mini',
            openrouter: 'anthropic/claude-3.5-haiku', // OpenRouter model format: provider/model
            litellm: 'gpt-4', // LiteLLM model names follow the provider's convention
            glama: 'openai/gpt-4o', // Glama model format: provider/model
            vertex: 'gemini-2.5-pro', // Vertex AI uses ADC auth, not API keys
            bedrock: 'anthropic.claude-3-5-haiku-20241022-v1:0', // Bedrock uses AWS credentials, not API keys
            local: 'llama-3.2-3b-q4', // Native node-llama-cpp GGUF models
            ollama: 'llama3.2', // Ollama server models
            dexto: 'anthropic/claude-4.5-sonnet', // Dexto gateway (OpenRouter model format)
        };
        return buildTestAgentConfig({
            provider,
            model: model || defaultModels[provider],
            apiKey,
            maxOutputTokens: 1000, // Enough for reasoning models (reasoning + answer)
            temperature: 0,
            maxIterations: 1,
        });
    },
} as const;
/**
 * Helper to check if a provider requires an API key
 * Providers with empty arrays in PROVIDER_API_KEY_MAP don't require API keys (e.g., local, ollama, vertex)
 *
 * @param provider Provider to look up
 * @returns true only when the map lists at least one environment variable for the
 *          provider; false for an empty list or a provider missing from the map
 */
export function providerRequiresApiKey(provider: LLMProvider): boolean {
    const envVars = PROVIDER_API_KEY_MAP[provider];
    // Explicit nullish check keeps the result a real boolean even when the lookup misses
    return envVars != null && envVars.length > 0;
}
/**
 * Reports whether an API key can currently be resolved for a provider.
 * Used to skip tests when API keys are not configured.
 *
 * Despite the name, this does not say whether the provider needs a key
 * (providerRequiresApiKey does that); it only says whether one is available.
 *
 * @param provider Provider whose API key should be looked up
 * @returns true when a non-empty key was resolved, false otherwise
 */
export function requiresApiKey(provider: LLMProvider): boolean {
    const resolvedKey = resolveApiKeyForProvider(provider);
    return Boolean(resolvedKey);
}
/**
* Cleanup helper: runs and awaits the environment's own cleanup callback.
* Intended for the `finally` block of a test so teardown happens even when assertions fail.
*
* @param _env Environment previously returned by createTestEnvironment
*/
export async function cleanupTestEnvironment(_env: TestEnvironment): Promise<void> {
await _env.cleanup();
}

View File

@@ -0,0 +1,22 @@
import { LanguageModel } from 'ai';
import type { LLMProvider } from '../types.js';
/**
* Configuration object returned by LLMService.getConfig()
*/
export type LLMServiceConfig = {
// Provider identifier from the LLM provider list
provider: LLMProvider;
// Vercel AI SDK model instance (not a model name string)
model: LanguageModel;
// NOTE(review): presumably the maxInputTokens value taken from user config, if any - confirm against the producer
configuredMaxInputTokens?: number | null;
// NOTE(review): presumably the model's own input-token limit from the registry - confirm against the producer
modelMaxInputTokens?: number | null;
};
/**
* Token usage statistics from LLM
*/
export interface LLMTokenUsage {
inputTokens: number;
outputTokens: number;
// Optional: absent when no reasoning-token count is reported
reasoningTokens?: number;
// NOTE(review): whether this includes reasoningTokens is not visible here - confirm with the producer
totalTokens: number;
}

View File

@@ -0,0 +1,367 @@
import { describe, test, expect } from 'vitest';
import {
createTestEnvironment,
TestConfigs,
requiresApiKey,
cleanupTestEnvironment,
} from './test-utils.integration.js';
import { ErrorScope, ErrorType } from '@core/errors/index.js';
import { LLMErrorCode } from '../error-codes.js';
/**
* Vercel AI SDK LLM Service Integration Tests
*
* These tests verify the Vercel AI SDK service works correctly with real API calls.
* They test multiple providers through the Vercel AI SDK.
*/
describe('Vercel AI SDK LLM Service Integration', () => {
// Test with OpenAI through Vercel AI SDK by default
const defaultProvider = 'openai';
const skipTests = !requiresApiKey(defaultProvider);
const t = skipTests ? test.skip : test.concurrent;
// Normal operation tests
t(
'generate works normally',
async () => {
const env = await createTestEnvironment(
TestConfigs.createVercelConfig(defaultProvider)
);
try {
const response = await env.agent.run('Hello', undefined, undefined, env.sessionId);
expect(response).toBeTruthy();
expect(typeof response).toBe('string');
expect(response.length).toBeGreaterThan(0);
} finally {
await cleanupTestEnvironment(env);
}
},
60000
);
t(
'multi-turn generate works normally',
async () => {
const env = await createTestEnvironment(
TestConfigs.createVercelConfig(defaultProvider)
);
try {
const response1 = await env.agent.run(
'My name is Bob',
undefined,
undefined,
env.sessionId
);
const response2 = await env.agent.run(
'What is my name?',
undefined,
undefined,
env.sessionId
);
expect(response1).toBeTruthy();
expect(response2).toBeTruthy();
expect(typeof response1).toBe('string');
expect(typeof response2).toBe('string');
} finally {
await cleanupTestEnvironment(env);
}
},
60000
);
t(
'stream works normally',
async () => {
const env = await createTestEnvironment(
TestConfigs.createVercelConfig(defaultProvider)
);
try {
const response = await env.agent.run(
'Hello',
undefined,
undefined,
env.sessionId,
true
);
expect(response).toBeTruthy();
expect(typeof response).toBe('string');
expect(response.length).toBeGreaterThan(0);
} finally {
await cleanupTestEnvironment(env);
}
},
60000
);
t(
'multi-turn stream works normally',
async () => {
const env = await createTestEnvironment(
TestConfigs.createVercelConfig(defaultProvider)
);
try {
const response1 = await env.agent.run(
'I like pizza',
undefined,
undefined,
env.sessionId,
true
);
const response2 = await env.agent.run(
'What do I like?',
undefined,
undefined,
env.sessionId,
true
);
expect(response1).toBeTruthy();
expect(response2).toBeTruthy();
expect(typeof response1).toBe('string');
expect(typeof response2).toBe('string');
} finally {
await cleanupTestEnvironment(env);
}
},
60000
);
t(
'creating sessions works normally',
async () => {
const env = await createTestEnvironment(
TestConfigs.createVercelConfig(defaultProvider)
);
try {
const newSession = await env.agent.createSession('test-vercel-session');
const response = await env.agent.run(
'Hello in new session',
undefined,
undefined,
newSession.id
);
expect(newSession).toBeTruthy();
expect(newSession.id).toBe('test-vercel-session');
expect(response).toBeTruthy();
expect(typeof response).toBe('string');
} finally {
await cleanupTestEnvironment(env);
}
},
60000
);
// Multiple Provider Support through Vercel AI SDK
// Registered as a concurrent test only when requiresApiKey('anthropic') is truthy; skipped otherwise.
const anthropicViaVercelTest = requiresApiKey('anthropic') ? test.concurrent : test.skip;
anthropicViaVercelTest(
    'anthropic through vercel works normally',
    async () => {
        const env = await createTestEnvironment(TestConfigs.createVercelConfig('anthropic'));
        try {
            const reply = await env.agent.run('Hello', undefined, undefined, env.sessionId);

            expect(reply).toBeTruthy();
            expect(typeof reply).toBe('string');
            expect(reply.length).toBeGreaterThan(0);
        } finally {
            // Cleanup runs even when an assertion above throws.
            await cleanupTestEnvironment(env);
        }
    },
    60000 // per-test timeout in milliseconds
);
// Registered as a concurrent test only when requiresApiKey('google') is truthy; skipped otherwise.
const googleViaVercelTest = requiresApiKey('google') ? test.concurrent : test.skip;
googleViaVercelTest(
    'google through vercel works normally',
    async () => {
        const env = await createTestEnvironment(TestConfigs.createVercelConfig('google'));
        try {
            const reply = await env.agent.run('Hello', undefined, undefined, env.sessionId);

            expect(reply).toBeTruthy();
            expect(typeof reply).toBe('string');
            expect(reply.length).toBeGreaterThan(0);
        } finally {
            // Cleanup runs even when an assertion above throws.
            await cleanupTestEnvironment(env);
        }
    },
    60000 // per-test timeout in milliseconds
);
// Error handling tests
// A file with an unrecognised MIME type must make `run` reject with a single
// INPUT_FILE_UNSUPPORTED issue scoped to the LLM and typed as a user error.
t(
    'errors handled with correct error codes',
    async () => {
        // Test with unsupported file type to trigger validation error
        const unsupportedFile = {
            data: Buffer.from('test data').toString('base64'),
            mimeType: 'application/unknown-type',
            filename: 'test.unknown',
        };
        const env = await createTestEnvironment(TestConfigs.createVercelConfig(defaultProvider));
        try {
            const expectedIssue = expect.objectContaining({
                code: LLMErrorCode.INPUT_FILE_UNSUPPORTED,
                scope: ErrorScope.LLM,
                type: ErrorType.USER,
            });
            const pendingRun = env.agent.run(
                'Process this file',
                undefined,
                unsupportedFile,
                env.sessionId
            );

            await expect(pendingRun).rejects.toMatchObject({ issues: [expectedIssue] });
        } finally {
            // Cleanup runs even when the expectation above throws.
            await cleanupTestEnvironment(env);
        }
    },
    60000 // per-test timeout in milliseconds
);
// Positive media/file tests (OpenAI via Vercel)
// Registered as a concurrent test only when requiresApiKey('openai') is truthy; skipped otherwise.
const openaiImageInputTest = requiresApiKey('openai') ? test.concurrent : test.skip;
openaiImageInputTest(
    'openai via vercel: image input works',
    async () => {
        const env = await createTestEnvironment(TestConfigs.createVercelConfig('openai'));
        // Flipped by the 'llm:error' listener; must still be false after the run.
        let sawLlmError = false;
        const markLlmError = () => {
            sawLlmError = true;
        };
        try {
            env.agent.agentEventBus.on('llm:error', markLlmError);
            // 1x1 PNG (red pixel) base64 (no data URI), minimal cost
            const tinyPng = {
                image: 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==',
                mimeType: 'image/png',
            };
            const answer = await env.agent.run(
                'What is in the image?',
                tinyPng,
                undefined,
                env.sessionId
            );

            expect(typeof answer).toBe('string');
            expect(answer.length).toBeGreaterThan(0);
            expect(sawLlmError).toBe(false);
        } finally {
            // Best-effort listener removal: a failure here must not mask the test result
            // or prevent the environment cleanup below.
            try {
                env.agent.agentEventBus.off('llm:error', markLlmError);
            } catch (_e) {
                // ignore
            }
            await cleanupTestEnvironment(env);
        }
    },
    60000 // per-test timeout in milliseconds
);
// Registered as a concurrent test only when requiresApiKey('openai') is truthy; skipped otherwise.
const openaiPdfInputTest = requiresApiKey('openai') ? test.concurrent : test.skip;
openaiPdfInputTest(
    'openai via vercel: pdf file input works',
    async () => {
        const env = await createTestEnvironment(TestConfigs.createVercelConfig('openai'));
        // Flipped by the 'llm:error' listener; must still be false after the run.
        let sawLlmError = false;
        const markLlmError = () => {
            sawLlmError = true;
        };
        try {
            env.agent.agentEventBus.on('llm:error', markLlmError);
            // Valid tiny PDF (Hello World) base64 from OpenAI tests
            const tinyPdf = {
                data: 'JVBERi0xLjQKMSAwIG9iago8PAovVHlwZSAvQ2F0YWxvZwovUGFnZXMgMiAwIFIKPj4KZW5kb2JqCjIgMCBvYmoKPDwKL1R5cGUgL1BhZ2VzCi9LaWRzIFszIDAgUl0KL0NvdW50IDEKPj4KZW5kb2JqCjMgMCBvYmoKPDwKL1R5cGUgL1BhZ2UKL1BhcmVudCAyIDAgUgovTWVkaWFCb3ggWzAgMCA2MTIgNzkyXQovQ29udGVudHMgNCAwIFIKPj4KZW5kb2JqCjQgMCBvYmoKPDwKL0xlbmd0aCA0NAo+PgpzdHJlYW0KQlQKL0YxIDEyIFRmCjcyIDcyMCBUZAooSGVsbG8gV29ybGQpIFRqCkVUCmVuZHN0cmVhbQplbmRvYmoKeHJlZgowIDUKMDAwMDAwMDAwMCA2NTUzNSBmIAowMDAwMDAwMDEwIDAwMDAwIG4gCjAwMDAwMDAwNzkgMDAwMDAgbiAKMDAwMDAwMDE3MyAwMDAwMCBuIAowMDAwMDAwMzAxIDAwMDAwIG4gCnRyYWlsZXIKPDwKL1NpemUgNQovUm9vdCAxIDAgUgo+PgpzdGFydHhyZWYKMzgwCiUlRU9G',
                mimeType: 'application/pdf',
                filename: 'test.pdf',
            };
            const answer = await env.agent.run(
                'Summarize this PDF',
                undefined,
                tinyPdf,
                env.sessionId
            );

            expect(typeof answer).toBe('string');
            expect(answer.length).toBeGreaterThan(0);
            expect(sawLlmError).toBe(false);
        } finally {
            // Best-effort listener removal: a failure here must not mask the test result
            // or prevent the environment cleanup below.
            try {
                env.agent.agentEventBus.off('llm:error', markLlmError);
            } catch (_e) {
                // ignore
            }
            await cleanupTestEnvironment(env);
        }
    },
    60000 // per-test timeout in milliseconds
);
// Registered as a concurrent test only when requiresApiKey('openai') is truthy; skipped otherwise.
const openaiStreamingImageTest = requiresApiKey('openai') ? test.concurrent : test.skip;
openaiStreamingImageTest(
    'openai via vercel: streaming with image works',
    async () => {
        const env = await createTestEnvironment(TestConfigs.createVercelConfig('openai'));
        // Flipped by the 'llm:error' listener; must still be false after the run.
        let sawLlmError = false;
        const markLlmError = () => {
            sawLlmError = true;
        };
        try {
            env.agent.agentEventBus.on('llm:error', markLlmError);
            const tinyPng = {
                image: 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==',
                mimeType: 'image/png',
            };
            // Same image input as the non-streaming case, with the fifth `run` argument set to `true`.
            const answer = await env.agent.run(
                'Describe this image in one sentence',
                tinyPng,
                undefined,
                env.sessionId,
                true
            );

            expect(typeof answer).toBe('string');
            expect(answer.length).toBeGreaterThan(0);
            expect(sawLlmError).toBe(false);
        } finally {
            // Best-effort listener removal: a failure here must not mask the test result
            // or prevent the environment cleanup below.
            try {
                env.agent.agentEventBus.off('llm:error', markLlmError);
            } catch (_e) {
                // ignore
            }
            await cleanupTestEnvironment(env);
        }
    },
    60000 // per-test timeout in milliseconds
);
// Skip test warnings
// When the suite is skipped, register one always-passing test whose only job is to
// print which environment variable was missing.
if (skipTests) {
    test('Vercel AI SDK integration tests skipped - no API key', () => {
        const missingVariable = `${defaultProvider.toUpperCase()}_API_KEY`;
        console.warn(
            `Vercel AI SDK integration tests skipped: ${missingVariable} environment variable not found`
        );
        // Placeholder assertion so the test contains at least one expectation.
        expect(true).toBe(true);
    });
}
});

View File

@@ -0,0 +1,295 @@
import { LanguageModel, type ModelMessage } from 'ai';
import { ToolManager } from '../../tools/tool-manager.js';
import { LLMServiceConfig } from './types.js';
import type { IDextoLogger } from '../../logger/v2/types.js';
import { DextoLogComponent } from '../../logger/v2/types.js';
import { ToolSet } from '../../tools/types.js';
import { ContextManager } from '../../context/manager.js';
import { getEffectiveMaxInputTokens, getMaxInputTokensForModel } from '../registry.js';
import type { ModelLimits } from '../../context/compaction/overflow.js';
import type { CompactionConfigInput } from '../../context/compaction/schemas.js';
import { ContentPart } from '../../context/types.js';
import type { SessionEventBus } from '../../events/index.js';
import type { IConversationHistoryProvider } from '../../session/history/types.js';
import type { SystemPromptManager } from '../../systemPrompt/manager.js';
import { VercelMessageFormatter } from '../formatters/vercel.js';
import type { ValidatedLLMConfig } from '../schemas.js';
import { InstrumentClass } from '../../telemetry/decorators.js';
import { trace, context, propagation } from '@opentelemetry/api';
import { TurnExecutor } from '../executor/turn-executor.js';
import { MessageQueueService } from '../../session/message-queue.js';
import type { ResourceManager } from '../../resources/index.js';
import { DextoRuntimeError } from '../../errors/DextoRuntimeError.js';
import { LLMErrorCode } from '../error-codes.js';
import type { ContentInput } from '../../agent/types.js';
/**
 * LLM service built on the Vercel AI SDK.
 *
 * The class holds the per-session collaborators (context manager, message queue,
 * compaction settings) and hands the actual LLM execution to TurnExecutor, which handles:
 * - Tool execution with multimodal support
 * - Streaming with llm:chunk events
 * - Message persistence via StreamProcessor
 * - Reactive compaction on overflow
 * - Tool output pruning
 * - Message queue injection
 *
 * @see TurnExecutor for the main execution loop
 * @see StreamProcessor for stream event handling
 */
@InstrumentClass({
    prefix: 'llm.vercel',
    excludeMethods: ['getModelId', 'getAllTools', 'createTurnExecutor'],
})
export class VercelLLMService {
    private model: LanguageModel;
    private config: ValidatedLLMConfig;
    private toolManager: ToolManager;
    private contextManager: ContextManager<ModelMessage>;
    private sessionEventBus: SessionEventBus;
    private readonly sessionId: string;
    private logger: IDextoLogger;
    private resourceManager: ResourceManager;
    private messageQueue: MessageQueueService;
    private compactionStrategy:
        | import('../../context/compaction/types.js').ICompactionStrategy
        | null;
    private modelLimits: ModelLimits;
    private compactionThresholdPercent: number;

    /**
     * Wire up the service for one session.
     *
     * @param compactionStrategy - Optional strategy; stored as `null` when omitted.
     * @param compactionConfig - Optional compaction settings. `thresholdPercent` defaults
     *   to 0.9; `maxContextTokens`, when set, caps the context window used for overflow detection.
     */
    constructor(
        toolManager: ToolManager,
        model: LanguageModel,
        systemPromptManager: SystemPromptManager,
        historyProvider: IConversationHistoryProvider,
        sessionEventBus: SessionEventBus,
        config: ValidatedLLMConfig,
        sessionId: string,
        resourceManager: ResourceManager,
        logger: IDextoLogger,
        compactionStrategy?: import('../../context/compaction/types.js').ICompactionStrategy | null,
        compactionConfig?: CompactionConfigInput
    ) {
        // The child logger is created first because the collaborators built below receive it.
        this.logger = logger.createChild(DextoLogComponent.LLM);

        this.model = model;
        this.config = config;
        this.toolManager = toolManager;
        this.sessionEventBus = sessionEventBus;
        this.sessionId = sessionId;
        this.resourceManager = resourceManager;
        this.compactionStrategy = compactionStrategy ?? null;
        this.compactionThresholdPercent = compactionConfig?.thresholdPercent ?? 0.9;

        // Session-level queue for user messages that arrive mid-task.
        this.messageQueue = new MessageQueueService(this.sessionEventBus, this.logger);

        // Vercel-specific formatter for the typed ContextManager created below.
        const formatter = new VercelMessageFormatter(this.logger);
        const maxInputTokens = getEffectiveMaxInputTokens(config, this.logger);

        // Context window used for compaction overflow detection. A configured
        // maxContextTokens can only lower it (Math.min), never raise it above the model max.
        // thresholdPercent is deliberately NOT applied here; it is handed to TurnExecutor
        // separately so compaction can trigger early (e.g., at 90% instead of 100%).
        const contextCap = compactionConfig?.maxContextTokens;
        let contextWindow = maxInputTokens;
        if (contextCap !== undefined) {
            contextWindow = Math.min(maxInputTokens, contextCap);
            this.logger.debug(
                `Compaction: Using maxContextTokens override: ${contextCap} (model max: ${maxInputTokens})`
            );
        }
        this.modelLimits = { contextWindow };

        // Note: the ContextManager receives the uncapped maxInputTokens, not contextWindow.
        this.contextManager = new ContextManager<ModelMessage>(
            config,
            formatter,
            systemPromptManager,
            maxInputTokens,
            historyProvider,
            sessionId,
            resourceManager,
            this.logger
        );

        this.logger.debug(
            `[VercelLLMService] Initialized for model: ${this.getModelId()}, provider: ${this.config.provider}, temperature: ${this.config.temperature}, maxOutputTokens: ${this.config.maxOutputTokens}`
        );
    }

    /**
     * Resolve the model identifier from the LanguageModel union (string | LanguageModelV2):
     * a string model is its own id, otherwise the object's `modelId` is used.
     */
    private getModelId(): string {
        if (typeof this.model === 'string') {
            return this.model;
        }
        return this.model.modelId;
    }

    /**
     * Return every tool exposed by the tool manager.
     */
    getAllTools(): Promise<ToolSet> {
        return this.toolManager.getAllTools();
    }

    /**
     * Build a TurnExecutor from this service's model, collaborators and LLM settings.
     *
     * @param externalSignal - Optional abort signal forwarded to the executor.
     */
    private createTurnExecutor(externalSignal?: AbortSignal): TurnExecutor {
        const { config } = this;
        const executionOptions = {
            maxSteps: config.maxIterations,
            maxOutputTokens: config.maxOutputTokens,
            temperature: config.temperature,
            baseURL: config.baseURL,
            // Provider-specific options
            reasoningEffort: config.reasoningEffort,
        };
        const llmContext = { provider: config.provider, model: this.getModelId() };

        return new TurnExecutor(
            this.model,
            this.toolManager,
            this.contextManager,
            this.sessionEventBus,
            this.resourceManager,
            this.sessionId,
            executionOptions,
            llmContext,
            this.logger,
            this.messageQueue,
            this.modelLimits,
            externalSignal,
            this.compactionStrategy,
            this.compactionThresholdPercent
        );
    }

    /**
     * Result from streaming a response.
     */
    public static StreamResult: { text: string };

    /**
     * Stream a response for the given content.
     * Primary method for running conversations with multi-image support.
     *
     * Provider and model are recorded on the active span (if any) and added to the
     * OpenTelemetry baggage, and the turn then runs inside that updated context.
     *
     * @param content - String or ContentPart[] (text, images, files)
     * @param options - { signal?: AbortSignal }
     * @returns Object with text response (empty string when the executor returns no text)
     */
    async stream(
        content: ContentInput,
        options?: { signal?: AbortSignal }
    ): Promise<{ text: string }> {
        // Capture the telemetry state that is current on entry.
        const activeSpan = trace.getActiveSpan();
        const ambientContext = context.active();
        const provider = this.config.provider;
        const model = this.getModelId();

        activeSpan?.setAttribute('llm.provider', provider);
        activeSpan?.setAttribute('llm.model', model);

        // Copy the existing baggage entries first, then add the LLM metadata so it
        // takes precedence over any entry that already uses the same key.
        const baggageEntries: Record<string, import('@opentelemetry/api').BaggageEntry> = {};
        const inheritedBaggage = propagation.getBaggage(ambientContext);
        for (const [key, entry] of inheritedBaggage?.getAllEntries() ?? []) {
            baggageEntries[key] = entry;
        }
        baggageEntries['llm.provider'] = { value: provider };
        baggageEntries['llm.model'] = { value: model };

        const contextWithLlmBaggage = propagation.setBaggage(
            ambientContext,
            propagation.createBaggage(baggageEntries)
        );

        // Everything below runs with the updated context active.
        return await context.with(contextWithLlmBaggage, async () => {
            // addUserMessage takes ContentPart[], so wrap a plain string as one text part.
            let parts: ContentPart[];
            if (typeof content === 'string') {
                parts = [{ type: 'text', text: content }];
            } else {
                parts = content;
            }
            await this.contextManager.addUserMessage(parts);

            // The executor shares the session-level message queue and receives the caller's abort signal.
            const executor = this.createTurnExecutor(options?.signal);

            // Second argument `true` enables streaming.
            const contributorContext = { mcpManager: this.toolManager.getMcpManager() };
            const outcome = await executor.execute(contributorContext, true);

            return { text: outcome.text ?? '' };
        });
    }

    /**
     * Get configuration information about the LLM service
     *
     * The model's maximum input tokens comes from the LLM registry. When the registry
     * reports the model as unknown (e.g. a user-supplied model), the configured maximum
     * is used instead; any other error is rethrown.
     *
     * @returns Configuration object with provider and model information
     */
    getConfig(): LLMServiceConfig {
        const configuredMaxInputTokens = this.contextManager.getMaxInputTokens();

        // Starts as the fallback value; replaced when the registry lookup succeeds.
        let modelMaxInputTokens: number = configuredMaxInputTokens;
        try {
            modelMaxInputTokens = getMaxInputTokensForModel(
                this.config.provider,
                this.getModelId(),
                this.logger
            );
        } catch (error) {
            const isUnknownModel =
                error instanceof DextoRuntimeError && error.code === LLMErrorCode.MODEL_UNKNOWN;
            if (!isUnknownModel) {
                throw error;
            }
            this.logger.debug(
                `Could not find model ${this.getModelId()} in LLM registry to get max tokens. Using configured max tokens: ${configuredMaxInputTokens}.`
            );
        }

        return {
            provider: this.config.provider,
            model: this.model,
            configuredMaxInputTokens,
            modelMaxInputTokens,
        };
    }

    /**
     * Expose the context manager to external callers.
     */
    getContextManager(): ContextManager<unknown> {
        return this.contextManager;
    }

    /**
     * Expose the session-level message queue (e.g., for queueing messages while busy).
     */
    getMessageQueue(): MessageQueueService {
        return this.messageQueue;
    }

    /**
     * Expose the compaction strategy (e.g., for session-native compaction);
     * `null` when none was supplied to the constructor.
     */
    getCompactionStrategy():
        | import('../../context/compaction/types.js').ICompactionStrategy
        | null {
        return this.compactionStrategy;
    }
}

View File

@@ -0,0 +1,52 @@
// Derive types from registry constants without creating runtime imports.
/**
 * Provider identifiers.
 *
 * Declared `as const` so each entry keeps its string-literal type, which lets the
 * LLMProvider union below be derived from this single list.
 */
export const LLM_PROVIDERS = [
'openai',
'openai-compatible',
'anthropic',
'google',
'groq',
'xai',
'cohere',
'openrouter',
'litellm',
'glama',
'vertex',
'bedrock',
'local', // Native node-llama-cpp execution (GGUF models)
'ollama', // Ollama server integration
'dexto', // Dexto gateway - routes through api.dexto.ai/v1 with billing
] as const;
/** Union of the string literals listed in LLM_PROVIDERS. */
export type LLMProvider = (typeof LLM_PROVIDERS)[number];
/** File type categories, declared `as const` so the SupportedFileType union can be derived from the list. */
export const SUPPORTED_FILE_TYPES = ['pdf', 'image', 'audio'] as const;
/** Union of the string literals listed in SUPPORTED_FILE_TYPES. */
export type SupportedFileType = (typeof SUPPORTED_FILE_TYPES)[number];
/**
* Context interface for message formatters.
* Provides runtime information for model-aware processing.
* Both fields are required.
*/
export interface LLMContext {
/** LLM provider name (e.g., 'google', 'openai') */
provider: LLMProvider;
/** Specific LLM model name (e.g., 'gemini-2.5-flash', 'gpt-5') */
model: string;
}
// TODO: see how we can combine this with LLMContext
/**
* Variant of LLMContext in which every field is optional, plus an optional
* `suggestedAction` string.
*/
export interface LLMUpdateContext {
/** LLM provider name, when known. */
provider?: LLMProvider;
/** LLM model name, when known. */
model?: string;
/** Optional free-form hint describing a follow-up action. */
suggestedAction?: string;
}
/**
* Token counters for an LLM call. Every field is optional, so a consumer must
* handle counters that are absent.
*/
export interface TokenUsage {
inputTokens?: number;
outputTokens?: number;
reasoningTokens?: number;
totalTokens?: number;
// Cache tokens (Vercel AI SDK: cachedInputTokens, providerMetadata.anthropic.cacheCreationInputTokens)
cacheReadTokens?: number;
cacheWriteTokens?: number;
}

View File

@@ -0,0 +1,444 @@
import { describe, test, expect } from 'vitest';
import { validateInputForLLM } from './validation.js';
import { LLMErrorCode } from './error-codes.js';
import { createSilentMockLogger } from '../logger/v2/test-utils.js';
describe('validateInputForLLM', () => {
const mockLogger = createSilentMockLogger();
// Text-only inputs: validation is permissive, so regular, empty and missing text all pass.
describe('text validation', () => {
    type ValidationResult = ReturnType<typeof validateInputForLLM>;

    // Every case targets the same provider/model pair.
    const gpt5 = { provider: 'openai', model: 'gpt-5' } as const;

    // Asserts a successful result that carries no error-severity issues.
    const expectCleanPass = (result: ValidationResult): void => {
        expect(result.ok).toBe(true);
        expect(result.issues.filter((issue) => issue.severity === 'error')).toHaveLength(0);
    };

    test('should pass validation for valid text input', () => {
        expectCleanPass(validateInputForLLM({ text: 'Hello, world!' }, gpt5, mockLogger));
    });

    test('should pass validation for empty text when no other input provided', () => {
        expectCleanPass(validateInputForLLM({ text: '' }, gpt5, mockLogger));
    });

    test('should pass validation for undefined text', () => {
        expectCleanPass(validateInputForLLM({}, gpt5, mockLogger));
    });
});
// File inputs: size limit, MIME allowlist, base64 format and model capability checks.
describe('file validation', () => {
    type ValidationResult = ReturnType<typeof validateInputForLLM>;

    // Valid base64 for "Hello World"
    const HELLO_WORLD_BASE64 = 'SGVsbG8gV29ybGQ=';

    const gpt5 = { provider: 'openai', model: 'gpt-5' } as const;
    const audioModel = { provider: 'openai', model: 'gpt-4o-audio-preview' } as const;

    // Builds a text + file input; the payload defaults to the valid base64 sample.
    const withFile = (
        text: string,
        mimeType: string,
        filename: string,
        data: string = HELLO_WORLD_BASE64
    ) => ({ text, fileData: { data, mimeType, filename } });

    const errorIssues = (result: ValidationResult) =>
        result.issues.filter((issue) => issue.severity === 'error');
    const errorMessages = (result: ValidationResult) =>
        errorIssues(result).map((issue) => issue.message);
    const hasUnsupportedFileIssue = (result: ValidationResult) =>
        result.issues.some((issue) => issue.code === LLMErrorCode.INPUT_FILE_UNSUPPORTED);

    test('should pass validation for supported file type with model that supports PDF', () => {
        const result = validateInputForLLM(
            withFile('Analyze this file', 'application/pdf', 'document.pdf'),
            gpt5,
            mockLogger
        );

        expect(result.ok).toBe(true);
        expect(errorIssues(result)).toHaveLength(0);
        if (result.ok) {
            expect(result.data.fileValidation?.isSupported).toBe(true);
        }
    });

    test('should pass validation for supported audio file with model that supports audio', () => {
        const result = validateInputForLLM(
            withFile('Analyze this audio', 'audio/mp3', 'audio.mp3'),
            audioModel,
            mockLogger
        );

        expect(result.ok).toBe(true);
        expect(errorIssues(result)).toHaveLength(0);
        if (result.ok) {
            expect(result.data.fileValidation?.isSupported).toBe(true);
        }
    });

    test('should fail validation for unsupported file type (model without audio support)', () => {
        const result = validateInputForLLM(
            withFile('Analyze this audio', 'audio/mp3', 'audio.mp3'),
            gpt5, // This model doesn't support audio
            mockLogger
        );

        expect(result.ok).toBe(false);
        expect(errorIssues(result).length).toBeGreaterThan(0);
        expect(hasUnsupportedFileIssue(result)).toBe(true);
    });

    test('should fail validation for file not in allowed MIME types', () => {
        const result = validateInputForLLM(
            withFile('Analyze this file', 'application/exe', 'malware.exe', 'base64data'),
            gpt5,
            mockLogger
        );

        expect(result.ok).toBe(false);
        expect(errorMessages(result)).toContain('Unsupported file type: application/exe');
        expect(hasUnsupportedFileIssue(result)).toBe(true);
    });

    test('should fail validation for oversized file', () => {
        const oversizedPayload = 'A'.repeat(67108865); // > 64MB
        const result = validateInputForLLM(
            withFile('Analyze this file', 'application/pdf', 'large.pdf', oversizedPayload),
            gpt5,
            mockLogger
        );

        expect(result.ok).toBe(false);
        expect(errorMessages(result)).toContain('File size too large (max 64MB)');
    });

    test('should fail validation for invalid base64 format', () => {
        const result = validateInputForLLM(
            withFile('Analyze this file', 'application/pdf', 'document.pdf', 'invalid-base64!@#'),
            gpt5,
            mockLogger
        );

        expect(result.ok).toBe(false);
        expect(errorMessages(result)).toContain('Invalid file data format');
    });

    test('should fail validation when model is not specified for file', () => {
        const result = validateInputForLLM(
            withFile('Analyze this file', 'application/pdf', 'document.pdf'),
            { provider: 'openai' }, // No model specified
            mockLogger
        );

        expect(result.ok).toBe(false);
        expect(errorMessages(result)).toContain(
            'Model must be specified for file capability validation'
        );
    });

    test('should fail validation for file without mimeType', () => {
        const result = validateInputForLLM(
            // Empty MIME type should fail
            withFile('Analyze this file', '', 'document.pdf'),
            gpt5,
            mockLogger
        );

        expect(result.ok).toBe(false);
        expect(hasUnsupportedFileIssue(result)).toBe(true);
        expect(errorMessages(result)).toContain('Unsupported file type: ');
    });

    test('should pass validation for parameterized MIME types', () => {
        // Test audio/webm;codecs=opus (the original issue)
        const webmResult = validateInputForLLM(
            withFile('Analyze this audio', 'audio/webm;codecs=opus', 'recording.webm'),
            audioModel,
            mockLogger
        );
        expect(webmResult.ok).toBe(true);

        // Test application/pdf;version=1.4
        const pdfResult = validateInputForLLM(
            withFile('Analyze this document', 'application/pdf;version=1.4', 'document.pdf'),
            gpt5,
            mockLogger
        );
        expect(pdfResult.ok).toBe(true);
    });
});
// Image inputs supplied as a data URL, with and without an explicit mimeType field.
describe('image validation', () => {
    const JPEG_DATA_URL = 'data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD';
    const gpt5 = { provider: 'openai', model: 'gpt-5' } as const;

    test('should pass validation for image input', () => {
        const input = {
            text: 'Analyze this image',
            imageData: { image: JPEG_DATA_URL, mimeType: 'image/jpeg' },
        };

        const result = validateInputForLLM(input, gpt5, mockLogger);

        expect(result.ok).toBe(true);
        expect(result.issues.filter((issue) => issue.severity === 'error')).toHaveLength(0);
        if (result.ok) {
            expect(result.data.imageValidation).toBeDefined();
            expect(result.data.imageValidation?.isSupported).toBe(true);
        }
    });

    test('should pass validation for image without mimeType', () => {
        // Only the data URL is provided; no mimeType field is set.
        const input = {
            text: 'Analyze this image',
            imageData: { image: JPEG_DATA_URL },
        };

        const result = validateInputForLLM(input, gpt5, mockLogger);

        expect(result.ok).toBe(true);
        expect(result.issues.filter((issue) => issue.severity === 'error')).toHaveLength(0);
    });
});
// Text + image + file in one call: the file's MIME type decides the outcome for gpt-5.
describe('combined input validation', () => {
    const gpt5 = { provider: 'openai', model: 'gpt-5' } as const;

    // Same text and JPEG image every time; only the attached file varies.
    const contentWithFile = (mimeType: string, filename: string) => ({
        text: 'Analyze this content',
        imageData: {
            image: 'data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD',
            mimeType: 'image/jpeg',
        },
        fileData: {
            data: 'SGVsbG8gV29ybGQ=', // Valid base64 for "Hello World"
            mimeType,
            filename,
        },
    });

    test('should pass validation for text + image + file', () => {
        const result = validateInputForLLM(
            contentWithFile('application/pdf', 'document.pdf'),
            gpt5,
            mockLogger
        );

        expect(result.ok).toBe(true);
        expect(result.issues.filter((issue) => issue.severity === 'error')).toHaveLength(0);
    });

    test('should fail validation when file input is invalid for model', () => {
        const result = validateInputForLLM(
            contentWithFile('audio/mp3', 'audio.mp3'),
            gpt5, // This model doesn't support audio
            mockLogger
        );

        expect(result.ok).toBe(false);
        expect(
            result.issues.filter((issue) => issue.severity === 'error').length
        ).toBeGreaterThan(0);
        expect(
            result.issues.some((issue) => issue.code === LLMErrorCode.INPUT_FILE_UNSUPPORTED)
        ).toBe(true);
    });
});
// Error-path coverage for models that are not known for the given provider.
//
// NOTE: both cases use the known provider 'openai' with an unknown model.
// ValidationLLMConfig.provider is typed as LLMProvider, so an unknown provider cannot be
// passed without a type cast; the first test was previously titled "unknown provider"
// even though it never exercised one.
describe('error handling', () => {
    test('should handle unknown model gracefully', () => {
        const result = validateInputForLLM(
            {
                text: 'Analyze this file',
                fileData: {
                    data: 'SGVsbG8gV29ybGQ=', // Valid base64 for "Hello World"
                    mimeType: 'application/pdf',
                    filename: 'document.pdf',
                },
            },
            { provider: 'openai', model: 'unknown-model' },
            mockLogger
        );
        // Validation must return a failed result rather than throw.
        expect(result.ok).toBe(false);
        expect(result.issues.filter((i) => i.severity === 'error').length).toBeGreaterThan(0);
    });
    test('should fail validation for unknown model', () => {
        const result = validateInputForLLM(
            {
                text: 'Analyze this file',
                fileData: {
                    data: 'SGVsbG8gV29ybGQ=', // Valid base64 for "Hello World"
                    mimeType: 'application/pdf',
                    filename: 'document.pdf',
                },
            },
            { provider: 'openai', model: 'unknown-model' },
            mockLogger
        );
        // Fixed behavior: unknown models should fail validation
        expect(result.ok).toBe(false);
        expect(result.issues.some((i) => i.code === LLMErrorCode.INPUT_FILE_UNSUPPORTED)).toBe(
            true
        );
        expect(result.issues.filter((i) => i.severity === 'error').length).toBeGreaterThan(0);
    });
});
// Same PDF / image inputs checked against non-OpenAI provider and model pairs.
describe('different providers and models', () => {
    type ValidationResult = ReturnType<typeof validateInputForLLM>;

    const claudeSonnet = { provider: 'anthropic', model: 'claude-4-sonnet-20250514' } as const;
    const geminiFlash = { provider: 'google', model: 'gemini-2.0-flash' } as const;

    // Builds a fresh text + PDF input for each call.
    const textWithPdf = (text: string) => ({
        text,
        fileData: {
            data: 'SGVsbG8gV29ybGQ=', // Valid base64 for "Hello World"
            mimeType: 'application/pdf',
            filename: 'document.pdf',
        },
    });

    const errorIssues = (result: ValidationResult) =>
        result.issues.filter((issue) => issue.severity === 'error');

    test('should work with Anthropic provider and PDF files', () => {
        const result = validateInputForLLM(textWithPdf('Hello Claude'), claudeSonnet, mockLogger);

        expect(result.ok).toBe(true);
        expect(errorIssues(result)).toHaveLength(0);
        if (result.ok) {
            expect(result.data.fileValidation?.isSupported).toBe(true);
        }
    });

    test('should work with Google provider and PDF files', () => {
        const result = validateInputForLLM(textWithPdf('Hello Gemini'), geminiFlash, mockLogger);

        expect(result.ok).toBe(true);
        expect(errorIssues(result)).toHaveLength(0);
        if (result.ok) {
            expect(result.data.fileValidation?.isSupported).toBe(true);
        }
    });

    test('should work with image validation (always supported currently)', () => {
        const input = {
            text: 'Hello Gemini',
            imageData: {
                image: 'data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD',
                mimeType: 'image/jpeg',
            },
        };

        const result = validateInputForLLM(input, geminiFlash, mockLogger);

        expect(result.ok).toBe(true);
        expect(errorIssues(result)).toHaveLength(0);
        if (result.ok) {
            expect(result.data.imageValidation?.isSupported).toBe(true);
        }
    });
});
});

View File

@@ -0,0 +1,252 @@
import { validateModelFileSupport, getAllowedMimeTypes } from './registry.js';
import type { LLMProvider } from './types.js';
import type { IDextoLogger } from '../logger/v2/types.js';
import type { ImageData, FileData } from '../context/types.js';
import { Result, ok, fail } from '../utils/result.js';
import { Issue, ErrorScope, ErrorType } from '@core/errors/types.js';
import { LLMErrorCode } from './error-codes.js';
// TODO: Refactor/simplify this file
/**
* Minimal LLM configuration consumed by input validation.
*/
export interface ValidationLLMConfig {
/** Provider whose model capabilities are checked. */
provider: LLMProvider;
/** Model name. Optional in the type, but file validation rejects file input when it is missing. */
model?: string;
}
/**
* Context attached to validation issues. Every field is optional.
*/
export interface ValidationContext {
/** Provider from the LLM config being validated against. */
provider?: string;
/** Model from the LLM config being validated against. */
model?: string | undefined;
fileSize?: number;
maxFileSize?: number;
/** Filename of the file input, when one was provided. */
filename?: string | undefined;
/** MIME type of the file input as supplied by the caller. */
mimeType?: string;
/** File type reported by the file validation result, if any. */
fileType?: string | undefined;
/** Human-readable hint on how to resolve the issue. */
suggestedAction?: string;
}
/**
* Per-input validation outcomes. A section is only set when the corresponding
* input (file or image) was present.
*/
export interface ValidationData {
/** Outcome of the file checks; `error` carries the reason when `isSupported` is false. */
fileValidation?: {
isSupported: boolean;
fileType?: string;
error?: string;
};
/** Outcome of the image checks; `error` carries the reason when `isSupported` is false. */
imageValidation?: {
isSupported: boolean;
error?: string;
};
}
/**
* Input interface for comprehensive validation.
* All fields are optional; file and image checks only run for the fields that are set.
*/
export interface ValidationInput {
/** Message text; currently not validated (empty text is allowed). */
text?: string;
/** Image to validate, if any. */
imageData?: ImageData | undefined;
/** File to validate, if any. */
fileData?: FileData | undefined;
}
// Security constants
// Both limits are compared against the length of the string payload (character count),
// not against a decoded byte size.
const MAX_FILE_SIZE = 67108864; // 64MB in base64 format (64 * 1048576)
const MAX_IMAGE_SIZE = 20971520; // 20MB (20 * 1048576)
/**
 * Single entry point for validating user input (text, image, file) against the
 * capabilities of the configured LLM and the security limits of this module.
 *
 * File and image inputs are each checked when present. Every rejected input adds one
 * user-facing error issue; the result is `ok` only when no issue was recorded. Text is
 * not validated at present. An unexpected exception during validation is logged and
 * turned into a single system-level issue instead of being thrown.
 *
 * @param input The input data to validate (text, image, file)
 * @param config The LLM configuration (provider and model)
 * @param logger The logger instance for logging
 * @returns `ok` with the per-input validation data, or `fail` with the collected issues
 */
export function validateInputForLLM(
    input: ValidationInput,
    config: ValidationLLMConfig,
    logger: IDextoLogger
): Result<ValidationData, ValidationContext> {
    const issues: Issue<ValidationContext>[] = [];
    const validationData: ValidationData = {};

    try {
        const baseContext: ValidationContext = {
            provider: config.provider,
            model: config.model,
        };

        // Records a user-facing error issue; extraContext is layered on top of provider/model.
        const recordUserError = (
            code: Issue<ValidationContext>['code'],
            message: string,
            extraContext: ValidationContext
        ): void => {
            issues.push({
                code,
                message,
                scope: ErrorScope.LLM,
                type: ErrorType.USER,
                severity: 'error',
                context: { ...baseContext, ...extraContext },
            });
        };

        // File input (if any)
        const file = input.fileData;
        if (file) {
            const fileCheck = validateFileInput(file, config, logger);
            validationData.fileValidation = fileCheck;
            if (!fileCheck.isSupported) {
                recordUserError(
                    LLMErrorCode.INPUT_FILE_UNSUPPORTED,
                    fileCheck.error || 'File type not supported by current LLM',
                    {
                        fileType: fileCheck.fileType,
                        mimeType: file.mimeType,
                        filename: file.filename,
                        suggestedAction: 'Use a supported file type or different model',
                    }
                );
            }
        }

        // Image input (if any)
        const image = input.imageData;
        if (image) {
            const imageCheck = validateImageInput(image, config, logger);
            validationData.imageValidation = imageCheck;
            if (!imageCheck.isSupported) {
                recordUserError(
                    LLMErrorCode.INPUT_IMAGE_UNSUPPORTED,
                    imageCheck.error || 'Image format not supported by current LLM',
                    { suggestedAction: 'Use a supported image format or different model' }
                );
            }
        }

        // Text is intentionally not validated: empty text may be valid alongside images/files.
        // TODO: Could be extended with more sophisticated text validation rules
        if (issues.length > 0) {
            return fail(issues);
        }
        return ok(validationData, issues);
    } catch (error) {
        logger.error(`Error during input validation: ${error}`);
        const systemIssue: Issue<ValidationContext> = {
            code: LLMErrorCode.REQUEST_INVALID_SCHEMA,
            message: 'Failed to validate input',
            scope: ErrorScope.LLM,
            type: ErrorType.SYSTEM,
            severity: 'error',
            context: {
                provider: config.provider,
                model: config.model,
                suggestedAction: 'Check input format and try again',
            },
        };
        return fail([systemIssue]);
    }
}
/**
 * Runs the security checks and the model capability check for a file input.
 *
 * Checks run in this order, and the first failure is returned:
 * 1. size limit (string payloads only),
 * 2. MIME type allowlist, matched on the base type with parameters stripped,
 * 3. base64 well-formedness (string payloads only),
 * 4. model capability, which requires `config.model` to be set.
 *
 * @param fileData The file data to validate
 * @param config The LLM configuration
 * @param logger The logger instance for logging
 * @returns File validation result; `error` describes the first failed check
 */
function validateFileInput(
    fileData: FileData,
    config: ValidationLLMConfig,
    logger: IDextoLogger
): NonNullable<ValidationData['fileValidation']> {
    logger.info(`Validating file input: ${fileData.mimeType}`);

    const rejected = (error: string) => ({ isSupported: false, error });

    // Size and format checks only apply when the payload is a string.
    const textPayload = typeof fileData.data === 'string' ? fileData.data : undefined;

    // Security validation: file size check (max 64MB for base64)
    if (textPayload !== undefined && textPayload.length > MAX_FILE_SIZE) {
        return rejected('File size too large (max 64MB)');
    }

    // Security validation: MIME type allowlist
    // Parameters are dropped before matching (e.g., "audio/webm;codecs=opus" -> "audio/webm");
    // the full lowercased value is used when nothing remains before the first ';'.
    const loweredMimeType = fileData.mimeType.toLowerCase();
    const baseMimeType = loweredMimeType.split(';')[0]?.trim() || loweredMimeType;
    if (!getAllowedMimeTypes().includes(baseMimeType)) {
        return rejected(`Unsupported file type: ${fileData.mimeType}`);
    }

    // Security validation: base64 format check
    if (textPayload !== undefined) {
        // Base64 alphabet with at most two trailing '=' and a length that is a multiple of 4.
        const base64Pattern = /^[A-Za-z0-9+/]+={0,2}$/;
        const isWellFormed = base64Pattern.test(textPayload) && textPayload.length % 4 === 0;
        if (!isWellFormed) {
            return rejected('Invalid file data format');
        }
    }

    // Without a model there is nothing to check capabilities against.
    if (!config.model) {
        return rejected('Model must be specified for file capability validation');
    }

    // Model-specific capability validation; receives the MIME type exactly as supplied.
    return validateModelFileSupport(config.provider, config.model, fileData.mimeType);
}
/**
 * Validates image input with size and format checks.
 *
 * The MIME type is taken from `imageData.mimeType` when present; otherwise it is
 * parsed from a `data:<mime>;base64,` prefix on a string payload. The resulting
 * base MIME type (parameters stripped) is then checked against the configured
 * model through validateModelFileSupport.
 *
 * @param imageData The image data to validate
 * @param config The LLM configuration
 * @param logger The logger instance for logging
 * @returns Image validation result
 */
function validateImageInput(
    imageData: ImageData,
    config: ValidationLLMConfig,
    logger: IDextoLogger
): NonNullable<ValidationData['imageValidation']> {
    logger.info(`Validating image input: ${imageData.mimeType}`);

    // Only string payloads can be measured or parsed as a data URL.
    const payload = imageData.image;
    const inlinePayload = typeof payload === 'string' ? payload : undefined;

    if (inlinePayload !== undefined && inlinePayload.length > MAX_IMAGE_SIZE) {
        return {
            isSupported: false,
            error: `Image size too large (max ${MAX_IMAGE_SIZE / 1048576}MB)`,
        };
    }

    // Prefer the explicit MIME type; fall back to the data URL prefix, e.g.
    //   image: "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD..."
    const explicitMime = imageData.mimeType?.toLowerCase();
    const mimeFromDataUrl =
        !explicitMime && inlinePayload !== undefined
            ? /^data:([^;]+);base64,/i.exec(inlinePayload)?.[1]?.toLowerCase()
            : undefined;
    const resolvedMime = explicitMime || mimeFromDataUrl;

    if (!resolvedMime) {
        return { isSupported: false, error: 'Missing image MIME type' };
    }

    if (!config.model) {
        return {
            isSupported: false,
            error: 'Model must be specified for image capability validation',
        };
    }

    // Strip parameters (e.g., "image/jpeg;quality=85" -> "image/jpeg").
    const [firstSegment] = resolvedMime.split(';');
    const baseMimeType = firstSegment?.trim() || resolvedMime;

    // Allowed-MIME and model capability are both decided by the registry helper.
    const support = validateModelFileSupport(config.provider, config.model, baseMimeType);

    // Only surface `error` when the helper actually produced one.
    return support.error
        ? { isSupported: support.isSupported, error: support.error }
        : { isSupported: support.isSupported };
}

View File

@@ -0,0 +1,72 @@
// Browser-safe console-backed logger implementation.
// Matches the public surface used by the app/CLI but avoids fs/path/winston.
/**
 * Construction options for the console-backed {@link Logger}.
 */
export interface LoggerOptions {
    /** Initial level name. Lower-cased on construction; defaults to 'info'. */
    level?: string;
    /** When true, every logging method is a no-op. Defaults to false. */
    silent?: boolean;
    /**
     * Not read by this implementation.
     * NOTE(review): presumably kept so option objects built for another logger
     * implementation still type-check - confirm.
     */
    logToConsole?: boolean;
}

/** Names of the console methods this logger writes to. */
type ConsoleMethod = 'error' | 'warn' | 'info' | 'debug' | 'log';

/**
 * Console-backed logger.
 *
 * Each method forwards its arguments to one console method unless the logger
 * was constructed with `silent: true`. The level is stored and can be read or
 * changed, but it is not used to filter output.
 */
export class Logger {
    private level: string;
    private readonly isSilent: boolean;

    /**
     * @param options Optional level and silent flag; see {@link LoggerOptions}.
     */
    constructor(options: LoggerOptions = {}) {
        this.level = (options.level || 'info').toLowerCase();
        this.isSilent = options.silent ?? false;
    }

    /**
     * Writes `args` to the named console method.
     *
     * The console method is looked up here, after the availability check, so a
     * missing global `console` is skipped instead of throwing at the call site.
     */
    private out(method: ConsoleMethod, args: unknown[]): void {
        if (this.isSilent || typeof console === 'undefined') return;
        console[method](...args);
    }

    /**
     * Writes a message plus optional metadata. An omitted (undefined) second
     * value is dropped so the console does not print a trailing "undefined".
     */
    private emit(method: ConsoleMethod, message: unknown, meta?: unknown): void {
        this.out(method, meta === undefined ? [message] : [message, meta]);
    }

    /** Logs via console.error. */
    error(message: unknown, meta?: unknown): void {
        this.emit('error', message, meta);
    }

    /** Logs via console.warn. */
    warn(message: unknown, meta?: unknown): void {
        this.emit('warn', message, meta);
    }

    /** Logs via console.info. */
    info(message: unknown, meta?: unknown): void {
        this.emit('info', message, meta);
    }

    /** Logs via console.info. */
    http(message: unknown, meta?: unknown): void {
        this.emit('info', message, meta);
    }

    /** Logs via console.debug. */
    verbose(message: unknown, meta?: unknown): void {
        this.emit('debug', message, meta);
    }

    /** Logs via console.debug. */
    debug(message: unknown, meta?: unknown): void {
        this.emit('debug', message, meta);
    }

    /** Logs via console.debug. */
    silly(message: unknown, meta?: unknown): void {
        this.emit('debug', message, meta);
    }

    /** Prints a response value via console.log. */
    displayAIResponse(response: unknown): void {
        this.out('log', [response]);
    }

    /** Prints 'Tool Call', the tool name and its arguments via console.log. */
    toolCall(toolName: string, args: unknown): void {
        this.out('log', ['Tool Call', toolName, args]);
    }

    /** Prints 'Tool Result' and the result via console.log. */
    toolResult(result: unknown): void {
        this.out('log', ['Tool Result', result]);
    }

    /** Prints 'Startup' and the info object via console.log. */
    // `any` values are kept so interface-typed objects remain assignable.
    displayStartupInfo(info: Record<string, any>): void {
        this.out('log', ['Startup', info]);
    }

    /** Prints a message and, when given, the error via console.error. */
    displayError(message: string, error?: Error): void {
        this.emit('error', message, error);
    }

    /** Stores a new level name, lower-cased. Does not affect what is printed. */
    setLevel(level: string): void {
        this.level = level.toLowerCase();
    }

    /** @returns The stored, lower-cased level name. */
    getLevel(): string {
        return this.level;
    }

    /** @returns Always null; this logger does not write to a file. */
    getLogFilePath(): string | null {
        return null;
    }
}

/** Shared default instance (level 'info', not silent). */
export const logger = new Logger();

View File

@@ -0,0 +1,74 @@
/**
* Logger Factory
*
* Creates logger instances from agent configuration.
* Bridges the gap between agent config (LoggerConfig) and the DextoLogger implementation.
*/
import type { LoggerConfig } from './v2/schemas.js';
import type { IDextoLogger, LogLevel } from './v2/types.js';
import { DextoLogComponent } from './v2/types.js';
import { DextoLogger } from './v2/dexto-logger.js';
import { createTransport } from './v2/transport-factory.js';
/**
 * Options accepted by createLogger.
 */
export interface CreateLoggerOptions {
/**
 * Logger configuration from agent config.
 * createLogger reads its `level` and `transports` fields.
 */
config: LoggerConfig;
/** Agent ID for multi-agent isolation; passed through to the created logger. */
agentId: string;
/** Component identifier (defaults to DextoLogComponent.AGENT when omitted) */
component?: DextoLogComponent;
}
/**
 * Resolves the log level to use.
 *
 * The DEXTO_LOG_LEVEL environment variable wins over the configured level, but
 * only when its lower-cased value is one of the recognized level names; an
 * unset, empty or unrecognized value falls back to `configLevel`.
 *
 * @param configLevel Level taken from the logger configuration
 * @returns The environment override when valid, otherwise `configLevel`
 */
function getEffectiveLogLevel(configLevel: LogLevel): LogLevel {
    const override = process.env.DEXTO_LOG_LEVEL;
    if (!override) {
        return configLevel;
    }

    const candidate = override.toLowerCase();
    const recognized: readonly LogLevel[] = ['debug', 'info', 'warn', 'error', 'silly'];
    const matched = recognized.find((name) => name === candidate);

    return matched ?? configLevel;
}
/**
 * Builds a logger from an agent's logger configuration.
 *
 * The level comes from getEffectiveLogLevel (so DEXTO_LOG_LEVEL can override the
 * configured level), and one transport is created per entry in
 * `config.transports`, in order.
 *
 * @param options Logger creation options
 * @returns Configured logger instance
 *
 * @example
 * ```typescript
 * const logger = createLogger({
 *     config: validatedConfig.logger,
 *     agentId: 'my-agent',
 *     component: DextoLogComponent.AGENT
 * });
 *
 * logger.info('Agent started');
 * ```
 */
export function createLogger(options: CreateLoggerOptions): IDextoLogger {
    const loggerConfig = options.config;
    // Fall back to AGENT only when the component was left undefined.
    const component =
        options.component === undefined ? DextoLogComponent.AGENT : options.component;

    // Environment override (if any) is resolved before transports are built.
    const level = getEffectiveLogLevel(loggerConfig.level);

    // Instantiate one transport per configured entry, preserving order.
    const transports: ReturnType<typeof createTransport>[] = [];
    for (const transportConfig of loggerConfig.transports) {
        transports.push(createTransport(transportConfig));
    }

    return new DextoLogger({
        level,
        component,
        agentId: options.agentId,
        transports,
    });
}

View File

@@ -0,0 +1,24 @@
// Logger factory for dependency injection
export { createLogger } from './factory.js';
export type { CreateLoggerOptions } from './factory.js';
// Multi-transport logger - v2
export type { LogLevel, LogEntry, IDextoLogger, ILoggerTransport } from './v2/types.js';
export { DextoLogComponent } from './v2/types.js';
export { LoggerTransportSchema, LoggerConfigSchema } from './v2/schemas.js';
export type { LoggerTransportConfig, LoggerConfig } from './v2/schemas.js';
export type { DextoLoggerConfig } from './v2/dexto-logger.js';
export { DextoLogger } from './v2/dexto-logger.js';
export { createTransport, createTransports } from './v2/transport-factory.js';
export type { ConsoleTransportConfig } from './v2/transports/console-transport.js';
export { ConsoleTransport } from './v2/transports/console-transport.js';
export type { FileTransportConfig } from './v2/transports/file-transport.js';
export { FileTransport } from './v2/transports/file-transport.js';
// Error handling
export { LoggerError } from './v2/errors.js';
export { LoggerErrorCode } from './v2/error-codes.js';
// Legacy logger (to be removed)
export type { LoggerOptions } from './logger.js';
export { Logger, logger } from './logger.js';

Some files were not shown because too many files have changed in this diff Show More