/**
 * Compaction Service
 *
 * Opencode-style helpers for managing the context window:
 * - Detect token overflow
 * - Select old tool outputs for pruning (the newest ~40k tokens stay protected)
 * - Report context usage and a compaction recommendation
 *
 * Note: tracking compacted parts with timestamps is not implemented in this
 * module.
 */

import { getLogger } from "@/lib/logger";

// Module logger; not referenced by any function in this file at present.
const log = getLogger("compaction-service");

// Configuration from opencode

/**
 * Minimum number of prunable tokens that makes pruning worthwhile.
 * Not applied by `calculatePruneTarget`; presumably callers compare its
 * `prunedTotal` against this value — verify against the caller.
 */
export const PRUNE_MINIMUM = 20_000;

/** Token budget of the most recent tool outputs that is never pruned. */
export const PRUNE_PROTECT = 40_000;

/** Names of tools whose outputs must never be pruned. */
export const PRUNE_PROTECTED_TOOLS = ["skill"];

/** Upper bound (and fallback) for the output-token reservation in `isOverflow`. */
const OUTPUT_TOKEN_MAX = 32_000;

/** Model limits relevant to compaction decisions. */
export interface CompactionConfig {
  /** Size of the context window in tokens; `0` is treated as "unknown". */
  contextLimit: number;
  /** Maximum number of output tokens for the model. */
  outputMax: number;
}

/** Token usage counters. */
export interface TokenInfo {
  input: number;
  output: number;
  cacheRead?: number;
  cacheWrite?: number;
}

/** Summary of a pruning pass (declared here; not produced by this module). */
export interface PruneResult {
  pruned: number;
  total: number;
  partsCount: number;
}

/**
 * Sum the counters that count against the context window:
 * input + cacheRead + output. `cacheWrite` is not included.
 */
function countContextTokens(tokens: TokenInfo): number {
  return tokens.input + (tokens.cacheRead || 0) + tokens.output;
}

/**
 * Check if context is overflowing and compaction is needed.
 *
 * The usable window is `contextLimit` minus an output reservation of
 * `min(outputMax, 32000)`; a reservation of 0 (or NaN) falls back to 32000.
 *
 * NOTE(review): when the reservation is >= `contextLimit` the usable window
 * is <= 0, so any non-negative usage is reported as overflow — confirm this
 * is intended for small-context models.
 *
 * @param tokens - Current token usage.
 * @param model - Model limits.
 * @returns `true` when usage exceeds the usable window; always `false` when
 *   `contextLimit` is not a positive number (unknown or invalid limit).
 */
export function isOverflow(tokens: TokenInfo, model: CompactionConfig): boolean {
  const context = model.contextLimit;
  // Treat 0, negative and NaN limits alike: no known limit, never overflowing.
  if (!(context > 0)) return false;

  // `||` is deliberate: a 0 or NaN reservation must fall back to the default.
  const reserved = Math.min(model.outputMax, OUTPUT_TOKEN_MAX) || OUTPUT_TOKEN_MAX;
  return countContextTokens(tokens) > context - reserved;
}

/**
 * Calculate how many tokens to prune from old tool outputs.
 *
 * Walks the outputs from newest (last index) to oldest while accumulating
 * their sizes. Every output seen once the running total exceeds
 * `protectThreshold` is marked for pruning — including the output that
 * crosses the threshold.
 *
 * @param toolOutputTokens - Token estimate per tool output, oldest first.
 * @param protectThreshold - Token budget of recent outputs to keep.
 * @returns `toPrune`: indices into `toolOutputTokens`, newest first;
 *   `prunedTotal`: sum of the estimates at those indices.
 */
export function calculatePruneTarget(
  toolOutputTokens: readonly number[],
  protectThreshold: number = PRUNE_PROTECT
): { toPrune: number[]; prunedTotal: number } {
  const toPrune: number[] = [];
  let seenTotal = 0;
  let prunedTotal = 0;

  for (let index = toolOutputTokens.length - 1; index >= 0; index--) {
    // A missing entry counts as 0 so it cannot turn the running total into NaN.
    const estimate = toolOutputTokens[index] ?? 0;
    seenTotal += estimate;

    if (seenTotal > protectThreshold) {
      prunedTotal += estimate;
      toPrune.push(index);
    }
  }

  return { toPrune, prunedTotal };
}

/**
 * Estimate token count from text (rough approximation: 1 token ≈ 4 characters,
 * measured with `String.length`, rounded up).
 *
 * @returns `0` for an empty string.
 */
export function estimateTokens(text: string): number {
  if (!text) return 0;
  return Math.ceil(text.length / 4);
}

/**
 * Generate default compaction prompt.
 */
export function getDefaultCompactionPrompt(): string {
  return `Provide a detailed prompt for continuing our conversation above. Focus on information that would be helpful for continuing the conversation, including what we did, what we're doing, which files we're working on, and what we're going to do next considering new session will not have access to our conversation.`;
}

/**
 * Check if a tool should be protected from pruning, i.e. whether its name is
 * listed in `PRUNE_PROTECTED_TOOLS`.
 */
export function isProtectedTool(toolName: string): boolean {
  return PRUNE_PROTECTED_TOOLS.includes(toolName);
}

/**
 * Calculate context usage percentage (input + cacheRead + output relative to
 * `contextLimit`), rounded to the nearest integer. The result may exceed 100.
 *
 * @returns `0` when `contextLimit` is not a positive number.
 */
export function getContextUsagePercent(tokens: TokenInfo, contextLimit: number): number {
  // Same "unknown limit" convention as isOverflow; also avoids negative/NaN percentages.
  if (!(contextLimit > 0)) return 0;
  return Math.round((countContextTokens(tokens) / contextLimit) * 100);
}

/**
 * Get compaction recommendation.
 *
 * Urgency tiers by usage percentage: >= 90 high, >= 75 medium, otherwise low.
 * Independently of the percentage, usage that already exceeds the usable
 * window (see `isOverflow`, which reserves room for output tokens) is
 * reported as high urgency so the recommendation never contradicts the
 * overflow check.
 *
 * @returns `shouldCompact` is `true` for medium and high urgency; `reason` is
 *   empty below 50% usage when not overflowing.
 */
export function getCompactionRecommendation(
  tokens: TokenInfo,
  model: CompactionConfig
): { shouldCompact: boolean; reason: string; urgency: "low" | "medium" | "high" } {
  const usagePercent = getContextUsagePercent(tokens, model.contextLimit);

  if (usagePercent >= 90) {
    return {
      shouldCompact: true,
      reason: `Context ${usagePercent}% full - compaction required`,
      urgency: "high",
    };
  }

  // Below 90% of the raw limit but already past the window left after the
  // output reservation: compaction is still required.
  if (isOverflow(tokens, model)) {
    return {
      shouldCompact: true,
      reason: `Context ${usagePercent}% full - output reserve exceeded, compaction required`,
      urgency: "high",
    };
  }

  if (usagePercent >= 75) {
    return {
      shouldCompact: true,
      reason: `Context ${usagePercent}% full - compaction recommended`,
      urgency: "medium",
    };
  }

  if (usagePercent >= 50) {
    return {
      shouldCompact: false,
      reason: `Context ${usagePercent}% full`,
      urgency: "low",
    };
  }

  return { shouldCompact: false, reason: "", urgency: "low" };
}

export default {
  isOverflow,
  calculatePruneTarget,
  estimateTokens,
  getDefaultCompactionPrompt,
  isProtectedTool,
  getContextUsagePercent,
  getCompactionRecommendation,
  PRUNE_MINIMUM,
  PRUNE_PROTECT,
  PRUNE_PROTECTED_TOOLS,
};