✨ Major Features:
- Native session management without OpenCode binary
- Provider routing: OpenCode Zen (free), Qwen OAuth, Z.AI
- Streaming chat with tool execution loop
- Mode detection API (/api/meta/mode)
- MCP integration fix (resolved infinite loading)
- NomadArch Native option in UI with comparison info

🆓 Free Models (No API Key):
- GPT-5 Nano (400K context)
- Grok Code Fast 1 (256K context)
- GLM-4.7 (205K context)
- Doubao Seed Code (256K context)
- Big Pickle (200K context)

📦 New Files:
- session-store.ts: Native session persistence
- native-sessions.ts: REST API for sessions
- lite-mode.ts: UI mode detection client
- native-sessions.ts (UI): SolidJS store

🔧 Updated:
- All installers: Optional binary download
- All launchers: Mode detection display
- Binary selector: Added NomadArch Native option
- README: Binary-Free Mode documentation
161 lines · 4.3 KiB · TypeScript
/**
 * Compaction Service
 *
 * Integrates opencode-style compaction logic for managing the context window:
 * - Detect token overflow
 * - Prune old tool outputs (keep last 40k tokens protected)
 * - Track compacted parts with timestamps
 */

import { getLogger } from "@/lib/logger";

const log = getLogger("compaction-service");

// Configuration from opencode
export const PRUNE_MINIMUM = 20_000; // Minimum tokens to prune before triggering
export const PRUNE_PROTECT = 40_000; // Protect last N tokens of tool outputs
export const PRUNE_PROTECTED_TOOLS = ["skill"]; // Tools to never prune

export interface CompactionConfig {
  contextLimit: number;
  outputMax: number;
}

export interface TokenInfo {
  input: number;
  output: number;
  cacheRead?: number;
  cacheWrite?: number;
}

export interface PruneResult {
  pruned: number;
  total: number;
  partsCount: number;
}

/**
 * Check if context is overflowing and compaction is needed
 */
export function isOverflow(tokens: TokenInfo, model: CompactionConfig): boolean {
  const context = model.contextLimit;
  if (context === 0) return false;

  const count = tokens.input + (tokens.cacheRead || 0) + tokens.output;
  // Reserve room for the model's response, capped at 32k tokens
  const output = Math.min(model.outputMax, 32000) || 32000;
  const usable = context - output;

  return count > usable;
}
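
// Illustrative usage sketch (not part of the original module; the token counts
// below are made up to show how the overflow check behaves):
//
//   const model: CompactionConfig = { contextLimit: 200_000, outputMax: 32_000 };
//   const tokens: TokenInfo = { input: 150_000, output: 8_000, cacheRead: 30_000 };
//   isOverflow(tokens, model); // true: 188k used > 168k usable (200k - 32k reserved)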

/**
 * Calculate how many tokens to prune from old tool outputs
 */
export function calculatePruneTarget(
  toolOutputTokens: number[],
  protectThreshold: number = PRUNE_PROTECT
): { toPrune: number[]; prunedTotal: number } {
  let total = 0;
  let pruned = 0;
  const toPrune: number[] = [];

  // Go through tool outputs from newest to oldest
  for (let i = toolOutputTokens.length - 1; i >= 0; i--) {
    const estimate = toolOutputTokens[i];
    total += estimate;

    // Once we've protected enough, mark the rest for pruning
    if (total > protectThreshold) {
      pruned += estimate;
      toPrune.push(i);
    }
  }

  return { toPrune, prunedTotal: pruned };
}
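
// Worked example (illustrative; three tool outputs, oldest first):
//
//   calculatePruneTarget([30_000, 25_000, 20_000], 40_000);
//   // Walks newest-to-oldest: 20k stays under the 40k protect threshold;
//   // 20k + 25k = 45k crosses it, so index 1 is marked; index 0 follows.
//   // => { toPrune: [1, 0], prunedTotal: 55_000 }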

/**
 * Estimate token count from text (rough approximation)
 */
export function estimateTokens(text: string): number {
  if (!text) return 0;
  // Rough estimate: 1 token ≈ 4 characters
  return Math.ceil(text.length / 4);
}
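
// Example (illustrative):
//
//   estimateTokens("hello world"); // 3 (11 chars / 4, rounded up)
//   estimateTokens("");            // 0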

/**
 * Generate the default compaction prompt
 */
export function getDefaultCompactionPrompt(): string {
  return `Provide a detailed prompt for continuing our conversation above. Focus on information that would be helpful for continuing the conversation, including what we did, what we're doing, which files we're working on, and what we're going to do next, considering the new session will not have access to our conversation.`;
}

/**
 * Check if a tool should be protected from pruning
 */
export function isProtectedTool(toolName: string): boolean {
  return PRUNE_PROTECTED_TOOLS.includes(toolName);
}
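
// Example (illustrative): with the default PRUNE_PROTECTED_TOOLS = ["skill"],
//
//   isProtectedTool("skill"); // true — its outputs are never pruned
//   isProtectedTool("bash");  // false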

/**
 * Calculate context usage percentage
 */
export function getContextUsagePercent(tokens: TokenInfo, contextLimit: number): number {
  if (contextLimit === 0) return 0;
  const used = tokens.input + (tokens.cacheRead || 0) + tokens.output;
  return Math.round((used / contextLimit) * 100);
}
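
// Example (illustrative token counts):
//
//   getContextUsagePercent({ input: 90_000, output: 10_000 }, 200_000); // 50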

/**
 * Get compaction recommendation
 */
export function getCompactionRecommendation(
  tokens: TokenInfo,
  model: CompactionConfig
): { shouldCompact: boolean; reason: string; urgency: "low" | "medium" | "high" } {
  const usagePercent = getContextUsagePercent(tokens, model.contextLimit);

  if (usagePercent >= 90) {
    return {
      shouldCompact: true,
      reason: `Context ${usagePercent}% full - compaction required`,
      urgency: "high"
    };
  }

  if (usagePercent >= 75) {
    return {
      shouldCompact: true,
      reason: `Context ${usagePercent}% full - compaction recommended`,
      urgency: "medium"
    };
  }

  if (usagePercent >= 50) {
    return {
      shouldCompact: false,
      reason: `Context ${usagePercent}% full`,
      urgency: "low"
    };
  }

  return {
    shouldCompact: false,
    reason: "",
    urgency: "low"
  };
}
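
// Example (illustrative): 190k of a 200k-token context used => 95%, "high" urgency.
//
//   getCompactionRecommendation(
//     { input: 180_000, output: 10_000 },
//     { contextLimit: 200_000, outputMax: 32_000 }
//   );
//   // => { shouldCompact: true, reason: "Context 95% full - compaction required", urgency: "high" }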

export default {
  isOverflow,
  calculatePruneTarget,
  estimateTokens,
  getDefaultCompactionPrompt,
  isProtectedTool,
  getContextUsagePercent,
  getCompactionRecommendation,
  PRUNE_MINIMUM,
  PRUNE_PROTECT,
  PRUNE_PROTECTED_TOOLS,
};
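
// Illustrative consumer sketch (the import path is an assumption, not confirmed
// by this file):
//
//   import compaction from "@/services/compaction-service";
//   if (compaction.isOverflow(tokens, model)) {
//     // summarize or prune old tool outputs before sending the next request
//   }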