Complete Agent Pipeline System with Claude Code & OpenClaw Integration
- Added Claude Code integration with full context compaction support
- Added OpenClaw integration with deterministic pipeline support
- Implemented parallel agent execution (4 projects x 3 roles pattern)
- Added workspace isolation with permissions and quotas
- Implemented Lobster-compatible YAML workflow parser
- Added persistent memory store for cross-session context
- Created comprehensive README with hero section

This project was 100% autonomously built by Z.AI GLM-5.
This commit is contained in:
220
agent-system/core/token-counter.ts
Normal file
220
agent-system/core/token-counter.ts
Normal file
@@ -0,0 +1,220 @@
|
||||
/**
 * Token Counter Module
 *
 * Estimates token counts for text and messages.
 * Uses a character-based approximation (GPT-style tokenization is roughly
 * 4 characters per token).
 * For more accurate counting, integrate tiktoken or a similar library.
 */
|
||||
|
||||
/**
 * Size measurements for a piece of text.
 */
export interface TokenCountResult {
  /** Estimated number of tokens. */
  tokens: number;
  /** Length of the text in characters. */
  characters: number;
  /** Number of whitespace-separated words. */
  words: number;
}
|
||||
|
||||
/**
 * Per-message entry in a conversation token breakdown.
 */
export interface MessageTokenCount {
  /** Role of the message author (for example "user" or "assistant"). */
  role: string;
  /** Message text; breakdowns may carry a shortened preview rather than the full text. */
  content: string;
  /** Estimated tokens for the message, including formatting overhead. */
  tokens: number;
}
|
||||
|
||||
/**
 * Snapshot of how much of the usable token budget has been consumed.
 */
export interface TokenBudget {
  /** Tokens consumed so far. */
  used: number;
  /** Tokens still available; never negative. */
  remaining: number;
  /** Usable budget: the context limit minus the reserved tokens. */
  total: number;
  /** `used` expressed as a percentage of `total`. */
  percentageUsed: number;
}
|
||||
|
||||
// Approximate characters-per-token ratio (GPT-style heuristic).
const CHARS_PER_TOKEN = 4;

// Overhead for message formatting (role, delimiters, etc.)
const MESSAGE_OVERHEAD_TOKENS = 4;

/**
 * TokenCounter - Estimates token counts for text and conversations.
 *
 * All estimates use a fixed characters-per-token ratio; they are
 * approximations, not real tokenizer output.
 */
export class TokenCounter {
  private readonly maxTokens: number;
  private readonly reservedTokens: number;

  /**
   * @param maxTokens - Size of the full context window, in tokens.
   * @param reservedTokens - Tokens held back from the window; the usable
   *   budget is `maxTokens - reservedTokens`.
   */
  constructor(maxTokens: number = 128000, reservedTokens: number = 4096) {
    this.maxTokens = maxTokens;
    this.reservedTokens = reservedTokens;
  }

  /**
   * Count tokens in a text string.
   *
   * @param text - Text to measure.
   * @returns Estimated tokens (`ceil(length / 4)`), the string length
   *   (UTF-16 code units), and the number of whitespace-separated words.
   */
  countText(text: string): TokenCountResult {
    const characters = text.length;
    const words = text.split(/\s+/).filter(word => word.length > 0).length;
    const tokens = Math.ceil(characters / CHARS_PER_TOKEN);

    return { tokens, characters, words };
  }

  /**
   * Count tokens in a single message.
   *
   * @param message - Message whose `content` is measured.
   * @returns Estimated content tokens plus a fixed per-message overhead.
   */
  countMessage(message: { role: string; content: string }): number {
    return this.countText(message.content).tokens + MESSAGE_OVERHEAD_TOKENS;
  }

  /**
   * Count tokens in a conversation (array of messages).
   *
   * @param messages - Messages in conversation order.
   * @returns The summed estimate and a per-message breakdown. Each breakdown
   *   entry carries at most the first 100 characters of the content (followed
   *   by `...` when truncated); its `tokens` value covers the full message.
   */
  countConversation(messages: Array<{ role: string; content: string }>): {
    total: number;
    breakdown: MessageTokenCount[];
  } {
    const breakdown: MessageTokenCount[] = messages.map(msg => ({
      role: msg.role,
      content: msg.content.substring(0, 100) + (msg.content.length > 100 ? '...' : ''),
      tokens: this.countMessage(msg)
    }));

    const total = breakdown.reduce((sum, entry) => sum + entry.tokens, 0);

    return { total, breakdown };
  }

  /**
   * Get current token budget.
   *
   * @param usedTokens - Tokens consumed so far.
   * @returns Budget snapshot. `total` and `remaining` are clamped at zero.
   *   When no usable budget exists (`reservedTokens >= maxTokens`),
   *   `percentageUsed` is reported as 100 instead of NaN or Infinity.
   */
  getBudget(usedTokens: number): TokenBudget {
    const total = Math.max(0, this.maxTokens - this.reservedTokens);
    const remaining = Math.max(0, total - usedTokens);
    // Guard the division: a zero budget is treated as fully consumed.
    const percentageUsed = total > 0 ? (usedTokens / total) * 100 : 100;

    return { used: usedTokens, remaining, total, percentageUsed };
  }

  /**
   * Check if adding a message would exceed the budget.
   *
   * @param currentTokens - Tokens already consumed.
   * @param message - Candidate message.
   * @returns `true` when the message estimate is larger than the remaining budget.
   */
  wouldExceedBudget(
    currentTokens: number,
    message: { role: string; content: string }
  ): boolean {
    return this.countMessage(message) > this.getBudget(currentTokens).remaining;
  }

  /**
   * Calculate how many messages can fit in the remaining budget.
   *
   * @param currentTokens - Tokens already consumed.
   * @param averageMessageTokens - Assumed size of one message, in tokens.
   * @returns Whole number of messages that fit; 0 when the average is not a
   *   positive number (avoids Infinity, NaN, and negative results).
   */
  calculateCapacity(
    currentTokens: number,
    averageMessageTokens: number = 500
  ): number {
    if (!(averageMessageTokens > 0)) {
      return 0;
    }
    return Math.floor(this.getBudget(currentTokens).remaining / averageMessageTokens);
  }

  /**
   * Split text into chunks that fit within token limits.
   *
   * Text that already fits is returned unchanged as a single chunk.
   * Otherwise the text is split at sentence boundaries and sentences are
   * packed greedily, joined by a single space. The size of each candidate
   * chunk is measured on the joined text, so the joining spaces count toward
   * the limit. A sentence that is too large on its own is split further, at
   * whitespace where possible.
   *
   * @param text - Text to split.
   * @param maxTokensPerChunk - Upper bound per chunk, in estimated tokens.
   *   Values below 1 (or NaN) are treated as 1.
   * @returns Chunks whose estimated token count does not exceed the limit.
   */
  chunkText(text: string, maxTokensPerChunk: number): string[] {
    if (this.countText(text).tokens <= maxTokensPerChunk) {
      return [text];
    }

    const limit = maxTokensPerChunk >= 1 ? Math.floor(maxTokensPerChunk) : 1;
    // ceil(length / CHARS_PER_TOKEN) <= limit  <=>  length <= limit * CHARS_PER_TOKEN
    const maxChars = limit * CHARS_PER_TOKEN;

    const chunks: string[] = [];
    let current = '';

    for (const sentence of text.split(/(?<=[.!?])\s+/)) {
      const trimmed = sentence.trim();
      if (!trimmed) {
        continue;
      }

      for (const part of this.splitOversized(trimmed, maxChars)) {
        const candidate = current ? `${current} ${part}` : part;
        if (candidate.length <= maxChars) {
          current = candidate;
        } else {
          if (current) {
            chunks.push(current);
          }
          current = part;
        }
      }
    }

    if (current) {
      chunks.push(current);
    }

    return chunks;
  }

  /**
   * Break a trimmed segment into pieces of at most `maxChars` characters.
   * Cuts at the last whitespace inside the window when there is one;
   * otherwise cuts hard, backing off one position rather than splitting a
   * UTF-16 surrogate pair.
   */
  private splitOversized(segment: string, maxChars: number): string[] {
    const parts: string[] = [];
    const whitespace = /\s/;
    let rest = segment;

    while (rest.length > maxChars) {
      let cut = maxChars;
      while (cut > 0 && !whitespace.test(rest.charAt(cut))) {
        cut--;
      }

      if (cut > 0) {
        parts.push(rest.slice(0, cut).trimEnd());
        rest = rest.slice(cut).trimStart();
        continue;
      }

      // No whitespace in the window: hard cut.
      cut = maxChars;
      const lastUnit = rest.charCodeAt(cut - 1);
      if (cut > 1 && lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        cut--; // keep the high surrogate together with its low surrogate
      }
      parts.push(rest.slice(0, cut));
      rest = rest.slice(cut);
    }

    if (rest) {
      parts.push(rest);
    }

    return parts;
  }

  /**
   * Find the optimal cutoff point for message truncation.
   *
   * @param messages - Messages in conversation order.
   * @param targetTokens - Token allowance for the leading messages.
   * @returns Index of the first message that would push the running total
   *   past `targetTokens`, or `messages.length` when every message fits.
   */
  findOptimalCutoff(
    messages: Array<{ role: string; content: string }>,
    targetTokens: number
  ): number {
    let accumulated = 0;

    for (const [index, message] of messages.entries()) {
      const messageTokens = this.countMessage(message);
      if (accumulated + messageTokens > targetTokens) {
        return index;
      }
      accumulated += messageTokens;
    }

    return messages.length;
  }

  /**
   * Estimate tokens for different content types.
   *
   * Strings are measured directly; numbers, booleans and bigints are
   * measured by their string form; arrays and other objects are measured by
   * their JSON serialization. Anything else (`null`, `undefined`, functions,
   * symbols) and objects that serialize to nothing count as 0.
   *
   * @param content - Value to estimate.
   * @returns Estimated token count.
   * @throws TypeError if `JSON.stringify` rejects the value (for example a
   *   circular structure or a nested bigint).
   */
  estimateContentTokens(content: unknown): number {
    if (typeof content === 'string') {
      return this.countText(content).tokens;
    }

    if (
      typeof content === 'number' ||
      typeof content === 'boolean' ||
      typeof content === 'bigint'
    ) {
      return this.countText(String(content)).tokens;
    }

    if (typeof content === 'object' && content !== null) {
      // JSON.stringify yields undefined when e.g. toJSON() returns undefined.
      const serialized: string | undefined = JSON.stringify(content);
      return serialized === undefined ? 0 : this.countText(serialized).tokens;
    }

    return 0;
  }
}
|
||||
|
||||
// Shared instance built with the constructor's default settings.
export const defaultTokenCounter = new TokenCounter();
|
||||
|
||||
/**
 * Quick utility: estimate the token count of a text string using the
 * shared default counter.
 *
 * @param text - Text to measure.
 * @returns Estimated number of tokens.
 */
export function countTokens(text: string): number {
  return defaultTokenCounter.countText(text).tokens;
}
|
||||
|
||||
/**
 * Quick utility: estimate the total token count of a conversation using the
 * shared default counter.
 *
 * @param messages - Messages in conversation order; the array is not modified.
 * @returns Summed estimate across all messages.
 */
export function countMessagesTokens(
  messages: ReadonlyArray<{ role: string; content: string }>
): number {
  return defaultTokenCounter.countConversation([...messages]).total;
}
|
||||
Reference in New Issue
Block a user