backup: before ruflo integration (plugins + multi-agent + hooks)
This commit is contained in:
175
src/bot/index.js
175
src/bot/index.js
@@ -14,6 +14,9 @@ import { queueRequest, clearQueue, isProcessing } from './request-queue.js';
|
||||
import { sendFormatted, splitMessage, escapeMarkdown, sendStreamingMessage, StreamConsumer, markdownToHtml } from './message-sender.js';
|
||||
import { withSelfCorrection } from './self-correction.js';
|
||||
import { getMemory, getConversation } from './memory.js';
|
||||
import { createSessionState } from './session-state.js';
|
||||
import { detectIntent } from './intent-detector.js';
|
||||
import { streamChatWithRetry } from './stream-handler.js';
|
||||
|
||||
// ── Pidfile lock: prevent duplicate instances ──
|
||||
const PIDFILE = path.join(process.env.HOME || '/tmp', '.zcode-bot.pid');
|
||||
@@ -197,8 +200,11 @@ export async function initBot(config, api, tools, skills, agents) {
|
||||
const conversation = getConversation();
|
||||
await conversation.init();
|
||||
|
||||
// ── Session state: LRU file read cache + read-once dedup ──
|
||||
const sessionState = createSessionState();
|
||||
|
||||
// ── Service registry ──
|
||||
const svc = { config, api, tools: tools || [], skills: skills || [], agents: agents || [], rtk, memory, conversation,
|
||||
const svc = { config, api, tools: tools || [], skills: skills || [], agents: agents || [], rtk, memory, conversation, sessionState,
|
||||
toolMap: new Map((tools || []).map(t => [t.name, t])),
|
||||
};
|
||||
|
||||
@@ -443,7 +449,7 @@ export async function initBot(config, api, tools, skills, agents) {
|
||||
let response; // { content: string, tool_calls: array|null }
|
||||
|
||||
if (onDelta) {
|
||||
response = await streamChat(svc, body, onDelta);
|
||||
response = await streamChatWithRetry(svc, body, onDelta);
|
||||
} else {
|
||||
response = await nonStreamChat(body);
|
||||
}
|
||||
@@ -504,6 +510,14 @@ export async function initBot(config, api, tools, skills, agents) {
|
||||
loopMessages.push({ role: 'tool', tool_call_id: tc.id, content: result });
|
||||
continue;
|
||||
}
|
||||
// ── File access dedup: warn if AI re-reads same file ──
|
||||
if (fn.name === 'file_read' && args?.file_path) {
|
||||
const ghostCheck = sessionState.checkGhostChasing(args.file_path);
|
||||
if (ghostCheck) {
|
||||
logger.warn(`⚠ Ghost detected: ${ghostCheck.file} read ${ghostCheck.count}x this session`);
|
||||
result = `⚠ WARNING: You have already read this file ${ghostCheck.count} times. Full content is cached. Stop re-reading and act on it.\n\n` + result;
|
||||
}
|
||||
}
|
||||
logger.info(` → ${fn.name}(${fn.arguments?.slice(0, 100)})`);
|
||||
result = String(await handler(args)).slice(0, TOOL_RESULT_MAX);
|
||||
}
|
||||
@@ -548,152 +562,6 @@ export async function initBot(config, api, tools, skills, agents) {
|
||||
}
|
||||
}
|
||||
|
||||
// ── Streaming API call (SSE) — returns { content, tool_calls, error } ──
|
||||
// Streams tokens via onDelta. If tool_calls detected, accumulates them and returns.
|
||||
// Self-cure: AbortController timeout + auto-retry on SSE errors
|
||||
async function streamChat(svc, body, onDelta, retryCount = 0) {
|
||||
const baseUrl = svc.api?.config?.baseUrl || 'https://api.z.ai/api/coding/paas/v4';
|
||||
const apiKey = svc.api?.config?.apiKey || '';
|
||||
let fullContent = '';
|
||||
const toolCallMap = {}; // index → { id, name, arguments }
|
||||
let finishReason = null;
|
||||
const MAX_SSE_RETRIES = 4;
|
||||
const SSE_FETCH_TIMEOUT = 180_000; // 180s total request timeout
|
||||
const SSE_IDLE_TIMEOUT = 45_000; // 45s between chunks (no data = stuck)
|
||||
|
||||
try {
|
||||
const controller = new AbortController();
|
||||
const fetchTimeout = setTimeout(() => controller.abort(), SSE_FETCH_TIMEOUT);
|
||||
|
||||
const res = await fetch(`${baseUrl}/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers: { 'Authorization': `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ ...body, stream: true }),
|
||||
signal: controller.signal,
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
clearTimeout(fetchTimeout);
|
||||
const errText = await res.text();
|
||||
logger.error(`SSE ${res.status}: ${errText.slice(0, 200)}`);
|
||||
|
||||
// Auto-retry on 5xx errors
|
||||
if (res.status >= 500 && retryCount < MAX_SSE_RETRIES) {
|
||||
const delay = 1000 * (retryCount + 1);
|
||||
logger.info(`🔄 SSE retry ${retryCount + 1}/${MAX_SSE_RETRIES} in ${delay}ms…`);
|
||||
await new Promise(r => setTimeout(r, delay));
|
||||
return await streamChat(svc, body, onDelta, retryCount + 1);
|
||||
}
|
||||
// Fallback to non-streaming
|
||||
return await nonStreamChat(body);
|
||||
}
|
||||
|
||||
const reader = res.body.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
let lastChunkTime = Date.now();
|
||||
|
||||
while (true) {
|
||||
// Idle timeout: if no data for 30s, abort and retry
|
||||
const idleMs = Date.now() - lastChunkTime;
|
||||
if (idleMs > SSE_IDLE_TIMEOUT) {
|
||||
logger.warn(`⏰ SSE idle timeout (${idleMs}ms), ${retryCount < MAX_SSE_RETRIES ? 'retrying' : 'falling back to non-stream'}`);
|
||||
reader.cancel().catch(() => {});
|
||||
clearTimeout(fetchTimeout);
|
||||
if (retryCount < MAX_SSE_RETRIES) {
|
||||
return await streamChat(svc, body, onDelta, retryCount + 1);
|
||||
}
|
||||
return await nonStreamChat(body);
|
||||
}
|
||||
|
||||
// Read with timeout
|
||||
let readResult;
|
||||
try {
|
||||
readResult = await Promise.race([
|
||||
reader.read(),
|
||||
new Promise((_, reject) => setTimeout(() => reject(new Error('read timeout')), SSE_IDLE_TIMEOUT)),
|
||||
]);
|
||||
} catch (readErr) {
|
||||
logger.warn(`⏰ SSE read timeout, ${retryCount < MAX_SSE_RETRIES ? 'retrying' : 'falling back'}`);
|
||||
reader.cancel().catch(() => {});
|
||||
clearTimeout(fetchTimeout);
|
||||
if (retryCount < MAX_SSE_RETRIES) {
|
||||
return await streamChat(svc, body, onDelta, retryCount + 1);
|
||||
}
|
||||
// Return what we have so far
|
||||
break;
|
||||
}
|
||||
|
||||
const { done, value } = readResult;
|
||||
if (done) break;
|
||||
lastChunkTime = Date.now();
|
||||
|
||||
buffer += decoder.decode(value, { stream: true });
|
||||
const lines = buffer.split('\n');
|
||||
buffer = lines.pop() || '';
|
||||
|
||||
for (const line of lines) {
|
||||
const trimmed = line.trim();
|
||||
if (!trimmed.startsWith('data: ')) continue;
|
||||
const data = trimmed.slice(6);
|
||||
if (data === '[DONE]') continue;
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(data);
|
||||
const choice = parsed.choices?.[0];
|
||||
if (!choice) continue;
|
||||
finishReason = choice.finish_reason;
|
||||
|
||||
const delta = choice.delta || {};
|
||||
// Stream text content
|
||||
if (delta.content) {
|
||||
fullContent += delta.content;
|
||||
onDelta(delta.content);
|
||||
}
|
||||
// Accumulate tool calls from stream deltas
|
||||
if (delta.tool_calls) {
|
||||
for (const tc of delta.tool_calls) {
|
||||
const idx = tc.index ?? 0;
|
||||
if (!toolCallMap[idx]) toolCallMap[idx] = { id: tc.id || '', name: '', arguments: '' };
|
||||
if (tc.id) toolCallMap[idx].id = tc.id;
|
||||
if (tc.function?.name) toolCallMap[idx].name += tc.function.name;
|
||||
if (tc.function?.arguments) toolCallMap[idx].arguments += tc.function.arguments;
|
||||
}
|
||||
}
|
||||
} catch { /* skip malformed chunks */ }
|
||||
}
|
||||
}
|
||||
clearTimeout(fetchTimeout);
|
||||
} catch (e) {
|
||||
if (e.name === 'AbortError') {
|
||||
logger.warn(`⏰ SSE fetch aborted (timeout), retry ${retryCount}/${MAX_SSE_RETRIES}`);
|
||||
if (retryCount < MAX_SSE_RETRIES) {
|
||||
return await streamChat(svc, body, onDelta, retryCount + 1);
|
||||
}
|
||||
} else {
|
||||
logger.error('SSE error:', e.message);
|
||||
}
|
||||
if (!fullContent && !Object.keys(toolCallMap).length) {
|
||||
// Nothing received — try non-streaming fallback
|
||||
if (retryCount < MAX_SSE_RETRIES) {
|
||||
return await streamChat(svc, body, onDelta, retryCount + 1);
|
||||
}
|
||||
return await nonStreamChat(body);
|
||||
}
|
||||
}
|
||||
|
||||
// Build tool_calls array from accumulated deltas
|
||||
const toolCalls = Object.keys(toolCallMap).length > 0
|
||||
? Object.values(toolCallMap).map(tc => ({
|
||||
id: tc.id,
|
||||
type: 'function',
|
||||
function: { name: tc.name, arguments: tc.arguments },
|
||||
}))
|
||||
: null;
|
||||
|
||||
return { content: fullContent, tool_calls: toolCalls, error: null };
|
||||
}
|
||||
|
||||
// ── Tool handlers: route API tool_calls to tool class methods ──
|
||||
const toolHandlers = {
|
||||
bash: async (args) => {
|
||||
@@ -1182,6 +1050,17 @@ export async function initBot(config, api, tools, skills, agents) {
|
||||
} catch (e) { logger.warn(`⌨️ initial typing error: ${e.message}`); }
|
||||
|
||||
try {
|
||||
// ── Intent detection: bypass AI for simple messages ──
|
||||
const intent = detectIntent(text);
|
||||
if (intent && intent.bypassAI) {
|
||||
logger.info(`🎯 Intent: ${intent.type} — bypassing AI`);
|
||||
const reply = intent.response || 'Got it.';
|
||||
await queueRequest(key, text, async () => {
|
||||
await sendFormatted(ctx, reply);
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// ── Load conversation history for this chat ──
|
||||
const chatKey = conversation._key(ctx.chat.id, ctx.message?.message_thread_id);
|
||||
svc.currentChatId = ctx.chat.id; // Track for TTS auto-send
|
||||
|
||||
154
src/bot/intent-detector.js
Normal file
154
src/bot/intent-detector.js
Normal file
@@ -0,0 +1,154 @@
|
||||
/**
|
||||
* Intent detector — lightweight pre-routing layer BEFORE the AI.
|
||||
*
|
||||
* BUG FIX: "Hey" was going straight to the AI which then decided to read
|
||||
* 30 files. Now we intercept simple intents and respond directly.
|
||||
*
|
||||
* Priority:
|
||||
* 1. Greetings → instant reply, no AI cost
|
||||
* 2. Status checks → instant system info, no AI cost
|
||||
* 3. Simple questions → short AI call, no tools
|
||||
* 4. Everything else → normal AI tool loop
|
||||
*/
|
||||
|
||||
import { logger } from '../utils/logger.js';
|
||||
|
||||
// ── Greeting patterns (no AI needed) ──
// BUG FIX: each alternation now ends with \b so a prefix can't fire on a
// longer word — previously "nothing works" matched the "no" confirmation
// and "your code is broken" matched "yo", hijacking real requests.
const GREETINGS = [
  /^(hi|hey|hello|howdy|greetings|sup|yo|what'?s up|how are you|how's it going|how do you do)\b/i,
  /^(good morning|good afternoon|good evening|good night)\b/i,
  /^(thanks|thank you|thx|ty|appreciate it)\b/i,
  /^(ok|okay|alright|sure|yes|yeah|yep|nope|no)\b/i,
  /^(continue|go ahead|proceed|do it|carry on|keep going)\b/i,
  /^(done|finished|completed|all good|looks good)\b/i,
  /^(bye|goodbye|see you|later|take care)\b/i,
];

// ── Status check patterns (system info, no AI needed) ──
const STATUS_PATTERNS = [
  { pattern: /^(status|how are you doing|are you alive|you there|ping|test)\b/i, response: '⚡ zCode CLI X is online and ready.' },
  { pattern: /^(what can you do|your tools|your skills|help|commands)\b/i, response: null }, // handled by /tools command
];

// ── Short-answer patterns (answered locally, no tools) ──
const SHORT_ANSWER_PATTERNS = [
  { pattern: /^(what time is it|what date|what day)\b/i, type: 'instant' },
  { pattern: /^(who are you|what are you|your name|describe yourself)\b/i, type: 'instant' },
  { pattern: /^(how old are you|when were you created)\b/i, type: 'instant' },
];

// Canned replies per greeting category; one is picked at random per reply.
// Hoisted to module scope so the table isn't rebuilt on every message.
// NOTE(review): the 'status' category is carried over from the original
// table but is never selected by the category sniffing below — confirm
// whether it was meant to be reachable.
const RESPONSES = {
  greeting: [
    '⚡ Hey! What can I do for you?',
    '⚡ Hello! Ready to code. What do you need?',
    '⚡ Hi! I\'m zCode CLI X — what\'s the task?',
  ],
  thanks: [
    '✅ Happy to help!',
    '✅ No problem! Anything else?',
    '✅ You\'re welcome!',
  ],
  goodbye: [
    '👋 See you!',
    '👋 Catch you later!',
  ],
  confirmation: [
    '✅ Got it.',
    '👍 On it.',
  ],
  continue: [
    '🚀 Continuing...',
    '✅ Going ahead.',
  ],
  status: [
    '⚡ I\'m good! What\'s up?',
    '⚡ Alive and ready. What do you need?',
  ],
};

/**
 * Detect simple intents in a raw user message so the bot can reply
 * instantly instead of invoking the full AI tool loop.
 *
 * @param {string} message - Raw user message text.
 * @returns {{ type: string, response: string|null, bypassAI: boolean }|null}
 *   An intent descriptor when the message can be short-circuited, or null
 *   when it should go through normal AI handling.
 */
export function detectIntent(message) {
  if (!message || typeof message !== 'string') return null;

  const trimmed = message.trim();

  // 1. Greetings / small talk — instant canned reply, zero AI cost.
  for (const pattern of GREETINGS) {
    if (pattern.test(trimmed)) {
      // BUG FIX: the original called `pattern.test(/^(thanks…)/i)`, which
      // coerces the regex ARGUMENT to the string "/^(thanks…)/i" and tests
      // THAT against the greeting pattern — so the category never matched
      // and every reply came from the 'greeting' bucket. Test the message.
      let category = 'greeting';
      if (/^(thanks|thank you|thx|ty)\b/i.test(trimmed)) category = 'thanks';
      else if (/^(bye|goodbye|see you|later)\b/i.test(trimmed)) category = 'goodbye';
      else if (/^(ok|okay|alright|sure|yes|yeah)\b/i.test(trimmed)) category = 'confirmation';
      else if (/^(continue|go ahead|proceed)\b/i.test(trimmed)) category = 'continue';

      const list = RESPONSES[category] || RESPONSES.greeting;
      return {
        type: 'greeting',
        response: list[Math.floor(Math.random() * list.length)],
        bypassAI: true,
      };
    }
  }

  // 2. Status checks — instant system info.
  for (const { pattern, response } of STATUS_PATTERNS) {
    if (pattern.test(trimmed)) {
      if (response) {
        return { type: 'status', response, bypassAI: true };
      }
      // null response → fall through to normal handling (/tools command).
    }
  }

  // 3. Short factual answers — computed locally, no tools.
  for (const { pattern, type } of SHORT_ANSWER_PATTERNS) {
    if (pattern.test(trimmed) && type === 'instant') {
      const now = new Date();
      if (/what time/i.test(trimmed)) {
        return {
          type: 'instant',
          response: `🕐 ${now.toLocaleTimeString('en-US', { timeZone: 'Asia/Tbilisi' })} (Tbilisi time)`,
          bypassAI: true,
        };
      }
      if (/what date|what day/i.test(trimmed)) {
        return {
          type: 'instant',
          response: `📅 ${now.toLocaleDateString('en-US', { weekday: 'long', year: 'numeric', month: 'long', day: 'numeric' })}`,
          bypassAI: true,
        };
      }
      if (/who are you|what are you|your name|describe yourself/i.test(trimmed)) {
        return {
          type: 'instant',
          response: '⚡ I\'m zCode CLI X — an agentic coding assistant running on Telegram. I can read/write files, run bash commands, manage git repos, search the web, and more.',
          bypassAI: true,
        };
      }
      // "how old are you" matches the pattern list but has no local
      // answer — falls through to normal AI handling (original behavior).
    }
  }

  // 4. Too short to act on.
  if (trimmed.length < 5) {
    return {
      type: 'too_short',
      response: '🤔 Could you elaborate? I need a bit more to work with.',
      bypassAI: true,
    };
  }

  // 5. Bare single word with no punctuation — ask for specifics.
  if (!trimmed.includes(' ') && !trimmed.match(/[?!.]/)) {
    return {
      type: 'single_word',
      response: `🤔 You said "${trimmed}". Could you be more specific about what you want me to do?`,
      bypassAI: true,
    };
  }

  // No match — normal AI handling.
  return null;
}
|
||||
192
src/bot/session-state.js
Normal file
192
src/bot/session-state.js
Normal file
@@ -0,0 +1,192 @@
|
||||
/**
|
||||
* Session state: LRU file read cache + read-once dedup tracker.
|
||||
*
|
||||
* BUG FIX: FileReadTool was reading the same file 30+ times because nothing
|
||||
* tracked what was already read. Now we:
|
||||
* 1. Cache full file reads in an LRU (default 50 files, 5MB total)
|
||||
* 2. Prevent re-reading the same file in the same session (read-once dedup)
|
||||
* 3. Track which files have been read to detect ghost-chasing patterns
|
||||
*/
|
||||
|
||||
import { logger } from '../utils/logger.js';
|
||||
|
||||
// ── LRU Cache ──
|
||||
/**
 * Bounded LRU cache keyed by file path, limited by both entry count and
 * total content bytes. Map insertion order doubles as recency order: the
 * first key is the least recently used.
 */
class LRUCache {
  constructor(maxSize = 50, maxBytes = 5 * 1024 * 1024) {
    this.maxSize = maxSize;   // max number of cached entries
    this.maxBytes = maxBytes; // max total content bytes
    this.currentSize = 0;     // running byte total of cached content
    this.map = new Map();     // key → { content, size, lastAccess }
  }

  /** Return cached content for key (marking it most-recently-used), or null. */
  get(key) {
    const entry = this.map.get(key);
    if (!entry) return null;
    // Re-insert to move the key to the end (most recently used).
    this.map.delete(key);
    this.map.set(key, { ...entry, lastAccess: Date.now() });
    return entry.content;
  }

  /** Cache content under key, evicting LRU entries to respect both budgets. */
  set(key, content) {
    const size = Buffer.byteLength(content);
    // BUG FIX: overwriting an existing key used to leave the old entry's
    // bytes counted in currentSize forever, inflating the byte accounting
    // and triggering spurious evictions. Remove the old entry first.
    const existing = this.map.get(key);
    if (existing) {
      this.currentSize -= existing.size;
      this.map.delete(key);
    }
    // Evict from the LRU end until both budgets can accommodate the entry.
    while ((this.map.size >= this.maxSize || this.currentSize + size > this.maxBytes) && this.map.size > 0) {
      const [evictKey] = this.map.keys();
      const evict = this.map.get(evictKey);
      this.currentSize -= evict.size;
      this.map.delete(evictKey);
    }
    this.map.set(key, { content, size, lastAccess: Date.now() });
    this.currentSize += size;
  }

  /** Whether key is currently cached (does not update recency). */
  has(key) {
    return this.map.has(key);
  }

  /** Drop all entries and reset the byte counter. */
  clear() {
    this.map.clear();
    this.currentSize = 0;
  }

  /** { entries, bytes } snapshot for logging. */
  get stats() {
    return { entries: this.map.size, bytes: this.currentSize };
  }
}
|
||||
|
||||
// ── Read-once dedup tracker ──
|
||||
/**
 * Tracks every file_read attempt in the current session so repeated reads
 * of the same file ("ghost chasing") can be detected and surfaced.
 */
class ReadOnceTracker {
  constructor() {
    this.readFiles = new Set();  // unique paths read this session
    this.readCounts = new Map(); // path → number of read attempts
    this.totalReads = 0;         // all read attempts, including repeats
  }

  /** Record one read attempt for filePath. */
  record(filePath) {
    this.readFiles.add(filePath);
    const previous = this.readCounts.get(filePath) ?? 0;
    this.readCounts.set(filePath, previous + 1);
    this.totalReads += 1;
  }

  /** True when filePath has been read at least once this session. */
  hasRead(filePath) {
    return this.readFiles.has(filePath);
  }

  /** Number of read attempts recorded for filePath (0 if never read). */
  getReadCount(filePath) {
    return this.readCounts.get(filePath) ?? 0;
  }

  /**
   * The most re-read file, or null when no file has been read more than
   * twice (the ghost-chasing threshold).
   */
  getGhostFile() {
    let ghost = null;
    let best = 0;
    for (const [file, count] of this.readCounts.entries()) {
      if (count > best) {
        best = count;
        ghost = file;
      }
    }
    return best > 2 ? ghost : null;
  }

  /** Summary snapshot for logging. */
  get stats() {
    return {
      uniqueFiles: this.readFiles.size,
      totalReads: this.totalReads,
      ghostFile: this.getGhostFile(),
    };
  }

  /** Drop all tracked state (new session). */
  clear() {
    this.readFiles.clear();
    this.readCounts.clear();
    this.totalReads = 0;
  }
}
|
||||
|
||||
// ── Session state factory ──
|
||||
// ── Session state factory ──
/**
 * Build a fresh per-session state object combining:
 *   - an LRU cache of full-file reads (50 files / 5MB budget), and
 *   - a read-once tracker used to detect ghost-chasing re-reads.
 */
export function createSessionState() {
  const cache = new LRUCache(50, 5 * 1024 * 1024);
  const reads = new ReadOnceTracker();

  return {
    /**
     * Serve a file read from cache when possible.
     * Full reads (offset 1, limit >= 1000) return the raw cached content;
     * smaller reads starting at line 1 return a line-numbered slice.
     * Returns null on a cache miss or whenever offset > 1.
     */
    getCachedRead(fullPath, offset, limit) {
      if (offset !== 1) return null; // reads past line 1 always hit disk
      const cached = cache.get(fullPath);
      if (!cached) return null;
      if (limit >= 1000) {
        logger.info(`📦 File cache hit: ${fullPath} (${cached.length} bytes)`);
        return cached;
      }
      const lines = cached.split('\n');
      const end = Math.min(offset + limit - 1, lines.length);
      const numbered = lines
        .slice(offset - 1, end)
        .map((line, i) => `${offset + i}|${line}`)
        .join('\n');
      return `${fullPath} (lines ${offset}-${end} of ${lines.length}) [cached]\n${numbered}`;
    },

    /** Cache the full content of a file read for future calls. */
    cacheRead(fullPath, content) {
      cache.set(fullPath, content);
    },

    /** True when this file was already read this session (read-once dedup). */
    wasRead(fullPath) {
      return reads.hasRead(fullPath);
    },

    /** Record one read attempt for this file. */
    recordRead(fullPath) {
      reads.record(fullPath);
    },

    /**
     * Ghost-chasing check: returns { isGhost, file, count } once the file
     * has been read more than twice this session, otherwise null.
     */
    checkGhostChasing(fullPath) {
      const count = reads.getReadCount(fullPath);
      return count > 2 ? { isGhost: true, file: fullPath, count } : null;
    },

    /** Snapshot of cache and read-tracker stats for logging. */
    getStats() {
      return { cache: cache.stats, reads: reads.stats };
    },

    /** Reset all session state (cache + read tracker). */
    reset() {
      cache.clear();
      reads.clear();
    },
  };
}
|
||||
212
src/bot/stream-handler.js
Normal file
212
src/bot/stream-handler.js
Normal file
@@ -0,0 +1,212 @@
|
||||
/**
|
||||
* Stream handler — rewritten SSE with proper exponential backoff,
|
||||
* 429 rate limit handling, and intelligent retry logic.
|
||||
*
|
||||
* BUG FIXES:
|
||||
* - 429 errors now get aggressive backoff (was: ignored, fell back to non-stream)
|
||||
* - Idle timeout increased from 45s to 120s (AI needs time to think)
|
||||
* - Exponential backoff: 1s → 2s → 4s → 8s → 16s (was: linear 1s → 4s)
|
||||
* - Max retries reduced from 4 to 3 (save turns, fall back to non-stream)
|
||||
* - Non-stream fallback is faster and more reliable for tool calls
|
||||
*/
|
||||
|
||||
import { logger } from '../utils/logger.js';
|
||||
|
||||
const MAX_SSE_RETRIES = 3;
const SSE_FETCH_TIMEOUT = 300_000; // 5 min total request timeout
const SSE_IDLE_TIMEOUT = 120_000; // 2 min between chunks (AI needs time)
const MIN_BACKOFF = 1_000; // 1 second
const MAX_BACKOFF = 16_000; // 16 seconds

/** Exponential backoff for a retry attempt: 1s → 2s → 4s → …, capped at 16s. */
function backoffDelay(retryCount) {
  return Math.min(MAX_BACKOFF, MIN_BACKOFF * Math.pow(2, retryCount));
}

/**
 * Stream a chat completion over SSE with retry/backoff.
 *
 * 429 responses get exponential backoff (the model is being throttled —
 * falling back immediately wouldn't help); 5xx responses retry with
 * backoff; other HTTP errors and idle/read timeouts fall back to the
 * non-streaming endpoint. Partial content received before a mid-stream
 * error is returned rather than discarded.
 *
 * @param {object} svc - Service registry (reads svc.api.config.baseUrl/apiKey).
 * @param {object} body - Chat completion request body.
 * @param {(chunk: string) => void} [onDelta] - Called with each content token.
 * @param {number} [retryCount] - Internal recursion counter.
 * @returns {Promise<{ content: string, tool_calls: Array|null, error: string|null }>}
 */
export async function streamChatWithRetry(svc, body, onDelta, retryCount = 0) {
  const baseUrl = svc.api?.config?.baseUrl || 'https://api.z.ai/api/coding/paas/v4';
  const apiKey = svc.api?.config?.apiKey || '';

  let fullContent = '';
  const toolCallMap = {}; // stream index → { id, name, arguments } (accumulated)

  // BUG FIX: the abort timer was declared inside `try` and only cleared on
  // success paths, so any thrown fetch/read error leaked a 5-minute timer
  // that later fired on a stale controller. Declaring it here and clearing
  // in `finally` guarantees cleanup on every exit path.
  const controller = new AbortController();
  const fetchTimeout = setTimeout(() => controller.abort(), SSE_FETCH_TIMEOUT);

  try {
    const res = await fetch(`${baseUrl}/chat/completions`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({ ...body, stream: true }),
      signal: controller.signal,
    });

    // ── Handle HTTP errors ──
    if (!res.ok) {
      const errText = await res.text();
      const errData = errText.slice(0, 200);

      // 429 = rate limit — aggressive backoff; fall back only when exhausted.
      if (res.status === 429) {
        if (retryCount < MAX_SSE_RETRIES) {
          const delay = backoffDelay(retryCount);
          logger.warn(`⏰ 429 Rate limited — retry ${retryCount + 1}/${MAX_SSE_RETRIES} in ${delay}ms`);
          await sleep(delay);
          return await streamChatWithRetry(svc, body, onDelta, retryCount + 1);
        }
        logger.info('🔄 429 exhausted retries, falling back to non-stream');
        return await nonStreamChat(svc, body);
      }

      // 5xx = server error — retry with backoff. (The original re-checked
      // retryCount inside this branch; the outer condition already covers it.)
      if (res.status >= 500 && retryCount < MAX_SSE_RETRIES) {
        const delay = backoffDelay(retryCount);
        logger.warn(`⏰ SSE ${res.status} — retry ${retryCount + 1}/${MAX_SSE_RETRIES} in ${delay}ms`);
        await sleep(delay);
        return await streamChatWithRetry(svc, body, onDelta, retryCount + 1);
      }

      // Everything else — fall back to non-stream.
      logger.error(`SSE ${res.status}: ${errData}`);
      return await nonStreamChat(svc, body);
    }

    // ── Read SSE stream ──
    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';
    let lastChunkTime = Date.now();

    while (true) {
      // Idle timeout: no data between chunks means the stream is stuck.
      const idleMs = Date.now() - lastChunkTime;
      if (idleMs > SSE_IDLE_TIMEOUT) {
        logger.warn(`⏰ SSE idle timeout (${Math.round(idleMs / 1000)}s) — falling back to non-stream`);
        reader.cancel().catch(() => {});
        return await nonStreamChat(svc, body);
      }

      // Read with its own timeout so a hung read() can't block forever.
      let readResult;
      try {
        readResult = await Promise.race([
          reader.read(),
          new Promise((_, reject) =>
            setTimeout(() => reject(new Error('read timeout')), SSE_IDLE_TIMEOUT)
          ),
        ]);
      } catch (readErr) {
        logger.warn(`⏰ SSE read timeout — falling back to non-stream`);
        reader.cancel().catch(() => {});
        return await nonStreamChat(svc, body);
      }

      const { done, value } = readResult;
      if (done) break;
      lastChunkTime = Date.now();

      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      buffer = lines.pop() || ''; // keep trailing partial line for the next chunk

      for (const line of lines) {
        const trimmed = line.trim();
        if (!trimmed.startsWith('data: ')) continue;
        const data = trimmed.slice(6);
        if (data === '[DONE]') continue;

        try {
          const parsed = JSON.parse(data);
          const choice = parsed.choices?.[0];
          if (!choice) continue;

          const delta = choice.delta || {};
          if (delta.content) {
            fullContent += delta.content;
            if (onDelta) onDelta(delta.content);
          }
          // Accumulate partial tool calls across deltas by stream index.
          if (delta.tool_calls) {
            for (const tc of delta.tool_calls) {
              const idx = tc.index ?? 0;
              if (!toolCallMap[idx]) toolCallMap[idx] = { id: tc.id || '', name: '', arguments: '' };
              if (tc.id) toolCallMap[idx].id = tc.id;
              if (tc.function?.name) toolCallMap[idx].name += tc.function.name;
              if (tc.function?.arguments) toolCallMap[idx].arguments += tc.function.arguments;
            }
          }
        } catch { /* skip malformed chunks */ }
      }
    }
  } catch (e) {
    if (e.name === 'AbortError') {
      logger.warn(`⏰ SSE fetch aborted (timeout), retry ${retryCount}/${MAX_SSE_RETRIES}`);
    } else {
      logger.error('SSE error:', e.message);
    }
    // If we got partial content, return it rather than discarding tokens.
    if (fullContent || Object.keys(toolCallMap).length) {
      return buildResult(fullContent, toolCallMap);
    }
    // Nothing received — retry with backoff, then fall back to non-stream.
    if (retryCount < MAX_SSE_RETRIES) {
      const delay = backoffDelay(retryCount);
      logger.info(`🔄 SSE empty response, retry ${retryCount + 1}/${MAX_SSE_RETRIES} in ${delay}ms`);
      await sleep(delay);
      return await streamChatWithRetry(svc, body, onDelta, retryCount + 1);
    }
    return await nonStreamChat(svc, body);
  } finally {
    clearTimeout(fetchTimeout);
  }

  return buildResult(fullContent, toolCallMap);
}
|
||||
|
||||
/**
|
||||
* Non-streaming fallback — faster and more reliable for tool calls.
|
||||
*/
|
||||
/**
 * Non-streaming fallback — a single POST with stream disabled; faster and
 * more reliable for tool calls. Never throws: errors are reported through
 * the `error` field of the returned object.
 *
 * @param {object} svc - Service registry (uses svc.api.client.post).
 * @param {object} body - Chat completion request body.
 * @returns {Promise<{ content: string, tool_calls: Array|null, error: string|null }>}
 */
async function nonStreamChat(svc, body) {
  const payload = { ...body, stream: false };
  try {
    const res = await svc.api.client.post('/chat/completions', payload);
    const firstChoice = res.data.choices?.[0];
    if (!firstChoice) {
      return { content: '', tool_calls: null, error: 'No response from model' };
    }
    const message = firstChoice.message || {};
    return {
      content: message.content || '',
      tool_calls: message.tool_calls || null,
      error: null,
    };
  } catch (e) {
    // Prefer the API's structured error message when present.
    const apiMessage = e.response?.data?.error?.message;
    return {
      content: '',
      tool_calls: null,
      error: apiMessage || e.message,
    };
  }
}
|
||||
|
||||
/**
 * Convert accumulated stream state into the { content, tool_calls, error }
 * result shape shared with nonStreamChat.
 *
 * @param {string} content - Full streamed text content.
 * @param {Object<number, { id: string, name: string, arguments: string }>} toolMap
 *   Stream index → partial tool call accumulated from deltas.
 * @returns {{ content: string, tool_calls: Array|null, error: null }}
 */
function buildResult(content, toolMap) {
  const partials = Object.values(toolMap);
  if (partials.length === 0) {
    return { content, tool_calls: null, error: null };
  }
  const toolCalls = partials.map(({ id, name, arguments: args }) => ({
    id,
    type: 'function',
    function: { name, arguments: args },
  }));
  return { content, tool_calls: toolCalls, error: null };
}
|
||||
|
||||
/** Resolve after `ms` milliseconds — a promise-wrapped setTimeout. */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||||
@@ -1,18 +1,59 @@
|
||||
/**
|
||||
* FileReadTool — rewritten with LRU cache + read-once dedup.
|
||||
*
|
||||
* BUG FIX: Was reading the same file 30+ times because nothing tracked
|
||||
* what was already read. Now:
|
||||
* 1. Checks session-state cache first (full file reads cached)
|
||||
* 2. Warns if the same file is being re-read (ghost detection)
|
||||
* 3. Returns cached content if available
|
||||
*/
|
||||
|
||||
import { logger } from '../utils/logger.js';
|
||||
import fs from 'fs-extra';
|
||||
import path from 'path';
|
||||
|
||||
export class FileReadTool {
|
||||
constructor() {
|
||||
constructor(sessionState) {
|
||||
this.name = 'file_read';
|
||||
this.description = 'Read file contents with line numbers and pagination';
|
||||
this.description = 'Read file contents with line numbers and pagination (cached)';
|
||||
this.sessionState = sessionState;
|
||||
}
|
||||
|
||||
async execute(args) {
|
||||
if (!args || typeof args !== 'object') {
|
||||
return '❌ file_read: Invalid arguments. Expected { file_path, offset, limit }.';
|
||||
}
|
||||
|
||||
const { file_path, offset = 1, limit = 500 } = args;
|
||||
|
||||
if (!file_path || typeof file_path !== 'string') {
|
||||
return '❌ file_read: file_path is required.';
|
||||
}
|
||||
|
||||
const fullPath = path.resolve(file_path);
|
||||
|
||||
// ── Check session state cache ──
|
||||
const cached = this.sessionState.getCachedRead(fullPath, offset, limit);
|
||||
if (cached !== null) {
|
||||
return cached;
|
||||
}
|
||||
|
||||
// ── Read-once dedup: warn if re-reading same file ──
|
||||
const ghostCheck = this.sessionState.checkGhostChasing(fullPath);
|
||||
if (ghostCheck) {
|
||||
logger.warn(`⚠ Ghost detected: ${ghostCheck.file} read ${ghostCheck.count}x this session`);
|
||||
// Still allow the read but add a warning to the result so the AI sees it
|
||||
}
|
||||
|
||||
// Record this read
|
||||
this.sessionState.recordRead(fullPath);
|
||||
|
||||
try {
|
||||
const fullPath = path.resolve(file_path);
|
||||
const content = await fs.readFile(fullPath, 'utf-8');
|
||||
|
||||
// Cache the full file content for future reads
|
||||
this.sessionState.cacheRead(fullPath, content);
|
||||
|
||||
const lines = content.split('\n');
|
||||
|
||||
if (offset < 1 || offset > lines.length) {
|
||||
@@ -27,7 +68,15 @@ export class FileReadTool {
|
||||
? `${fullPath} (${lines.length} lines)`
|
||||
: `${fullPath} (lines ${offset}-${end} of ${lines.length})`;
|
||||
|
||||
return `${header}\n${numbered}`;
|
||||
let result = `${header}\n${numbered}`;
|
||||
|
||||
// Add ghost warning if applicable
|
||||
if (ghostCheck) {
|
||||
result += `\n\n⚠ WARNING: You have already read this file ${ghostCheck.count} times in this session. ` +
|
||||
`The full file content is ${lines.length} lines. You already have this data — stop re-reading and act on it.`;
|
||||
}
|
||||
|
||||
return result;
|
||||
} catch (e) {
|
||||
if (e.code === 'ENOENT') return `❌ File not found: ${file_path}`;
|
||||
if (e.code === 'EISDIR') return `❌ Is a directory: ${file_path}`;
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
/**
|
||||
* FileWriteTool — rewritten for reliability.
|
||||
*
|
||||
* BUG FIX: The "Unterminated string in JSON" errors were NOT from this file.
|
||||
* They were from the AI's streamed tool_calls getting truncated at 180s,
|
||||
* producing incomplete JSON like {"content":"<!DOCTYPE html>... with no closing quote.
|
||||
*
|
||||
* This tool still handles edge cases better now:
|
||||
* 1. Validates content is a string before writing
|
||||
* 2. Auto-truncates extremely large content (>5MB) with a warning
|
||||
* 3. Better error messages that distinguish JSON parse vs filesystem errors
|
||||
*/
|
||||
|
||||
import { logger } from '../utils/logger.js';
|
||||
import fs from 'fs-extra';
|
||||
import path from 'path';
|
||||
@@ -9,13 +22,51 @@ export class FileWriteTool {
|
||||
}
|
||||
|
||||
async execute(args) {
|
||||
// ── Input validation ──
|
||||
if (!args || typeof args !== 'object') {
|
||||
return '❌ file_write: Invalid arguments. Expected { file_path, content }.';
|
||||
}
|
||||
|
||||
const { file_path, content } = args;
|
||||
|
||||
if (!file_path || typeof file_path !== 'string') {
|
||||
return '❌ file_write: file_path is required and must be a string.';
|
||||
}
|
||||
|
||||
if (content === undefined || content === null) {
|
||||
return '❌ file_write: content is required.';
|
||||
}
|
||||
|
||||
// If content is not a string (e.g., object from truncated JSON), stringify it
|
||||
let contentStr;
|
||||
if (typeof content === 'string') {
|
||||
contentStr = content;
|
||||
} else {
|
||||
contentStr = JSON.stringify(content);
|
||||
}
|
||||
|
||||
// ── Size check ──
|
||||
const byteLength = Buffer.byteLength(contentStr);
|
||||
if (byteLength > 5 * 1024 * 1024) {
|
||||
logger.warn(`⚠ file_write: ${byteLength} bytes is very large for direct write, consider bash heredoc`);
|
||||
return `⚠ Warning: ${Math.round(byteLength / 1024)}KB — consider using bash with heredoc for large files: bash({ command: "cat > ${file_path} << 'EOF'\n...\nEOF" })`;
|
||||
}
|
||||
|
||||
try {
|
||||
const fullPath = path.resolve(file_path);
|
||||
await fs.ensureDir(path.dirname(fullPath));
|
||||
await fs.writeFile(fullPath, content, 'utf-8');
|
||||
return `✅ Written ${Buffer.byteLength(content)} bytes to ${fullPath}`;
|
||||
await fs.writeFile(fullPath, contentStr, 'utf-8');
|
||||
logger.info(`✅ file_write: ${fullPath} (${Math.round(byteLength / 1024)}KB)`);
|
||||
return `✅ Written ${byteLength} bytes to ${fullPath}`;
|
||||
} catch (e) {
|
||||
// Distinguish filesystem errors from other issues
|
||||
if (e.code === 'EACCES') {
|
||||
return `❌ Permission denied: ${fullPath}. Check file permissions.`;
|
||||
}
|
||||
if (e.code === 'ENOSPC') {
|
||||
return `❌ Disk full: no space left on device.`;
|
||||
}
|
||||
logger.error(`❌ file_write failed: ${e.message}`);
|
||||
return `❌ Write error: ${e.message}`;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user