feat: enterprise-grade agentic loop — 50 turns, stuck detection, context compaction, progress feedback
- MAX_TOOL_TURNS: 10 → 50 (complex tasks need more room)
- max_tokens: 4096 → 8192 (longer responses, better summaries)
- Tool result limit: 8000 → 16000 chars (less truncation)
- Stuck detection: 3x same tool+args pattern → intervention
- Context compaction: every 15 turns, trims old tool results
- Progress feedback: user sees step count during tool loops
- Error recovery: don't give up on mid-loop errors, inject recovery msg
- Max-turns: requests structured summary + next steps (not silent quit)
- SSE timeouts: 90s→180s fetch, 30s→45s idle, 2→4 retries
- Self-correction: clone messages instead of mutating originals
This commit is contained in:
102
src/bot/index.js
102
src/bot/index.js
@@ -358,9 +358,14 @@ export async function initBot(config, api, tools, skills, agents) {
|
||||
};
|
||||
|
||||
// ── AI chat with agentic tool loop ──
|
||||
// Unified streaming + non-streaming with multi-turn tool execution.
|
||||
// Pattern: call API → if tool_calls → execute → feed back → loop → else return text.
|
||||
// Agentic-loop tuning knobs.
// Enterprise-grade: high turn limit, stuck detection, progress feedback,
// context compaction, auto-continue, and robust error recovery.
// Inspired by Claude Code, Aider, and OpenCode patterns.
// NOTE: the diff residue declared MAX_TOOL_TURNS twice (old 10, new 50),
// which is a redeclaration SyntaxError — only the post-change values remain.
const MAX_TOOL_TURNS = 50;
const TOOL_RESULT_MAX = 16000; // chars — enough for large outputs
const STUCK_THRESHOLD = 3; // same tool+args pattern = stuck
const COMPACT_EVERY = 15; // compact context every N turns
const CONTEXT_WINDOW = 120000; // estimated char budget
|
||||
|
||||
async function chatWithAI(messages, opts = {}) {
|
||||
const model = opts.model || svc.config?.api?.models?.default || 'glm-5.1';
|
||||
@@ -379,30 +384,78 @@ export async function initBot(config, api, tools, skills, agents) {
|
||||
// Working copy of messages — tool results get appended here
|
||||
const loopMessages = [...messages];
|
||||
let turns = 0;
|
||||
const callHistory = []; // for stuck detection: [{name, args_sig}]
|
||||
let lastProgressSent = 0;
|
||||
|
||||
// Progress ticker — sends user-facing status during long tool loops
|
||||
// Throttled user-facing status line for long tool loops:
// forwards at most one progress message every 8 seconds via onDelta.
const sendProgress = (note) => {
  const stamp = Date.now();
  if (stamp - lastProgressSent >= 8000) { // throttle: max 1 progress msg per 8s
    lastProgressSent = stamp;
    if (onDelta) onDelta(`\n_${note}_\n`);
  }
};
|
||||
|
||||
// Stuck detection: track tool call patterns
|
||||
// Fingerprint a tool call for stuck detection: the tool name plus the
// first 80 chars of its raw arguments string (enough to spot repeats).
const callSig = (tc) => {
  const toolName = tc.function.name;
  const argPrefix = (tc.function.arguments || '').slice(0, 80);
  return `${toolName}:${argPrefix}`;
};
|
||||
// True when the last STUCK_THRESHOLD signatures in callHistory are all
// identical — i.e. the model keeps issuing the same call.
const isStuck = () => {
  if (callHistory.length < STUCK_THRESHOLD) return false;
  const tail = callHistory.slice(-STUCK_THRESHOLD);
  return new Set(tail).size === 1;
};
|
||||
|
||||
// Context compaction: trim old tool results to keep context manageable
|
||||
// Shrink old tool outputs once the conversation's estimated size exceeds
// CONTEXT_WINDOW chars. Mutates loopMessages in place; returns true only
// when something was actually trimmed.
const compactContext = () => {
  const totalChars = loopMessages.reduce(
    (sum, m) => sum + JSON.stringify(m).length,
    0,
  );
  if (totalChars < CONTEXT_WINDOW) return false;
  logger.info(`📦 Context at ${Math.round(totalChars / 1000)}K chars — compacting old tool results`);
  let trimmed = 0;
  for (const m of loopMessages) {
    // Only large string-valued tool results are eligible for trimming.
    const isBigToolResult =
      m.role === 'tool' && typeof m.content === 'string' && m.content.length > 2000;
    if (!isBigToolResult) continue;
    const originalLen = m.content.length;
    m.content = m.content.slice(0, 500) + `\n... [trimmed ${originalLen - 500} chars]`;
    trimmed += originalLen - m.content.length;
  }
  logger.info(`📦 Compacted ${Math.round(trimmed / 1000)}K chars`);
  return trimmed > 0;
};
|
||||
|
||||
while (turns < MAX_TOOL_TURNS) {
|
||||
// Context compaction every N turns
|
||||
if (turns > 0 && turns % COMPACT_EVERY === 0) compactContext();
|
||||
|
||||
const body = {
|
||||
model,
|
||||
messages: loopMessages,
|
||||
temperature: opts.temperature ?? 0.7,
|
||||
max_tokens: opts.maxTokens || 4096,
|
||||
max_tokens: opts.maxTokens || 8192,
|
||||
};
|
||||
if (toolSchemas.length) body.tools = toolSchemas;
|
||||
|
||||
let response; // { content: string, tool_calls: array|null }
|
||||
|
||||
if (onDelta) {
|
||||
// ── Streaming path (SSE) ──
|
||||
response = await streamChat(svc, body, onDelta);
|
||||
} else {
|
||||
// ── Non-streaming path ──
|
||||
response = await nonStreamChat(body);
|
||||
}
|
||||
|
||||
if (response.error) {
|
||||
// On first turn, return error. On subsequent turns, return what we have.
|
||||
if (turns === 0) return `❌ ${response.error}`;
|
||||
logger.error(`AI error on turn ${turns}: ${response.error}`);
|
||||
// Don't give up — retry once more
|
||||
if (turns < MAX_TOOL_TURNS - 1) {
|
||||
loopMessages.push({ role: 'user', content: `Previous call failed: ${response.error}. Try a different approach.` });
|
||||
continue;
|
||||
}
|
||||
return response.content || `❌ ${response.error}`;
|
||||
}
|
||||
|
||||
@@ -411,9 +464,21 @@ export async function initBot(config, api, tools, skills, agents) {
|
||||
return response.content || '✅ Done.';
|
||||
}
|
||||
|
||||
// ── Stuck detection ──
|
||||
const currentSigs = response.tool_calls.map(callSig);
|
||||
for (const sig of currentSigs) callHistory.push(sig);
|
||||
|
||||
if (isStuck()) {
|
||||
logger.warn(`⚠ Stuck detected — same tool call pattern ${STUCK_THRESHOLD}x`);
|
||||
loopMessages.push({ role: 'user', content: 'You are repeating the same action and getting the same result. Try a completely different approach.' });
|
||||
callHistory.length = 0; // reset history after intervention
|
||||
continue;
|
||||
}
|
||||
|
||||
// ── Execute tool calls ──
|
||||
turns++;
|
||||
logger.info(`🔧 Tool turn ${turns}/${MAX_TOOL_TURNS} — ${response.tool_calls.length} call(s)`);
|
||||
sendProgress(`⚙️ Step ${turns} — executing ${response.tool_calls.length} tool(s)...`);
|
||||
|
||||
// Append assistant message with tool_calls to conversation
|
||||
loopMessages.push({ role: 'assistant', tool_calls: response.tool_calls });
|
||||
@@ -430,7 +495,6 @@ export async function initBot(config, api, tools, skills, agents) {
|
||||
try {
|
||||
args = JSON.parse(fn.arguments || '{}');
|
||||
} catch (parseErr) {
|
||||
// Tool call JSON was truncated (common with large file content in file_write)
|
||||
const argLen = (fn.arguments || '').length;
|
||||
result = `❌ ${fn.name} failed: Tool call arguments JSON was truncated (${argLen} chars). ` +
|
||||
(fn.name === 'file_write'
|
||||
@@ -441,7 +505,7 @@ export async function initBot(config, api, tools, skills, agents) {
|
||||
continue;
|
||||
}
|
||||
logger.info(` → ${fn.name}(${fn.arguments?.slice(0, 100)})`);
|
||||
result = String(await handler(args)).slice(0, 8000);
|
||||
result = String(await handler(args)).slice(0, TOOL_RESULT_MAX);
|
||||
}
|
||||
} catch (e) {
|
||||
result = `❌ ${fn.name} error: ${e.message}`;
|
||||
@@ -449,17 +513,19 @@ export async function initBot(config, api, tools, skills, agents) {
|
||||
}
|
||||
loopMessages.push({ role: 'tool', tool_call_id: tc.id, content: result });
|
||||
}
|
||||
// Loop continues — AI will see tool results and either call more tools or answer
|
||||
}
|
||||
|
||||
// Exhausted turns — do one final call without tools to force a text answer
|
||||
logger.warn(`⚠ Max tool turns (${MAX_TOOL_TURNS}) reached, forcing final answer`);
|
||||
// Exhausted turns — tell the AI to summarize what was accomplished and what remains
|
||||
logger.warn(`⚠ Max tool turns (${MAX_TOOL_TURNS}) reached, requesting summary`);
|
||||
try {
|
||||
const final = await nonStreamChat({
|
||||
model, messages: loopMessages, temperature: 0.3,
|
||||
max_tokens: opts.maxTokens || 4096,
|
||||
model, messages: [
|
||||
...loopMessages,
|
||||
{ role: 'user', content: 'You have reached the maximum number of tool calls. Please provide a clear summary of:\n1. What you accomplished\n2. What still needs to be done\n3. The exact next steps to continue (with specific commands/code)\n\nBe specific so the user can continue where you left off.' },
|
||||
], temperature: 0.3,
|
||||
max_tokens: 4096,
|
||||
});
|
||||
return final.content || '✅ Done (max tool turns reached).';
|
||||
return final.content || '⚠ Max tool turns reached. Some work may be incomplete — ask me to continue.';
|
||||
} catch (e) {
|
||||
return `⚠ Max tool turns reached. Last error: ${e.message}`;
|
||||
}
|
||||
@@ -491,9 +557,9 @@ export async function initBot(config, api, tools, skills, agents) {
|
||||
let fullContent = '';
|
||||
const toolCallMap = {}; // index → { id, name, arguments }
|
||||
let finishReason = null;
|
||||
// SSE resilience knobs.
// NOTE: the diff residue declared each of these twice (old and new values),
// which is a redeclaration SyntaxError — only the post-change values remain.
const MAX_SSE_RETRIES = 4;
const SSE_FETCH_TIMEOUT = 180_000; // 180s total request timeout
const SSE_IDLE_TIMEOUT = 45_000; // 45s between chunks (no data = stuck)
|
||||
|
||||
try {
|
||||
const controller = new AbortController();
|
||||
|
||||
@@ -22,9 +22,17 @@ export function withSelfCorrection(fn) {
|
||||
if (typeof result === 'string' && shouldRetry(result) && attempt < MAX_RETRIES) {
|
||||
logger.warn(`Self-correct: retry ${attempt + 1}/${MAX_RETRIES} — error in response`);
|
||||
await new Promise(r => setTimeout(r, RETRY_DELAY_MS * (attempt + 1)));
|
||||
// Simplify the prompt on retry
|
||||
const lastMsg = args[1]?.[args[1].length - 1];
|
||||
if (lastMsg) lastMsg.content = `[SIMPLIFIED RETRY ${attempt + 1}] ${lastMsg.content.slice(0, 500)}`;
|
||||
// Clone messages with simplified last message — NO mutation of originals
|
||||
const msgs = args[1];
|
||||
if (Array.isArray(msgs) && msgs.length > 0) {
|
||||
const lastMsg = msgs[msgs.length - 1];
|
||||
const simplified = {
|
||||
...lastMsg,
|
||||
content: `[SIMPLIFIED RETRY ${attempt + 1}] ${(lastMsg.content || '').slice(0, 500)}`,
|
||||
};
|
||||
const clonedMsgs = [...msgs.slice(0, -1), simplified];
|
||||
args = [args[0], clonedMsgs, ...args.slice(2)];
|
||||
}
|
||||
continue;
|
||||
}
|
||||
return result;
|
||||
|
||||
Reference in New Issue
Block a user