zCode-CLI-X/test-flexible-stuck-detection.mjs

#!/usr/bin/env node

/**
 * Test improved stuck detection (flexible tool name matching)
 * Tests that stuck detection works even when arguments vary
 */

import { detectIntent } from './src/bot/intent-detector.js';

console.log('🎯 FLEXIBLE STUCK DETECTION TEST\n');
console.log('─'.repeat(80));

// How many trailing calls must hit the same tool before the run counts as stuck.
const STUCK_THRESHOLD = 3;
// Shared call log; every test empties it in place and refills it with its own data.
const callHistory = [];

/**
 * Strip the argument part from a call signature so calls can be compared by tool.
 *
 * Signatures in this file are colon-separated; the first two segments identify
 * the tool (the Test 1 output refers to it as `bash:read`) and anything after
 * them is treated as arguments.
 *
 * @param {string} call - Call signature, e.g. 'bash:read:1-100'.
 * @returns {string} The tool key, e.g. 'bash:read'. Signatures with two or
 *   fewer segments are returned unchanged.
 */
function toolKeyOf(call) {
  return call.split(':').slice(0, 2).join(':');
}

// Test 1: Same tool, different arguments (THE FIX)
console.log('\n📋 Test 1: Same Tool, Different Arguments (THE FIX)');

// The arguments really do differ here; an exact-string comparison would NOT
// flag this sequence, which is precisely what flexible matching must handle.
const sameToolDifferentArgs = [
  'bash:read:1-100',
  'bash:read:101-200',
  'bash:read:201-300',
];

callHistory.length = 0;
callHistory.push(...sameToolDifferentArgs);

// Stuck when the last STUCK_THRESHOLD calls all resolve to the same tool key,
// whatever their arguments are.
const isStuck = callHistory.length >= STUCK_THRESHOLD &&
  callHistory.slice(-STUCK_THRESHOLD).every((call) => toolKeyOf(call) === 'bash:read');

if (isStuck) {
  console.log('✅ PASSED: Flexible detection correctly identifies stuck state');
  console.log('   Last 3 calls:', sameToolDifferentArgs.slice(-3).join(', '));
  console.log('   Same tool (bash:read) but different arguments → STUCK');
} else {
  console.log('❌ FAILED: Flexible detection failed to detect stuck state');
  console.log('   Last 3 calls:', sameToolDifferentArgs.slice(-3).join(', '));
  console.log('   Expected: STUCK');
}

// Test 2: the exact-match case — identical tool and identical arguments
// repeated back to back must still be reported as stuck.
console.log('\n📋 Test 2: Same Tool, Same Arguments (should be stuck)');

const sameToolSameArgs = [
  'bash:read:1-100',
  'bash:read:1-100',
  'bash:read:1-100',
];

// Swap the shared history's contents in place for this test's calls.
callHistory.splice(0, callHistory.length, ...sameToolSameArgs);

// Stuck = enough calls recorded and no entry in the trailing window differs
// from the first call of the sequence.
const exactMatchWindow = callHistory.slice(-STUCK_THRESHOLD);
const isStuck2 = callHistory.length >= STUCK_THRESHOLD &&
  !exactMatchWindow.some((entry) => entry !== sameToolSameArgs[0]);

if (!isStuck2) {
  console.log('❌ FAILED: Flexible detection failed to detect stuck state');
} else {
  console.log('✅ PASSED: Flexible detection correctly identifies stuck state');
  console.log('   Last 3 calls:', sameToolSameArgs.slice(-3).join(', '));
  console.log('   Same tool and same args → STUCK');
}

// Test 3: negative case — three calls to three different tools must not
// be reported as stuck.
console.log('\n📋 Test 3: Different Tools (should not be stuck)');

const differentTools = [
  'bash:read:1-100',
  'file_read:read_file',
  'file_write:write_content',
];

// Swap the shared history's contents in place for this test's calls.
callHistory.splice(0, callHistory.length, ...differentTools);

// Same predicate as the positive cases: every entry of the trailing window
// would have to equal the first call for the run to count as stuck.
const mixedToolWindow = callHistory.slice(-STUCK_THRESHOLD);
const isStuck3 = callHistory.length >= STUCK_THRESHOLD &&
  !mixedToolWindow.some((entry) => entry !== differentTools[0]);

if (isStuck3) {
  console.log('❌ FAILED: Flexible detection incorrectly triggered');
} else {
  console.log('✅ PASSED: Flexible detection correctly identifies NOT stuck');
  console.log('   Last 3 calls:', differentTools.slice(-3).join(', '));
  console.log('   Different tools → NOT STUCK');
}

// Test 4: Same tool repeated at end (regardless of previous calls)
console.log('\n📋 Test 4: Same Tool Repeated at End');

// The history starts with unrelated calls so the test actually proves that
// only the trailing STUCK_THRESHOLD entries are inspected; with nothing but
// three identical entries this case would merely duplicate Test 2.
const repeatedAtEnd = [
  'file_read:read_file',
  'file_write:write_content',
  'bash:read:1-100',
  'bash:read:1-100',
  'bash:read:1-100',
];

callHistory.length = 0;
callHistory.push(...repeatedAtEnd);

// slice(-STUCK_THRESHOLD) keeps only the most recent calls, so the two
// earlier, different calls must not prevent detection.
const isStuck4 = callHistory.length >= STUCK_THRESHOLD &&
  callHistory.slice(-STUCK_THRESHOLD).every((call) => call === 'bash:read:1-100');

if (isStuck4) {
  console.log('✅ PASSED: Flexible detection correctly identifies stuck state');
  console.log('   Last 3 calls:', callHistory.slice(-STUCK_THRESHOLD).join(', '));
  console.log('   Same tool repeated at end → STUCK');
} else {
  console.log('❌ FAILED: Flexible detection failed to detect stuck state');
}

// Summary: tally the four outcomes, print one line per test, then exit with
// status 0 when everything passed and 1 otherwise.
console.log(`\n${'─'.repeat(80)}`);
console.log('\n📊 TEST SUMMARY\n');

let passed = 0;
let failed = 0;

// One row per test: [did it pass, line printed on pass, line printed on fail].
// Test 3 passes when the run was NOT flagged as stuck, hence the negation.
const summaryRows = [
  [
    isStuck,
    '✅ Test 1: Same tool, different args → STUCK detected',
    '❌ Test 1: Same tool, different args → STUCK NOT detected',
  ],
  [
    isStuck2,
    '✅ Test 2: Same tool, same args → STUCK detected',
    '❌ Test 2: Same tool, same args → STUCK NOT detected',
  ],
  [
    !isStuck3,
    '✅ Test 3: Different tools → NOT stuck',
    '❌ Test 3: Different tools → stuck (incorrect)',
  ],
  [
    isStuck4,
    '✅ Test 4: Same tool repeated at end → STUCK detected',
    '❌ Test 4: Same tool repeated at end → STUCK NOT detected',
  ],
];

for (const [succeeded, passLine, failLine] of summaryRows) {
  if (succeeded) {
    passed += 1;
    console.log(passLine);
  } else {
    failed += 1;
    console.log(failLine);
  }
}

const totalRun = passed + failed;
const passRate = (passed / totalRun * 100).toFixed(1);
console.log(`\nTotal: ${passed}/${totalRun} tests passed (${passRate}%)`);

if (failed !== 0) {
  console.log('\n⚠️  SOME TESTS FAILED');
  process.exit(1);
} else {
  const successLines = [
    '\n🎉 ALL TESTS PASSED!',
    '\n✅ Flexible stuck detection is working correctly!',
    '✅ Can detect stuck states even when arguments vary',
    '✅ Can still detect exact matches (same tool + same args)',
    '✅ Can distinguish between different tools',
    '\n🚀 zCode is now resilient to infinite loops!',
  ];
  for (const line of successLines) {
    console.log(line);
  }
  process.exit(0);
}