QwenClaw v2.0 - Complete Rebuild with ALL 81+ Skills
This commit is contained in:
@@ -0,0 +1,143 @@
#!/bin/bash
# Test explicit skill requests in multi-turn conversations
# Usage: ./run-multiturn-test.sh
#
# This test builds actual conversation history to reproduce the failure mode
# where Claude skips skill invocation after extended conversation
#
# Exit status: 0 when the Turn 3 log shows the requested skill was invoked,
# 1 otherwise. Because of `set -e`, a failing setup command (mkdir, cd, ...)
# also aborts the script with that command's status.

set -e

# Directory containing this script, and the plugin root two levels above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Each run writes into its own directory keyed by the epoch timestamp.
TIMESTAMP=$(date +%s)
OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/explicit-skill-requests/multiturn"
mkdir -p "$OUTPUT_DIR"

# Create project directory (conversation is cwd-based)
PROJECT_DIR="$OUTPUT_DIR/project"
mkdir -p "$PROJECT_DIR/docs/plans"

echo "=== Multi-Turn Explicit Skill Request Test ==="
echo "Output dir: $OUTPUT_DIR"
echo "Project dir: $PROJECT_DIR"
echo "Plugin dir: $PLUGIN_DIR"
echo ""

# All later commands run from inside the project directory.
cd "$PROJECT_DIR"

# Create a dummy plan file
# The quoted 'EOF' delimiter keeps the Markdown literal (no shell expansion).
# Turn 2 tells the assistant that this plan already exists at
# docs/plans/auth-system.md.
cat > "$PROJECT_DIR/docs/plans/auth-system.md" << 'EOF'
# Auth System Implementation Plan

## Task 1: Add User Model
Create user model with email and password fields.

## Task 2: Add Auth Routes
Create login and register endpoints.

## Task 3: Add JWT Middleware
Protect routes with JWT validation.

## Task 4: Write Tests
Add comprehensive test coverage.
EOF

# Turn 1: Start a planning conversation
# stdout and stderr of the CLI are both captured in turn1.json. A non-zero
# exit is tolerated so the later turns and the analysis still run, but the
# status is reported on stderr instead of being silently discarded, so that
# e.g. a missing `claude` binary is not mistaken for a skill-selection failure.
echo ">>> Turn 1: Starting planning conversation..."
TURN1_LOG="$OUTPUT_DIR/turn1.json"
TURN1_STATUS=0
claude -p "I need to implement an authentication system. Let's plan this out. The requirements are: user registration with email/password, JWT tokens, and protected routes." \
    --plugin-dir "$PLUGIN_DIR" \
    --dangerously-skip-permissions \
    --max-turns 2 \
    --output-format stream-json \
    > "$TURN1_LOG" 2>&1 || TURN1_STATUS=$?
if [ "$TURN1_STATUS" -ne 0 ]; then
    echo "NOTE: claude exited with status $TURN1_STATUS during Turn 1 (continuing; see $TURN1_LOG)" >&2
fi

echo "Turn 1 complete."
echo ""

# Turn 2: Continue with more planning detail
# --continue resumes the previous conversation. Output (stdout and stderr)
# goes to turn2.json. A non-zero exit is tolerated so the run can proceed,
# but it is reported on stderr rather than silently ignored.
echo ">>> Turn 2: Continuing planning..."
TURN2_LOG="$OUTPUT_DIR/turn2.json"
TURN2_STATUS=0
claude -p "Good analysis. I've already written the plan to docs/plans/auth-system.md. Now I'm ready to implement. What are my options for execution?" \
    --continue \
    --plugin-dir "$PLUGIN_DIR" \
    --dangerously-skip-permissions \
    --max-turns 2 \
    --output-format stream-json \
    > "$TURN2_LOG" 2>&1 || TURN2_STATUS=$?
if [ "$TURN2_STATUS" -ne 0 ]; then
    echo "NOTE: claude exited with status $TURN2_STATUS during Turn 2 (continuing; see $TURN2_LOG)" >&2
fi

echo "Turn 2 complete."
echo ""

# Turn 3: The critical test - ask for subagent-driven-development
# The log written here (turn3.json) is what the result checks inspect.
# A non-zero exit is tolerated so the analysis still runs, but it is reported
# on stderr rather than silently ignored.
echo ">>> Turn 3: Requesting subagent-driven-development..."
TURN3_LOG="$OUTPUT_DIR/turn3.json"
TURN3_STATUS=0
claude -p "subagent-driven-development, please" \
    --continue \
    --plugin-dir "$PLUGIN_DIR" \
    --dangerously-skip-permissions \
    --max-turns 2 \
    --output-format stream-json \
    > "$TURN3_LOG" 2>&1 || TURN3_STATUS=$?
if [ "$TURN3_STATUS" -ne 0 ]; then
    echo "NOTE: claude exited with status $TURN3_STATUS during Turn 3 (continuing; see $TURN3_LOG)" >&2
fi

echo "Turn 3 complete."
echo ""

echo "=== Results ==="

# Check if skill was triggered in Turn 3
# The pattern accepts the bare skill name or the name preceded by any
# "<prefix>:" inside the "skill" field.
SKILL_PATTERN='"skill":"([^"]*:)?subagent-driven-development"'

# skill_triggered LOG
# Succeeds when LOG contains a Skill tool invocation AND a "skill" field
# matching SKILL_PATTERN. The two matches are independent greps over the
# whole file; they are not required to be on the same line.
skill_triggered() {
    local log=$1
    grep -q '"name":"Skill"' "$log" && grep -qE "$SKILL_PATTERN" "$log"
}

# TRIGGERED determines the script's final exit status.
if skill_triggered "$TURN3_LOG"; then
    echo "PASS: Skill 'subagent-driven-development' was triggered in Turn 3"
    TRIGGERED=true
else
    echo "FAIL: Skill 'subagent-driven-development' was NOT triggered in Turn 3"
    TRIGGERED=false
fi

# Show what skills were triggered

# list_triggered_skills LOG
# Prints the distinct "skill":"..." fields found in LOG, sorted, one per line,
# or " (none)" when there are none (including when LOG cannot be read).
# The matches are captured first and tested for emptiness: a pipeline's exit
# status is that of its last command, so `grep ... | sort -u || echo ...`
# would never reach the fallback.
list_triggered_skills() {
    local log=$1
    local skills
    # `|| true` keeps a no-match from aborting the script should pipefail
    # ever be enabled together with set -e.
    skills=$(grep -o '"skill":"[^"]*"' "$log" 2>/dev/null | sort -u) || true
    if [ -n "$skills" ]; then
        printf '%s\n' "$skills"
    else
        echo " (none)"
    fi
}

echo ""
echo "Skills triggered in Turn 3:"
list_triggered_skills "$TURN3_LOG"

# Check for premature action in Turn 3

# report_premature_action LOG
# If LOG contains a Skill tool invocation, reports any tool_use lines that
# occur at or before the first Skill line, ignoring lines that mention the
# Skill or TodoWrite tools. At most 5 offending lines are shown.
# If LOG contains no Skill invocation, warns and lists up to 10 tool names
# that were invoked, or " (none)" when there were none.
report_premature_action() {
    local log=$1
    local first_skill_line premature_tools invoked_tools

    # Line number of the first Skill invocation; empty when there is none.
    first_skill_line=$(grep -n '"name":"Skill"' "$log" | head -1 | cut -d: -f1)

    if [ -n "$first_skill_line" ]; then
        # The final grep exits non-zero when nothing remains; `|| true` keeps
        # that from counting as a failure.
        premature_tools=$(head -n "$first_skill_line" "$log" \
            | grep '"type":"tool_use"' \
            | grep -v '"name":"Skill"' \
            | grep -v '"name":"TodoWrite"' || true)
        if [ -n "$premature_tools" ]; then
            echo "WARNING: Tools invoked BEFORE Skill tool in Turn 3:"
            printf '%s\n' "$premature_tools" | head -5
        else
            echo "OK: No premature tool invocations detected"
        fi
    else
        echo "WARNING: No Skill invocation found in Turn 3"
        # Show what WAS invoked
        echo ""
        echo "Tools invoked in Turn 3:"
        # Capture first, then test for emptiness: the pipeline ends in `head`,
        # which exits 0, so an `|| echo` fallback would never run.
        invoked_tools=$(grep '"type":"tool_use"' "$log" \
            | grep -o '"name":"[^"]*"' \
            | head -10) || true
        if [ -n "$invoked_tools" ]; then
            printf '%s\n' "$invoked_tools"
        else
            echo " (none)"
        fi
    fi
}

echo ""
echo "Checking for premature action in Turn 3..."
report_premature_action "$TURN3_LOG"

# Show Turn 3 assistant response

# show_first_assistant_response LOG
# Prints at most the first 500 bytes of the first assistant message in LOG:
# the text of its first content entry, or the whole content value when that
# entry has no text. Prints " (could not extract)" when LOG has no assistant
# line, when jq is unavailable or fails, or when jq yields nothing.
# The extraction is captured and checked explicitly because a pipeline ending
# in `head -c 500` exits 0 regardless, which would make an `|| echo` fallback
# unreachable.
show_first_assistant_response() {
    local log=$1
    local first_msg text=""

    first_msg=$(grep '"type":"assistant"' "$log" | head -1) || true
    if [ -n "$first_msg" ]; then
        text=$(printf '%s\n' "$first_msg" \
            | jq -r '.message.content[0].text // .message.content' 2>/dev/null) || text=""
    fi

    if [ -n "$text" ]; then
        printf '%s\n' "$text" | head -c 500
    else
        echo " (could not extract)"
    fi
}

echo ""
echo "Turn 3 first assistant response (truncated):"
show_first_assistant_response "$TURN3_LOG"

# Print where each turn's log was written, plus the run's timestamp
# (the timestamp is part of the output directory path).
echo ""
echo "Logs:"
echo " Turn 1: $TURN1_LOG"
echo " Turn 2: $TURN2_LOG"
echo " Turn 3: $TURN3_LOG"
echo "Timestamp: $TIMESTAMP"

# Exit 0 only when the Turn 3 check recorded TRIGGERED=true; otherwise 1.
if [ "$TRIGGERED" = "true" ]; then
    exit 0
else
    exit 1
fi
Reference in New Issue
Block a user