Adds complete implementation of the Chippery framework integration for semantic codebase navigation:

- build-index.sh: Scan and build semantic index from codebase
- search.sh: Natural language code search with relevance scoring
- update-index.sh: Incremental index updates (git-aware)
- concept-map.sh: Show concept relationships and file mappings
- stats.sh: Display index statistics and token savings
- codebase-indexer-hook.sh: Auto-trigger hook for session start

Features:

- Supports 15+ programming languages (TS, JS, Python, Go, Rust, etc.)
- Concept extraction from filenames, exports, functions, classes
- Token-efficient indexing (~99% savings vs full codebase read)
- JSON-based index with jq integration
- Auto-detection of code projects
- Git-aware incremental updates

Token Efficiency:

- Full codebase read: ~188K tokens
- Index-based query: ~2K tokens
- Potential savings: ~99%

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
177 lines
6.0 KiB
Bash
Executable File
177 lines
6.0 KiB
Bash
Executable File
#!/bin/bash
#
# Codebase Indexer - Show Statistics
# Part of Chippery framework for semantic codebase navigation
#
# Usage: <this script> [PROJECT_ROOT]
#   PROJECT_ROOT  directory that contains .codebase-index.json
#                 (defaults to the current working directory)

set -e

# Configuration
# Assigned first and frozen afterwards so a failing $(pwd) is not masked by
# the exit status of `readonly`.
PROJECT_ROOT="${1:-$(pwd)}"
INDEX_FILE="$PROJECT_ROOT/.codebase-index.json"
LOG_FILE="$HOME/.claude/logs/codebase-indexer.log"
readonly PROJECT_ROOT INDEX_FILE LOG_FILE

# Colors for output
# Stored as literal backslash sequences; they are turned into real escape
# codes by `echo -e` / printf at the point of use.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
MAGENTA='\033[0;35m'
BOLD='\033[1m'
NC='\033[0m' # No Color
readonly RED GREEN YELLOW BLUE CYAN MAGENTA BOLD NC

# Logging function
# Appends one timestamped line to $LOG_FILE.
# Arguments: $1 - message text
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" >> "$LOG_FILE"
}

# Print colored output
#
# print_status: show an informational message with a blue [Chippery] tag and
# record the same text in the log.
# Arguments: $1 - message text (printed verbatim)
print_status() {
  # %b expands the escape sequences stored in the color variables; %s keeps
  # backslashes in the message itself (e.g. in a path) from being interpreted.
  printf '%b[Chippery]%b %s\n' "$BLUE" "$NC" "$1"
  log "$1"
}

# print_success: show a message with a green [Chippery] tag and log it with a
# "SUCCESS: " prefix.
# Arguments: $1 - message text (printed verbatim)
print_success() {
  # Only the color codes go through %b; the message is printed with %s so
  # backslashes inside it are not expanded.
  printf '%b[Chippery]%b %s\n' "$GREEN" "$NC" "$1"
  log "SUCCESS: $1"
}

# print_warning: show a message with a yellow [Chippery] tag and log it with a
# "WARNING: " prefix.
# Arguments: $1 - message text (printed verbatim)
print_warning() {
  # Only the color codes go through %b; the message is printed with %s so
  # backslashes inside it are not expanded.
  printf '%b[Chippery]%b %s\n' "$YELLOW" "$NC" "$1"
  log "WARNING: $1"
}

# print_error: show a message with a red [Chippery] tag and log it with an
# "ERROR: " prefix. Output goes to stdout, like the other print_* helpers.
# Arguments: $1 - message text (printed verbatim)
print_error() {
  # Only the color codes go through %b; the message is printed with %s so
  # backslashes inside it (e.g. in an index path) are not expanded.
  printf '%b[Chippery]%b %s\n' "$RED" "$NC" "$1"
  log "ERROR: $1"
}

# Make sure the directory that holds $LOG_FILE exists before anything logs
mkdir -p "${LOG_FILE%/*}"

# Check if index exists
#
# Verifies the two prerequisites for reporting: the index file at $INDEX_FILE
# and the jq executable. Prints an error plus a hint and exits the script with
# status 1 when either one is missing.
check_index() {
  [[ -f "$INDEX_FILE" ]] || {
    print_error "No index found at $INDEX_FILE"
    print_status "Run 'build-index.sh' first to create the index"
    exit 1
  }

  command -v jq >/dev/null 2>&1 || {
    print_error "jq is required for statistics"
    print_status "Install with: sudo apt-get install jq / brew install jq"
    exit 1
  }
}

# Format large numbers
#
# Prints a compact, human-readable form of a number followed by a newline:
#   >= 1000000  -> one decimal with an "M" suffix (2500000 -> 2.5M)
#   >= 1000     -> one decimal with a "K" suffix  (1500 -> 1.5K)
#   otherwise   -> the value as given
# Arguments: $1 - number to format; missing, empty or non-numeric input
#                 (such as jq's "null") is treated as 0
# Outputs:   the formatted value on stdout
format_number() {
  local num=${1:-0}
  local numeric_re='^-?[0-9]+(\.[0-9]+)?$'

  # Anything that is not a plain decimal number would break the comparisons
  # below, so fall back to zero instead of printing shell errors.
  if [[ ! $num =~ $numeric_re ]]; then
    num=0
  fi

  # The value is handed to awk with -v rather than spliced into the program
  # text; LC_ALL=C keeps "." as the decimal separator in every locale.
  LC_ALL=C awk -v n="$num" 'BEGIN {
    if (n >= 1000000)   printf "%.1fM\n", n / 1000000
    else if (n >= 1000) printf "%.1fK\n", n / 1000
    else                printf "%s\n", n
  }'
}

# Print the numeric result of jq filter $1 applied to $INDEX_FILE, or 0 when
# jq fails or the result is not a plain non-negative number.
_stats_number() {
  local value
  local numeric_re='^[0-9]+(\.[0-9]+)?$'

  value=$(jq -r "$1" "$INDEX_FILE" 2>/dev/null) || value=""
  if [[ $value =~ $numeric_re ]]; then
    printf '%s\n' "$value"
  else
    printf '0\n'
  fi
}

# Show main statistics
#
# Renders the report for the index stored at $INDEX_FILE: index information,
# overall counts, token statistics, estimated token savings, the ten concepts
# with the most files, the ten largest files by token estimate and the number
# of files per extension.
# Globals:   INDEX_FILE and the color variables (read)
# Outputs:   the report on stdout; the completion message goes through
#            print_success
show_stats() {
  local version updated project_root
  local total_files total_concepts total_tokens total_lines
  local avg_tokens_per_file full_read_tokens savings_percent
  local index_read_tokens=2000 # Rough estimate for reading index
  local rank concept count file tokens lines base ext
  local -A ext_counts=()

  echo ""
  echo -e "${BOLD}${BLUE}╔════════════════════════════════════════════════════════════╗${NC}"
  echo -e "${BOLD}${BLUE}║ Chippery Codebase Index Statistics ║${NC}"
  echo -e "${BOLD}${BLUE}╚════════════════════════════════════════════════════════════╝${NC}"
  echo ""

  # Basic info. Declaration and assignment are separate so a jq failure is
  # handled explicitly instead of being hidden behind `local`.
  version=$(jq -r '.version // "unknown"' "$INDEX_FILE" 2>/dev/null) || version="unknown"
  updated=$(jq -r '.last_updated // "unknown"' "$INDEX_FILE" 2>/dev/null) || updated="unknown"
  project_root=$(jq -r '.project_root // "unknown"' "$INDEX_FILE" 2>/dev/null) || project_root="unknown"

  # Values read from the index are printed with %s so backslashes in them
  # (for example in a path) are not expanded; only the labels go through %b.
  echo -e "${BOLD}${CYAN}📁 Index Information${NC}"
  printf '%b %s\n' " ${YELLOW}Version:${NC}" "$version"
  printf '%b %s\n' " ${YELLOW}Project Root:${NC}" "$project_root"
  printf '%b %s\n' " ${YELLOW}Last Updated:${NC}" "$updated"
  echo ""

  # File statistics
  total_files=$(_stats_number '.total_files // 0')
  total_concepts=$(_stats_number '(.concepts // {}) | length')

  echo -e "${BOLD}${CYAN}📊 Overall Statistics${NC}"
  echo -e " ${YELLOW}Total Files:${NC} $(format_number "$total_files")"
  echo -e " ${YELLOW}Total Concepts:${NC} $(format_number "$total_concepts")"
  echo ""

  # Calculate token statistics
  total_tokens=$(_stats_number '[.file_summaries[]? | .token_estimate? // 0] | add // 0')
  total_lines=$(_stats_number '[.file_summaries[]? | .line_count? // 0] | add // 0')
  # An index without files must not trigger a division by zero.
  avg_tokens_per_file=$(awk -v t="$total_tokens" -v f="$total_files" \
    'BEGIN { printf "%.0f", (f > 0) ? t / f : 0 }')

  echo -e "${BOLD}${CYAN}📏 Token Statistics${NC}"
  echo -e " ${YELLOW}Total Tokens:${NC} $(format_number "$total_tokens")"
  echo -e " ${YELLOW}Total Lines:${NC} $(format_number "$total_lines")"
  echo -e " ${YELLOW}Avg Tokens/File:${NC} $avg_tokens_per_file"
  echo ""

  # Calculate potential token savings
  full_read_tokens=$total_tokens
  savings_percent=$(awk -v full="$full_read_tokens" -v idx="$index_read_tokens" \
    'BEGIN { printf "%.0f", (full > 0) ? (full - idx) * 100 / full : 0 }')

  echo -e "${BOLD}${CYAN}💰 Token Efficiency${NC}"
  echo -e " ${GREEN}Full codebase read:${NC} $(format_number "$full_read_tokens") tokens"
  echo -e " ${GREEN}Index-based query:${NC} ~$(format_number "$index_read_tokens") tokens"
  echo -e " ${BOLD}${MAGENTA}Potential savings:${NC} ~${savings_percent}%${NC}"
  echo ""

  # Top concepts by file count
  echo -e "${BOLD}${CYAN}🏆 Top Concepts (by file count)${NC}"
  echo ""

  # jq sorts and truncates the list. The count comes first on each line so a
  # concept name that itself contains "|" stays intact in the last field.
  # The loop reads from a process substitution, so it runs in this shell and
  # $rank is an ordinary local counter.
  rank=0
  while IFS='|' read -r count concept; do
    rank=$((rank + 1))
    printf " ${GREEN}%2d.${NC} %-35s ${CYAN}(%d file(s))${NC}\n" "$rank" "$concept" "$count"
  done < <(
    jq -r '
      (.concepts // {}) | to_entries
      | map({name: .key, n: ((.value.files? // []) | length)})
      | map(select(.n > 0))
      | sort_by(-.n) | .[:10][]
      | "\(.n)|\(.name)"
    ' "$INDEX_FILE" 2>/dev/null
  )
  echo ""

  # Largest files
  echo -e "${BOLD}${CYAN}📦 Largest Files (by tokens)${NC}"
  echo ""

  rank=0
  while IFS='|' read -r tokens lines file; do
    rank=$((rank + 1))
    printf " ${GREEN}%2d.${NC} %-50s ${CYAN}(%s tokens, %s lines)${NC}\n" \
      "$rank" "$file" "$(format_number "$tokens")" "$(format_number "$lines")"
  done < <(
    jq -r '
      (.file_summaries // {}) | to_entries
      | map({path: .key,
             tokens: (.value.token_estimate? // 0),
             lines: (.value.line_count? // 0)})
      | sort_by(-.tokens) | .[:10][]
      | "\(.tokens)|\(.lines)|\(.path)"
    ' "$INDEX_FILE" 2>/dev/null
  )
  echo ""

  # File type distribution
  echo -e "${BOLD}${CYAN}📄 File Type Distribution${NC}"
  echo ""

  # The counts are collected in the current shell (not in a pipeline stage),
  # otherwise the array would be empty again once the loop finishes.
  while IFS= read -r file; do
    base=${file##*/}
    case "$base" in
      ?*.?*) ext=${base##*.} ;;
      *) ext="(none)" ;; # no extension, or a dotfile such as ".env"
    esac
    ext_counts[$ext]=$((${ext_counts[$ext]:-0} + 1))
  done < <(jq -r '(.file_summaries // {}) | keys[]' "$INDEX_FILE" 2>/dev/null)

  if ((${#ext_counts[@]} > 0)); then
    # One key per line so that sort really orders the extensions.
    while IFS= read -r ext; do
      printf " ${YELLOW}%s${NC} %-15s ${CYAN}%d file(s)${NC}\n" "$ext" "" "${ext_counts[$ext]}"
    done < <(printf '%s\n' "${!ext_counts[@]}" | LC_ALL=C sort)
  fi
  echo ""

  # Completion message
  print_success "Statistics generated successfully"
  echo ""
  echo -e "${CYAN}Tip:${NC} Use 'search.sh <query>' to find files by concept"
  echo -e "${CYAN}Tip:${NC} Use 'concept-map.sh <concept>' to explore related code"
}

# Main function
# Entry point: verify the prerequisites, then render the report.
main() {
  check_index # exits with status 1 when the index file or jq is missing
  show_stats
}

main