#!/bin/bash
# Codebase Indexer - Show Statistics
# Part of Chippery framework for semantic codebase navigation
#
# Usage: <this-script> [project_root]
#   project_root  Directory containing .codebase-index.json
#                 (defaults to the current working directory).
#
# Requires: bash 4+ (associative arrays), jq, awk, sort, head.

set -e

# Configuration
PROJECT_ROOT="${1:-$(pwd)}"
INDEX_FILE="$PROJECT_ROOT/.codebase-index.json"
LOG_FILE="$HOME/.claude/logs/codebase-indexer.log"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
MAGENTA='\033[0;35m'
BOLD='\033[1m'
NC='\033[0m' # No Color

# Append a timestamped message ($1) to LOG_FILE.
# Logging is best-effort: an unwritable log must not abort the report
# under `set -e`, so failures are deliberately ignored.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" >> "$LOG_FILE" 2>/dev/null || true
}

# Print a colored "[Chippery]" prefixed message ($1) to stdout and log it.
# The message is printed with %s so backslashes in paths are not interpreted.
print_status() {
  printf '%b %s\n' "${BLUE}[Chippery]${NC}" "$1"
  log "$1"
}

print_success() {
  printf '%b %s\n' "${GREEN}[Chippery]${NC}" "$1"
  log "SUCCESS: $1"
}

print_warning() {
  printf '%b %s\n' "${YELLOW}[Chippery]${NC}" "$1"
  log "WARNING: $1"
}

print_error() {
  printf '%b %s\n' "${RED}[Chippery]${NC}" "$1"
  log "ERROR: $1"
}

# Create log directory if needed (best-effort, see log()).
mkdir -p "$(dirname "$LOG_FILE")" 2>/dev/null || true

# Verify the preconditions for reporting: the index file exists, jq is
# installed, and the index parses as a JSON object. Exits 1 otherwise.
check_index() {
  if [ ! -f "$INDEX_FILE" ]; then
    print_error "No index found at $INDEX_FILE"
    print_status "Run 'build-index.sh' first to create the index"
    exit 1
  fi

  if ! command -v jq &> /dev/null; then
    print_error "jq is required for statistics"
    print_status "Install with: sudo apt-get install jq / brew install jq"
    exit 1
  fi

  # Fail early with a clear message instead of printing a report of blanks.
  if ! jq -e 'type == "object"' "$INDEX_FILE" > /dev/null 2>&1; then
    print_error "Index at $INDEX_FILE is not a valid JSON object"
    exit 1
  fi
}

# Format an integer for display: >= 1,000,000 as "N.NM", >= 1,000 as
# "N.NK", otherwise unchanged.
# Arguments: $1 - number; a fractional part is truncated, and anything that
#                 is not an integer afterwards (empty, "null", ...) counts as 0.
# Outputs:   the formatted number on stdout.
format_number() {
  local num=${1:-0}
  num=${num%%.*}
  [[ "$num" =~ ^-?[0-9]+$ ]] || num=0

  if [ "$num" -ge 1000000 ]; then
    awk -v n="$num" 'BEGIN { printf "%.1fM", n / 1000000 }'
  elif [ "$num" -ge 1000 ]; then
    awk -v n="$num" 'BEGIN { printf "%.1fK", n / 1000 }'
  else
    printf '%s\n' "$num"
  fi
}

# Run a jq filter against INDEX_FILE.
# Arguments: $1 - jq filter
#            $2 - fallback printed when jq fails or prints nothing (default: empty)
# Outputs:   the raw (-r) jq result, or the fallback, on stdout.
query_index() {
  local filter=$1 fallback=${2-}
  local out
  if out=$(jq -r "$filter" "$INDEX_FILE" 2>/dev/null) && [[ -n "$out" ]]; then
    printf '%s\n' "$out"
  else
    printf '%s\n' "$fallback"
  fi
}

# Print the full statistics report for INDEX_FILE to stdout.
show_stats() {
  echo ""
  echo -e "${BOLD}${BLUE}╔════════════════════════════════════════════════════════════╗${NC}"
  echo -e "${BOLD}${BLUE}║ Chippery Codebase Index Statistics ║${NC}"
  echo -e "${BOLD}${BLUE}╚════════════════════════════════════════════════════════════╝${NC}"
  echo ""

  # Basic info
  local version updated project_root
  version=$(query_index '.version')
  updated=$(query_index '.last_updated')
  project_root=$(query_index '.project_root')

  echo -e "${BOLD}${CYAN}📁 Index Information${NC}"
  printf " ${YELLOW}Version:${NC} %s\n" "$version"
  printf " ${YELLOW}Project Root:${NC} %s\n" "$project_root"
  printf " ${YELLOW}Last Updated:${NC} %s\n" "$updated"
  echo ""

  # File statistics (missing fields count as 0)
  local total_files total_concepts
  total_files=$(query_index '.total_files // 0' 0)
  total_concepts=$(query_index '.concepts | length' 0)

  echo -e "${BOLD}${CYAN}📊 Overall Statistics${NC}"
  printf " ${YELLOW}Total Files:${NC} %s\n" "$(format_number "$total_files")"
  printf " ${YELLOW}Total Concepts:${NC} %s\n" "$(format_number "$total_concepts")"
  echo ""

  # Calculate token statistics. `add` over an empty list yields null, hence `// 0`.
  local total_tokens total_lines avg_tokens_per_file
  total_tokens=$(query_index '[.file_summaries[] | .token_estimate] | add // 0' 0)
  total_lines=$(query_index '[.file_summaries[] | .line_count] | add // 0' 0)
  # "+ 0" forces a numeric comparison; an empty index must not divide by zero.
  avg_tokens_per_file=$(awk -v t="$total_tokens" -v f="$total_files" \
    'BEGIN { printf "%.0f", (f + 0 > 0) ? t / f : 0 }')

  echo -e "${BOLD}${CYAN}📏 Token Statistics${NC}"
  printf " ${YELLOW}Total Tokens:${NC} %s\n" "$(format_number "$total_tokens")"
  printf " ${YELLOW}Total Lines:${NC} %s\n" "$(format_number "$total_lines")"
  printf " ${YELLOW}Avg Tokens/File:${NC} %s\n" "$avg_tokens_per_file"
  echo ""

  # Calculate potential token savings
  local full_read_tokens=$total_tokens
  local index_read_tokens=2000 # Rough estimate for reading index
  local savings_percent
  # Clamped to 0 when the codebase is not larger than the index estimate;
  # this also covers the zero-token case.
  savings_percent=$(awk -v full="$full_read_tokens" -v idx="$index_read_tokens" \
    'BEGIN { full += 0; idx += 0; printf "%.0f", (full > idx) ? (full - idx) * 100 / full : 0 }')

  echo -e "${BOLD}${CYAN}💰 Token Efficiency${NC}"
  printf " ${GREEN}Full codebase read:${NC} %s tokens\n" "$(format_number "$full_read_tokens")"
  printf " ${GREEN}Index-based query:${NC} ~%s tokens\n" "$(format_number "$index_read_tokens")"
  printf " ${BOLD}${MAGENTA}Potential savings:${NC} ~%s%%${NC}\n" "$savings_percent"
  echo ""

  # Top concepts by file count
  echo -e "${BOLD}${CYAN}🏆 Top Concepts (by file count)${NC}"
  echo ""
  local rank=0 concept count
  # The count is emitted first so a concept name containing '|' stays intact:
  # `read` assigns the remainder of the line to the last variable.
  while IFS='|' read -r count concept; do
    rank=$((rank + 1))
    printf " ${GREEN}%2d.${NC} %-35s ${CYAN}(%d file(s))${NC}\n" "$rank" "$concept" "$count"
  done < <(jq -r '.concepts | to_entries[] | select(.value.files | length > 0) | "\(.value.files | length)|\(.key)"' "$INDEX_FILE" 2>/dev/null |
    sort -t'|' -k1,1nr | head -10)
  echo ""

  # Largest files
  echo -e "${BOLD}${CYAN}📦 Largest Files (by tokens)${NC}"
  echo ""
  local file tokens lines
  rank=0
  # Numeric fields first, path last, for the same '|'-safety reason as above.
  while IFS='|' read -r tokens lines file; do
    rank=$((rank + 1))
    printf " ${GREEN}%2d.${NC} %-50s ${CYAN}(%s tokens, %s lines)${NC}\n" \
      "$rank" "$file" "$(format_number "$tokens")" "$(format_number "$lines")"
  done < <(jq -r '.file_summaries | to_entries[] | "\(.value.token_estimate)|\(.value.line_count)|\(.key)"' "$INDEX_FILE" 2>/dev/null |
    sort -t'|' -k1,1nr | head -10)
  echo ""

  # File type distribution
  echo -e "${BOLD}${CYAN}📄 File Type Distribution${NC}"
  echo ""
  local -A ext_counts=()
  local base ext current
  # Process substitution keeps this loop in the current shell; piping jq into
  # `while` would fill ext_counts in a subshell and lose every count.
  while IFS= read -r file; do
    base=${file##*/}
    ext=${base##*.}
    # No dot in the file name, or a trailing dot: no usable extension.
    if [[ "$base" != *.* || -z "$ext" ]]; then
      ext="(none)"
    fi
    current=${ext_counts["$ext"]:-0}
    ext_counts["$ext"]=$((current + 1))
  done < <(jq -r '.file_summaries | keys[]' "$INDEX_FILE" 2>/dev/null)

  if ((${#ext_counts[@]} > 0)); then
    # One key per line so `sort` actually orders the extensions.
    while IFS= read -r ext; do
      printf " ${YELLOW}%s${NC} %-15s ${CYAN}%d file(s)${NC}\n" "$ext" "" "${ext_counts["$ext"]}"
    done < <(printf '%s\n' "${!ext_counts[@]}" | sort)
  fi
  echo ""

  # Completion message
  print_success "Statistics generated successfully"
  echo ""
  echo -e "${CYAN}Tip:${NC} Use 'search.sh ' to find files by concept"
  echo -e "${CYAN}Tip:${NC} Use 'concept-map.sh ' to explore related code"
}

# Main function: validate prerequisites, then print the report.
main() {
  check_index
  show_stats
}

main "$@"