#!/bin/bash
# Claude Code Production-Grade Health Check v2
# Comprehensive validation including Prometheus integration
#
# Verifies that the binaries, directories, configuration files, background
# processes and listening ports expected under ~/.claude are present.
# Every result is printed to the terminal and appended to
# ~/.claude/logs/health-check.log.
#
# Exit status: 0 when every critical check passes, 1 otherwise.

set -euo pipefail

CLAUDE_DIR="${HOME}/.claude"
LOG_DIR="${CLAUDE_DIR}/logs"
HEALTH_LOG="${LOG_DIR}/health-check.log"

mkdir -p "$LOG_DIR"

# Color codes (ANSI escapes; expanded by printf %b / echo -e)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color

#######################################
# Write a UTC-timestamped line to stdout and append it to the health log.
# Globals:   HEALTH_LOG (read)
# Arguments: message words, joined with spaces
#######################################
log() {
  printf '[%s] %s\n' "$(date -u +"%Y-%m-%d %H:%M:%S UTC")" "$*" \
    | tee -a "$HEALTH_LOG"
}

# Report helpers: print a colored status marker followed by the message ($1),
# then record the same message in the log with a severity prefix.
# The message is passed through %s so backslashes in it are never interpreted.
check_pass() {
  printf '%b✓%b %s\n' "$GREEN" "$NC" "$1"
  log "PASS: $1"
}

check_fail() {
  printf '%b✗%b %s\n' "$RED" "$NC" "$1"
  log "FAIL: $1"
}

check_warn() {
  printf '%b⚠%b %s\n' "$YELLOW" "$NC" "$1"
  log "WARN: $1"
}

check_info() {
  printf '%bℹ%b %s\n' "$BLUE" "$NC" "$1"
  log "INFO: $1"
}

#######################################
# Print a section heading (preceded by a blank line) and log it.
# Arguments: $1 - section title
#######################################
section() {
  printf '\n'
  printf '%b=== %s ===%b\n' "$BLUE" "$1" "$NC"
  log "SECTION: $1"
}

# Health check functions

#######################################
# Check that a binary is available.
# Arguments: $1 - binary name
#            $2 - optional explicit path, tried before PATH lookup
# Returns:   0 if found (at $2 or via PATH), 1 otherwise
#######################################
check_binary() {
  local binary="$1"
  local path="${2:-}"
  local location

  if [[ -n "$path" && -x "$path" ]]; then
    check_pass "$binary: Found at $path"
    return 0
  fi

  # Declaration is separate from assignment so the lookup's exit status
  # is what the `if` tests (local would otherwise mask it).
  if location=$(command -v "$binary" 2>/dev/null); then
    check_pass "$binary: Found at $location"
    return 0
  fi

  check_fail "$binary: Not found in PATH"
  return 1
}

#######################################
# Check that a directory exists.
# Arguments: $1 - directory path, $2 - human-readable name
# Returns:   0 if it exists, 1 otherwise
#######################################
check_directory() {
  local dir="$1"
  local name="$2"

  if [[ -d "$dir" ]]; then
    check_pass "$name: Directory exists at $dir"
    return 0
  fi

  check_fail "$name: Directory not found at $dir"
  return 1
}

#######################################
# Check that a regular file exists.
# Arguments: $1 - file path, $2 - human-readable name
# Returns:   0 if it exists, 1 otherwise
#######################################
check_file() {
  local file="$1"
  local name="$2"

  if [[ -f "$file" ]]; then
    check_pass "$name: File exists at $file"
    return 0
  fi

  check_fail "$name: File not found at $file"
  return 1
}

#######################################
# Check that the process recorded in a PID file is alive.
# Arguments: $1 - PID file path, $2 - human-readable name
# Returns:   0 if the PID file holds a positive integer PID that accepts
#            signal 0; 1 otherwise (a missing PID file is only a warning
#            in the output but still returns 1)
#######################################
check_process() {
  local pid_file="$1"
  local name="$2"
  local pid=""

  if [[ ! -f "$pid_file" ]]; then
    check_warn "$name: Not running (no PID file)"
    return 1
  fi

  # read strips surrounding whitespace; an unreadable or empty file
  # simply leaves pid empty.
  { read -r pid <"$pid_file"; } 2>/dev/null || true

  # Only a positive decimal PID is acceptable: `kill -0 0` or a negative
  # value addresses a process group and would succeed spuriously.
  if [[ "$pid" =~ ^[1-9][0-9]*$ ]] && kill -0 "$pid" 2>/dev/null; then
    check_pass "$name: Running (PID: $pid)"
    return 0
  fi

  check_fail "$name: PID file exists but process not running"
  return 1
}

#######################################
# Check that something is listening on a TCP/UDP port.
# Tries `ss -tuln` first, then `netstat -tuln`.
# Arguments: $1 - port number, $2 - human-readable name
# Returns:   0 if either tool lists the port, 1 otherwise
#######################################
check_port() {
  local port="$1"
  local name="$2"
  local tool listing

  for tool in ss netstat; do
    # Capture the full listing before matching. Piping straight into
    # `grep -q` under pipefail can report failure when grep exits early
    # and the producer is killed by SIGPIPE.
    listing=$("$tool" -tuln 2>/dev/null) || continue
    if grep -q ":${port}[[:space:]]" <<<"$listing"; then
      check_pass "$name: Listening on port $port"
      return 0
    fi
  done

  check_fail "$name: Not listening on port $port"
  return 1
}

#######################################
# Run every health check and print a summary.
# Globals:   CLAUDE_DIR, ANTHROPIC_API_KEY, ANTHROPIC_MODEL (read)
# Returns:   0 if all critical checks passed, 1 otherwise
#######################################
main() {
  local exit_code=0
  local plugin_count hook_count skill_count hook

  echo -e "${BLUE}"
  echo "╔═══════════════════════════════════════════════════════════╗"
  echo "║ Claude Code Production-Grade Health Check v2 ║"
  echo "║ Including Prometheus Integration ║"
  echo "║ $(date -u +"%Y-%m-%d %H:%M:%S UTC") ║"
  echo "╚═══════════════════════════════════════════════════════════╝"
  echo -e "${NC}"

  log "Starting health check"

  # Core Binaries
  section "Core Binaries"
  check_binary "claude" || exit_code=1
  check_binary "clawdbot" || exit_code=1
  check_binary "ralphloop" "${CLAUDE_DIR}/skills/bin/ralphloop" || exit_code=1
  check_binary "jq" || exit_code=1

  # Directory Structure
  section "Directory Structure"
  check_directory "${CLAUDE_DIR}/commands" "Commands directory" || exit_code=1
  check_directory "${CLAUDE_DIR}/skills" "Skills directory" || exit_code=1
  check_directory "${CLAUDE_DIR}/hooks" "Hooks directory" || exit_code=1
  check_directory "${CLAUDE_DIR}/plugins" "Plugins directory" || exit_code=1
  check_directory "${CLAUDE_DIR}/clawd" "Clawd directory" || exit_code=1
  check_directory "${CLAUDE_DIR}/ralph-integration" "Ralph integration" || exit_code=1
  check_directory "${CLAUDE_DIR}/mcp-servers" "MCP servers" || exit_code=1
  check_directory "${CLAUDE_DIR}/prometheus" "Prometheus" || exit_code=1

  # Configuration Files
  section "Configuration Files"
  check_file "${CLAUDE_DIR}/settings.json" "Settings" || exit_code=1
  check_file "${CLAUDE_DIR}/hooks.json" "Hooks configuration" || exit_code=1
  check_file "${CLAUDE_DIR}/config.json" "Config" || exit_code=1
  check_file "${CLAUDE_DIR}/plugins/installed_plugins.json" "Installed plugins" || exit_code=1
  check_file "${CLAUDE_DIR}/plugins/marketplaces/official.json" "Official marketplace" || exit_code=1
  check_file "${CLAUDE_DIR}/mcp-servers/registry.json" "MCP registry" || exit_code=1

  # Clawd Components
  section "Clawd Integration"
  check_process "${CLAUDE_DIR}/clawd/gateway.pid" "Clawd Gateway" || exit_code=1
  check_port 8766 "Clawd Gateway" || exit_code=1
  check_file "${CLAUDE_DIR}/commands/clawd.md" "Clawd command" || exit_code=1
  check_file "${CLAUDE_DIR}/hooks/clawd-wrapper.sh" "Clawd wrapper hook" || exit_code=1

  # Ralph Integration
  section "Ralph Integration"
  check_file "${CLAUDE_DIR}/skills/bin/ralphloop" "Ralph binary" || exit_code=1
  check_file "${CLAUDE_DIR}/skills/bin/ralph-loop" "Ralph symlink" || exit_code=1
  check_file "${CLAUDE_DIR}/commands/ralph.md" "Ralph command" || exit_code=1
  check_file "${CLAUDE_DIR}/ralph-integration/ralph-manage.sh" "Ralph manager" || exit_code=1

  # MCP Integration
  section "MCP Integration"
  check_file "${CLAUDE_DIR}/mcp-servers/manager.sh" "MCP manager" || exit_code=1
  check_file "${CLAUDE_DIR}/ralph-integration/arc/.agent/mcp/arc_mcp_server.py" "ARC MCP server" || exit_code=1
  check_file "${CLAUDE_DIR}/skills/mcp-client/mcp-client.py" "MCP client" || exit_code=1

  # Prometheus Integration
  section "Prometheus Integration"
  check_directory "${CLAUDE_DIR}/prometheus" "Prometheus directory" || exit_code=1
  check_file "${CLAUDE_DIR}/prometheus/README.md" "Prometheus README" || exit_code=1
  check_file "${CLAUDE_DIR}/commands/prometheus.md" "Prometheus command" || exit_code=1
  check_file "${CLAUDE_DIR}/hooks/prometheus-wrapper.sh" "Prometheus wrapper" || exit_code=1
  check_file "${CLAUDE_DIR}/prometheus/install.sh" "Prometheus installer" || exit_code=1

  # A missing venv is only a warning; it does not affect the exit code.
  if [[ -d "${CLAUDE_DIR}/prometheus/venv" ]]; then
    check_pass "Prometheus: Virtual environment created"
  else
    check_warn "Prometheus: Not installed (run install.sh)"
  fi

  # Plugin System
  section "Plugin System"
  if check_file "${CLAUDE_DIR}/plugins/installed_plugins.json" "Plugin registry"; then
    # Fall back to 0 when jq is missing or the file cannot be parsed.
    plugin_count=$(jq '.plugins | length' \
      "${CLAUDE_DIR}/plugins/installed_plugins.json" 2>/dev/null) \
      || plugin_count="0"
    check_info "Installed plugins: $plugin_count"

    if jq -e '.plugins."superpowers@superpowers"' \
      "${CLAUDE_DIR}/plugins/installed_plugins.json" >/dev/null 2>&1; then
      check_pass "Superpowers plugin: Registered"
    else
      check_fail "Superpowers plugin: Not registered"
      exit_code=1
    fi
  fi

  # Hooks System
  section "Hooks System"
  hook_count=$(jq '.hooks | length' "${CLAUDE_DIR}/hooks.json" 2>/dev/null) \
    || hook_count="0"
  check_info "Registered hook events: $hook_count"

  for hook in \
    clawd-auto-trigger.sh \
    clawd-session-start.sh \
    clawd-task-complete.sh \
    unified-integration-v2.sh \
    prometheus-wrapper.sh; do
    check_file "${CLAUDE_DIR}/hooks/$hook" "Hook: $hook" || exit_code=1
  done

  # Skills Count
  section "Skills"
  # `|| true`: under pipefail a missing skills directory makes find (and so
  # the pipeline) fail; wc still prints 0, which is the value we want.
  skill_count=$(find "${CLAUDE_DIR}/skills" -mindepth 1 -maxdepth 1 -type d \
    2>/dev/null | wc -l) || true
  # Some wc implementations pad the count with spaces; strip them.
  skill_count=${skill_count//[[:space:]]/}
  check_info "Available skills: ${skill_count:-0}"

  # Environment
  section "Environment"
  if [[ -n "${ANTHROPIC_API_KEY:-}" ]]; then
    check_pass "ANTHROPIC_API_KEY: Set"
  else
    check_warn "ANTHROPIC_API_KEY: Not set"
  fi

  if [[ -n "${ANTHROPIC_MODEL:-}" ]]; then
    check_info "ANTHROPIC_MODEL: $ANTHROPIC_MODEL"
  fi

  # Summary
  section "Summary"
  if [[ $exit_code -eq 0 ]]; then
    echo -e "${GREEN}✓ All critical systems operational${NC}"
    log "Health check PASSED"
    return 0
  fi

  echo -e "${RED}✗ Some issues detected - review logs above${NC}"
  log "Health check FAILED"
  return 1
}

# main is the last command, so its return status becomes the script's
# exit status (and under `set -e` a failing main exits immediately anyway).
main "$@"