feat: add OpenClaw automated self-healing script to mitigate port collisions
This commit is contained in:
14
README.md
14
README.md
@@ -37,3 +37,17 @@ A native executable installer for Linux (x64).
|
||||
- **Runtime:** Built around Electron v41.1.1
|
||||
- **Packaging:** Bundled using Go `embed` to bypass traditional deployment toolchain limitations (like Wine).
|
||||
- **Silent Mode:** Installers execute completely in the background without requiring user interaction (No Next/Finish wizards).
|
||||
|
||||
## OpenClaw Automated Self-Healing Engine
|
||||
|
||||
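Included in this repository is `openclaw-healer.sh`, a self-healing script designed to mitigate headless browser crashes and port deadlocks commonly encountered when automating OpenClaw or Puppeteer. Each invocation performs a single check cycle and then exits, so it is meant to be run periodically (for example, from the loop shown below).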
|
||||
|
||||
### What it does:
|
||||
1. **Zombie Reaping:** Continuously scans for and aggressively reaps `[Z] Defunct` instances of Chrome, Chromium, and Puppeteer by terminating their deadlocked parent processes.
|
||||
2. **Port Mitigation:** Monitors the target port (default `18800`) for stalled processes. If a process found on the port has been running for longer than 120 seconds (measured as the process's elapsed run time), the healer executes a forced termination (`kill -15`, then `kill -9`) so that the port is released for the next automation task.
|
||||
|
||||
### How to run it:
|
||||
```bash
|
||||
chmod +x openclaw-healer.sh
|
||||
nohup bash -c "while true; do ./openclaw-healer.sh; sleep 60; done" > /var/log/openclaw-healer.log 2>&1 &
|
||||
```
|
||||
|
||||
78
openclaw-healer.sh
Executable file
78
openclaw-healer.sh
Executable file
@@ -0,0 +1,78 @@
|
||||
#!/bin/bash
|
||||
|
||||
# =====================================================================
|
||||
# OpenClaw Automated Self-Healing Engine
|
||||
# Monitors and mitigates zombie browser processes & port 18800 collisions
|
||||
# =====================================================================
|
||||
|
||||
TARGET_PORT=18800
|
||||
LOG_FILE="/var/log/openclaw-healer.log"
|
||||
HEAL_THRESHOLD_SECONDS=120 # Kill processes holding the port for more than 2 minutes
|
||||
|
||||
#######################################
# Emit a timestamped message on stdout and append it to the log file.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text
# Outputs:   "<YYYY-MM-DD HH:MM:SS> - <message>" on stdout and in LOG_FILE
#######################################
log() {
  local entry
  entry="$(date '+%Y-%m-%d %H:%M:%S') - ${1-}"

  printf '%s\n' "$entry"

  # When stdout is already redirected to LOG_FILE (e.g. the script is started
  # with "> $LOG_FILE"), a second append would duplicate every line and, with
  # a non-append redirect, let the two writers overwrite each other. -ef is
  # true when both names refer to the same device and inode.
  if [[ -n "${LOG_FILE-}" && ! /dev/stdout -ef "$LOG_FILE" ]]; then
    printf '%s\n' "$entry" >>"$LOG_FILE"
  fi
}
|
||||
|
||||
#######################################
# Reap zombie (defunct) Chrome/Chromium/Puppeteer processes by terminating
# the parent process that has not collected them.
# Globals:   none (uses log)
# Arguments: none
# Outputs:   progress messages via log
#######################################
heal_zombies() {
  local parent_pids ppid

  log "Checking for zombie browser processes..."

  # Select rows whose STAT starts with Z and whose line mentions a browser,
  # then keep the unique parent PIDs (second column).
  parent_pids=$(
    ps -A -o stat,ppid,pid,cmd \
      | awk '$1 ~ /^[Zz]/ && tolower($0) ~ /chrome|chromium|puppeteer/ { print $2 }' \
      | sort -u
  )

  if [[ -z "$parent_pids" ]]; then
    log "No zombie browser processes detected."
    return 0
  fi

  log "WARNING: Detected zombie browser processes. Attempting to reap..."

  while IFS= read -r ppid; do
    # Ignore anything that is not a plain PID.
    [[ "$ppid" =~ ^[0-9]+$ ]] || continue

    # Never signal PID 0, init (1), this script, or the process that
    # launched this script.
    case "$ppid" in
      0 | 1 | "$$" | "$PPID") continue ;;
    esac

    log "Killing parent process $ppid to reap zombies."

    # Ask politely first; escalate to SIGKILL only if the parent survives.
    kill -15 "$ppid" 2>/dev/null
    sleep 2
    if kill -0 "$ppid" 2>/dev/null; then
      kill -9 "$ppid" 2>/dev/null
    fi
  done <<<"$parent_pids"

  log "Zombie reaping complete."
}
|
||||
|
||||
#######################################
# Terminate long-running processes that are listening on TARGET_PORT.
# A listener is considered stalled when its elapsed run time (ps etimes)
# exceeds HEAL_THRESHOLD_SECONDS; note this is the age of the process, not
# the time it has held the port.
# Globals:   TARGET_PORT (read), HEAL_THRESHOLD_SECONDS (read)
# Arguments: none
# Outputs:   progress messages via log
#######################################
heal_port_collision() {
  local listener_pids pid etimes

  log "Checking port $TARGET_PORT for deadlocks..."

  # Only processes LISTENING on the port are candidates; without the state
  # filter lsof also reports clients that are merely connected to the port.
  listener_pids=$(lsof -t -iTCP:"$TARGET_PORT" -sTCP:LISTEN 2>/dev/null)

  if [[ -z "$listener_pids" ]]; then
    log "Port $TARGET_PORT is free. No healing required."
    return 0
  fi

  while IFS= read -r pid; do
    [[ "$pid" =~ ^[0-9]+$ ]] || continue
    # Never signal this script itself.
    [[ "$pid" == "$$" ]] && continue

    # Elapsed run time of the process in seconds, with padding stripped.
    etimes=$(ps -p "$pid" -o etimes= 2>/dev/null)
    etimes=${etimes//[[:space:]]/}

    if [[ ! "$etimes" =~ ^[0-9]+$ ]]; then
      # The process went away between the lsof and ps calls.
      log "Process $pid on port $TARGET_PORT exited before it could be inspected. Skipping."
      continue
    fi

    if [ "$etimes" -gt "$HEAL_THRESHOLD_SECONDS" ]; then
      log "CRITICAL: Process $pid has held port $TARGET_PORT for ${etimes}s (Exceeds threshold)."
      log "Self-healing: Force terminating process $pid..."

      # SIGTERM first; SIGKILL only if the process is still alive afterwards.
      kill -15 "$pid" 2>/dev/null
      sleep 2
      if kill -0 "$pid" 2>/dev/null; then
        kill -9 "$pid" 2>/dev/null
      fi

      # Verify that THIS process no longer listens on the port (-a ANDs the
      # -p and -i selections), so other listeners are not misattributed.
      sleep 1
      if lsof -t -a -p "$pid" -iTCP:"$TARGET_PORT" -sTCP:LISTEN >/dev/null 2>&1; then
        log "ERROR: Failed to release port $TARGET_PORT. Process $pid is stuck in kernel space."
      else
        log "SUCCESS: Port $TARGET_PORT has been successfully released."
      fi
    else
      log "Process $pid on port $TARGET_PORT is active and within healthy timeframe (${etimes}s)."
    fi
  done <<<"$listener_pids"
}
|
||||
|
||||
#######################################
# Run one complete self-healing cycle: zombie reaping followed by the port
# check, bracketed by start/end log markers.
#######################################
main() {
  log "=== Starting OpenClaw Self-Healing Cycle ==="

  heal_zombies
  heal_port_collision

  log "=== Cycle Complete ==="

  # Trailing blank line on stdout after the cycle's output.
  printf '\n'
}

main
|
||||
Reference in New Issue
Block a user