feat: add OpenClaw automated self-healing script to mitigate port collisions
This commit is contained in:
14
README.md
14
README.md
@@ -37,3 +37,17 @@ A native executable installer for Linux (x64).
|
|||||||
- **Runtime:** Built around Electron v41.1.1
|
- **Runtime:** Built around Electron v41.1.1
|
||||||
- **Packaging:** Bundled using Go `embed` to bypass traditional deployment toolchain limitations (like Wine).
|
- **Packaging:** Bundled using Go `embed` to bypass traditional deployment toolchain limitations (like Wine).
|
||||||
- **Silent Mode:** Installers execute completely in the background without requiring user interaction (No Next/Finish wizards).
|
- **Silent Mode:** Installers execute completely in the background without requiring user interaction (No Next/Finish wizards).
|
||||||
|
|
||||||
|
## OpenClaw Automated Self-Healing Engine
|
||||||
|
|
||||||
|
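This repository includes `openclaw-healer.sh`, a self-healing script that mitigates zombie headless-browser processes and port deadlocks commonly encountered when automating OpenClaw or Puppeteer. Each invocation performs a single check cycle, so it is meant to be run repeatedly (see the loop under "How to run it").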
|
||||||
|
|
||||||
|
### What it does:
|
||||||
|
1. **Zombie Reaping:** On each cycle, scans for defunct (`Z` state) Chrome, Chromium, and Puppeteer processes and reaps them by terminating their parent processes. Zombies whose parent is PID 1 are left alone.
|
||||||
|
2. **Port Mitigation:** Checks the target port (default `18800`) for stalled processes. If a process using the port has been running for longer than 120 seconds, the healer force-terminates it (`kill -15`, then `kill -9`) so that the port is released for the next automation task.
|
||||||
|
|
||||||
|
### How to run it:
|
||||||
|
```bash
|
||||||
|
chmod +x openclaw-healer.sh
|
||||||
|
nohup bash -c "while true; do ./openclaw-healer.sh; sleep 60; done" > /var/log/openclaw-healer.log 2>&1 &
|
||||||
|
```
|
||||||
|
|||||||
78
openclaw-healer.sh
Executable file
78
openclaw-healer.sh
Executable file
@@ -0,0 +1,78 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# =====================================================================
|
||||||
|
# OpenClaw Automated Self-Healing Engine
|
||||||
|
# Monitors and mitigates zombie browser processes & port 18800 collisions
|
||||||
|
# =====================================================================
|
||||||
|
|
||||||
|
# Runtime configuration.
# Each setting may be overridden from the environment; the default on the
# right is used when the variable is unset or empty.

# TCP port that is inspected for stalled processes.
TARGET_PORT="${TARGET_PORT:-18800}"

# File that log() appends every message to.
LOG_FILE="${LOG_FILE:-/var/log/openclaw-healer.log}"

# Kill processes on the port once their elapsed run time exceeds this many
# seconds (default: 2 minutes).
HEAL_THRESHOLD_SECONDS="${HEAL_THRESHOLD_SECONDS:-120}"
|
||||||
|
|
||||||
|
# Print a timestamped message to stdout and append the same line to the
# log file.
#
# Globals:   LOG_FILE (read); falls back to /var/log/openclaw-healer.log
#            when unset or empty.
# Arguments: the message; multiple arguments are joined with single spaces.
# Outputs:   "YYYY-MM-DD HH:MM:SS - <message>" on stdout and in LOG_FILE.
#
# NOTE: tee writes to both stdout and LOG_FILE, so redirecting this
# script's stdout into LOG_FILE records every line twice.
log() {
  # printf keeps the message literal regardless of its content.
  printf '%s - %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" \
    | tee -a "${LOG_FILE:-/var/log/openclaw-healer.log}"
}
|
||||||
|
|
||||||
|
# Reap zombie (defunct) Chrome/Chromium/Puppeteer processes.
#
# A zombie cannot be signalled away directly, so this terminates the
# zombie's parent instead. Each distinct parent is asked to exit with
# SIGTERM first and only receives SIGKILL if it is still alive a moment
# later.
#
# Globals:   none (uses the log function defined in this file)
# Arguments: none
# Outputs:   progress messages via log
# Returns:   exit status of the final log call
heal_zombies() {
  local parent_pids ppid

  log "Checking for zombie browser processes..."

  # One awk pass: keep rows whose STAT starts with Z and whose line mentions
  # a browser name (case-insensitive), then emit the PPID column. The ps
  # header row never matches because its first field is "STAT".
  parent_pids=$(ps -A -o stat,ppid,pid,cmd \
    | awk '$1 ~ /^[Zz]/ && tolower($0) ~ /chrome|chromium|puppeteer/ { print $2 }' \
    | sort -u)

  if [[ -z "$parent_pids" ]]; then
    log "No zombie browser processes detected."
    return 0
  fi

  log "WARNING: Detected zombie browser processes. Attempting to reap..."

  # Read from a here-string so the loop runs in the current shell rather
  # than in a pipeline subshell.
  while IFS= read -r ppid; do
    # Ignore anything that is not a plain PID.
    [[ "$ppid" =~ ^[0-9]+$ ]] || continue

    # Never signal PID 0/1 or this script itself.
    if ((ppid <= 1 || ppid == $$)); then
      continue
    fi

    log "Killing parent process $ppid to reap zombies."

    # Graceful first; errors are ignored because the parent may already be
    # gone or may belong to another user.
    kill -15 "$ppid" 2>/dev/null
    sleep 1
    if kill -0 "$ppid" 2>/dev/null; then
      kill -9 "$ppid" 2>/dev/null
    fi
  done <<<"$parent_pids"

  log "Zombie reaping complete."
}
|
||||||
|
|
||||||
|
# Free the target port when the process listening on it has been running
# for longer than the heal threshold.
#
# Only TCP listeners on the port are considered, so clients that are merely
# connected to the port are never killed. Each stale listener receives
# SIGTERM, then SIGKILL if it is still alive two seconds later. Afterwards
# the function checks whether that specific process still holds the port.
#
# NOTE: the age compared against the threshold is the process's elapsed run
# time (ps etimes), not the time since it bound the port.
#
# Globals:   TARGET_PORT (read, default 18800)
#            HEAL_THRESHOLD_SECONDS (read, default 120)
# Arguments: none
# Outputs:   progress messages via log
heal_port_collision() {
  local port="${TARGET_PORT:-18800}"
  local threshold="${HEAL_THRESHOLD_SECONDS:-120}"
  local pids pid age

  log "Checking port $port for deadlocks..."

  # -sTCP:LISTEN limits the match to sockets listening on the port.
  pids=$(lsof -t -iTCP:"$port" -sTCP:LISTEN 2>/dev/null)

  if [[ -z "$pids" ]]; then
    log "Port $port is free. No healing required."
    return 0
  fi

  while IFS= read -r pid; do
    [[ "$pid" =~ ^[0-9]+$ ]] || continue

    # Elapsed run time in seconds; empty when the process has already exited.
    age=$(ps -p "$pid" -o etimes= 2>/dev/null | tr -d ' ')
    if [[ ! "$age" =~ ^[0-9]+$ ]]; then
      log "Process $pid on port $port has already exited."
      continue
    fi

    if ((age <= threshold)); then
      log "Process $pid on port $port is active and within healthy timeframe (${age}s)."
      continue
    fi

    log "CRITICAL: Process $pid has held port $port for ${age}s (Exceeds threshold)."
    log "Self-healing: Force terminating process $pid..."

    # Graceful first; escalate only if the process ignored SIGTERM.
    kill -15 "$pid" 2>/dev/null
    sleep 2
    if kill -0 "$pid" 2>/dev/null; then
      kill -9 "$pid" 2>/dev/null
    fi

    # Give the kernel a moment to tear the socket down, then check this
    # process only (-a ANDs the -p and -i selectors) so that another
    # listener on the port is not blamed on it.
    sleep 1
    if lsof -t -a -p "$pid" -iTCP:"$port" -sTCP:LISTEN >/dev/null 2>&1; then
      log "ERROR: Failed to release port $port. Process $pid is stuck in kernel space."
    else
      log "SUCCESS: Process $pid no longer holds port $port."
    fi
  done <<<"$pids"
}
|
||||||
|
|
||||||
|
# Run one complete healing cycle: zombie reaping first, then the port check,
# bracketed by start/end markers and followed by a blank separator line.
run_healing_cycle() {
  log "=== Starting OpenClaw Self-Healing Cycle ==="

  heal_zombies
  heal_port_collision

  log "=== Cycle Complete ==="

  # Blank line so consecutive cycles are visually separated on stdout.
  printf '\n'
}

run_healing_cycle
|
||||||
Reference in New Issue
Block a user