fix: pidfile lock + port conflict guard + systemd ready
- Pidfile lock prevents duplicate instances (auto-kills stale PIDs) - EADDRINUSE retry: kills port hog, retries up to 3x with 1.5s delay - releasePidfile() on graceful shutdown - Added fs/path imports needed by pidfile utilities
This commit is contained in:
@@ -4,6 +4,8 @@ import { sequentialize } from '@grammyjs/runner';
|
|||||||
import express from 'express';
|
import express from 'express';
|
||||||
import { createServer } from 'http';
|
import { createServer } from 'http';
|
||||||
import { WebSocketServer } from 'ws';
|
import { WebSocketServer } from 'ws';
|
||||||
|
import fs from 'fs';
|
||||||
|
import path from 'path';
|
||||||
import { logger } from '../utils/logger.js';
|
import { logger } from '../utils/logger.js';
|
||||||
import { checkEnv } from '../utils/env.js';
|
import { checkEnv } from '../utils/env.js';
|
||||||
import { getRTK } from '../utils/rtk.js';
|
import { getRTK } from '../utils/rtk.js';
|
||||||
@@ -13,6 +15,36 @@ import { sendFormatted, splitMessage, escapeMarkdown, sendStreamingMessage, Stre
|
|||||||
import { withSelfCorrection } from './self-correction.js';
|
import { withSelfCorrection } from './self-correction.js';
|
||||||
import { getMemory, getConversation } from './memory.js';
|
import { getMemory, getConversation } from './memory.js';
|
||||||
|
|
||||||
|
// ── Pidfile lock: prevent duplicate instances ──
|
||||||
|
const PIDFILE = path.join(process.env.HOME || '/tmp', '.zcode-bot.pid');
|
||||||
|
function acquirePidfile() {
|
||||||
|
try {
|
||||||
|
if (fs.existsSync(PIDFILE)) {
|
||||||
|
const oldPid = parseInt(fs.readFileSync(PIDFILE, 'utf8').trim());
|
||||||
|
// Check if old process is still alive
|
||||||
|
try { process.kill(oldPid, 0);
|
||||||
|
logger.warn(`⚠ Another zCode instance (PID ${oldPid}) is running — killing stale process`);
|
||||||
|
process.kill(oldPid, 'SIGTERM');
|
||||||
|
// Wait briefly for cleanup
|
||||||
|
const deadline = Date.now() + 5000;
|
||||||
|
while (Date.now() < deadline) {
|
||||||
|
try { process.kill(oldPid, 0); } catch { break; }
|
||||||
|
Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, 200);
|
||||||
|
}
|
||||||
|
// Force kill if still alive
|
||||||
|
try { process.kill(oldPid, 0); process.kill(oldPid, 'SIGKILL'); } catch {}
|
||||||
|
} catch { /* old PID dead, safe */ }
|
||||||
|
}
|
||||||
|
fs.writeFileSync(PIDFILE, process.pid.toString());
|
||||||
|
logger.info(`✓ Pidfile acquired: ${PIDFILE} (PID ${process.pid})`);
|
||||||
|
} catch (e) {
|
||||||
|
logger.error(`Pidfile error: ${e.message}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
function releasePidfile() {
|
||||||
|
try { if (fs.existsSync(PIDFILE)) fs.unlinkSync(PIDFILE); } catch {}
|
||||||
|
}
|
||||||
|
|
||||||
function buildSessionKey(chatId, threadId) {
|
function buildSessionKey(chatId, threadId) {
|
||||||
return threadId ? `${chatId}:${threadId}` : String(chatId);
|
return threadId ? `${chatId}:${threadId}` : String(chatId);
|
||||||
}
|
}
|
||||||
@@ -761,11 +793,14 @@ export async function initBot(config, api, tools, skills, agents) {
|
|||||||
const shutdown = async (signal) => {
|
const shutdown = async (signal) => {
|
||||||
logger.info(`🛑 Shutting down (${signal})...`);
|
logger.info(`🛑 Shutting down (${signal})...`);
|
||||||
await conversation.flush();
|
await conversation.flush();
|
||||||
|
releasePidfile();
|
||||||
process.exit(0);
|
process.exit(0);
|
||||||
};
|
};
|
||||||
process.on('SIGINT', () => shutdown('SIGINT'));
|
process.on('SIGINT', () => shutdown('SIGINT'));
|
||||||
process.on('SIGTERM', () => shutdown('SIGTERM'));
|
process.on('SIGTERM', () => shutdown('SIGTERM'));
|
||||||
|
|
||||||
|
acquirePidfile();
|
||||||
|
|
||||||
// ── Express + WebSocket server (keep for webhook compatibility) ──
|
// ── Express + WebSocket server (keep for webhook compatibility) ──
|
||||||
const app = express();
|
const app = express();
|
||||||
app.use(express.json());
|
app.use(express.json());
|
||||||
@@ -818,10 +853,39 @@ export async function initBot(config, api, tools, skills, agents) {
|
|||||||
});
|
});
|
||||||
|
|
||||||
const PORT = process.env.ZCODE_PORT || 3000;
|
const PORT = process.env.ZCODE_PORT || 3000;
|
||||||
httpServer.listen(PORT, () => {
|
// ── Port conflict guard: retry with cleanup ──
|
||||||
logger.info(`✓ HTTP on :${PORT} · WS ready · grammy bot online`);
|
let listenAttempts = 0;
|
||||||
logger.info(`✓ ${svc.tools.length} tools · ${svc.skills.length} skills · ${svc.agents.length} agents`);
|
const MAX_LISTEN_ATTEMPTS = 3;
|
||||||
|
function tryListen() {
|
||||||
|
httpServer.listen(PORT, () => {
|
||||||
|
logger.info(`✓ HTTP on :${PORT} · WS ready · grammy bot online`);
|
||||||
|
logger.info(`✓ ${svc.tools.length} tools · ${svc.skills.length} skills · ${svc.agents.length} agents`);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
httpServer.on('error', (err) => {
|
||||||
|
if (err.code === 'EADDRINUSE' && listenAttempts < MAX_LISTEN_ATTEMPTS) {
|
||||||
|
listenAttempts++;
|
||||||
|
logger.warn(`⚠ Port ${PORT} in use (attempt ${listenAttempts}/${MAX_LISTEN_ATTEMPTS}) — killing stale process`);
|
||||||
|
// Find and kill process on that port
|
||||||
|
try {
|
||||||
|
const { execSync } = require('child_process');
|
||||||
|
const out = execSync(`fuser ${PORT}/tcp 2>/dev/null`, { encoding: 'utf8' }).trim();
|
||||||
|
if (out) {
|
||||||
|
out.split(/\s+/).forEach(pid => {
|
||||||
|
try {
|
||||||
|
const p = parseInt(pid);
|
||||||
|
if (p !== process.pid) { process.kill(p, 'SIGTERM'); logger.warn(` Killed PID ${p}`); }
|
||||||
|
} catch {}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} catch {}
|
||||||
|
setTimeout(tryListen, 1500);
|
||||||
|
} else {
|
||||||
|
logger.error(`❌ Failed to bind port ${PORT}: ${err.message}`);
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
tryListen();
|
||||||
|
|
||||||
// Set webhook
|
// Set webhook
|
||||||
const wu = process.env.ZCODE_WEBHOOK_URL;
|
const wu = process.env.ZCODE_WEBHOOK_URL;
|
||||||
|
|||||||
Reference in New Issue
Block a user