From 9537bf2dc422375c0fb3b01d6d3ecb663550bc75 Mon Sep 17 00:00:00 2001 From: admin Date: Tue, 5 May 2026 16:20:00 +0000 Subject: [PATCH] fix: pidfile lock + port conflict guard + systemd ready - Pidfile lock prevents duplicate instances (auto-kills stale PIDs) - EADDRINUSE retry: kills port hog, retries up to 3x with 1.5s delay - releasePidfile() on graceful shutdown - Added fs/path imports needed by pidfile utilities --- src/bot/index.js | 70 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 67 insertions(+), 3 deletions(-) diff --git a/src/bot/index.js b/src/bot/index.js index 236eedf2..b2802618 100644 --- a/src/bot/index.js +++ b/src/bot/index.js @@ -4,6 +4,8 @@ import { sequentialize } from '@grammyjs/runner'; import express from 'express'; import { createServer } from 'http'; import { WebSocketServer } from 'ws'; +import fs from 'fs'; +import path from 'path'; import { logger } from '../utils/logger.js'; import { checkEnv } from '../utils/env.js'; import { getRTK } from '../utils/rtk.js'; @@ -13,6 +15,36 @@ import { sendFormatted, splitMessage, escapeMarkdown, sendStreamingMessage, Stre import { withSelfCorrection } from './self-correction.js'; import { getMemory, getConversation } from './memory.js'; +// ── Pidfile lock: prevent duplicate instances ── +const PIDFILE = path.join(process.env.HOME || '/tmp', '.zcode-bot.pid'); +function acquirePidfile() { + try { + if (fs.existsSync(PIDFILE)) { + const oldPid = parseInt(fs.readFileSync(PIDFILE, 'utf8').trim()); + // Check if old process is still alive + try { process.kill(oldPid, 0); + logger.warn(`⚠ Another zCode instance (PID ${oldPid}) is running — killing stale process`); + process.kill(oldPid, 'SIGTERM'); + // Wait briefly for cleanup + const deadline = Date.now() + 5000; + while (Date.now() < deadline) { + try { process.kill(oldPid, 0); } catch { break; } + Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, 200); + } + // Force kill if still alive + try { process.kill(oldPid, 0); process.kill(oldPid, 'SIGKILL'); } catch {} + } catch { /* old PID dead, safe */ } + } + fs.writeFileSync(PIDFILE, process.pid.toString()); + logger.info(`✓ Pidfile acquired: ${PIDFILE} (PID ${process.pid})`); + } catch (e) { + logger.error(`Pidfile error: ${e.message}`); + } +} +function releasePidfile() { + try { if (fs.existsSync(PIDFILE)) fs.unlinkSync(PIDFILE); } catch {} +} + function buildSessionKey(chatId, threadId) { return threadId ? `${chatId}:${threadId}` : String(chatId); } @@ -761,11 +793,14 @@ export async function initBot(config, api, tools, skills, agents) { const shutdown = async (signal) => { logger.info(`🛑 Shutting down (${signal})...`); await conversation.flush(); + releasePidfile(); process.exit(0); }; process.on('SIGINT', () => shutdown('SIGINT')); process.on('SIGTERM', () => shutdown('SIGTERM')); + acquirePidfile(); + // ── Express + WebSocket server (keep for webhook compatibility) ── const app = express(); app.use(express.json()); @@ -818,10 +853,39 @@ export async function initBot(config, api, tools, skills, agents) { }); const PORT = process.env.ZCODE_PORT || 3000; - httpServer.listen(PORT, () => { - logger.info(`✓ HTTP on :${PORT} · WS ready · grammy bot online`); - logger.info(`✓ ${svc.tools.length} tools · ${svc.skills.length} skills · ${svc.agents.length} agents`); + // ── Port conflict guard: retry with cleanup ── + let listenAttempts = 0; + const MAX_LISTEN_ATTEMPTS = 3; + function tryListen() { + httpServer.listen(PORT, () => { + logger.info(`✓ HTTP on :${PORT} · WS ready · grammy bot online`); + logger.info(`✓ ${svc.tools.length} tools · ${svc.skills.length} skills · ${svc.agents.length} agents`); + }); + } + httpServer.on('error', (err) => { + if (err.code === 'EADDRINUSE' && listenAttempts < MAX_LISTEN_ATTEMPTS) { + listenAttempts++; + logger.warn(`⚠ Port ${PORT} in use (attempt ${listenAttempts}/${MAX_LISTEN_ATTEMPTS}) — killing stale process`); + // Find and kill process on that port + try { + const { execSync } = require('child_process'); + const out = execSync(`fuser ${PORT}/tcp 2>/dev/null`, { encoding: 'utf8' }).trim(); + if (out) { + out.split(/\s+/).forEach(pid => { + try { + const p = parseInt(pid); + if (p !== process.pid) { process.kill(p, 'SIGTERM'); logger.warn(` Killed PID ${p}`); } + } catch {} + }); + } + } catch {} + setTimeout(tryListen, 1500); + } else { + logger.error(`❌ Failed to bind port ${PORT}: ${err.message}`); + process.exit(1); + } }); + tryListen(); // Set webhook const wu = process.env.ZCODE_WEBHOOK_URL;