diff --git a/src/bot/index.js b/src/bot/index.js index e5eebac4..ef361d9b 100644 --- a/src/bot/index.js +++ b/src/bot/index.js @@ -9,7 +9,7 @@ import { checkEnv } from '../utils/env.js'; import { getRTK } from '../utils/rtk.js'; import { isDuplicate, markProcessed } from './deduplication.js'; import { queueRequest, clearQueue, isProcessing } from './request-queue.js'; -import { sendFormatted, splitMessage, escapeMarkdown, sendStreamingMessage, StreamConsumer } from './message-sender.js'; +import { sendFormatted, splitMessage, escapeMarkdown, sendStreamingMessage, StreamConsumer, markdownToHtml } from './message-sender.js'; import { withSelfCorrection } from './self-correction.js'; function buildSessionKey(chatId, threadId) { @@ -638,7 +638,7 @@ export async function initBot(config, api, tools, skills, agents) { } return { - send: (chatId, text) => bot.api.sendMessage(chatId, text, { parse_mode: 'Markdown' }), + send: (chatId, text) => bot.api.sendMessage(chatId, markdownToHtml(text), { parse_mode: 'HTML' }), ws: (chatId, msg) => wsClients.get(chatId)?.send(JSON.stringify(msg)), waitForMessages: async () => { await new Promise(() => {}); }, getConnections: () => wsClients.size, diff --git a/src/bot/message-sender.js b/src/bot/message-sender.js index c6760abf..9ba2b3cc 100644 --- a/src/bot/message-sender.js +++ b/src/bot/message-sender.js @@ -6,6 +6,7 @@ * 2. Tokens accumulate in a buffer * 3. An async run() loop edits a single message at ~1s intervals * 4. Adaptive backoff on flood control, graceful fallback to plain send + * 5. Final message delivered with HTML formatting * * Credit: Hermes Agent gateway/stream_consumer.py (NousResearch/hermes-agent) */ @@ -18,6 +19,89 @@ const DEFAULT_BUFFER_THRESHOLD = 40; const MAX_FLOOD_STRIKES = 3; const CURSOR = ' ▉'; +// ─────────────────────────────────────────── +// Markdown → Telegram HTML converter +// ─────────────────────────────────────────── + +/** + * Convert common Markdown to Telegram-compatible HTML. 
+ * Handles: **bold**, *italic*, `code`, ```blocks```, [links](url), ~~strike~~, headings, lists. + * Code content is properly escaped; surrounding text is escaped before tag insertion. + */ +export function markdownToHtml(text) { + if (!text) return ''; + + // 1. Extract fenced code blocks → protect from escaping + const codeBlocks = []; + text = text.replace(/```(\w*)\n?([\s\S]*?)```/g, (_, lang, code) => { + const idx = codeBlocks.length; + const escaped = code + .replace(/&/g, '&') + .replace(//g, '>'); + codeBlocks.push(`
${escaped}`);
+ return `\x00CB${idx}\x00`;
+ });
+
+  // 2. Extract inline code → protect from escaping and from the Markdown pass in step 4
+  const inlineCodes = [];
+  text = text.replace(/`([^`\n]+)`/g, (_, code) => {
+    const idx = inlineCodes.length;
+    const escaped = code
+      .replace(/&/g, '&amp;') // '&' first, so the entities produced below are not escaped twice
+      .replace(/</g, '&lt;')
+      .replace(/>/g, '&gt;');
+    inlineCodes.push(`<code>${escaped}</code>`);
+    return `\x00IC${idx}\x00`; // NUL-delimited placeholder, swapped back in step 5
+  });
+
+  // 3. Escape HTML entities in remaining text ('&' first to avoid double-escaping)
+  text = text
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;');
+
+  // 4. Convert Markdown patterns → HTML tags
+  text = text
+    .replace(/\*\*(.+?)\*\*/g, '<b>$1</b>') // **bold**
+    .replace(/(?<!\*)\*(?![*\s])(.+?)(?<![*\s])\*(?!\*)/g, '<i>$1</i>') // *italic* (not inside **, not a "* " list bullet)
+ .replace(/~~(.+?)~~/g, '$1') // > quote + .replace(/^[-*]\s+/gm, '• '); // - or * list → bullet + + // 5. Restore protected code blocks and inline code + for (let i = 0; i < codeBlocks.length; i++) { + text = text.replace(`\x00CB${i}\x00`, codeBlocks[i]); + } + for (let i = 0; i < inlineCodes.length; i++) { + text = text.replace(`\x00IC${i}\x00`, inlineCodes[i]); + } + + return text; +} + +/** + * Sanitize text for plain-text Telegram messages (no parse_mode). + * Strips markdown formatting symbols so they don't show as raw text. + */ +export function stripMarkdown(text) { + if (!text) return ''; + return text + .replace(/```[\s\S]*?```/g, (m) => m.replace(/```\w*\n?/g, '┌──\n').replace(/```/g, '\n└──')) + .replace(/\*\*(.+?)\*\*/g, '$1') + .replace(/\*(.+?)\*/g, '$1') + .replace(/~~(.+?)~~/g, '$1') + .replace(/`([^`\n]+)`/g, '「$1」') + .replace(/\[(.+?)\]\((.+?)\)/g, '$1 ($2)') + .replace(/^#{1,4}\s+/gm, '') + .replace(/^[-*]\s+/gm, '• '); +} + export function splitMessage(text) { if (text.length <= MAX_MSG_LENGTH) return [text]; const chunks = []; @@ -40,14 +124,16 @@ export function escapeMarkdown(text) { export async function sendFormatted(ctx, text) { if (!text) return; + const html = markdownToHtml(text); try { - const chunks = splitMessage(text); + const chunks = splitMessage(html); for (const chunk of chunks) { - await ctx.reply(chunk, { parse_mode: 'Markdown' }); + await ctx.reply(chunk, { parse_mode: 'HTML' }); } } catch { - logger.warn('Markdown send failed, falling back to plain text'); - const chunks = splitMessage(text); + logger.warn('HTML send failed, falling back to stripped plain text'); + const plain = stripMarkdown(text); + const chunks = splitMessage(plain); for (const chunk of chunks) { await ctx.reply(chunk, { parse_mode: undefined }); } @@ -57,12 +143,15 @@ export async function sendFormatted(ctx, text) { /** * StreamConsumer — progressive edit-in-place streaming for Telegram. 
* + * - Intermediate edits: plain text (no formatting — partial HTML would break) + * - Final message: converted to Telegram HTML with full formatting + * * Usage: * const consumer = new StreamConsumer(ctx, { editInterval: 1000 }); * const runPromise = consumer.run(); // start async edit loop * // ... call consumer.onDelta(token) for each SSE chunk ... * consumer.finish(); - * await runPromise; // wait for final edit + * await runPromise; // wait for final edit (HTML formatted) */ export class StreamConsumer { constructor(ctx, options = {}) { @@ -151,7 +240,7 @@ export class StreamConsumer { this._lastSentText = ''; } - // Add cursor if not final + // Intermediate edits: plain text + cursor (no parse_mode) const displayText = this._accumulated + this.cursor; await this._sendOrEdit(displayText); this._lastEditTime = Date.now(); @@ -166,16 +255,18 @@ export class StreamConsumer { } } - // Final edit without cursor + // ═══════════════════════════════════════ + // FINAL EDIT — with HTML formatting + // ═══════════════════════════════════════ if (this._accumulated.trim()) { if (this._fallbackFinalSend) { await this._sendFallbackFinal(this._accumulated); } else if (this._messageId) { - await this._sendOrEdit(this._accumulated); - this._finalResponseSent = true; + // Edit the existing message with formatted HTML + await this._sendFinalFormatted(this._accumulated); } else { - await this._sendOrEdit(this._accumulated); - this._finalResponseSent = true; + // No message sent yet — send new with formatting + await this._sendFinalFormatted(this._accumulated); } } } catch (e) { @@ -183,6 +274,52 @@ export class StreamConsumer { } } + /** + * Send the final message with HTML formatting. + * Falls back to stripped plain text if HTML parse fails. 
+ */ + async _sendFinalFormatted(text) { + const html = markdownToHtml(text); + + // Try HTML first + try { + if (this._messageId) { + await this.ctx.api.editMessageText(this._chatId, this._messageId, html, { parse_mode: 'HTML' }); + } else { + const msg = await this.ctx.api.sendMessage(this.ctx.chat.id, html, { parse_mode: 'HTML' }); + if (msg?.message_id) { + this._messageId = msg.message_id; + this._chatId = msg.chat.id; + } + } + this._alreadySent = true; + this._finalResponseSent = true; + this._lastSentText = html; + return; + } catch (e) { + logger.warn(`Final HTML edit failed (${e.message}), falling back to plain text`); + } + + // Fallback: stripped plain text (no raw ** showing) + const plain = stripMarkdown(text); + try { + if (this._messageId) { + await this.ctx.api.editMessageText(this._chatId, this._messageId, plain, { parse_mode: undefined }); + } else { + const msg = await this.ctx.api.sendMessage(this.ctx.chat.id, plain, { parse_mode: undefined }); + if (msg?.message_id) { + this._messageId = msg.message_id; + this._chatId = msg.chat.id; + } + } + this._alreadySent = true; + this._finalResponseSent = true; + this._lastSentText = plain; + } catch (e2) { + logger.error('Final plain text send also failed:', e2.message); + } + } + async _sendOrEdit(text) { if (!text.trim()) return true; @@ -232,7 +369,7 @@ export class StreamConsumer { return false; } } else { - // First message — send new + // First message — send as plain text (no formatting during streaming) try { const msg = await this.ctx.api.sendMessage(this.ctx.chat.id, text, { parse_mode: undefined }); if (msg && msg.message_id) { @@ -315,17 +452,25 @@ export class StreamConsumer { // Try to strip cursor from last partial await this._tryStripCursor(); - const chunks = splitMessage(continuation); + // Send remaining content with HTML formatting + const html = markdownToHtml(continuation); + const chunks = splitMessage(html); let sentAny = false; for (const chunk of chunks) { try { - await 
this.ctx.reply(chunk, { parse_mode: undefined }); + await this.ctx.reply(chunk, { parse_mode: 'HTML' }); sentAny = true; - } catch (e) { - logger.warn('Fallback send chunk error:', e.message); + } catch { + // Fallback to plain + try { + await this.ctx.reply(stripMarkdown(chunk), { parse_mode: undefined }); + sentAny = true; + } catch (e) { + logger.warn('Fallback send chunk error:', e.message); + } } } - this._already_sent = sentAny; + this._alreadySent = sentAny; this._finalResponseSent = sentAny; } }