feat: real-time SSE streaming via StreamConsumer (adapted from Hermes Agent)

- StreamConsumer class: queued token buffer → rate-limited editMessageText loop (sketched below)
- Adaptive flood control backoff (3 strikes → fallback to plain send)
- Cursor indicator (▉) during typing, stripped on completion
- chatWithAI now supports onDelta callback for SSE token streaming
- Uses native fetch() for SSE (Node 18+), falls back to non-streaming on error
- Message handler wires StreamConsumer into the chat pipeline
- Graceful fallback: if streaming fails entirely, sends as plain message
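
The StreamConsumer class itself lives in message-sender.js, whose half of this commit is not shown below. For orientation only, here is a minimal sketch of what a class with this contract could look like, reconstructed from how the diff uses it (the constructor options, run(), onDelta(), finish(), and the alreadySent flag) and from the behavior listed above. It assumes a grammY-style ctx with ctx.reply and ctx.api.editMessageText; everything beyond those names is an assumption, not the committed code.

// Hypothetical sketch, NOT the implementation from message-sender.js.
// Assumes a grammY-style ctx (ctx.reply, ctx.api.editMessageText).
export class StreamConsumer {
  constructor(ctx, { editInterval = 1000 } = {}) {
    this.ctx = ctx;
    this.editInterval = editInterval; // minimum ms between edits
    this.buffer = '';                 // queued tokens not yet rendered
    this.finished = false;
    this.alreadySent = false;         // true once a message has reached the chat
    this.strikes = 0;                 // flood-control failures; editing stops after 3
    this.messageId = null;
  }

  onDelta(token) {
    this.buffer += token;             // queue tokens; run() flushes them on a timer
  }

  finish() {
    this.finished = true;             // run() does one last edit without the cursor
  }

  async run() {
    let lastRendered = '';
    while (true) {
      const done = this.finished;
      const text = done ? this.buffer : this.buffer + '▉'; // cursor while typing
      if (this.buffer && text !== lastRendered && this.strikes < 3) {
        try {
          if (!this.messageId) {
            const sent = await this.ctx.reply(text);
            this.messageId = sent.message_id;
            this.alreadySent = true;
          } else {
            await this.ctx.api.editMessageText(this.ctx.chat.id, this.messageId, text);
          }
          lastRendered = text;
        } catch (err) {
          // e.g. Telegram 429 flood control; the real class reportedly falls back
          // to a plain send after three strikes, this sketch just stops editing.
          this.strikes++;
        }
      }
      if (done && (lastRendered === this.buffer || this.strikes >= 3)) break;
      await new Promise((r) => setTimeout(r, this.editInterval * (1 + this.strikes)));
    }
  }
}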
This commit is contained in: admin
2026-05-05 14:13:03 +00:00 · Unverified
parent e17af157ae · commit ed4a4c35e8
2 changed files with 410 additions and 72 deletions

@@ -9,7 +9,7 @@ import { checkEnv } from '../utils/env.js';
 import { getRTK } from '../utils/rtk.js';
 import { isDuplicate, markProcessed } from './deduplication.js';
 import { queueRequest, clearQueue, isProcessing } from './request-queue.js';
-import { sendFormatted, splitMessage, escapeMarkdown, sendStreamingMessage } from './message-sender.js';
+import { sendFormatted, splitMessage, escapeMarkdown, sendStreamingMessage, StreamConsumer } from './message-sender.js';
 import { withSelfCorrection } from './self-correction.js';

 function buildSessionKey(chatId, threadId) {
@@ -155,9 +155,16 @@ export async function initBot(config, api, tools, skills, agents) {
       messages,
       temperature: opts.temperature ?? 0.7,
       max_tokens: opts.maxTokens || 4096,
+      stream: !!opts.onDelta, // Enable SSE when delta callback is provided
     };
     if (tools.length) body.tools = tools;

+    // ── Streaming path (SSE) ──
+    if (opts.onDelta) {
+      return await chatWithAIStream(svc, body, tools, toolHandlers, opts.onDelta);
+    }
+
+    // ── Non-streaming path (original) ──
     const response = await api.client.post('/chat/completions', body);
     const choice = response.data.choices?.[0];
     if (!choice) return '❌ No response from model.';
@@ -186,6 +193,83 @@ export async function initBot(config, api, tools, skills, agents) {
     }
   }

+  /**
+   * Streaming chat completion via SSE.
+   * Pipes each token chunk to onDelta() callback in real-time.
+   * Falls back to non-streaming if SSE fails.
+   */
+  async function chatWithAIStream(svc, body, tools, toolHandlers, onDelta) {
+    const baseUrl = svc.api?.config?.baseUrl || 'https://api.z.ai/api/coding/paas/v4';
+    const apiKey = svc.api?.config?.apiKey || '';
+    let fullResponse = '';
+
+    try {
+      const response = await fetch(`${baseUrl}/chat/completions`, {
+        method: 'POST',
+        headers: {
+          'Authorization': `Bearer ${apiKey}`,
+          'Content-Type': 'application/json',
+        },
+        body: JSON.stringify(body),
+      });
+
+      if (!response.ok) {
+        const errText = await response.text();
+        logger.error(`SSE error ${response.status}: ${errText}`);
+        // Fallback to non-streaming
+        return await chatWithAI(body.messages, { ...body, stream: false });
+      }
+
+      const reader = response.body.getReader();
+      const decoder = new TextDecoder();
+      let buffer = '';
+
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+
+        buffer += decoder.decode(value, { stream: true });
+        const lines = buffer.split('\n');
+        buffer = lines.pop() || ''; // Keep incomplete line in buffer
+
+        for (const line of lines) {
+          const trimmed = line.trim();
+          if (!trimmed || !trimmed.startsWith('data: ')) continue;
+          const data = trimmed.slice(6);
+          if (data === '[DONE]') continue;
+
+          try {
+            const parsed = JSON.parse(data);
+            const choices = parsed.choices || [];
+            if (choices.length > 0) {
+              const delta = choices[0].delta || {};
+              const content = delta.content || '';
+              if (content) {
+                fullResponse += content;
+                onDelta(content);
+              }
+              // Check for tool calls in streaming
+              if (delta.tool_calls) {
+                // Tool calls in streaming mode — accumulate and handle after stream
+                // For now, fall through to non-streaming tool handling
+              }
+            }
+          } catch {
+            // Ignore malformed JSON lines
+          }
+        }
+      }
+    } catch (e) {
+      logger.error('SSE stream error:', e.message);
+      // Fallback to non-streaming
+      if (!fullResponse) {
+        return await chatWithAI(body.messages, { maxTokens: body.max_tokens });
+      }
+    }
+
+    return fullResponse || '✅ Done.';
+  }
+
   const toolHandlers = {
     bash: async (args) => {
       const tool = svc.toolMap.get('bash');
@@ -437,9 +521,13 @@ export async function initBot(config, api, tools, skills, agents) {
     await queueRequest(key, text, async () => {
       await ctx.api.sendChatAction(ctx.chat.id, 'typing');

-      // Wrap chatWithAI with self-correction
+      // Create stream consumer for real-time edit-in-place
+      const consumer = new StreamConsumer(ctx, { editInterval: 1000 });
+      const runPromise = consumer.run();
+
+      // Wrap chatWithAI with self-correction + streaming
       const chatWithCorrection = withSelfCorrection(async (msgs) => {
-        return await chatWithAI(msgs, {});
+        return await chatWithAI(msgs, { onDelta: (token) => consumer.onDelta(token) });
       });

       const result = await chatWithCorrection([
@@ -447,8 +535,14 @@ export async function initBot(config, api, tools, skills, agents) {
         { role: 'user', content: text },
       ]);

-      // Send with streaming effect
-      await sendStreamingMessage(ctx, result);
+      // Signal completion and wait for final edit
+      consumer.finish();
+      await runPromise;
+
+      // If streaming failed to deliver (no message sent), fallback to plain send
+      if (!consumer.alreadySent && result) {
+        await sendFormatted(ctx, result);
+      }

     });
   });
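
The streaming loop in chatWithAIStream parses delta.content but only stubs out delta.tool_calls ("accumulate and handle after stream"). A possible accumulation step, assuming the endpoint streams tool calls in the OpenAI-compatible shape where each fragment carries an index, an optional id and function.name, and function.arguments pieces to be concatenated; this is an illustration, not part of the commit:

// Sketch under the assumption of OpenAI-style streamed tool calls:
// delta.tool_calls = [{ index, id?, function: { name?, arguments? } }, ...]
const pendingToolCalls = []; // indexed by tool-call index, built up across chunks

function accumulateToolCalls(deltaToolCalls) {
  for (const tc of deltaToolCalls) {
    const i = tc.index ?? 0;
    pendingToolCalls[i] ??= { id: '', name: '', arguments: '' };
    if (tc.id) pendingToolCalls[i].id = tc.id;
    if (tc.function?.name) pendingToolCalls[i].name += tc.function.name;
    if (tc.function?.arguments) pendingToolCalls[i].arguments += tc.function.arguments;
  }
}

// Once the stream ends, the completed calls could be routed through the existing
// toolHandlers map, mirroring the non-streaming path:
//   for (const call of pendingToolCalls) {
//     const handler = toolHandlers[call.name];
//     if (handler) await handler(JSON.parse(call.arguments || '{}'));
//   }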