feat: add vision, TTS, and browser tools (17 tools total)

- VisionTool: image analysis via Z.AI GLM-4V multimodal API
- TTSTool: text-to-speech via node-edge-tts (free, auto-sends audio to chat)
- BrowserTool: web page content extraction via cheerio (strips HTML, extracts text)
- All 3 wired into tools/index.js + bot tool definitions + handlers
- TTS handler auto-sends generated audio as voice message to chat
This commit is contained in:
admin
2026-05-05 16:52:12 +00:00
Unverified
parent d7f1e3db90
commit e92e9f5b9d
7 changed files with 793 additions and 0 deletions

View File

@@ -311,6 +311,28 @@ export async function initBot(config, api, tools, skills, agents) {
command: { type: 'string', description: 'Command to run' },
}, required: ['action'] },
},
vision: {
description: 'Analyze an image from URL or file path. Returns detailed description and answers questions about the image.',
parameters: { type: 'object', properties: {
image_url: { type: 'string', description: 'Image URL (http/https) or local file path to analyze' },
question: { type: 'string', description: 'Specific question about the image (optional, defaults to full description)' },
}, required: ['image_url'] },
},
tts: {
description: 'Convert text to speech audio. Generates an MP3 file using Edge TTS (free, no API key needed).',
parameters: { type: 'object', properties: {
text: { type: 'string', description: 'Text to convert to speech (max 5000 chars)' },
voice: { type: 'string', description: 'Voice name (default: en-US-AvaNeural)' },
output_path: { type: 'string', description: 'Output file path (optional)' },
}, required: ['text'] },
},
browser: {
description: 'Fetch and extract readable content from a web page URL. Returns title, description, and main text content.',
parameters: { type: 'object', properties: {
url: { type: 'string', description: 'URL to fetch and extract content from' },
selector: { type: 'string', description: 'CSS selector for content extraction (optional, auto-detects article/main)' },
}, required: ['url'] },
},
delegate_agent: {
description: 'Delegate to a specialized agent role',
parameters: { type: 'object', properties: {
@@ -560,6 +582,39 @@ export async function initBot(config, api, tools, skills, agents) {
if (!tool) return '❌ Cron tool unavailable.';
try { return await tool.execute(args); } catch (e) { return `${e.message}`; }
},
vision: async (args) => {
const tool = svc.toolMap.get('vision');
if (!tool) return '❌ Vision tool unavailable.';
try { return await tool.execute(args); } catch (e) { return `${e.message}`; }
},
tts: async (args) => {
const tool = svc.toolMap.get('tts');
if (!tool) return '❌ TTS tool unavailable.';
try {
const result = await tool.execute(args);
// If audio was generated, send it as a voice message
if (result.startsWith('✅')) {
const filePath = result.match(/saved:\s*(.+)/)?.[1]?.trim();
if (filePath) {
try {
await svc.bot.api.sendAudio(svc.currentChatId, { source: filePath }, {
caption: '🔊 TTS',
performer: 'zCode',
});
return '✅ Audio sent as voice message.';
} catch (sendErr) {
return `${result}\n⚠ Could not auto-send audio: ${sendErr.message}`;
}
}
}
return result;
} catch (e) { return `${e.message}`; }
},
browser: async (args) => {
const tool = svc.toolMap.get('browser');
if (!tool) return '❌ Browser tool unavailable.';
try { return await tool.execute(args); } catch (e) { return `${e.message}`; }
},
delegate_agent: async (args) => {
const agent = svc.agents.find(a => a.id === args.agent_id);
if (!agent) return `❌ Agent not found: ${args.agent_id}`;
@@ -883,6 +938,7 @@ export async function initBot(config, api, tools, skills, agents) {
// ── Load conversation history for this chat ──
// The conversation key distinguishes forum threads via message_thread_id.
const chatKey = conversation._key(ctx.chat.id, ctx.message?.message_thread_id);
// Remember the active chat so the TTS handler can auto-send generated audio.
// NOTE(review): only the chat id is tracked, not message_thread_id — in a
// forum topic the audio may land outside the thread; confirm this is intended.
svc.currentChatId = ctx.chat.id; // Track for TTS auto-send
const history = await conversation.getContext(chatKey, text);
// Create stream consumer for real-time edit-in-place