feat: add vision, TTS, and browser tools (17 tools total)
- VisionTool: image analysis via the Z.AI GLM-4V multimodal API
- TTSTool: text-to-speech via node-edge-tts (free; generated audio is sent to the chat automatically)
- BrowserTool: web page content extraction via cheerio (strips HTML, extracts text)
- All three tools are wired into tools/index.js, the bot tool definitions, and the bot handlers
- The TTS handler automatically sends the generated audio to the chat as a voice message
This commit is contained in:
@@ -311,6 +311,28 @@ export async function initBot(config, api, tools, skills, agents) {
|
||||
command: { type: 'string', description: 'Command to run' },
|
||||
}, required: ['action'] },
|
||||
},
|
||||
vision: {
|
||||
description: 'Analyze an image from URL or file path. Returns detailed description and answers questions about the image.',
|
||||
parameters: { type: 'object', properties: {
|
||||
image_url: { type: 'string', description: 'Image URL (http/https) or local file path to analyze' },
|
||||
question: { type: 'string', description: 'Specific question about the image (optional, defaults to full description)' },
|
||||
}, required: ['image_url'] },
|
||||
},
|
||||
tts: {
|
||||
description: 'Convert text to speech audio. Generates an MP3 file using Edge TTS (free, no API key needed).',
|
||||
parameters: { type: 'object', properties: {
|
||||
text: { type: 'string', description: 'Text to convert to speech (max 5000 chars)' },
|
||||
voice: { type: 'string', description: 'Voice name (default: en-US-AvaNeural)' },
|
||||
output_path: { type: 'string', description: 'Output file path (optional)' },
|
||||
}, required: ['text'] },
|
||||
},
|
||||
browser: {
|
||||
description: 'Fetch and extract readable content from a web page URL. Returns title, description, and main text content.',
|
||||
parameters: { type: 'object', properties: {
|
||||
url: { type: 'string', description: 'URL to fetch and extract content from' },
|
||||
selector: { type: 'string', description: 'CSS selector for content extraction (optional, auto-detects article/main)' },
|
||||
}, required: ['url'] },
|
||||
},
|
||||
delegate_agent: {
|
||||
description: 'Delegate to a specialized agent role',
|
||||
parameters: { type: 'object', properties: {
|
||||
@@ -560,6 +582,39 @@ export async function initBot(config, api, tools, skills, agents) {
|
||||
if (!tool) return '❌ Cron tool unavailable.';
|
||||
try { return await tool.execute(args); } catch (e) { return `❌ ${e.message}`; }
|
||||
},
|
||||
vision: async (args) => {
|
||||
const tool = svc.toolMap.get('vision');
|
||||
if (!tool) return '❌ Vision tool unavailable.';
|
||||
try { return await tool.execute(args); } catch (e) { return `❌ ${e.message}`; }
|
||||
},
|
||||
tts: async (args) => {
|
||||
const tool = svc.toolMap.get('tts');
|
||||
if (!tool) return '❌ TTS tool unavailable.';
|
||||
try {
|
||||
const result = await tool.execute(args);
|
||||
// If audio was generated, send it as a voice message
|
||||
if (result.startsWith('✅')) {
|
||||
const filePath = result.match(/saved:\s*(.+)/)?.[1]?.trim();
|
||||
if (filePath) {
|
||||
try {
|
||||
await svc.bot.api.sendAudio(svc.currentChatId, { source: filePath }, {
|
||||
caption: '🔊 TTS',
|
||||
performer: 'zCode',
|
||||
});
|
||||
return '✅ Audio sent as voice message.';
|
||||
} catch (sendErr) {
|
||||
return `${result}\n⚠ Could not auto-send audio: ${sendErr.message}`;
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
} catch (e) { return `❌ ${e.message}`; }
|
||||
},
|
||||
browser: async (args) => {
|
||||
const tool = svc.toolMap.get('browser');
|
||||
if (!tool) return '❌ Browser tool unavailable.';
|
||||
try { return await tool.execute(args); } catch (e) { return `❌ ${e.message}`; }
|
||||
},
|
||||
delegate_agent: async (args) => {
|
||||
const agent = svc.agents.find(a => a.id === args.agent_id);
|
||||
if (!agent) return `❌ Agent not found: ${args.agent_id}`;
|
||||
@@ -883,6 +938,7 @@ export async function initBot(config, api, tools, skills, agents) {
|
||||
|
||||
// ── Load conversation history for this chat ──
|
||||
const chatKey = conversation._key(ctx.chat.id, ctx.message?.message_thread_id);
|
||||
svc.currentChatId = ctx.chat.id; // Track for TTS auto-send
|
||||
const history = await conversation.getContext(chatKey, text);
|
||||
|
||||
// Create stream consumer for real-time edit-in-place
|
||||
|
||||
Reference in New Issue
Block a user