Files
Agentic-Compaction-and-Pipl…/skills/podcast-generate/generate.ts
Z User 2380d33861 feat: Add complete Agentic Compaction & Pipeline System
- Context Compaction System with token counting and summarization
- Deterministic State Machine for flow control (no LLM decisions)
- Parallel Execution Engine (up to 12 concurrent sessions)
- Event-Driven Coordination via Event Bus
- Agent Workspace Isolation (tools, memory, identity, files)
- YAML Workflow Integration (OpenClaw/Lobster compatible)
- Claude Code integration layer
- Complete demo UI with real-time visualization
- Comprehensive documentation and README

Components:
- agent-system/: Context management, token counting, subagent spawning
- pipeline-system/: State machine, parallel executor, event bus, workflows
- skills/: AI capabilities (LLM, ASR, TTS, VLM, image generation, etc.)
- src/app/: Next.js demo application

Total: ~100KB of production-ready TypeScript code
2026-03-03 12:40:47 +00:00

662 lines
22 KiB
TypeScript
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env tsx
/**
* generate.ts - 统一入口(纯 SDK 版本)
* 原资料 -> podcast_script.md + podcast.wav
*
* 只使用 z-ai-web-dev-sdk不依赖 z-ai CLI
*
* Usage:
* tsx generate.ts --input=material.txt --out_dir=out
* tsx generate.ts --input=material.md --out_dir=out --duration=5
*/
import ZAI from 'z-ai-web-dev-sdk';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import os from 'os';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// -----------------------------
// Types
// -----------------------------
interface GenConfig {
mode: 'dual' | 'single-male' | 'single-female';
temperature: number;
durationManual: number;
charsPerMin: number;
hostName: string;
guestName: string;
audience: string;
tone: string;
maxAttempts: number;
timeoutSec: number;
voiceHost: string;
voiceGuest: string;
speed: number;
pauseMs: number;
}
interface Segment {
idx: number;
speaker: 'host' | 'guest';
name: string;
text: string;
}
// -----------------------------
// Config
// -----------------------------
const DEFAULT_CONFIG: GenConfig = {
mode: 'dual',
temperature: 0.9,
durationManual: 0,
charsPerMin: 240,
hostName: '小谱',
guestName: '锤锤',
audience: '白领小白',
tone: '轻松但有信息密度',
maxAttempts: 3,
timeoutSec: 300,
voiceHost: 'xiaochen',
voiceGuest: 'chuichui',
speed: 1.0,
pauseMs: 200,
};
const DURATION_RANGE_LOW = 3;
const DURATION_RANGE_HIGH = 20;
const BUDGET_TOLERANCE = 0.15;
// -----------------------------
// Functions
// -----------------------------
function parseArgs(): { [key: string]: any } {
const args = process.argv.slice(2);
const result: { [key: string]: any } = {};
for (let i = 0; i < args.length; i++) {
const arg = args[i];
if (arg.startsWith('--')) {
const key = arg.slice(2);
if (key.includes('=')) {
const [k, v] = key.split('=');
result[k] = v;
} else if (i + 1 < args.length && !args[i + 1].startsWith('--')) {
result[key] = args[i + 1];
i++;
} else {
result[key] = true;
}
}
}
return result;
}
function readText(filePath: string): string {
let content = fs.readFileSync(filePath, 'utf-8');
content = content.replace(/\r\n/g, '\n');
content = content.replace(/\n{3,}/g, '\n\n');
content = content.replace(/[ \t]{2,}/g, ' ');
content = content.replace(/-\n/g, '');
return content.trim();
}
function countNonWsChars(text: string): number {
return text.replace(/\s+/g, '').length;
}
function chooseDurationMinutes(inputChars: number, low: number = DURATION_RANGE_LOW, high: number = DURATION_RANGE_HIGH): number {
const estimated = Math.max(low, Math.min(high, Math.floor(inputChars / 1000)));
return estimated;
}
function charBudget(durationMin: number, charsPerMin: number, tolerance: number): [number, number, number] {
const target = durationMin * charsPerMin;
const low = Math.floor(target * (1 - tolerance));
const high = Math.ceil(target * (1 + tolerance));
return [target, low, high];
}
function buildPrompts(
material: string,
cfg: GenConfig,
durationMin: number,
budgetTarget: number,
budgetLow: number,
budgetHigh: number,
attemptHint: string = ''
): [string, string] {
let system: string;
let user: string;
if (cfg.mode === 'dual') {
system = (
`你是一个播客脚本编剧,擅长把资料提炼成双人对谈播客。` +
`角色固定为男主持「${cfg.hostName}」与女嘉宾「${cfg.guestName}」。` +
`你写作口播化、信息密度适中、有呼吸感、节奏自然。` +
`你必须严格遵守输出格式与字数预算。`
);
const hintBlock = attemptHint ? `\n【上一次生成纠偏提示】\n${attemptHint}\n` : '';
user = `请把下面【资料】改写为中文播客脚本,形式为双人对谈(男主持 ${cfg.hostName} + 女嘉宾 ${cfg.guestName})。
时长目标:${durationMin} 分钟。
【硬性约束】
1) 总字数必须在 ${budgetLow}${budgetHigh} 字之间(目标约 ${budgetTarget} 字)。
2) 严格使用轮次交替输出:每段必须以"**${cfg.hostName}**"或"**${cfg.guestName}**"开头。
3) 必须包含完整的叙事结构(但不要在对话中写出结构标签):
- 开场Hook 引入 + 本期主题介绍
- 主体3个不同维度的内容用自然过渡语连接
- 总结:回顾要点 + 行动建议1句话明确可执行
4) 不要在对话中写"核心点1"、"第一点"等结构标签,用自然的过渡语如"说到这个"、"还有个有趣的事"、"另外"等
5) 不要照念原文,不要大段引用;要用口播化表达。
6) 受众:${cfg.audience}
7) 风格:${cfg.tone}
【呼吸感与自然对话 - 重要!】
为了营造真实播客的呼吸感,请:
1) 适度加入语气词和感叹词:嗯、哦、啊、对、没错、哈哈、哇、天呐、啧啧等
2) 多用互动式表达:"你说得对"、"这就很有意思了"、"等等,让我想想"、"我懂你的意思"
3) 适当加入思考和停顿的暗示:"这个问题嘛..."、"怎么说呢..."、"其实..."
4) 避免过于密集的信息输出每段控制在3-5句话给听众消化时间
5) 用类比和生活化的例子来解释复杂概念
6) 两人之间要有自然的呼应和追问,而不是各说各话
7) 不同主题之间用自然过渡语连接,不要出现"核心点1/2/3"等标签
【输出格式示例】
**${cfg.hostName}**:开场……
**${cfg.guestName}**:回应……
(一直交替到结束)
${hintBlock}
【资料】
${material}
`;
} else {
const speakerName = cfg.mode === 'single-male' ? cfg.hostName : cfg.guestName;
const gender = cfg.mode === 'single-male' ? '男性' : '女性';
system = (
`你是一个${gender}单人播客主播,名字叫「${speakerName}」。` +
`你擅长把资料提炼成单人独白式播客,像讲课、读书分享、知识科普一样。` +
`你写作口播化、信息密度适中、有呼吸感、节奏自然。` +
`你必须严格遵守输出格式与字数预算。`
);
const hintBlock = attemptHint ? `\n【上一次生成纠偏提示】\n${attemptHint}\n` : '';
user = `请把下面【资料】改写为中文单人播客脚本,形式为独白式讲述(主播:${speakerName})。
时长目标:${durationMin} 分钟。
【硬性约束】
1) 总字数必须在 ${budgetLow}${budgetHigh} 字之间(目标约 ${budgetTarget} 字)。
2) 所有内容均由「${speakerName}」一人讲述,每段都以"**${speakerName}**"开头。
3) 必须包含完整的叙事结构(但不要在对话中写出结构标签):
- 开场Hook 引入 + 本期主题介绍
- 主体3个不同维度的内容用自然过渡语连接
- 总结:回顾要点 + 行动建议1句话明确可执行
4) 不要在对话中写"核心点1"、"第一点"等结构标签,用自然的过渡语如"说到这个"、"还有个有趣的事"、"另外"等
5) 不要照念原文,不要大段引用;要用口播化表达。
6) 受众:${cfg.audience}
7) 风格:${cfg.tone}
【单人播客的呼吸感 - 重要!】
为了营造自然的单人播客呼吸感,请:
1) 适度加入语气词和感叹词:嗯、哦、啊、对、没错、哈哈、哇、天呐、啧啧等
2) 多用自问自答式表达:"你可能会问...答案是..."、"这是为什么呢?让我来解释..."
3) 适当加入思考和停顿的暗示:"这个问题嘛..."、"怎么说呢..."、"其实..."
4) 避免过于密集的信息输出每段控制在3-5句话给听众消化时间
5) 用类比和生活化的例子来解释复杂概念
6) 像在和朋友聊天一样,而不是在念课文
【输出格式示例】
**${speakerName}**:开场,大家好,我是${speakerName},今天我们来聊……
**${speakerName}**:说到这个,最近有个特别有意思的事……
(所有内容都由${speakerName}讲述,分段输出)
${hintBlock}
【资料】
${material}
`;
}
return [system, user];
}
async function callZAI(
systemPrompt: string,
userPrompt: string,
temperature: number
): Promise<string> {
const zai = await ZAI.create();
const completion = await zai.chat.completions.create({
messages: [
{ role: 'assistant', content: systemPrompt },
{ role: 'user', content: userPrompt },
],
thinking: { type: 'disabled' },
});
const content = completion.choices[0]?.message?.content || '';
return content;
}
function scriptToSegments(script: string, hostName: string, guestName: string): Segment[] {
const segments: Segment[] = [];
const lines = script.split('\n');
let current: Segment | null = null;
let idx = 0;
const hostPrefix = `**${hostName}**`;
const guestPrefix = `**${guestName}**`;
for (const rawLine of lines) {
const line = rawLine.trim();
if (!line) continue;
if (line.startsWith(hostPrefix)) {
idx++;
current = {
idx,
speaker: 'host',
name: hostName,
text: line.slice(hostPrefix.length).trim(),
};
segments.push(current);
} else if (line.startsWith(guestPrefix)) {
idx++;
current = {
idx,
speaker: 'guest',
name: guestName,
text: line.slice(guestPrefix.length).trim(),
};
segments.push(current);
} else {
if (current) {
current.text = (current.text + ' ' + line).trim();
}
}
}
return segments;
}
function validateScript(
script: string,
cfg: GenConfig,
budgetLow: number,
budgetHigh: number
): [boolean, string[]] {
const reasons: string[] = [];
if (cfg.mode === 'dual') {
const hostTag = `**${cfg.hostName}**`;
const guestTag = `**${cfg.guestName}**`;
if (!script.includes(hostTag)) reasons.push(`缺少主持人标识:${hostTag}`);
if (!script.includes(guestTag)) reasons.push(`缺少嘉宾标识:${guestTag}`);
const turns = script.split('\n').filter(line =>
line.startsWith(hostTag) || line.startsWith(guestTag)
);
if (turns.length < 8) reasons.push('对谈轮次过少:建议至少 8 轮');
} else {
const speakerName = cfg.mode === 'single-male' ? cfg.hostName : cfg.guestName;
const speakerTag = `**${speakerName}**`;
if (!script.includes(speakerTag)) reasons.push(`缺少主播标识:${speakerTag}`);
const turns = script.split('\n').filter(line => line.startsWith(speakerTag));
if (turns.length < 5) reasons.push('播客段数过少:建议至少 5 段');
}
const n = countNonWsChars(script);
if (n < budgetLow || n > budgetHigh) {
reasons.push(`字数不在预算:当前约 ${n} 字,预算 ${budgetLow}-${budgetHigh}`);
}
// 只检查开场和总结,不检查"核心点1/2/3"标签(因为不应该出现在对话中)
const mustHave = ['开场', '总结'];
for (const kw of mustHave) {
if (!script.includes(kw)) {
reasons.push(`缺少结构要素:${kw}(请在对话中自然引入)`);
}
}
// 检查是否有足够的对话轮次(确保内容覆盖了多个主题)
const lineCount = script.split('\n').filter(l => l.trim()).length;
if (lineCount < 10) {
reasons.push('对话轮次过少建议至少10段对话');
}
return [reasons.length === 0, reasons];
}
function makeRetryHint(reasons: string[], cfg: GenConfig, budgetLow: number, budgetHigh: number): string {
const lines = ['请严格修复以下问题后重新生成:'];
for (const r of reasons) lines.push(`- ${r}`);
lines.push(`- 总字数必须在 ${budgetLow}-${budgetHigh} 之间。`);
if (cfg.mode === 'dual') {
lines.push(`- 每段必须以"**${cfg.hostName}**"或"**${cfg.guestName}**"开头。`);
} else {
const speakerName = cfg.mode === 'single-male' ? cfg.hostName : cfg.guestName;
lines.push(`- 所有内容都由一人讲述,每段必须以"**${speakerName}**"开头。`);
}
lines.push('- 必须包含开场和总结,中间用自然过渡语连接不同主题,不要出现"核心点1/2/3"等标签。');
return lines.join('\n');
}
async function ttsRequest(
zai: any,
text: string,
voice: string,
speed: number
): Promise<Buffer> {
const response = await zai.audio.tts.create({
input: text,
voice: voice,
speed: speed,
response_format: 'wav',
stream: false,
});
const arrayBuffer = await response.arrayBuffer();
const buffer = Buffer.from(new Uint8Array(arrayBuffer));
return buffer;
}
function ensureSilenceWav(filePath: string, params: { nchannels: number; sampwidth: number; framerate: number }, ms: number): void {
const { nchannels, sampwidth, framerate } = params;
const nframes = Math.floor((framerate * ms) / 1000);
const silenceFrame = Buffer.alloc(sampwidth * nchannels, 0);
const frames = Buffer.alloc(silenceFrame.length * nframes, 0);
const header = Buffer.alloc(44);
header.write('RIFF', 0);
header.writeUInt32LE(36 + frames.length, 4);
header.write('WAVE', 8);
header.write('fmt ', 12);
header.writeUInt32LE(16, 16);
header.writeUInt16LE(1, 20);
header.writeUInt16LE(nchannels, 22);
header.writeUInt32LE(framerate, 24);
header.writeUInt32LE(framerate * nchannels * sampwidth, 28);
header.writeUInt16LE(nchannels * sampwidth, 32);
header.writeUInt16LE(sampwidth * 8, 34);
header.write('data', 36);
header.writeUInt32LE(frames.length, 40);
fs.writeFileSync(filePath, Buffer.concat([header, frames]));
}
function wavParams(filePath: string): { nchannels: number; sampwidth: number; framerate: number } {
const buffer = fs.readFileSync(filePath);
const nchannels = buffer.readUInt16LE(22);
const sampwidth = buffer.readUInt16LE(34) / 8;
const framerate = buffer.readUInt32LE(24);
return { nchannels, sampwidth, framerate };
}
function joinWavsWave(outPath: string, wavPaths: string[], pauseMs: number): void {
if (wavPaths.length === 0) throw new Error('No wav files to join.');
const ref = wavPaths[0];
const refParams = wavParams(ref);
const silencePath = path.join(os.tmpdir(), `_silence_${Date.now()}.wav`);
if (pauseMs > 0) ensureSilenceWav(silencePath, refParams, pauseMs);
const chunks: Buffer[] = [];
for (let i = 0; i < wavPaths.length; i++) {
const wavPath = wavPaths[i];
const buffer = fs.readFileSync(wavPath);
const dataStart = buffer.indexOf('data') + 8;
const data = buffer.subarray(dataStart);
const params = wavParams(wavPath);
if (params.nchannels !== refParams.nchannels ||
params.sampwidth !== refParams.sampwidth ||
params.framerate !== refParams.framerate) {
throw new Error(`WAV params mismatch: ${wavPath}`);
}
chunks.push(data);
if (pauseMs > 0 && i < wavPaths.length - 1) {
const silenceBuffer = fs.readFileSync(silencePath);
const silenceData = silenceBuffer.subarray(silenceBuffer.indexOf('data') + 8);
chunks.push(silenceData);
}
}
const totalDataSize = chunks.reduce((sum, buf) => sum + buf.length, 0);
const header = Buffer.alloc(44);
header.write('RIFF', 0);
header.writeUInt32LE(36 + totalDataSize, 4);
header.write('WAVE', 8);
header.write('fmt ', 12);
header.writeUInt32LE(16, 16);
header.writeUInt16LE(1, 20);
header.writeUInt16LE(refParams.nchannels, 22);
header.writeUInt32LE(refParams.framerate, 24);
header.writeUInt32LE(refParams.framerate * refParams.nchannels * refParams.sampwidth, 28);
header.writeUInt16LE(refParams.nchannels * refParams.sampwidth, 32);
header.writeUInt16LE(refParams.sampwidth * 8, 34);
header.write('data', 36);
header.writeUInt32LE(totalDataSize, 40);
const output = Buffer.concat([header, ...chunks]);
fs.writeFileSync(outPath, output);
if (fs.existsSync(silencePath)) fs.unlinkSync(silencePath);
}
// -----------------------------
// Main
// -----------------------------
async function main() {
const args = parseArgs();
const inputPath = args.input;
const outDir = args.out_dir;
const topic = args.topic;
// 检查参数:必须提供 input 或 topic 之一
if ((!inputPath && !topic) || !outDir) {
console.error('Usage: tsx generate.ts --input=<file> --out_dir=<dir>');
console.error(' OR: tsx generate.ts --topic=<search-term> --out_dir=<dir>');
console.error('');
console.error('Examples:');
console.error(' # From file');
console.error(' npm run generate -- --input=article.txt --out_dir=out');
console.error(' # From web search');
console.error(' npm run generate -- --topic="最新AI新闻" --out_dir=out');
process.exit(1);
}
// Merge config
const cfg: GenConfig = {
...DEFAULT_CONFIG,
mode: (args.mode || 'dual') as GenConfig['mode'],
durationManual: parseInt(args.duration || '0'),
hostName: args.host_name || DEFAULT_CONFIG.hostName,
guestName: args.guest_name || DEFAULT_CONFIG.guestName,
voiceHost: args.voice_host || DEFAULT_CONFIG.voiceHost,
voiceGuest: args.voice_guest || DEFAULT_CONFIG.voiceGuest,
speed: parseFloat(args.speed || String(DEFAULT_CONFIG.speed)),
pauseMs: parseInt(args.pause_ms || String(DEFAULT_CONFIG.pauseMs)),
};
// Create output directory
if (!fs.existsSync(outDir)) {
fs.mkdirSync(outDir, { recursive: true });
}
// 根据模式获取资料
let material: string;
let inputSource: string;
if (inputPath) {
// 模式1从文件读取
console.log(`[MODE] Reading from file: ${inputPath}`);
material = readText(inputPath);
inputSource = `file:${inputPath}`;
} else if (topic) {
// 模式2联网搜索
console.log(`[MODE] Searching web for topic: ${topic}`);
const zai = await ZAI.create();
const searchResults = await zai.functions.invoke('web_search', {
query: topic,
num: 10
});
if (!Array.isArray(searchResults) || searchResults.length === 0) {
console.error(`未找到关于"${topic}"的搜索结果`);
process.exit(2);
}
console.log(`[SEARCH] Found ${searchResults.length} results`);
// 将搜索结果转换为文本资料
material = searchResults
.map((r: any, i: number) => `【来源 ${i + 1}${r.name}\n${r.snippet}\n链接${r.url}`)
.join('\n\n');
inputSource = `web_search:${topic}`;
console.log(`[SEARCH] Compiled material (${material.length} chars)`);
} else {
console.error('[ERROR] Neither --input nor --topic provided');
process.exit(1);
}
const inputChars = material.length;
// Calculate duration
let durationMin: number;
if (cfg.durationManual >= 3 && cfg.durationManual <= 20) {
durationMin = cfg.durationManual;
} else {
durationMin = chooseDurationMinutes(inputChars, DURATION_RANGE_LOW, DURATION_RANGE_HIGH);
}
const [target, low, high] = charBudget(durationMin, cfg.charsPerMin, BUDGET_TOLERANCE);
console.log(`[INFO] input_chars=${inputChars} duration=${durationMin}min budget=${low}-${high}`);
let attemptHint = '';
let lastScript: string | null = null;
// Initialize ZAI SDK (reuse for TTS)
const zai = await ZAI.create();
// Generate script
for (let attempt = 1; attempt <= cfg.maxAttempts; attempt++) {
const [systemPrompt, userPrompt] = buildPrompts(
material,
cfg,
durationMin,
target,
low,
high,
attemptHint
);
try {
console.log(`[LLM] Attempt ${attempt}/${cfg.maxAttempts}...`);
const content = await callZAI(systemPrompt, userPrompt, cfg.temperature);
lastScript = content;
const [ok, reasons] = validateScript(content, cfg, low, high);
if (ok) {
break;
}
attemptHint = makeRetryHint(reasons, cfg, low, high);
console.error(`[WARN] Validation failed:`, reasons.join(', '));
} catch (error: any) {
console.error(`[ERROR] LLM call failed: ${error.message}`);
throw error;
}
}
if (!lastScript) {
console.error('[ERROR] 未生成任何脚本输出。');
process.exit(1);
}
// Write script
const scriptPath = path.join(outDir, 'podcast_script.md');
fs.writeFileSync(scriptPath, lastScript, 'utf-8');
console.log(`[DONE] podcast_script.md -> ${scriptPath}`);
// Parse segments
const segments = scriptToSegments(lastScript, cfg.hostName, cfg.guestName);
console.log(`[INFO] Parsed ${segments.length} segments`);
// Generate TTS using SDK
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'podcast_segments_'));
const produced: string[] = [];
try {
for (let i = 0; i < segments.length; i++) {
const seg = segments[i];
const text = seg.text.trim();
if (!text) continue;
let voice: string;
if (cfg.mode === 'dual') {
voice = seg.speaker === 'host' ? cfg.voiceHost : cfg.voiceGuest;
} else if (cfg.mode === 'single-male') {
voice = cfg.voiceHost;
} else {
voice = cfg.voiceGuest;
}
const wavPath = path.join(tmpDir, `seg_${seg.idx.toString().padStart(4, '0')}.wav`);
console.log(`[TTS] [${i + 1}/${segments.length}] idx=${seg.idx} speaker=${seg.speaker} voice=${voice}`);
const buffer = await ttsRequest(zai, text, voice, cfg.speed);
fs.writeFileSync(wavPath, buffer);
produced.push(wavPath);
}
// Join segments
const podcastPath = path.join(outDir, 'podcast.wav');
console.log(`[JOIN] Joining ${produced.length} wav files -> ${podcastPath}`);
joinWavsWave(podcastPath, produced, cfg.pauseMs);
console.log(`[DONE] podcast.wav -> ${podcastPath}`);
} finally {
// Cleanup temp directory
try {
fs.rmSync(tmpDir, { recursive: true, force: true });
} catch (error: any) {
console.error(`[WARN] Failed to cleanup temp dir: ${error.message}`);
}
}
console.log('\n[FINAL OUTPUT]');
console.log(` 📄 podcast_script.md -> ${scriptPath}`);
console.log(` 🎙️ podcast.wav -> ${path.join(outDir, 'podcast.wav')}`);
}
main().catch(error => {
console.error('[FATAL ERROR]', error);
process.exit(1);
});