Files
zCode-CLI-X/scripts/stt.py

85 lines
2.6 KiB
Python

#!/usr/bin/env python3
"""Vosk STT — transcribe audio file to text. Optimized for speed."""
import sys, os, json, subprocess, tempfile, wave
os.environ['VOSK_LOG_LEVEL'] = '-1'
_SAMPLE_RATE = 16000  # Hz; ffmpeg resamples to this and the recognizer expects it
_FFMPEG_TIMEOUT = 15  # seconds allowed for the whole conversion
_MIN_WAV_BYTES = 44  # size of a canonical WAV header; less means no audio
_DEFAULT_MODEL_PATH = '/home/uroma2/vosk-model'


def _convert_to_wav(audio_file, timeout=_FFMPEG_TIMEOUT):
    """Decode *audio_file* with ffmpeg and return 16 kHz mono WAV bytes.

    Raises:
        RuntimeError: ffmpeg exited non-zero or produced less than a header.
        subprocess.TimeoutExpired: ffmpeg ran longer than *timeout* seconds.
        OSError: ffmpeg could not be started (e.g. not installed).
    """
    # subprocess.run drains stdout and stderr together (no pipe deadlock)
    # and kills the child if the timeout expires.
    completed = subprocess.run(
        ['ffmpeg', '-y', '-i', audio_file, '-ar', str(_SAMPLE_RATE),
         '-ac', '1', '-f', 'wav', '-v', 'error', '-'],
        stdin=subprocess.DEVNULL,  # keep ffmpeg from reading the caller's stdin
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=timeout,
    )
    if completed.returncode != 0 or len(completed.stdout) < _MIN_WAV_BYTES:
        raise RuntimeError("ffmpeg conversion failed")
    return completed.stdout


def _summarize(segments):
    """Join segment texts and average per-word confidence.

    *segments* is a list of decoded recognizer result dicts. Returns a
    ``(text, confidence)`` tuple; confidence is 0.0 when no word entries
    are present.
    """
    texts = []
    confidences = []
    for segment in segments:
        phrase = segment.get('text', '').strip()
        if phrase:
            texts.append(phrase)
        confidences.extend(w.get('conf', 0) for w in segment.get('result', []))
    text = ' '.join(texts).strip()
    if confidences:
        confidence = round(sum(confidences) / len(confidences), 2)
    else:
        confidence = 0.0
    return text, confidence


def _transcribe(wav_data, model_path):
    """Run Vosk over in-memory WAV bytes and return ``(text, confidence)``."""
    import io
    import vosk

    # Silence Vosk/Kaldi logging through the library call.
    # NOTE(review): the VOSK_LOG_LEVEL env var set at module level does not
    # appear to be read by Vosk - confirm and drop it if so.
    vosk.SetLogLevel(-1)

    model = vosk.Model(model_path)
    rec = vosk.KaldiRecognizer(model, _SAMPLE_RATE)
    # Per-word entries (with 'conf') are only emitted when word output is
    # enabled; without this the confidence would always come out as 0.0.
    rec.SetWords(True)

    segments = []
    # wave accepts any seekable file object, so no temp file is needed, and
    # the context manager closes the reader.
    with wave.open(io.BytesIO(wav_data), 'rb') as wf:
        for frames in iter(lambda: wf.readframes(4000), b''):
            if rec.AcceptWaveform(frames):
                segments.append(json.loads(rec.Result()))
    # Flush whatever the recognizer is still holding after the last chunk.
    segments.append(json.loads(rec.FinalResult()))
    return _summarize(segments)


def main():
    """Transcribe the audio file named in ``sys.argv[1]`` and print JSON.

    Prints exactly one JSON object to stdout:
      * ``{"text": ..., "confidence": ...}`` on success (exit status 0),
      * ``{"text": "", "confidence": 0}`` when nothing was recognized
        (exit status 1),
      * ``{"error": ...}`` on usage, conversion or recognition failure
        (exit status 2).

    The model directory defaults to ``/home/uroma2/vosk-model`` and can be
    overridden with the ``VOSK_MODEL_PATH`` environment variable.
    """
    if len(sys.argv) < 2:
        print(json.dumps({"error": "Usage: stt.py <audio_file>"}))
        sys.exit(2)

    audio_file = sys.argv[1]
    model_path = os.environ.get('VOSK_MODEL_PATH', _DEFAULT_MODEL_PATH)

    # Top-level boundary: every failure is reported as JSON rather than a
    # traceback. SystemExit is not an Exception, so it is not caught here.
    try:
        wav_data = _convert_to_wav(audio_file)
        text, confidence = _transcribe(wav_data, model_path)
    except Exception as e:
        print(json.dumps({"error": str(e)}))
        sys.exit(2)

    if not text:
        print(json.dumps({"text": "", "confidence": 0}))
        sys.exit(1)
    print(json.dumps({"text": text, "confidence": confidence}))
# Script entry point: run the transcriber only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()