85 lines
2.6 KiB
Python
85 lines
2.6 KiB
Python
#!/usr/bin/env python3
|
|
"""Vosk STT — transcribe audio file to text. Optimized for speed."""
|
|
import io
import json
import os
import subprocess
import sys
import tempfile
import wave
|
|
|
|
# Intended to silence Vosk/Kaldi logging; set at import time, before vosk is loaded.
# NOTE(review): confirm that the installed vosk build actually reads this
# variable -- the documented switch is vosk.SetLogLevel(-1).
os.environ['VOSK_LOG_LEVEL'] = '-1'
|
|
|
|
_DEFAULT_MODEL_PATH = '/home/uroma2/vosk-model'
_SAMPLE_RATE = 16000       # Hz; ffmpeg resamples to this rate and the recognizer is built for it
_FFMPEG_TIMEOUT_S = 15     # upper bound on the whole ffmpeg conversion
_FRAMES_PER_CHUNK = 4000   # frames handed to the recognizer per AcceptWaveform call
_WAV_HEADER_BYTES = 44     # anything shorter cannot even hold a canonical WAV header


def _convert_to_wav(audio_file):
    """Return *audio_file* re-encoded by ffmpeg as 16 kHz mono WAV bytes.

    Raises:
        RuntimeError: ffmpeg exited non-zero or produced less than a WAV header.
        subprocess.TimeoutExpired: ffmpeg ran longer than ``_FFMPEG_TIMEOUT_S``
            (``subprocess.run`` kills the child before raising).
        OSError: ffmpeg could not be started (e.g. it is not installed).
    """
    # subprocess.run drains stdout and stderr concurrently and enforces the
    # timeout over the whole conversion.  Reading stdout to EOF first and only
    # then calling wait(timeout=...) would never time out and could deadlock
    # on a full stderr pipe.
    proc = subprocess.run(
        ['ffmpeg', '-y', '-i', audio_file, '-ar', str(_SAMPLE_RATE), '-ac', '1',
         '-f', 'wav', '-v', 'error', '-'],
        stdin=subprocess.DEVNULL,  # ffmpeg is interactive by default; keep it off our stdin
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=_FFMPEG_TIMEOUT_S,
    )
    if proc.returncode != 0 or len(proc.stdout) < _WAV_HEADER_BYTES:
        raise RuntimeError("ffmpeg conversion failed")
    return proc.stdout


def _transcribe(wav_data, model_path):
    """Run Vosk over in-memory WAV bytes and return ``(text, confidence)``.

    ``text`` is the space-joined transcript of all recognized segments and
    ``confidence`` is the mean per-word ``conf`` score rounded to two decimals
    (``0.0`` when no word scores were reported).
    """
    # Imported lazily so that usage and ffmpeg errors are reported without
    # requiring (or paying for) the vosk import.
    import vosk

    # Documented switch for silencing Kaldi log output.
    vosk.SetLogLevel(-1)
    recognizer = vosk.KaldiRecognizer(vosk.Model(model_path), _SAMPLE_RATE)
    # Per-word entries (with their 'conf' scores) are only included in results
    # when word output is enabled; without it the confidence would always be 0.
    recognizer.SetWords(True)

    text_parts = []
    word_scores = []

    def collect(raw_json):
        """Fold one recognizer result (a JSON string) into the accumulators."""
        result = json.loads(raw_json)
        segment = result.get('text', '').strip()
        if segment:
            text_parts.append(segment)
        word_scores.extend(word.get('conf', 0) for word in result.get('result', []))

    # wave.open accepts any file-like object, so the audio never touches disk
    # and there is no temp file to clean up; the context manager closes the
    # reader even if recognition fails.
    with wave.open(io.BytesIO(wav_data), 'rb') as reader:
        for frames in iter(lambda: reader.readframes(_FRAMES_PER_CHUNK), b''):
            if recognizer.AcceptWaveform(frames):
                collect(recognizer.Result())

    # Flush whatever the recognizer is still holding after the last chunk.
    collect(recognizer.FinalResult())

    text = ' '.join(text_parts).strip()
    confidence = round(sum(word_scores) / len(word_scores), 2) if word_scores else 0.0
    return text, confidence


def main():
    """Transcribe the audio file named in ``sys.argv[1]`` and print JSON to stdout.

    Output and exit status:
        * ``{"text": ..., "confidence": ...}``, normal return -- speech was recognized.
        * ``{"text": "", "confidence": 0}``, exit status 1 -- nothing was recognized.
        * ``{"error": ...}``, exit status 2 -- bad usage, ffmpeg failure or
          timeout, or any error raised while loading or running Vosk.

    The model directory defaults to ``/home/uroma2/vosk-model`` and can be
    overridden with the ``VOSK_MODEL_PATH`` environment variable.
    """
    if len(sys.argv) < 2:
        print(json.dumps({"error": "Usage: stt.py <audio_file>"}))
        sys.exit(2)

    audio_file = sys.argv[1]
    model_path = os.environ.get('VOSK_MODEL_PATH', _DEFAULT_MODEL_PATH)

    # Top-level boundary: every failure is turned into a JSON error object so
    # that callers only ever have to parse stdout.
    try:
        wav_data = _convert_to_wav(audio_file)
        text, confidence = _transcribe(wav_data, model_path)
    except Exception as e:
        print(json.dumps({"error": str(e)}))
        sys.exit(2)

    if not text:
        print(json.dumps({"text": "", "confidence": 0}))
        sys.exit(1)

    print(json.dumps({"text": text, "confidence": confidence}))
|
|
|
|
# Run the transcription only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|