Files
momentry_core/debug_asr.py
Warren b54c2def30 feat: add migrations, test scripts, and utility tools
- Add database migrations (006-028) for face recognition, identity, file_uuid
- Add test scripts for ASR, face, search, processing
- Add portal frontend (Tauri)
- Add config, benchmark, and monitoring utilities
- Add model checkpoints and pretrained model references
2026-04-30 15:11:53 +08:00

151 lines
4.1 KiB
Python

#!/usr/bin/env python3
"""
Debug the ASR (speech recognition) processing stages for a large video, one stage at a time.
"""
import os
import sys
import time
import subprocess
import tempfile
import json
from pathlib import Path
def run_ffmpeg_extract(video_path, audio_path):
    """Extract audio using ffmpeg.

    Runs ``ffmpeg`` to decode the audio of ``video_path`` into a 16 kHz,
    mono, 16-bit PCM WAV file at ``audio_path`` (``-y`` overwrites an
    existing file) and prints the command, the elapsed time and the
    return code.

    Args:
        video_path: Path of the input media file (str or path-like).
        audio_path: Path of the WAV file to write (str or path-like).

    Returns:
        A ``(success, elapsed)`` tuple. ``success`` is True when ffmpeg
        exited with return code 0; ``elapsed`` is the wall-clock run time
        in seconds.
    """
    cmd = [
        "ffmpeg",
        "-i",
        str(video_path),
        "-vn",
        "-acodec",
        "pcm_s16le",
        "-ar",
        "16000",
        "-ac",
        "1",
        "-y",
        str(audio_path),
    ]
    print(f"Running ffmpeg: {' '.join(cmd)}")
    start = time.time()
    proc = subprocess.run(cmd, capture_output=True, text=True)
    elapsed = time.time() - start
    print(f"ffmpeg completed in {elapsed:.1f}s, return code: {proc.returncode}")
    if proc.returncode != 0:
        # ffmpeg prints its version/configuration banner first and the actual
        # error message last, so the tail of stderr is the informative part.
        print(f"stderr: {proc.stderr[-500:]}")
    return proc.returncode == 0, elapsed
def test_asr_stages(video_path):
    """Test ASR stages step by step.

    Runs the following stages against ``video_path`` and prints timing and
    diagnostic output for each one:

    1. List the audio streams with ``ffprobe``.
    2. Extract the audio to a temporary WAV file via ``run_ffmpeg_extract``.
    3. Import ``faster_whisper`` and time the import.
    4. Trim the extracted audio to its first 30 seconds, load the ``tiny``
       Whisper model on CPU (int8) and transcribe the trimmed clip.

    The function stops at the first failing stage. Temporary audio files are
    removed on every exit path.

    Args:
        video_path: Path of the video file to inspect (str or path-like).

    Returns:
        None. All results are printed.
    """
    video_path = Path(video_path)
    print(f"Testing video: {video_path}")
    print(f"Size: {video_path.stat().st_size / 1024 / 1024:.1f} MB")

    # Stage 1: Check audio streams
    print("\n=== Stage 1: Check audio streams ===")
    cmd = [
        "ffprobe",
        "-v",
        "error",
        "-select_streams",
        "a",
        "-show_entries",
        "stream=codec_name,channels,sample_rate,duration",
        "-of",
        "csv=p=0",
        str(video_path),
    ]
    proc = subprocess.run(cmd, capture_output=True, text=True)
    print(f"Audio streams: {proc.stdout.strip()}")
    if proc.returncode != 0:
        # Surface ffprobe failures instead of showing only an empty stream list.
        print(f"ffprobe failed (return code {proc.returncode}): {proc.stderr.strip()}")

    # Stage 2: Extract audio
    print("\n=== Stage 2: Extract audio ===")
    # delete=False: only the name is needed here; ffmpeg writes the file and
    # the finally block below is responsible for removing it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        audio_path = f.name
    trim_path = audio_path + ".trim.wav"

    try:
        try:
            success, _ = run_ffmpeg_extract(video_path, audio_path)
            if not success:
                print("Audio extraction failed")
                return
            print(f"Audio extracted to {audio_path}")
            print(f"Audio size: {Path(audio_path).stat().st_size / 1024 / 1024:.1f} MB")
        except Exception as e:
            print(f"Error extracting audio: {e}")
            return

        # Stage 3: Load faster_whisper model (just import)
        print("\n=== Stage 3: Test faster_whisper import ===")
        try:
            start = time.time()
            from faster_whisper import WhisperModel

            elapsed = time.time() - start
            print(f"Import faster_whisper: {elapsed:.1f}s")
        except Exception as e:
            print(f"Import failed: {e}")
            return

        # Stage 4: Transcribe a small segment (first 30 seconds)
        print("\n=== Stage 4: Transcribe first 30 seconds ===")
        try:
            # Trim audio to first 30 seconds
            cmd = [
                "ffmpeg",
                "-i",
                audio_path,
                "-t",
                "30",
                "-acodec",
                "pcm_s16le",
                "-ar",
                "16000",
                "-ac",
                "1",
                "-y",
                trim_path,
            ]
            trim_proc = subprocess.run(cmd, capture_output=True, text=True)
            if trim_proc.returncode != 0:
                # Fail here with ffmpeg's own message rather than later with
                # a less specific error from the transcriber.
                raise RuntimeError(
                    f"ffmpeg trim failed (return code {trim_proc.returncode}): "
                    f"{trim_proc.stderr[-500:]}"
                )

            # Load model with small model
            start = time.time()
            model = WhisperModel("tiny", device="cpu", compute_type="int8")
            load_time = time.time() - start
            print(f"Model loaded in {load_time:.1f}s")

            # Transcribe
            start = time.time()
            segments, info = model.transcribe(trim_path, beam_size=5)
            segments = list(segments)  # Force processing
            transcribe_time = time.time() - start
            print(f"Transcription of 30s audio: {transcribe_time:.1f}s")
            print(
                f"Detected language: {info.language} with probability {info.language_probability}"
            )
            print(f"Segments found: {len(segments)}")
        except Exception as e:
            print(f"Transcription test failed: {e}")
            import traceback

            traceback.print_exc()
    finally:
        # Single cleanup point: remove both temporary files on every exit
        # path, including early returns and failures.
        for leftover in (trim_path, audio_path):
            try:
                os.unlink(leftover)
            except FileNotFoundError:
                pass

    print("\n=== Debug complete ===")
if __name__ == "__main__":
    # Script entry point: exactly one positional argument (the video file).
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print(f"Usage: {sys.argv[0]} <video_file>")
        sys.exit(1)
    test_asr_stages(cli_args[0])