feat: add migrations, test scripts, and utility tools
- Add database migrations (006-028) for face recognition, identity, file_uuid
- Add test scripts for ASR, face, search, processing
- Add portal frontend (Tauri)
- Add config, benchmark, and monitoring utilities
- Add model checkpoints and pretrained model references
This commit is contained in:
132
test_faster_whisper.py
Normal file
132
test_faster_whisper.py
Normal file
@@ -0,0 +1,132 @@
|
||||
#!/opt/homebrew/bin/python3.11
"""Test faster_whisper transcription in isolation."""

import os
import subprocess
import sys
import tempfile
import time

# Put this script's own directory (the scripts directory) at the front of
# sys.path so it is searched first by later imports.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
def _run_ffmpeg(args):
    """Run ``ffmpeg -y <args>`` and report failure as text.

    Args:
        args: Command-line arguments placed after ``ffmpeg -y``.

    Returns:
        ``None`` when ffmpeg exits with status 0, otherwise a string
        describing the failure (ffmpeg's stderr, or the OS error raised
        while trying to start the process).
    """
    # -y: the output path is a temp file that already exists, so ffmpeg has
    # to be told to overwrite it instead of asking for confirmation.
    try:
        result = subprocess.run(["ffmpeg", "-y", *args], capture_output=True)
    except OSError as e:
        # Raised when the ffmpeg executable is missing or cannot be started.
        return f"could not run ffmpeg: {e}"
    if result.returncode != 0:
        # Decode leniently: a stray non-UTF-8 byte in the diagnostics must
        # not turn an ordinary failure report into a UnicodeDecodeError.
        return result.stderr.decode(errors="replace")
    return None


def _create_test_audio(wav_path, test_video):
    """Write a short 16 kHz mono PCM WAV file to *wav_path*.

    First tries to synthesize 1 second of silence with ffmpeg's ``anullsrc``
    source. If that fails, falls back to extracting the first 5 seconds of
    audio from *test_video*, when that file exists.

    Args:
        wav_path: Path of the WAV file to (over)write.
        test_video: Path of the video used by the fallback.

    Returns:
        ``True`` if *wav_path* was written, ``False`` otherwise. Failure
        reasons are printed to stdout.
    """
    error = _run_ffmpeg(
        [
            "-f",
            "lavfi",
            "-i",
            "anullsrc=r=16000:cl=mono",
            "-t",
            "1",
            "-acodec",
            "pcm_s16le",
            wav_path,
        ]
    )
    if error is None:
        return True

    print(f"✗ Failed to create test audio: {error}")
    # Try alternative: extract a small chunk from a known video
    print("Trying to extract 5-second chunk from test video...")
    if not os.path.exists(test_video):
        print("Test video not found, skipping transcription test")
        return False

    error = _run_ffmpeg(
        [
            "-i",
            test_video,
            "-t",
            "5",
            "-acodec",
            "pcm_s16le",
            "-ar",
            "16000",
            "-ac",
            "1",
            wav_path,
        ]
    )
    if error is not None:
        print(f"✗ Failed to extract audio: {error}")
        return False
    return True


def _transcribe(model, wav_path):
    """Transcribe *wav_path* with *model* and print timings and a preview.

    Args:
        model: Object exposing ``transcribe(path, beam_size=...)`` that
            returns a ``(segments, info)`` pair.
        wav_path: Path of the audio file to transcribe.

    Raises:
        Exception: Whatever ``model.transcribe`` or iterating its segments
            raises; the caller is responsible for reporting it.
    """
    print("Testing transcription...")
    started = time.time()
    # Use beam_size=5 like in the ASR processor
    segments, info = model.transcribe(wav_path, beam_size=5)
    print(f"✓ Transcription initiated in {time.time() - started:.2f}s")

    # Convert generator to list to actually run the transcription
    print("Converting segments to list...")
    segments_list = list(segments)
    print(f"✓ Transcription completed in {time.time() - started:.2f}s")
    print(f"  Segments: {len(segments_list)}")
    print(
        f"  Language: {info.language}, Probability: {info.language_probability}"
    )

    # Show first 3 segments
    for i, segment in enumerate(segments_list[:3]):
        print(
            f"  Segment {i}: {segment.start:.2f}s - {segment.end:.2f}s: {segment.text}"
        )


def test_faster_whisper(
    *, test_video="/Users/accusys/test_video/20250209_212949.mp4"
):
    """Smoke-test faster_whisper: import, model load, and one transcription.

    Progress and failures are reported on stdout; the function stops at the
    first step that fails. The temporary WAV file is always removed before
    returning.

    Args:
        test_video: Video whose first 5 seconds of audio are used when
            ffmpeg cannot synthesize silent audio. Keyword-only; defaults to
            the path previously hard-coded in this script.

    Returns:
        None.
    """
    import traceback

    print("Testing faster_whisper...")

    # Try to import
    try:
        from faster_whisper import WhisperModel
    except ImportError as e:
        print(f"✗ Failed to import faster_whisper: {e}")
        return
    else:
        print("✓ faster_whisper imported successfully")

    # Load model
    print("Loading Whisper model (tiny, int8)...")
    start = time.time()
    try:
        model = WhisperModel("tiny", device="cpu", compute_type="int8")
    except Exception as e:
        # Script-level boundary: report any loading failure and stop.
        print(f"✗ Model loading failed: {e}")
        traceback.print_exc()
        return
    print(f"✓ Model loaded successfully in {time.time() - start:.2f}s")

    # Only reserve a path here; the handle is closed before ffmpeg writes to
    # it, and delete=False keeps the file around until the finally below.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        temp_wav = f.name

    try:
        if not _create_test_audio(temp_wav, test_video):
            return

        print(
            f"✓ Created test audio file: {temp_wav} ({os.path.getsize(temp_wav)} bytes)"
        )

        try:
            _transcribe(model, temp_wav)
        except Exception as e:
            print(f"✗ Transcription failed: {e}")
            traceback.print_exc()
    finally:
        # Single cleanup path for every exit, including early returns.
        if os.path.exists(temp_wav):
            os.unlink(temp_wav)
            print("✓ Cleaned up temp file")
# Run the smoke test only when executed as a script, not when imported.
if __name__ == "__main__":
    test_faster_whisper()
Reference in New Issue
Block a user