- Rust-based digital asset management system - Video analysis: ASR, OCR, YOLO, Face, Pose - RAG capabilities with Qdrant vector database - Multi-database support: PostgreSQL, Redis, MongoDB - Monitoring system with launchd plists - n8n workflow automation integration
54 lines
1.5 KiB
Python
54 lines
1.5 KiB
Python
#!/opt/homebrew/bin/python3.11
|
|
import sys
|
|
import json
|
|
import tempfile
|
|
import os
|
|
from faster_whisper import WhisperModel
|
|
|
|
|
|
def run_asr(video_path, output_path):
    """Transcribe ``video_path`` with Whisper and save the result as JSON.

    Loads the "tiny" model on CPU with int8 compute, transcribes the input
    with a beam size of 5, and writes a JSON document to ``output_path``
    containing ``language``, ``language_probability`` and ``segments`` (a
    list of ``{"start", "end", "text"}`` entries, text stripped of
    surrounding whitespace).

    Status lines are written to stderr. The machine-readable markers are
    ``ASR_START``, ``ASR_LANGUAGE:<code>``, ``ASR_PROGRESS:<count>`` (once
    per 100 segments) and ``ASR_COMPLETE:<count>``.

    Args:
        video_path: Path of the media file handed to the model.
        output_path: Path of the JSON file to create or overwrite.
    """

    def emit(message):
        # Every status line of this function goes to stderr.
        print(message, file=sys.stderr)

    emit("ASR_START")
    emit("Loading Whisper model...")

    whisper = WhisperModel("tiny", device="cpu", compute_type="int8")

    emit(f"Transcribing: {video_path}")
    segments, info = whisper.transcribe(video_path, beam_size=5)

    emit(f"ASR_LANGUAGE:{info.language}")
    emit(
        f"Detected language: {info.language} "
        f"(probability: {info.language_probability:.2f})"
    )

    # Collect the segments one at a time, reporting progress every 100.
    transcript = []
    for count, piece in enumerate(segments, start=1):
        transcript.append(
            {"start": piece.start, "end": piece.end, "text": piece.text.strip()}
        )
        if count % 100 == 0:
            emit(f"ASR_PROGRESS:{count}")

    segment_total = len(transcript)

    payload = {
        "language": info.language,
        "language_probability": info.language_probability,
        "segments": transcript,
    }
    with open(output_path, "w") as handle:
        json.dump(payload, handle, indent=2)

    emit(f"ASR_COMPLETE:{segment_total}")
    emit(f"ASR complete. {segment_total} segments.")
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: expects exactly two arguments, the input
    # media path and the path of the JSON file to write.
    if len(sys.argv) != 3:
        # Report the usage error on stderr, like every other diagnostic this
        # script prints, so stdout stays clean; exit status 1 is unchanged.
        print(
            "Usage: asr_processor.py <video_path> <output_json_path>",
            file=sys.stderr,
        )
        sys.exit(1)

    run_asr(sys.argv[1], sys.argv[2])
|