#!/opt/homebrew/bin/python3.11
"""Transcribe a media file with faster-whisper and write the result as JSON.

Usage: <script> VIDEO_PATH OUTPUT_PATH [--uuid UUID]

When a UUID is given, status messages are also sent through RedisPublisher.
"""
import argparse
import json
import os
import signal
import subprocess
import sys
from typing import NoReturn

from faster_whisper import WhisperModel

# Put this script's own directory on the import path before importing
# redis_publisher, so the import does not depend on the working directory.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from redis_publisher import RedisPublisher


def signal_handler(signum, frame) -> NoReturn:
    """Report the received signal and terminate the process with exit code 1.

    Args:
        signum: Number of the signal that was delivered.
        frame: Stack frame active when the signal arrived (unused).
    """
    print(f"ASR: Received signal {signum}, exiting...")
    sys.exit(1)


def has_audio_stream(video_path) -> bool:
    """Check if video file has audio stream using ffprobe.

    Args:
        video_path: Path of the media file handed to ffprobe.

    Returns:
        True when ffprobe prints at least one audio stream entry, or when the
        ffprobe executable cannot be found (audio is then assumed to exist).
        False when ffprobe prints nothing or exits with a non-zero status.
    """
    cmd = [
        "ffprobe",
        "-v",
        "error",
        "-select_streams",
        "a",
        "-show_entries",
        "stream=codec_type",
        "-of",
        "csv=p=0",
        video_path,
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as err:
        # Still treated as "no audio" (best effort), but say why: a missing or
        # unreadable input would otherwise look exactly like a silent video.
        detail = (err.stderr or "").strip()
        print(
            f"WARNING: ffprobe failed (exit {err.returncode}), "
            f"assuming no audio: {detail}"
        )
        return False
    except FileNotFoundError:
        print("WARNING: ffprobe not found, assuming audio exists")
        return True
    return bool(result.stdout.strip())


def _write_transcript(output_path, language, language_probability, segments) -> None:
    """Write the transcription result to *output_path* as indented JSON."""
    payload = {
        "language": language,
        "language_probability": language_probability,
        "segments": segments,
    }
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)


def run_asr(video_path, output_path, uuid: str = "") -> NoReturn:
    """Transcribe *video_path* and write the result to *output_path*.

    Installs SIGTERM/SIGINT handlers, skips transcription when no audio stream
    is detected (an empty result is written instead), and otherwise runs the
    "tiny" Whisper model on CPU with int8 compute. The JSON output has the keys
    "language", "language_probability" and "segments"; each segment holds
    "start", "end" and the stripped "text".

    This function never returns normally: both paths end with ``sys.exit(0)``.

    Args:
        video_path: Path of the media file to transcribe.
        output_path: Path of the JSON file to write.
        uuid: Identifier passed to RedisPublisher; when empty, no publisher is
            created and nothing is published.
    """
    # Set up signal handlers
    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)

    publisher = RedisPublisher(uuid) if uuid else None
    if publisher:
        publisher.info("asr", "ASR_START")

    # Check for audio stream
    if not has_audio_stream(video_path):
        if publisher:
            publisher.info("asr", "No audio stream detected, skipping transcription")
        _write_transcript(output_path, "", 0.0, [])
        if publisher:
            publisher.complete("asr", "0 segments (no audio)")
        sys.stderr.write("ASR: No audio stream, skipping transcription\n")
        sys.stderr.flush()
        sys.exit(0)

    if publisher:
        publisher.info("asr", "Loading Whisper model...")
    model = WhisperModel("tiny", device="cpu", compute_type="int8")

    if publisher:
        publisher.info("asr", f"Transcribing: {video_path}")
    segments, info = model.transcribe(video_path, beam_size=5)

    if publisher:
        publisher.info("asr", f"ASR_LANGUAGE:{info.language}")

    results = []
    for count, segment in enumerate(segments, start=1):
        results.append(
            {"start": segment.start, "end": segment.end, "text": segment.text.strip()}
        )
        # Report progress every 100 segments. The third argument is always 0;
        # presumably it is the total, which is not known here - TODO confirm
        # against RedisPublisher.progress.
        if count % 100 == 0 and publisher:
            publisher.progress("asr", count, 0, f"Segment {count}")

    _write_transcript(output_path, info.language, info.language_probability, results)

    if publisher:
        publisher.complete("asr", f"{len(results)} segments")

    sys.stderr.write(
        f"ASR: Transcription complete, {len(results)} segments written to {output_path}\n"
    )
    sys.stderr.flush()
    sys.exit(0)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="ASR Transcription")
    parser.add_argument("video_path", help="Path to video file")
    parser.add_argument("output_path", help="Output JSON path")
    parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="")
    args = parser.parse_args()
    run_asr(args.video_path, args.output_path, args.uuid)