#!/opt/homebrew/bin/python3.11
"""
ASRX Processor - Custom Implementation Wrapper
Uses SpeechBrain ECAPA-TDNN (no HuggingFace token required)
"""

import argparse
import json
import os
import sys
import traceback
from pathlib import Path

# Put this script's directory and its "asrx_self" subdirectory at the front of
# sys.path *before* the project-local imports below, so they resolve no matter
# which working directory the script is launched from.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, _SCRIPT_DIR)
sys.path.insert(0, os.path.join(_SCRIPT_DIR, "asrx_self"))

from redis_publisher import RedisPublisher  # noqa: E402  (depends on sys.path above)


def _write_result(output_path: str, payload: dict) -> None:
    """Serialize *payload* to *output_path* as 2-space-indented JSON."""
    # json.dump escapes non-ASCII by default, so pinning UTF-8 keeps the bytes
    # on disk identical across platforms instead of relying on the locale.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)


def process_asrx_custom(video_path: str, output_path: str, uuid: str = "") -> dict:
    """Process video for speaker diarization using custom implementation.

    Runs ``SelfASRXFixed().process`` on *video_path*, converts its segments to
    the output format and writes them to *output_path* as JSON.

    Args:
        video_path: Path to the video/audio file handed to the ASRX processor.
        output_path: Path of the JSON file to write.
        uuid: When non-empty, a ``RedisPublisher`` is created with it and
            progress / error / completion messages are published under the
            ``"asrx"`` tag. When empty, nothing is published.

    Returns:
        The dict that was written to *output_path*: ``"language"`` (always
        ``None``), ``"segments"`` (list of dicts with ``start``, ``end``,
        ``text`` and ``speaker_id``) and, if the processor supplied it,
        ``"speaker_stats"``. If the processor reports an ``"error"`` or any
        exception is raised while processing, an empty result
        (``{"language": None, "segments": []}``) is written and returned.

    Raises:
        Exceptions raised while writing the fallback file inside the error
        handler are not caught and propagate to the caller.
    """
    publisher = RedisPublisher(uuid) if uuid else None

    if publisher:
        publisher.info("asrx", "ASRX_START")

    try:
        # Imported lazily and inside the try block so that an import failure
        # takes the same empty-result path as any other processing error.
        from asrx_self.main_fixed import SelfASRXFixed

        if publisher:
            publisher.info("asrx", "ASRX_LOADING_MODEL")

        # Initialize custom ASRX processor
        asrx = SelfASRXFixed()

        if publisher:
            publisher.info("asrx", "ASRX_TRANSCRIBING")

        # Process video/audio
        result = asrx.process(
            video_path,
            output_path=None,  # We'll save our own format
            min_speech_duration_ms=500,
            max_speakers=10,
        )

        if "error" in result:
            if publisher:
                publisher.error("asrx", result["error"])

            # The processor reported a failure: still emit a well-formed,
            # empty output file and signal completion.
            output_result = {"language": None, "segments": []}
            _write_result(output_path, output_result)

            if publisher:
                publisher.complete("asrx", "0 segments")
            return output_result

        # Convert to Rust-expected format
        output_result = {
            "language": None,  # Custom implementation doesn't detect language
            "segments": [
                {
                    "start": seg["start"],
                    "end": seg["end"],
                    "text": "",  # Will be filled by matching with ASR later
                    "speaker_id": seg["speaker"],
                }
                for seg in result["segments"]
            ],
        }

        # Add speaker_stats as optional metadata
        if "speaker_stats" in result:
            output_result["speaker_stats"] = result["speaker_stats"]

        segment_count = len(output_result["segments"])

        if publisher:
            publisher.info("asrx", f"ASRX_COMPLETE:{segment_count}")

        # Save output
        _write_result(output_path, output_result)

        if publisher:
            publisher.complete("asrx", f"{segment_count} segments")

        print(f"[ASRX-Custom] Saved {segment_count} segments to {output_path}")

        return output_result

    except Exception as e:
        # Deliberate best-effort boundary: report the failure, dump the
        # traceback to stderr, and fall back to an empty result file.
        if publisher:
            publisher.error("asrx", str(e))

        traceback.print_exc()

        # Return empty result on error
        output_result = {"language": None, "segments": []}
        _write_result(output_path, output_result)

        if publisher:
            publisher.complete("asrx", "0 segments")

        return output_result


def main() -> None:
    """Parse command-line arguments, run the processor and print a summary.

    Exits with status 1 if the input file does not exist.
    """
    parser = argparse.ArgumentParser(
        description="ASRX Processor (Custom Implementation)"
    )
    parser.add_argument("video_path", help="Path to video/audio file")
    parser.add_argument("output_path", help="Path to output JSON file")
    parser.add_argument("--uuid", help="UUID for Redis publishing", default="")

    args = parser.parse_args()

    if not Path(args.video_path).exists():
        print(f"Error: Video file not found: {args.video_path}")
        sys.exit(1)

    result = process_asrx_custom(args.video_path, args.output_path, args.uuid)

    print("\n[Summary]")
    print(f"  Total segments: {len(result['segments'])}")
    # speaker_stats is only present when the processor provided it.
    if "speaker_stats" in result:
        print(f"  Detected speakers: {len(result['speaker_stats'])}")
        for speaker, stats in result["speaker_stats"].items():
            print(f"    {speaker}: {stats['count']} segments")


if __name__ == "__main__":
    main()