feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors
- Add release pre-flight check script
- Add synonym generation, chunk processing scripts
- Add face recognition, stamp search utilities
This commit is contained in:
141
scripts/asrx_processor_custom.py
Normal file
141
scripts/asrx_processor_custom.py
Normal file
@@ -0,0 +1,141 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
ASRX Processor - Custom Implementation Wrapper
|
||||
Uses SpeechBrain ECAPA-TDNN (no HuggingFace token required)
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Make modules that live next to this script, and inside its "asrx_self"
# subdirectory, importable. The "asrx_self" entry is inserted last so it
# ends up first on sys.path, ahead of the script directory itself.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, _SCRIPT_DIR)
sys.path.insert(0, os.path.join(_SCRIPT_DIR, "asrx_self"))
|
||||
|
||||
from redis_publisher import RedisPublisher
|
||||
|
||||
|
||||
def _write_asrx_output(output_path: str, payload: dict) -> None:
    """Write *payload* to *output_path* as indented JSON, creating parent directories."""
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        # Without this, a missing output directory makes both the normal write
        # and the fallback write in the error handler fail, so no output file
        # is produced at all.
        os.makedirs(parent_dir, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)


def _finish_with_empty_result(output_path: str, publisher) -> dict:
    """Write the empty fallback result, publish completion, and return the result."""
    empty_result = {"language": None, "segments": []}
    _write_asrx_output(output_path, empty_result)
    if publisher:
        publisher.complete("asrx", "0 segments")
    return empty_result


def process_asrx_custom(video_path: str, output_path: str, uuid: str = ""):
    """Run speaker diarization on a video/audio file and save the result as JSON.

    Args:
        video_path: Path to the video/audio file handed to the ASRX processor.
        output_path: Path of the JSON file to write. Missing parent
            directories are created.
        uuid: When non-empty, progress and errors are published through a
            ``RedisPublisher`` created with this value; when empty, nothing
            is published.

    Returns:
        A dict with ``"language"`` (always ``None``), ``"segments"`` (a list
        of dicts with ``start``, ``end``, ``text`` and ``speaker_id``) and,
        when the processor reports them, ``"speaker_stats"``. If the
        processor reports an error or any exception is raised, an empty
        result (``{"language": None, "segments": []}``) is written and
        returned instead.
    """
    publisher = RedisPublisher(uuid) if uuid else None
    if publisher:
        publisher.info("asrx", "ASRX_START")

    try:
        # Imported here so that a failing import is handled by the fallback
        # path below instead of aborting at module load time.
        from asrx_self.main_fixed import SelfASRXFixed

        if publisher:
            publisher.info("asrx", "ASRX_LOADING_MODEL")

        # Initialize custom ASRX processor
        asrx = SelfASRXFixed()

        if publisher:
            publisher.info("asrx", "ASRX_TRANSCRIBING")

        # Process video/audio
        result = asrx.process(
            video_path,
            output_path=None,  # We'll save our own format
            min_speech_duration_ms=500,
            max_speakers=10,
        )

        if "error" in result:
            if publisher:
                publisher.error("asrx", result["error"])
            return _finish_with_empty_result(output_path, publisher)

        # Convert to Rust-expected format
        output_result = {
            "language": None,  # Custom implementation doesn't detect language
            "segments": [
                {
                    "start": seg["start"],
                    "end": seg["end"],
                    "text": "",  # Will be filled by matching with ASR later
                    "speaker_id": seg["speaker"],
                }
                for seg in result["segments"]
            ],
        }

        # Add speaker_stats as optional metadata
        if "speaker_stats" in result:
            output_result["speaker_stats"] = result["speaker_stats"]

        segment_count = len(output_result["segments"])

        if publisher:
            publisher.info("asrx", f"ASRX_COMPLETE:{segment_count}")

        # Save output
        _write_asrx_output(output_path, output_result)

        if publisher:
            publisher.complete("asrx", f"{segment_count} segments")

        print(f"[ASRX-Custom] Saved {segment_count} segments to {output_path}")

        return output_result

    except Exception as e:
        # Top-level boundary: report the failure, then still produce an
        # (empty) output file and return the empty result.
        if publisher:
            publisher.error("asrx", str(e))

        import traceback

        traceback.print_exc()

        # Return empty result on error
        return _finish_with_empty_result(output_path, publisher)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, run diarization on the
    # given file, then print a short summary of the result.
    cli = argparse.ArgumentParser(description="ASRX Processor (Custom Implementation)")
    cli.add_argument("video_path", help="Path to video/audio file")
    cli.add_argument("output_path", help="Path to output JSON file")
    cli.add_argument("--uuid", default="", help="UUID for Redis publishing")
    opts = cli.parse_args()

    # Refuse to start when the input file is missing.
    source_file = Path(opts.video_path)
    if not source_file.exists():
        print(f"Error: Video file not found: {opts.video_path}")
        sys.exit(1)

    summary = process_asrx_custom(opts.video_path, opts.output_path, opts.uuid)

    segment_total = len(summary["segments"])
    print("\n[Summary]")
    print(f" Total segments: {segment_total}")

    # Per-speaker statistics are only present when the processor reported them.
    if "speaker_stats" in summary:
        per_speaker = summary["speaker_stats"]
        print(f" Detected speakers: {len(per_speaker)}")
        for speaker, stats in per_speaker.items():
            print(f" {speaker}: {stats['count']} segments")
|
||||
Reference in New Issue
Block a user