feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors - Add release pre-flight check script - Add synonym generation, chunk processing scripts - Add face recognition, stamp search utilities
This commit is contained in:
722
scripts/asr_processor_debug.py
Executable file
722
scripts/asr_processor_debug.py
Executable file
@@ -0,0 +1,722 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
ASR Processor with chunked transcription for large files and resource monitoring.
|
||||
Maintains backward compatibility with existing API.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import os
|
||||
import argparse
|
||||
import signal
|
||||
import subprocess
|
||||
import tempfile
|
||||
import time
|
||||
import shutil
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
|
||||
# Try to import psutil for resource monitoring
|
||||
PSUTIL_AVAILABLE = False
|
||||
psutil = None
|
||||
try:
|
||||
import psutil
|
||||
|
||||
PSUTIL_AVAILABLE = True
|
||||
except ImportError:
|
||||
sys.stderr.write("WARNING: psutil not available, resource monitoring disabled\n")
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from redis_publisher import RedisPublisher # noqa: E402
|
||||
|
||||
|
||||
def save_checkpoint(
|
||||
checkpoint_path: str,
|
||||
segments: List[Dict[str, Any]],
|
||||
language: Optional[str],
|
||||
language_prob: Optional[float],
|
||||
processed_chunks: List[int],
|
||||
total_chunks: int,
|
||||
) -> None:
|
||||
"""Save transcription checkpoint to resume later."""
|
||||
checkpoint_data = {
|
||||
"segments": segments,
|
||||
"language": language or "",
|
||||
"language_probability": language_prob or 0.0,
|
||||
"processed_chunks": processed_chunks,
|
||||
"total_chunks": total_chunks,
|
||||
"timestamp": time.time(),
|
||||
}
|
||||
try:
|
||||
with open(checkpoint_path, "w") as f:
|
||||
json.dump(checkpoint_data, f, indent=2, default=str)
|
||||
except Exception as e:
|
||||
sys.stderr.write(f"ASR: Failed to save checkpoint: {e}\n")
|
||||
|
||||
|
||||
def load_checkpoint(checkpoint_path: str) -> Optional[Dict[str, Any]]:
|
||||
"""Load transcription checkpoint if exists."""
|
||||
try:
|
||||
with open(checkpoint_path, "r") as f:
|
||||
return json.load(f)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def check_health() -> Dict[str, Any]:
|
||||
"""Check health of ASR processor dependencies."""
|
||||
health = {
|
||||
"status": "healthy",
|
||||
"checks": {},
|
||||
"timestamp": time.time(),
|
||||
}
|
||||
|
||||
# Check ffmpeg
|
||||
try:
|
||||
result = subprocess.run(["ffmpeg", "-version"], capture_output=True, text=True)
|
||||
health["checks"]["ffmpeg"] = {
|
||||
"available": result.returncode == 0,
|
||||
"version": result.stdout.split("\n")[0].split(" ")[2]
|
||||
if result.stdout
|
||||
else "unknown",
|
||||
}
|
||||
except Exception as e:
|
||||
health["checks"]["ffmpeg"] = {"available": False, "error": str(e)}
|
||||
|
||||
# Check ffprobe
|
||||
try:
|
||||
result = subprocess.run(["ffprobe", "-version"], capture_output=True, text=True)
|
||||
health["checks"]["ffprobe"] = {
|
||||
"available": result.returncode == 0,
|
||||
"version": result.stdout.split("\n")[0].split(" ")[2]
|
||||
if result.stdout
|
||||
else "unknown",
|
||||
}
|
||||
except Exception as e:
|
||||
health["checks"]["ffprobe"] = {"available": False, "error": str(e)}
|
||||
|
||||
# Check faster_whisper import
|
||||
try:
|
||||
import faster_whisper
|
||||
|
||||
health["checks"]["faster_whisper"] = {
|
||||
"available": True,
|
||||
"version": getattr(faster_whisper, "__version__", "unknown"),
|
||||
}
|
||||
except ImportError as e:
|
||||
health["checks"]["faster_whisper"] = {"available": False, "error": str(e)}
|
||||
health["status"] = "unhealthy"
|
||||
|
||||
# Check psutil import
|
||||
try:
|
||||
import psutil
|
||||
|
||||
health["checks"]["psutil"] = {
|
||||
"available": True,
|
||||
"version": getattr(psutil, "__version__", "unknown"),
|
||||
}
|
||||
except ImportError:
|
||||
health["checks"]["psutil"] = {
|
||||
"available": False,
|
||||
"warning": "resource monitoring disabled",
|
||||
}
|
||||
|
||||
# Determine overall status
|
||||
if not health["checks"].get("ffmpeg", {}).get("available", False) or not health[
|
||||
"checks"
|
||||
].get("ffprobe", {}).get("available", False):
|
||||
health["status"] = "unhealthy"
|
||||
|
||||
return health
|
||||
|
||||
|
||||
def signal_handler(signum, frame):
|
||||
sys.stderr.write(f"ASR: Received signal {signum}, exiting...\n")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def has_audio_stream(video_path: str) -> bool:
|
||||
"""Check if video file has audio stream using ffprobe."""
|
||||
try:
|
||||
cmd = [
|
||||
"ffprobe",
|
||||
"-v",
|
||||
"error",
|
||||
"-select_streams",
|
||||
"a",
|
||||
"-show_entries",
|
||||
"stream=codec_type",
|
||||
"-of",
|
||||
"csv=p=0",
|
||||
video_path,
|
||||
]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
||||
return bool(result.stdout.strip())
|
||||
except subprocess.CalledProcessError:
|
||||
return False
|
||||
except FileNotFoundError:
|
||||
sys.stderr.write("WARNING: ffprobe not found, assuming audio exists\n")
|
||||
return True
|
||||
|
||||
|
||||
def get_media_duration(media_path: str) -> float:
|
||||
"""Get media duration in seconds using ffprobe."""
|
||||
cmd = [
|
||||
"ffprobe",
|
||||
"-v",
|
||||
"error",
|
||||
"-show_entries",
|
||||
"format=duration",
|
||||
"-of",
|
||||
"csv=p=0",
|
||||
media_path,
|
||||
]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||
try:
|
||||
return float(result.stdout.strip())
|
||||
except (ValueError, AttributeError):
|
||||
return 0.0
|
||||
|
||||
|
||||
def extract_audio(video_path: str, audio_path: str) -> bool:
|
||||
"""Extract audio from video to WAV format."""
|
||||
cmd = [
|
||||
"ffmpeg",
|
||||
"-i",
|
||||
video_path,
|
||||
"-acodec",
|
||||
"pcm_s16le",
|
||||
"-ar",
|
||||
"16000",
|
||||
"-ac",
|
||||
"1",
|
||||
"-y",
|
||||
audio_path,
|
||||
]
|
||||
result = subprocess.run(cmd, capture_output=True)
|
||||
return result.returncode == 0 and os.path.exists(audio_path)
|
||||
|
||||
|
||||
def extract_chunk(
|
||||
audio_path: str, start: float, duration: float, output_path: str
|
||||
) -> bool:
|
||||
"""Extract a chunk of audio using ffmpeg."""
|
||||
cmd = [
|
||||
"ffmpeg",
|
||||
"-i",
|
||||
audio_path,
|
||||
"-ss",
|
||||
str(start),
|
||||
"-t",
|
||||
str(duration),
|
||||
"-acodec",
|
||||
"pcm_s16le",
|
||||
"-ar",
|
||||
"16000",
|
||||
"-ac",
|
||||
"1",
|
||||
"-y",
|
||||
output_path,
|
||||
]
|
||||
result = subprocess.run(cmd, capture_output=True)
|
||||
success = (
|
||||
result.returncode == 0
|
||||
and os.path.exists(output_path)
|
||||
and os.path.getsize(output_path) > 0
|
||||
)
|
||||
sys.stderr.write(
|
||||
f"ASR_DEBUG: extract_chunk: start={start}, duration={duration}, success={success}, returncode={result.returncode}\n"
|
||||
)
|
||||
sys.stderr.flush()
|
||||
return success
|
||||
|
||||
|
||||
def monitor_resources(pid: int, interval: float = 0.1) -> Dict[str, Any]:
|
||||
"""Monitor CPU and memory usage for a process."""
|
||||
if not PSUTIL_AVAILABLE or psutil is None:
|
||||
return {"cpu_percent": 0.0, "memory_mb": 0.0, "available": False}
|
||||
|
||||
try:
|
||||
process = psutil.Process(pid)
|
||||
cpu_percent = process.cpu_percent(interval=interval)
|
||||
memory_info = process.memory_info()
|
||||
memory_mb = memory_info.rss / (1024 * 1024)
|
||||
return {
|
||||
"cpu_percent": cpu_percent,
|
||||
"memory_mb": memory_mb,
|
||||
"available": True,
|
||||
"pid": pid,
|
||||
}
|
||||
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
|
||||
return {"cpu_percent": 0.0, "memory_mb": 0.0, "available": False}
|
||||
|
||||
|
||||
def transcribe_direct(
|
||||
model, audio_path: str, publisher: Optional[RedisPublisher] = None
|
||||
) -> Tuple[List[Dict[str, Any]], Any]:
|
||||
"""Transcribe audio directly (non-chunked)."""
|
||||
if publisher:
|
||||
publisher.info("asr", "Transcribing audio directly...")
|
||||
|
||||
start_time = time.time()
|
||||
segments, info = model.transcribe(audio_path, beam_size=5)
|
||||
|
||||
results = []
|
||||
total_segments = 0
|
||||
for segment in segments:
|
||||
results.append(
|
||||
{"start": segment.start, "end": segment.end, "text": segment.text.strip()}
|
||||
)
|
||||
total_segments += 1
|
||||
if total_segments % 100 == 0 and publisher:
|
||||
publisher.progress("asr", total_segments, 0, f"Segment {total_segments}")
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
if publisher:
|
||||
publisher.info(
|
||||
"asr", f"Direct transcription: {len(results)} segments in {elapsed:.1f}s"
|
||||
)
|
||||
|
||||
return results, info
|
||||
|
||||
|
||||
def transcribe_chunk(
|
||||
model,
|
||||
chunk_path: str,
|
||||
chunk_start: float,
|
||||
chunk_idx: int,
|
||||
total_chunks: int,
|
||||
publisher: Optional[RedisPublisher] = None,
|
||||
) -> Tuple[List[Dict[str, Any]], Any]:
|
||||
"""Transcribe a single audio chunk."""
|
||||
if publisher:
|
||||
publisher.info("asr", f"Transcribing chunk {chunk_idx + 1}/{total_chunks}")
|
||||
|
||||
sys.stderr.write(
|
||||
f"ASR_DEBUG: transcribe_chunk: chunk_idx={chunk_idx}, path={chunk_path}, size={os.path.getsize(chunk_path) if os.path.exists(chunk_path) else 0}\n"
|
||||
)
|
||||
sys.stderr.flush()
|
||||
|
||||
start_time = time.time()
|
||||
segments, info = model.transcribe(chunk_path, beam_size=5)
|
||||
sys.stderr.write(
|
||||
"ASR_DEBUG: transcribe_chunk: transcription completed, got segments\n"
|
||||
)
|
||||
sys.stderr.flush()
|
||||
|
||||
results = []
|
||||
for segment in segments:
|
||||
results.append(
|
||||
{
|
||||
"start": segment.start + chunk_start,
|
||||
"end": segment.end + chunk_start,
|
||||
"text": segment.text.strip(),
|
||||
}
|
||||
)
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
if publisher:
|
||||
publisher.info(
|
||||
"asr",
|
||||
f"Chunk {chunk_idx + 1}/{total_chunks}: {len(results)} segments in {elapsed:.1f}s",
|
||||
)
|
||||
|
||||
return results, info
|
||||
|
||||
|
||||
def run_asr(
|
||||
video_path: str,
|
||||
output_path: str,
|
||||
uuid: str = "",
|
||||
chunk_duration: int = 600, # 10 minutes default
|
||||
max_direct_duration: int = 1200, # 20 minutes: use direct transcription for shorter files (safe limit)
|
||||
model_size: str = "tiny",
|
||||
compute_type: str = "int8",
|
||||
monitor_interval: int = 60,
|
||||
) -> None:
|
||||
# Set up signal handlers
|
||||
signal.signal(signal.SIGTERM, signal_handler)
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
|
||||
publisher = RedisPublisher(uuid) if uuid else None
|
||||
if publisher:
|
||||
publisher.info("asr", "ASR_START")
|
||||
sys.stderr.write("ASR_DEBUG: Audio stream check...\n")
|
||||
|
||||
# Check for audio stream
|
||||
if not has_audio_stream(video_path):
|
||||
if publisher:
|
||||
publisher.info("asr", "No audio stream detected, skipping transcription")
|
||||
output = {
|
||||
"processor_name": "asr",
|
||||
"processor_version": "2.0.0",
|
||||
"contract_version": "1.0",
|
||||
"language": None,
|
||||
"language_probability": None,
|
||||
"segments": [],
|
||||
}
|
||||
with open(output_path, "w") as f:
|
||||
json.dump(output, f, indent=2)
|
||||
if publisher:
|
||||
publisher.complete("asr", "0 segments (no audio)")
|
||||
sys.stderr.write("ASR: No audio stream, skipping transcription\n")
|
||||
sys.stderr.flush()
|
||||
sys.exit(0)
|
||||
|
||||
# Create temporary directory
|
||||
sys.stderr.write("ASR_DEBUG: Creating temporary directory...\n")
|
||||
temp_dir = tempfile.mkdtemp(prefix="asr_")
|
||||
sys.stderr.write(f"ASR_DEBUG: temp_dir={temp_dir}\n")
|
||||
audio_path = os.path.join(temp_dir, "audio.wav")
|
||||
|
||||
if publisher:
|
||||
publisher.info("asr", "Extracting audio from video...")
|
||||
sys.stderr.write("ASR_DEBUG: Extracting audio...\n")
|
||||
|
||||
# Extract audio
|
||||
if not extract_audio(video_path, audio_path):
|
||||
if publisher:
|
||||
publisher.error("asr", "Failed to extract audio")
|
||||
sys.stderr.write("ASR: Failed to extract audio\n")
|
||||
sys.stderr.flush()
|
||||
# Clean up
|
||||
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||
sys.exit(1)
|
||||
|
||||
sys.stderr.write("ASR_DEBUG: Audio extraction successful, getting duration...\n")
|
||||
# Get audio duration
|
||||
try:
|
||||
total_duration = get_media_duration(audio_path)
|
||||
except Exception as e:
|
||||
if publisher:
|
||||
publisher.error("asr", f"Failed to get audio duration: {e}")
|
||||
sys.stderr.write(f"ASR: Failed to get audio duration: {e}\n")
|
||||
sys.stderr.flush()
|
||||
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||
sys.exit(1)
|
||||
|
||||
if publisher:
|
||||
publisher.info(
|
||||
"asr",
|
||||
f"Audio duration: {total_duration:.1f}s ({total_duration / 3600:.1f} hrs)",
|
||||
)
|
||||
|
||||
sys.stderr.write("ASR_DEBUG: Loading Whisper model...\n")
|
||||
# Load Whisper model
|
||||
if publisher:
|
||||
publisher.info(
|
||||
"asr", f"Loading Whisper model ({model_size}, {compute_type})..."
|
||||
)
|
||||
|
||||
try:
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
model = WhisperModel(model_size, device="cpu", compute_type=compute_type)
|
||||
except Exception as e:
|
||||
if publisher:
|
||||
publisher.error("asr", f"Failed to load Whisper model: {e}")
|
||||
sys.stderr.write(f"ASR: Failed to load Whisper model: {e}\n")
|
||||
sys.stderr.flush()
|
||||
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||
sys.exit(1)
|
||||
|
||||
if publisher:
|
||||
publisher.info("asr", "Whisper model loaded successfully")
|
||||
sys.stderr.write("ASR_DEBUG: Whisper model loaded.\n")
|
||||
|
||||
# Decide whether to use chunked or direct transcription
|
||||
use_chunked = total_duration > max_direct_duration
|
||||
sys.stderr.write(
|
||||
f"ASR_DEBUG: total_duration={total_duration:.1f}s, max_direct_duration={max_direct_duration}s, use_chunked={use_chunked}\n"
|
||||
)
|
||||
|
||||
all_segments = []
|
||||
language = None
|
||||
language_prob = None
|
||||
chunks = [] # Initialize chunks variable
|
||||
|
||||
if not use_chunked:
|
||||
sys.stderr.write("ASR_DEBUG: Starting direct transcription...\n")
|
||||
# Direct transcription for shorter audio
|
||||
if publisher:
|
||||
publisher.info(
|
||||
"asr", f"Using direct transcription (duration ≤ {max_direct_duration}s)"
|
||||
)
|
||||
|
||||
try:
|
||||
segments, info = transcribe_direct(model, audio_path, publisher)
|
||||
all_segments.extend(segments)
|
||||
language = info.language
|
||||
language_prob = info.language_probability
|
||||
except Exception as e:
|
||||
if publisher:
|
||||
publisher.error("asr", f"Direct transcription failed: {e}")
|
||||
sys.stderr.write(f"ASR: Direct transcription failed: {e}\n")
|
||||
sys.stderr.flush()
|
||||
# Fall back to chunked approach
|
||||
use_chunked = True
|
||||
if publisher:
|
||||
publisher.info("asr", "Falling back to chunked transcription")
|
||||
|
||||
if use_chunked:
|
||||
# Chunked transcription for long audio
|
||||
sys.stderr.write("ASR_DEBUG: Starting chunked transcription...\n")
|
||||
if publisher:
|
||||
publisher.info(
|
||||
"asr", f"Using chunked transcription ({chunk_duration}s chunks)"
|
||||
)
|
||||
|
||||
# Calculate chunks
|
||||
chunks = []
|
||||
start = 0.0
|
||||
chunk_idx = 0
|
||||
while start < total_duration:
|
||||
chunk_end = min(start + chunk_duration, total_duration)
|
||||
chunks.append(
|
||||
{
|
||||
"start": start,
|
||||
"end": chunk_end,
|
||||
"duration": chunk_end - start,
|
||||
"idx": chunk_idx,
|
||||
}
|
||||
)
|
||||
start = chunk_end
|
||||
chunk_idx += 1
|
||||
|
||||
if publisher:
|
||||
publisher.info("asr", f"Split into {len(chunks)} chunks")
|
||||
|
||||
sys.stderr.write(f"ASR_DEBUG: Calculated {len(chunks)} chunks\n")
|
||||
chunk_temp_dir = os.path.join(temp_dir, "chunks")
|
||||
os.makedirs(chunk_temp_dir, exist_ok=True)
|
||||
sys.stderr.write("ASR_DEBUG: Created chunk directory\n")
|
||||
|
||||
last_resource_report = time.time()
|
||||
|
||||
sys.stderr.write(f"ASR_DEBUG: Starting loop over {len(chunks)} chunks\n")
|
||||
for i, chunk in enumerate(chunks):
|
||||
sys.stderr.write(
|
||||
f"ASR_DEBUG: Loop iteration {i}, chunk start={chunk['start']:.1f}\n"
|
||||
)
|
||||
sys.stderr.flush()
|
||||
chunk_path = os.path.join(chunk_temp_dir, f"chunk_{i:04d}.wav")
|
||||
|
||||
if publisher and os.environ.get("MOMENTRY_DISABLE_REDIS") != "1":
|
||||
sys.stderr.write("ASR_DEBUG: Before publisher.progress\n")
|
||||
sys.stderr.flush()
|
||||
publisher.progress(
|
||||
"asr", i, len(chunks), f"Processing chunk {i + 1}/{len(chunks)}"
|
||||
)
|
||||
sys.stderr.write("ASR_DEBUG: After publisher.progress\n")
|
||||
sys.stderr.flush()
|
||||
elif publisher:
|
||||
sys.stderr.write(
|
||||
"ASR_DEBUG: Redis disabled, skipping publisher.progress\n"
|
||||
)
|
||||
sys.stderr.flush()
|
||||
|
||||
# Extract chunk
|
||||
if not extract_chunk(
|
||||
audio_path, chunk["start"], chunk["duration"], chunk_path
|
||||
):
|
||||
if publisher:
|
||||
publisher.warning("asr", f"Failed to extract chunk {i}, skipping")
|
||||
continue
|
||||
|
||||
# Resource monitoring (sample every monitor_interval seconds)
|
||||
current_time = time.time()
|
||||
if (
|
||||
PSUTIL_AVAILABLE
|
||||
and publisher
|
||||
and (current_time - last_resource_report) >= monitor_interval
|
||||
):
|
||||
resources = monitor_resources(os.getpid())
|
||||
if resources["available"]:
|
||||
publisher.info(
|
||||
"asr",
|
||||
f"Resource usage: CPU {resources['cpu_percent']:.1f}%, "
|
||||
f"Memory {resources['memory_mb']:.1f}MB",
|
||||
)
|
||||
last_resource_report = current_time
|
||||
|
||||
# Transcribe chunk with retry logic
|
||||
sys.stderr.write(
|
||||
f"ASR_DEBUG: Starting transcription for chunk {i}, retry loop\n"
|
||||
)
|
||||
sys.stderr.flush()
|
||||
max_retries = 3
|
||||
transcribed = False
|
||||
last_error = None
|
||||
|
||||
for retry in range(max_retries):
|
||||
try:
|
||||
segments, info = transcribe_chunk(
|
||||
model, chunk_path, chunk["start"], i, len(chunks), publisher
|
||||
)
|
||||
all_segments.extend(segments)
|
||||
|
||||
if language is None:
|
||||
language = info.language
|
||||
language_prob = info.language_probability
|
||||
if publisher:
|
||||
publisher.info(
|
||||
"asr",
|
||||
f"Detected language: {language} (prob {language_prob:.2f})",
|
||||
)
|
||||
|
||||
transcribed = True
|
||||
break # Success, exit retry loop
|
||||
|
||||
except Exception as e:
|
||||
last_error = e
|
||||
if publisher:
|
||||
publisher.warning(
|
||||
"asr",
|
||||
f"Error transcribing chunk {i} (attempt {retry + 1}/{max_retries}): {e}",
|
||||
)
|
||||
sys.stderr.write(
|
||||
f"ASR: Error transcribing chunk {i} (attempt {retry + 1}/{max_retries}): {e}\n"
|
||||
)
|
||||
sys.stderr.flush()
|
||||
|
||||
if retry < max_retries - 1:
|
||||
# Wait before retry (exponential backoff)
|
||||
wait_time = 2**retry # 1, 2, 4 seconds
|
||||
if publisher:
|
||||
publisher.info("asr", f"Retrying in {wait_time}s...")
|
||||
time.sleep(wait_time)
|
||||
else:
|
||||
# Final attempt failed
|
||||
if publisher:
|
||||
publisher.error(
|
||||
"asr",
|
||||
f"Failed to transcribe chunk {i} after {max_retries} attempts: {last_error}",
|
||||
)
|
||||
sys.stderr.write(
|
||||
f"ASR: Failed to transcribe chunk {i} after {max_retries} attempts: {last_error}\n"
|
||||
)
|
||||
sys.stderr.flush()
|
||||
# Continue with next chunk (skip this one)
|
||||
|
||||
# Clean up chunk file
|
||||
sys.stderr.write(
|
||||
f"ASR_DEBUG: Finished processing chunk {i}, transcribed={transcribed}\n"
|
||||
)
|
||||
sys.stderr.flush()
|
||||
try:
|
||||
os.unlink(chunk_path)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Clean up temporary directory
|
||||
try:
|
||||
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Sort segments by start time
|
||||
all_segments.sort(key=lambda x: x["start"])
|
||||
|
||||
# Prepare output (maintain same format as original)
|
||||
output = {
|
||||
"processor_name": "asr",
|
||||
"processor_version": "2.0.0",
|
||||
"contract_version": "1.0",
|
||||
"language": language if language is not None else None,
|
||||
"language_probability": language_prob if language_prob is not None else None,
|
||||
"segments": all_segments,
|
||||
}
|
||||
|
||||
# Add metadata for chunked processing (optional)
|
||||
if use_chunked:
|
||||
output["processing_mode"] = "chunked"
|
||||
output["chunk_count"] = len(chunks) if "chunks" in locals() else 0
|
||||
output["chunk_duration"] = chunk_duration
|
||||
else:
|
||||
output["processing_mode"] = "direct"
|
||||
|
||||
# Write output
|
||||
with open(output_path, "w") as f:
|
||||
json.dump(output, f, indent=2)
|
||||
|
||||
if publisher:
|
||||
publisher.complete(
|
||||
"asr",
|
||||
f"{len(all_segments)} segments ({'chunked' if use_chunked else 'direct'} mode)",
|
||||
)
|
||||
|
||||
sys.stderr.write(
|
||||
f"ASR: Transcription complete, {len(all_segments)} segments written to {output_path}\n"
|
||||
)
|
||||
sys.stderr.flush()
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ASR Transcription with chunked processing"
|
||||
)
|
||||
parser.add_argument("video_path", nargs="?", help="Path to video file")
|
||||
parser.add_argument("output_path", nargs="?", help="Output JSON path")
|
||||
parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="")
|
||||
parser.add_argument("--version", action="version", version="2.0.0")
|
||||
parser.add_argument(
|
||||
"--check-health", action="store_true", help="Check dependencies and exit"
|
||||
)
|
||||
|
||||
# Hidden arguments for configuration (can be set via environment variables)
|
||||
parser.add_argument(
|
||||
"--chunk-duration", type=int, default=600, help=argparse.SUPPRESS
|
||||
) # 10 minutes default
|
||||
parser.add_argument(
|
||||
"--max-direct-duration", type=int, default=1200, help=argparse.SUPPRESS
|
||||
) # 20 minutes (safe limit based on testing)
|
||||
parser.add_argument("--model-size", default="tiny", help=argparse.SUPPRESS)
|
||||
parser.add_argument("--compute-type", default="int8", help=argparse.SUPPRESS)
|
||||
parser.add_argument(
|
||||
"--monitor-interval", type=int, default=60, help=argparse.SUPPRESS
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Handle health check
|
||||
if args.check_health:
|
||||
health = check_health()
|
||||
print(json.dumps(health, indent=2))
|
||||
sys.exit(0 if health["status"] == "healthy" else 1)
|
||||
|
||||
# Validate required arguments when not doing health check
|
||||
if args.video_path is None or args.output_path is None:
|
||||
parser.error(
|
||||
"video_path and output_path are required when not using --check-health"
|
||||
)
|
||||
|
||||
# Allow environment variable overrides
|
||||
chunk_duration_str = os.environ.get("MOMENTRY_ASR_CHUNK_DURATION")
|
||||
if chunk_duration_str is not None:
|
||||
chunk_duration = int(chunk_duration_str)
|
||||
else:
|
||||
chunk_duration = args.chunk_duration
|
||||
|
||||
max_direct_duration_str = os.environ.get("MOMENTRY_ASR_MAX_DIRECT_DURATION")
|
||||
if max_direct_duration_str is not None:
|
||||
max_direct_duration = int(max_direct_duration_str)
|
||||
else:
|
||||
max_direct_duration = args.max_direct_duration
|
||||
|
||||
model_size = os.environ.get("MOMENTRY_ASR_MODEL_SIZE")
|
||||
if model_size is None:
|
||||
model_size = args.model_size
|
||||
|
||||
compute_type = os.environ.get("MOMENTRY_ASR_COMPUTE_TYPE")
|
||||
if compute_type is None:
|
||||
compute_type = args.compute_type
|
||||
|
||||
run_asr(
|
||||
args.video_path,
|
||||
args.output_path,
|
||||
args.uuid,
|
||||
chunk_duration,
|
||||
max_direct_duration,
|
||||
model_size,
|
||||
compute_type,
|
||||
)
|
||||
Reference in New Issue
Block a user