fix: restore identity_id after face_dedup, rebuild package v20260512

- Re-ran identity_bind.py to restore identity_id on face_detections
- The dedup cleanup had deleted the rows that carried an identity_id and kept the rows where identity_id was NULL
- 70691 face_detections now have identity_id, 428 identities
- Full package rebuild: 169MB sqlite, 1358MB tar.gz
- identities.json: 428 identities + 5483 bindings + 5483 trace maps
- TMDB matching complete: Audrey Hepburn 843 traces, Cary Grant 482
This commit is contained in:
Accusys
2026-05-13 04:30:18 +08:00
parent fff2af8ad1
commit 48c3b13c37
837 changed files with 33273 additions and 5473 deletions

View File

@@ -0,0 +1,71 @@
#!/opt/homebrew/bin/python3.11
"""
Minimal test: run faster-whisper on full video, output each segment's text.
No VAD tuning, no speaker detection, no splitting. Just raw ASR output.
"""
import json, os, sys, time, subprocess, tempfile, shutil
import torchaudio
from faster_whisper import WhisperModel
def extract_audio(video_path, tmp_dir, sr=16000):
    """Decode a video's audio track to a WAV file and load it as a tensor.

    ffmpeg is asked for a single-channel, signed 16-bit WAV resampled to
    ``sr`` Hz, written as ``audio.wav`` inside ``tmp_dir``. The file is then
    read back with ``torchaudio.load``.

    Args:
        video_path: Path of the input media file handed to ffmpeg.
        tmp_dir: Existing directory that receives ``audio.wav``.
        sr: Target sample rate in Hz passed to ffmpeg's ``-ar`` option.

    Returns:
        A ``(wav_path, wav_data, sr_actual)`` tuple: the path of the written
        WAV file, the loaded audio data, and the sample rate reported by
        ``torchaudio.load``.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
        subprocess.TimeoutExpired: ffmpeg ran longer than 300 seconds.
    """
    target = os.path.join(tmp_dir, "audio.wav")
    ffmpeg_cmd = [
        "ffmpeg", "-y", "-v", "quiet",
        "-i", video_path,
        "-ar", str(sr),
        "-ac", "1",
        "-sample_fmt", "s16",
        target,
    ]
    subprocess.run(ffmpeg_cmd, check=True, capture_output=True, timeout=300)

    waveform, loaded_sr = torchaudio.load(target)
    # ffmpeg was already asked for one channel ("-ac 1"); this averaging over
    # the first dimension is a fallback in case more than one row comes back.
    leading_dim = waveform.shape[0]
    if leading_dim > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    return target, waveform, loaded_sr
def main():
    """Transcribe the hard-coded demo video and dump one TSV row per segment.

    Steps: load the faster-whisper "small" model (int8, CPU), extract the
    audio track into a temporary directory via ``extract_audio``, transcribe
    it with the VAD filter disabled, and write every segment to the output
    file as ``start<TAB>end<TAB>duration<TAB>word_count<TAB>text``. Summary
    statistics are printed at the end.

    The temporary directory is always removed, even when extraction or
    transcription fails. The output file is written as UTF-8. When no
    segments are produced, the per-segment averages are reported as "n/a"
    instead of dividing by zero.

    Raises:
        subprocess.CalledProcessError: Propagated from ``extract_audio`` when
            ffmpeg fails.
        subprocess.TimeoutExpired: Propagated from ``extract_audio`` when
            ffmpeg exceeds its timeout.
        OSError: The output file cannot be opened for writing.
    """
    video = "/Users/accusys/momentry/var/sftpgo/data/demo/Charade (1963) Cary Grant & Audrey Hepburn Comedy Mystery Romance Thriller Full Movie.mp4"
    output = "/Users/accusys/momentry/output_dev/segment_texts.txt"
    t0 = time.time()

    # Load model
    print("Loading faster-whisper small int8 CPU...")
    model = WhisperModel("small", device="cpu", compute_type="int8")
    print(f"Model loaded ({time.time()-t0:.0f}s)")

    # Extract audio
    print("Extracting audio...")
    tmp_dir = tempfile.mkdtemp(prefix="asr_test_")

    count = 0
    total_words = 0
    total_dur = 0
    try:
        wav_path, wav_data, sr = extract_audio(video, tmp_dir)
        total_audio_s = wav_data.shape[1] / sr
        print(f"Audio: {total_audio_s:.0f}s, {sr}Hz ({time.time()-t0:.0f}s)")

        # Transcribe - NO VAD filter, let the model segment naturally
        print("Transcribing (vad_filter=False)...")
        segments, info = model.transcribe(wav_path, beam_size=5,
                                          vad_filter=False, word_timestamps=True)
        print(f"  Detected language: {info.language} (prob: {info.language_probability:.2f})")
        print(f"  Duration after VAD: {info.duration_after_vad:.1f}s")

        # Write each segment to file. Per the faster-whisper docs, `segments`
        # is a lazy generator: decoding happens while this loop consumes it,
        # so the WAV file must stay on disk until the loop has finished.
        # Explicit UTF-8 keeps non-ASCII transcript text independent of the
        # locale's default encoding.
        with open(output, "w", encoding="utf-8") as f:
            for seg in segments:
                text = seg.text.strip()
                dur = seg.end - seg.start
                words = len(seg.words) if seg.words else 0
                f.write(f"{seg.start:.2f}\t{seg.end:.2f}\t{dur:.2f}\t{words}\t{text}\n")
                count += 1
                total_words += words
                total_dur += dur
    finally:
        # Clean up on both success and failure so aborted runs do not leave
        # extracted audio behind in the temp directory.
        shutil.rmtree(tmp_dir, ignore_errors=True)

    elapsed = time.time() - t0
    print(f"\n=== Results ===")
    print(f"Segments: {count}")
    print(f"Words: {total_words}")
    print(f"Speech duration: {total_dur:.0f}s")
    if count:
        print(f"Avg segment: {total_dur/count:.1f}s, {total_words/count:.1f} words")
    else:
        # No segments (e.g. silent input): averages are undefined.
        print("Avg segment: n/a (no segments)")
    print(f"Elapsed: {elapsed:.0f}s ({elapsed/60:.1f}min)")
    print(f"Output: {output}")
# Run the transcription only when executed as a script, not when imported.
if __name__ == "__main__":
    main()