#!/opt/homebrew/bin/python3.11
"""
Verify Charade pipeline completion.

Checks that the expected per-video output files exist, queries the database
for the records the pipeline should have written, summarises which identities
the detected faces are bound to, and prints a report.

Usage: python3 scripts/verify_charade_pipeline.py [file_uuid]
"""

import json
import os
import re
import subprocess
import sys

try:
    import psycopg2
except ImportError:
    # Keep the file checks and the report formatter importable on machines
    # without the PostgreSQL driver; the database checks raise a clear
    # RuntimeError instead (see _run_checks).
    psycopg2 = None

DATABASE_URL = "postgres://accusys@localhost:5432/momentry"
OUTPUT_DIR = "/Users/accusys/momentry/output_dev"

# UUID verified when none is given on the command line.
DEFAULT_FILE_UUID = "c3c635e3641da80dde10cc555ffcdda5"

# Suffixes of the output files looked up as "<file_uuid>.<suffix>".
_EXPECTED_OUTPUTS = (
    "cut.json",
    "yolo.json",
    "face.json",
    "face_traced.json",
    "pose.json",
    "asrx.json",
    "visual_chunk.json",
    "scene.json",
    "scene_meta.json",
    "story_llm.json",
    "story_story.json",
    "tmdb.json",
)

# A schema name is spliced into the SQL text (identifiers cannot be bound as
# query parameters), so only plain identifiers are accepted.
_IDENTIFIER_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")


def _checked_schema(schema):
    """Return *schema* unchanged if it is a plain SQL identifier.

    Raises:
        ValueError: if *schema* is not a string made only of ASCII letters,
            digits and underscores (not starting with a digit).
    """
    if not isinstance(schema, str) or not _IDENTIFIER_RE.fullmatch(schema):
        raise ValueError(f"invalid schema name: {schema!r}")
    return schema


def _run_checks(checks, file_uuid, empty_value=None, multi_row=()):
    """Run each ``(name, query)`` check and collect one result dict per check.

    Every query must contain exactly one ``%s`` placeholder, which is bound
    to *file_uuid* by the driver instead of being formatted into the SQL.

    Args:
        checks: iterable of ``(name, query)`` pairs.
        file_uuid: value bound to the query placeholder.
        empty_value: reported when a single-row query returns no row.
        multi_row: names of checks whose full row list is reported instead
            of the first column of the first row.

    Returns:
        A list of ``{"check": name, "value": ...}`` dicts, or
        ``{"check": name, "error": message}`` for a check that failed.

    Raises:
        RuntimeError: if psycopg2 is not installed.
    """
    if psycopg2 is None:
        raise RuntimeError("psycopg2 is required for the database checks")

    conn = psycopg2.connect(DATABASE_URL)
    try:
        # Without autocommit, one failing query aborts the open transaction
        # and every later check would fail too, hiding the real results.
        conn.autocommit = True
        cur = conn.cursor()
        try:
            results = []
            for name, query in checks:
                try:
                    cur.execute(query, (file_uuid,))
                    if name in multi_row:
                        value = cur.fetchall()
                    else:
                        row = cur.fetchone()
                        value = row[0] if row else empty_value
                except Exception as exc:
                    # Deliberately broad: a failing check (missing table,
                    # bad column, ...) is reported, not fatal to the run.
                    results.append({"check": name, "error": str(exc)})
                else:
                    results.append({"check": name, "value": value})
            return results
        finally:
            cur.close()
    finally:
        conn.close()


def check_file_outputs(file_uuid, output_dir=None):
    """Check that all expected output files exist.

    Args:
        file_uuid: prefix of the output file names.
        output_dir: directory to look in; defaults to ``OUTPUT_DIR``.

    Returns:
        One ``{"file": suffix, "exists": bool, "size": bytes}`` dict per
        expected file, in a fixed order. A file that cannot be stat'ed is
        reported as missing with size 0.
    """
    if output_dir is None:
        output_dir = OUTPUT_DIR

    results = []
    for suffix in _EXPECTED_OUTPUTS:
        path = os.path.join(output_dir, f"{file_uuid}.{suffix}")
        try:
            # os.path.getsize is portable; the former `stat -f%z` call only
            # works with the BSD/macOS stat binary.
            size = os.path.getsize(path)
        except OSError:
            results.append({"file": suffix, "exists": False, "size": 0})
        else:
            results.append({"file": suffix, "exists": True, "size": size})
    return results


def check_db_records(file_uuid, schema="dev"):
    """Check database records.

    Args:
        file_uuid: UUID of the processed file.
        schema: database schema holding the pipeline tables.

    Returns:
        One result dict per check (see ``_run_checks``); a query that
        returns no row reports ``None``.

    Raises:
        ValueError: if *schema* is not a plain SQL identifier.
        RuntimeError: if psycopg2 is not installed.
    """
    schema = _checked_schema(schema)
    face_table = f"{schema}.face_detections"
    chunk_table = f"{schema}.chunk"
    checks = [
        ("videos",
         f"SELECT status FROM {schema}.videos WHERE file_uuid = %s"),
        # NOTE(review): two columns are selected but only the first (status)
        # is reported; kept as-is to preserve the result shape.
        ("monitor_jobs",
         f"SELECT status, completed_processors FROM {schema}.monitor_jobs"
         f" WHERE uuid = %s"),
        ("pre_chunks",
         f"SELECT COUNT(*) FROM {schema}.pre_chunks WHERE file_uuid = %s"),
        ("face_detections_total",
         f"SELECT COUNT(*) FROM {face_table} WHERE file_uuid = %s"),
        ("face_detections_embedding",
         f"SELECT COUNT(embedding) FROM {face_table} WHERE file_uuid = %s"),
        ("face_detections_trace",
         f"SELECT COUNT(trace_id) FROM {face_table} WHERE file_uuid = %s"),
        ("face_detections_identity",
         f"SELECT COUNT(identity_id) FROM {face_table} WHERE file_uuid = %s"),
        ("chunks_total",
         f"SELECT COUNT(*) FROM {chunk_table} WHERE file_uuid = %s"),
        ("chunks_embedding",
         f"SELECT COUNT(embedding) FROM {chunk_table} WHERE file_uuid = %s"),
    ]
    return _run_checks(checks, file_uuid, empty_value=None)


def check_identity_bindings(file_uuid, schema="dev"):
    """Check identity bindings.

    Args:
        file_uuid: UUID of the processed file.
        schema: database schema holding the pipeline tables.

    Returns:
        One result dict per check (see ``_run_checks``). ``audrey_faces``
        and ``cary_faces`` report a count (0 when no row is returned);
        ``top_identities`` reports up to ten ``(name, count)`` rows.

    Raises:
        ValueError: if *schema* is not a plain SQL identifier.
        RuntimeError: if psycopg2 is not installed.
    """
    schema = _checked_schema(schema)
    joined = (
        f"FROM {schema}.face_detections fd"
        f" JOIN {schema}.identities i ON fd.identity_id = i.id"
    )
    checks = [
        ("audrey_faces",
         f"SELECT COUNT(*) {joined}"
         f" WHERE fd.file_uuid = %s AND i.name = 'Audrey Hepburn'"),
        ("cary_faces",
         f"SELECT COUNT(*) {joined}"
         f" WHERE fd.file_uuid = %s AND i.name = 'Cary Grant'"),
        ("top_identities",
         f"SELECT i.name, COUNT(*) as count {joined}"
         f" WHERE fd.file_uuid = %s AND fd.identity_id IS NOT NULL"
         f" GROUP BY i.name ORDER BY count DESC LIMIT 10"),
    ]
    return _run_checks(
        checks, file_uuid, empty_value=0, multi_row=("top_identities",)
    )


def print_report(file_uuid, file_outputs, db_records, identity_bindings):
    """Print verification report.

    Args:
        file_uuid: UUID shown in the report header.
        file_outputs: result of ``check_file_outputs``.
        db_records: result of ``check_db_records``.
        identity_bindings: result of ``check_identity_bindings``.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print("Charade Pipeline Verification Report")
    print(f"File UUID: {file_uuid}")
    print(f"{rule}\n")

    print("## File Outputs")
    for entry in file_outputs:
        # An empty file counts as a failure, not just a missing one.
        status = "✅" if entry["exists"] and entry["size"] > 0 else "❌"
        size_kb = entry["size"] / 1024
        print(f" {status} {entry['file']}: {size_kb:.1f} KB")

    print("\n## Database Records")
    for record in db_records:
        # Show the value when present, otherwise the captured error text.
        value = record.get("value", record.get("error", "N/A"))
        if isinstance(value, (list, tuple)):
            value = ", ".join(str(item) for item in value)
        print(f" {record['check']}: {value}")

    print("\n## Identity Bindings")
    for record in identity_bindings:
        value = record.get("value", record.get("error", "N/A"))
        if isinstance(value, list):
            # Multi-row result: one "(name, count)" line per identity.
            print(f" {record['check']}:")
            for row in value:
                print(f" - {row[0]}: {row[1]} faces")
        else:
            print(f" {record['check']}: {value}")

    print(f"\n{rule}\n")


def main():
    """Run every check for the UUID in ``sys.argv[1]`` and print the report.

    Falls back to ``DEFAULT_FILE_UUID`` when no argument is given.
    """
    file_uuid = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_FILE_UUID

    print("Verifying pipeline...")
    file_outputs = check_file_outputs(file_uuid)
    db_records = check_db_records(file_uuid)
    identity_bindings = check_identity_bindings(file_uuid)
    print_report(file_uuid, file_outputs, db_records, identity_bindings)


if __name__ == "__main__":
    main()