Files
momentry_core/scripts/verify_charade_pipeline.py

170 lines
5.6 KiB
Python

#!/opt/homebrew/bin/python3.11
"""
Verify Charade pipeline completion.
Usage: python3 scripts/verify_charade_pipeline.py <file_uuid>
"""
import json
import os
import subprocess
import sys

import psycopg2
# Connection string passed to psycopg2.connect() by the database checks.
DATABASE_URL = "postgres://accusys@localhost:5432/momentry"
# Directory in which per-file outputs named "<file_uuid>.<suffix>" are looked up.
OUTPUT_DIR = "/Users/accusys/momentry/output_dev"
def check_file_outputs(file_uuid, output_dir=None):
    """Check that every expected pipeline output file exists.

    Each expected file is looked up as ``<output_dir>/<file_uuid>.<suffix>``
    and stat'ed in-process, so the check does not depend on a
    platform-specific ``stat`` command-line tool.

    Args:
        file_uuid: Identifier used as the file-name prefix of every output.
        output_dir: Directory to search. Defaults to the module-level
            ``OUTPUT_DIR`` when omitted.

    Returns:
        A list with one dict per expected file, in a fixed order. Each dict
        has the keys ``"file"`` (the suffix), ``"exists"`` (bool) and
        ``"size"`` (bytes; 0 when the file could not be stat'ed).
    """
    if output_dir is None:
        output_dir = OUTPUT_DIR

    expected_files = (
        "cut.json",
        "yolo.json",
        "face.json",
        "face_traced.json",
        "pose.json",
        "asrx.json",
        "visual_chunk.json",
        "scene.json",
        "scene_meta.json",
        "story_llm.json",
        "story_story.json",
        "tmdb.json",
    )

    results = []
    for ext in expected_files:
        path = os.path.join(output_dir, f"{file_uuid}.{ext}")
        try:
            size = os.stat(path).st_size
        except OSError:
            # Missing or unreadable file: report it as absent instead of
            # aborting the remaining checks.
            results.append({"file": ext, "exists": False, "size": 0})
        else:
            results.append({"file": ext, "exists": True, "size": size})
    return results
def check_db_records(file_uuid, schema="dev"):
    """Run the per-file database checks and collect one result per check.

    Every query is executed with ``file_uuid`` as a bound parameter and the
    schema name as a quoted SQL identifier, so neither value is spliced into
    the SQL text.

    Args:
        file_uuid: Identifier of the file whose rows are inspected.
        schema: Database schema holding the tables (quoted as an identifier).

    Returns:
        A list of dicts in check order. A successful check yields
        ``{"check": name, "value": first_column_or_None}``; a failed query
        yields ``{"check": name, "error": message}``.
    """
    # psycopg2.sql is a submodule that a bare ``import psycopg2`` does not load.
    from psycopg2 import sql

    # (check name, query template). "{schema}" is replaced by the quoted
    # schema identifier; "%s" is bound to file_uuid at execution time.
    checks = [
        ("videos", "SELECT status FROM {schema}.videos WHERE file_uuid = %s"),
        # NOTE(review): only the first column (status) is reported below;
        # completed_processors is selected but discarded - confirm intent.
        ("monitor_jobs", "SELECT status, completed_processors FROM {schema}.monitor_jobs WHERE uuid = %s"),
        ("pre_chunks", "SELECT COUNT(*) FROM {schema}.pre_chunks WHERE file_uuid = %s"),
        ("face_detections_total", "SELECT COUNT(*) FROM {schema}.face_detections WHERE file_uuid = %s"),
        ("face_detections_embedding", "SELECT COUNT(embedding) FROM {schema}.face_detections WHERE file_uuid = %s"),
        ("face_detections_trace", "SELECT COUNT(trace_id) FROM {schema}.face_detections WHERE file_uuid = %s"),
        ("face_detections_identity", "SELECT COUNT(identity_id) FROM {schema}.face_detections WHERE file_uuid = %s"),
        ("chunks_total", "SELECT COUNT(*) FROM {schema}.chunk WHERE file_uuid = %s"),
        ("chunks_embedding", "SELECT COUNT(embedding) FROM {schema}.chunk WHERE file_uuid = %s"),
    ]
    schema_ident = sql.Identifier(schema)

    results = []
    conn = psycopg2.connect(DATABASE_URL)
    try:
        # Each statement runs in its own transaction, so one failing check
        # (e.g. a missing table) cannot leave the session in an aborted
        # transaction that makes every later check fail too.
        conn.autocommit = True
        cur = conn.cursor()
        try:
            for name, template in checks:
                query = sql.SQL(template).format(schema=schema_ident)
                try:
                    cur.execute(query, (file_uuid,))
                    row = cur.fetchone()
                except psycopg2.Error as exc:
                    results.append({"check": name, "error": str(exc)})
                else:
                    results.append({"check": name, "value": row[0] if row else None})
        finally:
            cur.close()
    finally:
        conn.close()
    return results
def check_identity_bindings(file_uuid, schema="dev"):
    """Report how the file's face detections are bound to named identities.

    Three checks are run: the number of detections joined to the identity
    named 'Audrey Hepburn', the same for 'Cary Grant', and the ten identity
    names with the most detections. ``file_uuid`` and the identity names are
    passed as bound parameters and the schema as a quoted SQL identifier.

    Args:
        file_uuid: Identifier of the file whose face detections are inspected.
        schema: Database schema holding the tables (quoted as an identifier).

    Returns:
        A list of dicts in check order. ``"audrey_faces"`` and
        ``"cary_faces"`` carry an integer ``"value"`` (0 when no row is
        returned); ``"top_identities"`` carries a list of ``(name, count)``
        rows. A failed query yields ``{"check": name, "error": message}``.
    """
    # psycopg2.sql is a submodule that a bare ``import psycopg2`` does not load.
    from psycopg2 import sql

    named_count_query = """
        SELECT COUNT(*) FROM {schema}.face_detections fd
        JOIN {schema}.identities i ON fd.identity_id = i.id
        WHERE fd.file_uuid = %s AND i.name = %s
    """
    top_identities_query = """
        SELECT i.name, COUNT(*) as count
        FROM {schema}.face_detections fd
        JOIN {schema}.identities i ON fd.identity_id = i.id
        WHERE fd.file_uuid = %s AND fd.identity_id IS NOT NULL
        GROUP BY i.name
        ORDER BY count DESC
        LIMIT 10
    """
    # (check name, query template, bound parameters)
    checks = [
        ("audrey_faces", named_count_query, (file_uuid, "Audrey Hepburn")),
        ("cary_faces", named_count_query, (file_uuid, "Cary Grant")),
        ("top_identities", top_identities_query, (file_uuid,)),
    ]
    schema_ident = sql.Identifier(schema)

    results = []
    conn = psycopg2.connect(DATABASE_URL)
    try:
        # Each statement runs in its own transaction, so one failing check
        # cannot leave the session in an aborted transaction that makes the
        # remaining checks fail as well.
        conn.autocommit = True
        cur = conn.cursor()
        try:
            for name, template, params in checks:
                query = sql.SQL(template).format(schema=schema_ident)
                try:
                    cur.execute(query, params)
                    if name == "top_identities":
                        value = cur.fetchall()
                    else:
                        row = cur.fetchone()
                        value = row[0] if row else 0
                except psycopg2.Error as exc:
                    results.append({"check": name, "error": str(exc)})
                else:
                    results.append({"check": name, "value": value})
        finally:
            cur.close()
    finally:
        conn.close()
    return results
def print_report(file_uuid, file_outputs, db_records, identity_bindings):
    """Print the verification report to standard output.

    Args:
        file_uuid: Identifier shown in the report header.
        file_outputs: Dicts with the keys ``"file"``, ``"exists"`` and
            ``"size"`` (bytes), one per expected output file.
        db_records: Dicts with ``"check"`` plus either ``"value"`` or
            ``"error"``. A list/tuple value is printed comma-separated.
        identity_bindings: Dicts with ``"check"`` plus either ``"value"`` or
            ``"error"``. A list value is printed one row per line as
            ``<row[0]>: <row[1]> faces``.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print("Charade Pipeline Verification Report")
    print(f"File UUID: {file_uuid}")
    print(f"{banner}\n")

    print("## File Outputs")
    for f in file_outputs:
        # A file only passes when it exists AND is non-empty; the marker makes
        # that verdict visible instead of leaving it implicit in the size.
        status = "[OK]" if f["exists"] and f["size"] > 0 else "[FAIL]"
        size_kb = f["size"] / 1024
        print(f" {status} {f['file']}: {size_kb:.1f} KB")

    print("\n## Database Records")
    for r in db_records:
        # Fall back to the error text when the check produced no value.
        value = r.get("value", r.get("error", "N/A"))
        if isinstance(value, (list, tuple)):
            value = ", ".join(str(v) for v in value)
        print(f" {r['check']}: {value}")

    print("\n## Identity Bindings")
    for r in identity_bindings:
        value = r.get("value", r.get("error", "N/A"))
        if isinstance(value, list):
            print(f" {r['check']}:")
            for row in value:
                print(f" - {row[0]}: {row[1]} faces")
        else:
            print(f" {r['check']}: {value}")

    print(f"\n{banner}\n")
def main():
    """Run every verification step for one file and print the report.

    The file UUID is read from the first command-line argument; when no
    argument is supplied a built-in default UUID is used.
    """
    cli_args = sys.argv[1:]
    file_uuid = cli_args[0] if cli_args else "c3c635e3641da80dde10cc555ffcdda5"

    print("Verifying pipeline...")
    # Arguments are evaluated left to right: files, DB records, identities.
    print_report(
        file_uuid,
        check_file_outputs(file_uuid),
        check_db_records(file_uuid),
        check_identity_bindings(file_uuid),
    )


if __name__ == "__main__":
    main()