170 lines
5.6 KiB
Python
170 lines
5.6 KiB
Python
#!/opt/homebrew/bin/python3.11
|
|
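"""
Verify Charade pipeline completion.

Checks the expected output files on disk and the related database records
for one processed file, then prints a verification report.

Usage: python3 scripts/verify_charade_pipeline.py [file_uuid]

If file_uuid is omitted, a built-in default UUID is used.
"""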
|
|
|
|
import json
import os
import subprocess
import sys

import psycopg2
from psycopg2 import sql
|
|
|
|
# Connection string passed to psycopg2.connect() by the database checks.
# Overridable through the MOMENTRY_DATABASE_URL environment variable so the
# script is not tied to a single workstation; the default is the original value.
DATABASE_URL = os.environ.get(
    "MOMENTRY_DATABASE_URL", "postgres://accusys@localhost:5432/momentry"
)

# Directory searched for the per-file "<file_uuid>.<name>" output files.
# Overridable through the MOMENTRY_OUTPUT_DIR environment variable.
OUTPUT_DIR = os.environ.get(
    "MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev"
)
|
|
|
|
|
|
def check_file_outputs(file_uuid, output_dir=None):
    """Check that every expected pipeline output file exists.

    For each expected suffix, the path ``<output_dir>/<file_uuid>.<suffix>``
    is inspected with ``os.stat``.

    Args:
        file_uuid: Identifier used as the file-name prefix of every output.
        output_dir: Directory to look in. When omitted (``None``), the
            module-level ``OUTPUT_DIR`` is used.

    Returns:
        A list with one dict per expected file, in a fixed order, of the form
        ``{"file": <suffix>, "exists": <bool>, "size": <int bytes>}``. A file
        that cannot be stat'ed is reported with ``exists=False`` and
        ``size=0``.
    """
    expected_files = (
        "cut.json",
        "yolo.json",
        "face.json",
        "face_traced.json",
        "pose.json",
        "asrx.json",
        "visual_chunk.json",
        "scene.json",
        "scene_meta.json",
        "story_llm.json",
        "story_story.json",
        "tmdb.json",
    )
    base_dir = OUTPUT_DIR if output_dir is None else output_dir

    results = []
    for ext in expected_files:
        path = f"{base_dir}/{file_uuid}.{ext}"
        try:
            # os.stat replaces the BSD-only `stat -f%z` subprocess call: it is
            # portable and avoids spawning one process per file.
            size = os.stat(path).st_size
        except (OSError, ValueError):
            # OSError: missing or unreadable file. ValueError: the path holds
            # an embedded NUL byte. Both mean "not there" for this report.
            results.append({"file": ext, "exists": False, "size": 0})
        else:
            results.append({"file": ext, "exists": True, "size": size})

    return results
|
|
|
|
|
|
def check_db_records(file_uuid, schema="dev"):
    """Run the per-file database checks and collect their results.

    Each check is a single query against a table in ``schema``; only the
    first column of the first returned row is reported.

    Args:
        file_uuid: Identifier of the file whose records are inspected. It is
            sent to the server as a bound parameter, never spliced into SQL.
        schema: Name of the database schema that holds the tables. It is
            quoted as an SQL identifier, so it is matched exactly as written.

    Returns:
        A list with one dict per check, in a fixed order: either
        ``{"check": <name>, "value": <first column or None>}`` or, when the
        query failed, ``{"check": <name>, "error": <message>}``.

    Raises:
        psycopg2.Error: If the connection itself cannot be established.
    """
    # "{schema}" is filled in with a quoted identifier below; "%s" is the
    # psycopg2 placeholder bound to file_uuid at execute time.
    checks = [
        ("videos", "SELECT status FROM {schema}.videos WHERE file_uuid = %s"),
        # NOTE(review): completed_processors is selected but only the first
        # column (status) is reported — confirm whether that is intended.
        ("monitor_jobs", "SELECT status, completed_processors FROM {schema}.monitor_jobs WHERE uuid = %s"),
        ("pre_chunks", "SELECT COUNT(*) FROM {schema}.pre_chunks WHERE file_uuid = %s"),
        ("face_detections_total", "SELECT COUNT(*) FROM {schema}.face_detections WHERE file_uuid = %s"),
        ("face_detections_embedding", "SELECT COUNT(embedding) FROM {schema}.face_detections WHERE file_uuid = %s"),
        ("face_detections_trace", "SELECT COUNT(trace_id) FROM {schema}.face_detections WHERE file_uuid = %s"),
        ("face_detections_identity", "SELECT COUNT(identity_id) FROM {schema}.face_detections WHERE file_uuid = %s"),
        ("chunks_total", "SELECT COUNT(*) FROM {schema}.chunk WHERE file_uuid = %s"),
        ("chunks_embedding", "SELECT COUNT(embedding) FROM {schema}.chunk WHERE file_uuid = %s"),
    ]
    schema_ident = sql.Identifier(schema)

    results = []
    conn = psycopg2.connect(DATABASE_URL)
    try:
        with conn.cursor() as cur:
            for name, template in checks:
                query = sql.SQL(template).format(schema=schema_ident)
                try:
                    cur.execute(query, (file_uuid,))
                    row = cur.fetchone()
                except psycopg2.Error as exc:
                    # A failed statement leaves the transaction aborted;
                    # without a rollback every later check would fail too.
                    conn.rollback()
                    results.append({"check": name, "error": str(exc)})
                else:
                    results.append({"check": name, "value": row[0] if row else None})
    finally:
        # Always release the connection, even on an unexpected exception.
        conn.close()

    return results
|
|
|
|
|
|
def check_identity_bindings(file_uuid, schema="dev"):
    """Report how face detections of a file are bound to named identities.

    Three checks are run: the number of detections joined to the identity
    named 'Audrey Hepburn', the number joined to 'Cary Grant', and the ten
    identity names with the most detections.

    Args:
        file_uuid: Identifier of the file whose detections are inspected. It
            is sent to the server as a bound parameter, never spliced into SQL.
        schema: Name of the database schema that holds the tables. It is
            quoted as an SQL identifier, so it is matched exactly as written.

    Returns:
        A list with one dict per check, in a fixed order. For
        ``"top_identities"`` the ``"value"`` is the list of ``(name, count)``
        rows; for the other checks it is the count (``0`` when no row came
        back). A failed query yields ``{"check": <name>, "error": <message>}``.

    Raises:
        psycopg2.Error: If the connection itself cannot be established.
    """
    # "{schema}" is filled in with a quoted identifier below; "%s" is the
    # psycopg2 placeholder bound to file_uuid at execute time.
    checks = [
        ("audrey_faces", """
            SELECT COUNT(*) FROM {schema}.face_detections fd
            JOIN {schema}.identities i ON fd.identity_id = i.id
            WHERE fd.file_uuid = %s AND i.name = 'Audrey Hepburn'
        """),
        ("cary_faces", """
            SELECT COUNT(*) FROM {schema}.face_detections fd
            JOIN {schema}.identities i ON fd.identity_id = i.id
            WHERE fd.file_uuid = %s AND i.name = 'Cary Grant'
        """),
        ("top_identities", """
            SELECT i.name, COUNT(*) as count
            FROM {schema}.face_detections fd
            JOIN {schema}.identities i ON fd.identity_id = i.id
            WHERE fd.file_uuid = %s AND fd.identity_id IS NOT NULL
            GROUP BY i.name
            ORDER BY count DESC
            LIMIT 10
        """),
    ]
    schema_ident = sql.Identifier(schema)

    results = []
    conn = psycopg2.connect(DATABASE_URL)
    try:
        with conn.cursor() as cur:
            for name, template in checks:
                query = sql.SQL(template).format(schema=schema_ident)
                try:
                    cur.execute(query, (file_uuid,))
                    if name == "top_identities":
                        value = cur.fetchall()
                    else:
                        row = cur.fetchone()
                        value = row[0] if row else 0
                except psycopg2.Error as exc:
                    # A failed statement leaves the transaction aborted;
                    # without a rollback every later check would fail too.
                    conn.rollback()
                    results.append({"check": name, "error": str(exc)})
                else:
                    results.append({"check": name, "value": value})
    finally:
        # Always release the connection, even on an unexpected exception.
        conn.close()

    return results
|
|
|
|
|
|
def print_report(file_uuid, file_outputs, db_records, identity_bindings):
    """Write the verification summary to standard output.

    Args:
        file_uuid: Identifier shown in the report header.
        file_outputs: Dicts with "file", "exists" and "size" keys; an entry is
            marked as passing only when it exists and its size is above zero.
        db_records: Dicts with a "check" key plus either "value" or "error".
            List and tuple values are shown comma-separated.
        identity_bindings: Dicts with a "check" key plus either "value" or
            "error". A list value is printed as one "name: count faces" line
            per row.
    """
    rule = "=" * 60

    print(f"\n{rule}")
    print("Charade Pipeline Verification Report")
    print(f"File UUID: {file_uuid}")
    print(f"{rule}\n")

    print("## File Outputs")
    for entry in file_outputs:
        passed = entry["exists"] and entry["size"] > 0
        marker = "✅" if passed else "❌"
        kilobytes = entry["size"] / 1024
        print(f" {marker} {entry['file']}: {kilobytes:.1f} KB")

    print("\n## Database Records")
    for record in db_records:
        # Prefer the value; fall back to the error text, then to "N/A".
        shown = record["value"] if "value" in record else record.get("error", "N/A")
        if isinstance(shown, (list, tuple)):
            shown = ", ".join(map(str, shown))
        print(f" {record['check']}: {shown}")

    print("\n## Identity Bindings")
    for record in identity_bindings:
        shown = record["value"] if "value" in record else record.get("error", "N/A")
        if not isinstance(shown, list):
            print(f" {record['check']}: {shown}")
            continue
        # A list holds (name, count) rows and gets one line per row.
        print(f" {record['check']}:")
        for row in shown:
            print(f" - {row[0]}: {row[1]} faces")

    print(f"\n{rule}\n")
|
|
|
|
|
|
def main():
    """Run every check for one file UUID and print the combined report.

    The UUID is taken from the first command-line argument; when no argument
    is given, a built-in default UUID is used instead.
    """
    default_uuid = "c3c635e3641da80dde10cc555ffcdda5"
    cli_args = sys.argv[1:]
    file_uuid = cli_args[0] if cli_args else default_uuid

    print("Verifying pipeline...")

    # Evaluated left to right: files first, then both database checks.
    report_sections = (
        check_file_outputs(file_uuid),
        check_db_records(file_uuid),
        check_identity_bindings(file_uuid),
    )

    print_report(file_uuid, *report_sections)
|
|
|
|
|
|
# Run the verification only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()