# momentry_core/scripts/face_landmark_qc.py

#!/opt/homebrew/bin/python3.11
"""
Face landmark QC: verify eyes/nose are within face bounding box.
Flags faces in DB where landmarks don't match the bbox.
Usage: python3 face_landmark_qc.py <file_uuid> [--threshold 0.5] [--apply] [--schema dev]
"""
import sys, json, psycopg2, argparse, os

# Command-line interface: one positional file UUID plus optional QC settings.
parser = argparse.ArgumentParser()
parser.add_argument("uuid")
parser.add_argument(
    "--threshold",
    "-t",
    type=float,
    default=0.5,
    help="Fraction of landmark points that must be inside bbox (default: 0.5)",
)
parser.add_argument(
    "--apply",
    action="store_true",
    help="Write qc_ok to face_detections.metadata in DB",
)
parser.add_argument(
    "--schema",
    default="dev",
    help="DB schema (default: dev)",
)
args = parser.parse_args()

# Run configuration taken from the parsed CLI arguments.
UUID, THRESHOLD, SCHEMA = args.uuid, args.threshold, args.schema
# MOMENTRY_OUTPUT_DIR, when set, overrides the default output directory.
OUTPUT_DIR = os.environ.get("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev")
FACE_PATH = OUTPUT_DIR + "/" + UUID + ".face.json"

# Startup banner echoing the effective settings, one per line.
print(
    "=== Face Landmark QC ===",
    "UUID: {}".format(UUID),
    "Schema: {}".format(SCHEMA),
    "Face file: {}".format(FACE_PATH),
    "Threshold: {:.0f}% points must be inside bbox".format(THRESHOLD * 100),
    sep="\n",
)

# Load face.json. JSON text is UTF-8, so decode it explicitly instead of
# relying on the platform's locale-dependent default encoding.
with open(FACE_PATH, encoding="utf-8") as f:
    data = json.load(f)

def _face_bbox(face):
    """Return the face's bbox as an ``(x, y, width, height)`` tuple.

    A missing or null ``bbox`` entry, or a missing key inside it, yields
    ``None`` in the corresponding position(s).
    """
    bbox = face.get('bbox') or {}
    return bbox.get('x'), bbox.get('y'), bbox.get('width'), bbox.get('height')


def _landmarks_pass_qc(landmarks, x, y, w, h, threshold):
    """Return True when the eye/nose landmarks agree with the bounding box.

    A face passes when at least ``threshold`` (a fraction) of all left-eye,
    right-eye and nose points lie inside the box (edges inclusive) AND at
    least two of those three features have one or more points inside it.
    """
    inside_pts = 0
    total_pts = 0
    features_inside = 0
    for lm_type in ('left_eye', 'right_eye', 'nose'):
        # A feature stored as null is treated the same as a missing one.
        points = landmarks.get(lm_type) or []
        total_pts += len(points)
        hits = sum(
            1
            for pt in points
            if (x <= pt[0] <= x + w) and (y <= pt[1] <= y + h)
        )
        inside_pts += hits
        if hits:
            features_inside += 1

    # max(1, ...) keeps the ratio at 0 when no points were supplied at all.
    ratio = inside_pts / max(1, total_pts)
    return ratio >= threshold and features_inside >= 2


total_faces = 0
faces_with_lm = 0
good_faces = 0
bad_faces = 0
qc_results = []  # list of (frame, face_idx, qc_ok, x, y, w, h)

# Index frames by frame number; a repeated frame number keeps its last entry.
frame_map = {frm['frame']: frm for frm in data['frames']}

for frame_num, frm in frame_map.items():
    # "faces" may be absent or null for frames without detections.
    for fi, face in enumerate(frm.get('faces') or []):
        total_faces += 1
        x, y, w, h = _face_bbox(face)
        lm = face.get('landmarks')

        if not lm:
            # No landmarks to check: recorded as failing QC.
            qc_ok = False
        else:
            faces_with_lm += 1
            if None in (x, y, w, h):
                # Incomplete bbox: nothing to compare the landmarks against.
                qc_ok = False
            else:
                qc_ok = _landmarks_pass_qc(lm, x, y, w, h, THRESHOLD)

        qc_results.append((frame_num, fi, qc_ok, x, y, w, h))
        if qc_ok:
            good_faces += 1
        else:
            bad_faces += 1

# Summary report. The pass rate is computed over faces that carry landmarks;
# max(1, ...) avoids a division by zero when there are none.
print(
    "\nTotal faces: {:,}".format(total_faces),
    "Faces with landmarks: {:,}".format(faces_with_lm),
    "✅ Good (≥{:.0f}% inside + ≥2 features): {:,}".format(THRESHOLD * 100, good_faces),
    "❌ Bad (no eyes or insufficient landmarks): {:,}".format(bad_faces),
    "Quality pass rate: {:.1f}%".format(100 * good_faces / max(1, faces_with_lm)),
    sep="\n",
)

# Apply mode: write qc_ok to face_detections.metadata
if args.apply:
    # Imported here because only apply mode needs SQL composition.
    from psycopg2 import sql

    print(f"\n=== Applying QC results to {SCHEMA}.face_detections ===")
    db_url = os.environ.get("DATABASE_URL", "postgres://accusys@localhost:5432/momentry")

    # SCHEMA comes from the command line, so it is quoted as an SQL identifier
    # instead of being spliced into the statement text. Doubled braces are
    # literal braces for sql.SQL.format(); the %s placeholders are bound by
    # cursor.execute(). The statement is composed once, outside the loop.
    update_query = sql.SQL(
        "UPDATE {schema}.face_detections "
        "SET metadata = jsonb_set(COALESCE(metadata, '{{}}'::jsonb), '{{qc_ok}}', %s::jsonb) "
        "WHERE file_uuid = %s AND frame_number = %s "
        "AND x = %s AND y = %s AND width = %s AND height = %s"
    ).format(schema=sql.Identifier(SCHEMA))

    updated = 0
    conn = psycopg2.connect(db_url)
    try:
        with conn.cursor() as cur:
            for frame_num, fi, qc_ok, x, y, w, h in qc_results:
                if None in (x, y, w, h):
                    # "column = NULL" never matches a row, so skip the round trip.
                    continue
                # qc_ok is stored as the JSON *string* "true"/"false".
                qc_json = json.dumps("true" if qc_ok else "false")
                cur.execute(update_query, (qc_json, UUID, frame_num, x, y, w, h))
                if cur.rowcount > 0:
                    updated += 1
        conn.commit()
    finally:
        # Always release the connection; closing without a commit discards
        # any pending updates if an error interrupted the loop.
        conn.close()

    print(f"Updated {updated} rows in {SCHEMA}.face_detections")
    print(f"Skipped {len(qc_results) - updated} rows (no matching face_detections row)")