# momentry_core/scripts/face_landmark_qc.py

#!/opt/homebrew/bin/python3.11
"""
Face landmark QC: verify eyes/nose are within face bounding box.
Flags faces in DB where landmarks don't match the bbox.
Usage: python3 face_landmark_qc.py <file_uuid> [--threshold 0.5] [--fix]
"""
import sys, json, psycopg2, argparse

# Command-line interface: one positional file UUID plus two optional flags.
parser = argparse.ArgumentParser()
parser.add_argument("uuid")
parser.add_argument(
    "--threshold", "-t",
    type=float,
    default=0.5,
    help="Fraction of landmark points that must be inside bbox (default: 0.5)",
)
parser.add_argument(
    "--fix",
    action="store_true",
    help="Update face_detections QC flag in DB",
)
args = parser.parse_args()

# Run parameters taken from the parsed command line.
UUID, THRESHOLD = args.uuid, args.threshold
# The face detection results are read from a fixed, per-UUID location.
FACE_PATH = f"/Users/accusys/momentry/output_dev/{UUID}.face.json"

# Banner describing this run.
for banner_line in (
    "=== Face Landmark QC ===",
    f"UUID: {UUID}",
    f"Threshold: {THRESHOLD * 100:.0f}% points must be inside bbox",
):
    print(banner_line)

# Load face.json
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default text encoding. A missing or corrupt file is the most
# likely failure for a user-supplied UUID, so exit with a clear message
# (non-zero status) rather than a raw traceback.
try:
    with open(FACE_PATH, encoding="utf-8") as f:
        data = json.load(f)
except FileNotFoundError:
    sys.exit(f"ERROR: face JSON not found: {FACE_PATH}")
except json.JSONDecodeError as err:
    sys.exit(f"ERROR: invalid JSON in {FACE_PATH}: {err}")

# Running tallies for the whole file.
total_faces = faces_with_lm = 0
good_faces = bad_faces = 0
bad_frame_ids = set()       # frame numbers containing at least one bad face
bad_face_details = []       # one diagnostic record per bad face

# Index frames by frame number (a later duplicate number replaces an earlier one).
frame_map = {frm['frame']: frm for frm in data['frames']}

for frame_num, frm in frame_map.items():
    for fi, face in enumerate(frm.get('faces', [])):
        total_faces += 1
        lm = face.get('landmarks')
        if not lm:
            # Nothing to verify; the face only counts toward total_faces.
            continue
        faces_with_lm += 1

        x, y, w, h = face['x'], face['y'], face['width'], face['height']

        # inside_pts / total_pts: landmark points inside the bbox vs. all points
        # eye_nose_inside: how many of the three features have >= 1 point inside
        inside_pts = total_pts = eye_nose_inside = 0
        for feature in ('left_eye', 'right_eye', 'nose'):
            feature_pts = lm.get(feature, [])
            total_pts += len(feature_pts)
            hits = sum(
                1
                for px, py in ((p[0], p[1]) for p in feature_pts)
                if (x <= px <= x + w) and (y <= py <= y + h)
            )
            inside_pts += hits
            if hits:
                eye_nose_inside += 1

        # max(1, ...) keeps the division defined when no eye/nose points exist.
        ratio = inside_pts / max(1, total_pts)

        passes_qc = ratio >= THRESHOLD and eye_nose_inside >= 2
        if passes_qc:
            good_faces += 1
            continue

        bad_faces += 1
        bad_frame_ids.add(frame_num)
        bad_face_details.append({
            'frame': frame_num,
            'face_idx': fi,
            'bbox': [x, y, w, h],
            'inside_pts': inside_pts,
            'total_pts': total_pts,
            'ratio': ratio,
            'eye_nose_ok': eye_nose_inside,
        })

# Summary of the QC pass.
threshold_pct = THRESHOLD * 100
# Pass rate is relative to faces that actually carried landmarks.
pass_rate = 100 * good_faces / max(1, faces_with_lm)

print(f"\nTotal faces: {total_faces:,}")
print(f"Faces with landmarks: {faces_with_lm:,}")
print(f"✅ Good (≥{threshold_pct:.0f}% inside + ≥2 features): {good_faces:,}")
print(f"❌ Bad: {bad_faces:,}")
print(f"Quality pass rate: {pass_rate:.1f}%")

print(f"\nBad faces in {len(bad_frame_ids)} unique frames")

# Show the five bad faces with the lowest inside-ratio.
print("\nSample bad faces:")
worst_first = sorted(bad_face_details, key=lambda rec: rec['ratio'])
for rec in worst_first[:5]:
    print(
        f"  frame={rec['frame']}, bbox={rec['bbox']}, "
        f"{rec['inside_pts']}/{rec['total_pts']} inside ({rec['ratio']*100:.0f}%), "
        f"eye/nose={rec['eye_nose_ok']}/3"
    )

# Show sample good faces
# A face is listed here only if it satisfies the same rule the main QC pass
# uses to count a face as good: only left_eye / right_eye / nose points are
# considered, at least THRESHOLD of them lie inside the bbox, and at least two
# of those three features have a point inside. Previously every landmark group
# was counted and the feature requirement was skipped, so a face tallied as
# bad could be printed as a "good" sample.
print(f"\nSample good faces:")
good_details = []
for frame_num, frm in frame_map.items():
    for face in frm.get('faces', []):
        lm = face.get('landmarks')
        if not lm:
            continue
        x, y, w, h = face['x'], face['y'], face['width'], face['height']

        inside = 0        # eye/nose points inside the bbox
        total = 0         # all eye/nose points
        features_ok = 0   # features with at least one point inside
        for lm_type in ('left_eye', 'right_eye', 'nose'):
            points = lm.get(lm_type, [])
            hits = sum(1 for pt in points
                if (x <= pt[0] <= x + w) and (y <= pt[1] <= y + h))
            total += len(points)
            inside += hits
            if hits:
                features_ok += 1

        if inside / max(1, total) >= THRESHOLD and features_ok >= 2:
            good_details.append((frame_num, x, y, w, h, inside, total))
            if len(good_details) >= 5:
                break
    # Stop scanning further frames once five samples are collected.
    if len(good_details) >= 5:
        break

for g in good_details:
    print(f"  frame={g[0]}, bbox=[{g[1]},{g[2]},{g[3]},{g[4]}], {g[5]}/{g[6]} inside ({100*g[5]/max(1,g[6]):.0f}%)")