feat: trace quality agent selection report, identity clustering runner_v2 DB write, age/gender CoreML selection, updated experiment config UUID

2026-05-06 14:41:48 +08:00
parent 74b6182eba
commit 65a1f77e65
1048 changed files with 103499 additions and 0 deletions
--- a/scripts/head_shoulder_bench.py
+++ b/scripts/head_shoulder_bench.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python3
+"""
+Head-to-shoulder ratio age-estimation experiment.
+Uses Apple Vision's VNDetectHumanBodyPoseRequest to extract the shoulder width,
+then computes the head-to-shoulder ratio from the already-detected face width.
+"""
+
+import json, os, sys, subprocess, tempfile
+from pathlib import Path
+
+# Source video passed to ffmpeg as the input when extracting frames.
+VIDEO = "/Users/accusys/test_video/Old_Time_Movie_Show_-_Charade_1963.HD.mov"
+# Connection string handed to psycopg2.connect().
+DB_URL = "postgresql://accusys@localhost:5432/momentry"
+# Value matched against the file_uuid column of dev.face_detections.
+FILE_UUID = "1a04db97be5fa12bd77369831dc141fd"
+# Directory that receives the extracted frames, the Swift helper source and
+# binary, and the JSON report. It is created as a side effect of running the script.
+OUT_DIR = Path("/Users/accusys/momentry/output_dev/experiments/head_shoulder")
+OUT_DIR.mkdir(parents=True, exist_ok=True)
+
+# 1. Get trace samples (same 12 traces from DeepFace benchmark)
+import psycopg2
+conn = psycopg2.connect(DB_URL)
+cur = conn.cursor()
+cur.execute(f"""
+    WITH ranked AS (
+        SELECT trace_id, COUNT(*) AS fc, MIN(frame_number) AS first_frame,
+               MAX(frame_number) AS last_frame, AVG(confidence) AS avg_conf
+        FROM dev.face_detections
+        WHERE file_uuid = '{FILE_UUID}' AND trace_id IS NOT NULL
+        GROUP BY trace_id HAVING COUNT(*) >= 5
+    )
+    SELECT trace_id, fc, first_frame, last_frame, ROUND(avg_conf::numeric,3)
+    FROM ranked
+    ORDER BY fc DESC LIMIT 12
+""")
+samples = cur.fetchall()
+cur.close()
+conn.close()
+
+print(f"Selected {len(samples)} traces for head-shoulder ratio benchmark\n")
+
+# 2. Extract frames + face crops for each trace
+from PIL import Image
+frames = []
+for trace_id, fc, first, last, conf in samples:
+    mid_frame = (first + last) // 2
+    mid_sec = mid_frame / 24.0
+    frame_file = OUT_DIR / f"trace_{trace_id}_frame_{mid_frame}.jpg"
+    
+    subprocess.run([
+        "ffmpeg", "-y", "-ss", str(mid_sec), "-i", VIDEO,
+        "-frames:v", "1", "-q:v", "2", str(frame_file)
+    ], capture_output=True)
+    
+    if frame_file.stat().st_size > 1000:
+        frames.append((trace_id, fc, first, conf, str(frame_file)))
+        print(f"  trace_{trace_id}: frame {mid_frame} ({mid_sec:.0f}s)")
+
+# 3. Get face bbox from face_detections DB
+conn = psycopg2.connect(DB_URL)
+cur = conn.cursor()
+face_boxes = {}
+for trace_id, fc, first, conf, _ in frames:
+    mid_frame = (first + last) // 2
+    cur.execute("""
+        SELECT x, y, width, height, frame_number
+        FROM dev.face_detections
+        WHERE file_uuid = %s AND trace_id = %s
+        ORDER BY ABS(frame_number - %s) LIMIT 1
+    """, (FILE_UUID, trace_id, mid_frame))
+    row = cur.fetchone()
+    if row:
+        face_boxes[trace_id] = {"x": row[0], "y": row[1], "w": row[2], "h": row[3], "frame": row[4]}
+cur.close()
+conn.close()
+
+print(f"\nFace bboxes loaded: {len(face_boxes)} traces\n")
+
+# 4. Run Apple Vision body pose detection on each frame
+# Using a simple AppleScript/Python bridge or subprocess to swift
+# For now, use Vision via a minimal Swift script that processes a single image
+
+swift_code = '''
+import Foundation
+import Vision
+import AppKit
+
+let args = CommandLine.arguments
+guard args.count >= 2 else { exit(1) }
+let imagePath = args[1]
+
+guard let image = NSImage(contentsOfFile: imagePath),
+      let tiff = image.tiffRepresentation,
+      let bitmap = NSBitmapImageRep(data: tiff),
+      let cgImage = bitmap.cgImage else {
+    print("{}")
+    exit(0)
+}
+
+let request = VNDetectHumanBodyPoseRequest()
+let handler = VNImageRequestHandler(cgImage: cgImage)
+
+do {
+    try handler.perform([request])
+    guard let results = request.results, !results.isEmpty else {
+        print("{}")
+        exit(0)
+    }
+    
+    var output: [[String: Double]] = []
+    for obs in results {
+        var joints: [String: Double] = [:]
+        do {
+            let pts = try obs.recognizedPoints(.all)
+            let imgH = Double(image.size.height)
+            // Vision (0,0) = bottom-left, (1,1) = top-right
+            // Convert to pixel coordinates (top-left origin)
+            for (name, pt) in pts {
+                if pt.confidence > 0.3 {
+                    let x = pt.location.x
+                    let y = imgH - pt.location.y  // flip Y
+                    joints[String(describing: name)] = round(x * 100) / 100
+                    joints[String(describing: name) + "_y"] = round(y * 100) / 100
+                }
+            }
+        } catch {}
+        if !joints.isEmpty { output.append(joints) }
+    }
+    
+    let jsonData = try JSONSerialization.data(withJSONObject: output, options: [])
+    print(String(data: jsonData, encoding: .utf8)!)
+} catch {
+    print("{}")
+}
+'''
+
+swift_file = OUT_DIR / "detect_body.swift"
+swift_file.write_text(swift_code)
+subprocess.run(["swiftc", "-o", str(OUT_DIR / "detect_body"), str(swift_file)], check=True)
+
+print("=" * 60)
+print("Head-to-Shoulder Ratio Benchmark")
+print("=" * 60)
+print()
+
+results = []
+for trace_id, fc, first_frame, conf, frame_path in frames:
+    result = subprocess.run(
+        [str(OUT_DIR / "detect_body"), frame_path],
+        capture_output=True, text=True
+    )
+    try:
+        joints_list = json.loads(result.stdout.strip())
+    except:
+        joints_list = []
+    
+    fb = face_boxes.get(trace_id, {"w": 0})
+    face_w = fb["w"]
+    
+    if joints_list:
+        joints = joints_list[0]
+        # Find shoulder keypoints
+        l_shoulder = joints.get("left_shoulder", None)
+        r_shoulder = joints.get("right_shoulder", None)
+        neck = joints.get("neck", joints.get("root", None))
+        
+        # Calculate shoulder width in pixels
+        shoulder_w = -1
+        if l_shoulder is not None and r_shoulder is not None:
+            ly = joints.get("left_shoulder_y", 0)
+            ry = joints.get("right_shoulder_y", 0)
+            shoulder_w = abs(l_shoulder - r_shoulder)  # normalized coords
+        
+        ratio = face_w / shoulder_w if shoulder_w > 0 else 0
+        
+        h2s = {
+            "trace_id": trace_id,
+            "faces": fc,
+            "first_sec": round(first_frame / 24.0, 1),
+            "face_w_px": face_w,
+            "shoulder_w_unit": round(shoulder_w, 3),
+            "ratio": round(ratio, 2),
+            "joints": joints,
+        }
+        results.append(h2s)
+        
+        status = "OK" if ratio > 0 else "no shoulder"
+        print(f"  trace_{trace_id:5d} | face={face_w:4d}px | shoulder={shoulder_w:.3f} | ratio={ratio:.2f} | {status}")
+    else:
+        print(f"  trace_{trace_id:5d} | face={face_w:4d}px | no body detected")
+
+# 5. Save results
+report = {
+    "method": "Apple Vision Head-to-Shoulder Ratio",
+    "video": "Charade (1963)",
+    "samples": len(frames),
+    "results": results,
+    "notes": "Ratio = face_width_px / shoulder_width_normalized. Higher ratio = proportionally larger head (younger)."
+}
+
+with open(OUT_DIR / "head_shoulder_report.json", "w") as f:
+    json.dump(report, f, indent=2, ensure_ascii=False)
+
+print(f"\nReport saved: {OUT_DIR}/head_shoulder_report.json")
+print(f"\nNote: Apple Vision body pose returns normalized coordinates.")
+print(f"Shoulder width is in Vision normalized [0,1] space.")
+print(f"For meaningful ratio, face_bbox needs to be in same coordinate space.")
+print(f"Consider using Vision face detection + body pose simultaneously on the same frame.")