feat: trace quality agent selection report, identity clustering runner_v2 DB write, age/gender CoreML selection, updated experiment config UUID
This commit is contained in:
205
scripts/head_shoulder_bench.py
Normal file
205
scripts/head_shoulder_bench.py
Normal file
@@ -0,0 +1,205 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Head-to-Shoulder Ratio age-estimation experiment.

Uses Apple Vision's VNDetectHumanBodyPoseRequest to extract the shoulder width,
then computes the head-to-shoulder ratio from the already-detected face width.
|
||||
"""
|
||||
|
||||
import json, os, sys, subprocess, tempfile
|
||||
from pathlib import Path
|
||||
|
||||
# --- Experiment configuration ------------------------------------------------
# Each value can be overridden with an environment variable so the benchmark
# can be pointed at another video, database or output directory without
# editing this file. The defaults are the original hard-coded values.
VIDEO = os.environ.get(
    "HEAD_SHOULDER_VIDEO",
    "/Users/accusys/test_video/Old_Time_Movie_Show_-_Charade_1963.HD.mov",
)
DB_URL = os.environ.get(
    "HEAD_SHOULDER_DB_URL",
    "postgresql://accusys@localhost:5432/momentry",
)
# Matched against dev.face_detections.file_uuid in the queries below.
FILE_UUID = os.environ.get(
    "HEAD_SHOULDER_FILE_UUID",
    "1a04db97be5fa12bd77369831dc141fd",
)
OUT_DIR = Path(
    os.environ.get(
        "HEAD_SHOULDER_OUT_DIR",
        "/Users/accusys/momentry/output_dev/experiments/head_shoulder",
    )
)
# Extracted frames, the compiled Swift helper and the JSON report all go here.
OUT_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# 1. Get trace samples (same 12 traces from DeepFace benchmark)
import psycopg2

# Selects the 12 traces with the most detections (at least 5 each) for the
# file, together with their first/last frame number and mean confidence.
# The file UUID is passed as a bound parameter instead of being formatted
# into the SQL text, matching the per-trace bbox query further down.
trace_query = """
    WITH ranked AS (
        SELECT trace_id, COUNT(*) AS fc, MIN(frame_number) AS first_frame,
               MAX(frame_number) AS last_frame, AVG(confidence) AS avg_conf
        FROM dev.face_detections
        WHERE file_uuid = %s AND trace_id IS NOT NULL
        GROUP BY trace_id HAVING COUNT(*) >= 5
    )
    SELECT trace_id, fc, first_frame, last_frame, ROUND(avg_conf::numeric,3)
    FROM ranked
    ORDER BY fc DESC LIMIT 12
"""

conn = psycopg2.connect(DB_URL)
try:
    cur = conn.cursor()
    try:
        cur.execute(trace_query, (FILE_UUID,))
        # Rows are (trace_id, fc, first_frame, last_frame, avg_conf).
        samples = cur.fetchall()
    finally:
        cur.close()
finally:
    # Release the connection even when the query fails.
    conn.close()

print(f"Selected {len(samples)} traces for head-shoulder ratio benchmark\n")
|
||||
|
||||
# 2. Extract the middle frame of each trace from the video with ffmpeg
from PIL import Image

# Each entry is (trace_id, face_count, first_frame, avg_conf, frame_path).
frames = []
for trace_id, fc, first, last, conf in samples:
    mid_frame = (first + last) // 2
    # The frame -> seconds conversion hard-codes 24 frames per second.
    mid_sec = mid_frame / 24.0
    frame_file = OUT_DIR / f"trace_{trace_id}_frame_{mid_frame}.jpg"

    extraction = subprocess.run([
        "ffmpeg", "-y", "-ss", str(mid_sec), "-i", VIDEO,
        "-frames:v", "1", "-q:v", "2", str(frame_file)
    ], capture_output=True)

    # Skip the trace when ffmpeg failed or wrote nothing usable. Checking the
    # exit status also avoids picking up a stale JPEG left by an earlier run,
    # and is_file() avoids the FileNotFoundError that stat() would raise when
    # no output was written at all.
    if (
        extraction.returncode != 0
        or not frame_file.is_file()
        or frame_file.stat().st_size <= 1000
    ):
        print(
            f" trace_{trace_id}: frame {mid_frame} could not be extracted, skipping",
            file=sys.stderr,
        )
        continue

    frames.append((trace_id, fc, first, conf, str(frame_file)))
    print(f" trace_{trace_id}: frame {mid_frame} ({mid_sec:.0f}s)")
|
||||
|
||||
# 3. Get face bbox from face_detections DB
# The frames tuples carry no last_frame, so look up each trace's own value
# from samples (rows are trace_id, fc, first_frame, last_frame, avg_conf).
# Reusing the `last` variable left over from the extraction loop would apply
# the final sample's last_frame to every trace.
last_frame_by_trace = {row[0]: row[3] for row in samples}

face_boxes = {}
conn = psycopg2.connect(DB_URL)
try:
    cur = conn.cursor()
    try:
        for trace_id, fc, first, conf, _ in frames:
            mid_frame = (first + last_frame_by_trace[trace_id]) // 2
            # Take the detection of this trace closest to the mid frame.
            cur.execute("""
                SELECT x, y, width, height, frame_number
                FROM dev.face_detections
                WHERE file_uuid = %s AND trace_id = %s
                ORDER BY ABS(frame_number - %s) LIMIT 1
            """, (FILE_UUID, trace_id, mid_frame))
            row = cur.fetchone()
            if row:
                face_boxes[trace_id] = {"x": row[0], "y": row[1], "w": row[2], "h": row[3], "frame": row[4]}
    finally:
        cur.close()
finally:
    # Release the connection even when a query fails.
    conn.close()

print(f"\nFace bboxes loaded: {len(face_boxes)} traces\n")
|
||||
|
||||
# 4. Run Apple Vision body pose detection on each frame
# Vision is reached through a small Swift command-line helper: it takes one
# image path and prints a JSON list with one {joint: value} dict per detected
# body, or "{}" when the image cannot be loaded or nothing is detected.
#
# Output contract of the helper:
#   * "<joint>" holds the x coordinate and "<joint>_y" the y coordinate.
#   * Both are in Vision's normalized [0, 1] space; y is flipped so that the
#     origin is the top-left corner.
#   * The joints the benchmark looks up get fixed short keys
#     ("left_shoulder", "right_shoulder", "neck", "root"); every other joint
#     keeps the String(describing:) form of its Vision joint name.

swift_code = '''
import Foundation
import Vision
import AppKit

let args = CommandLine.arguments
guard args.count >= 2 else { exit(1) }
let imagePath = args[1]

guard let image = NSImage(contentsOfFile: imagePath),
      let tiff = image.tiffRepresentation,
      let bitmap = NSBitmapImageRep(data: tiff),
      let cgImage = bitmap.cgImage else {
    print("{}")
    exit(0)
}

// Fixed short keys for the joints the Python side reads.
let shortNames: [VNHumanBodyPoseObservation.JointName: String] = [
    .leftShoulder: "left_shoulder",
    .rightShoulder: "right_shoulder",
    .neck: "neck",
    .root: "root",
]

let request = VNDetectHumanBodyPoseRequest()
let handler = VNImageRequestHandler(cgImage: cgImage)

do {
    try handler.perform([request])
    guard let results = request.results, !results.isEmpty else {
        print("{}")
        exit(0)
    }

    var output: [[String: Double]] = []
    for obs in results {
        var joints: [String: Double] = [:]
        do {
            let pts = try obs.recognizedPoints(.all)
            // Vision (0,0) = bottom-left, (1,1) = top-right.
            // Keep normalized units but flip Y to a top-left origin.
            for (name, pt) in pts {
                if pt.confidence > 0.3 {
                    let key = shortNames[name] ?? String(describing: name)
                    let x = Double(pt.location.x)
                    let y = 1.0 - Double(pt.location.y)
                    joints[key] = (x * 10000).rounded() / 10000
                    joints[key + "_y"] = (y * 10000).rounded() / 10000
                }
            }
        } catch {}
        if !joints.isEmpty { output.append(joints) }
    }

    let jsonData = try JSONSerialization.data(withJSONObject: output, options: [])
    print(String(data: jsonData, encoding: .utf8)!)
} catch {
    print("{}")
}
'''

swift_file = OUT_DIR / "detect_body.swift"
swift_file.write_text(swift_code)
# check=True aborts the benchmark when the helper does not compile.
subprocess.run(["swiftc", "-o", str(OUT_DIR / "detect_body"), str(swift_file)], check=True)
|
||||
|
||||
print("=" * 60)
print("Head-to-Shoulder Ratio Benchmark")
print("=" * 60)
print()

# One entry per frame in which a body was detected; frames without a detected
# body are only reported on stdout.
results = []
for trace_id, fc, first_frame, conf, frame_path in frames:
    detection = subprocess.run(
        [str(OUT_DIR / "detect_body"), frame_path],
        capture_output=True, text=True
    )
    try:
        parsed = json.loads(detection.stdout.strip())
    except json.JSONDecodeError:
        # Empty or non-JSON output (e.g. the helper crashed).
        parsed = []
    # The helper prints "{}" on failure; only a list of joint dicts is usable.
    joints_list = parsed if isinstance(parsed, list) else []

    # Face width from the stored bbox; 0 when the trace has no bbox. Non-int
    # DB numerics are coerced to float so the division, the formatting and the
    # JSON dump below all work.
    raw_face_w = face_boxes.get(trace_id, {}).get("w") or 0
    face_w = raw_face_w if isinstance(raw_face_w, int) else float(raw_face_w)

    if not joints_list:
        print(f" trace_{trace_id:5d} | face={face_w:4.0f}px | no body detected")
        continue

    # Only the first detected body is used.
    joints = joints_list[0]
    l_shoulder = joints.get("left_shoulder")
    r_shoulder = joints.get("right_shoulder")

    # Horizontal shoulder distance in the units emitted by the helper;
    # -1 marks "at least one shoulder missing".
    shoulder_w = -1
    if l_shoulder is not None and r_shoulder is not None:
        shoulder_w = abs(l_shoulder - r_shoulder)

    ratio = face_w / shoulder_w if shoulder_w > 0 else 0

    results.append({
        "trace_id": trace_id,
        "faces": fc,
        # The frame -> seconds conversion hard-codes 24 frames per second.
        "first_sec": round(first_frame / 24.0, 1),
        "face_w_px": face_w,
        "shoulder_w_unit": round(shoulder_w, 3),
        "ratio": round(ratio, 2),
        "joints": joints,
    })

    status = "OK" if ratio > 0 else "no shoulder"
    print(f" trace_{trace_id:5d} | face={face_w:4.0f}px | shoulder={shoulder_w:.3f} | ratio={ratio:.2f} | {status}")
|
||||
|
||||
# 5. Save results
report = {
    "method": "Apple Vision Head-to-Shoulder Ratio",
    "video": "Charade (1963)",
    # Number of extracted frames, not the number of entries in "results".
    "samples": len(frames),
    "results": results,
    "notes": "Ratio = face_width_px / shoulder_width_normalized. Higher ratio = proportionally larger head (younger)."
}

# ensure_ascii=False writes non-ASCII characters verbatim, so the encoding is
# pinned to UTF-8 instead of depending on the platform default.
with open(OUT_DIR / "head_shoulder_report.json", "w", encoding="utf-8") as f:
    json.dump(report, f, indent=2, ensure_ascii=False)

print(f"\nReport saved: {OUT_DIR}/head_shoulder_report.json")
print("\nNote: Apple Vision body pose returns normalized coordinates.")
print("Shoulder width is in Vision normalized [0,1] space.")
print("For meaningful ratio, face_bbox needs to be in same coordinate space.")
print("Consider using Vision face detection + body pose simultaneously on the same frame.")
|
||||
Reference in New Issue
Block a user