- Remove session-ses_2f27.md (161KB raw session log) - Remove 49 ROOT_* duplicate files across REFERENCE/ - Remove 14 duplicate files between REFERENCE/ root and history/ - Remove asr_legacy.rs (dead code, replaced by asr.rs) - Remove src/core/worker/ (duplicate JobWorker) - Remove src/core/layers/ (empty directory) - Remove 4 .bak files in src/ - Remove 7 dead private methods in worker/processor.rs - Remove backup directory from git tracking
402 lines
12 KiB
Python
402 lines
12 KiB
Python
#!/opt/homebrew/bin/python3.11
|
|
"""
|
|
Pose Analyzer - Multi-feature Pose Angle Classification
|
|
|
|
Purpose:
|
|
1. Calculate pose angle from 5-point landmarks (InsightFace kps)
|
|
2. Use multiple features for accurate classification:
|
|
- nose_to_eye_ratio: nose distance relative to eye width
|
|
- eye_slope: eye line slope (pitch detection)
|
|
- nose_offset: nose position relative to eye center
|
|
- mouth_symmetry: mouth corners symmetry
|
|
3. Provide confidence score for classification
|
|
|
|
Landmarks Order (InsightFace kps):
|
|
- 0: left eye
|
|
- 1: right eye
|
|
- 2: nose
|
|
- 3: left mouth corner
|
|
- 4: right mouth corner
|
|
|
|
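Angles (ratio thresholds listed are the V1 cut-offs; V2 uses 0.35 / 0.55 combined with the normalized nose offset):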
|
|
- frontal: nose near center, low ratio (< 0.4)
|
|
- three_quarter: moderate offset (ratio 0.4 - 0.6)
|
|
- profile_left: nose left of eye center (ratio > 0.6)
|
|
- profile_right: nose right of eye center (ratio > 0.6)
|
|
|
|
Usage:
|
|
from pose_analyzer import calculate_pose_angle_v2
|
|
|
|
pose_result = calculate_pose_angle_v2(landmarks)
|
|
print(f"Angle: {pose_result['angle']}, Confidence: {pose_result['confidence']}")
|
|
"""
|
|
|
|
import numpy as np
|
|
from typing import Dict, List, Tuple
|
|
|
|
|
|
def calculate_nose_to_eye_ratio(landmarks: List) -> Tuple[float, float, float]:
    """
    Calculate the nose-to-eye ratio.

    The ratio is the Euclidean distance from the nose (landmark 2) to the
    midpoint between the eyes (landmarks 0 and 1), divided by the distance
    between the two eyes.

    Args:
        landmarks: Sequence of at least 5 points; only the first two
            coordinates of each point are read.

    Returns:
        (ratio, eye_width, nose_to_eye_distance) as built-in floats.
        All zeros when ``landmarks`` is ``None`` or has fewer than 5 points;
        ``ratio`` is 0.0 when both eye points coincide.
    """
    if landmarks is None or len(landmarks) < 5:
        return (0.0, 0.0, 0.0)

    left_eye = np.array(landmarks[0][:2])
    right_eye = np.array(landmarks[1][:2])
    nose = np.array(landmarks[2][:2])

    eye_center = (left_eye + right_eye) / 2

    # float() strips the NumPy scalar type (np.float32 for float32 keypoints),
    # which the json module cannot serialize and which contradicts the
    # annotated return type.
    eye_width = float(np.linalg.norm(right_eye - left_eye))
    nose_to_eye = float(np.linalg.norm(nose - eye_center))

    # Guard against coincident eye points to avoid a division by zero.
    ratio = nose_to_eye / eye_width if eye_width > 0 else 0.0

    return (ratio, eye_width, nose_to_eye)
|
|
|
|
|
|
def calculate_eye_slope(landmarks: List) -> Tuple[float, float]:
    """
    Calculate the slope of the line through the two eye landmarks.

    The slope is ``dy / dx`` of the vector from landmark 0 (left eye) to
    landmark 1 (right eye); the angle is ``arctan(slope)`` in degrees.

    The original documentation describes this as a pitch cue
    (positive slope = head tilted down, negative slope = head tilted up).
    NOTE(review): geometrically an eye-line slope reflects in-plane
    rotation (roll) rather than pitch - confirm the intended meaning.

    Args:
        landmarks: Sequence of at least 5 points; only the first two
            coordinates of landmarks 0 and 1 are read.

    Returns:
        (slope, angle_degrees) as built-in floats. ``(0.0, 0.0)`` when
        ``landmarks`` is ``None`` or has fewer than 5 points. A vertical eye
        line (``dx == 0``) is also reported as slope 0.0 / angle 0.0.
    """
    if landmarks is None or len(landmarks) < 5:
        return (0.0, 0.0)

    left_eye = np.array(landmarks[0][:2])
    right_eye = np.array(landmarks[1][:2])

    # Convert to built-in floats so the results are plain Python numbers
    # (NumPy scalars such as np.float32 are not JSON-serializable).
    dx = float(right_eye[0] - left_eye[0])
    dy = float(right_eye[1] - left_eye[1])

    # dx == 0 falls back to 0.0 instead of dividing by zero.
    slope = dy / dx if dx != 0 else 0.0
    angle_degrees = float(np.arctan(slope) * 180 / np.pi)

    return (slope, angle_degrees)
|
|
|
|
|
|
def calculate_nose_offset(landmarks: List) -> Tuple[float, float]:
    """
    Calculate the horizontal offset of the nose relative to the eye center.

    ``offset_x`` is the nose x-coordinate minus the x-coordinate of the
    midpoint between the eyes (negative when the nose lies at a smaller x).
    ``normalized_offset`` divides that by the distance between the eyes.

    Args:
        landmarks: Sequence of at least 5 points; only the first two
            coordinates of landmarks 0, 1 and 2 are read.

    Returns:
        (offset_x, normalized_offset) as built-in floats. ``(0.0, 0.0)``
        when ``landmarks`` is ``None`` or has fewer than 5 points;
        ``normalized_offset`` is 0.0 when both eye points coincide.
    """
    if landmarks is None or len(landmarks) < 5:
        return (0.0, 0.0)

    left_eye = np.array(landmarks[0][:2])
    right_eye = np.array(landmarks[1][:2])
    nose = np.array(landmarks[2][:2])

    eye_center = (left_eye + right_eye) / 2

    # Built-in floats keep the result JSON-serializable even when the
    # keypoints arrive as a float32 array.
    eye_width = float(np.linalg.norm(right_eye - left_eye))
    offset_x = float(nose[0] - eye_center[0])

    # Guard against coincident eye points to avoid a division by zero.
    normalized_offset = offset_x / eye_width if eye_width > 0 else 0.0

    return (offset_x, normalized_offset)
|
|
|
|
|
|
def calculate_mouth_symmetry(landmarks: List) -> Tuple[float, float]:
    """
    Calculate how symmetric the mouth corners are around the nose.

    The symmetry score is the shorter nose-to-mouth-corner distance divided
    by the longer one, so 1.0 means both corners are equally far from the
    nose and smaller values mean more asymmetry. The original documentation
    notes that profile faces show asymmetric mouth corners.

    Args:
        landmarks: Sequence of at least 5 points; only the first two
            coordinates of landmarks 2, 3 and 4 are read.

    Returns:
        (symmetry_score, mouth_width) as built-in floats. ``(1.0, 0.0)``
        when ``landmarks`` is ``None`` or has fewer than 5 points;
        ``symmetry_score`` is 1.0 when both corners coincide with the nose.
    """
    if landmarks is None or len(landmarks) < 5:
        return (1.0, 0.0)

    left_mouth = np.array(landmarks[3][:2])
    right_mouth = np.array(landmarks[4][:2])
    nose = np.array(landmarks[2][:2])

    # Built-in floats keep the result JSON-serializable even when the
    # keypoints arrive as a float32 array.
    mouth_width = float(np.linalg.norm(right_mouth - left_mouth))
    left_dist = float(np.linalg.norm(left_mouth - nose))
    right_dist = float(np.linalg.norm(right_mouth - nose))

    farther = max(left_dist, right_dist)
    # farther == 0 means both corners sit on the nose: treat as symmetric.
    symmetry = min(left_dist, right_dist) / farther if farther > 0 else 1.0

    return (symmetry, mouth_width)
|
|
|
|
|
|
def calculate_jaw_visibility_hint(landmarks: List) -> float:
    """
    Estimate a jaw-visibility hint from vertical landmark spacing.

    The value is the vertical distance from the nose to the mouth center
    divided by the vertical distance from the eye center to the nose,
    clamped to the range [0.0, 1.0]. The original documentation states that
    for profile faces one side of the jaw is more visible.
    NOTE(review): how this vertical ratio relates to jaw visibility is not
    evident from the code - confirm with the feature's author.

    Args:
        landmarks: Sequence of at least 5 points; only the first two
            coordinates of each point are read.

    Returns:
        visibility_hint in [0.0, 1.0] as a built-in float. 0.5 when
        ``landmarks`` is ``None`` or has fewer than 5 points, or when the
        nose is not below the eye center (eye-to-nose distance <= 0).
    """
    if landmarks is None or len(landmarks) < 5:
        return 0.5

    left_eye = np.array(landmarks[0][:2])
    right_eye = np.array(landmarks[1][:2])
    nose = np.array(landmarks[2][:2])
    left_mouth = np.array(landmarks[3][:2])
    right_mouth = np.array(landmarks[4][:2])

    eye_center_y = (left_eye[1] + right_eye[1]) / 2
    mouth_center_y = (left_mouth[1] + right_mouth[1]) / 2

    nose_to_mouth_dist = mouth_center_y - nose[1]
    eye_to_nose_dist = nose[1] - eye_center_y

    # Neutral 0.5 when the denominator is zero or negative.
    ratio = nose_to_mouth_dist / eye_to_nose_dist if eye_to_nose_dist > 0 else 0.5

    # float() makes the return type uniform: without it the unclamped branch
    # would hand back a NumPy scalar while the clamped branches return a
    # built-in float.
    return float(min(1.0, max(0.0, ratio)))
|
|
|
|
|
|
def classify_angle_from_features(
    ratio: float,
    nose_offset_norm: float,
    mouth_symmetry: float,
    eye_slope: float,
) -> Tuple[str, float]:
    """
    Map the extracted features to an angle label and a confidence.

    Decision order:
      1. ratio < 0.35 and |offset| < 0.15   -> frontal (0.95)
      2. ratio < 0.55 and |offset| < 0.25   -> three_quarter (0.85)
      3. ratio >= 0.55:
           offset < -0.1 -> profile_left, offset > 0.1 -> profile_right,
           with confidence 0.90 when mouth_symmetry < 0.85, else 0.75;
           otherwise three_quarter (0.70)
      4. anything else                      -> unknown (0.50)

    ``eye_slope`` is accepted for interface compatibility but is not used
    in the decision.

    Returns:
        (angle_type, confidence)
    """
    offset_magnitude = abs(nose_offset_norm)

    if ratio < 0.35 and offset_magnitude < 0.15:
        return ("frontal", 0.95)

    if ratio < 0.55 and offset_magnitude < 0.25:
        return ("three_quarter", 0.85)

    # Written as a negated ">=" so that a NaN ratio still lands here.
    if not (ratio >= 0.55):
        return ("unknown", 0.50)

    if nose_offset_norm < -0.1:
        side = "profile_left"
    elif nose_offset_norm > 0.1:
        side = "profile_right"
    else:
        # Large ratio but the nose is horizontally close to the eye center.
        return ("three_quarter", 0.70)

    # An asymmetric mouth backs up the profile call with higher confidence.
    certainty = 0.90 if mouth_symmetry < 0.85 else 0.75
    return (side, certainty)
|
|
|
|
|
|
def calculate_pose_angle_v2(landmarks: List) -> Dict:
    """
    Calculate pose angle using multi-feature analysis (V2).

    Combines several landmark-derived features:
    - nose_to_eye_ratio
    - eye_slope (reported under the "pitch" key)
    - nose_offset (horizontal nose position)
    - mouth_symmetry

    Args:
        landmarks: Sequence of 5 points [[x, y], [x, y], ...]
            Order: left_eye, right_eye, nose, left_mouth, right_mouth

    Returns:
        Dict with:
        - angle: 'frontal', 'three_quarter', 'profile_left',
          'profile_right', 'unknown'
        - confidence: 0.0 - 1.0
        - pitch: 'tilted_down' (eye_slope > 0.15), 'tilted_up'
          (eye_slope < -0.15), 'neutral', or 'unknown' when there are too
          few landmarks
        - features: Dict of all calculated features as built-in floats
          (empty when there are too few landmarks)
        - method: always 'v2_multi_feature'
        - landmarks_count: number of landmarks received

        The same set of keys is returned on every path, so callers can
        index the result without checking for missing landmarks first.
    """
    if landmarks is None or len(landmarks) < 5:
        return {
            "angle": "unknown",
            "confidence": 0.0,
            "pitch": "unknown",
            "features": {},
            "method": "v2_multi_feature",
            "landmarks_count": 0 if landmarks is None else len(landmarks),
        }

    ratio, eye_width, nose_to_eye = calculate_nose_to_eye_ratio(landmarks)
    eye_slope, eye_angle = calculate_eye_slope(landmarks)
    nose_offset, nose_offset_norm = calculate_nose_offset(landmarks)
    mouth_symmetry, mouth_width = calculate_mouth_symmetry(landmarks)
    jaw_hint = calculate_jaw_visibility_hint(landmarks)

    angle, confidence = classify_angle_from_features(
        ratio=ratio,
        nose_offset_norm=nose_offset_norm,
        mouth_symmetry=mouth_symmetry,
        eye_slope=eye_slope,
    )

    if eye_slope > 0.15:
        pitch = "tilted_down"
    elif eye_slope < -0.15:
        pitch = "tilted_up"
    else:
        pitch = "neutral"

    # float() before round(): the helpers may return NumPy scalars
    # (np.float32 for float32 keypoints), and round() keeps that type,
    # which json.dumps rejects.
    features = {
        "nose_to_eye_ratio": round(float(ratio), 4),
        "eye_width": round(float(eye_width), 2),
        "nose_to_eye_dist": round(float(nose_to_eye), 2),
        "eye_slope": round(float(eye_slope), 4),
        "eye_angle_deg": round(float(eye_angle), 2),
        "nose_offset_x": round(float(nose_offset), 2),
        "nose_offset_norm": round(float(nose_offset_norm), 4),
        "mouth_symmetry": round(float(mouth_symmetry), 4),
        "mouth_width": round(float(mouth_width), 2),
        "jaw_visibility_hint": round(float(jaw_hint), 4),
    }

    return {
        "angle": angle,
        "confidence": confidence,
        "pitch": pitch,
        "features": features,
        "method": "v2_multi_feature",
        "landmarks_count": len(landmarks),
    }
|
|
|
|
|
|
def calculate_pose_angle_v1(landmarks: List) -> Dict:
    """
    Legacy version (V1) - single-feature, ratio-based classification.

    Kept for comparison purposes only. Uses the nose-to-eye ratio
    (distance from the nose to the eye midpoint divided by the eye
    distance):
    - ratio < 0.4  -> 'frontal'
    - ratio < 0.6  -> 'three_quarter'
    - otherwise 'profile_left' when the nose x is smaller than the eye
      center x, else 'profile_right'

    Args:
        landmarks: Sequence of at least 5 points; only the first two
            coordinates of landmarks 0, 1 and 2 are read.

    Returns:
        Dict with 'angle', 'confidence' (fixed 0.7), 'ratio' (built-in
        float rounded to 4 places) and 'method'. When ``landmarks`` is
        ``None`` or has fewer than 5 points, only
        ``{"angle": "unknown", "confidence": 0.0}`` is returned.
    """
    if landmarks is None or len(landmarks) < 5:
        return {"angle": "unknown", "confidence": 0.0}

    left_eye = np.array(landmarks[0][:2])
    right_eye = np.array(landmarks[1][:2])
    nose = np.array(landmarks[2][:2])

    eye_center = (left_eye + right_eye) / 2

    # Built-in floats keep the returned ratio JSON-serializable even when
    # the keypoints arrive as a float32 array.
    eye_width = float(np.linalg.norm(right_eye - left_eye))
    nose_to_eye = float(np.linalg.norm(nose - eye_center))

    # Guard against coincident eye points to avoid a division by zero.
    ratio = nose_to_eye / eye_width if eye_width > 0 else 0.0

    if ratio < 0.4:
        angle = "frontal"
    elif ratio < 0.6:
        angle = "three_quarter"
    elif nose[0] < eye_center[0]:
        angle = "profile_left"
    else:
        angle = "profile_right"

    return {
        "angle": angle,
        "confidence": 0.7,
        "ratio": round(ratio, 4),
        "method": "v1_single_feature",
    }
|
|
|
|
|
|
def compare_v1_v2(landmarks: List) -> Dict:
    """
    Run both classifiers on the same landmarks and report how they differ.

    Intended for validation and debugging.

    Returns:
        Dict with the raw 'v1' and 'v2' results, an 'agreement' flag that
        is True when both report the same angle, and
        'confidence_improvement' (V2 confidence minus V1 confidence).
    """
    legacy = calculate_pose_angle_v1(landmarks)
    current = calculate_pose_angle_v2(landmarks)

    same_angle = legacy["angle"] == current["angle"]
    confidence_gain = current["confidence"] - legacy["confidence"]

    return {
        "v1": legacy,
        "v2": current,
        "agreement": same_angle,
        "confidence_improvement": confidence_gain,
    }
|
|
|
|
|
|
def batch_classify_angles(face_json_path: str) -> Dict:
    """
    Batch classify all faces in face.json.

    The file is read as a JSON object whose "frames" entry maps a frame key
    to an object holding a "faces" list; each face may carry a "landmarks"
    list. Faces with missing or fewer than 5 landmarks are skipped, and
    JSON ``null`` for "frames", "faces", a frame or a face is treated as
    empty.

    Args:
        face_json_path: Path to the JSON file (read as UTF-8).

    Returns:
        Dict with:
        - total_faces: number of faces classified
        - angle_distribution: angle label -> count
        - confidence_avg / confidence_min / confidence_max: built-in
          floats, 0.0 when no face was classified
        - results: per-face V2 results, each extended with 'frame' and
          'face_index'
    """
    import json

    # JSON interchange is UTF-8; do not depend on the platform default.
    with open(face_json_path, encoding="utf-8") as f:
        data = json.load(f)

    # "or {}" / "or []" also cover explicit JSON nulls, which .get() would
    # otherwise hand back as None.
    frames = data.get("frames") or {}

    results = []
    angle_counts = {}
    confidence_stats = []

    for frame_key, frame_data in frames.items():
        faces = (frame_data or {}).get("faces") or []
        for face_idx, face in enumerate(faces):
            landmarks = (face or {}).get("landmarks") or []

            if len(landmarks) < 5:
                continue

            pose_result = calculate_pose_angle_v2(landmarks)
            pose_result["frame"] = frame_key
            pose_result["face_index"] = face_idx

            results.append(pose_result)

            angle = pose_result["angle"]
            angle_counts[angle] = angle_counts.get(angle, 0) + 1
            confidence_stats.append(pose_result["confidence"])

    has_stats = bool(confidence_stats)
    return {
        "total_faces": len(results),
        "angle_distribution": angle_counts,
        "confidence_avg": float(np.mean(confidence_stats)) if has_stats else 0.0,
        "confidence_min": float(np.min(confidence_stats)) if has_stats else 0.0,
        "confidence_max": float(np.max(confidence_stats)) if has_stats else 0.0,
        "results": results,
    }
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: either run the built-in smoke checks
    # (--test) or classify every face in a face.json file (--face-json).
    # --test takes precedence when both are given.
    import argparse

    cli = argparse.ArgumentParser(description="Pose Analyzer")
    cli.add_argument("--face-json", help="Path to face.json for batch analysis")
    cli.add_argument("--test", action="store_true", help="Run unit tests")
    options = cli.parse_args()

    banner = "=" * 60

    if not options.test and not options.face_json:
        print("Please provide --face-json or --test")
    elif options.test:
        print(banner)
        print("Pose Analyzer Unit Tests")
        print(banner)

        # Three synthetic faces: nose centered, nose shifted to larger x,
        # nose shifted to smaller x.
        samples = (
            [[100, 100], [120, 100], [110, 120], [105, 130], [115, 130]],
            [[100, 100], [120, 100], [125, 120], [105, 130], [115, 130]],
            [[100, 100], [120, 100], [95, 120], [105, 130], [115, 130]],
        )

        for number, points in enumerate(samples, start=1):
            outcome = calculate_pose_angle_v2(points)
            label = outcome["angle"]
            score = outcome["confidence"]
            print(f"\nTest {number}: {label} (confidence: {score:.2f})")
            print(f" Features: {outcome['features']}")
    else:
        print(banner)
        print("Batch Pose Analysis")
        print(banner)

        summary = batch_classify_angles(options.face_json)
        avg = summary["confidence_avg"]
        low = summary["confidence_min"]
        high = summary["confidence_max"]

        print(f"\nTotal faces: {summary['total_faces']}")
        print(f"Angle distribution: {summary['angle_distribution']}")
        print(f"Confidence: avg={avg:.2f}, min={low:.2f}, max={high:.2f}")