feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors
- Add release pre-flight check script
- Add synonym generation, chunk processing scripts
- Add face recognition, stamp search utilities
This commit is contained in:
377
scripts/utils/test_mediapipe.py
Normal file
377
scripts/utils/test_mediapipe.py
Normal file
@@ -0,0 +1,377 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
MediaPipe Test Script - Test all MediaPipe modules

Test modules:
1. Face Mesh (468 keypoints; 478 when iris refinement is enabled)
2. Pose (33 keypoints)
3. Hands (21 keypoints per hand)
4. Holistic (Face + Pose + Hands)
"""
|
||||
|
||||
import sys
|
||||
import cv2
|
||||
import numpy as np
|
||||
import mediapipe as mp
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def test_face_mesh(
    image_path="/Users/accusys/momentry_core_0.1/output/quick_preview/frame_220.jpg",
):
    """
    Test MediaPipe Face Mesh on a single still image.

    Creates a FaceMesh model with iris refinement enabled, runs it on
    ``image_path`` and prints the landmark count, a few key landmarks and
    simplified eye / mouth aspect ratios for the first detected face.

    Args:
        image_path: Image file to analyse. Defaults to the preview frame the
            script was written against. A missing or unreadable file is
            reported and inference is skipped instead of crashing.

    Returns:
        None. All results are printed to stdout.
    """
    print("=" * 60)
    print("Testing MediaPipe Face Mesh")
    print("=" * 60)

    mp_face_mesh = mp.solutions.face_mesh

    # The context manager closes the model even if inference raises.
    with mp_face_mesh.FaceMesh(
        static_image_mode=True,
        max_num_faces=1,
        refine_landmarks=True,  # Enable iris detection
        min_detection_confidence=0.5,
    ) as face_mesh:
        print("✅ Face Mesh model created")

        # cv2.imread returns None (it does not raise) for unreadable files,
        # so check both "missing" and "could not decode".
        image = cv2.imread(image_path) if Path(image_path).exists() else None
        if image is None:
            print(f"⚠️ Test image missing or unreadable, skipping inference: {image_path}")
        else:
            results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            if results.multi_face_landmarks:
                _print_face_mesh_report(results.multi_face_landmarks[0].landmark)
            else:
                print("❌ No face detected")

    print("\n✅ Face Mesh test completed")


def _face_mesh_ratio(height, width):
    """Return height / width, or 0 when the width is not positive."""
    return height / width if width > 0 else 0


def _print_face_mesh_report(landmarks):
    """Print landmark count, key landmarks, EAR and MAR for one detected face."""
    num_landmarks = len(landmarks)
    print(f"✅ Face detected: {num_landmarks} landmarks")

    # Key landmark indices
    # NOTE(review): 33 and 263 are eye-corner points rather than true eye
    # centres; the printed names are kept as in the original script.
    key_indices = {
        "nose_tip": 1,
        "left_eye_center": 33,
        "right_eye_center": 263,
        "left_iris_center": 468,
        "right_iris_center": 473,
        "mouth_top": 13,
        "mouth_bottom": 14,
        "mouth_left": 61,
        "mouth_right": 291,
    }

    print("\nKey landmarks:")
    for name, idx in key_indices.items():
        # Guard: the iris indices (468+) are beyond the base mesh.
        if idx < num_landmarks:
            landmark = landmarks[idx]
            print(f" {name} ({idx}): x={landmark.x:.3f}, y={landmark.y:.3f}")

    # Eye Aspect Ratio (EAR), simplified to lid opening / eye width.
    # 33 and 133 are the two corners of the eye (they span its width);
    # 159 and 145 sit on the upper and lower lid (they span its opening).
    # The previous code measured the corners vertically and the lids
    # horizontally, which yields a meaningless ratio.
    eye_corner_outer = landmarks[33]
    eye_corner_inner = landmarks[133]
    upper_lid = landmarks[159]
    lower_lid = landmarks[145]

    eye_opening = abs(lower_lid.y - upper_lid.y)
    eye_width = abs(eye_corner_inner.x - eye_corner_outer.x)
    ear_left = _face_mesh_ratio(eye_opening, eye_width)

    print("\nEye Aspect Ratio (EAR):")
    print(f" Left eye EAR: {ear_left:.3f}")
    print(f" Interpretation: {'wide_open' if ear_left > 0.35 else 'normal' if ear_left > 0.2 else 'closed'}")

    # Mouth Aspect Ratio (MAR): inner-lip opening / mouth width.
    mouth_top = landmarks[13]
    mouth_bottom = landmarks[14]
    mouth_left = landmarks[61]
    mouth_right = landmarks[291]

    mouth_height = abs(mouth_bottom.y - mouth_top.y)
    mouth_width = abs(mouth_right.x - mouth_left.x)
    mar = _face_mesh_ratio(mouth_height, mouth_width)

    print("\nMouth Aspect Ratio (MAR):")
    print(f" MAR: {mar:.3f}")
    print(f" Interpretation: {'open' if mar > 0.5 else 'closed' if mar < 0.2 else 'slightly_open'}")
|
||||
|
||||
|
||||
def test_pose(
    image_path="/Users/accusys/momentry_core_0.1/output/quick_preview/frame_220.jpg",
):
    """
    Test MediaPipe Pose on a single still image.

    Creates a Pose model (model_complexity=2), runs it on ``image_path`` and
    prints the keypoint count, selected keypoints with visibility, the right
    elbow angle, a raised-arm check and the left knee angle.

    Args:
        image_path: Image file to analyse. Defaults to the preview frame the
            script was written against. A missing or unreadable file is
            reported and inference is skipped instead of crashing.

    Returns:
        None. All results are printed to stdout.
    """
    print("\n" + "=" * 60)
    print("Testing MediaPipe Pose")
    print("=" * 60)

    mp_pose = mp.solutions.pose

    # The context manager closes the model even if inference raises.
    with mp_pose.Pose(
        static_image_mode=True,
        model_complexity=2,  # Full model
        enable_segmentation=False,
        min_detection_confidence=0.5,
    ) as pose:
        print("✅ Pose model created")

        # cv2.imread returns None (it does not raise) for unreadable files.
        image = cv2.imread(image_path) if Path(image_path).exists() else None
        if image is None:
            print(f"⚠️ Test image missing or unreadable, skipping inference: {image_path}")
        else:
            results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            if results.pose_landmarks:
                _print_pose_report(results.pose_landmarks.landmark)
            else:
                print("❌ No pose detected")

    print("\n✅ Pose test completed")


def _pose_joint_angle(p1, p2, p3):
    """
    Return the 2D angle in degrees at ``p2`` formed by ``p1`` and ``p3``.

    Only the ``x`` and ``y`` attributes of the points are used. Returns None
    when either limb has zero length, because the angle is undefined there.
    """
    v1 = np.array([p1.x - p2.x, p1.y - p2.y])
    v2 = np.array([p3.x - p2.x, p3.y - p2.y])
    scale = np.linalg.norm(v1) * np.linalg.norm(v2)
    if scale == 0:
        return None
    # Rounding can push the cosine just outside [-1, 1], which would make
    # arccos return NaN for (nearly) straight or fully folded joints.
    cosine = np.clip(np.dot(v1, v2) / scale, -1.0, 1.0)
    return float(np.degrees(np.arccos(cosine)))


def _print_pose_report(landmarks):
    """Print keypoint count, key keypoints and elbow / knee angles."""
    num_landmarks = len(landmarks)
    print(f"✅ Pose detected: {num_landmarks} keypoints")

    # Key keypoints
    key_indices = {
        "nose": 0,
        "left_shoulder": 11,
        "right_shoulder": 12,
        "left_elbow": 13,
        "right_elbow": 14,
        "left_wrist": 15,
        "right_wrist": 16,
        "left_hip": 23,
        "right_hip": 24,
        "left_knee": 25,
        "right_knee": 26,
        "left_ankle": 27,
        "right_ankle": 28,
    }

    print("\nKey keypoints:")
    for name, idx in key_indices.items():
        landmark = landmarks[idx]
        print(f" {name} ({idx}): x={landmark.x:.3f}, y={landmark.y:.3f}, visibility={landmark.visibility:.2f}")

    # Right arm angle
    right_shoulder = landmarks[12]
    right_elbow = landmarks[14]
    right_wrist = landmarks[16]

    right_elbow_angle = _pose_joint_angle(right_shoulder, right_elbow, right_wrist)
    if right_elbow_angle is None:
        print("\nRight elbow angle: undefined (coincident keypoints)")
    else:
        print(f"\nRight elbow angle: {right_elbow_angle:.1f}°")
        print(f" Interpretation: {'extended' if right_elbow_angle > 150 else 'folded' if right_elbow_angle < 90 else 'neutral'}")

    # Check if arm is raised: image y grows downward, so a smaller y is higher.
    if right_wrist.y < right_elbow.y < right_shoulder.y:
        print(" Action: raise_right (arm raised)")

    # Knee angles
    left_hip = landmarks[23]
    left_knee = landmarks[25]
    left_ankle = landmarks[27]

    left_knee_angle = _pose_joint_angle(left_hip, left_knee, left_ankle)
    if left_knee_angle is None:
        print("\nLeft knee angle: undefined (coincident keypoints)")
    else:
        print(f"\nLeft knee angle: {left_knee_angle:.1f}°")
        print(f" Interpretation: {'standing' if left_knee_angle > 160 else 'knee_bend' if left_knee_angle < 120 else 'neutral'}")
|
||||
|
||||
|
||||
def test_hands(
    image_path="/Users/accusys/momentry_core_0.1/output/quick_preview/frame_220.jpg",
):
    """
    Test MediaPipe Hands on a single still image.

    Creates a Hands model for up to two hands, runs it on ``image_path`` and,
    for every detected hand, prints its handedness label, selected landmarks,
    per-finger extension flags and a coarse gesture guess.

    Args:
        image_path: Image file to analyse. Defaults to the preview frame the
            script was written against. A missing or unreadable file is
            reported and inference is skipped instead of crashing.

    Returns:
        None. All results are printed to stdout.
    """
    print("\n" + "=" * 60)
    print("Testing MediaPipe Hands")
    print("=" * 60)

    mp_hands = mp.solutions.hands

    # The context manager closes the model even if inference raises.
    with mp_hands.Hands(
        static_image_mode=True,
        max_num_hands=2,
        min_detection_confidence=0.5,
    ) as hands:
        print("✅ Hands model created")

        # cv2.imread returns None (it does not raise) for unreadable files.
        image = cv2.imread(image_path) if Path(image_path).exists() else None
        if image is None:
            print(f"⚠️ Test image missing or unreadable, skipping inference: {image_path}")
        else:
            results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            if results.multi_hand_landmarks:
                detections = zip(results.multi_hand_landmarks, results.multi_handedness)
                for number, (hand_landmarks, handedness) in enumerate(detections, start=1):
                    hand_label = handedness.classification[0].label
                    _print_hand_report(number, hand_label, hand_landmarks.landmark)
            else:
                print("❌ No hands detected")

    print("\n✅ Hands test completed")


def _classify_hand_gesture(thumb, index, middle, ring, pinky):
    """
    Map five finger-extension flags to a gesture name.

    Returns one of "open_hand", "fist", "thumbs_up", "peace_sign",
    "pointing" or "unknown".
    """
    fingers = (thumb, index, middle, ring, pinky)
    if all(fingers):
        return "open_hand"
    if not any(fingers):
        return "fist"
    if thumb and not (index or middle or ring or pinky):
        return "thumbs_up"
    # Only ring and pinky must be folded here. The previous check also
    # required the middle finger to be folded while requiring it extended,
    # so "peace_sign" could never be returned.
    if index and middle and not (ring or pinky):
        return "peace_sign"
    if index and not (thumb or middle or ring or pinky):
        return "pointing"
    return "unknown"


def _print_hand_report(number, hand_label, landmarks):
    """Print key landmarks, finger extensions and the gesture for one hand."""
    print(f"\n✅ Hand {number} detected ({hand_label}): {len(landmarks)} keypoints")

    # Key landmarks
    key_indices = {
        "wrist": 0,
        "thumb_tip": 4,
        "index_tip": 8,
        "middle_tip": 12,
        "ring_tip": 16,
        "pinky_tip": 20,
    }

    print(" Key landmarks:")
    for name, i in key_indices.items():
        lm = landmarks[i]
        print(f" {name} ({i}): x={lm.x:.3f}, y={lm.y:.3f}")

    # A finger counts as extended when its tip is above its base in the image
    # (smaller y), so this only works for a roughly upright hand.
    # NOTE(review): the thumb mostly extends sideways, so this test is
    # probably unreliable for it - confirm before relying on "thumbs_up".
    tip_and_base = ((4, 2), (8, 5), (12, 9), (16, 13), (20, 17))
    extensions = [landmarks[tip].y < landmarks[base].y for tip, base in tip_and_base]

    print(f"\n Finger extensions: {['thumb', 'index', 'middle', 'ring', 'pinky']}")
    print(f" {extensions}")

    gesture = _classify_hand_gesture(*extensions)
    print(f" Detected gesture: {gesture}")
|
||||
|
||||
|
||||
def test_holistic(
    image_path="/Users/accusys/momentry_core_0.1/output/quick_preview/frame_220.jpg",
):
    """
    Test MediaPipe Holistic (Face + Pose + Hands combined) on a still image.

    Creates a Holistic model with refined face landmarks, runs it on
    ``image_path`` and prints, for each of face, pose, left hand and right
    hand, how many landmarks were returned, followed by the number of
    components detected.

    Args:
        image_path: Image file to analyse. Defaults to the preview frame the
            script was written against. A missing or unreadable file is
            reported and inference is skipped instead of crashing.

    Returns:
        None. All results are printed to stdout.
    """
    print("\n" + "=" * 60)
    print("Testing MediaPipe Holistic")
    print("=" * 60)

    mp_holistic = mp.solutions.holistic

    # The context manager closes the model even if inference raises.
    with mp_holistic.Holistic(
        static_image_mode=True,
        model_complexity=2,
        enable_segmentation=False,
        refine_face_landmarks=True,
    ) as holistic:
        print("✅ Holistic model created")

        # cv2.imread returns None (it does not raise) for unreadable files.
        image = cv2.imread(image_path) if Path(image_path).exists() else None
        if image is None:
            print(f"⚠️ Test image missing or unreadable, skipping inference: {image_path}")
        else:
            results = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

            # (printed label, unit word, landmark list or None if not found)
            components = (
                ("Face", "landmarks", results.face_landmarks),
                ("Pose", "keypoints", results.pose_landmarks),
                ("Left hand", "keypoints", results.left_hand_landmarks),
                ("Right hand", "keypoints", results.right_hand_landmarks),
            )

            detected_count = 0
            for label, unit, found in components:
                if found:
                    print(f"✅ {label}: {len(found.landmark)} {unit}")
                    detected_count += 1

            if detected_count == 0:
                print("❌ No landmarks detected")
            else:
                print(f"\nTotal detections: {detected_count} components")

    print("\n✅ Holistic test completed")
|
||||
|
||||
|
||||
def main():
    """
    Run every MediaPipe module test in sequence.

    Prints a banner with the installed MediaPipe version, runs the Face Mesh,
    Pose, Hands and Holistic tests in that order, then prints a closing
    banner and the suggested next steps.
    """
    rule = "=" * 70

    print(rule)
    print("MediaPipe Installation Test")
    print(rule)

    print(f"\nMediaPipe version: {mp.__version__}")
    print()

    # Test all modules
    for run_test in (test_face_mesh, test_pose, test_hands, test_holistic):
        run_test()

    print("\n" + rule)
    print("✅ All MediaPipe tests completed!")
    print(rule)

    print("\nNext steps:")
    next_steps = (
        " 1. Face Mesh: Use for eye/mouth action detection",
        " 2. Pose: Use for arm/leg/feet action detection",
        " 3. Hands: Use for hand gesture detection",
        " 4. Holistic: Use for full-body action detection",
    )
    for step in next_steps:
        print(step)
|
||||
|
||||
|
||||
# Script entry point: run the full test sequence only when this file is
# executed directly; the call is skipped when the module is imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user