feat: add Level 2/3 dynamic feature extraction CLI
- test_level2_level3.py: on-demand extraction script - Level 2: face, torso, leg, arm regions (medium) - Level 3: glasses, earrings, watch (fine details) - Demonstrates dynamic calculation from keypoints
This commit is contained in:
193
scripts/test_level2_level3.py
Normal file
193
scripts/test_level2_level3.py
Normal file
@@ -0,0 +1,193 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Test Level 2/3 Dynamic Feature Extraction
|
||||
|
||||
Purpose:
|
||||
1. Demonstrate Level 2/3 on-demand extraction
|
||||
2. Test with pose.json + video frames
|
||||
3. Show feature output structure
|
||||
|
||||
Usage:
|
||||
python scripts/test_level2_level3.py --file-uuid <uuid> --frame <frame_num>
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import argparse
|
||||
import cv2
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "utils"))
|
||||
|
||||
from feature_extractor import HierarchicalFeatureExtractor
|
||||
from proportion_calculator import calculate_proportions, estimate_region_from_keypoints
|
||||
|
||||
# Base directory used for the default video / pose JSON lookups and for the
# saved result file; override it with the MOMENTRY_OUTPUT_DIR environment
# variable (the fallback is a developer-machine path).
OUTPUT_DIR = os.environ.get("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev")
|
||||
|
||||
|
||||
def get_regions_from_keypoints(keypoints, eye_width):
    """Build the Level 2 (medium) body regions from pose keypoints.

    Each region is requested from ``estimate_region_from_keypoints`` using a
    fixed set of anchor keypoints, and is kept only when the returned
    mapping reports a ``'width'`` greater than zero.

    Args:
        keypoints: Pose keypoints, forwarded unchanged to
            ``estimate_region_from_keypoints``.
        eye_width: Accepted for interface compatibility; this function does
            not read it.

    Returns:
        dict: Region name (``'face'``, ``'torso'``, ``'leg'``,
        ``'left_arm'``, ``'right_arm'``) mapped to the estimated region.
        Regions whose width is missing or not positive are omitted.
    """
    # Columns: name, top, bottom, left, right, eye_width_factor.
    # NOTE(review): the face box is anchored left_eye -> nose (not the chin),
    # and the leg box uses the left-side keypoints only.
    region_specs = (
        ('face', 'left_eye', 'nose', 'left_ear', 'right_ear', 2.0),
        ('torso', 'neck', 'left_hip', 'left_shoulder', 'right_shoulder', 0),
        ('leg', 'left_hip', 'left_ankle', None, None, 0),
        ('left_arm', 'left_shoulder', 'left_wrist', None, None, 0),
        ('right_arm', 'right_shoulder', 'right_wrist', None, None, 0),
    )

    found = {}
    for name, top, bottom, left, right, factor in region_specs:
        box = estimate_region_from_keypoints(
            keypoints,
            top_keypoint=top,
            bottom_keypoint=bottom,
            left_keypoint=left,
            right_keypoint=right,
            eye_width_factor=factor,
        )
        # Skip regions that could not be estimated (no positive width).
        if box.get('width', 0) > 0:
            found[name] = box

    return found
|
||||
|
||||
|
||||
def main():
    """Run Level 2/3 feature extraction for one frame and save the result.

    Parses the command line, loads the pose JSON and the requested video
    frame, computes proportions and Level 2 regions for the first person
    listed in that frame, runs the Level 2 and Level 3 extractors, prints a
    short summary and writes everything to
    ``<OUTPUT_DIR>/<file_uuid>_level23_frame<frame>.json``.

    Exits with status 1 (after printing a message) when the video or pose
    JSON is missing, the frame cannot be read, the frame is absent from the
    pose data, or the frame has no persons.
    """
    parser = argparse.ArgumentParser(description="Test Level 2/3 Feature Extraction")
    parser.add_argument("--file-uuid", "-u", required=True, help="File UUID")
    parser.add_argument("--frame", "-f", type=int, default=0, help="Frame number")
    parser.add_argument("--video", "-v", help="Video path (optional)")
    parser.add_argument("--pose-json", "-p", help="Pose JSON path (optional)")
    args = parser.parse_args()

    file_uuid = args.file_uuid
    frame_num = args.frame

    # Explicit paths win; otherwise derive them from OUTPUT_DIR and the UUID.
    video_path = args.video or f"{OUTPUT_DIR}/{file_uuid}.mp4"
    pose_json_path = args.pose_json or f"{OUTPUT_DIR}/{file_uuid}.pose.json"

    if not os.path.exists(video_path):
        print(f"Video not found: {video_path}")
        sys.exit(1)

    if not os.path.exists(pose_json_path):
        print(f"Pose JSON not found: {pose_json_path}")
        sys.exit(1)

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(pose_json_path, encoding="utf-8") as f:
        pose_data = json.load(f)

    cap = cv2.VideoCapture(video_path)
    try:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
        ret, frame = cap.read()
    finally:
        # Always free the capture handle, even if seeking/reading raises.
        cap.release()

    if not ret:
        print(f"Cannot read frame {frame_num}")
        sys.exit(1)

    # Find frame in pose data
    pose_frame = next(
        (pf for pf in pose_data.get('frames', []) if pf.get('frame') == frame_num),
        None,
    )
    if pose_frame is None:
        print(f"Frame {frame_num} not in pose.json")
        sys.exit(1)

    # A frame may be listed without any detected person; fail with a clear
    # message instead of a KeyError/IndexError traceback.
    persons = pose_frame.get('persons') or []
    if not persons:
        print(f"No persons in frame {frame_num} of pose.json")
        sys.exit(1)

    # Only the first listed person is processed.
    person = persons[0]
    keypoints = person.get('keypoints', [])
    bbox = person.get('bbox', {})

    # Calculate proportions (Level 1 reference)
    proportions = calculate_proportions(keypoints, bbox)
    eye_width = proportions.get('reference_units', {}).get('eye_width', 0)

    # Get Level 2 regions
    regions = get_regions_from_keypoints(keypoints, eye_width)

    # Extract features
    extractor = HierarchicalFeatureExtractor()

    print(f"=== Frame {frame_num} ===")
    print(f"Bbox: {bbox}")
    print(f"Proportions: height_estimate={proportions.get('height_estimate', {}).get('estimated_height_cm', 0)}cm")
    print(f"Shot type: {proportions.get('shot_type', {}).get('shot_type', 'unknown')}")

    # Level 2 extraction
    print("\n=== Level 2 (Medium Regions) ===")
    level2 = extractor.extract_level2(frame, regions)
    for region, features in level2.items():
        # The 'level' entry is skipped here; only non-empty feature entries are printed.
        if region != 'level' and features:
            color = features.get('color', {})
            print(f"{region}: dominant_colors={color.get('dominant_colors', [[]])[:1]}, h_mean={color.get('h_mean', 0):.1f}")

    # Level 3 extraction
    print("\n=== Level 3 (Fine Details) ===")
    level3 = extractor.extract_level3(frame, keypoints, eye_width)
    for region, features in level3.items():
        if region != 'level' and features:
            color = features.get('color', {})
            print(f"{region}: h_mean={color.get('h_mean', 0):.1f}")

    # Save output
    output = {
        'frame': frame_num,
        'bbox': bbox,
        'proportions': proportions,
        'regions': regions,
        'level2': level2,
        'level3': level3,
    }

    # The inputs may come from --video/--pose-json, so OUTPUT_DIR is not
    # guaranteed to exist yet.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    output_path = f"{OUTPUT_DIR}/{file_uuid}_level23_frame{frame_num}.json"
    with open(output_path, 'w', encoding="utf-8") as f:
        # default=str stringifies any value json cannot serialize natively.
        json.dump(output, f, indent=2, default=str)

    print(f"\nSaved to: {output_path}")
|
||||
|
||||
|
||||
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user