feat: add Level 2/3 dynamic feature extraction CLI

- test_level2_level3.py: on-demand extraction script
- Level 2: face, torso, leg, arm regions (medium)
- Level 3: glasses, earrings, watch (fine details)
- Demonstrates dynamic calculation from keypoints
This commit is contained in:
Accusys
2026-06-22 03:26:12 +08:00
parent d0858f288a
commit bce9435823

View File

@@ -0,0 +1,193 @@
#!/opt/homebrew/bin/python3.11
"""
Test Level 2/3 Dynamic Feature Extraction
Purpose:
1. Demonstrate Level 2/3 on-demand extraction
2. Test with pose.json + video frames
3. Show feature output structure
Usage:
python scripts/test_level2_level3.py --file-uuid <uuid> --frame <frame_num>
"""
import sys
import os
import json
import argparse
import cv2
# Put this script's own directory and its "utils" subdirectory at the front of
# the import search path before the two imports below run.
# NOTE(review): presumably feature_extractor / proportion_calculator live in
# one of those two directories -- confirm against the repository layout.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "utils"))
from feature_extractor import HierarchicalFeatureExtractor
from proportion_calculator import calculate_proportions, estimate_region_from_keypoints
# Base directory used for the default video / pose.json paths and for the
# saved result file. Taken from the MOMENTRY_OUTPUT_DIR environment variable
# when set; otherwise falls back to the hard-coded developer path.
OUTPUT_DIR = os.environ.get("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev")
def get_regions_from_keypoints(keypoints, eye_width):
    """Build the Level 2 region table for one person.

    Every region is produced by passing a fixed set of anchor keypoint names
    to ``estimate_region_from_keypoints``. A region is kept only when the
    returned mapping reports a ``width`` greater than zero.

    Args:
        keypoints: Keypoints of one person; forwarded unchanged to
            ``estimate_region_from_keypoints``.
        eye_width: Part of the signature but not read by this function.

    Returns:
        Dict mapping region name to the estimated region. Possible keys, in
        insertion order: ``face``, ``torso``, ``leg``, ``left_arm``,
        ``right_arm``. Regions with a missing or non-positive width are
        omitted.
    """
    # One row per region:
    # (name, top, bottom, left, right, eye_width_factor)
    # Only the face row passes a non-zero eye_width_factor; the leg and arm
    # rows pass no left/right anchors at all.
    # NOTE(review): the face row ends at 'nose', although the region was
    # previously described as "eyes to chin" -- confirm which is intended.
    layout = (
        ('face', 'left_eye', 'nose', 'left_ear', 'right_ear', 2.0),
        ('torso', 'neck', 'left_hip', 'left_shoulder', 'right_shoulder', 0),
        ('leg', 'left_hip', 'left_ankle', None, None, 0),
        ('left_arm', 'left_shoulder', 'left_wrist', None, None, 0),
        ('right_arm', 'right_shoulder', 'right_wrist', None, None, 0),
    )

    found = {}
    for name, top, bottom, left, right, factor in layout:
        candidate = estimate_region_from_keypoints(
            keypoints,
            top_keypoint=top,
            bottom_keypoint=bottom,
            left_keypoint=left,
            right_keypoint=right,
            eye_width_factor=factor,
        )
        # Keep only regions that actually have a positive width.
        if candidate.get('width', 0) > 0:
            found[name] = candidate
    return found
def main():
    """Run Level 2/3 feature extraction for a single frame and save the result.

    Command-line arguments:
        --file-uuid / -u: required; used to build the default input paths and
            the output file name.
        --frame / -f: integer frame number (default 0).
        --video / -v: optional video path; defaults to
            ``{OUTPUT_DIR}/{file_uuid}.mp4``.
        --pose-json / -p: optional pose JSON path; defaults to
            ``{OUTPUT_DIR}/{file_uuid}.pose.json``.

    The first person listed for the requested frame in the pose JSON is used.
    A summary is printed and the full result is written to
    ``{OUTPUT_DIR}/{file_uuid}_level23_frame{frame_num}.json``.

    Exits with status 1 (after printing a message) when the video or pose
    JSON is missing, the frame cannot be read from the video, the frame is
    absent from the pose data, or the frame lists no persons.
    """
    parser = argparse.ArgumentParser(description="Test Level 2/3 Feature Extraction")
    parser.add_argument("--file-uuid", "-u", required=True, help="File UUID")
    parser.add_argument("--frame", "-f", type=int, default=0, help="Frame number")
    parser.add_argument("--video", "-v", help="Video path (optional)")
    parser.add_argument("--pose-json", "-p", help="Pose JSON path (optional)")
    args = parser.parse_args()

    file_uuid = args.file_uuid
    frame_num = args.frame
    video_path = args.video or f"{OUTPUT_DIR}/{file_uuid}.mp4"
    pose_json_path = args.pose_json or f"{OUTPUT_DIR}/{file_uuid}.pose.json"

    if not os.path.exists(video_path):
        print(f"Video not found: {video_path}")
        sys.exit(1)
    if not os.path.exists(pose_json_path):
        print(f"Pose JSON not found: {pose_json_path}")
        sys.exit(1)

    # Explicit encoding so the result does not depend on the locale default.
    with open(pose_json_path, encoding="utf-8") as f:
        pose_data = json.load(f)

    # Grab the requested frame; release the capture even if seeking or
    # reading raises.
    cap = cv2.VideoCapture(video_path)
    try:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
        ret, frame = cap.read()
    finally:
        cap.release()
    if not ret:
        print(f"Cannot read frame {frame_num}")
        sys.exit(1)

    # Find the pose entry whose 'frame' field matches the requested number.
    pose_frame = next(
        (pf for pf in pose_data.get('frames', []) if pf.get('frame') == frame_num),
        None,
    )
    if pose_frame is None:
        print(f"Frame {frame_num} not in pose.json")
        sys.exit(1)

    # A frame may be present without any detections; fail with a message
    # instead of a KeyError/IndexError on persons[0].
    persons = pose_frame.get('persons') or []
    if not persons:
        print(f"No persons detected in frame {frame_num}")
        sys.exit(1)
    person = persons[0]
    keypoints = person.get('keypoints', [])
    bbox = person.get('bbox', {})

    # Calculate proportions (Level 1 reference); eye_width falls back to 0
    # when the proportions carry no reference unit.
    proportions = calculate_proportions(keypoints, bbox)
    eye_width = proportions.get('reference_units', {}).get('eye_width', 0)

    # Get Level 2 regions
    regions = get_regions_from_keypoints(keypoints, eye_width)

    # Extract features
    extractor = HierarchicalFeatureExtractor()

    print(f"=== Frame {frame_num} ===")
    print(f"Bbox: {bbox}")
    height_cm = proportions.get('height_estimate', {}).get('estimated_height_cm', 0)
    print(f"Proportions: height_estimate={height_cm}cm")
    shot_type = proportions.get('shot_type', {}).get('shot_type', 'unknown')
    print(f"Shot type: {shot_type}")

    # Level 2 extraction. The 'level' key is skipped when printing
    # (presumably a metadata tag rather than a region -- confirm).
    print("\n=== Level 2 (Medium Regions) ===")
    level2 = extractor.extract_level2(frame, regions)
    for region, features in level2.items():
        if region != 'level' and features:
            color = features.get('color', {})
            print(f"{region}: dominant_colors={color.get('dominant_colors', [[]])[:1]}, h_mean={color.get('h_mean', 0):.1f}")

    # Level 3 extraction
    print("\n=== Level 3 (Fine Details) ===")
    level3 = extractor.extract_level3(frame, keypoints, eye_width)
    for region, features in level3.items():
        if region != 'level' and features:
            color = features.get('color', {})
            print(f"{region}: h_mean={color.get('h_mean', 0):.1f}")

    # Save output
    output = {
        'frame': frame_num,
        'bbox': bbox,
        'proportions': proportions,
        'regions': regions,
        'level2': level2,
        'level3': level3,
    }
    # The inputs may come from explicit paths outside OUTPUT_DIR, so make
    # sure the output directory exists before writing into it.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    output_path = f"{OUTPUT_DIR}/{file_uuid}_level23_frame{frame_num}.json"
    with open(output_path, 'w', encoding="utf-8") as f:
        # default=str stringifies any value json cannot serialize natively.
        json.dump(output, f, indent=2, default=str)
    print(f"\nSaved to: {output_path}")
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
main()