#!/opt/homebrew/bin/python3.11
"""
Test Level 2/3 Dynamic Feature Extraction

Script: scripts/test_level2_level3.py

Purpose:
1. Demonstrate Level 2/3 on-demand extraction
2. Test with pose.json + video frames
3. Show feature output structure

Usage:
    python scripts/test_level2_level3.py --file-uuid <uuid> --frame <n>

Optional overrides:
    --video <path>       video file (default: <OUTPUT_DIR>/<uuid>.mp4)
    --pose-json <path>   pose file  (default: <OUTPUT_DIR>/<uuid>.pose.json)

OUTPUT_DIR is taken from the MOMENTRY_OUTPUT_DIR environment variable when set.
"""

import sys
import os
import json
import argparse
import cv2

# Make the sibling modules (and scripts/utils) importable when this file is
# run directly as a script; must happen before the project imports below.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "utils"))

from feature_extractor import HierarchicalFeatureExtractor
from proportion_calculator import calculate_proportions, estimate_region_from_keypoints

OUTPUT_DIR = os.environ.get("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev")

# Level 2 region definitions: (region name, keyword arguments forwarded to
# estimate_region_from_keypoints). Order matters only for the insertion order
# of the resulting dict.
_REGION_SPECS = (
    # NOTE(review): the original comment described this as "eyes to chin",
    # but the bottom anchor is 'nose' -- confirm which one is intended.
    ('face', {
        'top_keypoint': 'left_eye',
        'bottom_keypoint': 'nose',
        'left_keypoint': 'left_ear',
        'right_keypoint': 'right_ear',
        'eye_width_factor': 2.0,
    }),
    # Torso region (neck to hip)
    ('torso', {
        'top_keypoint': 'neck',
        'bottom_keypoint': 'left_hip',
        'left_keypoint': 'left_shoulder',
        'right_keypoint': 'right_shoulder',
        'eye_width_factor': 0,
    }),
    # Leg region (hip to ankle)
    ('leg', {
        'top_keypoint': 'left_hip',
        'bottom_keypoint': 'left_ankle',
        'left_keypoint': None,
        'right_keypoint': None,
        'eye_width_factor': 0,
    }),
    # Arms (shoulder to wrist)
    ('left_arm', {
        'top_keypoint': 'left_shoulder',
        'bottom_keypoint': 'left_wrist',
        'left_keypoint': None,
        'right_keypoint': None,
        'eye_width_factor': 0,
    }),
    ('right_arm', {
        'top_keypoint': 'right_shoulder',
        'bottom_keypoint': 'right_wrist',
        'left_keypoint': None,
        'right_keypoint': None,
        'eye_width_factor': 0,
    }),
)


def get_regions_from_keypoints(keypoints, eye_width):
    """Calculate Level 2 regions from keypoints.

    Calls ``estimate_region_from_keypoints`` once per entry in
    ``_REGION_SPECS`` and keeps only the regions whose reported ``width`` is
    greater than zero.

    Args:
        keypoints: Keypoint data for one person, passed through unchanged to
            ``estimate_region_from_keypoints``.
        eye_width: Currently unused; kept so existing callers keep working.

    Returns:
        dict mapping region name ('face', 'torso', 'leg', 'left_arm',
        'right_arm') to the region returned by the estimator. Regions with a
        missing or non-positive width are omitted.
    """
    regions = {}
    for name, spec in _REGION_SPECS:
        region = estimate_region_from_keypoints(keypoints, **spec)
        if region.get('width', 0) > 0:
            regions[name] = region
    return regions


def _find_pose_frame(pose_data, frame_num):
    """Return the first entry of pose_data['frames'] whose 'frame' equals frame_num, else None."""
    return next(
        (pf for pf in pose_data.get('frames', []) if pf.get('frame') == frame_num),
        None,
    )


def main():
    """Run Level 2 and Level 3 extraction for one frame and save the result.

    Steps: parse arguments, load the pose JSON, read the requested video
    frame, look up the matching pose entry, compute proportions and Level 2
    regions for the first person, run both extraction levels, print a short
    summary and write everything to
    ``<OUTPUT_DIR>/<file_uuid>_level23_frame<frame>.json``.

    Exits with status 1 when the video or pose file is missing, the frame
    cannot be read, the frame is absent from the pose data, or the frame has
    no persons.
    """
    parser = argparse.ArgumentParser(description="Test Level 2/3 Feature Extraction")
    parser.add_argument("--file-uuid", "-u", required=True, help="File UUID")
    parser.add_argument("--frame", "-f", type=int, default=0, help="Frame number")
    parser.add_argument("--video", "-v", help="Video path (optional)")
    parser.add_argument("--pose-json", "-p", help="Pose JSON path (optional)")
    args = parser.parse_args()

    file_uuid = args.file_uuid
    frame_num = args.frame

    video_path = args.video or f"{OUTPUT_DIR}/{file_uuid}.mp4"
    pose_json_path = args.pose_json or f"{OUTPUT_DIR}/{file_uuid}.pose.json"

    if not os.path.exists(video_path):
        print(f"Video not found: {video_path}")
        sys.exit(1)

    if not os.path.exists(pose_json_path):
        print(f"Pose JSON not found: {pose_json_path}")
        sys.exit(1)

    # Explicit encoding so the result does not depend on the locale default.
    with open(pose_json_path, encoding="utf-8") as f:
        pose_data = json.load(f)

    cap = cv2.VideoCapture(video_path)
    try:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
        ret, frame = cap.read()
    finally:
        # Release the capture even if seeking or reading raises.
        cap.release()

    if not ret:
        print(f"Cannot read frame {frame_num}")
        sys.exit(1)

    # Find frame in pose data. Compare against None explicitly so an empty
    # frame record is not mistaken for a missing one.
    pose_frame = _find_pose_frame(pose_data, frame_num)
    if pose_frame is None:
        print(f"Frame {frame_num} not in pose.json")
        sys.exit(1)

    # A frame may exist without any detected person; report that instead of
    # failing with KeyError/IndexError.
    persons = pose_frame.get('persons') or []
    if not persons:
        print(f"No persons detected in frame {frame_num}")
        sys.exit(1)

    person = persons[0]
    keypoints = person.get('keypoints', [])
    bbox = person.get('bbox', {})

    # Calculate proportions (Level 1 reference)
    proportions = calculate_proportions(keypoints, bbox)
    eye_width = proportions.get('reference_units', {}).get('eye_width', 0)

    # Get Level 2 regions
    regions = get_regions_from_keypoints(keypoints, eye_width)

    # Extract features
    extractor = HierarchicalFeatureExtractor()

    print(f"=== Frame {frame_num} ===")
    print(f"Bbox: {bbox}")
    print(f"Proportions: height_estimate={proportions.get('height_estimate', {}).get('estimated_height_cm', 0)}cm")
    print(f"Shot type: {proportions.get('shot_type', {}).get('shot_type', 'unknown')}")

    # Level 2 extraction
    print("\n=== Level 2 (Medium Regions) ===")
    level2 = extractor.extract_level2(frame, regions)
    for region, features in level2.items():
        # The 'level' key is skipped: it is not a region entry.
        if region != 'level' and features:
            color = features.get('color', {})
            print(f"{region}: dominant_colors={color.get('dominant_colors', [[]])[:1]}, h_mean={color.get('h_mean', 0):.1f}")

    # Level 3 extraction
    print("\n=== Level 3 (Fine Details) ===")
    level3 = extractor.extract_level3(frame, keypoints, eye_width)
    for region, features in level3.items():
        if region != 'level' and features:
            color = features.get('color', {})
            print(f"{region}: h_mean={color.get('h_mean', 0):.1f}")

    # Save output
    output = {
        'frame': frame_num,
        'bbox': bbox,
        'proportions': proportions,
        'regions': regions,
        'level2': level2,
        'level3': level3,
    }

    output_path = f"{OUTPUT_DIR}/{file_uuid}_level23_frame{frame_num}.json"
    with open(output_path, 'w', encoding="utf-8") as f:
        # default=str stringifies any value json cannot serialize natively.
        json.dump(output, f, indent=2, default=str)

    print(f"\nSaved to: {output_path}")


if __name__ == "__main__":
    main()