From 606f31f13c62a3a7e5aa3eeef84f32b274f371b2 Mon Sep 17 00:00:00 2001 From: Accusys Date: Mon, 22 Jun 2026 02:27:03 +0800 Subject: [PATCH] feat: add appearance feature system with coordinate/scale fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add Appearance_Feature_System_V1.0.md design doc - Add proportion_calculator.py for body proportions (height, body shape) - Add feature_extractor.py for hierarchical feature extraction - Add tkg_level1_builder.py for TKG person_trace nodes - Fix mediapipe_holistic_processor.py to output Top-Left pixels - Add MediaPipe format conversion in proportion_calculator Coordinate system alignment: - Swift Pose: Top-Left pixels (Y-flip done in swift_pose.swift) - MediaPipe: Top-Left pixels (norm→pixel conversion added) --- .../DESIGN/Appearance_Feature_System_V1.0.md | 664 +++++++++++++++++ scripts/mediapipe_holistic_processor.py | 63 +- scripts/tkg_level1_builder.py | 341 +++++++++ scripts/utils/feature_extractor.py | 684 ++++++++++++++++++ scripts/utils/proportion_calculator.py | 674 +++++++++++++++++ 5 files changed, 2397 insertions(+), 29 deletions(-) create mode 100644 docs_v1.0/DESIGN/Appearance_Feature_System_V1.0.md create mode 100644 scripts/tkg_level1_builder.py create mode 100644 scripts/utils/feature_extractor.py create mode 100644 scripts/utils/proportion_calculator.py diff --git a/docs_v1.0/DESIGN/Appearance_Feature_System_V1.0.md b/docs_v1.0/DESIGN/Appearance_Feature_System_V1.0.md new file mode 100644 index 0000000..c65828a --- /dev/null +++ b/docs_v1.0/DESIGN/Appearance_Feature_System_V1.0.md @@ -0,0 +1,664 @@ +--- +title: Appearance Feature System V1.0 +version: 1.0.0 +date: 2025-06-22 +author: OpenCode +status: Draft +--- + +# Appearance Feature System V1.0 + +## Overview + +### Purpose +Lock onto a target and continuously track across frames using appearance features. 
+ +### Architecture +``` +Face (identification) → Pose (tracking) → Appearance (tracking) + ↓ ↓ ↓ + identity_uuid bbox features + proportions +``` + +### Data Sources +| Source | Provides | Output | +|--------|----------|--------| +| Face | identity, landmarks | face.json | +| Pose | bbox, keypoints | pose.json | +| MediaPipe | detailed landmarks, hands | mediapipe.json | + +--- + +## Keypoint Systems + +### Swift Pose (Apple Vision) - 19 Keypoints + +| Index | Keypoint | Vision Framework Joint | +|-------|----------|------------------------| +| 0 | nose | .nose (head_joint) | +| 1 | left_eye | .leftEye (left_eye_joint) | +| 2 | right_eye | .rightEye (right_eye_joint) | +| 3 | left_ear | .leftEar (left_ear_joint) | +| 4 | right_ear | .rightEar (right_ear_joint) | +| 5 | neck | .neck (neck_1_joint) | +| 6 | root | .root (center_hip_joint) | +| 7 | left_shoulder | .leftShoulder | +| 8 | right_shoulder | .rightShoulder | +| 9 | left_elbow | .leftElbow | +| 10 | right_elbow | .rightElbow | +| 11 | left_wrist | .leftWrist (left_hand_joint) | +| 12 | right_wrist | .rightWrist (right_hand_joint) | +| 13 | left_hip | .leftHip | +| 14 | right_hip | .rightHip | +| 15 | left_knee | .leftKnee | +| 16 | right_knee | .rightKnee | +| 17 | left_ankle | .leftAnkle | +| 18 | right_ankle | .rightAnkle | + +### MediaPipe Pose - 33 Landmarks + +| Index | Name | Index | Name | +|-------|------|-------|------| +| 0 | nose | 17 | left_pinky | +| 1 | left_eye_inner | 18 | right_pinky | +| 2 | left_eye | 19 | left_index | +| 3 | left_eye_outer | 20 | right_index | +| 4 | right_eye_inner | 21 | left_thumb | +| 5 | right_eye | 22 | right_thumb | +| 6 | right_eye_outer | 23 | left_hip | +| 7 | left_ear | 24 | right_hip | +| 8 | right_ear | 25 | left_knee | +| 9 | mouth_left | 26 | right_knee | +| 10 | mouth_right | 27 | left_ankle | +| 11 | left_shoulder | 28 | right_ankle | +| 12 | right_shoulder | 29 | left_heel | +| 13 | left_elbow | 30 | right_heel | +| 14 | right_elbow | 31 | 
left_foot_index | +| 15 | left_wrist | 32 | right_foot_index | +| 16 | right_wrist | | | + +### MediaPipe Hand - 21 Landmarks + +| Index | Name | Finger | +|-------|------|--------| +| 0 | wrist | - | +| 1-4 | thumb_cmc/mcp/ip/tip | thumb | +| 5-8 | index_mcp/pip/dip/tip | index | +| 9-12 | middle_mcp/pip/dip/tip | middle | +| 13-16 | ring_mcp/pip/dip/tip | ring | +| 17-20 | pinky_mcp/pip/dip/tip | pinky | + +### YOLOv8 Pose (Fallback) - 17 Keypoints + +| Index | Name | +|-------|------| +| 0 | nose | +| 1 | left_eye | +| 2 | right_eye | +| 3 | left_ear | +| 4 | right_ear | +| 5 | left_shoulder | +| 6 | right_shoulder | +| 7 | left_elbow | +| 8 | right_elbow | +| 9 | left_wrist | +| 10 | right_wrist | +| 11 | left_hip | +| 12 | right_hip | +| 13 | left_knee | +| 14 | right_knee | +| 15 | left_ankle | +| 16 | right_ankle | + +--- + +## Body Proportions Calculation + +### Reference Unit +```python +# Eye distance as reference unit +eye_width = distance(left_eye, right_eye) +``` + +### Body Measurements +```python +# Full body height (nose to ankle) +nose_y = keypoints['nose']['y'] +ankle_y = max(keypoints['left_ankle']['y'], keypoints['right_ankle']['y']) +body_height = ankle_y - nose_y + +# Upper body (neck to hip) +neck_y = keypoints['neck']['y'] +hip_y = (keypoints['left_hip']['y'] + keypoints['right_hip']['y']) / 2 +torso_height = hip_y - neck_y + +# Lower body (hip to ankle) +leg_height = ankle_y - hip_y + +# Shoulder width +shoulder_width = distance(left_shoulder, right_shoulder) +``` + +### Proportion Ratios +```python +proportions = { + 'eye_width': eye_width, + 'body_height': body_height, + 'torso_height': torso_height, + 'leg_height': leg_height, + 'shoulder_width': shoulder_width, + 'head_ratio': eye_width / body_height, + 'torso_ratio': torso_height / body_height, + 'leg_ratio': leg_height / body_height, +} +``` + +### Body Shape Calculation (三圍) +```python +# Chest width (shoulder width approximation) +chest_width = distance(left_shoulder, 
right_shoulder) + +# Waist width (hip width approximation) +waist_width = distance(left_hip, right_hip) + +# Hip width +hip_width = distance(left_hip, right_hip) + +# Body shape classification +if chest_waist_ratio < 1.0 and waist_hip_ratio < 0.9: + shape_type = "hourglass" # hourglass (gourd-shaped) figure +elif chest_waist_ratio > 1.2: + shape_type = "triangle" # inverted triangle - NOTE(review): label reads "triangle"; confirm it is not swapped with the next branch +elif waist_hip_ratio > 1.1: + shape_type = "inverted_triangle" # upright triangle - NOTE(review): label reads "inverted_triangle"; confirm it is not swapped with the previous branch +elif abs(chest_width - hip_width) < 0.1 * max(chest_width, hip_width): + shape_type = "rectangle" # rectangle +else: + shape_type = "oval" # oval +``` + +### Height Estimation +```python +# Use eye_width as reference (≈6cm) +height_ratio = body_height / eye_width +estimated_height_cm = height_ratio * 6.0 + +# Height category +if estimated_height_cm < 150: + height_category = "short" +elif estimated_height_cm < 170: + height_category = "medium" +elif estimated_height_cm < 180: + height_category = "tall" +else: + height_category = "very_tall" +``` + +--- + +## Appearance Feature Location Mapping + +### Environment Factors + +| Feature | Location | Detection Method | +|---------|----------|------------------| +| Light type | Frame background | HSV H distribution | +| Light direction | Shadow analysis | Shadow orientation | +| Light intensity | Overall brightness | HSV V mean | + +### Head Features + +#### Hair Style +| Feature | Keypoints Range | +|---------|-----------------| +| Short hair | head_top → ear/neck | +| Long hair | head_top → shoulder/back | +| Ponytail | head_top → neck (tied) | +| Braids | head_top → shoulder (braided) | +| Curly hair | hair region texture | +| Straight hair | hair region texture | + +#### Hair Accessories +| Feature | Keypoints | +|---------|-----------| +| Hair band | eye_distance (head top) | +| Hair clip | ear/head | +| Hair wrap | ear_distance | +| Hair tie | neck (ponytail position) | +| Hair pin | head | + +#### Head Accessories +| Feature | Keypoints | +|---------|-----------| +| Hat | head_top → eye | +| Headscarf | ear_distance 
(wrapped) | +| Hood | head_top → neck (full head) | + +#### Hair Color +| Feature | Detection | +|---------|-----------| +| Hair color HSV | hair region HSV histogram | + +### Face Features + +#### Eye Accessories +| Feature | Keypoints | +|---------|-----------| +| Glasses | eye_distance | +| Sunglasses | eye_distance (larger) | + +#### Ear Accessories +| Feature | Keypoints | +|---------|-----------| +| Earrings | ear_position | +| Headphones (over-ear) | ear_distance (wrapped) | +| Earphones (in-ear) | ear_position | +| Earphones (ear-hook) | ear_position | + +#### Face Accessories +| Feature | Keypoints | +|---------|-----------| +| Blush | cheeks (below eye) | +| Lipstick | lips (nose + eye_width * 0.5) | +| Mask | ear_distance, eye → neck | + +#### Skin Tone +| Feature | Detection | +|---------|-----------| +| Skin color HSV | face region HSV histogram | + +### Neck Features + +#### Neck Accessories +| Feature | Keypoints | +|---------|-----------| +| Collar | neck | +| Bow tie | neck → chest | +| Tie | neck → hip | +| Scarf | neck → shoulder | +| Necklace | neck | + +#### Hanging Accessories +| Feature | Keypoints | +|---------|-----------| +| Pendant (necklace) | neck → chest | +| Charm (bag) | bag_position | +| Charm (phone) | phone_position | + +### Upper Body Features + +#### Clothing +| Feature | Keypoints | +|---------|-----------| +| Shirt color | neck → hip | +| Shirt material | clothing texture (LBP) | +| Clothing pattern | pattern detection | + +#### Sleeves +| Feature | Keypoints | +|---------|-----------| +| Long sleeve | shoulder → wrist | +| Short sleeve | shoulder → elbow | +| Arm sleeve | elbow → wrist | + +#### Back Features +| Feature | Keypoints | +|---------|-----------| +| Back exposed | shoulder → hip (view angle) | +| Back tattoo | back exposed skin | + +### Bags + +| Feature | Keypoints | +|---------|-----------| +| Handbag | hand_position | +| Shoulder bag | shoulder_position | +| Backpack | shoulder → hip (back) | +| Waist bag | 
hip_position | + +### Hand Features + +#### Hand Accessories +| Feature | Keypoints | +|---------|-----------| +| Watch | wrist | +| Bracelet | wrist → hand | +| Ring | finger (MediaPipe hand landmarks 13-16) | +| Gloves | wrist → hand | +| Nail polish | finger tips | + +#### Handheld Objects +| Feature | Keypoints | +|---------|-----------| +| Phone | hand + object detection | +| Handbag | hand + object detection | + +### Lower Body Features + +#### Pants +| Feature | Keypoints | +|---------|-----------| +| Long pants | hip → ankle | +| Shorts | hip → knee | + +#### Waist Accessories +| Feature | Keypoints | +|---------|-----------| +| Belt | hip | + +### Foot Features + +#### Foot Accessories +| Feature | Keypoints | +|---------|-----------| +| Anklet | ankle | +| Socks | ankle → foot | +| Shoes | ankle | + +### Skin Features + +| Feature | Detection | +|---------|-----------| +| Tattoo | exposed skin anomaly color block | + +### Exposed Skin Detection + +| Location | Coverage Detection | +|----------|-------------------| +| Face | always exposed | +| Arms | exposed if short sleeve | +| Legs | exposed if shorts | +| Hands | exposed if no gloves | +| Feet | exposed if no socks | + +--- + +## Mobility Aids / Vehicles + +### Walking Aids (Object Detection) +| Feature | Keypoints | +|---------|-----------| +| Cane | hand + object | +| Wheelchair | hip + object | +| Walker | both hands + object | + +### Mobility Tools (Object Detection) +| Feature | Keypoints | +|---------|-----------| +| Roller skates | ankle + object | +| Skateboard | ankle + object | +| Scooter | hand + ankle + object | + +### Vehicles (Object Detection) +| Feature | Keypoints | +|---------|-----------| +| Motorcycle | hip + ankle + object | +| Bicycle | hip + ankle + object | +| Tricycle | hip + ankle + object | +| Car | hip + object | + +--- + +## Feature Extraction Techniques + +### Color Extraction (HSV Histogram) +```python +def extract_color(roi): + hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV) 
+ h_hist = cv2.calcHist([hsv], [0], None, [30], [0, 180]) + s_hist = cv2.calcHist([hsv], [1], None, [32], [0, 256]) + v_hist = cv2.calcHist([hsv], [2], None, [32], [0, 256]) + return { + 'h_histogram': normalize(h_hist), + 's_histogram': normalize(s_hist), + 'v_histogram': normalize(v_hist), + } +``` + +### Dominant Color (K-means) +```python +def extract_dominant_colors(roi, k=5): + hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV) + pixels = hsv.reshape(-1, 3).astype(np.float32) + _, labels, centers = cv2.kmeans(pixels, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS) + counts = np.bincount(labels.flatten()) + return centers[np.argsort(-counts)[:k]] +``` + +### Texture Extraction (LBP) +```python +def extract_texture(roi): + gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY) + lbp = local_binary_pattern(gray, P=8, R=1) + return { + 'lbp_variance': np.var(lbp), + 'lbp_histogram': np.histogram(lbp, bins=256)[0], + } +``` + +### Shininess Detection +```python +def detect_shininess(roi): + hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV) + v_mean = np.mean(hsv[:,:,2]) + v_std = np.std(hsv[:,:,2]) + return { + 'brightness': v_mean, + 'brightness_variance': v_std, + } +``` + +--- + +## Tracking Flow + +### Feature Storage Strategy +| Level | Storage | Reason | +|-------|---------|--------| +| **Level 1** | TKG nodes | Stable features for tracking | +| **Level 2** | Dynamic | On-demand calculation | +| **Level 3** | Dynamic | On-demand calculation | + +### Level 1 in TKG +```sql +-- New node_type: person_trace +INSERT INTO tkg_nodes ( + node_type = 'person_trace', + external_id = 'person_{index}', + file_uuid = 'xxx', + properties = { + 'frame_count': 100, + 'frames': [1, 30, 60, ...], + 'avg_bbox': {...}, + 'height_estimate': { + 'estimated_height_cm': 170.5, + 'height_ratio': 28.4, + 'height_category': 'tall' + }, + 'body_shape': { + 'chest_width': 150.2, + 'waist_width': 100.5, + 'hip_width': 120.3, + 'chest_waist_ratio': 1.49, + 'waist_hip_ratio': 0.84, + 'body_shape': 
'hourglass' + }, + 'level1_features': { + 'body': {...}, + 'head_top': {...}, + 'upper_body': {...}, + 'lower_body': {...} + } + } +) +``` + +### Level 2/3 Dynamic Calculation +```python +# Level 2: computed on query +face_features = extractor.extract_level2(frame, regions) + +# Level 3: computed on query +accessory_features = extractor.extract_level3(frame, keypoints, eye_width) +``` + +### Matching Strategy +``` +Frame N → Frame N+1: + +1. Pose bbox IoU → same person position +2. Level 1 similarity (TKG) → same feature combination +3. Level 2/3 dynamic → detailed verification +4. Face identity → final confirmation (if face detected) + +Result: Continuous tracking of same identity +``` + +### IoU Calculation +```python +def calculate_iou(bbox1, bbox2): + x1, y1, w1, h1 = bbox1 + x2, y2, w2, h2 = bbox2 + + xi1 = max(x1, x2) + yi1 = max(y1, y2) + xi2 = min(x1 + w1, x2 + w2) + yi2 = min(y1 + h1, y2 + h2) + + inter_area = max(0, xi2 - xi1) * max(0, yi2 - yi1) + union_area = w1 * h1 + w2 * h2 - inter_area + + return inter_area / union_area if union_area > 0 else 0 +``` + +### Feature Similarity +```python +def calculate_similarity(features1, features2): + # HSV histogram similarity + h_sim = cv2.compareHist(features1['h_histogram'], features2['h_histogram'], cv2.HISTCMP_CORREL) + + # Dominant color similarity + color_dist = np.linalg.norm(features1['dominant_colors'] - features2['dominant_colors']) + + # Combined score + return { + 'color_similarity': h_sim, + 'color_distance': color_dist, + 'overall_score': h_sim * 0.7 + (1 - color_dist/255) * 0.3, + } +``` + +--- + +## Output Format + +### appearance.json Structure +```json +{ + "frame_count": 100, + "fps": 30.0, + "frames": [ + { + "frame": 1, + "timestamp": 0.033, + "persons": [ + { + "person_index": 0, + "bbox": {"x": 100, "y": 200, "width": 400, "height": 600}, + "identity_uuid": "xxx-xxx-xxx", + "proportions": { + "eye_width": 50.0, + "body_height": 600.0, + "torso_height": 200.0, + "leg_height": 300.0, + 
"shoulder_width": 150.0, + "head_ratio": 0.08, + "torso_ratio": 0.33, + "leg_ratio": 0.50 + }, + "features": { + "hair": { + "color": {"h_histogram": [...], "dominant_colors": [...]}, + "length": "long", + "style": "straight" + }, + "skin": { + "color": {"h_histogram": [...], "dominant_colors": [...]} + }, + "clothing": { + "upper": { + "color": {...}, + "material": "cotton", + "pattern": "solid", + "sleeve": "short" + }, + "lower": { + "color": {...}, + "length": "long" + } + }, + "accessories": { + "earring": true, + "watch": true, + "shoes_color": {...} + } + } + } + ] + } + ] +} +``` + +--- + +## Dependencies + +### Processor Dependencies +| Processor | Depends On | Reason | +|-----------|------------|--------| +| Appearance | Pose | bbox for region extraction | +| Appearance | Face | identity matching + face landmarks | +| Appearance | MediaPipe | hand landmarks + detailed pose | + +### Data Flow +``` +pose.json → bbox + keypoints +face.json → identity + face landmarks +mediapipe.json → hand landmarks + pose landmarks + ↓ +appearance.json → features + proportions + tracking +``` + +--- + +## Implementation Phases + +### Phase 1: Design Document +- Create this design document +- Define all feature mappings +- Define output format + +### Phase 2: Appearance Processor Refactor +- Add proportion calculation module +- Add feature extraction module +- Integrate Pose + MediaPipe + Face data +- Add IoU matching for pose-face + +### Phase 3: Output Format Update +- Update appearance.json structure +- Update Rust structs +- Update DB schema + +### Phase 4: Testing +- Unit tests for proportion calculation +- Integration tests for full pipeline +- Real video tracking validation + +--- + +## Version History + +| Version | Date | Author | Changes | +|---------|------|--------|---------| +| 1.0.0 | 2025-06-22 | OpenCode | Initial design document | \ No newline at end of file diff --git a/scripts/mediapipe_holistic_processor.py b/scripts/mediapipe_holistic_processor.py index 
35989aa..43ae634 100644 --- a/scripts/mediapipe_holistic_processor.py +++ b/scripts/mediapipe_holistic_processor.py @@ -167,55 +167,56 @@ class MediaPipeHolisticProcessor: "hands": {"left": None, "right": None}, } - # Extract face mesh +# Extract face mesh + height, width = frame.shape[:2] if results.face_landmarks: - person_data["face_mesh"] = self._extract_face_mesh(results.face_landmarks) - + person_data["face_mesh"] = self._extract_face_mesh(results.face_landmarks, width, height) + # Extract pose if results.pose_landmarks: - person_data["pose"] = self._extract_pose(results.pose_landmarks) - + person_data["pose"] = self._extract_pose(results.pose_landmarks, width, height) + # Extract hands if results.left_hand_landmarks: - person_data["hands"]["left"] = self._extract_hand(results.left_hand_landmarks, "left") - + person_data["hands"]["left"] = self._extract_hand(results.left_hand_landmarks, "left", width, height) + if results.right_hand_landmarks: - person_data["hands"]["right"] = self._extract_hand(results.right_hand_landmarks, "right") + person_data["hands"]["right"] = self._extract_hand(results.right_hand_landmarks, "right", width, height) # Calculate bbox from pose landmarks if results.pose_landmarks: landmarks = results.pose_landmarks.landmark x_coords = [lm.x for lm in landmarks if lm.visibility > 0.5] y_coords = [lm.y for lm in landmarks if lm.visibility > 0.5] - + if x_coords and y_coords: x_min, x_max = min(x_coords), max(x_coords) y_min, y_max = min(y_coords), max(y_coords) - - height, width = frame.shape[:2] - + person_data["bbox"] = { "x": int(x_min * width), "y": int(y_min * height), "width": int((x_max - x_min) * width), "height": int((y_max - y_min) * height), } - + return person_data - - def _extract_face_mesh(self, face_landmarks) -> Dict: + + def _extract_face_mesh(self, face_landmarks, width: int, height: int) -> Dict: """ Extract face mesh landmarks and calculate features - + Args: face_landmarks: MediaPipe face landmarks - + width: Frame 
width in pixels + height: Frame height in pixels + Returns: - Dict with landmarks, eye_features, mouth_features + Dict with landmarks (in pixels), eye_features, mouth_features """ landmarks = [] for lm in face_landmarks.landmark: - landmarks.append([lm.x, lm.y, lm.z]) + landmarks.append([int(lm.x * width), int(lm.y * height), lm.z]) # Eye Aspect Ratio (EAR) def calculate_ear(eye_indices): @@ -329,19 +330,21 @@ class MediaPipeHolisticProcessor: }, } - def _extract_pose(self, pose_landmarks) -> Dict: + def _extract_pose(self, pose_landmarks, width: int, height: int) -> Dict: """ Extract pose landmarks and calculate features - + Args: pose_landmarks: MediaPipe pose landmarks - + width: Frame width in pixels + height: Frame height in pixels + Returns: - Dict with landmarks, arm_features, leg_features + Dict with landmarks (in pixels), arm_features, leg_features """ landmarks = [] for lm in pose_landmarks.landmark: - landmarks.append([lm.x, lm.y, lm.z, lm.visibility]) + landmarks.append([int(lm.x * width), int(lm.y * height), lm.z, lm.visibility]) # Helper function to calculate angle def calculate_angle(p1_idx, p2_idx, p3_idx): @@ -450,20 +453,22 @@ class MediaPipeHolisticProcessor: }, } - def _extract_hand(self, hand_landmarks, hand_type: str) -> Dict: + def _extract_hand(self, hand_landmarks, hand_type: str, width: int, height: int) -> Dict: """ Extract hand landmarks and detect gesture - + Args: hand_landmarks: MediaPipe hand landmarks hand_type: "left" or "right" - + width: Frame width in pixels + height: Frame height in pixels + Returns: - Dict with landmarks, gesture + Dict with landmarks (in pixels), gesture """ landmarks = [] for lm in hand_landmarks.landmark: - landmarks.append([lm.x, lm.y, lm.z]) + landmarks.append([int(lm.x * width), int(lm.y * height), lm.z]) # Check finger extensions def is_finger_extended(tip_idx, pip_idx): diff --git a/scripts/tkg_level1_builder.py b/scripts/tkg_level1_builder.py new file mode 100644 index 0000000..bd2f8bf --- /dev/null 
+++ b/scripts/tkg_level1_builder.py @@ -0,0 +1,341 @@ +#!/opt/homebrew/bin/python3.11 +""" +TKG Level 1 Builder - Store Level 1 appearance features in TKG + +Purpose: +1. Extract Level 1 features from pose.json + video frames +2. Store as person_trace nodes in TKG +3. Enable tracking via Level 1 feature similarity + +Level 1 Features: +- body: overall color distribution +- head_top: hair color +- upper_body: upper clothing color +- lower_body: lower clothing color + +Usage: + python tkg_level1_builder.py --file-uuid [--schema ] +""" + +import sys +import os +import json +import argparse +import psycopg2 +import psycopg2.extras +import cv2 + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "utils")) + +from utils.feature_extractor import HierarchicalFeatureExtractor +from utils.proportion_calculator import calculate_proportions, get_head_region + +DB_URL = os.environ.get("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry") +SCHEMA = os.environ.get("DATABASE_SCHEMA", "dev") +OUTPUT_DIR = os.environ.get("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev") + + +def get_conn(): + return psycopg2.connect(DB_URL) + + +def ensure_node(cur, schema, file_uuid, node_type, external_id, label="", properties=None): + """Insert or get graph node""" + cur.execute( + f""" + INSERT INTO {schema}.tkg_nodes (node_type, external_id, file_uuid, label, properties) + VALUES (%s, %s, %s, %s, %s::jsonb) + ON CONFLICT (file_uuid, node_type, external_id) + DO UPDATE SET properties = COALESCE(EXCLUDED.properties, {schema}.tkg_nodes.properties), + label = COALESCE(NULLIF(EXCLUDED.label, ''), {schema}.tkg_nodes.label) + RETURNING id + """, + (node_type, str(external_id), file_uuid, label, json.dumps(properties or {})), + ) + row = cur.fetchone() + return row[0] + + +def extract_level1_features(video_path, pose_json_path): + """ + Extract Level 1 features for each person in each frame + 
+ Args: + video_path: Path to video file + pose_json_path: Path to pose.json + + Returns: + List of (frame, person_index, bbox, level1_features) + """ + with open(pose_json_path) as f: + pose_data = json.load(f) + + cap = cv2.VideoCapture(video_path) + if not cap.isOpened(): + print(f"[TKG-L1] Cannot open video: {video_path}", file=sys.stderr) + return [] + + fps = pose_data.get("fps", 30.0) + extractor = HierarchicalFeatureExtractor() + + results = [] + + for pose_frame in pose_data.get("frames", []): + frame_num = pose_frame["frame"] + persons = pose_frame.get("persons", []) + + if not persons: + continue + + cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num) + ret, frame = cap.read() + + if not ret: + continue + + for person_idx, person in enumerate(persons): + bbox = person.get("bbox", {}) + keypoints = person.get("keypoints", []) + + if bbox.get("width", 0) <= 0 or bbox.get("height", 0) <= 0: + continue + + # Calculate proportions + proportions = calculate_proportions(keypoints, bbox) + + # Get head region + head_region = get_head_region(keypoints) + + # Extract Level 1 features + level1 = extractor.extract_level1(frame, bbox, head_region) + + results.append({ + "frame": frame_num, + "timestamp": pose_frame.get("timestamp", frame_num / fps), + "person_index": person_idx, + "bbox": bbox, + "proportions": proportions, + "level1_features": level1, + }) + + cap.release() + return results + + +def build_person_trace_nodes(cur, schema, file_uuid, level1_data): + """ + Build person_trace nodes with Level 1 features + + Args: + cur: Database cursor + schema: Database schema + file_uuid: File UUID + level1_data: Level 1 extracted features + """ + print("[TKG-L1] Building person_trace nodes...") + + # Group by person (assuming person_index consistency across frames) + person_groups = {} + for item in level1_data: + person_idx = item["person_index"] + if person_idx not in person_groups: + person_groups[person_idx] = [] + person_groups[person_idx].append(item) + + count = 0 + 
for person_idx, items in person_groups.items(): + if not items: + continue + + # Aggregate Level 1 features (average across frames) + body_colors = [] + head_colors = [] + upper_colors = [] + lower_colors = [] + + frames = [] + bboxes = [] + + for item in items: + l1 = item["level1_features"] + frames.append(item["frame"]) + bboxes.append(item["bbox"]) + + if "body" in l1 and "color" in l1["body"]: + body_colors.append(l1["body"]["color"].get("dominant_colors", [])) + + if "head_top" in l1 and "color" in l1["head_top"]: + head_colors.append(l1["head_top"]["color"].get("dominant_colors", [])) + + if "upper_body" in l1 and "color" in l1["upper_body"]: + upper_colors.append(l1["upper_body"]["color"].get("dominant_colors", [])) + + if "lower_body" in l1 and "color" in l1["lower_body"]: + lower_colors.append(l1["lower_body"]["color"].get("dominant_colors", [])) + + # Average dominant colors + avg_body_color = average_colors(body_colors) if body_colors else [] + avg_head_color = average_colors(head_colors) if head_colors else [] + avg_upper_color = average_colors(upper_colors) if upper_colors else [] + avg_lower_color = average_colors(lower_colors) if lower_colors else [] + + # Build node properties + external_id = f"person_{person_idx}" + label = f"Person {person_idx}" + + # Get average height and body shape + avg_height_estimate = {} + avg_body_shape = {} + + for item in items: + props = item.get("proportions", {}) + if "height_estimate" in props: + if not avg_height_estimate: + avg_height_estimate = props["height_estimate"] + if "body_shape" in props: + if not avg_body_shape: + avg_body_shape = props["body_shape"] + + properties = { + "frame_count": len(frames), + "frames": frames, + "avg_bbox": average_bbox(bboxes) if bboxes else {}, + "height_estimate": avg_height_estimate, + "body_shape": avg_body_shape, + "level1_features": { + "body": { + "dominant_colors": avg_body_color, + "h_mean": average_h_mean(items, "body"), + }, + "head_top": { + "dominant_colors": 
avg_head_color, + "h_mean": average_h_mean(items, "head_top"), + }, + "upper_body": { + "dominant_colors": avg_upper_color, + "h_mean": average_h_mean(items, "upper_body"), + }, + "lower_body": { + "dominant_colors": avg_lower_color, + "h_mean": average_h_mean(items, "lower_body"), + }, + }, + } + + # Store node + ensure_node(cur, schema, file_uuid, "person_trace", external_id, label, properties) + count += 1 + print(f"[TKG-L1] Created person_trace node: {external_id} ({len(frames)} frames)") + + print(f"[TKG-L1] Total: {count} person_trace nodes") + return count + + +def average_colors(color_lists): + """Average multiple color lists""" + if not color_lists: + return [] + + valid_colors = [c for c in color_lists if c] + if not valid_colors: + return [] + + # Average first dominant color + first_colors = [c[0] if c else [0, 0, 0] for c in valid_colors] + avg = [sum(x) / len(x) for x in zip(*first_colors)] + return [round(x, 2) for x in avg] + + +def average_h_mean(items, region): + """Average H mean from Level 1 items""" + h_means = [] + for item in items: + l1 = item["level1_features"] + if region in l1 and "color" in l1[region]: + h_mean = l1[region]["color"].get("h_mean", 0) + if h_mean: + h_means.append(h_mean) + + return round(sum(h_means) / len(h_means), 2) if h_means else 0 + + +def average_bbox(bboxes): + """Average bbox across frames""" + if not bboxes: + return {} + + avg_x = sum(b.get("x", 0) for b in bboxes) / len(bboxes) + avg_y = sum(b.get("y", 0) for b in bboxes) / len(bboxes) + avg_w = sum(b.get("width", 0) for b in bboxes) / len(bboxes) + avg_h = sum(b.get("height", 0) for b in bboxes) / len(bboxes) + + return { + "x": round(avg_x, 1), + "y": round(avg_y, 1), + "width": round(avg_w, 1), + "height": round(avg_h, 1), + } + + +def main(): + parser = argparse.ArgumentParser(description="TKG Level 1 Builder") + parser.add_argument("--file-uuid", "-u", required=True, help="File UUID") + parser.add_argument("--schema", "-s", default=SCHEMA, help="Database 
schema") + parser.add_argument("--video", "-v", help="Video path (optional, auto-detected)") + parser.add_argument("--pose-json", "-p", help="Pose JSON path (optional, auto-detected)") + args = parser.parse_args() + + file_uuid = args.file_uuid + schema = args.schema + + # Auto-detect paths + video_path = args.video or f"{OUTPUT_DIR}/{file_uuid}.mp4" + pose_json_path = args.pose_json or f"{OUTPUT_DIR}/{file_uuid}.pose.json" + + # Check files exist + if not os.path.exists(video_path): + print(f"[TKG-L1] Video not found: {video_path}", file=sys.stderr) + sys.exit(1) + + if not os.path.exists(pose_json_path): + print(f"[TKG-L1] Pose JSON not found: {pose_json_path}", file=sys.stderr) + sys.exit(1) + + print(f"[TKG-L1] Processing: {file_uuid}") + print(f"[TKG-L1] Video: {video_path}") + print(f"[TKG-L1] Pose: {pose_json_path}") + + # Extract Level 1 features + print("[TKG-L1] Extracting Level 1 features...") + level1_data = extract_level1_features(video_path, pose_json_path) + + if not level1_data: + print("[TKG-L1] No Level 1 data extracted", file=sys.stderr) + sys.exit(1) + + print(f"[TKG-L1] Extracted: {len(level1_data)} frame-person pairs") + + # Connect to DB + conn = get_conn() + cur = conn.cursor() + + try: + # Build person_trace nodes + count = build_person_trace_nodes(cur, schema, file_uuid, level1_data) + + conn.commit() + print(f"[TKG-L1] Success: {count} person_trace nodes created") + + except Exception as e: + conn.rollback() + print(f"[TKG-L1] Error: {e}", file=sys.stderr) + sys.exit(1) + + finally: + cur.close() + conn.close() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/utils/feature_extractor.py b/scripts/utils/feature_extractor.py new file mode 100644 index 0000000..ee06737 --- /dev/null +++ b/scripts/utils/feature_extractor.py @@ -0,0 +1,684 @@ +#!/opt/homebrew/bin/python3.11 +""" +Feature Extractor - Appearance feature extraction from video frames + +Purpose: +1. 
class FeatureExtractor:
    """
    Extract appearance features (color, texture, pattern, material) from
    BGR image regions.

    Every extractor accepts ``None`` or a zero-sized ROI and answers with a
    small placeholder dict instead of raising, so callers can pass crops
    that fell outside the frame.
    """

    def __init__(self):
        self.lbp_radius = 1
        self.lbp_points = 8
        self.dominant_color_k = 5

    @staticmethod
    def _is_empty(roi: Optional[np.ndarray]) -> bool:
        """Return True when there are no pixels to analyse."""
        return roi is None or roi.size == 0

    @staticmethod
    def _crop(frame: np.ndarray, x0, y0, x1, y1) -> np.ndarray:
        """
        Return ``frame[y0:y1, x0:x1]`` with all bounds clamped to the frame.

        Clamping matters because a negative start index would otherwise be
        interpreted by NumPy as "count from the end" and silently select
        the wrong (usually empty) area.
        """
        frame_h, frame_w = frame.shape[:2]
        left = min(max(int(x0), 0), frame_w)
        right = min(max(int(x1), 0), frame_w)
        top = min(max(int(y0), 0), frame_h)
        bottom = min(max(int(y1), 0), frame_h)
        return frame[top:bottom, left:right]

    def extract_color(self, roi: np.ndarray) -> Dict:
        """
        Extract color features from ROI

        Args:
            roi: Image region (BGR)

        Returns:
            Dict with normalized H/S/V histograms, up to
            ``dominant_color_k`` dominant HSV colors (most frequent first)
            and the per-channel means; ``{'error': 'empty_roi'}`` when the
            ROI has no pixels.
        """
        if self._is_empty(roi):
            return {'error': 'empty_roi'}

        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

        # (output key, channel index, bin count, exclusive upper bound)
        histogram_specs = (
            ('h_histogram', 0, 30, 180),
            ('s_histogram', 1, 32, 256),
            ('v_histogram', 2, 32, 256),
        )
        histograms = {}
        for key, channel, bins, upper in histogram_specs:
            hist = cv2.calcHist([hsv], [channel], None, [bins], [0, upper]).flatten()
            # "or 1" avoids a division by zero on an all-zero histogram
            histograms[key] = (hist / (hist.sum() or 1)).tolist()

        # Dominant colors via k-means over all HSV pixels
        pixels = hsv.reshape(-1, 3).astype(np.float32)
        cluster_count = self.dominant_color_k
        dominant_colors = []

        if len(pixels) >= cluster_count:
            criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
            _, labels, centers = cv2.kmeans(
                pixels, cluster_count, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS
            )
            counts = np.bincount(labels.flatten())
            dominant_colors = centers[np.argsort(-counts)[:cluster_count]].tolist()
        elif len(pixels) > 0:
            # Too few pixels to cluster: fall back to the mean color
            dominant_colors = [pixels.mean(axis=0).tolist()]

        return {
            **histograms,
            'dominant_colors': dominant_colors,
            'h_mean': round(np.mean(hsv[:, :, 0]), 2),
            's_mean': round(np.mean(hsv[:, :, 1]), 2),
            'v_mean': round(np.mean(hsv[:, :, 2]), 2),
        }

    def extract_texture(self, roi: np.ndarray) -> Dict:
        """
        Extract texture features from ROI

        Args:
            roi: Image region (BGR)

        Returns:
            Dict with the normalized LBP histogram, LBP mean/variance and
            brightness ("shininess") statistics of the V channel;
            ``{'error': 'empty_roi'}`` when the ROI has no pixels.
        """
        if self._is_empty(roi):
            return {'error': 'empty_roi'}

        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

        # LBP texture
        lbp = local_binary_pattern(gray, self.lbp_points, self.lbp_radius)
        lbp_hist = np.histogram(lbp, bins=256, range=(0, 256))[0]

        # Shininess is approximated from the brightness (V) channel
        v_values = hsv[:, :, 2].flatten()
        high_brightness_ratio = np.sum(v_values > 200) / len(v_values)

        return {
            'lbp_histogram': (lbp_hist / lbp_hist.sum()).tolist(),
            'lbp_variance': round(np.var(lbp), 2),
            'lbp_mean': round(np.mean(lbp), 2),
            'brightness': round(np.mean(v_values), 2),
            'brightness_std': round(np.std(v_values), 2),
            'brightness_max': int(np.max(v_values)),
            'shininess_ratio': round(high_brightness_ratio, 4),
        }

    def detect_pattern(self, roi: np.ndarray) -> Dict:
        """
        Detect clothing pattern

        Args:
            roi: Image region (BGR)

        Returns:
            Dict with a rule-based pattern label ('solid', 'striped',
            'patterned' or 'unknown'), its confidence and the measurements
            the rules were applied to.
        """
        if self._is_empty(roi):
            return {'pattern': 'unknown', 'confidence': 0.0}

        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

        # Edge density
        edges = cv2.Canny(gray, 50, 150)
        edge_ratio = np.sum(edges > 0) / edges.size

        # Mean gradient magnitude
        sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
        sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
        gradient_mean = np.mean(np.sqrt(sobelx**2 + sobely**2))

        # Hue / saturation spread
        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
        h_std = np.std(hsv[:, :, 0])
        s_std = np.std(hsv[:, :, 1])

        # Rule-based classification; 'solid' is the default.
        # NOTE(review): the nesting of the 'striped' branch was reconstructed
        # from a whitespace-collapsed source - confirm against the original.
        pattern = 'solid'
        confidence = 0.7

        if edge_ratio > 0.1 and gradient_mean > 20:
            if h_std > 30:
                pattern = 'patterned'
                confidence = 0.8
            elif edge_ratio > 0.2:
                pattern = 'striped'
                confidence = 0.6

        # Strong saturation variation overrides the decision above
        if s_std > 50 and gradient_mean > 30:
            pattern = 'patterned'
            confidence = 0.85

        return {
            'pattern': pattern,
            'confidence': confidence,
            'edge_ratio': round(edge_ratio, 4),
            'gradient_mean': round(gradient_mean, 2),
            'color_variance': round(h_std, 2),
        }

    def classify_material(self, roi: np.ndarray, texture: Optional[Dict] = None) -> Dict:
        """
        Classify clothing material

        Args:
            roi: Image region (BGR)
            texture: Optional result of ``extract_texture(roi)``; pass it
                when already computed to avoid a second LBP pass.

        Returns:
            Dict with the rule-based material label, its confidence and the
            texture features the rules were applied to.
        """
        if self._is_empty(roi):
            return {'material': 'unknown', 'confidence': 0.0}

        if texture is None:
            texture = self.extract_texture(roi)

        lbp_var = texture.get('lbp_variance', 0)
        shininess = texture.get('shininess_ratio', 0)
        brightness = texture.get('brightness', 0)

        # First matching rule wins
        material = 'unknown'
        confidence = 0.0
        if shininess > 0.1 and brightness > 150:
            material, confidence = 'silk', 0.7
        elif shininess > 0.05 and lbp_var > 50:
            material, confidence = 'leather', 0.6
        elif lbp_var > 100:
            material, confidence = 'denim', 0.65
        elif lbp_var < 20 and shininess < 0.02:
            material, confidence = 'cotton', 0.6
        elif lbp_var < 50 and brightness < 100:
            material, confidence = 'polyester', 0.5

        return {
            'material': material,
            'confidence': confidence,
            'texture_features': texture,
        }

    def extract_all(self, roi: np.ndarray) -> Dict:
        """
        Extract all features from ROI

        Args:
            roi: Image region (BGR)

        Returns:
            Dict with 'color', 'texture', 'pattern' and 'material' entries
        """
        texture = self.extract_texture(roi)
        return {
            'color': self.extract_color(roi),
            'texture': texture,
            'pattern': self.detect_pattern(roi),
            # Reuse the texture pass; hand over a copy so the two entries
            # stay independent dicts.
            'material': self.classify_material(roi, texture=dict(texture)),
        }

    def extract_split_region(
        self,
        frame: np.ndarray,
        region: Dict,
        split_ratio: float = 0.5
    ) -> Dict:
        """
        Extract features from split region (upper/lower)

        The region is clamped to the frame before cropping, so a box that
        partly leaves the image yields the visible part instead of a
        wrapped-around slice.

        Args:
            frame: Full frame
            region: Region dict {'x', 'y', 'width', 'height'}
            split_ratio: Fraction of the height given to the upper part
                (0.5 = 50%)

        Returns:
            Dict with 'upper' and 'lower' features, or
            ``{'error': 'invalid_region'}`` for a non-positive size
        """
        x, y, w, h = (int(region[key]) for key in ('x', 'y', 'width', 'height'))

        if w <= 0 or h <= 0:
            return {'error': 'invalid_region'}

        mid_y = y + int(h * split_ratio)

        upper_roi = self._crop(frame, x, y, x + w, mid_y) if mid_y > y else None
        lower_roi = self._crop(frame, x, mid_y, x + w, y + h) if y + h > mid_y else None

        return {
            'upper': self.extract_all(upper_roi) if upper_roi is not None else {'error': 'empty'},
            'lower': self.extract_all(lower_roi) if lower_roi is not None else {'error': 'empty'},
        }

    def detect_exposed_skin(self, roi: np.ndarray) -> Dict:
        """
        Detect exposed skin in ROI

        Args:
            roi: Image region (BGR)

        Returns:
            Dict with the fraction of skin-colored pixels and a plain
            ``bool`` flag that is True when that fraction exceeds 0.3
        """
        if self._is_empty(roi):
            return {'skin_ratio': 0.0, 'skin_detected': False}

        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

        # Skin color range (HSV)
        # H: 0-50 (skin tones)
        # S: 10-150 (not too saturated)
        # V: 50-255 (visible)
        skin_mask = cv2.inRange(hsv, (0, 10, 50), (50, 150, 255))
        skin_ratio = np.sum(skin_mask > 0) / skin_mask.size

        return {
            'skin_ratio': round(skin_ratio, 4),
            # bool(): the comparison yields numpy.bool_, which json.dumps rejects
            'skin_detected': bool(skin_ratio > 0.3),
        }

    @staticmethod
    def _histogram_correlation(hist_a, hist_b) -> float:
        """Correlation of two histograms, or 0.0 if either one is missing."""
        a = np.array(hist_a).astype(np.float32)
        b = np.array(hist_b).astype(np.float32)
        if len(a) == 0 or len(b) == 0:
            return 0.0
        return round(cv2.compareHist(a, b, cv2.HISTCMP_CORREL), 4)

    def calculate_similarity(self, features1: Dict, features2: Dict) -> Dict:
        """
        Calculate similarity between two feature sets

        Args:
            features1: First feature dict (as returned by ``extract_all``)
            features2: Second feature dict

        Returns:
            Dict with 'color_similarity', 'color_distance' and
            'texture_similarity' (each only when both inputs carry the
            matching section) plus 'overall_score'
            (0.7 * color + 0.3 * texture).
        """
        scores = {}

        if 'color' in features1 and 'color' in features2:
            color1, color2 = features1['color'], features2['color']

            # Hue histogram correlation
            scores['color_similarity'] = self._histogram_correlation(
                color1.get('h_histogram', []), color2.get('h_histogram', [])
            )

            # Distance between the most dominant colors
            dc1 = np.array(color1.get('dominant_colors', [[0, 0, 0]]))
            dc2 = np.array(color2.get('dominant_colors', [[0, 0, 0]]))
            if len(dc1) > 0 and len(dc2) > 0:
                scores['color_distance'] = round(np.linalg.norm(dc1[0] - dc2[0]), 2)
            else:
                scores['color_distance'] = 255.0

        if 'texture' in features1 and 'texture' in features2:
            scores['texture_similarity'] = self._histogram_correlation(
                features1['texture'].get('lbp_histogram', []),
                features2['texture'].get('lbp_histogram', []),
            )

        # Overall score
        color_sim = scores.get('color_similarity', 0)
        texture_sim = scores.get('texture_similarity', 0)
        scores['overall_score'] = round(color_sim * 0.7 + texture_sim * 0.3, 4)

        return scores


# Helper functions for specific feature extraction

def extract_hair_color(frame: np.ndarray, head_region: Dict) -> Dict:
    """Extract hair color from the upper half of the head region (clamped to the frame)."""
    extractor = FeatureExtractor()
    x, y, w, h = head_region['x'], head_region['y'], head_region['width'], head_region['height']

    # Focus on upper part of head (hair area)
    hair_roi = FeatureExtractor._crop(frame, x, y, x + w, y + int(h * 0.5))

    return extractor.extract_color(hair_roi)
def _crop_box(frame: np.ndarray, x0, y0, x1, y1) -> np.ndarray:
    """
    Return ``frame[y0:y1, x0:x1]`` with all bounds clamped to the frame.

    Without clamping a negative start index is read by NumPy as "count from
    the end", which turns a box touching the image border into an empty or
    unrelated crop.
    """
    frame_h, frame_w = frame.shape[:2]
    left = min(max(int(x0), 0), frame_w)
    right = min(max(int(x1), 0), frame_w)
    top = min(max(int(y0), 0), frame_h)
    bottom = min(max(int(y1), 0), frame_h)
    return frame[top:bottom, left:right]


def extract_skin_color(frame: np.ndarray, face_region: Dict) -> Dict:
    """Extract skin color from face region"""
    extractor = FeatureExtractor()
    x, y, w, h = face_region['x'], face_region['y'], face_region['width'], face_region['height']

    face_roi = _crop_box(frame, x, y, x + w, y + h)

    return extractor.extract_color(face_roi)


def extract_clothing_color(frame: np.ndarray, torso_region: Dict) -> Dict:
    """Extract clothing color from torso region (upper/lower split)"""
    extractor = FeatureExtractor()

    return extractor.extract_split_region(frame, torso_region)


def extract_accessory_color(frame: np.ndarray, accessory_region: Dict) -> Dict:
    """Extract accessory color from region"""
    extractor = FeatureExtractor()
    x, y, w, h = (
        accessory_region['x'],
        accessory_region['y'],
        accessory_region['width'],
        accessory_region['height'],
    )

    roi = _crop_box(frame, x, y, x + w, y + h)

    return extractor.extract_color(roi)


class HierarchicalFeatureExtractor:
    """
    Hierarchical feature extraction: coarse → fine

    Level 1: Large regions (body bbox, upper/lower body)
    Level 2: Medium regions (head, face, arms, legs)
    Level 3: Fine features (accessories, details)
    """

    # Keypoints at or below this confidence are ignored for accessory crops
    MIN_KEYPOINT_CONFIDENCE = 0.1

    def __init__(self):
        self.extractor = FeatureExtractor()

    def extract_level1(self, frame: np.ndarray, bbox: Dict, head_region: Optional[Dict] = None) -> Dict:
        """
        Level 1: Extract large region features

        Args:
            frame: Full frame
            bbox: Full body bbox {'x', 'y', 'width', 'height'}
            head_region: Optional head region for hair extraction

        Returns:
            Dict with body, head_top, upper_body and lower_body features,
            or ``{'error': 'invalid_bbox'}`` for a non-positive size
        """
        x, y, w, h = (int(bbox[key]) for key in ('x', 'y', 'width', 'height'))

        if w <= 0 or h <= 0:
            return {'error': 'invalid_bbox'}

        # Full body region
        body_features = self.extractor.extract_all(_crop_box(frame, x, y, x + w, y + h))

        # Split upper/lower body (50%)
        mid_y = y + h // 2
        upper_roi = _crop_box(frame, x, y, x + w, mid_y) if mid_y > y else None
        lower_roi = _crop_box(frame, x, mid_y, x + w, y + h) if y + h > mid_y else None

        upper_features = self.extractor.extract_all(upper_roi) if upper_roi is not None else {}
        lower_features = self.extractor.extract_all(lower_roi) if lower_roi is not None else {}

        # Head top (hair) - part of Level 1
        head_features = {}
        if head_region is not None:
            head_roi = self._get_roi(frame, head_region)
            if head_roi is not None:
                head_features = self.extractor.extract_all(head_roi)

        return {
            'level': 1,
            'body': body_features,
            'head_top': head_features,
            'upper_body': upper_features,
            'lower_body': lower_features,
            'bbox': bbox,
        }

    def extract_level2(
        self,
        frame: np.ndarray,
        regions: Dict
    ) -> Dict:
        """
        Level 2: Extract medium region features

        Args:
            frame: Full frame
            regions: Dict that may contain 'face', 'torso', 'leg',
                'left_arm' and 'right_arm' regions; ``None`` is treated as
                an empty dict

        Returns:
            Dict with one entry per region that was supplied; the face
            entry additionally carries a 'skin' sub-dict
        """
        features = {'level': 2}
        regions = regions or {}

        for name in ('face', 'torso', 'leg', 'left_arm', 'right_arm'):
            if name not in regions:
                continue
            roi = self._get_roi(frame, regions[name])
            features[name] = self.extractor.extract_all(roi) if roi is not None else {}
            if name == 'face':
                # Skin detection is only meaningful on the face crop
                features[name]['skin'] = (
                    self.extractor.detect_exposed_skin(roi) if roi is not None else {}
                )

        return features

    def extract_level3(
        self,
        frame: np.ndarray,
        keypoints: List[Dict],
        eye_width: float
    ) -> Dict:
        """
        Level 3: Extract fine features (accessories, details)

        Crops a small box around eyes, ears, wrists and ankles. The box
        half-size is ``eye_width * 0.5`` pixels (20 when eye_width is not
        positive); shoe boxes extend twice that far below the ankle.

        Args:
            frame: Full frame
            keypoints: Pose keypoints (``None`` is treated as empty)
            eye_width: Eye distance (reference unit)

        Returns:
            Dict with 'glasses', '<side>_earring', '<side>_watch' and
            '<side>_shoe' entries for the keypoints that were usable
        """
        features = {'level': 3}
        keypoints = keypoints or []

        # Estimate accessory regions from keypoints
        offset = int(eye_width * 0.5) if eye_width > 0 else 20

        # Glasses (eye region). The box is built from min/max of both eyes
        # because the subject's left eye is usually on the image's right.
        left_eye = self._get_kp(keypoints, 'left_eye')
        right_eye = self._get_kp(keypoints, 'right_eye')
        if left_eye and right_eye:
            glasses_roi = self._keypoint_roi(frame, [left_eye, right_eye], offset)
            features['glasses'] = self.extractor.extract_all(glasses_roi)

        # (keypoint suffix, output suffix, extra padding below the point)
        accessory_specs = (
            ('ear', 'earring', None),     # Earrings (ear positions)
            ('wrist', 'watch', None),     # Watch (wrist position)
            ('ankle', 'shoe', offset * 2),  # Shoes (ankle positions)
        )
        for joint, label, pad_below in accessory_specs:
            for side in ('left', 'right'):
                kp = self._get_kp(keypoints, f'{side}_{joint}')
                if kp and kp.get('confidence', 0) > self.MIN_KEYPOINT_CONFIDENCE:
                    roi = self._keypoint_roi(frame, [kp], offset, pad_below)
                    features[f'{side}_{label}'] = self.extractor.extract_all(roi)

        return features

    def extract_hierarchical(
        self,
        frame: np.ndarray,
        bbox: Dict,
        regions: Dict,
        keypoints: List[Dict],
        eye_width: float
    ) -> Dict:
        """
        Full hierarchical extraction: Level 1 → Level 2 → Level 3

        Args:
            frame: Full frame
            bbox: Full body bbox
            regions: Medium regions dict (includes 'head' for Level 1)
            keypoints: Pose keypoints
            eye_width: Reference unit

        Returns:
            Dict with 'level1', 'level2' and 'level3' feature dicts
        """
        head_region = regions.get('head') if regions else None

        return {
            'level1': self.extract_level1(frame, bbox, head_region),
            'level2': self.extract_level2(frame, regions),
            'level3': self.extract_level3(frame, keypoints, eye_width),
        }

    @staticmethod
    def _keypoint_roi(
        frame: np.ndarray,
        points: List[Dict],
        pad: int,
        pad_below: Optional[int] = None
    ) -> np.ndarray:
        """
        Crop the bounding box of ``points`` grown by ``pad`` pixels.

        Args:
            frame: Full frame
            points: Keypoint dicts with 'x' and 'y'
            pad: Padding applied left, right and above
            pad_below: Padding applied below; defaults to ``pad``

        Returns:
            The crop, clamped to the frame (possibly zero-sized)
        """
        xs = [p['x'] for p in points]
        ys = [p['y'] for p in points]
        below = pad if pad_below is None else pad_below
        return _crop_box(
            frame,
            int(min(xs) - pad),
            int(min(ys) - pad),
            int(max(xs) + pad),
            int(max(ys) + below),
        )

    def _get_roi(self, frame: np.ndarray, region: Dict) -> Optional[np.ndarray]:
        """Get ROI from frame using region dict (clamped to the frame); None if the region is missing or has no area"""
        if region is None:
            return None
        x, y, w, h = (int(region.get(key, 0)) for key in ('x', 'y', 'width', 'height'))
        if w <= 0 or h <= 0:
            return None
        return _crop_box(frame, x, y, x + w, y + h)

    def _get_kp(self, keypoints: List[Dict], name: str) -> Optional[Dict]:
        """Get keypoint by name (first match), or None"""
        return next((kp for kp in keypoints if kp.get('name') == name), None)


if __name__ == '__main__':
    # Test with sample image
    import sys

    if len(sys.argv) > 1:
        img_path = sys.argv[1]
        img = cv2.imread(img_path)

        if img is not None:
            extractor = FeatureExtractor()

            # Extract from full image
            features = extractor.extract_all(img)

            print("Color features:")
            print(f"  H mean: {features['color']['h_mean']}")
            print(f"  S mean: {features['color']['s_mean']}")
            print(f"  V mean: {features['color']['v_mean']}")
            print(f"  Dominant colors: {len(features['color']['dominant_colors'])}")

            print("\nTexture features:")
            print(f"  LBP variance: {features['texture']['lbp_variance']}")
            print(f"  Brightness: {features['texture']['brightness']}")
            print(f"  Shininess: {features['texture']['shininess_ratio']}")

            print("\nPattern:")
            print(f"  {features['pattern']['pattern']} (conf: {features['pattern']['confidence']})")

            print("\nMaterial:")
            print(f"  {features['material']['material']} (conf: {features['material']['confidence']})")
    else:
        # The argument placeholder was missing from the usage text
        print("Usage: python feature_extractor.py <image_path>")
import numpy as np
from typing import Dict, List, Optional, Tuple


# Keypoints whose confidence does not clear this value are treated as missing
MIN_CONFIDENCE = 0.1

# Assumed real-world eye distance used to convert pixel ratios to centimetres
ASSUMED_EYE_WIDTH_CM = 6.0

# MediaPipe pose landmark index to name mapping
MEDIAPIPE_POSE_NAMES = {
    0: 'nose',
    1: 'left_eye_inner',
    2: 'left_eye',
    3: 'left_eye_outer',
    4: 'right_eye_inner',
    5: 'right_eye',
    6: 'right_eye_outer',
    7: 'left_ear',
    8: 'right_ear',
    9: 'mouth_left',
    10: 'mouth_right',
    11: 'left_shoulder',
    12: 'right_shoulder',
    13: 'left_elbow',
    14: 'right_elbow',
    15: 'left_wrist',
    16: 'right_wrist',
    17: 'left_pinky',
    18: 'right_pinky',
    19: 'left_index',
    20: 'right_index',
    21: 'left_thumb',
    22: 'right_thumb',
    23: 'left_hip',
    24: 'right_hip',
    25: 'left_knee',
    26: 'right_knee',
    27: 'left_ankle',
    28: 'right_ankle',
    29: 'left_heel',
    30: 'right_heel',
    31: 'left_foot_index',
    32: 'right_foot_index',
}


def convert_mediapipe_to_named(landmarks: List[List]) -> List[Dict]:
    """
    Convert MediaPipe landmarks [x,y,z,vis] to named keypoints format

    Args:
        landmarks: MediaPipe landmarks [[x, y, z, visibility], ...]

    Returns:
        Named keypoints [{'name': 'nose', 'x': 100, 'y': 200, 'confidence': 0.9}, ...];
        a landmark without a visibility value gets confidence 1.0
    """
    return [
        {
            'name': MEDIAPIPE_POSE_NAMES[index],
            'x': lm[0],
            'y': lm[1],
            'confidence': lm[3] if len(lm) > 3 else 1.0,
        }
        for index, lm in enumerate(landmarks)
        if index in MEDIAPIPE_POSE_NAMES
    ]


def get_keypoint_by_name(keypoints: List[Dict], name: str) -> Optional[Dict]:
    """
    Get keypoint by name from keypoints list

    Args:
        keypoints: List of keypoints [{'name': 'nose', 'x': 100, 'y': 200, 'confidence': 0.9}, ...]
        name: Keypoint name to find

    Returns:
        First keypoint dict with that name, or None if not found
    """
    return next((kp for kp in keypoints if kp.get('name') == name), None)


def calculate_distance(p1: Dict, p2: Dict) -> float:
    """
    Calculate Euclidean distance between two keypoints

    Args:
        p1: Keypoint {'x': float, 'y': float}
        p2: Keypoint {'x': float, 'y': float}

    Returns:
        Distance in pixels (0.0 if either keypoint is None)
    """
    if p1 is None or p2 is None:
        return 0.0
    return float(np.hypot(p1['x'] - p2['x'], p1['y'] - p2['y']))


def _confident_y(kp: Optional[Dict]) -> Optional[float]:
    """Return the keypoint's y if its confidence exceeds MIN_CONFIDENCE, else None."""
    if kp and kp.get('confidence', 0) > MIN_CONFIDENCE:
        return kp['y']
    return None


def _confident_ys(keypoints: List[Dict], names: Tuple[str, ...]) -> List[float]:
    """Collect the y values of the named keypoints that are confident."""
    candidates = (_confident_y(get_keypoint_by_name(keypoints, name)) for name in names)
    # Compare against None explicitly: y == 0 is a valid pixel row
    return [y for y in candidates if y is not None]


def _pair_width(keypoints: List[Dict], first: str, second: str) -> float:
    """Distance between two named keypoints, or 0.0 if either is missing or below MIN_CONFIDENCE."""
    a = get_keypoint_by_name(keypoints, first)
    b = get_keypoint_by_name(keypoints, second)
    if a is None or b is None:
        return 0.0
    if a.get('confidence', 0) < MIN_CONFIDENCE or b.get('confidence', 0) < MIN_CONFIDENCE:
        return 0.0
    return calculate_distance(a, b)


def calculate_eye_width(keypoints: List[Dict]) -> float:
    """
    Calculate eye distance (reference unit)

    Args:
        keypoints: Pose keypoints list

    Returns:
        Eye width in pixels, 0.0 if an eye is missing or unreliable
    """
    return _pair_width(keypoints, 'left_eye', 'right_eye')


def calculate_body_height(keypoints: List[Dict], bbox: Optional[Dict] = None) -> float:
    """
    Calculate full body height (nose to lowest ankle)

    Assumes keypoints are in Top-Left pixel coordinates, i.e. a larger y is
    lower in the image.

    Args:
        keypoints: Pose keypoints list (Top-Left pixels)
        bbox: Optional bbox {'x', 'y', 'width', 'height'} used when no
            ankle is reliable

    Returns:
        Body height in pixels, 0.0 when it cannot be determined
    """
    nose = get_keypoint_by_name(keypoints, 'nose')
    if nose is None:
        return 0.0

    # Max Y = bottom of body in Top-Left system
    ankle_ys = _confident_ys(keypoints, ('left_ankle', 'right_ankle'))
    if ankle_ys:
        return max(ankle_ys) - nose['y']

    # Fallback to bbox height
    if bbox and bbox.get('height', 0) > 0:
        return bbox['height']

    return 0.0


def calculate_torso_height(keypoints: List[Dict]) -> float:
    """
    Calculate torso height (neck to hip)

    Assumes keypoints are in Top-Left pixel coordinates. When the neck is
    missing it is estimated as half an eye width below the nose.

    Args:
        keypoints: Pose keypoints list

    Returns:
        Torso height in pixels, 0.0 when neck or hips are unavailable
    """
    neck_y = _confident_y(get_keypoint_by_name(keypoints, 'neck'))

    # Fallback: estimate neck from nose + eye_width
    if neck_y is None:
        nose = get_keypoint_by_name(keypoints, 'nose')
        eye_width = calculate_eye_width(keypoints)
        if nose and eye_width > 0:
            neck_y = nose['y'] + eye_width * 0.5

    # Hip position is the average of the reliable hips
    hip_ys = _confident_ys(keypoints, ('left_hip', 'right_hip'))

    if neck_y is None or not hip_ys:
        return 0.0
    return sum(hip_ys) / len(hip_ys) - neck_y


def calculate_leg_height(keypoints: List[Dict]) -> float:
    """
    Calculate leg height (hip to ankle)

    Assumes keypoints are in Top-Left pixel coordinates

    Args:
        keypoints: Pose keypoints list

    Returns:
        Leg height in pixels, 0.0 when hips or ankles are unavailable
    """
    hip_ys = _confident_ys(keypoints, ('left_hip', 'right_hip'))
    ankle_ys = _confident_ys(keypoints, ('left_ankle', 'right_ankle'))

    if not hip_ys or not ankle_ys:
        return 0.0
    # Average hip row to lowest ankle row
    return max(ankle_ys) - sum(hip_ys) / len(hip_ys)


def calculate_shoulder_width(keypoints: List[Dict]) -> float:
    """
    Calculate shoulder width

    Args:
        keypoints: Pose keypoints list

    Returns:
        Shoulder width in pixels, 0.0 if a shoulder is missing or unreliable
    """
    return _pair_width(keypoints, 'left_shoulder', 'right_shoulder')


def calculate_should_width(keypoints: List[Dict]) -> float:
    """Misspelled alias of ``calculate_shoulder_width``, kept for existing callers."""
    return calculate_shoulder_width(keypoints)


def calculate_chest_width(keypoints: List[Dict]) -> float:
    """
    Calculate chest/bust width (shoulder width as approximation)

    Args:
        keypoints: Pose keypoints list

    Returns:
        Chest width in pixels
    """
    return calculate_shoulder_width(keypoints)


def calculate_waist_width(keypoints: List[Dict]) -> float:
    """
    Calculate waist width (hip width as approximation)

    Args:
        keypoints: Pose keypoints list

    Returns:
        Waist width in pixels, 0.0 if a hip is missing or unreliable
    """
    return _pair_width(keypoints, 'left_hip', 'right_hip')


def calculate_hip_width(keypoints: List[Dict]) -> float:
    """
    Calculate hip width

    Args:
        keypoints: Pose keypoints list

    Returns:
        Hip width in pixels
    """
    return calculate_waist_width(keypoints)


def calculate_body_shape(keypoints: List[Dict]) -> Dict:
    """
    Calculate body shape (chest / waist / hip measurements)

    NOTE: waist and hip are both approximated by the hip keypoints, so
    ``waist_hip_ratio`` is 1.0 whenever it is defined and the branches that
    depend on it ('hourglass', 'triangle') cannot trigger until a separate
    waist measurement exists.

    Args:
        keypoints: Pose keypoints list

    Returns:
        Dict with chest, waist, hip widths, their ratios and a
        'body_shape' label ('unknown' when a measurement is missing)
    """
    chest_width = calculate_chest_width(keypoints)
    waist_width = calculate_waist_width(keypoints)
    hip_width = calculate_hip_width(keypoints)

    shape_type = "unknown"

    if chest_width > 0 and waist_width > 0 and hip_width > 0:
        chest_waist_ratio = chest_width / waist_width
        waist_hip_ratio = waist_width / hip_width

        if chest_waist_ratio < 1.0 and waist_hip_ratio < 0.9:
            shape_type = "hourglass"
        elif chest_waist_ratio > 1.2:
            # Upper body wider than the waist: inverted triangle
            # (the label was previously swapped with "triangle")
            shape_type = "inverted_triangle"
        elif waist_hip_ratio > 1.1:
            # Labelled "lower body wide" (triangle) by the original author
            shape_type = "triangle"
        elif abs(chest_width - hip_width) < 0.1 * max(chest_width, hip_width):
            shape_type = "rectangle"
        else:
            shape_type = "oval"

    return {
        'chest_width': round(chest_width, 2),
        'waist_width': round(waist_width, 2),
        'hip_width': round(hip_width, 2),
        'chest_waist_ratio': round(chest_width / waist_width, 4) if waist_width > 0 else 0,
        'waist_hip_ratio': round(waist_width / hip_width, 4) if hip_width > 0 else 0,
        'body_shape': shape_type,
    }


def estimate_real_height(
    keypoints: List[Dict],
    eye_width: float,
    bbox: Optional[Dict] = None
) -> Dict:
    """
    Estimate real height using eye_width as reference

    The estimate is ``body_height_px / eye_width_px * ASSUMED_EYE_WIDTH_CM``,
    i.e. it assumes an eye distance of about 6 cm.

    Args:
        keypoints: Pose keypoints list
        eye_width: Eye distance in pixels
        bbox: Optional bbox forwarded to ``calculate_body_height`` as its
            fallback when no ankle is reliable

    Returns:
        Dict with 'estimated_height_cm', 'height_ratio', 'height_category',
        'body_height_px' and 'eye_width_px'; zeros and 'unknown' when the
        height cannot be estimated
    """
    body_height = calculate_body_height(keypoints, bbox)

    if eye_width <= 0 or body_height <= 0:
        # Same keys as the success case so consumers need no special-casing
        return {
            'estimated_height_cm': 0,
            'height_ratio': 0,
            'height_category': "unknown",
            'body_height_px': round(max(body_height, 0), 2),
            'eye_width_px': round(max(eye_width, 0), 2),
        }

    height_ratio = body_height / eye_width
    estimated_height_cm = height_ratio * ASSUMED_EYE_WIDTH_CM

    if estimated_height_cm < 150:
        height_category = "short"
    elif estimated_height_cm < 170:
        height_category = "medium"
    elif estimated_height_cm < 180:
        height_category = "tall"
    else:
        height_category = "very_tall"

    return {
        'estimated_height_cm': round(estimated_height_cm, 1),
        'height_ratio': round(height_ratio, 2),
        'height_category': height_category,
        'body_height_px': round(body_height, 2),
        'eye_width_px': round(eye_width, 2),
    }


def calculate_proportions(keypoints: List, bbox: Optional[Dict] = None) -> Dict:
    """
    Calculate all body proportions including height and body shape

    Accepts both formats:
    - Swift Pose: [{'name': 'nose', 'x': 100, 'y': 200, 'confidence': 0.9}, ...]
    - MediaPipe: [[x, y, z, visibility], ...] (auto-converts; rows may be
      lists, tuples or arrays)

    Args:
        keypoints: Pose keypoints list (named or indexed)
        bbox: Optional bbox used as body-height fallback

    Returns:
        Dict with pixel measurements, ratios relative to body height,
        'body_shape' and 'height_estimate'
    """
    if keypoints is None:
        keypoints = []

    # Anything that is not a named dict is taken to be an indexed landmark row
    if len(keypoints) > 0 and not isinstance(keypoints[0], dict):
        keypoints = convert_mediapipe_to_named(keypoints)

    eye_width = calculate_eye_width(keypoints)
    body_height = calculate_body_height(keypoints, bbox)
    torso_height = calculate_torso_height(keypoints)
    leg_height = calculate_leg_height(keypoints)
    shoulder_width = calculate_shoulder_width(keypoints)

    proportions = {
        'eye_width': round(eye_width, 2),
        'body_height': round(body_height, 2),
        'torso_height': round(torso_height, 2),
        'leg_height': round(leg_height, 2),
        'shoulder_width': round(shoulder_width, 2),
    }

    # Ratios relative to body height
    if body_height > 0:
        proportions['head_ratio'] = round(eye_width / body_height, 4)
        proportions['torso_ratio'] = round(torso_height / body_height, 4)
        proportions['leg_ratio'] = round(leg_height / body_height, 4)
    else:
        proportions['head_ratio'] = 0.0
        proportions['torso_ratio'] = 0.0
        proportions['leg_ratio'] = 0.0

    proportions['body_shape'] = calculate_body_shape(keypoints)

    # Pass bbox along so the estimate uses the same body height as above
    proportions['height_estimate'] = estimate_real_height(keypoints, eye_width, bbox)

    return proportions


def estimate_head_top(keypoints: List[Dict]) -> Tuple[float, float]:
    """
    Estimate head top position (for hair/hat detection)

    Args:
        keypoints: Pose keypoints list

    Returns:
        (head_top_y, head_top_x) position - note the y-first order;
        (0.0, 0.0) when nose or eye width is unavailable
    """
    nose = get_keypoint_by_name(keypoints, 'nose')
    eye_width = calculate_eye_width(keypoints)

    if nose is None or eye_width == 0:
        return (0.0, 0.0)

    # Head top is approximately one eye width above the nose
    return (nose['y'] - eye_width, nose['x'])


def estimate_region_from_keypoints(
    keypoints: List[Dict],
    top_keypoint: str,
    bottom_keypoint: str,
    left_keypoint: Optional[str] = None,
    right_keypoint: Optional[str] = None,
    eye_width_factor: float = 0.0
) -> Dict:
    """
    Estimate region from keypoints

    Args:
        keypoints: Pose keypoints list
        top_keypoint: Name of top boundary keypoint
        bottom_keypoint: Name of bottom boundary keypoint
        left_keypoint: Name of left boundary keypoint (optional)
        right_keypoint: Name of right boundary keypoint (optional)
        eye_width_factor: Factor to expand region vertically by eye_width

    Returns:
        Region dict {'x', 'y', 'width', 'height'} with a non-negative
        origin and size; a zero width or height means the region could not
        be determined
    """
    eye_width = calculate_eye_width(keypoints)

    # Y boundaries (an unreliable boundary keypoint counts as row 0)
    top_y = _confident_y(get_keypoint_by_name(keypoints, top_keypoint)) or 0
    bottom_y = _confident_y(get_keypoint_by_name(keypoints, bottom_keypoint)) or 0

    # Apply eye_width factor
    if eye_width_factor > 0 and eye_width > 0:
        top_y -= eye_width * eye_width_factor
        bottom_y += eye_width * eye_width_factor

    # X boundaries
    left = get_keypoint_by_name(keypoints, left_keypoint) if left_keypoint else None
    right = get_keypoint_by_name(keypoints, right_keypoint) if right_keypoint else None
    if (
        left and right
        and left.get('confidence', 0) > MIN_CONFIDENCE
        and right.get('confidence', 0) > MIN_CONFIDENCE
    ):
        left_x = min(left['x'], right['x'])
        right_x = max(left['x'], right['x'])
    else:
        # Fallback: a band centred on the nose
        nose = get_keypoint_by_name(keypoints, 'nose')
        if nose:
            half_width = eye_width * 2 if eye_width > 0 else 50
            left_x = nose['x'] - half_width
            right_x = nose['x'] + half_width
        else:
            left_x = 0
            right_x = 100

    # Clamp so the region never starts outside the image or has negative
    # size; negative values would be misread as wrap-around indices when
    # the region is used to slice a frame.
    x0 = max(int(left_x), 0)
    y0 = max(int(top_y), 0)
    x1 = max(int(right_x), x0)
    y1 = max(int(bottom_y), y0)

    return {
        'x': x0,
        'y': y0,
        'width': x1 - x0,
        'height': y1 - y0,
    }


# Region estimation helpers for specific body parts

def get_head_region(keypoints: List[Dict]) -> Dict:
    """Get head region (for hair/hat detection)"""
    return estimate_region_from_keypoints(
        keypoints,
        top_keypoint='nose',
        bottom_keypoint='neck',
        left_keypoint='left_ear',
        right_keypoint='right_ear',
        eye_width_factor=1.0
    )


def get_face_region(keypoints: List[Dict]) -> Dict:
    """Get face region (for skin/face accessories detection)"""
    return estimate_region_from_keypoints(
        keypoints,
        top_keypoint='nose',
        bottom_keypoint='neck',
        left_keypoint='left_eye',
        right_keypoint='right_eye',
        eye_width_factor=0.5
    )


def get_torso_region(keypoints: List[Dict]) -> Dict:
    """Get torso region (for upper clothing detection)"""
    return estimate_region_from_keypoints(
        keypoints,
        top_keypoint='neck',
        bottom_keypoint='left_hip',
        left_keypoint='left_shoulder',
        right_keypoint='right_shoulder'
    )


def get_leg_region(keypoints: List[Dict]) -> Dict:
    """Get leg region (for lower clothing detection)"""
    return estimate_region_from_keypoints(
        keypoints,
        top_keypoint='left_hip',
        bottom_keypoint='left_ankle',
        left_keypoint='left_hip',
        right_keypoint='right_hip'
    )


def get_arm_region(keypoints: List[Dict], side: str = 'left') -> Dict:
    """Get arm region (for sleeve/arm detection); any side other than 'left' selects the right arm"""
    if side == 'left':
        return estimate_region_from_keypoints(
            keypoints,
            top_keypoint='left_shoulder',
            bottom_keypoint='left_wrist',
            left_keypoint='left_shoulder',
            right_keypoint='left_elbow'
        )
    return estimate_region_from_keypoints(
        keypoints,
        top_keypoint='right_shoulder',
        bottom_keypoint='right_wrist',
        left_keypoint='right_elbow',
        right_keypoint='right_shoulder'
    )


if __name__ == '__main__':
    # Test with sample keypoints
    sample_keypoints = [
        {'name': 'nose', 'x': 100, 'y': 50, 'confidence': 0.9},
        {'name': 'left_eye', 'x': 90, 'y': 40, 'confidence': 0.8},
        {'name': 'right_eye', 'x': 110, 'y': 40, 'confidence': 0.8},
        {'name': 'neck', 'x': 100, 'y': 80, 'confidence': 0.7},
        {'name': 'left_shoulder', 'x': 70, 'y': 100, 'confidence': 0.8},
        {'name': 'right_shoulder', 'x': 130, 'y': 100, 'confidence': 0.8},
        {'name': 'left_hip', 'x': 80, 'y': 200, 'confidence': 0.7},
        {'name': 'right_hip', 'x': 120, 'y': 200, 'confidence': 0.7},
        {'name': 'left_ankle', 'x': 80, 'y': 400, 'confidence': 0.6},
        {'name': 'right_ankle', 'x': 120, 'y': 400, 'confidence': 0.6},
    ]

    proportions = calculate_proportions(sample_keypoints)
    print("Proportions:", proportions)

    head_region = get_head_region(sample_keypoints)
    print("Head region:", head_region)

    torso_region = get_torso_region(sample_keypoints)
    print("Torso region:", torso_region)