feat: add appearance feature system with coordinate/scale fixes

- Add Appearance_Feature_System_V1.0.md design doc
- Add proportion_calculator.py for body proportions (height, body shape)
- Add feature_extractor.py for hierarchical feature extraction
- Add tkg_level1_builder.py for TKG person_trace nodes
- Fix mediapipe_holistic_processor.py to output Top-Left pixels
- Add MediaPipe format conversion in proportion_calculator

Coordinate system alignment:
- Swift Pose: Top-Left pixels (Y-flip done in swift_pose.swift)
- MediaPipe: Top-Left pixels (norm→pixel conversion added)
2026-06-22 02:27:03 +08:00
parent 97180aa7cd
commit 606f31f13c
5 changed files with 2397 additions and 29 deletions
--- a/scripts/utils/feature_extractor.py
+++ b/scripts/utils/feature_extractor.py
@@ -0,0 +1,684 @@
+#!/opt/homebrew/bin/python3.11
+"""
+Feature Extractor - Appearance feature extraction from video frames
+
+Purpose:
+1. Extract color features (HSV histogram, dominant colors)
+2. Extract texture features (LBP, shininess)
+3. Extract pattern features
+4. Detect accessories and clothing attributes
+
+Output (from FeatureExtractor.extract_all):
+{
+  'color': {...},
+  'texture': {...},
+  'pattern': {...},
+  'material': {...},
+}
+
+Usage:
+    from feature_extractor import FeatureExtractor
+
+    extractor = FeatureExtractor()
+    roi = frame[y:y + h, x:x + w]  # BGR crop of the region of interest
+    features = extractor.extract_all(roi)
+"""
+
+import cv2
+import numpy as np
+from typing import Dict, List, Optional
+from skimage.feature import local_binary_pattern
+
+
+class FeatureExtractor:
+    """
+    Extract appearance features from image regions
+    """
+    
+    def __init__(self):
+        self.lbp_radius = 1
+        self.lbp_points = 8
+        self.dominant_color_k = 5
+    
+    def extract_color(self, roi: np.ndarray) -> Dict:
+        """
+        Extract color features from ROI
+        
+        Args:
+            roi: Image region (BGR)
+        
+        Returns:
+            Dict with HSV histogram and dominant colors
+        """
+        if roi is None or roi.size == 0:
+            return {'error': 'empty_roi'}
+        
+        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
+        
+        # HSV histograms
+        h_hist = cv2.calcHist([hsv], [0], None, [30], [0, 180]).flatten()
+        s_hist = cv2.calcHist([hsv], [1], None, [32], [0, 256]).flatten()
+        v_hist = cv2.calcHist([hsv], [2], None, [32], [0, 256]).flatten()
+        
+        # Normalize
+        h_sum = h_hist.sum() or 1
+        s_sum = s_hist.sum() or 1
+        v_sum = v_hist.sum() or 1
+        
+        h_hist_norm = (h_hist / h_sum).tolist()
+        s_hist_norm = (s_hist / s_sum).tolist()
+        v_hist_norm = (v_hist / v_sum).tolist()
+        
+        # Dominant colors via k-means
+        pixels = hsv.reshape(-1, 3).astype(np.float32)
+        dominant_colors = []
+        
+        if len(pixels) >= self.dominant_color_k:
+            criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
+            _, labels, centers = cv2.kmeans(
+                pixels, self.dominant_color_k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS
+            )
+            counts = np.bincount(labels.flatten())
+            dominant_colors = centers[np.argsort(-counts)[:self.dominant_color_k]].tolist()
+        elif len(pixels) > 0:
+            dominant_colors = [pixels.mean(axis=0).tolist()]
+        
+        # Color statistics
+        h_mean = np.mean(hsv[:,:,0])
+        s_mean = np.mean(hsv[:,:,1])
+        v_mean = np.mean(hsv[:,:,2])
+        
+        return {
+            'h_histogram': h_hist_norm,
+            's_histogram': s_hist_norm,
+            'v_histogram': v_hist_norm,
+            'dominant_colors': dominant_colors,
+            'h_mean': round(h_mean, 2),
+            's_mean': round(s_mean, 2),
+            'v_mean': round(v_mean, 2),
+        }
+    
+    def extract_texture(self, roi: np.ndarray) -> Dict:
+        """
+        Extract texture features from ROI
+        
+        Args:
+            roi: Image region (BGR)
+        
+        Returns:
+            Dict with LBP and shininess features
+        """
+        if roi is None or roi.size == 0:
+            return {'error': 'empty_roi'}
+        
+        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
+        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
+        
+        # LBP texture
+        lbp = local_binary_pattern(gray, self.lbp_points, self.lbp_radius)
+        lbp_hist = np.histogram(lbp, bins=256, range=(0, 256))[0]
+        lbp_hist_norm = (lbp_hist / lbp_hist.sum()).tolist()
+        
+        lbp_variance = np.var(lbp)
+        lbp_mean = np.mean(lbp)
+        
+        # Shininess (V channel statistics)
+        v_values = hsv[:,:,2].flatten()
+        v_mean = np.mean(v_values)
+        v_std = np.std(v_values)
+        v_max = np.max(v_values)
+        
+        # High brightness ratio (shiny materials)
+        high_brightness_ratio = np.sum(v_values > 200) / len(v_values)
+        
+        return {
+            'lbp_histogram': lbp_hist_norm,
+            'lbp_variance': round(lbp_variance, 2),
+            'lbp_mean': round(lbp_mean, 2),
+            'brightness': round(v_mean, 2),
+            'brightness_std': round(v_std, 2),
+            'brightness_max': int(v_max),
+            'shininess_ratio': round(high_brightness_ratio, 4),
+        }
+    
+    def detect_pattern(self, roi: np.ndarray) -> Dict:
+        """
+        Detect clothing pattern
+        
+        Args:
+            roi: Image region (BGR)
+        
+        Returns:
+            Dict with pattern classification
+        """
+        if roi is None or roi.size == 0:
+            return {'pattern': 'unknown', 'confidence': 0.0}
+        
+        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
+        
+        # Edge detection
+        edges = cv2.Canny(gray, 50, 150)
+        edge_ratio = np.sum(edges > 0) / edges.size
+        
+        # Gradient analysis
+        sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
+        sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
+        gradient_magnitude = np.sqrt(sobelx**2 + sobely**2)
+        gradient_mean = np.mean(gradient_magnitude)
+        
+        # Color variance (for pattern detection)
+        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
+        h_std = np.std(hsv[:,:,0])
+        s_std = np.std(hsv[:,:,1])
+        
+        # Pattern classification
+        pattern = 'solid'
+        confidence = 0.7
+        
+        if edge_ratio > 0.1 and gradient_mean > 20:
+            if h_std > 30:
+                pattern = 'patterned'
+                confidence = 0.8
+            elif edge_ratio > 0.2:
+                pattern = 'striped'
+                confidence = 0.6
+        
+        if s_std > 50 and gradient_mean > 30:
+            pattern = 'patterned'
+            confidence = 0.85
+        
+        return {
+            'pattern': pattern,
+            'confidence': confidence,
+            'edge_ratio': round(edge_ratio, 4),
+            'gradient_mean': round(gradient_mean, 2),
+            'color_variance': round(h_std, 2),
+        }
+    
+    def classify_material(self, roi: np.ndarray) -> Dict:
+        """
+        Classify clothing material
+        
+        Args:
+            roi: Image region (BGR)
+        
+        Returns:
+            Dict with material classification
+        """
+        if roi is None or roi.size == 0:
+            return {'material': 'unknown', 'confidence': 0.0}
+        
+        texture = self.extract_texture(roi)
+        
+        material = 'unknown'
+        confidence = 0.0
+        
+        lbp_var = texture.get('lbp_variance', 0)
+        shininess = texture.get('shininess_ratio', 0)
+        brightness = texture.get('brightness', 0)
+        
+        # Material classification rules
+        if shininess > 0.1 and brightness > 150:
+            material = 'silk'
+            confidence = 0.7
+        elif shininess > 0.05 and lbp_var > 50:
+            material = 'leather'
+            confidence = 0.6
+        elif lbp_var > 100:
+            material = 'denim'
+            confidence = 0.65
+        elif lbp_var < 20 and shininess < 0.02:
+            material = 'cotton'
+            confidence = 0.6
+        elif lbp_var < 50 and brightness < 100:
+            material = 'polyester'
+            confidence = 0.5
+        
+        return {
+            'material': material,
+            'confidence': confidence,
+            'texture_features': texture,
+        }
+    
+    def extract_all(self, roi: np.ndarray) -> Dict:
+        """
+        Extract all features from ROI
+        
+        Args:
+            roi: Image region (BGR)
+        
+        Returns:
+            Dict with all features
+        """
+        return {
+            'color': self.extract_color(roi),
+            'texture': self.extract_texture(roi),
+            'pattern': self.detect_pattern(roi),
+            'material': self.classify_material(roi),
+        }
+    
+    def extract_split_region(
+        self,
+        frame: np.ndarray,
+        region: Dict,
+        split_ratio: float = 0.5
+    ) -> Dict:
+        """
+        Extract features from split region (upper/lower)
+        
+        Args:
+            frame: Full frame
+            region: Region dict {'x', 'y', 'width', 'height'}
+            split_ratio: Split ratio (0.5 = 50%)
+        
+        Returns:
+            Dict with upper and lower features
+        """
+        x, y, w, h = region['x'], region['y'], region['width'], region['height']
+        
+        if w <= 0 or h <= 0:
+            return {'error': 'invalid_region'}
+        
+        mid_y = y + int(h * split_ratio)
+        
+        # Upper region
+        upper_roi = frame[y:mid_y, x:x+w] if mid_y > y else None
+        upper_features = self.extract_all(upper_roi) if upper_roi is not None else {'error': 'empty'}
+        
+        # Lower region
+        lower_roi = frame[mid_y:y+h, x:x+w] if y+h > mid_y else None
+        lower_features = self.extract_all(lower_roi) if lower_roi is not None else {'error': 'empty'}
+        
+        return {
+            'upper': upper_features,
+            'lower': lower_features,
+        }
+    
+    def detect_exposed_skin(self, roi: np.ndarray) -> Dict:
+        """
+        Detect exposed skin in ROI
+        
+        Args:
+            roi: Image region (BGR)
+        
+        Returns:
+            Dict with skin detection results
+        """
+        if roi is None or roi.size == 0:
+            return {'skin_ratio': 0.0, 'skin_detected': False}
+        
+        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
+        
+        # Skin color range (HSV)
+        # H: 0-50 (skin tones)
+        # S: 10-150 (not too saturated)
+        # V: 50-255 (visible)
+        skin_mask = cv2.inRange(hsv, (0, 10, 50), (50, 150, 255))
+        
+        skin_ratio = np.sum(skin_mask > 0) / skin_mask.size
+        
+        return {
+            'skin_ratio': round(skin_ratio, 4),
+            'skin_detected': skin_ratio > 0.3,
+        }
+    
+    def calculate_similarity(self, features1: Dict, features2: Dict) -> Dict:
+        """
+        Calculate similarity between two feature sets
+        
+        Args:
+            features1: First feature dict
+            features2: Second feature dict
+        
+        Returns:
+            Dict with similarity scores
+        """
+        scores = {}
+        
+        # Color similarity (histogram correlation)
+        if 'color' in features1 and 'color' in features2:
+            h1 = np.array(features1['color'].get('h_histogram', []))
+            h2 = np.array(features2['color'].get('h_histogram', []))
+            
+            if len(h1) > 0 and len(h2) > 0:
+                h_corr = cv2.compareHist(h1.astype(np.float32), h2.astype(np.float32), cv2.HISTCMP_CORREL)
+                scores['color_similarity'] = round(h_corr, 4)
+            else:
+                scores['color_similarity'] = 0.0
+            
+            # Dominant color distance
+            dc1 = np.array(features1['color'].get('dominant_colors', [[0,0,0]]))
+            dc2 = np.array(features2['color'].get('dominant_colors', [[0,0,0]]))
+            
+            if len(dc1) > 0 and len(dc2) > 0:
+                color_dist = np.linalg.norm(dc1[0] - dc2[0])
+                scores['color_distance'] = round(color_dist, 2)
+            else:
+                scores['color_distance'] = 255.0
+        
+        # Texture similarity
+        if 'texture' in features1 and 'texture' in features2:
+            lbp1 = np.array(features1['texture'].get('lbp_histogram', []))
+            lbp2 = np.array(features2['texture'].get('lbp_histogram', []))
+            
+            if len(lbp1) > 0 and len(lbp2) > 0:
+                lbp_corr = cv2.compareHist(lbp1.astype(np.float32), lbp2.astype(np.float32), cv2.HISTCMP_CORREL)
+                scores['texture_similarity'] = round(lbp_corr, 4)
+            else:
+                scores['texture_similarity'] = 0.0
+        
+        # Overall score
+        color_sim = scores.get('color_similarity', 0)
+        texture_sim = scores.get('texture_similarity', 0)
+        scores['overall_score'] = round(color_sim * 0.7 + texture_sim * 0.3, 4)
+        
+        return scores
+
+
+# Helper functions for specific feature extraction
+
+def extract_hair_color(frame: np.ndarray, head_region: Dict) -> Dict:
+    """Extract hair color from head region"""
+    extractor = FeatureExtractor()
+    x, y, w, h = head_region['x'], head_region['y'], head_region['width'], head_region['height']
+    
+    # Focus on upper part of head (hair area)
+    hair_roi = frame[y:y+int(h*0.5), x:x+w]
+    
+    return extractor.extract_color(hair_roi)
+
+
+def extract_skin_color(frame: np.ndarray, face_region: Dict) -> Dict:
+    """Extract skin color from face region"""
+    extractor = FeatureExtractor()
+    x, y, w, h = face_region['x'], face_region['y'], face_region['width'], face_region['height']
+    
+    face_roi = frame[y:y+h, x:x+w]
+    
+    return extractor.extract_color(face_roi)
+
+
+def extract_clothing_color(frame: np.ndarray, torso_region: Dict) -> Dict:
+    """Extract clothing color from torso region"""
+    extractor = FeatureExtractor()
+    
+    return extractor.extract_split_region(frame, torso_region)
+
+
+def extract_accessory_color(frame: np.ndarray, accessory_region: Dict) -> Dict:
+    """Extract accessory color from region"""
+    extractor = FeatureExtractor()
+    x, y, w, h = accessory_region['x'], accessory_region['y'], accessory_region['width'], accessory_region['height']
+    
+    roi = frame[y:y+h, x:x+w]
+    
+    return extractor.extract_color(roi)
+
+
+class HierarchicalFeatureExtractor:
+    """
+    Hierarchical feature extraction: coarse → fine
+    
+    Level 1: Large regions (body bbox, upper/lower body)
+    Level 2: Medium regions (head, face, arms, legs)
+    Level 3: Fine features (accessories, details)
+    """
+    
+    def __init__(self):
+        """Create the FeatureExtractor that all three levels delegate to."""
+        self.extractor = FeatureExtractor()
+    
+    def extract_level1(self, frame: np.ndarray, bbox: Dict, head_region: Optional[Dict] = None) -> Dict:
+        """
+        Level 1: Extract large region features
+        
+        Args:
+            frame: Full frame
+            bbox: Full body bbox {'x', 'y', 'width', 'height'}
+            head_region: Optional head region for hair extraction
+        
+        Returns:
+            Dict with large region features
+        """
+        x, y, w, h = bbox['x'], bbox['y'], bbox['width'], bbox['height']
+        
+        if w <= 0 or h <= 0:
+            return {'error': 'invalid_bbox'}
+        
+        # Full body region
+        body_roi = frame[y:y+h, x:x+w]
+        body_features = self.extractor.extract_all(body_roi)
+        
+        # Split upper/lower body (50%)
+        mid_y = y + h // 2
+        
+        upper_roi = frame[y:mid_y, x:x+w] if mid_y > y else None
+        lower_roi = frame[mid_y:y+h, x:x+w] if y+h > mid_y else None
+        
+        upper_features = self.extractor.extract_all(upper_roi) if upper_roi is not None else {}
+        lower_features = self.extractor.extract_all(lower_roi) if lower_roi is not None else {}
+        
+        # Head top (hair) - part of Level 1
+        head_features = {}
+        if head_region is not None:
+            head_roi = self._get_roi(frame, head_region)
+            if head_roi is not None:
+                head_features = self.extractor.extract_all(head_roi)
+        
+        return {
+            'level': 1,
+            'body': body_features,
+            'head_top': head_features,
+            'upper_body': upper_features,
+            'lower_body': lower_features,
+            'bbox': bbox,
+        }
+    
+    def extract_level2(
+        self,
+        frame: np.ndarray,
+        regions: Dict
+    ) -> Dict:
+        """
+        Level 2: Extract medium region features
+        
+        Args:
+            frame: Full frame
+            regions: Dict with face, torso, leg, arm regions
+        
+        Returns:
+            Dict with medium region features
+        """
+        features = {'level': 2}
+        
+        # Face region (skin, lips, eyes)
+        if 'face' in regions:
+            face_roi = self._get_roi(frame, regions['face'])
+            features['face'] = self.extractor.extract_all(face_roi) if face_roi is not None else {}
+            features['face']['skin'] = self.extractor.detect_exposed_skin(face_roi) if face_roi is not None else {}
+        
+        # Torso region (clothing details)
+        if 'torso' in regions:
+            torso_roi = self._get_roi(frame, regions['torso'])
+            features['torso'] = self.extractor.extract_all(torso_roi) if torso_roi is not None else {}
+        
+        # Leg region
+        if 'leg' in regions:
+            leg_roi = self._get_roi(frame, regions['leg'])
+            features['leg'] = self.extractor.extract_all(leg_roi) if leg_roi is not None else {}
+        
+        # Arms (left/right) - sleeve detection
+        if 'left_arm' in regions:
+            arm_roi = self._get_roi(frame, regions['left_arm'])
+            features['left_arm'] = self.extractor.extract_all(arm_roi) if arm_roi is not None else {}
+        
+        if 'right_arm' in regions:
+            arm_roi = self._get_roi(frame, regions['right_arm'])
+            features['right_arm'] = self.extractor.extract_all(arm_roi) if arm_roi is not None else {}
+        
+        return features
+    
+    def extract_level3(
+        self,
+        frame: np.ndarray,
+        keypoints: List[Dict],
+        eye_width: float
+    ) -> Dict:
+        """
+        Level 3: Extract fine features (accessories, details)
+        
+        Args:
+            frame: Full frame
+            keypoints: Pose keypoints
+            eye_width: Eye distance (reference unit)
+        
+        Returns:
+            Dict with fine features
+        """
+        features = {'level': 3}
+        
+        # Estimate accessory regions from keypoints
+        offset = int(eye_width * 0.5) if eye_width > 0 else 20
+        
+        # Glasses (eye region)
+        left_eye = self._get_kp(keypoints, 'left_eye')
+        right_eye = self._get_kp(keypoints, 'right_eye')
+        if left_eye and right_eye:
+            glasses_roi = frame[
+                int(min(left_eye['y'], right_eye['y']) - offset):int(max(left_eye['y'], right_eye['y']) + offset),
+                int(left_eye['x'] - offset):int(right_eye['x'] + offset)
+            ]
+            features['glasses'] = self.extractor.extract_all(glasses_roi) if glasses_roi is not None else {}
+        
+        # Earrings (ear positions)
+        left_ear = self._get_kp(keypoints, 'left_ear')
+        right_ear = self._get_kp(keypoints, 'right_ear')
+        if left_ear and left_ear.get('confidence', 0) > 0.1:
+            ear_roi = frame[
+                int(left_ear['y'] - offset):int(left_ear['y'] + offset),
+                int(left_ear['x'] - offset):int(left_ear['x'] + offset)
+            ]
+            features['left_earring'] = self.extractor.extract_all(ear_roi) if ear_roi is not None else {}
+        
+        if right_ear and right_ear.get('confidence', 0) > 0.1:
+            ear_roi = frame[
+                int(right_ear['y'] - offset):int(right_ear['y'] + offset),
+                int(right_ear['x'] - offset):int(right_ear['x'] + offset)
+            ]
+            features['right_earring'] = self.extractor.extract_all(ear_roi) if ear_roi is not None else {}
+        
+        # Watch (wrist position)
+        left_wrist = self._get_kp(keypoints, 'left_wrist')
+        right_wrist = self._get_kp(keypoints, 'right_wrist')
+        if left_wrist and left_wrist.get('confidence', 0) > 0.1:
+            wrist_roi = frame[
+                int(left_wrist['y'] - offset):int(left_wrist['y'] + offset),
+                int(left_wrist['x'] - offset):int(left_wrist['x'] + offset)
+            ]
+            features['left_watch'] = self.extractor.extract_all(wrist_roi) if wrist_roi is not None else {}
+        
+        if right_wrist and right_wrist.get('confidence', 0) > 0.1:
+            wrist_roi = frame[
+                int(right_wrist['y'] - offset):int(right_wrist['y'] + offset),
+                int(right_wrist['x'] - offset):int(right_wrist['x'] + offset)
+            ]
+            features['right_watch'] = self.extractor.extract_all(wrist_roi) if wrist_roi is not None else {}
+        
+        # Shoes (ankle positions)
+        left_ankle = self._get_kp(keypoints, 'left_ankle')
+        right_ankle = self._get_kp(keypoints, 'right_ankle')
+        if left_ankle and left_ankle.get('confidence', 0) > 0.1:
+            shoe_roi = frame[
+                int(left_ankle['y'] - offset):int(left_ankle['y'] + offset * 2),
+                int(left_ankle['x'] - offset):int(left_ankle['x'] + offset)
+            ]
+            features['left_shoe'] = self.extractor.extract_all(shoe_roi) if shoe_roi is not None else {}
+        
+        if right_ankle and right_ankle.get('confidence', 0) > 0.1:
+            shoe_roi = frame[
+                int(right_ankle['y'] - offset):int(right_ankle['y'] + offset * 2),
+                int(right_ankle['x'] - offset):int(right_ankle['x'] + offset)
+            ]
+            features['right_shoe'] = self.extractor.extract_all(shoe_roi) if shoe_roi is not None else {}
+        
+        return features
+    
+    def extract_hierarchical(
+        self,
+        frame: np.ndarray,
+        bbox: Dict,
+        regions: Dict,
+        keypoints: List[Dict],
+        eye_width: float
+    ) -> Dict:
+        """
+        Full hierarchical extraction: Level 1 → Level 2 → Level 3
+        
+        Args:
+            frame: Full frame
+            bbox: Full body bbox
+            regions: Medium regions dict (includes 'head' for Level 1)
+            keypoints: Pose keypoints
+            eye_width: Reference unit
+        
+        Returns:
+            Dict with all hierarchical features
+        """
+        head_region = regions.get('head') if regions else None
+        level1 = self.extract_level1(frame, bbox, head_region)
+        level2 = self.extract_level2(frame, regions)
+        level3 = self.extract_level3(frame, keypoints, eye_width)
+        
+        return {
+            'level1': level1,
+            'level2': level2,
+            'level3': level3,
+        }
+    
+    def _get_roi(self, frame: np.ndarray, region: Dict) -> Optional[np.ndarray]:
+        """Get ROI from frame using region dict"""
+        if region is None:
+            return None
+        x, y, w, h = region.get('x', 0), region.get('y', 0), region.get('width', 0), region.get('height', 0)
+        if w <= 0 or h <= 0:
+            return None
+        return frame[y:y+h, x:x+w]
+    
+    def _get_kp(self, keypoints: List[Dict], name: str) -> Optional[Dict]:
+        """Get keypoint by name"""
+        for kp in keypoints:
+            if kp.get('name') == name:
+                return kp
+        return None
+
+
+if __name__ == '__main__':
+    # Test with sample image
+    import sys
+    
+    if len(sys.argv) > 1:
+        img_path = sys.argv[1]
+        img = cv2.imread(img_path)
+        
+        if img is not None:
+            extractor = FeatureExtractor()
+            
+            # Extract from full image
+            features = extractor.extract_all(img)
+            
+            print("Color features:")
+            print(f"  H mean: {features['color']['h_mean']}")
+            print(f"  S mean: {features['color']['s_mean']}")
+            print(f"  V mean: {features['color']['v_mean']}")
+            print(f"  Dominant colors: {len(features['color']['dominant_colors'])}")
+            
+            print("\nTexture features:")
+            print(f"  LBP variance: {features['texture']['lbp_variance']}")
+            print(f"  Brightness: {features['texture']['brightness']}")
+            print(f"  Shininess: {features['texture']['shininess_ratio']}")
+            
+            print("\nPattern:")
+            print(f"  {features['pattern']['pattern']} (conf: {features['pattern']['confidence']})")
+            
+            print("\nMaterial:")
+            print(f"  {features['material']['material']} (conf: {features['material']['confidence']})")
+    else:
+        print("Usage: python feature_extractor.py <image_path>")
--- a/scripts/utils/proportion_calculator.py
+++ b/scripts/utils/proportion_calculator.py
@@ -0,0 +1,674 @@
+#!/opt/homebrew/bin/python3.11
+"""
+Proportion Calculator - Body proportion calculation from keypoints
+
+Purpose:
+1. Calculate body proportions from Pose keypoints
+2. Use eye_width as reference unit
+3. Provide normalized ratios for feature extraction
+
+Keypoints Used:
+- Swift Pose (19 keypoints): nose, eyes, ears, neck, shoulders, elbows, wrists, hips, knees, ankles
+- MediaPipe Pose (33 landmarks): additional details
+- YOLOv8 Pose (17 keypoints): fallback
+
+Output:
+{
+  'eye_width': float,
+  'body_height': float,
+  'torso_height': float,
+  'leg_height': float,
+  'shoulder_width': float,
+  'head_ratio': float,
+  'torso_ratio': float,
+  'leg_ratio': float,
+}
+
+Usage:
+    from proportion_calculator import calculate_proportions
+    
+    proportions = calculate_proportions(pose_keypoints)
+"""
+
+import numpy as np
+from typing import Dict, List, Optional, Tuple
+
+
+# MediaPipe pose landmark index to name mapping.
+# Covers indices 0-32; convert_mediapipe_to_named() uses it to label each
+# landmark by its position in the input list.
+MEDIAPIPE_POSE_NAMES = {
+    0: 'nose',
+    1: 'left_eye_inner',
+    2: 'left_eye',
+    3: 'left_eye_outer',
+    4: 'right_eye_inner',
+    5: 'right_eye',
+    6: 'right_eye_outer',
+    7: 'left_ear',
+    8: 'right_ear',
+    9: 'mouth_left',
+    10: 'mouth_right',
+    11: 'left_shoulder',
+    12: 'right_shoulder',
+    13: 'left_elbow',
+    14: 'right_elbow',
+    15: 'left_wrist',
+    16: 'right_wrist',
+    17: 'left_pinky',
+    18: 'right_pinky',
+    19: 'left_index',
+    20: 'right_index',
+    21: 'left_thumb',
+    22: 'right_thumb',
+    23: 'left_hip',
+    24: 'right_hip',
+    25: 'left_knee',
+    26: 'right_knee',
+    27: 'left_ankle',
+    28: 'right_ankle',
+    29: 'left_heel',
+    30: 'right_heel',
+    31: 'left_foot_index',
+    32: 'right_foot_index',
+}
+
+
+def convert_mediapipe_to_named(landmarks: List[List]) -> List[Dict]:
+    """
+    Convert MediaPipe landmarks [x,y,z,vis] to named keypoints format
+    
+    Args:
+        landmarks: MediaPipe landmarks [[x, y, z, visibility], ...]
+    
+    Returns:
+        Named keypoints [{'name': 'nose', 'x': 100, 'y': 200, 'confidence': 0.9}, ...]
+    """
+    named_keypoints = []
+    for i, lm in enumerate(landmarks):
+        if i in MEDIAPIPE_POSE_NAMES:
+            named_keypoints.append({
+                'name': MEDIAPIPE_POSE_NAMES[i],
+                'x': lm[0],
+                'y': lm[1],
+                'confidence': lm[3] if len(lm) > 3 else 1.0,
+            })
+    return named_keypoints
+
+
+def get_keypoint_by_name(keypoints: List[Dict], name: str) -> Optional[Dict]:
+    """
+    Get keypoint by name from keypoints list
+    
+    Args:
+        keypoints: List of keypoints [{'name': 'nose', 'x': 100, 'y': 200, 'confidence': 0.9}, ...]
+        name: Keypoint name to find
+    
+    Returns:
+        Keypoint dict or None if not found
+    """
+    for kp in keypoints:
+        if kp.get('name') == name:
+            return kp
+    return None
+
+
+def calculate_distance(p1: Dict, p2: Dict) -> float:
+    """
+    Calculate Euclidean distance between two keypoints
+    
+    Args:
+        p1: Keypoint {'x': float, 'y': float}
+        p2: Keypoint {'x': float, 'y': float}
+    
+    Returns:
+        Distance in pixels
+    """
+    if p1 is None or p2 is None:
+        return 0.0
+    return np.sqrt((p1['x'] - p2['x'])**2 + (p1['y'] - p2['y'])**2)
+
+
+def calculate_eye_width(keypoints: List[Dict]) -> float:
+    """
+    Calculate eye distance (reference unit)
+    
+    Args:
+        keypoints: Pose keypoints list
+    
+    Returns:
+        Eye width in pixels
+    """
+    left_eye = get_keypoint_by_name(keypoints, 'left_eye')
+    right_eye = get_keypoint_by_name(keypoints, 'right_eye')
+    
+    if left_eye is None or right_eye is None:
+        return 0.0
+    
+    # Filter by confidence
+    if left_eye.get('confidence', 0) < 0.1 or right_eye.get('confidence', 0) < 0.1:
+        return 0.0
+    
+    return calculate_distance(left_eye, right_eye)
+
+
+def calculate_body_height(keypoints: List[Dict], bbox: Optional[Dict] = None) -> float:
+    """
+    Calculate full body height
+    
+    Assumes keypoints are already in Top-Left pixel coordinates
+    (Y-flip and scale already handled by swift_pose.swift)
+    
+    Args:
+        keypoints: Pose keypoints list (Top-Left pixels)
+        bbox: Optional bbox {'x', 'y', 'width', 'height'}
+    
+    Returns:
+        Body height in pixels
+    """
+    nose = get_keypoint_by_name(keypoints, 'nose')
+    left_ankle = get_keypoint_by_name(keypoints, 'left_ankle')
+    right_ankle = get_keypoint_by_name(keypoints, 'right_ankle')
+    
+    if nose is None:
+        return 0.0
+    
+    nose_y = nose['y']
+    
+    # Get ankle position (max Y = bottom of body in Top-Left system)
+    ankle_y = 0.0
+    if left_ankle and left_ankle.get('confidence', 0) > 0.1:
+        ankle_y = max(ankle_y, left_ankle['y'])
+    if right_ankle and right_ankle.get('confidence', 0) > 0.1:
+        ankle_y = max(ankle_y, right_ankle['y'])
+    
+    if ankle_y > 0:
+        return ankle_y - nose_y
+    
+    # Fallback to bbox height
+    if bbox and bbox.get('height', 0) > 0:
+        return bbox['height']
+    
+    return 0.0
+
+
+def calculate_torso_height(keypoints: List[Dict]) -> float:
+    """
+    Calculate torso height (neck to hip)
+    
+    Assumes keypoints are already in Top-Left pixel coordinates
+    
+    Args:
+        keypoints: Pose keypoints list
+    
+    Returns:
+        Torso height in pixels
+    """
+    neck = get_keypoint_by_name(keypoints, 'neck')
+    left_hip = get_keypoint_by_name(keypoints, 'left_hip')
+    right_hip = get_keypoint_by_name(keypoints, 'right_hip')
+    
+    # Get neck position
+    neck_y = 0.0
+    if neck and neck.get('confidence', 0) > 0.1:
+        neck_y = neck['y']
+    
+    # Fallback: estimate neck from nose + eye_width
+    if neck_y == 0:
+        nose = get_keypoint_by_name(keypoints, 'nose')
+        eye_width = calculate_eye_width(keypoints)
+        if nose and eye_width > 0:
+            neck_y = nose['y'] + eye_width * 0.5
+    
+    # Get hip position (average of both hips)
+    hip_y = 0.0
+    hip_count = 0
+    if left_hip and left_hip.get('confidence', 0) > 0.1:
+        hip_y += left_hip['y']
+        hip_count += 1
+    if right_hip and right_hip.get('confidence', 0) > 0.1:
+        hip_y += right_hip['y']
+        hip_count += 1
+    
+    if hip_count > 0:
+        hip_y = hip_y / hip_count
+    
+    if neck_y > 0 and hip_y > 0:
+        return hip_y - neck_y
+    
+    return 0.0
+
+
+def calculate_leg_height(keypoints: List[Dict]) -> float:
+    """
+    Calculate leg height (hip to ankle)
+    
+    Assumes keypoints are already in Top-Left pixel coordinates
+    
+    Args:
+        keypoints: Pose keypoints list
+    
+    Returns:
+        Leg height in pixels
+    """
+    left_hip = get_keypoint_by_name(keypoints, 'left_hip')
+    right_hip = get_keypoint_by_name(keypoints, 'right_hip')
+    left_ankle = get_keypoint_by_name(keypoints, 'left_ankle')
+    right_ankle = get_keypoint_by_name(keypoints, 'right_ankle')
+    
+    # Get hip position (average of both hips)
+    hip_y = 0.0
+    hip_count = 0
+    if left_hip and left_hip.get('confidence', 0) > 0.1:
+        hip_y += left_hip['y']
+        hip_count += 1
+    if right_hip and right_hip.get('confidence', 0) > 0.1:
+        hip_y += right_hip['y']
+        hip_count += 1
+    
+    if hip_count > 0:
+        hip_y = hip_y / hip_count
+    
+    # Get ankle position (max Y = bottom of body)
+    ankle_y = 0.0
+    if left_ankle and left_ankle.get('confidence', 0) > 0.1:
+        ankle_y = max(ankle_y, left_ankle['y'])
+    if right_ankle and right_ankle.get('confidence', 0) > 0.1:
+        ankle_y = max(ankle_y, right_ankle['y'])
+    
+    if hip_y > 0 and ankle_y > 0:
+        return ankle_y - hip_y
+    
+    return 0.0
+
+
+def calculate_should_width(keypoints: List[Dict]) -> float:
+    """
+    Calculate shoulder width
+    
+    Args:
+        keypoints: Pose keypoints list
+    
+    Returns:
+        Shoulder width in pixels
+    """
+    left_shoulder = get_keypoint_by_name(keypoints, 'left_shoulder')
+    right_shoulder = get_keypoint_by_name(keypoints, 'right_shoulder')
+    
+    if left_shoulder is None or right_shoulder is None:
+        return 0.0
+    
+    if left_shoulder.get('confidence', 0) < 0.1 or right_shoulder.get('confidence', 0) < 0.1:
+        return 0.0
+    
+    return calculate_distance(left_shoulder, right_shoulder)
+
+
+def calculate_chest_width(keypoints: List[Dict]) -> float:
+    """
+    Calculate chest/bust width (shoulder width as approximation)
+    
+    Args:
+        keypoints: Pose keypoints list
+    
+    Returns:
+        Chest width in pixels
+    """
+    return calculate_should_width(keypoints)
+
+
+def calculate_waist_width(keypoints: List[Dict]) -> float:
+    """
+    Calculate waist width (hip width as approximation)
+    
+    Args:
+        keypoints: Pose keypoints list
+    
+    Returns:
+        Waist width in pixels
+    """
+    left_hip = get_keypoint_by_name(keypoints, 'left_hip')
+    right_hip = get_keypoint_by_name(keypoints, 'right_hip')
+    
+    if left_hip is None or right_hip is None:
+        return 0.0
+    
+    if left_hip.get('confidence', 0) < 0.1 or right_hip.get('confidence', 0) < 0.1:
+        return 0.0
+    
+    return calculate_distance(left_hip, right_hip)
+
+
+def calculate_hip_width(keypoints: List[Dict]) -> float:
+    """
+    Calculate hip width
+    
+    Args:
+        keypoints: Pose keypoints list
+    
+    Returns:
+        Hip width in pixels
+    """
+    return calculate_waist_width(keypoints)
+
+
+def calculate_body_shape(keypoints: List[Dict]) -> Dict:
+    """
+    Calculate body shape (三圍): chest, waist, hip
+    
+    Args:
+        keypoints: Pose keypoints list
+    
+    Returns:
+        Dict with chest, waist, hip measurements and ratios
+    """
+    chest_width = calculate_chest_width(keypoints)
+    waist_width = calculate_waist_width(keypoints)
+    hip_width = calculate_hip_width(keypoints)
+    
+    # Calculate ratios (body shape classification)
+    shape_type = "unknown"
+    
+    if chest_width > 0 and waist_width > 0 and hip_width > 0:
+        chest_waist_ratio = chest_width / waist_width
+        waist_hip_ratio = waist_width / hip_width
+        
+        # Body shape classification
+        if chest_waist_ratio < 1.0 and waist_hip_ratio < 0.9:
+            shape_type = "hourglass"  # 葫芦形
+        elif chest_waist_ratio > 1.2:
+            shape_type = "triangle"  # 倒三角（上身宽）
+        elif waist_hip_ratio > 1.1:
+            shape_type = "inverted_triangle"  # 正三角（下身宽）
+        elif abs(chest_width - hip_width) < 0.1 * max(chest_width, hip_width):
+            shape_type = "rectangle"  # 矩形
+        else:
+            shape_type = "oval"  #椭圆形
+    
+    return {
+        'chest_width': round(chest_width, 2),
+        'waist_width': round(waist_width, 2),
+        'hip_width': round(hip_width, 2),
+        'chest_waist_ratio': round(chest_width / waist_width, 4) if waist_width > 0 else 0,
+        'waist_hip_ratio': round(waist_width / hip_width, 4) if hip_width > 0 else 0,
+        'body_shape': shape_type,
+    }
+
+
+def estimate_real_height(keypoints: List[Dict], eye_width: float) -> Dict:
+    """
+    Estimate real height using eye_width as reference
+    
+    Assumptions:
+    - Average eye_width ≈ 6 cm
+    - Average adult height ≈ 170 cm
+    - ratio = body_height_pixels / eye_width_pixels
+    
+    Args:
+        keypoints: Pose keypoints list
+        eye_width: Eye distance in pixels
+    
+    Returns:
+        Dict with estimated real height
+    """
+    body_height = calculate_body_height(keypoints)
+    
+    if eye_width <= 0 or body_height <= 0:
+        return {
+            'estimated_height_cm': 0,
+            'height_ratio': 0,
+        }
+    
+    # Height ratio (body_height / eye_width)
+    height_ratio = body_height / eye_width
+    
+    # Estimate real height (assuming eye_width ≈ 6cm)
+    # estimated_height = height_ratio * 6 cm
+    estimated_height_cm = height_ratio * 6.0
+    
+    # Height category
+    height_category = "unknown"
+    if estimated_height_cm < 150:
+        height_category = "short"
+    elif estimated_height_cm < 170:
+        height_category = "medium"
+    elif estimated_height_cm < 180:
+        height_category = "tall"
+    else:
+        height_category = "very_tall"
+    
+    return {
+        'estimated_height_cm': round(estimated_height_cm, 1),
+        'height_ratio': round(height_ratio, 2),
+        'height_category': height_category,
+        'body_height_px': round(body_height, 2),
+        'eye_width_px': round(eye_width, 2),
+    }
+
+
+def calculate_proportions(keypoints: List, bbox: Optional[Dict] = None) -> Dict:
+    """
+    Calculate all body proportions including height and body shape
+    
+    Accepts both formats:
+    - Swift Pose: [{'name': 'nose', 'x': 100, 'y': 200, 'confidence': 0.9}, ...]
+    - MediaPipe: [[x, y, z, visibility], ...] (auto-converts)
+    
+    Args:
+        keypoints: Pose keypoints list (named or indexed)
+        bbox: Optional bbox for fallback
+    
+    Returns:
+        Dict with all proportions
+    """
+    # Auto-detect and convert MediaPipe format
+    if keypoints and isinstance(keypoints[0], list):
+        keypoints = convert_mediapipe_to_named(keypoints)
+    
+    eye_width = calculate_eye_width(keypoints)
+    body_height = calculate_body_height(keypoints, bbox)
+    torso_height = calculate_torso_height(keypoints)
+    leg_height = calculate_leg_height(keypoints)
+    shoulder_width = calculate_should_width(keypoints)
+    
+    proportions = {
+        'eye_width': round(eye_width, 2),
+        'body_height': round(body_height, 2),
+        'torso_height': round(torso_height, 2),
+        'leg_height': round(leg_height, 2),
+        'shoulder_width': round(shoulder_width, 2),
+    }
+    
+    # Calculate ratios
+    if body_height > 0:
+        proportions['head_ratio'] = round(eye_width / body_height, 4)
+        proportions['torso_ratio'] = round(torso_height / body_height, 4)
+        proportions['leg_ratio'] = round(leg_height / body_height, 4)
+    else:
+        proportions['head_ratio'] = 0.0
+        proportions['torso_ratio'] = 0.0
+        proportions['leg_ratio'] = 0.0
+    
+    # Calculate body shape (三圍)
+    body_shape = calculate_body_shape(keypoints)
+    proportions['body_shape'] = body_shape
+    
+    # Estimate real height
+    height_estimate = estimate_real_height(keypoints, eye_width)
+    proportions['height_estimate'] = height_estimate
+    
+    return proportions
+
+
+def estimate_head_top(keypoints: List[Dict]) -> Tuple[float, float]:
+    """
+    Estimate head top position (for hair/hat detection)
+    
+    Args:
+        keypoints: Pose keypoints list
+    
+    Returns:
+        (head_top_y, head_top_x) position
+    """
+    nose = get_keypoint_by_name(keypoints, 'nose')
+    eye_width = calculate_eye_width(keypoints)
+    
+    if nose is None or eye_width == 0:
+        return (0.0, 0.0)
+    
+    # Head top is approximately above nose by eye_width
+    head_top_y = nose['y'] - eye_width
+    head_top_x = nose['x']
+    
+    return (head_top_y, head_top_x)
+
+
+def estimate_region_from_keypoints(
+    keypoints: List[Dict],
+    top_keypoint: str,
+    bottom_keypoint: str,
+    left_keypoint: Optional[str] = None,
+    right_keypoint: Optional[str] = None,
+    eye_width_factor: float = 0.0
+) -> Dict:
+    """
+    Estimate region from keypoints
+    
+    Args:
+        keypoints: Pose keypoints list
+        top_keypoint: Name of top boundary keypoint
+        bottom_keypoint: Name of bottom boundary keypoint
+        left_keypoint: Name of left boundary keypoint (optional)
+        right_keypoint: Name of right boundary keypoint (optional)
+        eye_width_factor: Factor to expand region by eye_width
+    
+    Returns:
+        Region dict {'x', 'y', 'width', 'height'}
+    """
+    top = get_keypoint_by_name(keypoints, top_keypoint)
+    bottom = get_keypoint_by_name(keypoints, bottom_keypoint)
+    left = get_keypoint_by_name(keypoints, left_keypoint) if left_keypoint else None
+    right = get_keypoint_by_name(keypoints, right_keypoint) if right_keypoint else None
+    
+    eye_width = calculate_eye_width(keypoints)
+    
+    # Get Y boundaries
+    top_y = top['y'] if top and top.get('confidence', 0) > 0.1 else 0
+    bottom_y = bottom['y'] if bottom and bottom.get('confidence', 0) > 0.1 else 0
+    
+    # Apply eye_width factor
+    if eye_width_factor > 0 and eye_width > 0:
+        top_y -= eye_width * eye_width_factor
+        bottom_y += eye_width * eye_width_factor
+    
+    # Get X boundaries
+    if left and right and left.get('confidence', 0) > 0.1 and right.get('confidence', 0) > 0.1:
+        left_x = min(left['x'], right['x'])
+        right_x = max(left['x'], right['x'])
+    else:
+        # Fallback: use nose position
+        nose = get_keypoint_by_name(keypoints, 'nose')
+        if nose:
+            left_x = nose['x'] - eye_width * 2 if eye_width > 0 else nose['x'] - 50
+            right_x = nose['x'] + eye_width * 2 if eye_width > 0 else nose['x'] + 50
+        else:
+            left_x = 0
+            right_x = 100
+    
+    return {
+        'x': int(left_x),
+        'y': int(top_y),
+        'width': int(right_x - left_x),
+        'height': int(bottom_y - top_y)
+    }
+
+
+# Region estimation helpers for specific body parts
+
+def get_head_region(keypoints: List[Dict]) -> Dict:
+    """Get head region (for hair/hat detection)"""
+    return estimate_region_from_keypoints(
+        keypoints,
+        top_keypoint='nose',
+        bottom_keypoint='neck',
+        left_keypoint='left_ear',
+        right_keypoint='right_ear',
+        eye_width_factor=1.0
+    )
+
+
+def get_face_region(keypoints: List[Dict]) -> Dict:
+    """Get face region (for skin/face accessories detection)"""
+    return estimate_region_from_keypoints(
+        keypoints,
+        top_keypoint='nose',
+        bottom_keypoint='neck',
+        left_keypoint='left_eye',
+        right_keypoint='right_eye',
+        eye_width_factor=0.5
+    )
+
+
+def get_torso_region(keypoints: List[Dict]) -> Dict:
+    """Get torso region (for upper clothing detection)"""
+    return estimate_region_from_keypoints(
+        keypoints,
+        top_keypoint='neck',
+        bottom_keypoint='left_hip',
+        left_keypoint='left_shoulder',
+        right_keypoint='right_shoulder'
+    )
+
+
+def get_leg_region(keypoints: List[Dict]) -> Dict:
+    """Get leg region (for lower clothing detection)"""
+    return estimate_region_from_keypoints(
+        keypoints,
+        top_keypoint='left_hip',
+        bottom_keypoint='left_ankle',
+        left_keypoint='left_hip',
+        right_keypoint='right_hip'
+    )
+
+
+def get_arm_region(keypoints: List[Dict], side: str = 'left') -> Dict:
+    """Get arm region (for sleeve/arm detection)"""
+    if side == 'left':
+        return estimate_region_from_keypoints(
+            keypoints,
+            top_keypoint='left_shoulder',
+            bottom_keypoint='left_wrist',
+            left_keypoint='left_shoulder',
+            right_keypoint='left_elbow'
+        )
+    else:
+        return estimate_region_from_keypoints(
+            keypoints,
+            top_keypoint='right_shoulder',
+            bottom_keypoint='right_wrist',
+            left_keypoint='right_elbow',
+            right_keypoint='right_shoulder'
+        )
+
+
+if __name__ == '__main__':
+    # Test with sample keypoints
+    sample_keypoints = [
+        {'name': 'nose', 'x': 100, 'y': 50, 'confidence': 0.9},
+        {'name': 'left_eye', 'x': 90, 'y': 40, 'confidence': 0.8},
+        {'name': 'right_eye', 'x': 110, 'y': 40, 'confidence': 0.8},
+        {'name': 'neck', 'x': 100, 'y': 80, 'confidence': 0.7},
+        {'name': 'left_shoulder', 'x': 70, 'y': 100, 'confidence': 0.8},
+        {'name': 'right_shoulder', 'x': 130, 'y': 100, 'confidence': 0.8},
+        {'name': 'left_hip', 'x': 80, 'y': 200, 'confidence': 0.7},
+        {'name': 'right_hip', 'x': 120, 'y': 200, 'confidence': 0.7},
+        {'name': 'left_ankle', 'x': 80, 'y': 400, 'confidence': 0.6},
+        {'name': 'right_ankle', 'x': 120, 'y': 400, 'confidence': 0.6},
+    ]
+    
+    proportions = calculate_proportions(sample_keypoints)
+    print("Proportions:", proportions)
+    
+    head_region = get_head_region(sample_keypoints)
+    print("Head region:", head_region)
+    
+    torso_region = get_torso_region(sample_keypoints)
+    print("Torso region:", torso_region)