#!/opt/homebrew/bin/python3.11
"""
Feature Extractor - Appearance feature extraction from video frames

Purpose:
1. Extract color features (HSV histogram, dominant colors)
2. Extract texture features (LBP, shininess)
3. Extract pattern features
4. Classify material, detect exposed skin and compare feature sets

Output of ``FeatureExtractor.extract_all``:
{
    'color': {...},
    'texture': {...},
    'pattern': {...},
    'material': {...},
}

Usage:
    from feature_extractor import FeatureExtractor
    extractor = FeatureExtractor()
    roi = frame[y:y + h, x:x + w]
    features = extractor.extract_all(roi)
"""

import cv2
import numpy as np
from typing import Dict, List, Optional
from skimage.feature import local_binary_pattern


def _crop(frame: np.ndarray, x0, y0, x1, y1) -> np.ndarray:
    """
    Return ``frame[y0:y1, x0:x1]`` with all four bounds clamped to the frame

    NumPy interprets a negative slice bound as an offset from the end of the
    axis, so an unclamped box that overhangs the top/left edge would select
    pixels from the opposite side of the image (or nothing at all). After
    clamping, such a box yields only its in-frame part, and a box lying
    completely outside the frame yields an empty array.

    Args:
        frame: Image array; only the first two axes (rows, columns) are sliced
        x0, y0: Left/top bound (converted with int())
        x1, y1: Right/bottom bound, exclusive (converted with int())

    Returns:
        The (possibly empty) sub-array
    """
    height, width = frame.shape[:2]
    left = min(max(int(x0), 0), width)
    right = min(max(int(x1), 0), width)
    top = min(max(int(y0), 0), height)
    bottom = min(max(int(y1), 0), height)
    return frame[top:bottom, left:right]


def _rounded(value, ndigits: int) -> float:
    """Round ``value`` and return it as a built-in float (not a NumPy scalar)"""
    return float(round(value, ndigits))


class FeatureExtractor:
    """
    Extract appearance features from image regions
    """

    def __init__(self):
        self.lbp_radius = 1
        self.lbp_points = 8
        self.dominant_color_k = 5

    def extract_color(self, roi: np.ndarray) -> Dict:
        """
        Extract color features from ROI

        Args:
            roi: Image region (BGR)

        Returns:
            Dict with normalized H/S/V histograms (30/32/32 bins), up to
            ``dominant_color_k`` dominant colors as HSV triples (largest
            cluster first) and the per-channel means; {'error': 'empty_roi'}
            when the ROI is None or empty
        """
        if roi is None or roi.size == 0:
            return {'error': 'empty_roi'}

        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

        # HSV histograms: hue binned over [0, 180), S and V over [0, 256)
        h_hist = cv2.calcHist([hsv], [0], None, [30], [0, 180]).flatten()
        s_hist = cv2.calcHist([hsv], [1], None, [32], [0, 256]).flatten()
        v_hist = cv2.calcHist([hsv], [2], None, [32], [0, 256]).flatten()

        # Normalize ("or 1" avoids dividing by zero on an all-zero histogram)
        h_sum = h_hist.sum() or 1
        s_sum = s_hist.sum() or 1
        v_sum = v_hist.sum() or 1
        h_hist_norm = (h_hist / h_sum).tolist()
        s_hist_norm = (s_hist / s_sum).tolist()
        v_hist_norm = (v_hist / v_sum).tolist()

        # Dominant colors via k-means
        pixels = hsv.reshape(-1, 3).astype(np.float32)
        dominant_colors = []
        if len(pixels) >= self.dominant_color_k:
            criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
            # NOTE: centers are initialized randomly, so the result is
            # presumably not reproducible between calls
            _, labels, centers = cv2.kmeans(
                pixels, self.dominant_color_k, None, criteria, 10,
                cv2.KMEANS_RANDOM_CENTERS
            )
            # Order cluster centers by descending cluster size
            counts = np.bincount(labels.flatten())
            order = np.argsort(-counts)[:self.dominant_color_k]
            dominant_colors = centers[order].tolist()
        elif len(pixels) > 0:
            # Fewer pixels than clusters: fall back to the mean color
            dominant_colors = [pixels.mean(axis=0).tolist()]

        # Color statistics
        h_mean = np.mean(hsv[:, :, 0])
        s_mean = np.mean(hsv[:, :, 1])
        v_mean = np.mean(hsv[:, :, 2])

        return {
            'h_histogram': h_hist_norm,
            's_histogram': s_hist_norm,
            'v_histogram': v_hist_norm,
            'dominant_colors': dominant_colors,
            'h_mean': _rounded(h_mean, 2),
            's_mean': _rounded(s_mean, 2),
            'v_mean': _rounded(v_mean, 2),
        }

    def extract_texture(self, roi: np.ndarray) -> Dict:
        """
        Extract texture features from ROI

        Args:
            roi: Image region (BGR)

        Returns:
            Dict with the normalized 256-bin LBP histogram, LBP mean/variance
            and brightness (V channel) statistics; {'error': 'empty_roi'}
            when the ROI is None or empty
        """
        if roi is None or roi.size == 0:
            return {'error': 'empty_roi'}

        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

        # LBP texture
        # NOTE(review): the fixed 256 bins match lbp_points == 8; codes for a
        # larger point count would fall outside the histogram range - confirm
        # before changing lbp_points
        lbp = local_binary_pattern(gray, self.lbp_points, self.lbp_radius)
        lbp_hist = np.histogram(lbp, bins=256, range=(0, 256))[0]
        lbp_hist_norm = (lbp_hist / lbp_hist.sum()).tolist()
        lbp_variance = np.var(lbp)
        lbp_mean = np.mean(lbp)

        # Shininess (V channel statistics)
        v_values = hsv[:, :, 2].flatten()
        v_mean = np.mean(v_values)
        v_std = np.std(v_values)
        v_max = np.max(v_values)

        # High brightness ratio (shiny materials): share of pixels with V > 200
        high_brightness_ratio = np.sum(v_values > 200) / len(v_values)

        return {
            'lbp_histogram': lbp_hist_norm,
            'lbp_variance': _rounded(lbp_variance, 2),
            'lbp_mean': _rounded(lbp_mean, 2),
            'brightness': _rounded(v_mean, 2),
            'brightness_std': _rounded(v_std, 2),
            'brightness_max': int(v_max),
            'shininess_ratio': _rounded(high_brightness_ratio, 4),
        }

    def detect_pattern(self, roi: np.ndarray) -> Dict:
        """
        Detect clothing pattern

        Args:
            roi: Image region (BGR)

        Returns:
            Dict with the pattern label ('solid', 'striped' or 'patterned'),
            a fixed per-rule confidence and the measurements the rules used;
            {'pattern': 'unknown', 'confidence': 0.0} when the ROI is None or
            empty
        """
        if roi is None or roi.size == 0:
            return {'pattern': 'unknown', 'confidence': 0.0}

        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

        # Edge detection: fraction of pixels marked as Canny edges
        edges = cv2.Canny(gray, 50, 150)
        edge_ratio = np.sum(edges > 0) / edges.size

        # Gradient analysis
        sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
        sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
        gradient_magnitude = np.sqrt(sobelx**2 + sobely**2)
        gradient_mean = np.mean(gradient_magnitude)

        # Color variance (for pattern detection)
        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
        h_std = np.std(hsv[:, :, 0])
        s_std = np.std(hsv[:, :, 1])

        # Pattern classification
        pattern = 'solid'
        confidence = 0.7

        if edge_ratio > 0.1 and gradient_mean > 20:
            if h_std > 30:
                pattern = 'patterned'
                confidence = 0.8
            elif edge_ratio > 0.2:
                pattern = 'striped'
                confidence = 0.6

        # Strong saturation spread plus strong gradients overrides the above
        if s_std > 50 and gradient_mean > 30:
            pattern = 'patterned'
            confidence = 0.85

        return {
            'pattern': pattern,
            'confidence': confidence,
            'edge_ratio': _rounded(edge_ratio, 4),
            'gradient_mean': _rounded(gradient_mean, 2),
            'color_variance': _rounded(h_std, 2),
        }

    def classify_material(self, roi: np.ndarray) -> Dict:
        """
        Classify clothing material

        Args:
            roi: Image region (BGR)

        Returns:
            Dict with the material label, a fixed per-rule confidence and the
            texture features the rules were applied to;
            {'material': 'unknown', 'confidence': 0.0} when the ROI is None
            or empty
        """
        if roi is None or roi.size == 0:
            return {'material': 'unknown', 'confidence': 0.0}

        texture = self.extract_texture(roi)

        material = 'unknown'
        confidence = 0.0

        lbp_var = texture.get('lbp_variance', 0)
        shininess = texture.get('shininess_ratio', 0)
        brightness = texture.get('brightness', 0)

        # Material classification rules (first match wins)
        if shininess > 0.1 and brightness > 150:
            material = 'silk'
            confidence = 0.7
        elif shininess > 0.05 and lbp_var > 50:
            material = 'leather'
            confidence = 0.6
        elif lbp_var > 100:
            material = 'denim'
            confidence = 0.65
        elif lbp_var < 20 and shininess < 0.02:
            material = 'cotton'
            confidence = 0.6
        elif lbp_var < 50 and brightness < 100:
            material = 'polyester'
            confidence = 0.5

        return {
            'material': material,
            'confidence': confidence,
            'texture_features': texture,
        }

    def extract_all(self, roi: np.ndarray) -> Dict:
        """
        Extract all features from ROI

        Args:
            roi: Image region (BGR)

        Returns:
            Dict with 'color', 'texture', 'pattern' and 'material' entries
        """
        # NOTE: classify_material() computes the texture features again
        # internally, so 'material' carries its own copy of them.
        return {
            'color': self.extract_color(roi),
            'texture': self.extract_texture(roi),
            'pattern': self.detect_pattern(roi),
            'material': self.classify_material(roi),
        }

    def extract_split_region(
        self,
        frame: np.ndarray,
        region: Dict,
        split_ratio: float = 0.5
    ) -> Dict:
        """
        Extract features from split region (upper/lower)

        Args:
            frame: Full frame
            region: Region dict {'x', 'y', 'width', 'height'}
            split_ratio: Split ratio (0.5 = 50%)

        Returns:
            Dict with 'upper' and 'lower' features ({'error': 'empty'} for a
            half with zero height), or {'error': 'invalid_region'} when the
            width or height is not positive
        """
        x, y, w, h = region['x'], region['y'], region['width'], region['height']

        if w <= 0 or h <= 0:
            return {'error': 'invalid_region'}

        mid_y = y + int(h * split_ratio)

        # Upper region
        if mid_y > y:
            upper_features = self.extract_all(_crop(frame, x, y, x + w, mid_y))
        else:
            upper_features = {'error': 'empty'}

        # Lower region
        if y + h > mid_y:
            lower_features = self.extract_all(_crop(frame, x, mid_y, x + w, y + h))
        else:
            lower_features = {'error': 'empty'}

        return {
            'upper': upper_features,
            'lower': lower_features,
        }

    def detect_exposed_skin(self, roi: np.ndarray) -> Dict:
        """
        Detect exposed skin in ROI

        Args:
            roi: Image region (BGR)

        Returns:
            Dict with 'skin_ratio' (fraction of pixels inside the HSV skin
            range) and 'skin_detected' (True when that fraction exceeds 0.3)
        """
        if roi is None or roi.size == 0:
            return {'skin_ratio': 0.0, 'skin_detected': False}

        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

        # Skin color range (HSV)
        # H: 0-50 (skin tones)
        # S: 10-150 (not too saturated)
        # V: 50-255 (visible)
        skin_mask = cv2.inRange(hsv, (0, 10, 50), (50, 150, 255))
        skin_ratio = np.sum(skin_mask > 0) / skin_mask.size

        return {
            'skin_ratio': _rounded(skin_ratio, 4),
            # bool(): a NumPy bool is not JSON-serializable
            'skin_detected': bool(skin_ratio > 0.3),
        }

    def calculate_similarity(self, features1: Dict, features2: Dict) -> Dict:
        """
        Calculate similarity between two feature sets

        Args:
            features1: First feature dict
            features2: Second feature dict

        Returns:
            Dict with 'color_similarity' and 'color_distance' (when both
            inputs have 'color'), 'texture_similarity' (when both have
            'texture') and 'overall_score' = 0.7 * color + 0.3 * texture,
            where a missing similarity counts as 0
        """
        scores = {}

        # Color similarity (histogram correlation)
        if 'color' in features1 and 'color' in features2:
            h1 = np.array(features1['color'].get('h_histogram', []))
            h2 = np.array(features2['color'].get('h_histogram', []))

            if len(h1) > 0 and len(h2) > 0:
                h_corr = cv2.compareHist(
                    h1.astype(np.float32), h2.astype(np.float32),
                    cv2.HISTCMP_CORREL
                )
                scores['color_similarity'] = _rounded(h_corr, 4)
            else:
                scores['color_similarity'] = 0.0

            # Dominant color distance (between the most dominant colors only)
            dc1 = np.array(features1['color'].get('dominant_colors', [[0, 0, 0]]))
            dc2 = np.array(features2['color'].get('dominant_colors', [[0, 0, 0]]))

            if len(dc1) > 0 and len(dc2) > 0:
                color_dist = np.linalg.norm(dc1[0] - dc2[0])
                scores['color_distance'] = _rounded(color_dist, 2)
            else:
                scores['color_distance'] = 255.0

        # Texture similarity
        if 'texture' in features1 and 'texture' in features2:
            lbp1 = np.array(features1['texture'].get('lbp_histogram', []))
            lbp2 = np.array(features2['texture'].get('lbp_histogram', []))

            if len(lbp1) > 0 and len(lbp2) > 0:
                lbp_corr = cv2.compareHist(
                    lbp1.astype(np.float32), lbp2.astype(np.float32),
                    cv2.HISTCMP_CORREL
                )
                scores['texture_similarity'] = _rounded(lbp_corr, 4)
            else:
                scores['texture_similarity'] = 0.0

        # Overall score
        color_sim = scores.get('color_similarity', 0)
        texture_sim = scores.get('texture_similarity', 0)
        scores['overall_score'] = round(color_sim * 0.7 + texture_sim * 0.3, 4)

        return scores


# Helper functions for specific feature extraction

def extract_hair_color(frame: np.ndarray, head_region: Dict) -> Dict:
    """Extract hair color from head region"""
    extractor = FeatureExtractor()
    x, y, w, h = (head_region['x'], head_region['y'],
                  head_region['width'], head_region['height'])

    # Focus on upper part of head (hair area)
    hair_roi = _crop(frame, x, y, x + w, y + int(h * 0.5))
    return extractor.extract_color(hair_roi)


def extract_skin_color(frame: np.ndarray, face_region: Dict) -> Dict:
    """Extract skin color from face region"""
    extractor = FeatureExtractor()
    x, y, w, h = (face_region['x'], face_region['y'],
                  face_region['width'], face_region['height'])

    face_roi = _crop(frame, x, y, x + w, y + h)
    return extractor.extract_color(face_roi)


def extract_clothing_color(frame: np.ndarray, torso_region: Dict) -> Dict:
    """Extract clothing color from torso region"""
    extractor = FeatureExtractor()
    return extractor.extract_split_region(frame, torso_region)


def extract_accessory_color(frame: np.ndarray, accessory_region: Dict) -> Dict:
    """Extract accessory color from region"""
    extractor = FeatureExtractor()
    x, y, w, h = (accessory_region['x'], accessory_region['y'],
                  accessory_region['width'], accessory_region['height'])

    roi = _crop(frame, x, y, x + w, y + h)
    return extractor.extract_color(roi)


class HierarchicalFeatureExtractor:
    """
    Hierarchical feature extraction: coarse → fine

    Level 1: Large regions (body bbox, upper/lower body)
    Level 2: Medium regions (head, face, arms, legs)
    Level 3: Fine features (accessories, details)
    """

    # Level 2 regions that are extracted without any extra processing
    _PLAIN_LEVEL2_REGIONS = ('torso', 'leg', 'left_arm', 'right_arm')

    # Level 3 crops around a single keypoint:
    # (output key, keypoint name, downward extent in multiples of the offset)
    _KEYPOINT_ACCESSORIES = (
        ('left_earring', 'left_ear', 1),
        ('right_earring', 'right_ear', 1),
        ('left_watch', 'left_wrist', 1),
        ('right_watch', 'right_wrist', 1),
        ('left_shoe', 'left_ankle', 2),
        ('right_shoe', 'right_ankle', 2),
    )

    def __init__(self):
        self.extractor = FeatureExtractor()

    def extract_level1(self, frame: np.ndarray, bbox: Dict, head_region: Optional[Dict] = None) -> Dict:
        """
        Level 1: Extract large region features

        Args:
            frame: Full frame
            bbox: Full body bbox {'x', 'y', 'width', 'height'}
            head_region: Optional head region for hair extraction

        Returns:
            Dict with large region features, or {'error': 'invalid_bbox'}
            when the bbox width or height is not positive
        """
        x, y, w, h = bbox['x'], bbox['y'], bbox['width'], bbox['height']

        if w <= 0 or h <= 0:
            return {'error': 'invalid_bbox'}

        # Full body region
        body_features = self.extractor.extract_all(_crop(frame, x, y, x + w, y + h))

        # Split upper/lower body (50%)
        mid_y = y + h // 2
        upper_features = {}
        lower_features = {}
        if mid_y > y:
            upper_features = self.extractor.extract_all(
                _crop(frame, x, y, x + w, mid_y))
        if y + h > mid_y:
            lower_features = self.extractor.extract_all(
                _crop(frame, x, mid_y, x + w, y + h))

        # Head top (hair) - part of Level 1
        head_features = {}
        if head_region is not None:
            head_roi = self._get_roi(frame, head_region)
            if head_roi is not None:
                head_features = self.extractor.extract_all(head_roi)

        return {
            'level': 1,
            'body': body_features,
            'head_top': head_features,
            'upper_body': upper_features,
            'lower_body': lower_features,
            'bbox': bbox,
        }

    def extract_level2(
        self,
        frame: np.ndarray,
        regions: Dict
    ) -> Dict:
        """
        Level 2: Extract medium region features

        Args:
            frame: Full frame
            regions: Dict with face, torso, leg, arm regions

        Returns:
            Dict with medium region features; only regions present in
            ``regions`` get an entry, and an invalid region maps to {}
        """
        features = {'level': 2}

        # Face region (skin, lips, eyes)
        if 'face' in regions:
            face_roi = self._get_roi(frame, regions['face'])
            if face_roi is not None:
                face_features = self.extractor.extract_all(face_roi)
                face_features['skin'] = self.extractor.detect_exposed_skin(face_roi)
            else:
                face_features = {'skin': {}}
            features['face'] = face_features

        # Torso (clothing details), leg and arm (sleeve detection) regions
        for name in self._PLAIN_LEVEL2_REGIONS:
            if name in regions:
                roi = self._get_roi(frame, regions[name])
                features[name] = self.extractor.extract_all(roi) if roi is not None else {}

        return features

    def extract_level3(
        self,
        frame: np.ndarray,
        keypoints: List[Dict],
        eye_width: float
    ) -> Dict:
        """
        Level 3: Extract fine features (accessories, details)

        Args:
            frame: Full frame
            keypoints: Pose keypoints (dicts with 'name', 'x', 'y' and an
                optional 'confidence')
            eye_width: Eye distance (reference unit)

        Returns:
            Dict with fine features; a keypoint that is missing, or (except
            for the eyes) has a confidence of 0.1 or less, produces no entry
        """
        features = {'level': 3}

        # Estimate accessory regions from keypoints
        offset = int(eye_width * 0.5) if eye_width > 0 else 20

        # Glasses (eye region)
        left_eye = self._get_kp(keypoints, 'left_eye')
        right_eye = self._get_kp(keypoints, 'right_eye')

        if left_eye and right_eye:
            # min/max on both axes: the box must not depend on which eye has
            # the larger x coordinate, otherwise the slice can come out empty
            eye_xs = (left_eye['x'], right_eye['x'])
            eye_ys = (left_eye['y'], right_eye['y'])
            glasses_roi = _crop(
                frame,
                min(eye_xs) - offset, min(eye_ys) - offset,
                max(eye_xs) + offset, max(eye_ys) + offset,
            )
            features['glasses'] = self.extractor.extract_all(glasses_roi)

        # Earrings (ear positions), watch (wrist positions), shoes (ankle
        # positions; the shoe box extends two offsets below the ankle)
        for key, kp_name, down in self._KEYPOINT_ACCESSORIES:
            kp = self._get_kp(keypoints, kp_name)
            if kp and kp.get('confidence', 0) > 0.1:
                roi = _crop(
                    frame,
                    kp['x'] - offset, kp['y'] - offset,
                    kp['x'] + offset, kp['y'] + offset * down,
                )
                features[key] = self.extractor.extract_all(roi)

        return features

    def extract_hierarchical(
        self,
        frame: np.ndarray,
        bbox: Dict,
        regions: Dict,
        keypoints: List[Dict],
        eye_width: float
    ) -> Dict:
        """
        Full hierarchical extraction: Level 1 → Level 2 → Level 3

        Args:
            frame: Full frame
            bbox: Full body bbox
            regions: Medium regions dict (includes 'head' for Level 1);
                None or empty is treated as "no regions"
            keypoints: Pose keypoints
            eye_width: Reference unit

        Returns:
            Dict with all hierarchical features
        """
        # Normalize once so Level 2 does not fail on regions=None
        regions = regions or {}
        head_region = regions.get('head')

        level1 = self.extract_level1(frame, bbox, head_region)
        level2 = self.extract_level2(frame, regions)
        level3 = self.extract_level3(frame, keypoints, eye_width)

        return {
            'level1': level1,
            'level2': level2,
            'level3': level3,
        }

    def _get_roi(self, frame: np.ndarray, region: Dict) -> Optional[np.ndarray]:
        """Get ROI from frame using region dict (None for a missing/invalid region)"""
        if region is None:
            return None

        x, y = region.get('x', 0), region.get('y', 0)
        w, h = region.get('width', 0), region.get('height', 0)

        if w <= 0 or h <= 0:
            return None

        return _crop(frame, x, y, x + w, y + h)

    def _get_kp(self, keypoints: List[Dict], name: str) -> Optional[Dict]:
        """Get keypoint by name (None when absent or when keypoints is None)"""
        for kp in keypoints or []:
            if kp.get('name') == name:
                return kp
        return None


def _main() -> None:
    """Print a feature summary for the image given on the command line"""
    import sys

    if len(sys.argv) <= 1:
        print("Usage: python feature_extractor.py ")
        return

    img_path = sys.argv[1]
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising
        print(f"Could not read image: {img_path}")
        return

    extractor = FeatureExtractor()

    # Extract from full image
    features = extractor.extract_all(img)

    print("Color features:")
    print(f" H mean: {features['color']['h_mean']}")
    print(f" S mean: {features['color']['s_mean']}")
    print(f" V mean: {features['color']['v_mean']}")
    print(f" Dominant colors: {len(features['color']['dominant_colors'])}")

    print("\nTexture features:")
    print(f" LBP variance: {features['texture']['lbp_variance']}")
    print(f" Brightness: {features['texture']['brightness']}")
    print(f" Shininess: {features['texture']['shininess_ratio']}")

    print("\nPattern:")
    print(f" {features['pattern']['pattern']} (conf: {features['pattern']['confidence']})")

    print("\nMaterial:")
    print(f" {features['material']['material']} (conf: {features['material']['confidence']})")


if __name__ == '__main__':
    # Test with sample image
    _main()