momentry_core/scripts/utils/feature_extractor.py

#!/opt/homebrew/bin/python3.11
"""
Feature Extractor - Appearance feature extraction from video frames

Purpose:
1. Extract color features (HSV histogram, dominant colors)
2. Extract texture features (LBP, shininess)
3. Extract pattern features
4. Detect accessories and clothing attributes

Output:
{
  'color': {...},
  'texture': {...},
  'pattern': {...},
  'material': {...},
}

Usage:
    from feature_extractor import FeatureExtractor

    extractor = FeatureExtractor()
    features = extractor.extract_all(roi)  # roi: BGR image region cropped from a frame
"""

import cv2
import numpy as np
from typing import Dict, List, Optional
from skimage.feature import local_binary_pattern


class FeatureExtractor:
    """
    Extract appearance features from image regions.

    Every public method takes a BGR image region (the channel order OpenCV
    uses) and returns a plain dict. Scalar results are converted to native
    Python ``float``/``int``/``bool`` so NumPy scalar types (notably
    ``np.bool_``, which ``json`` cannot serialize) do not leak to callers.
    """

    def __init__(self):
        # LBP neighbourhood: number of sample points and circle radius
        self.lbp_radius = 1
        self.lbp_points = 8
        # Number of k-means clusters used for dominant-color extraction
        self.dominant_color_k = 5

    @staticmethod
    def _clamped_crop(frame: np.ndarray, x0, y0, x1, y1) -> np.ndarray:
        """
        Crop ``frame[y0:y1, x0:x1]`` with all bounds clamped to the frame.

        NumPy interprets negative slice bounds as offsets from the end, so an
        unclamped crop of a box that sticks out of the top/left edge silently
        returns the wrong pixels (or nothing). Bounds are also cast to ``int``
        so float coordinates do not raise.

        Returns:
            The cropped view; it is empty when the box lies outside the frame.
        """
        frame_h, frame_w = frame.shape[:2]
        x0 = max(0, min(frame_w, int(x0)))
        x1 = max(0, min(frame_w, int(x1)))
        y0 = max(0, min(frame_h, int(y0)))
        y1 = max(0, min(frame_h, int(y1)))
        return frame[y0:y1, x0:x1]

    @staticmethod
    def _histogram_correlation(hist_a, hist_b) -> float:
        """
        Correlation between two histograms, rounded to 4 decimals.

        Returns 0.0 when either histogram is empty or their shapes differ
        (``cv2.compareHist`` would raise on mismatched inputs).
        """
        a = np.asarray(hist_a, dtype=np.float32)
        b = np.asarray(hist_b, dtype=np.float32)
        if a.size == 0 or b.size == 0 or a.shape != b.shape:
            return 0.0
        return round(float(cv2.compareHist(a, b, cv2.HISTCMP_CORREL)), 4)

    def extract_color(self, roi: np.ndarray) -> Dict:
        """
        Extract color features from ROI

        Args:
            roi: Image region (BGR)

        Returns:
            Dict with normalized H/S/V histograms (30/32/32 bins), up to
            ``dominant_color_k`` dominant HSV colors ordered by cluster size,
            and the per-channel means; ``{'error': 'empty_roi'}`` for an
            empty or missing ROI.
        """
        if roi is None or roi.size == 0:
            return {'error': 'empty_roi'}

        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

        # (bins, exclusive upper bound) per HSV channel
        histograms = []
        for channel, (bins, upper) in enumerate(((30, 180), (32, 256), (32, 256))):
            hist = cv2.calcHist([hsv], [channel], None, [bins], [0, upper]).flatten()
            total = hist.sum() or 1  # avoid division by zero
            histograms.append((hist / total).tolist())
        h_hist_norm, s_hist_norm, v_hist_norm = histograms

        # Dominant colors via k-means over all HSV pixels
        pixels = hsv.reshape(-1, 3).astype(np.float32)
        dominant_colors = []

        if len(pixels) >= self.dominant_color_k:
            criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
            _, labels, centers = cv2.kmeans(
                pixels, self.dominant_color_k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS
            )
            counts = np.bincount(labels.flatten())
            # Largest cluster first
            dominant_colors = centers[np.argsort(-counts)[:self.dominant_color_k]].tolist()
        elif len(pixels) > 0:
            # Too few pixels to cluster: fall back to the mean color
            dominant_colors = [pixels.mean(axis=0).tolist()]

        return {
            'h_histogram': h_hist_norm,
            's_histogram': s_hist_norm,
            'v_histogram': v_hist_norm,
            'dominant_colors': dominant_colors,
            'h_mean': round(float(np.mean(hsv[:, :, 0])), 2),
            's_mean': round(float(np.mean(hsv[:, :, 1])), 2),
            'v_mean': round(float(np.mean(hsv[:, :, 2])), 2),
        }

    def extract_texture(self, roi: np.ndarray) -> Dict:
        """
        Extract texture features from ROI

        Args:
            roi: Image region (BGR)

        Returns:
            Dict with the normalized 256-bin LBP histogram, LBP mean/variance
            and brightness ("shininess") statistics of the V channel;
            ``{'error': 'empty_roi'}`` for an empty or missing ROI.
        """
        if roi is None or roi.size == 0:
            return {'error': 'empty_roi'}

        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

        # LBP texture
        lbp = local_binary_pattern(gray, self.lbp_points, self.lbp_radius)
        lbp_hist = np.histogram(lbp, bins=256, range=(0, 256))[0]
        lbp_total = lbp_hist.sum() or 1  # same zero guard as the color histograms
        lbp_hist_norm = (lbp_hist / lbp_total).tolist()

        # Shininess (V channel statistics)
        v_values = hsv[:, :, 2].flatten()

        # Share of very bright pixels (V > 200), used as a proxy for shiny materials
        high_brightness_ratio = np.sum(v_values > 200) / len(v_values)

        return {
            'lbp_histogram': lbp_hist_norm,
            'lbp_variance': round(float(np.var(lbp)), 2),
            'lbp_mean': round(float(np.mean(lbp)), 2),
            'brightness': round(float(np.mean(v_values)), 2),
            'brightness_std': round(float(np.std(v_values)), 2),
            'brightness_max': int(np.max(v_values)),
            'shininess_ratio': round(float(high_brightness_ratio), 4),
        }

    def detect_pattern(self, roi: np.ndarray) -> Dict:
        """
        Detect clothing pattern

        Classification is rule-based on Canny edge density, mean Sobel
        gradient magnitude and hue/saturation spread; the confidence values
        are fixed constants attached to each rule, not calibrated scores.

        Args:
            roi: Image region (BGR)

        Returns:
            Dict with 'pattern' ('solid', 'striped', 'patterned' or 'unknown'),
            'confidence' and the measurements the decision was based on.
        """
        if roi is None or roi.size == 0:
            return {'pattern': 'unknown', 'confidence': 0.0}

        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

        # Edge detection
        edges = cv2.Canny(gray, 50, 150)
        edge_ratio = float(np.sum(edges > 0) / edges.size)

        # Gradient analysis
        sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
        sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
        gradient_mean = float(np.mean(np.sqrt(sobelx**2 + sobely**2)))

        # Color variance (for pattern detection)
        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
        h_std = float(np.std(hsv[:, :, 0]))
        s_std = float(np.std(hsv[:, :, 1]))

        # Pattern classification; default is a solid color
        pattern = 'solid'
        confidence = 0.7

        if edge_ratio > 0.1 and gradient_mean > 20:
            if h_std > 30:
                pattern = 'patterned'
                confidence = 0.8
            elif edge_ratio > 0.2:
                pattern = 'striped'
                confidence = 0.6

        # Strong saturation spread plus strong gradients overrides the above
        if s_std > 50 and gradient_mean > 30:
            pattern = 'patterned'
            confidence = 0.85

        return {
            'pattern': pattern,
            'confidence': confidence,
            'edge_ratio': round(edge_ratio, 4),
            'gradient_mean': round(gradient_mean, 2),
            'color_variance': round(h_std, 2),
        }

    def classify_material(self, roi: np.ndarray, texture: Optional[Dict] = None) -> Dict:
        """
        Classify clothing material

        Args:
            roi: Image region (BGR)
            texture: Optional result of ``extract_texture(roi)``. When given
                it is reused instead of recomputing the (comparatively
                expensive) LBP features.

        Returns:
            Dict with 'material', 'confidence' and the 'texture_features' the
            rules were applied to; ``{'material': 'unknown', 'confidence': 0.0}``
            when the ROI is empty or the texture features are unavailable.
        """
        if roi is None or roi.size == 0:
            return {'material': 'unknown', 'confidence': 0.0}

        if texture is None:
            texture = self.extract_texture(roi)
        if 'error' in texture:
            # Without real measurements the zero defaults below would
            # misclassify the region (as 'cotton').
            return {'material': 'unknown', 'confidence': 0.0}

        material = 'unknown'
        confidence = 0.0

        lbp_var = texture.get('lbp_variance', 0)
        shininess = texture.get('shininess_ratio', 0)
        brightness = texture.get('brightness', 0)

        # Material classification rules (first match wins)
        if shininess > 0.1 and brightness > 150:
            material = 'silk'
            confidence = 0.7
        elif shininess > 0.05 and lbp_var > 50:
            material = 'leather'
            confidence = 0.6
        elif lbp_var > 100:
            material = 'denim'
            confidence = 0.65
        elif lbp_var < 20 and shininess < 0.02:
            material = 'cotton'
            confidence = 0.6
        elif lbp_var < 50 and brightness < 100:
            material = 'polyester'
            confidence = 0.5

        return {
            'material': material,
            'confidence': confidence,
            'texture_features': texture,
        }

    def extract_all(self, roi: np.ndarray) -> Dict:
        """
        Extract all features from ROI

        Args:
            roi: Image region (BGR)

        Returns:
            Dict with 'color', 'texture', 'pattern' and 'material' entries
        """
        color = self.extract_color(roi)
        texture = self.extract_texture(roi)
        pattern = self.detect_pattern(roi)
        # Reuse the texture features computed above; pass a shallow copy so
        # the 'texture' and 'texture_features' entries stay separate dicts.
        material = self.classify_material(roi, texture=dict(texture))
        return {
            'color': color,
            'texture': texture,
            'pattern': pattern,
            'material': material,
        }

    def extract_split_region(
        self,
        frame: np.ndarray,
        region: Dict,
        split_ratio: float = 0.5
    ) -> Dict:
        """
        Extract features from split region (upper/lower)

        The region is clamped to the frame before cropping, so boxes that
        extend past the frame edges (or carry float coordinates) are handled
        instead of wrapping around or raising.

        Args:
            frame: Full frame
            region: Region dict {'x', 'y', 'width', 'height'}
            split_ratio: Split ratio (0.5 = 50%); the split line is kept
                inside the region

        Returns:
            Dict with 'upper' and 'lower' features; a half with zero height
            is reported as ``{'error': 'empty'}``. ``{'error': 'invalid_region'}``
            when width or height is not positive.
        """
        x, y, w, h = region['x'], region['y'], region['width'], region['height']

        if w <= 0 or h <= 0:
            return {'error': 'invalid_region'}

        x0, y0 = int(x), int(y)
        x1, y1 = int(x + w), int(y + h)
        mid_y = min(max(y0 + int(h * split_ratio), y0), y1)

        # Upper region
        if mid_y > y0:
            upper_features = self.extract_all(self._clamped_crop(frame, x0, y0, x1, mid_y))
        else:
            upper_features = {'error': 'empty'}

        # Lower region
        if y1 > mid_y:
            lower_features = self.extract_all(self._clamped_crop(frame, x0, mid_y, x1, y1))
        else:
            lower_features = {'error': 'empty'}

        return {
            'upper': upper_features,
            'lower': lower_features,
        }

    def detect_exposed_skin(self, roi: np.ndarray) -> Dict:
        """
        Detect exposed skin in ROI

        Args:
            roi: Image region (BGR)

        Returns:
            Dict with 'skin_ratio' (share of pixels inside the skin HSV range)
            and 'skin_detected' (True when that share exceeds 0.3)
        """
        if roi is None or roi.size == 0:
            return {'skin_ratio': 0.0, 'skin_detected': False}

        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

        # Skin color range (HSV)
        # H: 0-50 (skin tones)
        # S: 10-150 (not too saturated)
        # V: 50-255 (visible)
        skin_mask = cv2.inRange(hsv, (0, 10, 50), (50, 150, 255))

        skin_ratio = float(np.sum(skin_mask > 0) / skin_mask.size)

        return {
            'skin_ratio': round(skin_ratio, 4),
            # bool(): a NumPy bool here would not be JSON serializable
            'skin_detected': bool(skin_ratio > 0.3),
        }

    def calculate_similarity(self, features1: Dict, features2: Dict) -> Dict:
        """
        Calculate similarity between two feature sets

        Args:
            features1: First feature dict (as returned by ``extract_all``)
            features2: Second feature dict

        Returns:
            Dict with 'color_similarity' / 'color_distance' (when both inputs
            have 'color'), 'texture_similarity' (when both have 'texture') and
            'overall_score' = 0.7 * color + 0.3 * texture similarity, where a
            missing similarity counts as 0.
        """
        scores = {}

        # Color similarity (hue histogram correlation)
        if 'color' in features1 and 'color' in features2:
            color1, color2 = features1['color'], features2['color']
            scores['color_similarity'] = self._histogram_correlation(
                color1.get('h_histogram', []), color2.get('h_histogram', [])
            )

            # Distance between the most dominant colors of each set
            dc1 = np.array(color1.get('dominant_colors', [[0, 0, 0]]), dtype=np.float64)
            dc2 = np.array(color2.get('dominant_colors', [[0, 0, 0]]), dtype=np.float64)

            if len(dc1) > 0 and len(dc2) > 0 and dc1[0].shape == dc2[0].shape:
                scores['color_distance'] = round(float(np.linalg.norm(dc1[0] - dc2[0])), 2)
            else:
                # No comparable dominant color: report the "far apart" fallback
                scores['color_distance'] = 255.0

        # Texture similarity (LBP histogram correlation)
        if 'texture' in features1 and 'texture' in features2:
            scores['texture_similarity'] = self._histogram_correlation(
                features1['texture'].get('lbp_histogram', []),
                features2['texture'].get('lbp_histogram', []),
            )

        # Overall score
        color_sim = scores.get('color_similarity', 0)
        texture_sim = scores.get('texture_similarity', 0)
        scores['overall_score'] = round(float(color_sim * 0.7 + texture_sim * 0.3), 4)

        return scores


# Helper functions for specific feature extraction

def extract_hair_color(frame: np.ndarray, head_region: Dict) -> Dict:
    """Return color features for the hair area, taken as the top half of the head region."""
    left, top, width, height = (
        head_region[key] for key in ('x', 'y', 'width', 'height')
    )

    # Hair is assumed to occupy the upper 50% of the head box
    bottom = top + int(height * 0.5)
    right = left + width

    return FeatureExtractor().extract_color(frame[top:bottom, left:right])


def extract_skin_color(frame: np.ndarray, face_region: Dict) -> Dict:
    """Return color features computed over the whole face region."""
    left, top, width, height = (
        face_region[key] for key in ('x', 'y', 'width', 'height')
    )

    rows = slice(top, top + height)
    cols = slice(left, left + width)

    return FeatureExtractor().extract_color(frame[rows, cols])


def extract_clothing_color(frame: np.ndarray, torso_region: Dict) -> Dict:
    """
    Return upper/lower clothing features for the torso region.

    Delegates to FeatureExtractor.extract_split_region with its default
    split ratio, so the result holds full feature sets (not only color)
    under the 'upper' and 'lower' keys.
    """
    splitter = FeatureExtractor()
    split_features = splitter.extract_split_region(frame, torso_region)
    return split_features


def extract_accessory_color(frame: np.ndarray, accessory_region: Dict) -> Dict:
    """Return color features computed over the given accessory region."""
    left, top, width, height = (
        accessory_region[key] for key in ('x', 'y', 'width', 'height')
    )

    rows = slice(top, top + height)
    cols = slice(left, left + width)

    return FeatureExtractor().extract_color(frame[rows, cols])


class HierarchicalFeatureExtractor:
    """
    Hierarchical feature extraction: coarse → fine

    Level 1: Large regions (body bbox, upper/lower body)
    Level 2: Medium regions (head, face, arms, legs)
    Level 3: Fine features (accessories, details)

    All crops are clamped to the frame, so boxes or keypoints close to the
    image border yield the visible part of the region instead of an empty or
    wrapped-around slice.
    """

    # (keypoint name, output key, downward extent in units of `offset`).
    # Shoes extend further below the ankle than above it.
    _POINT_ACCESSORIES = (
        ('left_ear', 'left_earring', 1),
        ('right_ear', 'right_earring', 1),
        ('left_wrist', 'left_watch', 1),
        ('right_wrist', 'right_watch', 1),
        ('left_ankle', 'left_shoe', 2),
        ('right_ankle', 'right_shoe', 2),
    )

    # Region names handled by extract_level2, in output order
    _LEVEL2_REGIONS = ('face', 'torso', 'leg', 'left_arm', 'right_arm')

    def __init__(self):
        self.extractor = FeatureExtractor()

    def extract_level1(self, frame: np.ndarray, bbox: Dict, head_region: Optional[Dict] = None) -> Dict:
        """
        Level 1: Extract large region features

        Args:
            frame: Full frame
            bbox: Full body bbox {'x', 'y', 'width', 'height'}
            head_region: Optional head region for hair extraction

        Returns:
            Dict with 'body', 'head_top', 'upper_body' and 'lower_body'
            features plus the input 'bbox'; a half with zero height and a
            missing head region are reported as ``{}``.
            ``{'error': 'invalid_bbox'}`` when width or height is not positive.
        """
        x, y, w, h = bbox['x'], bbox['y'], bbox['width'], bbox['height']

        if w <= 0 or h <= 0:
            return {'error': 'invalid_bbox'}

        x0, y0 = int(x), int(y)
        x1, y1 = int(x + w), int(y + h)

        # Full body region
        body_features = self.extractor.extract_all(self._crop(frame, x0, y0, x1, y1))

        # Split upper/lower body (50%)
        mid_y = y0 + int(h) // 2

        upper_features = {}
        if mid_y > y0:
            upper_features = self.extractor.extract_all(self._crop(frame, x0, y0, x1, mid_y))

        lower_features = {}
        if y1 > mid_y:
            lower_features = self.extractor.extract_all(self._crop(frame, x0, mid_y, x1, y1))

        # Head top (hair) - part of Level 1
        head_features = {}
        if head_region is not None:
            head_roi = self._get_roi(frame, head_region)
            if head_roi is not None:
                head_features = self.extractor.extract_all(head_roi)

        return {
            'level': 1,
            'body': body_features,
            'head_top': head_features,
            'upper_body': upper_features,
            'lower_body': lower_features,
            'bbox': bbox,
        }

    def extract_level2(
        self,
        frame: np.ndarray,
        regions: Dict
    ) -> Dict:
        """
        Level 2: Extract medium region features

        Args:
            frame: Full frame
            regions: Dict with any of 'face', 'torso', 'leg', 'left_arm',
                'right_arm' region dicts; ``None`` is treated as empty

        Returns:
            Dict with one entry per region present in ``regions`` (``{}`` for
            a region with non-positive size). The face entry additionally
            carries a 'skin' sub-dict from exposed-skin detection.
        """
        features = {'level': 2}
        regions = regions or {}

        for name in self._LEVEL2_REGIONS:
            if name not in regions:
                continue
            roi = self._get_roi(frame, regions[name])
            features[name] = self.extractor.extract_all(roi) if roi is not None else {}

            if name == 'face':
                # Face region also gets a skin measurement
                features[name]['skin'] = (
                    self.extractor.detect_exposed_skin(roi) if roi is not None else {}
                )

        return features

    def extract_level3(
        self,
        frame: np.ndarray,
        keypoints: List[Dict],
        eye_width: float
    ) -> Dict:
        """
        Level 3: Extract fine features (accessories, details)

        Accessory regions are square-ish boxes around pose keypoints whose
        half-size is half the eye distance (20 px when no eye distance is
        available, and never less than 1 px).

        Args:
            frame: Full frame
            keypoints: Pose keypoints, dicts with 'name', 'x', 'y' and
                optionally 'confidence'; ``None`` is treated as empty
            eye_width: Eye distance (reference unit)

        Returns:
            Dict with fine features: 'glasses' when both eyes are present and
            earring / watch / shoe entries for each ear / wrist / ankle
            keypoint with confidence above 0.1.
        """
        features = {'level': 3}

        # Estimate accessory regions from keypoints
        if eye_width and eye_width > 0:
            offset = max(1, int(eye_width * 0.5))
        else:
            offset = 20

        # Glasses (eye region)
        left_eye = self._get_kp(keypoints, 'left_eye')
        right_eye = self._get_kp(keypoints, 'right_eye')
        if left_eye and right_eye:
            features['glasses'] = self.extractor.extract_all(
                self._eye_band_roi(frame, left_eye, right_eye, offset)
            )

        # Earrings (ears), watches (wrists), shoes (ankles)
        for kp_name, feature_key, down in self._POINT_ACCESSORIES:
            kp = self._get_kp(keypoints, kp_name)
            if kp and kp.get('confidence', 0) > 0.1:
                roi = self._crop(
                    frame,
                    kp['x'] - offset,
                    kp['y'] - offset,
                    kp['x'] + offset,
                    kp['y'] + offset * down,
                )
                features[feature_key] = self.extractor.extract_all(roi)

        return features

    def extract_hierarchical(
        self,
        frame: np.ndarray,
        bbox: Dict,
        regions: Dict,
        keypoints: List[Dict],
        eye_width: float
    ) -> Dict:
        """
        Full hierarchical extraction: Level 1 → Level 2 → Level 3

        Args:
            frame: Full frame
            bbox: Full body bbox
            regions: Medium regions dict (includes 'head' for Level 1);
                ``None`` is treated as empty
            keypoints: Pose keypoints
            eye_width: Reference unit

        Returns:
            Dict with 'level1', 'level2' and 'level3' results
        """
        regions = regions or {}
        return {
            'level1': self.extract_level1(frame, bbox, regions.get('head')),
            'level2': self.extract_level2(frame, regions),
            'level3': self.extract_level3(frame, keypoints, eye_width),
        }

    @staticmethod
    def _crop(frame: np.ndarray, x0, y0, x1, y1) -> np.ndarray:
        """
        Crop ``frame[y0:y1, x0:x1]`` with all bounds clamped to the frame.

        NumPy treats negative slice bounds as offsets from the end, so the
        bounds must be clamped before slicing. Bounds are cast to ``int`` so
        float coordinates are accepted. The result is empty when the box lies
        outside the frame.
        """
        frame_h, frame_w = frame.shape[:2]
        x0 = max(0, min(frame_w, int(x0)))
        x1 = max(0, min(frame_w, int(x1)))
        y0 = max(0, min(frame_h, int(y0)))
        y1 = max(0, min(frame_h, int(y1)))
        return frame[y0:y1, x0:x1]

    def _eye_band_roi(self, frame: np.ndarray, left_eye: Dict, right_eye: Dict, offset: int) -> np.ndarray:
        """
        Crop the band spanning both eyes, padded by ``offset`` on every side.

        Uses min/max on both axes: which of 'left_eye'/'right_eye' has the
        smaller x depends on the keypoint naming convention and on the
        subject's orientation, so the order must not be assumed.
        """
        return self._crop(
            frame,
            min(left_eye['x'], right_eye['x']) - offset,
            min(left_eye['y'], right_eye['y']) - offset,
            max(left_eye['x'], right_eye['x']) + offset,
            max(left_eye['y'], right_eye['y']) + offset,
        )

    def _get_roi(self, frame: np.ndarray, region: Dict) -> Optional[np.ndarray]:
        """
        Get ROI from frame using region dict

        Returns ``None`` when the region is missing or has non-positive size;
        otherwise the region clamped to the frame (possibly empty).
        """
        if region is None:
            return None
        x, y = region.get('x', 0), region.get('y', 0)
        w, h = region.get('width', 0), region.get('height', 0)
        if w <= 0 or h <= 0:
            return None
        return self._crop(frame, x, y, x + w, y + h)

    def _get_kp(self, keypoints: List[Dict], name: str) -> Optional[Dict]:
        """Get keypoint by name (first match), or None when absent"""
        for kp in keypoints or ():
            if kp.get('name') == name:
                return kp
        return None


if __name__ == '__main__':
    # Manual smoke test: extract features from a whole image and print a summary
    import sys

    if len(sys.argv) > 1:
        img_path = sys.argv[1]
        img = cv2.imread(img_path)

        # cv2.imread returns None (instead of raising) when the file is
        # missing or cannot be decoded; report that rather than exiting silently.
        if img is None:
            print(f"Error: could not read image: {img_path}", file=sys.stderr)
            sys.exit(1)

        extractor = FeatureExtractor()

        # Extract from full image
        features = extractor.extract_all(img)

        print("Color features:")
        print(f"  H mean: {features['color']['h_mean']}")
        print(f"  S mean: {features['color']['s_mean']}")
        print(f"  V mean: {features['color']['v_mean']}")
        print(f"  Dominant colors: {len(features['color']['dominant_colors'])}")

        print("\nTexture features:")
        print(f"  LBP variance: {features['texture']['lbp_variance']}")
        print(f"  Brightness: {features['texture']['brightness']}")
        print(f"  Shininess: {features['texture']['shininess_ratio']}")

        print("\nPattern:")
        print(f"  {features['pattern']['pattern']} (conf: {features['pattern']['confidence']})")

        print("\nMaterial:")
        print(f"  {features['material']['material']} (conf: {features['material']['confidence']})")
    else:
        print("Usage: python feature_extractor.py <image_path>")