Files
momentry_core/scripts/utils/feature_extractor.py
Accusys 606f31f13c feat: add appearance feature system with coordinate/scale fixes
- Add Appearance_Feature_System_V1.0.md design doc
- Add proportion_calculator.py for body proportions (height, body shape)
- Add feature_extractor.py for hierarchical feature extraction
- Add tkg_level1_builder.py for TKG person_trace nodes
- Fix mediapipe_holistic_processor.py to output Top-Left pixels
- Add MediaPipe format conversion in proportion_calculator

Coordinate system alignment:
- Swift Pose: Top-Left pixels (Y-flip done in swift_pose.swift)
- MediaPipe: Top-Left pixels (norm→pixel conversion added)
2026-06-22 02:27:03 +08:00

684 lines
24 KiB
Python

#!/opt/homebrew/bin/python3.11
"""
Feature Extractor - Appearance feature extraction from video frames
Purpose:
1. Extract color features (HSV histogram, dominant colors)
2. Extract texture features (LBP, shininess)
3. Extract pattern features
4. Detect accessories and clothing attributes
Output:
{
'color': {...},
'texture': {...},
'pattern': {...},
'material': {...},
}
Usage:
from feature_extractor import FeatureExtractor
extractor = FeatureExtractor()
features = extractor.extract_all(roi)  # roi: BGR crop of the frame, e.g. frame[y:y+h, x:x+w]
"""
import cv2
import numpy as np
from typing import Dict, List, Optional
from skimage.feature import local_binary_pattern
class FeatureExtractor:
    """
    Extract appearance features (color, texture, pattern, material) from image regions.

    Every ``roi`` argument is converted with ``cv2.COLOR_BGR2HSV`` /
    ``cv2.COLOR_BGR2GRAY``, i.e. it is treated as a 3-channel BGR image.
    An empty or missing ROI never raises: each method returns a small
    error / "unknown" dict instead.
    """

    def __init__(self):
        # Radius and number of sampling points handed to local_binary_pattern.
        self.lbp_radius = 1
        self.lbp_points = 8
        # Number of k-means clusters used when searching for dominant colors.
        self.dominant_color_k = 5

    @staticmethod
    def _crop(frame: np.ndarray, x0, y0, x1, y1) -> np.ndarray:
        """
        Return ``frame[y0:y1, x0:x1]`` with the bounds clipped to the frame.

        Coordinates are truncated to int and clamped to ``[0, width]`` /
        ``[0, height]``. Without the clamp a negative start index would be
        interpreted by NumPy as "count from the end" and silently select the
        wrong (usually empty) area. The result may be an empty array when the
        requested box lies completely outside the frame.
        """
        frame_h, frame_w = frame.shape[:2]
        left = min(max(int(x0), 0), frame_w)
        right = min(max(int(x1), 0), frame_w)
        top = min(max(int(y0), 0), frame_h)
        bottom = min(max(int(y1), 0), frame_h)
        return frame[top:bottom, left:right]

    def extract_color(self, roi: np.ndarray) -> Dict:
        """
        Extract color features from ROI

        Args:
            roi: Image region (BGR)
        Returns:
            Dict with normalized H/S/V histograms (30/32/32 bins), up to
            ``dominant_color_k`` dominant colors (HSV cluster centers, most
            populated cluster first) and per-channel HSV means, or
            ``{'error': 'empty_roi'}`` when the ROI is missing or empty.
        """
        if roi is None or roi.size == 0:
            return {'error': 'empty_roi'}
        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

        # HSV histograms (hue is binned over [0, 180), S and V over [0, 256))
        h_hist = cv2.calcHist([hsv], [0], None, [30], [0, 180]).flatten()
        s_hist = cv2.calcHist([hsv], [1], None, [32], [0, 256]).flatten()
        v_hist = cv2.calcHist([hsv], [2], None, [32], [0, 256]).flatten()

        # Normalize so each histogram sums to 1; "or 1" guards a zero total.
        h_sum = h_hist.sum() or 1
        s_sum = s_hist.sum() or 1
        v_sum = v_hist.sum() or 1
        h_hist_norm = (h_hist / h_sum).tolist()
        s_hist_norm = (s_hist / s_sum).tolist()
        v_hist_norm = (v_hist / v_sum).tolist()

        # Dominant colors via k-means over all HSV pixels
        pixels = hsv.reshape(-1, 3).astype(np.float32)
        dominant_colors = []
        if len(pixels) >= self.dominant_color_k:
            criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
            # NOTE: random initial centers, so cluster centers can vary between runs.
            _, labels, centers = cv2.kmeans(
                pixels, self.dominant_color_k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS
            )
            # Order clusters by population, largest first.
            counts = np.bincount(labels.flatten())
            dominant_colors = centers[np.argsort(-counts)[:self.dominant_color_k]].tolist()
        elif len(pixels) > 0:
            # Too few pixels to cluster: fall back to the mean color.
            dominant_colors = [pixels.mean(axis=0).tolist()]

        # Color statistics
        h_mean = np.mean(hsv[:, :, 0])
        s_mean = np.mean(hsv[:, :, 1])
        v_mean = np.mean(hsv[:, :, 2])
        return {
            'h_histogram': h_hist_norm,
            's_histogram': s_hist_norm,
            'v_histogram': v_hist_norm,
            'dominant_colors': dominant_colors,
            'h_mean': round(h_mean, 2),
            's_mean': round(s_mean, 2),
            'v_mean': round(v_mean, 2),
        }

    def extract_texture(self, roi: np.ndarray) -> Dict:
        """
        Extract texture features from ROI

        Args:
            roi: Image region (BGR)
        Returns:
            Dict with a normalized 256-bin LBP histogram, LBP mean/variance and
            brightness (HSV V channel) statistics, or
            ``{'error': 'empty_roi'}`` when the ROI is missing or empty.
        """
        if roi is None or roi.size == 0:
            return {'error': 'empty_roi'}
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

        # LBP texture
        lbp = local_binary_pattern(gray, self.lbp_points, self.lbp_radius)
        lbp_hist = np.histogram(lbp, bins=256, range=(0, 256))[0]
        lbp_hist_norm = (lbp_hist / lbp_hist.sum()).tolist()
        lbp_variance = np.var(lbp)
        lbp_mean = np.mean(lbp)

        # Shininess (V channel statistics)
        v_values = hsv[:, :, 2].flatten()
        v_mean = np.mean(v_values)
        v_std = np.std(v_values)
        v_max = np.max(v_values)
        # Share of very bright pixels (V > 200), used as a shininess proxy
        high_brightness_ratio = np.sum(v_values > 200) / len(v_values)
        return {
            'lbp_histogram': lbp_hist_norm,
            'lbp_variance': round(lbp_variance, 2),
            'lbp_mean': round(lbp_mean, 2),
            'brightness': round(v_mean, 2),
            'brightness_std': round(v_std, 2),
            'brightness_max': int(v_max),
            'shininess_ratio': round(high_brightness_ratio, 4),
        }

    def detect_pattern(self, roi: np.ndarray) -> Dict:
        """
        Detect clothing pattern

        Args:
            roi: Image region (BGR)
        Returns:
            Dict with 'pattern' ('solid', 'striped' or 'patterned'; 'unknown'
            for an empty ROI), a fixed rule 'confidence', and the measurements
            the rules are based on.
        """
        if roi is None or roi.size == 0:
            return {'pattern': 'unknown', 'confidence': 0.0}
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

        # Edge density
        edges = cv2.Canny(gray, 50, 150)
        edge_ratio = np.sum(edges > 0) / edges.size

        # Gradient analysis
        sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
        sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
        gradient_magnitude = np.sqrt(sobelx**2 + sobely**2)
        gradient_mean = np.mean(gradient_magnitude)

        # Spread of hue / saturation
        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
        h_std = np.std(hsv[:, :, 0])
        s_std = np.std(hsv[:, :, 1])

        # Pattern classification: 'solid' unless a rule below fires
        pattern = 'solid'
        confidence = 0.7
        if edge_ratio > 0.1 and gradient_mean > 20:
            if h_std > 30:
                pattern = 'patterned'
                confidence = 0.8
            elif edge_ratio > 0.2:
                pattern = 'striped'
                confidence = 0.6
        # Evaluated independently, so it overrides any result from above.
        if s_std > 50 and gradient_mean > 30:
            pattern = 'patterned'
            confidence = 0.85
        return {
            'pattern': pattern,
            'confidence': confidence,
            'edge_ratio': round(edge_ratio, 4),
            'gradient_mean': round(gradient_mean, 2),
            # NOTE: this is the hue standard deviation, despite the key name.
            'color_variance': round(h_std, 2),
        }

    def classify_material(self, roi: np.ndarray, texture: Optional[Dict] = None) -> Dict:
        """
        Classify clothing material

        Args:
            roi: Image region (BGR)
            texture: Optional result of ``extract_texture(roi)``. When given
                it is reused instead of recomputing the texture features.
        Returns:
            Dict with 'material', a fixed rule 'confidence' and the
            'texture_features' the decision was based on
            ('unknown' / 0.0 without texture features for an empty ROI).
        """
        if roi is None or roi.size == 0:
            return {'material': 'unknown', 'confidence': 0.0}
        if texture is None:
            texture = self.extract_texture(roi)
        material = 'unknown'
        confidence = 0.0
        lbp_var = texture.get('lbp_variance', 0)
        shininess = texture.get('shininess_ratio', 0)
        brightness = texture.get('brightness', 0)

        # Material classification rules (first match wins)
        if shininess > 0.1 and brightness > 150:
            material = 'silk'
            confidence = 0.7
        elif shininess > 0.05 and lbp_var > 50:
            material = 'leather'
            confidence = 0.6
        elif lbp_var > 100:
            material = 'denim'
            confidence = 0.65
        elif lbp_var < 20 and shininess < 0.02:
            material = 'cotton'
            confidence = 0.6
        elif lbp_var < 50 and brightness < 100:
            material = 'polyester'
            confidence = 0.5
        return {
            'material': material,
            'confidence': confidence,
            'texture_features': texture,
        }

    def extract_all(self, roi: np.ndarray) -> Dict:
        """
        Extract all features from ROI

        Args:
            roi: Image region (BGR)
        Returns:
            Dict with 'color', 'texture', 'pattern' and 'material' entries
        """
        color = self.extract_color(roi)
        # Texture is computed once and shared with the material classifier,
        # which would otherwise run the LBP analysis a second time.
        texture = self.extract_texture(roi)
        pattern = self.detect_pattern(roi)
        # Pass a copy so 'texture' and 'texture_features' stay separate dicts.
        material = self.classify_material(roi, texture=dict(texture))
        return {
            'color': color,
            'texture': texture,
            'pattern': pattern,
            'material': material,
        }

    def extract_split_region(
        self,
        frame: np.ndarray,
        region: Dict,
        split_ratio: float = 0.5
    ) -> Dict:
        """
        Extract features from split region (upper/lower)

        The region is clipped to the frame before cropping, so a box that
        partly leaves the image is analysed on its visible part only.

        Args:
            frame: Full frame
            region: Region dict {'x', 'y', 'width', 'height'}
            split_ratio: Fraction of the height assigned to the upper part
                (0.5 = 50%)
        Returns:
            Dict with 'upper' and 'lower' feature dicts, or
            ``{'error': 'invalid_region'}`` for a non-positive width/height.
        """
        x, y, w, h = region['x'], region['y'], region['width'], region['height']
        if w <= 0 or h <= 0:
            return {'error': 'invalid_region'}
        mid_y = y + int(h * split_ratio)

        # Upper region
        upper_roi = self._crop(frame, x, y, x + w, mid_y) if mid_y > y else None
        upper_features = self.extract_all(upper_roi) if upper_roi is not None else {'error': 'empty'}

        # Lower region
        lower_roi = self._crop(frame, x, mid_y, x + w, y + h) if y + h > mid_y else None
        lower_features = self.extract_all(lower_roi) if lower_roi is not None else {'error': 'empty'}
        return {
            'upper': upper_features,
            'lower': lower_features,
        }

    def detect_exposed_skin(self, roi: np.ndarray) -> Dict:
        """
        Detect exposed skin in ROI

        Args:
            roi: Image region (BGR)
        Returns:
            Dict with 'skin_ratio' (share of pixels inside the HSV skin range)
            and 'skin_detected' (True when that share exceeds 0.3)
        """
        if roi is None or roi.size == 0:
            return {'skin_ratio': 0.0, 'skin_detected': False}
        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
        # Skin color range (HSV)
        # H: 0-50 (skin tones)
        # S: 10-150 (not too saturated)
        # V: 50-255 (visible)
        skin_mask = cv2.inRange(hsv, (0, 10, 50), (50, 150, 255))
        skin_ratio = np.sum(skin_mask > 0) / skin_mask.size
        return {
            'skin_ratio': round(skin_ratio, 4),
            # Cast to a plain bool: numpy.bool_ is not JSON serializable.
            'skin_detected': bool(skin_ratio > 0.3),
        }

    def calculate_similarity(self, features1: Dict, features2: Dict) -> Dict:
        """
        Calculate similarity between two feature sets

        Args:
            features1: First feature dict (as returned by ``extract_all``)
            features2: Second feature dict
        Returns:
            Dict with 'color_similarity' / 'color_distance' (when both inputs
            have a 'color' entry), 'texture_similarity' (when both have a
            'texture' entry) and 'overall_score' (0.7 * color + 0.3 * texture,
            missing parts counting as 0).
        """
        scores = {}

        # Color similarity (histogram correlation)
        if 'color' in features1 and 'color' in features2:
            h1 = np.array(features1['color'].get('h_histogram', []))
            h2 = np.array(features2['color'].get('h_histogram', []))
            if len(h1) > 0 and len(h2) > 0:
                h_corr = cv2.compareHist(h1.astype(np.float32), h2.astype(np.float32), cv2.HISTCMP_CORREL)
                scores['color_similarity'] = round(h_corr, 4)
            else:
                scores['color_similarity'] = 0.0

            # Distance between the most dominant colors. Missing data must not
            # default to a real color: that would report a distance of 0
            # (a perfect match) for two feature sets that have no color at all.
            dc1 = np.array(features1['color'].get('dominant_colors', []))
            dc2 = np.array(features2['color'].get('dominant_colors', []))
            if len(dc1) > 0 and len(dc2) > 0:
                color_dist = np.linalg.norm(dc1[0] - dc2[0])
                scores['color_distance'] = round(float(color_dist), 2)
            else:
                scores['color_distance'] = 255.0

        # Texture similarity
        if 'texture' in features1 and 'texture' in features2:
            lbp1 = np.array(features1['texture'].get('lbp_histogram', []))
            lbp2 = np.array(features2['texture'].get('lbp_histogram', []))
            if len(lbp1) > 0 and len(lbp2) > 0:
                lbp_corr = cv2.compareHist(lbp1.astype(np.float32), lbp2.astype(np.float32), cv2.HISTCMP_CORREL)
                scores['texture_similarity'] = round(lbp_corr, 4)
            else:
                scores['texture_similarity'] = 0.0

        # Overall score
        color_sim = scores.get('color_similarity', 0)
        texture_sim = scores.get('texture_similarity', 0)
        scores['overall_score'] = round(color_sim * 0.7 + texture_sim * 0.3, 4)
        return scores
# Helper functions for specific feature extraction
def crop_region(frame: np.ndarray, region: Dict, height_ratio: float = 1.0) -> np.ndarray:
    """
    Crop ``region`` out of ``frame``, clipped to the frame bounds.

    Args:
        frame: Full frame
        region: Region dict {'x', 'y', 'width', 'height'}
        height_ratio: Fraction of the region height to keep, measured from
            the top edge (1.0 = whole region)
    Returns:
        The cropped view of ``frame``; empty when the region does not
        intersect the frame.
    """
    x, y = int(region['x']), int(region['y'])
    w, h = int(region['width']), int(region['height'] * height_ratio)
    frame_h, frame_w = frame.shape[:2]
    # Clamp every bound: a negative slice start would otherwise be read by
    # NumPy as "count from the end" and select the wrong area.
    x0 = min(max(x, 0), frame_w)
    x1 = min(max(x + w, 0), frame_w)
    y0 = min(max(y, 0), frame_h)
    y1 = min(max(y + h, 0), frame_h)
    return frame[y0:y1, x0:x1]


def extract_hair_color(frame: np.ndarray, head_region: Dict) -> Dict:
    """Extract hair color from the upper half of the head region"""
    extractor = FeatureExtractor()
    # Focus on upper part of head (hair area)
    hair_roi = crop_region(frame, head_region, height_ratio=0.5)
    return extractor.extract_color(hair_roi)


def extract_skin_color(frame: np.ndarray, face_region: Dict) -> Dict:
    """Extract skin color from face region"""
    extractor = FeatureExtractor()
    face_roi = crop_region(frame, face_region)
    return extractor.extract_color(face_roi)


def extract_clothing_color(frame: np.ndarray, torso_region: Dict) -> Dict:
    """Extract clothing features from torso region, split into upper/lower halves"""
    extractor = FeatureExtractor()
    return extractor.extract_split_region(frame, torso_region)


def extract_accessory_color(frame: np.ndarray, accessory_region: Dict) -> Dict:
    """Extract accessory color from region"""
    extractor = FeatureExtractor()
    roi = crop_region(frame, accessory_region)
    return extractor.extract_color(roi)
class HierarchicalFeatureExtractor:
    """
    Hierarchical feature extraction: coarse → fine
    Level 1: Large regions (body bbox, upper/lower body)
    Level 2: Medium regions (head, face, arms, legs)
    Level 3: Fine features (accessories, details)

    All regions are clipped to the frame before cropping, so boxes that
    partly leave the image are analysed on their visible part.
    """

    # Level 2 region names, in output order.
    _LEVEL2_REGIONS = ('face', 'torso', 'leg', 'left_arm', 'right_arm')

    # Level 3 accessories: (output key, keypoint name, downward padding factor).
    # Shoes extend twice as far below the ankle as the other boxes do.
    _LEVEL3_ACCESSORIES = (
        ('left_earring', 'left_ear', 1),
        ('right_earring', 'right_ear', 1),
        ('left_watch', 'left_wrist', 1),
        ('right_watch', 'right_wrist', 1),
        ('left_shoe', 'left_ankle', 2),
        ('right_shoe', 'right_ankle', 2),
    )

    def __init__(self):
        self.extractor = FeatureExtractor()

    def extract_level1(self, frame: np.ndarray, bbox: Dict, head_region: Optional[Dict] = None) -> Dict:
        """
        Level 1: Extract large region features

        Args:
            frame: Full frame
            bbox: Full body bbox {'x', 'y', 'width', 'height'}
            head_region: Optional head region for hair extraction
        Returns:
            Dict with 'body', 'head_top', 'upper_body' and 'lower_body'
            features plus the input 'bbox', or ``{'error': 'invalid_bbox'}``
            for a non-positive width/height.
        """
        x, y, w, h = bbox['x'], bbox['y'], bbox['width'], bbox['height']
        if w <= 0 or h <= 0:
            return {'error': 'invalid_bbox'}

        # Full body region
        body_roi = self._clip(frame, x, y, x + w, y + h)
        body_features = self.extractor.extract_all(body_roi)

        # Split upper/lower body (50%)
        mid_y = y + h // 2
        upper_roi = self._clip(frame, x, y, x + w, mid_y) if mid_y > y else None
        lower_roi = self._clip(frame, x, mid_y, x + w, y + h) if y + h > mid_y else None
        upper_features = self.extractor.extract_all(upper_roi) if upper_roi is not None else {}
        lower_features = self.extractor.extract_all(lower_roi) if lower_roi is not None else {}

        # Head top (hair) - part of Level 1
        head_features = {}
        if head_region is not None:
            head_roi = self._get_roi(frame, head_region)
            if head_roi is not None:
                head_features = self.extractor.extract_all(head_roi)
        return {
            'level': 1,
            'body': body_features,
            'head_top': head_features,
            'upper_body': upper_features,
            'lower_body': lower_features,
            'bbox': bbox,
        }

    def extract_level2(
        self,
        frame: np.ndarray,
        regions: Dict
    ) -> Dict:
        """
        Level 2: Extract medium region features

        Args:
            frame: Full frame
            regions: Dict with any of 'face', 'torso', 'leg', 'left_arm',
                'right_arm' region dicts; ``None`` is treated as empty
        Returns:
            Dict with one feature dict per region present in ``regions``.
            The face entry additionally carries a 'skin' result.
        """
        features = {'level': 2}
        # extract_hierarchical accepts regions=None, so tolerate it here too.
        regions = regions or {}
        for name in self._LEVEL2_REGIONS:
            if name not in regions:
                continue
            roi = self._get_roi(frame, regions[name])
            features[name] = self.extractor.extract_all(roi) if roi is not None else {}
            if name == 'face':
                # Face region additionally gets an exposed-skin estimate.
                features[name]['skin'] = self.extractor.detect_exposed_skin(roi) if roi is not None else {}
        return features

    def extract_level3(
        self,
        frame: np.ndarray,
        keypoints: List[Dict],
        eye_width: float
    ) -> Dict:
        """
        Level 3: Extract fine features (accessories, details)

        Args:
            frame: Full frame
            keypoints: Pose keypoints (dicts with 'name', 'x', 'y' and
                optionally 'confidence')
            eye_width: Eye distance (reference unit)
        Returns:
            Dict with features for every accessory box that could be placed:
            'glasses' (both eyes present) and earrings / watches / shoes
            (keypoint present with confidence > 0.1).
        """
        features = {'level': 3}
        # Box padding: half the eye distance, or 20 px without a reference.
        offset = int(eye_width * 0.5) if eye_width > 0 else 20

        # Glasses (eye region). The box is built from the min/max of both eye
        # positions, so it is valid whichever eye has the larger image x.
        left_eye = self._get_kp(keypoints, 'left_eye')
        right_eye = self._get_kp(keypoints, 'right_eye')
        if left_eye and right_eye:
            glasses_roi = self._roi_around(frame, [left_eye, right_eye], offset)
            features['glasses'] = self.extractor.extract_all(glasses_roi)

        # Earrings (ears), watches (wrists), shoes (ankles)
        for feature_name, kp_name, below_factor in self._LEVEL3_ACCESSORIES:
            kp = self._get_kp(keypoints, kp_name)
            if kp and kp.get('confidence', 0) > 0.1:
                roi = self._roi_around(frame, [kp], offset, offset * below_factor)
                features[feature_name] = self.extractor.extract_all(roi)
        return features

    def extract_hierarchical(
        self,
        frame: np.ndarray,
        bbox: Dict,
        regions: Dict,
        keypoints: List[Dict],
        eye_width: float
    ) -> Dict:
        """
        Full hierarchical extraction: Level 1 → Level 2 → Level 3

        Args:
            frame: Full frame
            bbox: Full body bbox
            regions: Medium regions dict (includes 'head' for Level 1)
            keypoints: Pose keypoints
            eye_width: Reference unit
        Returns:
            Dict with 'level1', 'level2' and 'level3' results
        """
        head_region = regions.get('head') if regions else None
        level1 = self.extract_level1(frame, bbox, head_region)
        level2 = self.extract_level2(frame, regions)
        level3 = self.extract_level3(frame, keypoints, eye_width)
        return {
            'level1': level1,
            'level2': level2,
            'level3': level3,
        }

    @staticmethod
    def _clip(frame: np.ndarray, x0, y0, x1, y1) -> np.ndarray:
        """
        Return ``frame[y0:y1, x0:x1]`` with the bounds clipped to the frame.

        Bounds are truncated to int and clamped to the frame size. Without
        the clamp a negative start would be read by NumPy as "count from the
        end". The result may be empty.
        """
        frame_h, frame_w = frame.shape[:2]
        left = min(max(int(x0), 0), frame_w)
        right = min(max(int(x1), 0), frame_w)
        top = min(max(int(y0), 0), frame_h)
        bottom = min(max(int(y1), 0), frame_h)
        return frame[top:bottom, left:right]

    def _roi_around(
        self,
        frame: np.ndarray,
        points: List[Dict],
        pad: int,
        pad_below: Optional[int] = None
    ) -> np.ndarray:
        """
        Crop the bounding box of ``points`` grown by ``pad`` pixels.

        Args:
            frame: Full frame
            points: Keypoint dicts with 'x' and 'y'
            pad: Padding added on the left, right and top
            pad_below: Padding added at the bottom (defaults to ``pad``)
        Returns:
            The cropped view, clipped to the frame (possibly empty)
        """
        if pad_below is None:
            pad_below = pad
        xs = [p['x'] for p in points]
        ys = [p['y'] for p in points]
        return self._clip(
            frame,
            min(xs) - pad,
            min(ys) - pad,
            max(xs) + pad,
            max(ys) + pad_below,
        )

    def _get_roi(self, frame: np.ndarray, region: Dict) -> Optional[np.ndarray]:
        """
        Get ROI from frame using region dict

        Returns None for a missing region or a non-positive width/height;
        otherwise the region clipped to the frame (possibly empty).
        """
        if region is None:
            return None
        x, y, w, h = region.get('x', 0), region.get('y', 0), region.get('width', 0), region.get('height', 0)
        if w <= 0 or h <= 0:
            return None
        return self._clip(frame, x, y, x + w, y + h)

    def _get_kp(self, keypoints: List[Dict], name: str) -> Optional[Dict]:
        """Get the first keypoint whose 'name' matches, or None"""
        for kp in keypoints or []:
            if kp.get('name') == name:
                return kp
        return None
def main(argv: List[str]) -> int:
    """
    Command-line smoke test: extract features from one image and print a summary.

    Args:
        argv: Argument vector; ``argv[1]`` is the image path
    Returns:
        Process exit status: 0 on success, 1 when the path is missing or the
        image cannot be read.
    """
    import sys

    if len(argv) < 2:
        print("Usage: python feature_extractor.py <image_path>")
        return 1
    img_path = argv[1]
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals an unreadable file by returning None; report it
        # instead of exiting silently.
        print(f"Error: could not read image: {img_path}", file=sys.stderr)
        return 1

    extractor = FeatureExtractor()
    # Extract from full image
    features = extractor.extract_all(img)
    print("Color features:")
    print(f" H mean: {features['color']['h_mean']}")
    print(f" S mean: {features['color']['s_mean']}")
    print(f" V mean: {features['color']['v_mean']}")
    print(f" Dominant colors: {len(features['color']['dominant_colors'])}")
    print("\nTexture features:")
    print(f" LBP variance: {features['texture']['lbp_variance']}")
    print(f" Brightness: {features['texture']['brightness']}")
    print(f" Shininess: {features['texture']['shininess_ratio']}")
    print("\nPattern:")
    print(f" {features['pattern']['pattern']} (conf: {features['pattern']['confidence']})")
    print("\nMaterial:")
    print(f" {features['material']['material']} (conf: {features['material']['confidence']})")
    return 0


if __name__ == '__main__':
    # Test with sample image
    import sys
    sys.exit(main(sys.argv))