feat: add appearance feature system with coordinate/scale fixes
- Add Appearance_Feature_System_V1.0.md design doc - Add proportion_calculator.py for body proportions (height, body shape) - Add feature_extractor.py for hierarchical feature extraction - Add tkg_level1_builder.py for TKG person_trace nodes - Fix mediapipe_holistic_processor.py to output Top-Left pixels - Add MediaPipe format conversion in proportion_calculator Coordinate system alignment: - Swift Pose: Top-Left pixels (Y-flip done in swift_pose.swift) - MediaPipe: Top-Left pixels (norm→pixel conversion added)
This commit is contained in:
664
docs_v1.0/DESIGN/Appearance_Feature_System_V1.0.md
Normal file
664
docs_v1.0/DESIGN/Appearance_Feature_System_V1.0.md
Normal file
@@ -0,0 +1,664 @@
|
||||
---
|
||||
title: Appearance Feature System V1.0
|
||||
version: 1.0.0
|
||||
date: 2025-06-22
|
||||
author: OpenCode
|
||||
status: Draft
|
||||
---
|
||||
|
||||
# Appearance Feature System V1.0
|
||||
|
||||
## Overview
|
||||
|
||||
### Purpose
|
||||
Lock onto a target and continuously track across frames using appearance features.
|
||||
|
||||
### Architecture
|
||||
```
|
||||
Face (identification) → Pose (tracking) → Appearance (tracking)
|
||||
↓ ↓ ↓
|
||||
identity_uuid bbox features + proportions
|
||||
```
|
||||
|
||||
### Data Sources
|
||||
| Source | Provides | Output |
|
||||
|--------|----------|--------|
|
||||
| Face | identity, landmarks | face.json |
|
||||
| Pose | bbox, keypoints | pose.json |
|
||||
| MediaPipe | detailed landmarks, hands | mediapipe.json |
|
||||
|
||||
---
|
||||
|
||||
## Keypoint Systems
|
||||
|
||||
### Swift Pose (Apple Vision) - 19 Keypoints
|
||||
|
||||
| Index | Keypoint | Vision Framework Joint |
|
||||
|-------|----------|------------------------|
|
||||
| 0 | nose | .nose (head_joint) |
|
||||
| 1 | left_eye | .leftEye (left_eye_joint) |
|
||||
| 2 | right_eye | .rightEye (right_eye_joint) |
|
||||
| 3 | left_ear | .leftEar (left_ear_joint) |
|
||||
| 4 | right_ear | .rightEar (right_ear_joint) |
|
||||
| 5 | neck | .neck (neck_1_joint) |
|
||||
| 6 | root | .root (center_hip_joint) |
|
||||
| 7 | left_shoulder | .leftShoulder |
|
||||
| 8 | right_shoulder | .rightShoulder |
|
||||
| 9 | left_elbow | .leftElbow |
|
||||
| 10 | right_elbow | .rightElbow |
|
||||
| 11 | left_wrist | .leftWrist (left_hand_joint) |
|
||||
| 12 | right_wrist | .rightWrist (right_hand_joint) |
|
||||
| 13 | left_hip | .leftHip |
|
||||
| 14 | right_hip | .rightHip |
|
||||
| 15 | left_knee | .leftKnee |
|
||||
| 16 | right_knee | .rightKnee |
|
||||
| 17 | left_ankle | .leftAnkle |
|
||||
| 18 | right_ankle | .rightAnkle |
|
||||
|
||||
### MediaPipe Pose - 33 Landmarks
|
||||
|
||||
| Index | Name | Index | Name |
|
||||
|-------|------|-------|------|
|
||||
| 0 | nose | 17 | left_pinky |
|
||||
| 1 | left_eye_inner | 18 | right_pinky |
|
||||
| 2 | left_eye | 19 | left_index |
|
||||
| 3 | left_eye_outer | 20 | right_index |
|
||||
| 4 | right_eye_inner | 21 | left_thumb |
|
||||
| 5 | right_eye | 22 | right_thumb |
|
||||
| 6 | right_eye_outer | 23 | left_hip |
|
||||
| 7 | left_ear | 24 | right_hip |
|
||||
| 8 | right_ear | 25 | left_knee |
|
||||
| 9 | mouth_left | 26 | right_knee |
|
||||
| 10 | mouth_right | 27 | left_ankle |
|
||||
| 11 | left_shoulder | 28 | right_ankle |
|
||||
| 12 | right_shoulder | 29 | left_heel |
|
||||
| 13 | left_elbow | 30 | right_heel |
|
||||
| 14 | right_elbow | 31 | left_foot_index |
|
||||
| 15 | left_wrist | 32 | right_foot_index |
|
||||
| 16 | right_wrist | | |
|
||||
|
||||
### MediaPipe Hand - 21 Landmarks
|
||||
|
||||
| Index | Name | Finger |
|
||||
|-------|------|--------|
|
||||
| 0 | wrist | - |
|
||||
| 1-4 | thumb_cmc/mcp/ip/tip | thumb |
|
||||
| 5-8 | index_mcp/pip/dip/tip | index |
|
||||
| 9-12 | middle_mcp/pip/dip/tip | middle |
|
||||
| 13-16 | ring_mcp/pip/dip/tip | ring |
|
||||
| 17-20 | pinky_mcp/pip/dip/tip | pinky |
|
||||
|
||||
### YOLOv8 Pose (Fallback) - 17 Keypoints
|
||||
|
||||
| Index | Name |
|
||||
|-------|------|
|
||||
| 0 | nose |
|
||||
| 1 | left_eye |
|
||||
| 2 | right_eye |
|
||||
| 3 | left_ear |
|
||||
| 4 | right_ear |
|
||||
| 5 | left_shoulder |
|
||||
| 6 | right_shoulder |
|
||||
| 7 | left_elbow |
|
||||
| 8 | right_elbow |
|
||||
| 9 | left_wrist |
|
||||
| 10 | right_wrist |
|
||||
| 11 | left_hip |
|
||||
| 12 | right_hip |
|
||||
| 13 | left_knee |
|
||||
| 14 | right_knee |
|
||||
| 15 | left_ankle |
|
||||
| 16 | right_ankle |
|
||||
|
||||
---
|
||||
|
||||
## Body Proportions Calculation
|
||||
|
||||
### Reference Unit
|
||||
```python
|
||||
# Eye distance as reference unit
|
||||
eye_width = distance(left_eye, right_eye)
|
||||
```
|
||||
|
||||
### Body Measurements
|
||||
```python
|
||||
# Full body height (nose to ankle)
|
||||
nose_y = keypoints['nose']['y']
|
||||
ankle_y = max(keypoints['left_ankle']['y'], keypoints['right_ankle']['y'])
|
||||
body_height = ankle_y - nose_y
|
||||
|
||||
# Upper body (neck to hip)
|
||||
neck_y = keypoints['neck']['y']
|
||||
hip_y = (keypoints['left_hip']['y'] + keypoints['right_hip']['y']) / 2
|
||||
torso_height = hip_y - neck_y
|
||||
|
||||
# Lower body (hip to ankle)
|
||||
leg_height = ankle_y - hip_y
|
||||
|
||||
# Shoulder width
|
||||
shoulder_width = distance(left_shoulder, right_shoulder)
|
||||
```
|
||||
|
||||
### Proportion Ratios
|
||||
```python
|
||||
proportions = {
|
||||
'eye_width': eye_width,
|
||||
'body_height': body_height,
|
||||
'torso_height': torso_height,
|
||||
'leg_height': leg_height,
|
||||
'shoulder_width': shoulder_width,
|
||||
'head_ratio': eye_width / body_height,
|
||||
'torso_ratio': torso_height / body_height,
|
||||
'leg_ratio': leg_height / body_height,
|
||||
}
|
||||
```
|
||||
|
||||
### Body Shape Calculation (三圍)
|
||||
```python
|
||||
# Chest width (shoulder width approximation)
|
||||
chest_width = distance(left_shoulder, right_shoulder)
|
||||
|
||||
# Waist width (hip width approximation)
|
||||
waist_width = distance(left_hip, right_hip)
|
||||
|
||||
# Hip width
|
||||
hip_width = distance(left_hip, right_hip)
|
||||
|
||||
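# Ratios used by the classification below.
# NOTE: waist_width and hip_width are both measured between the hip keypoints above,
# so waist_hip_ratio is always 1.0 until a separate waist measurement is available.
chest_waist_ratio = chest_width / waist_width
waist_hip_ratio = waist_width / hip_width

# Body shape classification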
|
||||
if chest_waist_ratio < 1.0 and waist_hip_ratio < 0.9:
|
||||
shape_type = "hourglass" #葫芦形
|
||||
elif chest_waist_ratio > 1.2:
|
||||
shape_type = "inverted_triangle"  # inverted triangle: chest clearly wider than waist
|
||||
elif waist_hip_ratio > 1.1:
|
||||
shape_type = "triangle"  # triangle: waist wider than hips
|
||||
elif abs(chest_width - hip_width) < 0.1 * max(chest_width, hip_width):
|
||||
shape_type = "rectangle" #矩形
|
||||
else:
|
||||
shape_type = "oval" #椭圆形
|
||||
```
|
||||
|
||||
### Height Estimation
|
||||
```python
|
||||
# Use eye_width as reference (≈6cm)
|
||||
height_ratio = body_height / eye_width
|
||||
estimated_height_cm = height_ratio * 6.0
|
||||
|
||||
# Height category
|
||||
if estimated_height_cm < 150:
|
||||
height_category = "short"
|
||||
elif estimated_height_cm < 170:
|
||||
height_category = "medium"
|
||||
elif estimated_height_cm < 180:
|
||||
height_category = "tall"
|
||||
else:
|
||||
height_category = "very_tall"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Appearance Feature Location Mapping
|
||||
|
||||
### Environment Factors
|
||||
|
||||
| Feature | Location | Detection Method |
|
||||
|---------|----------|------------------|
|
||||
| Light type | Frame background | HSV H distribution |
|
||||
| Light direction | Shadow analysis | Shadow orientation |
|
||||
| Light intensity | Overall brightness | HSV V mean |
|
||||
|
||||
### Head Features
|
||||
|
||||
#### Hair Style
|
||||
| Feature | Keypoints Range |
|
||||
|---------|-----------------|
|
||||
| Short hair | head_top → ear/neck |
|
||||
| Long hair | head_top → shoulder/back |
|
||||
| Ponytail | head_top → neck (tied) |
|
||||
| Braids | head_top → shoulder (braided) |
|
||||
| Curly hair | hair region texture |
|
||||
| Straight hair | hair region texture |
|
||||
|
||||
#### Hair Accessories
|
||||
| Feature | Keypoints |
|
||||
|---------|-----------|
|
||||
| Hair band | eye_distance (head top) |
|
||||
| Hair clip | ear/head |
|
||||
| Hair wrap | ear_distance |
|
||||
| Hair tie | neck (ponytail position) |
|
||||
| Hair pin | head |
|
||||
|
||||
#### Head Accessories
|
||||
| Feature | Keypoints |
|
||||
|---------|-----------|
|
||||
| Hat | head_top → eye |
|
||||
| Headscarf | ear_distance (wrapped) |
|
||||
| Hood | head_top → neck (full head) |
|
||||
|
||||
#### Hair Color
|
||||
| Feature | Detection |
|
||||
|---------|-----------|
|
||||
| Hair color HSV | hair region HSV histogram |
|
||||
|
||||
### Face Features
|
||||
|
||||
#### Eye Accessories
|
||||
| Feature | Keypoints |
|
||||
|---------|-----------|
|
||||
| Glasses | eye_distance |
|
||||
| Sunglasses | eye_distance (larger) |
|
||||
|
||||
#### Ear Accessories
|
||||
| Feature | Keypoints |
|
||||
|---------|-----------|
|
||||
| Earrings | ear_position |
|
||||
| Headphones (over-ear) | ear_distance (wrapped) |
|
||||
| Earphones (in-ear) | ear_position |
|
||||
| Earphones (ear-hook) | ear_position |
|
||||
|
||||
#### Face Accessories
|
||||
| Feature | Keypoints |
|
||||
|---------|-----------|
|
||||
| Blush | cheeks (below eye) |
|
||||
| Lipstick | lips (nose + eye_width * 0.5) |
|
||||
| Mask | ear_distance, eye → neck |
|
||||
|
||||
#### Skin Tone
|
||||
| Feature | Detection |
|
||||
|---------|-----------|
|
||||
| Skin color HSV | face region HSV histogram |
|
||||
|
||||
### Neck Features
|
||||
|
||||
#### Neck Accessories
|
||||
| Feature | Keypoints |
|
||||
|---------|-----------|
|
||||
| Collar | neck |
|
||||
| Bow tie | neck → chest |
|
||||
| Tie | neck → hip |
|
||||
| Scarf | neck → shoulder |
|
||||
| Necklace | neck |
|
||||
|
||||
#### Hanging Accessories
|
||||
| Feature | Keypoints |
|
||||
|---------|-----------|
|
||||
| Pendant (necklace) | neck → chest |
|
||||
| Charm (bag) | bag_position |
|
||||
| Charm (phone) | phone_position |
|
||||
|
||||
### Upper Body Features
|
||||
|
||||
#### Clothing
|
||||
| Feature | Keypoints |
|
||||
|---------|-----------|
|
||||
| Shirt color | neck → hip |
|
||||
| Shirt material | clothing texture (LBP) |
|
||||
| Clothing pattern | pattern detection |
|
||||
|
||||
#### Sleeves
|
||||
| Feature | Keypoints |
|
||||
|---------|-----------|
|
||||
| Long sleeve | shoulder → wrist |
|
||||
| Short sleeve | shoulder → elbow |
|
||||
| Arm sleeve | elbow → wrist |
|
||||
|
||||
#### Back Features
|
||||
| Feature | Keypoints |
|
||||
|---------|-----------|
|
||||
| Back exposed | shoulder → hip (view angle) |
|
||||
| Back tattoo | back exposed skin |
|
||||
|
||||
### Bags
|
||||
|
||||
| Feature | Keypoints |
|
||||
|---------|-----------|
|
||||
| Handbag | hand_position |
|
||||
| Shoulder bag | shoulder_position |
|
||||
| Backpack | shoulder → hip (back) |
|
||||
| Waist bag | hip_position |
|
||||
|
||||
### Hand Features
|
||||
|
||||
#### Hand Accessories
|
||||
| Feature | Keypoints |
|
||||
|---------|-----------|
|
||||
| Watch | wrist |
|
||||
| Bracelet | wrist → hand |
|
||||
| Ring | finger (MediaPipe hand landmarks 13-16) |
|
||||
| Gloves | wrist → hand |
|
||||
| Nail polish | finger tips |
|
||||
|
||||
#### Handheld Objects
|
||||
| Feature | Keypoints |
|
||||
|---------|-----------|
|
||||
| Phone | hand + object detection |
|
||||
| Handbag | hand + object detection |
|
||||
|
||||
### Lower Body Features
|
||||
|
||||
#### Pants
|
||||
| Feature | Keypoints |
|
||||
|---------|-----------|
|
||||
| Long pants | hip → ankle |
|
||||
| Shorts | hip → knee |
|
||||
|
||||
#### Waist Accessories
|
||||
| Feature | Keypoints |
|
||||
|---------|-----------|
|
||||
| Belt | hip |
|
||||
|
||||
### Foot Features
|
||||
|
||||
#### Foot Accessories
|
||||
| Feature | Keypoints |
|
||||
|---------|-----------|
|
||||
| Anklet | ankle |
|
||||
| Socks | ankle → foot |
|
||||
| Shoes | ankle |
|
||||
|
||||
### Skin Features
|
||||
|
||||
| Feature | Detection |
|
||||
|---------|-----------|
|
||||
| Tattoo | exposed skin anomaly color block |
|
||||
|
||||
### Exposed Skin Detection
|
||||
|
||||
| Location | Coverage Detection |
|
||||
|----------|-------------------|
|
||||
| Face | always exposed |
|
||||
| Arms | exposed if short sleeve |
|
||||
| Legs | exposed if shorts |
|
||||
| Hands | exposed if no gloves |
|
||||
| Feet | exposed if no socks |
|
||||
|
||||
---
|
||||
|
||||
## Mobility Aids / Vehicles
|
||||
|
||||
### Walking Aids (Object Detection)
|
||||
| Feature | Keypoints |
|
||||
|---------|-----------|
|
||||
| Cane | hand + object |
|
||||
| Wheelchair | hip + object |
|
||||
| Walker | both hands + object |
|
||||
|
||||
### Mobility Tools (Object Detection)
|
||||
| Feature | Keypoints |
|
||||
|---------|-----------|
|
||||
| Roller skates | ankle + object |
|
||||
| Skateboard | ankle + object |
|
||||
| Scooter | hand + ankle + object |
|
||||
|
||||
### Vehicles (Object Detection)
|
||||
| Feature | Keypoints |
|
||||
|---------|-----------|
|
||||
| Motorcycle | hip + ankle + object |
|
||||
| Bicycle | hip + ankle + object |
|
||||
| Tricycle | hip + ankle + object |
|
||||
| Car | hip + object |
|
||||
|
||||
---
|
||||
|
||||
## Feature Extraction Techniques
|
||||
|
||||
### Color Extraction (HSV Histogram)
|
||||
```python
|
||||
def extract_color(roi):
|
||||
hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
|
||||
h_hist = cv2.calcHist([hsv], [0], None, [30], [0, 180])
|
||||
s_hist = cv2.calcHist([hsv], [1], None, [32], [0, 256])
|
||||
v_hist = cv2.calcHist([hsv], [2], None, [32], [0, 256])
|
||||
return {
|
||||
'h_histogram': normalize(h_hist),
|
||||
's_histogram': normalize(s_hist),
|
||||
'v_histogram': normalize(v_hist),
|
||||
}
|
||||
```
|
||||
|
||||
### Dominant Color (K-means)
|
||||
```python
|
||||
def extract_dominant_colors(roi, k=5):
|
||||
hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
|
||||
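pixels = hsv.reshape(-1, 3).astype(np.float32)
# Stop after 10 iterations or once the centers move by less than 1.0
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)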
|
||||
_, labels, centers = cv2.kmeans(pixels, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)
|
||||
counts = np.bincount(labels.flatten())
|
||||
return centers[np.argsort(-counts)[:k]]
|
||||
```
|
||||
|
||||
### Texture Extraction (LBP)
|
||||
```python
|
||||
def extract_texture(roi):
|
||||
gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
|
||||
lbp = local_binary_pattern(gray, P=8, R=1)
|
||||
return {
|
||||
'lbp_variance': np.var(lbp),
|
||||
'lbp_histogram': np.histogram(lbp, bins=256)[0],
|
||||
}
|
||||
```
|
||||
|
||||
### Shininess Detection
|
||||
```python
|
||||
def detect_shininess(roi):
|
||||
hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
|
||||
v_mean = np.mean(hsv[:,:,2])
|
||||
v_std = np.std(hsv[:,:,2])
|
||||
return {
|
||||
'brightness': v_mean,
|
||||
'brightness_variance': v_std,
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Tracking Flow
|
||||
|
||||
### Feature Storage Strategy
|
||||
| Level | Storage | Reason |
|
||||
|-------|---------|--------|
|
||||
| **Level 1** | TKG nodes | Stable features for tracking |
|
||||
| **Level 2** | Dynamic | On-demand calculation |
|
||||
| **Level 3** | Dynamic | On-demand calculation |
|
||||
|
||||
### Level 1 in TKG
|
||||
```sql
|
||||
-- New node_type: person_trace
|
||||
INSERT INTO tkg_nodes (
|
||||
node_type = 'person_trace',
|
||||
external_id = 'person_{person_index}',
|
||||
file_uuid = 'xxx',
|
||||
properties = {
|
||||
'frame_count': 100,
|
||||
'frames': [1, 30, 60, ...],
|
||||
'avg_bbox': {...},
|
||||
'height_estimate': {
|
||||
'estimated_height_cm': 170.5,
|
||||
'height_ratio': 28.4,
|
||||
'height_category': 'tall'
|
||||
},
|
||||
'body_shape': {
|
||||
'chest_width': 150.2,
|
||||
'waist_width': 100.5,
|
||||
'hip_width': 120.3,
|
||||
'chest_waist_ratio': 1.49,
|
||||
'waist_hip_ratio': 0.84,
|
||||
'body_shape': 'hourglass'
|
||||
},
|
||||
'level1_features': {
|
||||
'body': {...},
|
||||
'head_top': {...},
|
||||
'upper_body': {...},
|
||||
'lower_body': {...}
|
||||
}
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
### Level 2/3 Dynamic Calculation
|
||||
```python
|
||||
# Level 2: computed on query
|
||||
face_features = extractor.extract_level2(frame, regions)
|
||||
|
||||
# Level 3: computed on query
|
||||
accessory_features = extractor.extract_level3(frame, keypoints, eye_width)
|
||||
```
|
||||
|
||||
### Matching Strategy
|
||||
```
|
||||
Frame N → Frame N+1:
|
||||
|
||||
1. Pose bbox IoU → same person position
|
||||
2. Level 1 similarity (TKG) → same feature combination
|
||||
3. Level 2/3 dynamic → detailed verification
|
||||
4. Face identity → final confirmation (if face detected)
|
||||
|
||||
Result: Continuous tracking of same identity
|
||||
```
|
||||
|
||||
### IoU Calculation
|
||||
```python
|
||||
def calculate_iou(bbox1, bbox2):
|
||||
x1, y1, w1, h1 = bbox1
|
||||
x2, y2, w2, h2 = bbox2
|
||||
|
||||
xi1 = max(x1, x2)
|
||||
yi1 = max(y1, y2)
|
||||
xi2 = min(x1 + w1, x2 + w2)
|
||||
yi2 = min(y1 + h1, y2 + h2)
|
||||
|
||||
inter_area = max(0, xi2 - xi1) * max(0, yi2 - yi1)
|
||||
union_area = w1 * h1 + w2 * h2 - inter_area
|
||||
|
||||
return inter_area / union_area if union_area > 0 else 0
|
||||
```
|
||||
|
||||
### Feature Similarity
|
||||
```python
|
||||
def calculate_similarity(features1, features2):
|
||||
# HSV histogram similarity
|
||||
h_sim = cv2.compareHist(features1['h_histogram'], features2['h_histogram'], cv2.HISTCMP_CORREL)
|
||||
|
||||
# Dominant color similarity
|
||||
color_dist = np.linalg.norm(features1['dominant_colors'] - features2['dominant_colors'])
|
||||
|
||||
# Combined score
|
||||
return {
|
||||
'color_similarity': h_sim,
|
||||
'color_distance': color_dist,
|
||||
'overall_score': h_sim * 0.7 + (1 - color_dist/255) * 0.3,
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Output Format
|
||||
|
||||
### appearance.json Structure
|
||||
```json
|
||||
{
|
||||
"frame_count": 100,
|
||||
"fps": 30.0,
|
||||
"frames": [
|
||||
{
|
||||
"frame": 1,
|
||||
"timestamp": 0.033,
|
||||
"persons": [
|
||||
{
|
||||
"person_index": 0,
|
||||
"bbox": {"x": 100, "y": 200, "width": 400, "height": 600},
|
||||
"identity_uuid": "xxx-xxx-xxx",
|
||||
"proportions": {
|
||||
"eye_width": 50.0,
|
||||
"body_height": 600.0,
|
||||
"torso_height": 200.0,
|
||||
"leg_height": 300.0,
|
||||
"shoulder_width": 150.0,
|
||||
"head_ratio": 0.08,
|
||||
"torso_ratio": 0.33,
|
||||
"leg_ratio": 0.50
|
||||
},
|
||||
"features": {
|
||||
"hair": {
|
||||
"color": {"h_histogram": [...], "dominant_colors": [...]},
|
||||
"length": "long",
|
||||
"style": "straight"
|
||||
},
|
||||
"skin": {
|
||||
"color": {"h_histogram": [...], "dominant_colors": [...]}
|
||||
},
|
||||
"clothing": {
|
||||
"upper": {
|
||||
"color": {...},
|
||||
"material": "cotton",
|
||||
"pattern": "solid",
|
||||
"sleeve": "short"
|
||||
},
|
||||
"lower": {
|
||||
"color": {...},
|
||||
"length": "long"
|
||||
}
|
||||
},
|
||||
"accessories": {
|
||||
"earring": true,
|
||||
"watch": true,
|
||||
"shoes_color": {...}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Dependencies
|
||||
|
||||
### Processor Dependencies
|
||||
| Processor | Depends On | Reason |
|
||||
|-----------|------------|--------|
|
||||
| Appearance | Pose | bbox for region extraction |
|
||||
| Appearance | Face | identity matching + face landmarks |
|
||||
| Appearance | MediaPipe | hand landmarks + detailed pose |
|
||||
|
||||
### Data Flow
|
||||
```
|
||||
pose.json → bbox + keypoints
|
||||
face.json → identity + face landmarks
|
||||
mediapipe.json → hand landmarks + pose landmarks
|
||||
↓
|
||||
appearance.json → features + proportions + tracking
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Phases
|
||||
|
||||
### Phase 1: Design Document
|
||||
- Create this design document
|
||||
- Define all feature mappings
|
||||
- Define output format
|
||||
|
||||
### Phase 2: Appearance Processor Refactor
|
||||
- Add proportion calculation module
|
||||
- Add feature extraction module
|
||||
- Integrate Pose + MediaPipe + Face data
|
||||
- Add IoU matching for pose-face
|
||||
|
||||
### Phase 3: Output Format Update
|
||||
- Update appearance.json structure
|
||||
- Update Rust structs
|
||||
- Update DB schema
|
||||
|
||||
### Phase 4: Testing
|
||||
- Unit tests for proportion calculation
|
||||
- Integration tests for full pipeline
|
||||
- Real video tracking validation
|
||||
|
||||
---
|
||||
|
||||
## Version History
|
||||
|
||||
| Version | Date | Author | Changes |
|
||||
|---------|------|--------|---------|
|
||||
| 1.0.0 | 2025-06-22 | OpenCode | Initial design document |
|
||||
@@ -167,55 +167,56 @@ class MediaPipeHolisticProcessor:
|
||||
"hands": {"left": None, "right": None},
|
||||
}
|
||||
|
||||
# Extract face mesh
|
||||
# Extract face mesh
|
||||
height, width = frame.shape[:2]
|
||||
if results.face_landmarks:
|
||||
person_data["face_mesh"] = self._extract_face_mesh(results.face_landmarks)
|
||||
|
||||
person_data["face_mesh"] = self._extract_face_mesh(results.face_landmarks, width, height)
|
||||
|
||||
# Extract pose
|
||||
if results.pose_landmarks:
|
||||
person_data["pose"] = self._extract_pose(results.pose_landmarks)
|
||||
|
||||
person_data["pose"] = self._extract_pose(results.pose_landmarks, width, height)
|
||||
|
||||
# Extract hands
|
||||
if results.left_hand_landmarks:
|
||||
person_data["hands"]["left"] = self._extract_hand(results.left_hand_landmarks, "left")
|
||||
|
||||
person_data["hands"]["left"] = self._extract_hand(results.left_hand_landmarks, "left", width, height)
|
||||
|
||||
if results.right_hand_landmarks:
|
||||
person_data["hands"]["right"] = self._extract_hand(results.right_hand_landmarks, "right")
|
||||
person_data["hands"]["right"] = self._extract_hand(results.right_hand_landmarks, "right", width, height)
|
||||
|
||||
# Calculate bbox from pose landmarks
|
||||
if results.pose_landmarks:
|
||||
landmarks = results.pose_landmarks.landmark
|
||||
x_coords = [lm.x for lm in landmarks if lm.visibility > 0.5]
|
||||
y_coords = [lm.y for lm in landmarks if lm.visibility > 0.5]
|
||||
|
||||
|
||||
if x_coords and y_coords:
|
||||
x_min, x_max = min(x_coords), max(x_coords)
|
||||
y_min, y_max = min(y_coords), max(y_coords)
|
||||
|
||||
height, width = frame.shape[:2]
|
||||
|
||||
|
||||
person_data["bbox"] = {
|
||||
"x": int(x_min * width),
|
||||
"y": int(y_min * height),
|
||||
"width": int((x_max - x_min) * width),
|
||||
"height": int((y_max - y_min) * height),
|
||||
}
|
||||
|
||||
|
||||
return person_data
|
||||
|
||||
def _extract_face_mesh(self, face_landmarks) -> Dict:
|
||||
|
||||
def _extract_face_mesh(self, face_landmarks, width: int, height: int) -> Dict:
|
||||
"""
|
||||
Extract face mesh landmarks and calculate features
|
||||
|
||||
|
||||
Args:
|
||||
face_landmarks: MediaPipe face landmarks
|
||||
|
||||
width: Frame width in pixels
|
||||
height: Frame height in pixels
|
||||
|
||||
Returns:
|
||||
Dict with landmarks, eye_features, mouth_features
|
||||
Dict with landmarks (in pixels), eye_features, mouth_features
|
||||
"""
|
||||
landmarks = []
|
||||
for lm in face_landmarks.landmark:
|
||||
landmarks.append([lm.x, lm.y, lm.z])
|
||||
landmarks.append([int(lm.x * width), int(lm.y * height), lm.z])
|
||||
|
||||
# Eye Aspect Ratio (EAR)
|
||||
def calculate_ear(eye_indices):
|
||||
@@ -329,19 +330,21 @@ class MediaPipeHolisticProcessor:
|
||||
},
|
||||
}
|
||||
|
||||
def _extract_pose(self, pose_landmarks) -> Dict:
|
||||
def _extract_pose(self, pose_landmarks, width: int, height: int) -> Dict:
|
||||
"""
|
||||
Extract pose landmarks and calculate features
|
||||
|
||||
|
||||
Args:
|
||||
pose_landmarks: MediaPipe pose landmarks
|
||||
|
||||
width: Frame width in pixels
|
||||
height: Frame height in pixels
|
||||
|
||||
Returns:
|
||||
Dict with landmarks, arm_features, leg_features
|
||||
Dict with landmarks (in pixels), arm_features, leg_features
|
||||
"""
|
||||
landmarks = []
|
||||
for lm in pose_landmarks.landmark:
|
||||
landmarks.append([lm.x, lm.y, lm.z, lm.visibility])
|
||||
landmarks.append([int(lm.x * width), int(lm.y * height), lm.z, lm.visibility])
|
||||
|
||||
# Helper function to calculate angle
|
||||
def calculate_angle(p1_idx, p2_idx, p3_idx):
|
||||
@@ -450,20 +453,22 @@ class MediaPipeHolisticProcessor:
|
||||
},
|
||||
}
|
||||
|
||||
def _extract_hand(self, hand_landmarks, hand_type: str) -> Dict:
|
||||
def _extract_hand(self, hand_landmarks, hand_type: str, width: int, height: int) -> Dict:
|
||||
"""
|
||||
Extract hand landmarks and detect gesture
|
||||
|
||||
|
||||
Args:
|
||||
hand_landmarks: MediaPipe hand landmarks
|
||||
hand_type: "left" or "right"
|
||||
|
||||
width: Frame width in pixels
|
||||
height: Frame height in pixels
|
||||
|
||||
Returns:
|
||||
Dict with landmarks, gesture
|
||||
Dict with landmarks (in pixels), gesture
|
||||
"""
|
||||
landmarks = []
|
||||
for lm in hand_landmarks.landmark:
|
||||
landmarks.append([lm.x, lm.y, lm.z])
|
||||
landmarks.append([int(lm.x * width), int(lm.y * height), lm.z])
|
||||
|
||||
# Check finger extensions
|
||||
def is_finger_extended(tip_idx, pip_idx):
|
||||
|
||||
341
scripts/tkg_level1_builder.py
Normal file
341
scripts/tkg_level1_builder.py
Normal file
@@ -0,0 +1,341 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
TKG Level 1 Builder - Store Level 1 appearance features in TKG
|
||||
|
||||
Purpose:
|
||||
1. Extract Level 1 features from pose.json + video frames
|
||||
2. Store as person_trace nodes in TKG
|
||||
3. Enable tracking via Level 1 feature similarity
|
||||
|
||||
Level 1 Features:
|
||||
- body: overall color distribution
|
||||
- head_top: hair color
|
||||
- upper_body: upper clothing color
|
||||
- lower_body: lower clothing color
|
||||
|
||||
Usage:
|
||||
python tkg_level1_builder.py --file-uuid <uuid> [--schema <schema>]
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import argparse
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
import cv2
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "utils"))
|
||||
|
||||
from utils.feature_extractor import HierarchicalFeatureExtractor
|
||||
from utils.proportion_calculator import calculate_proportions, get_head_region
|
||||
|
||||
# Runtime configuration; each value can be overridden through the environment.
DB_URL = os.getenv("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry")
SCHEMA = os.getenv("DATABASE_SCHEMA", "dev")
OUTPUT_DIR = os.getenv("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev")
|
||||
|
||||
|
||||
def get_conn():
    """Open a new database connection for the module-level ``DB_URL``.

    Returns:
        The connection object produced by ``psycopg2.connect``.
    """
    connection = psycopg2.connect(DB_URL)
    return connection
|
||||
|
||||
|
||||
def ensure_node(cur, schema, file_uuid, node_type, external_id, label="", properties=None):
    """Upsert a row in ``<schema>.tkg_nodes`` and return its ``id``.

    Args:
        cur: Cursor exposing ``execute(sql, params)`` and ``fetchone()``.
        schema: Schema name placed in front of ``tkg_nodes``.
        file_uuid: Value for the ``file_uuid`` column.
        node_type: Value for the ``node_type`` column.
        external_id: Value for the ``external_id`` column; stored as ``str``.
        label: Node label; an empty label keeps the stored one on conflict.
        properties: Mapping serialized to JSON; ``None``/empty becomes ``{}``.

    Returns:
        The first column of the row produced by ``RETURNING id``.
    """
    # NOTE(review): ``schema`` is interpolated into the SQL text, not bound as
    # a parameter -- callers must pass a trusted identifier.
    # NOTE(review): because ``properties or {}`` is always serialized, the
    # COALESCE fallback to the stored properties presumably never triggers;
    # confirm whether overwriting with ``{}`` is intended.
    statement = f"""
        INSERT INTO {schema}.tkg_nodes (node_type, external_id, file_uuid, label, properties)
        VALUES (%s, %s, %s, %s, %s::jsonb)
        ON CONFLICT (file_uuid, node_type, external_id)
        DO UPDATE SET properties = COALESCE(EXCLUDED.properties, {schema}.tkg_nodes.properties),
        label = COALESCE(NULLIF(EXCLUDED.label, ''), {schema}.tkg_nodes.label)
        RETURNING id
        """
    serialized = json.dumps(properties or {})
    params = (node_type, str(external_id), file_uuid, label, serialized)
    cur.execute(statement, params)
    return cur.fetchone()[0]
|
||||
|
||||
|
||||
def extract_level1_features(video_path, pose_json_path):
    """
    Extract Level 1 features for each person in each pose frame.

    Args:
        video_path: Path to video file
        pose_json_path: Path to pose.json

    Returns:
        List of dicts with keys ``frame``, ``timestamp``, ``person_index``,
        ``bbox``, ``proportions`` and ``level1_features``. Empty list when the
        video cannot be opened.
    """
    with open(pose_json_path, encoding="utf-8") as f:
        pose_data = json.load(f)

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"[TKG-L1] Cannot open video: {video_path}", file=sys.stderr)
        return []

    # A missing, null or zero fps falls back to 30.0 so the timestamp
    # fallback below can never divide by zero.
    fps = pose_data.get("fps") or 30.0
    extractor = HierarchicalFeatureExtractor()

    results = []

    # try/finally guarantees the capture handle is released even when a frame
    # fails to process.
    try:
        for pose_frame in pose_data.get("frames", []):
            frame_num = pose_frame["frame"]
            persons = pose_frame.get("persons", [])

            if not persons:
                continue

            # NOTE(review): assumes pose.json frame numbers are the capture's
            # 0-based frame indices -- confirm against the pose producer.
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
            ret, frame = cap.read()

            if not ret:
                continue

            for person_idx, person in enumerate(persons):
                bbox = person.get("bbox", {})
                keypoints = person.get("keypoints", [])

                # Skip detections without a usable bounding box.
                if bbox.get("width", 0) <= 0 or bbox.get("height", 0) <= 0:
                    continue

                proportions = calculate_proportions(keypoints, bbox)
                head_region = get_head_region(keypoints)
                level1 = extractor.extract_level1(frame, bbox, head_region)

                # Derive the timestamp from fps only when pose.json does not
                # carry one; the division is no longer evaluated eagerly.
                timestamp = pose_frame.get("timestamp")
                if timestamp is None:
                    timestamp = frame_num / fps

                results.append({
                    "frame": frame_num,
                    "timestamp": timestamp,
                    "person_index": person_idx,
                    "bbox": bbox,
                    "proportions": proportions,
                    "level1_features": level1,
                })
    finally:
        cap.release()

    return results
|
||||
|
||||
|
||||
def build_person_trace_nodes(cur, schema, file_uuid, level1_data):
    """
    Aggregate per-frame Level 1 records into one person_trace node per person.

    Args:
        cur: Database cursor
        schema: Database schema
        file_uuid: File UUID
        level1_data: Per-frame records as produced by extract_level1_features

    Returns:
        Number of person_trace nodes written.
    """
    print("[TKG-L1] Building person_trace nodes...")

    regions = ("body", "head_top", "upper_body", "lower_body")

    # Bucket records by person_index (assumes the index identifies the same
    # person across frames).
    by_person = {}
    for record in level1_data:
        by_person.setdefault(record["person_index"], []).append(record)

    created = 0
    for person_idx, records in by_person.items():
        frames = []
        bboxes = []
        region_colors = {region: [] for region in regions}

        for record in records:
            features = record["level1_features"]
            frames.append(record["frame"])
            bboxes.append(record["bbox"])

            for region in regions:
                if region in features and "color" in features[region]:
                    region_colors[region].append(
                        features[region]["color"].get("dominant_colors", [])
                    )

        averaged = {
            region: average_colors(colors) if colors else []
            for region, colors in region_colors.items()
        }

        external_id = f"person_{person_idx}"
        label = f"Person {person_idx}"

        # Despite the property names, these hold the first recorded value
        # rather than an average.
        height_estimate = _first_recorded(records, "height_estimate")
        body_shape = _first_recorded(records, "body_shape")

        properties = {
            "frame_count": len(frames),
            "frames": frames,
            "avg_bbox": average_bbox(bboxes) if bboxes else {},
            "height_estimate": height_estimate,
            "body_shape": body_shape,
            "level1_features": {
                region: {
                    "dominant_colors": averaged[region],
                    "h_mean": average_h_mean(records, region),
                }
                for region in regions
            },
        }

        ensure_node(cur, schema, file_uuid, "person_trace", external_id, label, properties)
        created += 1
        print(f"[TKG-L1] Created person_trace node: {external_id} ({len(frames)} frames)")

    print(f"[TKG-L1] Total: {created} person_trace nodes")
    return created


def _first_recorded(records, key):
    """Return the first truthy ``proportions[key]`` among records, else the last value seen or ``{}``."""
    chosen = {}
    for record in records:
        proportions = record.get("proportions", {})
        if key in proportions and not chosen:
            chosen = proportions[key]
    return chosen
|
||||
|
||||
|
||||
def average_colors(color_lists):
    """Average the leading dominant color across several color lists.

    Only the first color of each non-empty list contributes to the result;
    empty or None entries are ignored.

    Args:
        color_lists: Sequence of color lists, one per observation. Each
            color is a sequence of numeric channel values.

    Returns:
        One averaged color as a list of channel values rounded to two
        decimals, or [] when there is nothing to average.
    """
    # Drop observations that produced no colors at all.
    usable = [colors for colors in (color_lists or []) if colors]
    if not usable:
        return []

    leading = [colors[0] for colors in usable]
    # zip(*...) regroups the colors channel by channel.
    return [round(sum(channel) / len(channel), 2) for channel in zip(*leading)]
|
||||
|
||||
|
||||
def average_h_mean(items, region):
    """Average the ``h_mean`` color statistic of one region over Level 1 items.

    Items that carry no usable value are skipped instead of raising: a
    missing "level1_features" entry, a missing or non-dict region, a missing
    or non-dict "color" entry, or a missing ``h_mean``.

    Args:
        items: Iterable of per-frame dicts with a "level1_features" mapping.
        region: Region key inside "level1_features", e.g. "body".

    Returns:
        The mean of the collected values rounded to two decimals, or 0 when
        no item contributed a value.
    """
    h_means = []
    for item in items:
        level1 = item.get("level1_features")
        if not isinstance(level1, dict):
            continue
        region_features = level1.get(region)
        if not isinstance(region_features, dict):
            continue
        color = region_features.get("color")
        if not isinstance(color, dict):
            continue
        h_mean = color.get("h_mean", 0)
        # A falsy value (missing or exactly 0) is treated as "no measurement",
        # which matches the original filtering.
        if h_mean:
            h_means.append(h_mean)

    return round(sum(h_means) / len(h_means), 2) if h_means else 0
|
||||
|
||||
|
||||
def average_bbox(bboxes):
    """Compute the component-wise mean bounding box over several frames.

    Args:
        bboxes: List of dicts with "x", "y", "width" and "height"; a missing
            component counts as 0.

    Returns:
        Dict with the four averaged components rounded to one decimal, or {}
        when ``bboxes`` is empty.
    """
    if not bboxes:
        return {}

    total = len(bboxes)
    return {
        component: round(sum(box.get(component, 0) for box in bboxes) / total, 1)
        for component in ("x", "y", "width", "height")
    }
|
||||
|
||||
|
||||
def main():
    """Run the TKG Level 1 builder from the command line.

    Parses the CLI arguments, resolves the video and pose JSON paths
    (defaulting to files named after the file UUID under OUTPUT_DIR),
    extracts Level 1 features and stores the resulting person_trace nodes
    inside one database transaction.

    Exits with status 1 when an input file is missing, when no Level 1 data
    is extracted, or when building the nodes fails (the transaction is
    rolled back first).
    """
    cli = argparse.ArgumentParser(description="TKG Level 1 Builder")
    cli.add_argument("--file-uuid", "-u", required=True, help="File UUID")
    cli.add_argument("--schema", "-s", default=SCHEMA, help="Database schema")
    cli.add_argument("--video", "-v", help="Video path (optional, auto-detected)")
    cli.add_argument("--pose-json", "-p", help="Pose JSON path (optional, auto-detected)")
    options = cli.parse_args()

    file_uuid, schema = options.file_uuid, options.schema

    # Explicit paths win; otherwise derive them from the file UUID.
    video_path = options.video or f"{OUTPUT_DIR}/{file_uuid}.mp4"
    pose_json_path = options.pose_json or f"{OUTPUT_DIR}/{file_uuid}.pose.json"

    # Both inputs must exist before any work starts.
    if not os.path.exists(video_path):
        print(f"[TKG-L1] Video not found: {video_path}", file=sys.stderr)
        sys.exit(1)
    if not os.path.exists(pose_json_path):
        print(f"[TKG-L1] Pose JSON not found: {pose_json_path}", file=sys.stderr)
        sys.exit(1)

    print(f"[TKG-L1] Processing: {file_uuid}")
    print(f"[TKG-L1] Video: {video_path}")
    print(f"[TKG-L1] Pose: {pose_json_path}")

    print("[TKG-L1] Extracting Level 1 features...")
    level1_data = extract_level1_features(video_path, pose_json_path)
    if not level1_data:
        print("[TKG-L1] No Level 1 data extracted", file=sys.stderr)
        sys.exit(1)
    print(f"[TKG-L1] Extracted: {len(level1_data)} frame-person pairs")

    connection = get_conn()
    cursor = connection.cursor()
    try:
        count = build_person_trace_nodes(cursor, schema, file_uuid, level1_data)
        connection.commit()
        print(f"[TKG-L1] Success: {count} person_trace nodes created")
    except Exception as e:
        # Top-level boundary: undo partial writes, report, and signal failure.
        connection.rollback()
        print(f"[TKG-L1] Error: {e}", file=sys.stderr)
        sys.exit(1)
    finally:
        # Runs on success and on the sys.exit above.
        cursor.close()
        connection.close()


# Run the CLI only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
684
scripts/utils/feature_extractor.py
Normal file
684
scripts/utils/feature_extractor.py
Normal file
@@ -0,0 +1,684 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Feature Extractor - Appearance feature extraction from video frames
|
||||
|
||||
Purpose:
|
||||
1. Extract color features (HSV histogram, dominant colors)
|
||||
2. Extract texture features (LBP, shininess)
|
||||
3. Extract pattern features
|
||||
4. Detect accessories and clothing attributes
|
||||
|
||||
Output:
|
||||
{
|
||||
'color': {...},
|
||||
'texture': {...},
|
||||
'pattern': {...},
|
||||
'material': {...},
|
||||
}
|
||||
|
||||
Usage:
|
||||
from feature_extractor import FeatureExtractor
|
||||
|
||||
extractor = FeatureExtractor()
|
||||
features = extractor.extract_all(roi)  # roi: BGR image region cropped from the frame
|
||||
"""
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from typing import Dict, List, Optional
|
||||
from skimage.feature import local_binary_pattern
|
||||
|
||||
|
||||
class FeatureExtractor:
|
||||
"""
|
||||
Extract appearance features from image regions
|
||||
"""
|
||||
|
||||
def __init__(self):
    """Set the tunable parameters used by the extraction methods."""
    # Passed to local_binary_pattern as the point count and radius.
    self.lbp_points = 8
    self.lbp_radius = 1
    # Number of k-means clusters used for dominant colors.
    self.dominant_color_k = 5
|
||||
|
||||
def extract_color(self, roi: np.ndarray) -> Dict:
    """Compute HSV color features for an image region.

    Args:
        roi: Image region (BGR).

    Returns:
        ``{'error': 'empty_roi'}`` for a missing or empty region. Otherwise a
        dict with normalized H/S/V histograms (30 hue bins over [0, 180),
        32 bins each for S and V over [0, 256)), ``dominant_colors`` (HSV
        k-means cluster centers ordered by descending cluster size) and the
        per-channel means rounded to two decimals.
    """
    if roi is None or roi.size == 0:
        return {'error': 'empty_roi'}

    hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

    def normalized_histogram(channel: int, bins: int, upper: int) -> list:
        """Histogram of one HSV channel, scaled to sum to 1."""
        hist = cv2.calcHist([hsv], [channel], None, [bins], [0, upper]).flatten()
        # "or 1" guards the division if the histogram is all zeros.
        return (hist / (hist.sum() or 1)).tolist()

    h_hist_norm = normalized_histogram(0, 30, 180)
    s_hist_norm = normalized_histogram(1, 32, 256)
    v_hist_norm = normalized_histogram(2, 32, 256)

    # Dominant colors: cluster all pixels in HSV space.
    k = self.dominant_color_k
    samples = hsv.reshape(-1, 3).astype(np.float32)
    if len(samples) >= k:
        stop_rule = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
        _, labels, centers = cv2.kmeans(
            samples, k, None, stop_rule, 10, cv2.KMEANS_RANDOM_CENTERS
        )
        cluster_sizes = np.bincount(labels.flatten())
        largest_first = np.argsort(-cluster_sizes)[:k]
        dominant_colors = centers[largest_first].tolist()
    elif len(samples) > 0:
        # Too few pixels to cluster: use their mean as the only color.
        dominant_colors = [samples.mean(axis=0).tolist()]
    else:
        dominant_colors = []

    h_mean, s_mean, v_mean = (np.mean(hsv[:, :, channel]) for channel in range(3))

    return {
        'h_histogram': h_hist_norm,
        's_histogram': s_hist_norm,
        'v_histogram': v_hist_norm,
        'dominant_colors': dominant_colors,
        'h_mean': round(h_mean, 2),
        's_mean': round(s_mean, 2),
        'v_mean': round(v_mean, 2),
    }
|
||||
|
||||
def extract_texture(self, roi: np.ndarray) -> Dict:
    """Compute LBP texture and brightness ("shininess") features.

    Args:
        roi: Image region (BGR).

    Returns:
        ``{'error': 'empty_roi'}`` for a missing or empty region. Otherwise a
        dict with the normalized 256-bin LBP histogram, LBP variance and
        mean, and statistics of the HSV value channel: mean, standard
        deviation, maximum, and the fraction of pixels brighter than 200
        (``shininess_ratio``).
    """
    if roi is None or roi.size == 0:
        return {'error': 'empty_roi'}

    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    brightness_values = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)[:, :, 2].flatten()

    # Local binary pattern codes and their distribution.
    lbp = local_binary_pattern(gray, self.lbp_points, self.lbp_radius)
    lbp_counts = np.histogram(lbp, bins=256, range=(0, 256))[0]
    lbp_hist_norm = (lbp_counts / lbp_counts.sum()).tolist()

    # Share of very bright pixels, used as a proxy for shiny materials.
    bright_fraction = np.sum(brightness_values > 200) / len(brightness_values)

    return {
        'lbp_histogram': lbp_hist_norm,
        'lbp_variance': round(np.var(lbp), 2),
        'lbp_mean': round(np.mean(lbp), 2),
        'brightness': round(np.mean(brightness_values), 2),
        'brightness_std': round(np.std(brightness_values), 2),
        'brightness_max': int(np.max(brightness_values)),
        'shininess_ratio': round(bright_fraction, 4),
    }
|
||||
|
||||
def detect_pattern(self, roi: np.ndarray) -> Dict:
    """Classify the clothing pattern of a region with edge/color heuristics.

    Args:
        roi: Image region (BGR).

    Returns:
        ``{'pattern': 'unknown', 'confidence': 0.0}`` for a missing or empty
        region. Otherwise a dict with ``pattern`` ('solid', 'striped' or
        'patterned'), its fixed ``confidence``, and the measurements behind
        the decision: ``edge_ratio``, ``gradient_mean`` and
        ``color_variance`` (standard deviation of the hue channel).
    """
    if roi is None or roi.size == 0:
        return {'pattern': 'unknown', 'confidence': 0.0}

    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

    # Fraction of pixels that Canny marks as edges.
    edges = cv2.Canny(gray, 50, 150)
    edge_ratio = np.sum(edges > 0) / edges.size

    # Mean Sobel gradient magnitude.
    grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
    gradient_mean = np.mean(np.sqrt(grad_x**2 + grad_y**2))

    # Spread of hue and saturation across the region.
    hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
    h_std = np.std(hsv[:, :, 0])
    s_std = np.std(hsv[:, :, 1])

    pattern, confidence = 'solid', 0.7

    busy = edge_ratio > 0.1 and gradient_mean > 20
    if busy and h_std > 30:
        pattern, confidence = 'patterned', 0.8
    elif busy and edge_ratio > 0.2:
        pattern, confidence = 'striped', 0.6

    # NOTE(review): applied independently of the edge test above; confirm
    # against the original source layout that this check is not nested.
    if s_std > 50 and gradient_mean > 30:
        pattern, confidence = 'patterned', 0.85

    return {
        'pattern': pattern,
        'confidence': confidence,
        'edge_ratio': round(edge_ratio, 4),
        'gradient_mean': round(gradient_mean, 2),
        'color_variance': round(h_std, 2),
    }
|
||||
|
||||
def classify_material(self, roi: np.ndarray) -> Dict:
    """Guess the clothing material from texture statistics.

    The first matching rule wins; when none matches the material stays
    'unknown' with confidence 0.0.

    Args:
        roi: Image region (BGR).

    Returns:
        ``{'material': 'unknown', 'confidence': 0.0}`` for a missing or empty
        region. Otherwise a dict with ``material``, its fixed ``confidence``
        and ``texture_features`` (the ``extract_texture`` result the
        decision was based on).
    """
    if roi is None or roi.size == 0:
        return {'material': 'unknown', 'confidence': 0.0}

    texture = self.extract_texture(roi)
    lbp_var = texture.get('lbp_variance', 0)
    shininess = texture.get('shininess_ratio', 0)
    brightness = texture.get('brightness', 0)

    # (material, confidence, rule matched), checked in priority order.
    rules = (
        ('silk', 0.7, shininess > 0.1 and brightness > 150),
        ('leather', 0.6, shininess > 0.05 and lbp_var > 50),
        ('denim', 0.65, lbp_var > 100),
        ('cotton', 0.6, lbp_var < 20 and shininess < 0.02),
        ('polyester', 0.5, lbp_var < 50 and brightness < 100),
    )

    material, confidence = 'unknown', 0.0
    for candidate, score, matched in rules:
        if matched:
            material, confidence = candidate, score
            break

    return {
        'material': material,
        'confidence': confidence,
        'texture_features': texture,
    }
|
||||
|
||||
def extract_all(self, roi: np.ndarray) -> Dict:
    """Extract color, texture, pattern and material features from a region.

    ``classify_material`` already computes the texture features and returns
    them as ``texture_features``, so they are reused here instead of running
    the LBP computation a second time on the same region.

    Args:
        roi: Image region (BGR).

    Returns:
        Dict with the keys 'color', 'texture', 'pattern' and 'material'.
    """
    material = self.classify_material(roi)

    texture = material.get('texture_features')
    if texture is None:
        # No texture attached (e.g. the empty-ROI short circuit), so ask
        # extract_texture directly to keep the output shape unchanged.
        texture = self.extract_texture(roi)
    else:
        # Shallow copy so the two entries are not the same dict object.
        texture = dict(texture)

    return {
        'color': self.extract_color(roi),
        'texture': texture,
        'pattern': self.detect_pattern(roi),
        'material': material,
    }
|
||||
|
||||
def extract_split_region(
    self,
    frame: np.ndarray,
    region: Dict,
    split_ratio: float = 0.5
) -> Dict:
    """Extract features separately for the upper and lower part of a region.

    Args:
        frame: Full frame.
        region: Region dict {'x', 'y', 'width', 'height'}.
        split_ratio: Fraction of the region height assigned to the upper
            part (0.5 = split in the middle).

    Returns:
        ``{'error': 'invalid_region'}`` when width or height is not positive.
        Otherwise ``{'upper': ..., 'lower': ...}`` where each value is the
        ``extract_all`` result for that part, or ``{'error': 'empty'}`` when
        the part has no rows.
    """
    x, y, w, h = (region[key] for key in ('x', 'y', 'width', 'height'))
    if w <= 0 or h <= 0:
        return {'error': 'invalid_region'}

    split_row = y + int(h * split_ratio)
    columns = slice(x, x + w)

    def features_between(first_row, end_row) -> Dict:
        """Features for rows [first_row, end_row), or the 'empty' marker."""
        if end_row <= first_row:
            return {'error': 'empty'}
        return self.extract_all(frame[first_row:end_row, columns])

    return {
        'upper': features_between(y, split_row),
        'lower': features_between(split_row, y + h),
    }
|
||||
|
||||
def detect_exposed_skin(self, roi: np.ndarray) -> Dict:
    """Estimate how much of a region falls inside a fixed HSV skin range.

    Args:
        roi: Image region (BGR).

    Returns:
        Dict with ``skin_ratio`` (fraction of pixels inside the range,
        rounded to four decimals) and ``skin_detected`` (True when the
        fraction exceeds 0.3). A missing or empty region yields a ratio of
        0.0 and no detection.
    """
    if roi is None or roi.size == 0:
        return {'skin_ratio': 0.0, 'skin_detected': False}

    # Range used as "skin": H 0-50, S 10-150, V 50-255.
    skin_low = (0, 10, 50)
    skin_high = (50, 150, 255)

    hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
    in_range = cv2.inRange(hsv, skin_low, skin_high)
    skin_ratio = np.sum(in_range > 0) / in_range.size

    return {
        'skin_ratio': round(skin_ratio, 4),
        'skin_detected': skin_ratio > 0.3,
    }
|
||||
|
||||
def calculate_similarity(self, features1: Dict, features2: Dict) -> Dict:
    """Score how similar two feature sets are.

    Args:
        features1: First feature dict.
        features2: Second feature dict.

    Returns:
        Dict that always contains ``overall_score`` (0.7 * color similarity
        + 0.3 * texture similarity, a missing part counting as 0). When both
        inputs have 'color' it also contains ``color_similarity`` (hue
        histogram correlation) and ``color_distance`` (Euclidean distance
        between the first dominant colors, 255.0 if either list is empty).
        When both have 'texture' it also contains ``texture_similarity``
        (LBP histogram correlation).
    """
    def histogram_correlation(hist_a, hist_b) -> float:
        """Correlation of two histograms, or 0.0 if either is empty."""
        first, second = np.array(hist_a), np.array(hist_b)
        if len(first) > 0 and len(second) > 0:
            corr = cv2.compareHist(
                first.astype(np.float32), second.astype(np.float32), cv2.HISTCMP_CORREL
            )
            return round(corr, 4)
        return 0.0

    scores = {}

    if 'color' in features1 and 'color' in features2:
        color_a, color_b = features1['color'], features2['color']
        scores['color_similarity'] = histogram_correlation(
            color_a.get('h_histogram', []), color_b.get('h_histogram', [])
        )

        # Compare only the most dominant color of each side.
        dominant_a = np.array(color_a.get('dominant_colors', [[0, 0, 0]]))
        dominant_b = np.array(color_b.get('dominant_colors', [[0, 0, 0]]))
        if len(dominant_a) > 0 and len(dominant_b) > 0:
            scores['color_distance'] = round(np.linalg.norm(dominant_a[0] - dominant_b[0]), 2)
        else:
            scores['color_distance'] = 255.0

    if 'texture' in features1 and 'texture' in features2:
        scores['texture_similarity'] = histogram_correlation(
            features1['texture'].get('lbp_histogram', []),
            features2['texture'].get('lbp_histogram', []),
        )

    color_part = scores.get('color_similarity', 0)
    texture_part = scores.get('texture_similarity', 0)
    scores['overall_score'] = round(color_part * 0.7 + texture_part * 0.3, 4)

    return scores
|
||||
|
||||
|
||||
# Helper functions for specific feature extraction
|
||||
|
||||
def extract_hair_color(frame: np.ndarray, head_region: Dict) -> Dict:
    """Extract color features from the top half of a head region.

    Args:
        frame: Full frame.
        head_region: Region dict {'x', 'y', 'width', 'height'}.

    Returns:
        The ``FeatureExtractor.extract_color`` result for the cropped area.
    """
    left, top = head_region['x'], head_region['y']
    width, height = head_region['width'], head_region['height']

    # Only the upper 50% of the head box is treated as the hair area.
    hair_rows = slice(top, top + int(height * 0.5))
    hair_columns = slice(left, left + width)

    return FeatureExtractor().extract_color(frame[hair_rows, hair_columns])
|
||||
|
||||
|
||||
def extract_skin_color(frame: np.ndarray, face_region: Dict) -> Dict:
    """Extract color features from the whole face region.

    Args:
        frame: Full frame.
        face_region: Region dict {'x', 'y', 'width', 'height'}.

    Returns:
        The ``FeatureExtractor.extract_color`` result for the face crop.
    """
    left, top = face_region['x'], face_region['y']
    width, height = face_region['width'], face_region['height']

    face_crop = frame[top:top + height, left:left + width]
    return FeatureExtractor().extract_color(face_crop)
|
||||
|
||||
|
||||
def extract_clothing_color(frame: np.ndarray, torso_region: Dict) -> Dict:
    """Extract upper/lower clothing features from a torso region.

    Args:
        frame: Full frame.
        torso_region: Region dict {'x', 'y', 'width', 'height'}.

    Returns:
        The ``FeatureExtractor.extract_split_region`` result, using its
        default split ratio.
    """
    return FeatureExtractor().extract_split_region(frame, torso_region)
|
||||
|
||||
|
||||
def extract_accessory_color(frame: np.ndarray, accessory_region: Dict) -> Dict:
    """Extract color features from an accessory region.

    Args:
        frame: Full frame.
        accessory_region: Region dict {'x', 'y', 'width', 'height'}.

    Returns:
        The ``FeatureExtractor.extract_color`` result for the cropped area.
    """
    left, top = accessory_region['x'], accessory_region['y']
    width, height = accessory_region['width'], accessory_region['height']

    accessory_crop = frame[top:top + height, left:left + width]
    return FeatureExtractor().extract_color(accessory_crop)
|
||||
|
||||
|
||||
class HierarchicalFeatureExtractor:
|
||||
"""
|
||||
Hierarchical feature extraction: coarse → fine
|
||||
|
||||
Level 1: Large regions (body bbox, upper/lower body)
|
||||
Level 2: Medium regions (head, face, arms, legs)
|
||||
Level 3: Fine features (accessories, details)
|
||||
"""
|
||||
|
||||
def __init__(self):
    """Create the FeatureExtractor shared by all extraction levels."""
    self.extractor = FeatureExtractor()
|
||||
|
||||
def extract_level1(self, frame: np.ndarray, bbox: Dict, head_region: Optional[Dict] = None) -> Dict:
    """Level 1: extract features for the large body regions.

    Covers the full body box, its upper and lower halves (split at half
    the box height) and, when given, the head region.

    Args:
        frame: Full frame.
        bbox: Full body bbox {'x', 'y', 'width', 'height'}.
        head_region: Optional head region for hair extraction.

    Returns:
        ``{'error': 'invalid_bbox'}`` when width or height is not positive.
        Otherwise a dict with 'level' (1), 'body', 'head_top', 'upper_body',
        'lower_body' and the input 'bbox'. A region that yields no ROI is
        reported as {}.
    """
    x, y, w, h = bbox['x'], bbox['y'], bbox['width'], bbox['height']
    if w <= 0 or h <= 0:
        return {'error': 'invalid_bbox'}

    def region_features(region: Optional[Dict]) -> Dict:
        """Features for one region via the shared ROI helper, {} if none."""
        roi = self._get_roi(frame, region)
        return self.extractor.extract_all(roi) if roi is not None else {}

    # Row where the body box is cut into upper and lower halves.
    split_row = y + h // 2

    body_features = region_features({'x': x, 'y': y, 'width': w, 'height': h})
    upper_features = region_features({'x': x, 'y': y, 'width': w, 'height': split_row - y})
    lower_features = region_features({'x': x, 'y': split_row, 'width': w, 'height': y + h - split_row})

    # Head top (hair) counts as a Level 1 region when a head box is supplied.
    head_features = region_features(head_region) if head_region is not None else {}

    return {
        'level': 1,
        'body': body_features,
        'head_top': head_features,
        'upper_body': upper_features,
        'lower_body': lower_features,
        'bbox': bbox,
    }
|
||||
|
||||
def extract_level2(
    self,
    frame: np.ndarray,
    regions: Dict
) -> Dict:
    """Level 2: extract features for the medium-sized body regions.

    Handles the 'face', 'torso', 'leg', 'left_arm' and 'right_arm' entries
    of ``regions``; entries that are absent are skipped. The face entry
    also gets a 'skin' sub-dict from ``detect_exposed_skin``.

    Args:
        frame: Full frame.
        regions: Dict with face, torso, leg, arm regions. None is treated as
            "no regions", which ``extract_hierarchical`` can pass through.

    Returns:
        Dict with 'level' (2) plus one entry per region present. A region
        that yields no ROI is reported as {} (the face as {'skin': {}}).
    """
    features = {'level': 2}
    regions = regions or {}

    for name in ('face', 'torso', 'leg', 'left_arm', 'right_arm'):
        if name not in regions:
            continue

        roi = self._get_roi(frame, regions[name])
        features[name] = self.extractor.extract_all(roi) if roi is not None else {}

        if name == 'face':
            # The face additionally records how much skin is visible.
            features[name]['skin'] = (
                self.extractor.detect_exposed_skin(roi) if roi is not None else {}
            )

    return features
|
||||
|
||||
def extract_level3(
    self,
    frame: np.ndarray,
    keypoints: List[Dict],
    eye_width: float
) -> Dict:
    """Level 3: extract features for small accessory regions around keypoints.

    Each accessory region is a box around one keypoint (or around both eyes
    for glasses), padded by half the eye width. Boxes are clamped to the
    frame, so a keypoint near the border yields the visible part of its box
    rather than an empty or wrapped-around slice.

    Args:
        frame: Full frame.
        keypoints: Pose keypoints (dicts with 'name', 'x', 'y', 'confidence').
        eye_width: Eye distance (reference unit); a padding of 20 pixels is
            used when it is not positive.

    Returns:
        Dict with 'level' (3) plus, when the keypoints are available,
        'glasses', 'left_earring', 'right_earring', 'left_watch',
        'right_watch', 'left_shoe' and 'right_shoe', each holding the
        ``extract_all`` result of its region. Single-keypoint accessories
        require a keypoint confidence above 0.1.
    """
    features = {'level': 3}
    offset = int(eye_width * 0.5) if eye_width > 0 else 20
    frame_h, frame_w = frame.shape[:2]

    def crop(top, bottom, left, right) -> np.ndarray:
        """Return frame[top:bottom, left:right] clamped to the frame bounds."""
        top, left = max(0, int(top)), max(0, int(left))
        bottom, right = min(frame_h, int(bottom)), min(frame_w, int(right))
        # max() keeps an out-of-frame box empty instead of letting a negative
        # end index wrap around.
        return frame[top:max(top, bottom), left:max(left, right)]

    # Glasses: box spanning both eyes. min/max on x as well as y, because
    # left_eye may lie to the right of right_eye in image coordinates
    # (e.g. a subject facing the camera).
    left_eye = self._get_kp(keypoints, 'left_eye')
    right_eye = self._get_kp(keypoints, 'right_eye')
    if left_eye and right_eye:
        eye_xs = (left_eye['x'], right_eye['x'])
        eye_ys = (left_eye['y'], right_eye['y'])
        features['glasses'] = self.extractor.extract_all(
            crop(min(eye_ys) - offset, max(eye_ys) + offset,
                 min(eye_xs) - offset, max(eye_xs) + offset)
        )

    # (feature key, keypoint name, downward extent in multiples of offset).
    # Shoes extend twice as far below the ankle as above it.
    point_accessories = (
        ('left_earring', 'left_ear', 1),
        ('right_earring', 'right_ear', 1),
        ('left_watch', 'left_wrist', 1),
        ('right_watch', 'right_wrist', 1),
        ('left_shoe', 'left_ankle', 2),
        ('right_shoe', 'right_ankle', 2),
    )
    for feature_key, keypoint_name, below in point_accessories:
        kp = self._get_kp(keypoints, keypoint_name)
        if kp and kp.get('confidence', 0) > 0.1:
            features[feature_key] = self.extractor.extract_all(
                crop(kp['y'] - offset, kp['y'] + offset * below,
                     kp['x'] - offset, kp['x'] + offset)
            )

    return features
|
||||
|
||||
def extract_hierarchical(
    self,
    frame: np.ndarray,
    bbox: Dict,
    regions: Dict,
    keypoints: List[Dict],
    eye_width: float
) -> Dict:
    """Run all three extraction levels, coarse to fine, for one person.

    Args:
        frame: Full frame.
        bbox: Full body bbox.
        regions: Medium regions dict; its 'head' entry, if any, is also
            passed to Level 1.
        keypoints: Pose keypoints.
        eye_width: Reference unit.

    Returns:
        Dict with the results of the three levels under 'level1', 'level2'
        and 'level3'.
    """
    # The head box is optional; an empty or missing regions dict means none.
    if regions:
        head_region = regions.get('head')
    else:
        head_region = None

    return {
        'level1': self.extract_level1(frame, bbox, head_region),
        'level2': self.extract_level2(frame, regions),
        'level3': self.extract_level3(frame, keypoints, eye_width),
    }
|
||||
|
||||
def _get_roi(self, frame: np.ndarray, region: Dict) -> Optional[np.ndarray]:
    """Crop a region dict out of the frame, clamped to the frame bounds.

    Coordinates are truncated to integers so float-valued boxes can be
    sliced. A negative x or y is clamped to 0, because a negative slice
    start would index from the far edge of the frame and give an empty or
    wrong crop.

    Args:
        frame: Full frame.
        region: Region dict {'x', 'y', 'width', 'height'}; missing keys
            default to 0. May be None.

    Returns:
        None when ``region`` is None or its width/height is not positive.
        Otherwise the visible part of the region as a slice of ``frame``,
        which is empty if the region lies entirely outside the frame.
    """
    if region is None:
        return None

    x, y = int(region.get('x', 0)), int(region.get('y', 0))
    w, h = int(region.get('width', 0)), int(region.get('height', 0))
    if w <= 0 or h <= 0:
        return None

    frame_h, frame_w = frame.shape[:2]
    top, left = max(0, y), max(0, x)
    bottom, right = min(frame_h, y + h), min(frame_w, x + w)

    # max() keeps a fully out-of-frame box empty rather than letting a
    # negative end index wrap around.
    return frame[top:max(top, bottom), left:max(left, right)]
|
||||
|
||||
def _get_kp(self, keypoints: List[Dict], name: str) -> Optional[Dict]:
    """Return the first keypoint whose 'name' equals ``name``, else None."""
    matches = (candidate for candidate in keypoints if candidate.get('name') == name)
    return next(matches, None)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: print a feature summary for one image file.
    import sys

    if len(sys.argv) <= 1:
        print("Usage: python feature_extractor.py <image_path>")
    else:
        img = cv2.imread(sys.argv[1])

        # cv2.imread returns None for an unreadable file; nothing is printed then.
        if img is not None:
            features = FeatureExtractor().extract_all(img)
            color = features['color']
            texture = features['texture']
            pattern = features['pattern']
            material = features['material']

            print("Color features:")
            print(f" H mean: {color['h_mean']}")
            print(f" S mean: {color['s_mean']}")
            print(f" V mean: {color['v_mean']}")
            print(f" Dominant colors: {len(color['dominant_colors'])}")

            print("\nTexture features:")
            print(f" LBP variance: {texture['lbp_variance']}")
            print(f" Brightness: {texture['brightness']}")
            print(f" Shininess: {texture['shininess_ratio']}")

            print("\nPattern:")
            print(f" {pattern['pattern']} (conf: {pattern['confidence']})")

            print("\nMaterial:")
            print(f" {material['material']} (conf: {material['confidence']})")
|
||||
674
scripts/utils/proportion_calculator.py
Normal file
674
scripts/utils/proportion_calculator.py
Normal file
@@ -0,0 +1,674 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Proportion Calculator - Body proportion calculation from keypoints
|
||||
|
||||
Purpose:
|
||||
1. Calculate body proportions from Pose keypoints
|
||||
2. Use eye_width as reference unit
|
||||
3. Provide normalized ratios for feature extraction
|
||||
|
||||
Keypoints Used:
|
||||
- Swift Pose (19 keypoints): nose, eyes, ears, neck, shoulders, elbows, wrists, hips, knees, ankles
|
||||
- MediaPipe Pose (33 landmarks): additional details
|
||||
- YOLOv8 Pose (17 keypoints): fallback
|
||||
|
||||
Output:
|
||||
{
|
||||
'eye_width': float,
|
||||
'body_height': float,
|
||||
'torso_height': float,
|
||||
'leg_height': float,
|
||||
'shoulder_width': float,
|
||||
'head_ratio': float,
|
||||
'torso_ratio': float,
|
||||
'leg_ratio': float,
|
||||
}
|
||||
|
||||
Usage:
|
||||
from proportion_calculator import calculate_proportions
|
||||
|
||||
proportions = calculate_proportions(pose_keypoints)
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
|
||||
# MediaPipe pose landmark index -> name mapping (33 landmarks, indices 0-32).
# The tuple position of each name is its landmark index.
MEDIAPIPE_POSE_NAMES = dict(enumerate((
    'nose',
    'left_eye_inner',
    'left_eye',
    'left_eye_outer',
    'right_eye_inner',
    'right_eye',
    'right_eye_outer',
    'left_ear',
    'right_ear',
    'mouth_left',
    'mouth_right',
    'left_shoulder',
    'right_shoulder',
    'left_elbow',
    'right_elbow',
    'left_wrist',
    'right_wrist',
    'left_pinky',
    'right_pinky',
    'left_index',
    'right_index',
    'left_thumb',
    'right_thumb',
    'left_hip',
    'right_hip',
    'left_knee',
    'right_knee',
    'left_ankle',
    'right_ankle',
    'left_heel',
    'right_heel',
    'left_foot_index',
    'right_foot_index',
)))
|
||||
|
||||
|
||||
def convert_mediapipe_to_named(landmarks: List[List]) -> List[Dict]:
    """Convert positional MediaPipe landmarks into named keypoint dicts.

    The x and y values are copied unchanged and z is dropped. Landmarks
    whose index has no entry in MEDIAPIPE_POSE_NAMES are skipped.

    Args:
        landmarks: MediaPipe landmarks [[x, y, z, visibility], ...]

    Returns:
        Named keypoints [{'name': ..., 'x': ..., 'y': ..., 'confidence': ...}, ...]
        where confidence is the visibility value, or 1.0 when the landmark
        has no fourth element.
    """
    return [
        {
            'name': MEDIAPIPE_POSE_NAMES[index],
            'x': landmark[0],
            'y': landmark[1],
            'confidence': landmark[3] if len(landmark) > 3 else 1.0,
        }
        for index, landmark in enumerate(landmarks)
        if index in MEDIAPIPE_POSE_NAMES
    ]
|
||||
|
||||
|
||||
def get_keypoint_by_name(keypoints: List[Dict], name: str) -> Optional[Dict]:
    """Look up a keypoint by its 'name' field.

    Args:
        keypoints: List of keypoints [{'name': 'nose', 'x': 100, 'y': 200, 'confidence': 0.9}, ...]
        name: Keypoint name to find

    Returns:
        The first keypoint dict with a matching name, or None if there is none.
    """
    return next(
        (candidate for candidate in keypoints if candidate.get('name') == name),
        None,
    )
|
||||
|
||||
|
||||
def calculate_distance(p1: Dict, p2: Dict) -> float:
    """Return the Euclidean distance between two keypoints.

    Args:
        p1: Keypoint {'x': float, 'y': float}
        p2: Keypoint {'x': float, 'y': float}

    Returns:
        The distance in the keypoints' coordinate units, or 0.0 when either
        keypoint is None.
    """
    if p1 is None or p2 is None:
        return 0.0

    delta_x = p1['x'] - p2['x']
    delta_y = p1['y'] - p2['y']
    return np.sqrt(delta_x**2 + delta_y**2)
|
||||
|
||||
|
||||
def calculate_eye_width(keypoints: List[Dict]) -> float:
    """Measure the distance between the two eyes (the reference unit).

    Args:
        keypoints: Pose keypoints list

    Returns:
        Distance between 'left_eye' and 'right_eye', or 0.0 when either eye
        is missing or has a confidence below 0.1.
    """
    eyes = [get_keypoint_by_name(keypoints, side) for side in ('left_eye', 'right_eye')]

    if any(eye is None for eye in eyes):
        return 0.0

    # Both eyes must be reasonably certain for the unit to be usable.
    if any(eye.get('confidence', 0) < 0.1 for eye in eyes):
        return 0.0

    return calculate_distance(eyes[0], eyes[1])
|
||||
|
||||
|
||||
def calculate_body_height(keypoints: List[Dict], bbox: Optional[Dict] = None) -> float:
    """Calculate the full body height (nose to lowest ankle).

    Assumes keypoints are already in Top-Left pixel coordinates, so a larger
    y means lower in the image.

    The keypoint-based height is used when the nose and at least one ankle
    with confidence above 0.1 are available and the ankle lies below the
    nose. In every other case, including a missing nose, the bbox height is
    used as fallback.

    Args:
        keypoints: Pose keypoints list (Top-Left pixels)
        bbox: Optional bbox {'x', 'y', 'width', 'height'}

    Returns:
        Body height in pixels, or 0.0 when neither the keypoints nor the
        bbox provide a positive height.
    """
    nose = get_keypoint_by_name(keypoints, 'nose')

    if nose is not None:
        ankles = (
            get_keypoint_by_name(keypoints, 'left_ankle'),
            get_keypoint_by_name(keypoints, 'right_ankle'),
        )
        confident_ankle_ys = [
            ankle['y'] for ankle in ankles
            if ankle and ankle.get('confidence', 0) > 0.1
        ]
        # Max y is the lowest point of the body in the Top-Left system.
        ankle_y = max(confident_ankle_ys, default=0.0)
        height = ankle_y - nose['y']
        # Never report a non-positive height (ankle above the nose).
        if ankle_y > 0 and height > 0:
            return height

    # Fallback to bbox height
    if bbox and bbox.get('height', 0) > 0:
        return bbox['height']

    return 0.0
|
||||
|
||||
|
||||
def calculate_torso_height(keypoints: List[Dict]) -> float:
    """
    Measure the vertical extent of the torso, from neck down to hips.

    Assumes keypoints are already in Top-Left pixel coordinates.

    The neck Y comes from the 'neck' keypoint when its confidence is above
    0.1; otherwise it is estimated as half an eye width below the nose.
    The hip Y is the mean of whichever hips have confidence above 0.1.

    Args:
        keypoints: Pose keypoints list

    Returns:
        Torso height in pixels, or 0.0 when the neck or hip level could
        not be determined
    """
    neck = get_keypoint_by_name(keypoints, 'neck')
    hips = [
        get_keypoint_by_name(keypoints, 'left_hip'),
        get_keypoint_by_name(keypoints, 'right_hip'),
    ]

    # Upper edge: trusted neck keypoint, 0.0 meaning "not found yet".
    top_y = neck['y'] if neck and neck.get('confidence', 0) > 0.1 else 0.0

    if top_y == 0:
        # No usable neck: approximate it from the nose and the eye width.
        nose = get_keypoint_by_name(keypoints, 'nose')
        unit = calculate_eye_width(keypoints)
        if nose and unit > 0:
            top_y = nose['y'] + unit * 0.5

    # Lower edge: average over the hips that pass the confidence filter.
    hip_ys = [
        hip['y']
        for hip in hips
        if hip and hip.get('confidence', 0) > 0.1
    ]
    bottom_y = sum(hip_ys) / len(hip_ys) if hip_ys else 0.0

    if top_y > 0 and bottom_y > 0:
        return bottom_y - top_y
    return 0.0
|
||||
|
||||
|
||||
def calculate_leg_height(keypoints: List[Dict]) -> float:
    """
    Measure the vertical extent of the legs, from hips down to ankles.

    Assumes keypoints are already in Top-Left pixel coordinates.

    The hip Y is the mean of the hips with confidence above 0.1; the ankle
    Y is the largest Y among the ankles with confidence above 0.1.

    Args:
        keypoints: Pose keypoints list

    Returns:
        Leg height in pixels, or 0.0 when the hip or ankle level could not
        be determined
    """
    hips = [
        get_keypoint_by_name(keypoints, 'left_hip'),
        get_keypoint_by_name(keypoints, 'right_hip'),
    ]
    ankles = [
        get_keypoint_by_name(keypoints, 'left_ankle'),
        get_keypoint_by_name(keypoints, 'right_ankle'),
    ]

    def _confident_ys(points):
        # Y values of the keypoints that pass the confidence filter.
        return [
            point['y']
            for point in points
            if point and point.get('confidence', 0) > 0.1
        ]

    hip_ys = _confident_ys(hips)
    hip_level = sum(hip_ys) / len(hip_ys) if hip_ys else 0.0

    # Seeding with 0.0 keeps "no ankle found" at 0.0; the largest Y is the
    # lowest point in a Top-Left coordinate system.
    foot_level = max([0.0, *_confident_ys(ankles)])

    if hip_level > 0 and foot_level > 0:
        return foot_level - hip_level
    return 0.0
|
||||
|
||||
|
||||
def calculate_should_width(keypoints: List[Dict]) -> float:
    """
    Measure the distance between the two shoulder keypoints.

    Args:
        keypoints: Pose keypoints list

    Returns:
        Shoulder width in pixels, or 0.0 when either shoulder is missing
        or has a confidence below 0.1
    """
    lhs = get_keypoint_by_name(keypoints, 'left_shoulder')
    rhs = get_keypoint_by_name(keypoints, 'right_shoulder')

    # Usable only if both shoulders exist and neither is low-confidence.
    usable = (
        lhs is not None
        and rhs is not None
        and not (lhs.get('confidence', 0) < 0.1 or rhs.get('confidence', 0) < 0.1)
    )

    return calculate_distance(lhs, rhs) if usable else 0.0
|
||||
|
||||
|
||||
def calculate_chest_width(keypoints: List[Dict]) -> float:
    """
    Approximate the chest/bust width by the shoulder width.

    Args:
        keypoints: Pose keypoints list

    Returns:
        Chest width in pixels (identical to the shoulder width)
    """
    # The shoulder span stands in for the chest measurement.
    shoulder_span = calculate_should_width(keypoints)
    return shoulder_span
|
||||
|
||||
|
||||
def calculate_waist_width(keypoints: List[Dict]) -> float:
    """
    Approximate the waist width by the distance between the hip keypoints.

    Args:
        keypoints: Pose keypoints list

    Returns:
        Waist width in pixels, or 0.0 when either hip is missing or has a
        confidence below 0.1
    """
    hip_pair = (
        get_keypoint_by_name(keypoints, 'left_hip'),
        get_keypoint_by_name(keypoints, 'right_hip'),
    )

    # Both hips must have been detected.
    if any(hip is None for hip in hip_pair):
        return 0.0

    # Reject the measurement as soon as one hip is low-confidence.
    for hip in hip_pair:
        if hip.get('confidence', 0) < 0.1:
            return 0.0

    return calculate_distance(*hip_pair)
|
||||
|
||||
|
||||
def calculate_hip_width(keypoints: List[Dict]) -> float:
    """
    Measure the hip width.

    Args:
        keypoints: Pose keypoints list

    Returns:
        Hip width in pixels (the same value calculate_waist_width returns)
    """
    # Delegates to the waist helper, so hip and waist widths always match.
    hip_span = calculate_waist_width(keypoints)
    return hip_span
|
||||
|
||||
|
||||
def calculate_body_shape(keypoints: List[Dict]) -> Dict:
    """
    Calculate body shape from the three measurements: chest, waist, hip.

    The shape is classified only when all three widths are positive;
    otherwise 'body_shape' stays "unknown". Labels follow the usual
    convention: "inverted_triangle" is a wide upper body and "triangle"
    is a wide lower body.

    Args:
        keypoints: Pose keypoints list

    Returns:
        Dict with keys 'chest_width', 'waist_width', 'hip_width' (pixels,
        rounded to 2 decimals), 'chest_waist_ratio', 'waist_hip_ratio'
        (rounded to 4 decimals, 0 when the denominator is not positive)
        and 'body_shape'
    """
    chest_width = calculate_chest_width(keypoints)
    waist_width = calculate_waist_width(keypoints)
    hip_width = calculate_hip_width(keypoints)

    # Compute each ratio once; 0 marks "denominator unavailable".
    chest_waist_ratio = chest_width / waist_width if waist_width > 0 else 0
    waist_hip_ratio = waist_width / hip_width if hip_width > 0 else 0

    shape_type = "unknown"

    if chest_width > 0 and waist_width > 0 and hip_width > 0:
        # NOTE(review): calculate_hip_width delegates to
        # calculate_waist_width in this file, so waist_hip_ratio is
        # presumably always 1.0 and the branches that test it cannot
        # fire until a distinct waist measurement exists - confirm.
        if chest_waist_ratio < 1.0 and waist_hip_ratio < 0.9:
            shape_type = "hourglass"
        elif chest_waist_ratio > 1.2:
            # Wide upper body. Previously mislabelled "triangle".
            shape_type = "inverted_triangle"
        elif waist_hip_ratio > 1.1:
            # Wide lower body. Previously mislabelled "inverted_triangle".
            shape_type = "triangle"
        elif abs(chest_width - hip_width) < 0.1 * max(chest_width, hip_width):
            # Chest and hip within 10% of each other.
            shape_type = "rectangle"
        else:
            shape_type = "oval"

    return {
        'chest_width': round(chest_width, 2),
        'waist_width': round(waist_width, 2),
        'hip_width': round(hip_width, 2),
        'chest_waist_ratio': round(chest_waist_ratio, 4),
        'waist_hip_ratio': round(waist_hip_ratio, 4),
        'body_shape': shape_type,
    }
|
||||
|
||||
|
||||
def estimate_real_height(
    keypoints: List[Dict],
    eye_width: float,
    eye_width_cm: float = 6.0,
) -> Dict:
    """
    Estimate real height using eye_width as reference.

    The body height in pixels (from calculate_body_height, called without
    a bbox) is divided by the eye width in pixels, and that ratio is
    scaled by the assumed real-world eye width:

        estimated_height_cm = (body_height_px / eye_width_px) * eye_width_cm

    Every return path yields the same set of keys, so callers can read
    'height_category' and the pixel measurements without checking whether
    the estimate succeeded.

    Args:
        keypoints: Pose keypoints list
        eye_width: Eye distance in pixels
        eye_width_cm: Assumed real-world eye distance in centimetres
            (defaults to 6.0)

    Returns:
        Dict with 'estimated_height_cm', 'height_ratio', 'height_category'
        ("short" < 150, "medium" < 170, "tall" < 180, else "very_tall",
        or "unknown" when no estimate is possible), 'body_height_px' and
        'eye_width_px'. The body height is not measured when eye_width is
        not positive; 'body_height_px' is then 0.0.
    """
    # Without a usable reference unit there is nothing to scale by, so
    # the body height is not even measured.
    body_height = calculate_body_height(keypoints) if eye_width > 0 else 0.0

    if eye_width <= 0 or body_height <= 0:
        return {
            'estimated_height_cm': 0,
            'height_ratio': 0,
            'height_category': "unknown",
            'body_height_px': round(body_height, 2),
            'eye_width_px': round(eye_width, 2),
        }

    # Height expressed in multiples of the eye width.
    height_ratio = body_height / eye_width
    estimated_height_cm = height_ratio * eye_width_cm

    # Height category
    if estimated_height_cm < 150:
        height_category = "short"
    elif estimated_height_cm < 170:
        height_category = "medium"
    elif estimated_height_cm < 180:
        height_category = "tall"
    else:
        height_category = "very_tall"

    return {
        'estimated_height_cm': round(estimated_height_cm, 1),
        'height_ratio': round(height_ratio, 2),
        'height_category': height_category,
        'body_height_px': round(body_height, 2),
        'eye_width_px': round(eye_width, 2),
    }
|
||||
|
||||
|
||||
def calculate_proportions(keypoints: List, bbox: Optional[Dict] = None) -> Dict:
    """
    Gather every body measurement and ratio into a single dict.

    Two input layouts are accepted:
    - Swift Pose: [{'name': 'nose', 'x': 100, 'y': 200, 'confidence': 0.9}, ...]
    - MediaPipe: [[x, y, z, visibility], ...] (converted to the named
      layout when the first element is a list)

    Args:
        keypoints: Pose keypoints list (named or indexed)
        bbox: Optional bbox, used as the body-height fallback

    Returns:
        Dict holding the raw measurements (rounded to 2 decimals), the
        'head_ratio' / 'torso_ratio' / 'leg_ratio' values relative to body
        height (rounded to 4 decimals, 0.0 when body height is not
        positive), plus nested 'body_shape' and 'height_estimate' dicts
    """
    # Index-based input is detected by looking at the first element only.
    if keypoints and isinstance(keypoints[0], list):
        keypoints = convert_mediapipe_to_named(keypoints)

    eye_width = calculate_eye_width(keypoints)
    body_height = calculate_body_height(keypoints, bbox)

    # Unrounded values are kept so the ratios are computed at full precision.
    measured = {
        'eye_width': eye_width,
        'body_height': body_height,
        'torso_height': calculate_torso_height(keypoints),
        'leg_height': calculate_leg_height(keypoints),
        'shoulder_width': calculate_should_width(keypoints),
    }
    proportions = {key: round(value, 2) for key, value in measured.items()}

    # Each ratio divides one measurement by the body height.
    ratio_sources = (
        ('head_ratio', 'eye_width'),
        ('torso_ratio', 'torso_height'),
        ('leg_ratio', 'leg_height'),
    )
    for ratio_key, source_key in ratio_sources:
        if body_height > 0:
            proportions[ratio_key] = round(measured[source_key] / body_height, 4)
        else:
            proportions[ratio_key] = 0.0

    # Chest / waist / hip summary.
    proportions['body_shape'] = calculate_body_shape(keypoints)

    # Real-world height estimate scaled by the eye width.
    proportions['height_estimate'] = estimate_real_height(keypoints, eye_width)

    return proportions
|
||||
|
||||
|
||||
def estimate_head_top(keypoints: List[Dict]) -> Tuple[float, float]:
    """
    Estimate where the top of the head is (for hair/hat detection).

    The point is placed one eye width above the nose, at the nose's X.

    Args:
        keypoints: Pose keypoints list

    Returns:
        (head_top_y, head_top_x) position - note that Y comes first - or
        (0.0, 0.0) when the nose is missing or the eye width is 0
    """
    nose = get_keypoint_by_name(keypoints, 'nose')
    unit = calculate_eye_width(keypoints)

    # Without a nose or a reference unit no estimate is possible.
    if nose is None or unit == 0:
        return (0.0, 0.0)

    return (nose['y'] - unit, nose['x'])
|
||||
|
||||
|
||||
def estimate_region_from_keypoints(
    keypoints: List[Dict],
    top_keypoint: str,
    bottom_keypoint: str,
    left_keypoint: Optional[str] = None,
    right_keypoint: Optional[str] = None,
    eye_width_factor: float = 0.0
) -> Dict:
    """
    Build a rectangular region bounded by named keypoints.

    Vertical bounds come from the top/bottom keypoints; a bound whose
    keypoint is missing or has confidence of 0.1 or less is taken as 0.
    Horizontal bounds come from the left/right keypoints when both are
    trusted; otherwise they are centred on the nose (two eye widths to
    each side, or 50 when no eye width is available), and default to
    0..100 when there is no nose either.

    Args:
        keypoints: Pose keypoints list
        top_keypoint: Name of top boundary keypoint
        bottom_keypoint: Name of bottom boundary keypoint
        left_keypoint: Name of left boundary keypoint (optional)
        right_keypoint: Name of right boundary keypoint (optional)
        eye_width_factor: Multiple of the eye width by which the region is
            extended upwards and downwards (ignored unless positive)

    Returns:
        Region dict {'x', 'y', 'width', 'height'} with values truncated
        to int
    """
    def _trusted(point):
        # A keypoint counts only if it exists and is confident enough.
        return bool(point) and point.get('confidence', 0) > 0.1

    upper = get_keypoint_by_name(keypoints, top_keypoint)
    lower = get_keypoint_by_name(keypoints, bottom_keypoint)
    side_a = get_keypoint_by_name(keypoints, left_keypoint) if left_keypoint else None
    side_b = get_keypoint_by_name(keypoints, right_keypoint) if right_keypoint else None

    unit = calculate_eye_width(keypoints)

    # Vertical extent.
    y_min = upper['y'] if _trusted(upper) else 0
    y_max = lower['y'] if _trusted(lower) else 0

    # Optional vertical padding, measured in eye widths.
    if eye_width_factor > 0 and unit > 0:
        y_min -= unit * eye_width_factor
        y_max += unit * eye_width_factor

    # Horizontal extent.
    if _trusted(side_a) and _trusted(side_b):
        # The two keypoints may arrive in either order.
        x_min = min(side_a['x'], side_b['x'])
        x_max = max(side_a['x'], side_b['x'])
    else:
        anchor = get_keypoint_by_name(keypoints, 'nose')
        if not anchor:
            x_min = 0
            x_max = 100
        elif unit > 0:
            x_min = anchor['x'] - unit * 2
            x_max = anchor['x'] + unit * 2
        else:
            x_min = anchor['x'] - 50
            x_max = anchor['x'] + 50

    region = {
        'x': int(x_min),
        'y': int(y_min),
        'width': int(x_max - x_min),
        'height': int(y_max - y_min)
    }
    return region
|
||||
|
||||
|
||||
# Region estimation helpers for specific body parts
|
||||
|
||||
def get_head_region(keypoints: List[Dict]) -> Dict:
    """
    Region around the head (for hair/hat detection).

    Bounded vertically by nose and neck, padded by one eye width, and
    horizontally by the two ears.
    """
    bounds = {
        'top_keypoint': 'nose',
        'bottom_keypoint': 'neck',
        'left_keypoint': 'left_ear',
        'right_keypoint': 'right_ear',
        'eye_width_factor': 1.0,
    }
    return estimate_region_from_keypoints(keypoints, **bounds)
|
||||
|
||||
|
||||
def get_face_region(keypoints: List[Dict]) -> Dict:
    """
    Region around the face (for skin/face accessories detection).

    Bounded vertically by nose and neck, padded by half an eye width, and
    horizontally by the two eyes.
    """
    bounds = {
        'top_keypoint': 'nose',
        'bottom_keypoint': 'neck',
        'left_keypoint': 'left_eye',
        'right_keypoint': 'right_eye',
        'eye_width_factor': 0.5,
    }
    return estimate_region_from_keypoints(keypoints, **bounds)
|
||||
|
||||
|
||||
def get_torso_region(keypoints: List[Dict]) -> Dict:
    """
    Region around the torso (for upper clothing detection).

    Bounded vertically by the neck and the left hip, and horizontally by
    the two shoulders; no eye-width padding is applied.
    """
    bounds = {
        'top_keypoint': 'neck',
        'bottom_keypoint': 'left_hip',
        'left_keypoint': 'left_shoulder',
        'right_keypoint': 'right_shoulder',
    }
    return estimate_region_from_keypoints(keypoints, **bounds)
|
||||
|
||||
|
||||
def get_leg_region(keypoints: List[Dict]) -> Dict:
    """
    Region around the legs (for lower clothing detection).

    Bounded vertically by the left hip and the left ankle, and
    horizontally by the two hips; no eye-width padding is applied.
    """
    bounds = {
        'top_keypoint': 'left_hip',
        'bottom_keypoint': 'left_ankle',
        'left_keypoint': 'left_hip',
        'right_keypoint': 'right_hip',
    }
    return estimate_region_from_keypoints(keypoints, **bounds)
|
||||
|
||||
|
||||
def get_arm_region(keypoints: List[Dict], side: str = 'left') -> Dict:
    """
    Region around one arm (for sleeve/arm detection).

    Bounded vertically by the shoulder and wrist of the chosen side, and
    horizontally by that side's shoulder and elbow.

    Args:
        keypoints: Pose keypoints list
        side: 'left' selects the left arm; any other value selects the
            right arm
    """
    # (top, bottom, left, right) keypoint names for the chosen arm.
    if side == 'left':
        edges = ('left_shoulder', 'left_wrist', 'left_shoulder', 'left_elbow')
    else:
        edges = ('right_shoulder', 'right_wrist', 'right_elbow', 'right_shoulder')

    upper, lower, first_side, second_side = edges
    return estimate_region_from_keypoints(
        keypoints,
        top_keypoint=upper,
        bottom_keypoint=lower,
        left_keypoint=first_side,
        right_keypoint=second_side
    )
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: a synthetic upright figure in Top-Left pixel
    # coordinates, given as (name, x, y, confidence) rows.
    sample_rows = [
        ('nose', 100, 50, 0.9),
        ('left_eye', 90, 40, 0.8),
        ('right_eye', 110, 40, 0.8),
        ('neck', 100, 80, 0.7),
        ('left_shoulder', 70, 100, 0.8),
        ('right_shoulder', 130, 100, 0.8),
        ('left_hip', 80, 200, 0.7),
        ('right_hip', 120, 200, 0.7),
        ('left_ankle', 80, 400, 0.6),
        ('right_ankle', 120, 400, 0.6),
    ]
    sample_keypoints = [
        {'name': row[0], 'x': row[1], 'y': row[2], 'confidence': row[3]}
        for row in sample_rows
    ]

    # Print the full proportions dict, then two of the derived regions.
    proportions = calculate_proportions(sample_keypoints)
    print("Proportions:", proportions)

    head_region = get_head_region(sample_keypoints)
    print("Head region:", head_region)

    torso_region = get_torso_region(sample_keypoints)
    print("Torso region:", torso_region)
|
||||
Reference in New Issue
Block a user