feat: add shot type detection and proportion-based height estimation
- detect_shot_type(): classify full_body/medium_shot/close_up
- estimate height using shoulder_width × 3.8 (~171cm) for close-up
- add BODY_PROPORTIONS constants for validation
- head position ratio + bbox aspect ratio → shot type
- enables filtering full-body shots in video search
This commit is contained in:
@@ -143,13 +143,62 @@ def calculate_eye_width(keypoints: List[Dict]) -> float:
|
||||
if left_eye is None or right_eye is None:
|
||||
return 0.0
|
||||
|
||||
# Filter by confidence
|
||||
if left_eye.get('confidence', 0) < 0.1 or right_eye.get('confidence', 0) < 0.1:
|
||||
return 0.0
|
||||
|
||||
return calculate_distance(left_eye, right_eye)
|
||||
|
||||
|
||||
def calculate_head_width(keypoints: List[Dict]) -> float:
    """
    Measure the head width as the distance between the two ear keypoints

    Serves as an alternative reference unit to the eye width.

    Args:
        keypoints: Pose keypoints list

    Returns:
        Ear-to-ear distance in pixels, or 0.0 when either ear keypoint is
        missing or has a confidence below 0.1
    """
    ears = [get_keypoint_by_name(keypoints, side) for side in ('left_ear', 'right_ear')]

    # Both ears must be present before a width can be measured
    if any(ear is None for ear in ears):
        return 0.0

    # Reject low-confidence detections
    if any(ear.get('confidence', 0) < 0.1 for ear in ears):
        return 0.0

    first_ear, second_ear = ears
    return calculate_distance(first_ear, second_ear)
|
||||
|
||||
|
||||
def get_best_reference_unit(keypoints: List[Dict]) -> Tuple[str, float]:
    """
    Pick the preferred reference unit that could actually be measured

    Candidates are tried in priority order: shoulder_width, then
    head_width, then eye_width. The first one with a positive pixel
    value wins.

    Args:
        keypoints: Pose keypoints list

    Returns:
        Tuple of (unit_name, unit_value_in_pixels); ('none', 0.0) when no
        candidate yields a positive value
    """
    # All three measurements are taken up front, listed in priority order
    candidates = (
        ('shoulder_width', calculate_should_width(keypoints)),
        ('head_width', calculate_head_width(keypoints)),
        ('eye_width', calculate_eye_width(keypoints)),
    )

    for unit_name, pixels in candidates:
        if pixels > 0:
            return (unit_name, pixels)

    return ('none', 0.0)
|
||||
|
||||
|
||||
def calculate_body_height(keypoints: List[Dict], bbox: Optional[Dict] = None) -> float:
|
||||
"""
|
||||
Calculate full body height
|
||||
@@ -393,36 +442,274 @@ def calculate_body_shape(keypoints: List[Dict]) -> Dict:
|
||||
}
|
||||
|
||||
|
||||
def estimate_real_height(keypoints: List[Dict], eye_width: float) -> Dict:
|
||||
# Standard body proportions (adult average)
# These ratios are used to cross-validate and estimate missing values.
# They are all derived from the same reference sizes used elsewhere in this
# module: eye_width ≈ 6 cm, head_width (ear to ear) ≈ 16 cm,
# shoulder_width ≈ 45 cm, body height ≈ 170 cm.
BODY_PROPORTIONS = {
    'eye_to_head': 2.67,         # head_width ≈ 2.67 × eye_width (16 / 6)
    'eye_to_shoulder': 7.5,      # shoulder_width ≈ 7.5 × eye_width (45 / 6)
    'head_to_shoulder': 2.8,     # shoulder_width ≈ 2.8 × head_width (45 / 16)
    # NOTE: the familiar "7.5 heads tall" rule refers to head *height*.
    # This key is applied to ear-to-ear head *width*, so the ratio must be
    # 170 / 16 ≈ 10.6; using 7.5 here yields a 120 cm "adult" height.
    'head_to_height': 10.6,      # body_height ≈ 10.6 × head_width
    'shoulder_to_height': 3.8,   # body_height ≈ 3.8 × shoulder_width (171 / 45)
}
|
||||
|
||||
|
||||
def detect_shot_type(keypoints: List[Dict], bbox: Optional[Dict] = None) -> Dict:
    """
    Detect shot type based on head position relative to bbox

    Uses head/nose position to determine if the shot captures:
    - Full body: head in the top 15% of the bbox and bbox height/width > 2.0
    - Medium shot: head in the top 30% of the bbox and bbox height/width > 1.5
    - Close-up: head lower than 30% of the bbox, or bbox height/width < 1.5

    Args:
        keypoints: Pose keypoints list
        bbox: Bbox {'x', 'y', 'width', 'height'}

    Returns:
        Dict with shot type and analysis. On success the keys are
        'shot_type', 'head_position_ratio', 'aspect_ratio', 'reason',
        'bbox_top', 'bbox_bottom' and 'head_y'. When the shot cannot be
        classified only 'shot_type' ('unknown') and 'reason' are present.
    """
    if bbox is None or bbox.get('height', 0) <= 0:
        return {'shot_type': 'unknown', 'reason': 'no bbox'}

    # Get head position: prefer the nose, fall back to either eye.
    # None (not 0) marks "not found" so a keypoint at y == 0 stays valid.
    head_y = None
    for name in ('nose', 'left_eye', 'right_eye'):
        keypoint = get_keypoint_by_name(keypoints, name)
        if keypoint and keypoint.get('confidence', 0) > 0.1:
            head_y = keypoint['y']
            break

    if head_y is None:
        return {'shot_type': 'unknown', 'reason': 'no head keypoints'}

    # A missing width keeps the historical default of 1; a zero or negative
    # width would make the aspect ratio meaningless (or divide by zero).
    bbox_width = bbox.get('width', 1)
    if bbox_width <= 0:
        return {'shot_type': 'unknown', 'reason': 'invalid bbox width'}

    bbox_top = bbox['y']
    bbox_height = bbox['height']
    bbox_bottom = bbox_top + bbox_height

    # Head position ratio (0 = top of bbox, 1 = bottom of bbox)
    head_position_ratio = (head_y - bbox_top) / bbox_height

    # Bbox aspect ratio (height/width): tall boxes suggest more body is visible
    aspect_ratio = bbox_height / bbox_width

    if head_position_ratio < 0.15 and aspect_ratio > 2.0:
        shot_type = 'full_body'
        reason = f'head at {head_position_ratio:.1%} of bbox (top), aspect={aspect_ratio:.1f}'
    elif head_position_ratio < 0.30 and aspect_ratio > 1.5:
        shot_type = 'medium_shot'
        reason = f'head at {head_position_ratio:.1%} of bbox (upper), aspect={aspect_ratio:.1f}'
    elif head_position_ratio > 0.30 or aspect_ratio < 1.5:
        shot_type = 'close_up'
        reason = f'head at {head_position_ratio:.1%} of bbox (middle), aspect={aspect_ratio:.1f}'
    else:
        # Only the exact boundary values (ratio == 0.30 or aspect == 1.5) land here
        shot_type = 'medium_shot'
        reason = f'head at {head_position_ratio:.1%} of bbox'

    return {
        'shot_type': shot_type,
        'head_position_ratio': round(head_position_ratio, 3),
        'aspect_ratio': round(aspect_ratio, 2),
        'reason': reason,
        'bbox_top': bbox_top,
        'bbox_bottom': bbox_bottom,
        'head_y': round(head_y, 2),
    }
|
||||
|
||||
|
||||
def estimate_from_proportions(keypoints: List[Dict], bbox: Optional[Dict] = None) -> Dict:
    """
    Derive reference-unit and height estimates from body proportion ratios

    Measures eye width, head width, shoulder width, body height and torso
    height, then uses the highest-priority reference unit that was measured
    (shoulder_width, then head_width, then eye_width) together with
    BODY_PROPORTIONS to fill in the other reference units and a height
    estimate.

    Args:
        keypoints: Pose keypoints list
        bbox: Optional bbox, forwarded to calculate_body_height

    Returns:
        Dict with three keys:
        - 'available_refs': measured values (pixels) that were positive
        - 'estimated_refs': reference units derived from the chosen unit
        - 'height_estimate': dict describing the height estimate, or None
          when no reference unit or no usable height source exists
    """
    ratios = BODY_PROPORTIONS

    # Raw pixel measurements, keyed by the name they are reported under
    measured = {
        'eye_width_px': calculate_eye_width(keypoints),
        'head_width_px': calculate_head_width(keypoints),
        'shoulder_width_px': calculate_should_width(keypoints),
        'body_height_px': calculate_body_height(keypoints, bbox),
        'torso_height_px': calculate_torso_height(keypoints),
    }
    eye_px = measured['eye_width_px']
    head_px = measured['head_width_px']
    shoulder_px = measured['shoulder_width_px']
    body_px = measured['body_height_px']
    torso_px = measured['torso_height_px']

    result = {
        # Only positive measurements count as available
        'available_refs': {key: round(px, 2) for key, px in measured.items() if px > 0},
        'estimated_refs': {},
        'height_estimate': None,
    }
    derived = result['estimated_refs']

    # Choose the reference unit and derive the two units it is not
    if shoulder_px > 0:
        ref_unit, ref_px, ref_cm = 'shoulder_width', shoulder_px, 45.0
        derived['eye_width_px'] = round(shoulder_px / ratios['eye_to_shoulder'], 2)
        derived['head_width_px'] = round(shoulder_px / ratios['head_to_shoulder'], 2)
    elif head_px > 0:
        ref_unit, ref_px, ref_cm = 'head_width', head_px, 16.0
        derived['eye_width_px'] = round(head_px / ratios['eye_to_head'], 2)
        derived['shoulder_width_px'] = round(head_px * ratios['head_to_shoulder'], 2)
    elif eye_px > 0:
        ref_unit, ref_px, ref_cm = 'eye_width', eye_px, 6.0
        derived['head_width_px'] = round(eye_px * ratios['eye_to_head'], 2)
        derived['shoulder_width_px'] = round(eye_px * ratios['eye_to_shoulder'], 2)
    else:
        # Nothing to scale against
        return result

    if body_px > 0:
        # Measured body height scaled by the reference unit's real size
        ratio = body_px / ref_px
        result['height_estimate'] = {
            'estimated_height_cm': round(ratio * ref_cm, 1),
            'height_ratio': round(ratio, 2),
            'reference_unit': ref_unit,
            'measurement_type': 'body_height',
        }
    elif torso_px > 0:
        # Torso is taken as 45% of full height
        ratio = torso_px / ref_px
        result['height_estimate'] = {
            'estimated_height_cm': round((ratio * ref_cm) / 0.45, 1),
            'height_ratio': round(ratio, 2),
            'reference_unit': ref_unit,
            'measurement_type': 'torso_extrapolated',
        }
    elif ref_unit == 'head_width':
        # No height measurement: fall back to the head-width proportion
        factor = ratios['head_to_height']
        result['height_estimate'] = {
            'estimated_height_cm': round(ref_cm * factor, 1),
            'estimated_height_px': round(ref_px * factor, 2),
            'reference_unit': ref_unit,
            'measurement_type': 'head_proportion',
            'note': 'estimated from head_width without body keypoints',
        }
    elif ref_unit == 'shoulder_width':
        # No height measurement: fall back to the shoulder-width proportion
        factor = ratios['shoulder_to_height']
        result['height_estimate'] = {
            'estimated_height_cm': round(ref_cm * factor, 1),
            'estimated_height_px': round(ref_px * factor, 2),
            'reference_unit': ref_unit,
            'measurement_type': 'shoulder_proportion',
            'note': 'estimated from shoulder_width without body keypoints',
        }

    return result
|
||||
"""
|
||||
Estimate real height using best available reference unit
|
||||
|
||||
Reference units (approximate real sizes for adult):
|
||||
- shoulder_width ≈ 45 cm (most stable, available in medium shots)
|
||||
- head_width (ear to ear) ≈ 16 cm (available in close-up to medium)
|
||||
- eye_width ≈ 6 cm (only available in close-up)
|
||||
|
||||
Args:
|
||||
keypoints: Pose keypoints list
|
||||
bbox: Optional bbox for fallback
|
||||
|
||||
Returns:
|
||||
Dict with estimated real height and reference unit used
|
||||
"""
|
||||
body_height = calculate_body_height(keypoints, bbox)
|
||||
torso_height = calculate_torso_height(keypoints)
|
||||
|
||||
# Determine what we're measuring
|
||||
has_full_body = body_height > 0 and any([
|
||||
get_keypoint_by_name(keypoints, 'left_ankle'),
|
||||
get_keypoint_by_name(keypoints, 'right_ankle'),
|
||||
])
|
||||
|
||||
if body_height <= 0 and torso_height <= 0:
|
||||
return {
|
||||
'estimated_height_cm': 0,
|
||||
'height_ratio': 0,
|
||||
'reference_unit': 'none',
|
||||
'measurement_type': 'none',
|
||||
}
|
||||
|
||||
# Height ratio (body_height / eye_width)
|
||||
height_ratio = body_height / eye_width
|
||||
# Get best reference unit
|
||||
ref_name, ref_value = get_best_reference_unit(keypoints)
|
||||
|
||||
# Estimate real height (assuming eye_width ≈ 6cm)
|
||||
# estimated_height = height_ratio * 6 cm
|
||||
estimated_height_cm = height_ratio * 6.0
|
||||
if ref_value <= 0:
|
||||
return {
|
||||
'estimated_height_cm': 0,
|
||||
'height_ratio': 0,
|
||||
'reference_unit': 'none',
|
||||
'measurement_type': 'none',
|
||||
}
|
||||
|
||||
# Reference unit real size (cm)
|
||||
REF_SIZES = {
|
||||
'shoulder_width': 45.0,
|
||||
'head_width': 16.0,
|
||||
'eye_width': 6.0,
|
||||
}
|
||||
|
||||
ref_size_cm = REF_SIZES.get(ref_name, 6.0)
|
||||
|
||||
# Use appropriate height measurement
|
||||
if has_full_body and body_height > 0:
|
||||
measure_height = body_height
|
||||
measure_type = 'full_body'
|
||||
elif torso_height > 0:
|
||||
measure_height = torso_height
|
||||
measure_type = 'torso'
|
||||
else:
|
||||
measure_height = body_height
|
||||
measure_type = 'bbox_partial'
|
||||
|
||||
# Height ratio
|
||||
height_ratio = measure_height / ref_value
|
||||
|
||||
# Estimate real height
|
||||
estimated_height_cm = height_ratio * ref_size_cm
|
||||
|
||||
# For torso-only, estimate full body by typical proportion (torso ~45% of height)
|
||||
if measure_type == 'torso':
|
||||
full_body_estimate = estimated_height_cm / 0.45
|
||||
estimated_height_cm = full_body_estimate
|
||||
measure_type = 'torso_extrapolated'
|
||||
|
||||
# Height category
|
||||
height_category = "unknown"
|
||||
@@ -440,7 +727,10 @@ def estimate_real_height(keypoints: List[Dict], eye_width: float) -> Dict:
|
||||
'height_ratio': round(height_ratio, 2),
|
||||
'height_category': height_category,
|
||||
'body_height_px': round(body_height, 2),
|
||||
'eye_width_px': round(eye_width, 2),
|
||||
'torso_height_px': round(torso_height, 2),
|
||||
'reference_unit': ref_name,
|
||||
f'{ref_name}_px': round(ref_value, 2),
|
||||
'measurement_type': measure_type,
|
||||
}
|
||||
|
||||
|
||||
@@ -463,6 +753,9 @@ def calculate_proportions(keypoints: List, bbox: Optional[Dict] = None) -> Dict:
|
||||
if keypoints and isinstance(keypoints[0], list):
|
||||
keypoints = convert_mediapipe_to_named(keypoints)
|
||||
|
||||
# Detect shot type first
|
||||
shot_info = detect_shot_type(keypoints, bbox)
|
||||
|
||||
eye_width = calculate_eye_width(keypoints)
|
||||
body_height = calculate_body_height(keypoints, bbox)
|
||||
torso_height = calculate_torso_height(keypoints)
|
||||
@@ -470,6 +763,7 @@ def calculate_proportions(keypoints: List, bbox: Optional[Dict] = None) -> Dict:
|
||||
shoulder_width = calculate_should_width(keypoints)
|
||||
|
||||
proportions = {
|
||||
'shot_type': shot_info,
|
||||
'eye_width': round(eye_width, 2),
|
||||
'body_height': round(body_height, 2),
|
||||
'torso_height': round(torso_height, 2),
|
||||
@@ -491,10 +785,71 @@ def calculate_proportions(keypoints: List, bbox: Optional[Dict] = None) -> Dict:
|
||||
body_shape = calculate_body_shape(keypoints)
|
||||
proportions['body_shape'] = body_shape
|
||||
|
||||
# Estimate real height
|
||||
height_estimate = estimate_real_height(keypoints, eye_width)
|
||||
# Estimate height based on shot type
|
||||
shot_type = shot_info.get('shot_type', 'unknown')
|
||||
|
||||
height_estimate = None
|
||||
|
||||
if shot_type == 'full_body':
|
||||
# Full body visible - use body_height directly
|
||||
height_estimate = estimate_from_proportions(keypoints, bbox)
|
||||
elif shot_type == 'medium_shot':
|
||||
# Medium shot - use torso or extrapolate
|
||||
height_estimate = estimate_from_proportions(keypoints, bbox)
|
||||
if height_estimate.get('height_estimate'):
|
||||
height_estimate['height_estimate']['note'] = 'extrapolated from medium shot'
|
||||
elif shot_type == 'close_up':
|
||||
# Close-up - estimate from reference units using standard proportions
|
||||
shoulder_width = calculate_should_width(keypoints)
|
||||
head_width = calculate_head_width(keypoints)
|
||||
|
||||
if shoulder_width > 0:
|
||||
# shoulder_width × 3.8 ≈ body_height
|
||||
estimated_cm = 45.0 * BODY_PROPORTIONS['shoulder_to_height']
|
||||
height_estimate = {
|
||||
'estimated_height_cm': round(estimated_cm, 1),
|
||||
'reference_unit': 'shoulder_width',
|
||||
'shoulder_width_px': round(shoulder_width, 2),
|
||||
'measurement_type': 'proportion_estimate',
|
||||
'note': 'estimated from shoulder_width (close-up shot)',
|
||||
}
|
||||
elif head_width > 0:
|
||||
# head_width × 7.5 ≈ body_height
|
||||
estimated_cm = 16.0 * BODY_PROPORTIONS['head_to_height']
|
||||
height_estimate = {
|
||||
'estimated_height_cm': round(estimated_cm, 1),
|
||||
'reference_unit': 'head_width',
|
||||
'head_width_px': round(head_width, 2),
|
||||
'measurement_type': 'proportion_estimate',
|
||||
'note': 'estimated from head_width (close-up shot)',
|
||||
}
|
||||
else:
|
||||
height_estimate = estimate_from_proportions(keypoints, bbox)
|
||||
else:
|
||||
height_estimate = estimate_from_proportions(keypoints, bbox)
|
||||
|
||||
proportions['height_estimate'] = height_estimate
|
||||
|
||||
# Add reference units info
|
||||
head_width = calculate_head_width(keypoints)
|
||||
proportions['reference_units'] = {
|
||||
'eye_width': round(eye_width, 2),
|
||||
'head_width': round(head_width, 2),
|
||||
'shoulder_width': round(shoulder_width, 2),
|
||||
}
|
||||
|
||||
# Proportion ratios validation
|
||||
if eye_width > 0 and head_width > 0:
|
||||
proportions['proportion_ratios'] = {
|
||||
'head_to_eye': round(head_width / eye_width, 2), # Should be ~2.67
|
||||
}
|
||||
if head_width > 0 and shoulder_width > 0:
|
||||
proportions['proportion_ratios'] = proportions.get('proportion_ratios', {})
|
||||
proportions['proportion_ratios']['shoulder_to_head'] = round(shoulder_width / head_width, 2) # Should be ~2.8
|
||||
if eye_width > 0 and shoulder_width > 0:
|
||||
proportions['proportion_ratios'] = proportions.get('proportion_ratios', {})
|
||||
proportions['proportion_ratios']['shoulder_to_eye'] = round(shoulder_width / eye_width, 2) # Should be ~7.5
|
||||
|
||||
return proportions
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user