feat: add shot type detection and proportion-based height estimation

- detect_shot_type(): classify full_body/medium_shot/close_up
- estimate height using shoulder_width × 3.8 (~171cm) for close-up
- add BODY_PROPORTIONS constants for validation
- head position ratio + bbox aspect ratio → shot type
- enables filtering full-body shots in video search
This commit is contained in:
Accusys
2026-06-22 02:47:01 +08:00
parent 606f31f13c
commit d94b96d884

View File

@@ -143,13 +143,62 @@ def calculate_eye_width(keypoints: List[Dict]) -> float:
if left_eye is None or right_eye is None:
return 0.0
# Filter by confidence
if left_eye.get('confidence', 0) < 0.1 or right_eye.get('confidence', 0) < 0.1:
return 0.0
return calculate_distance(left_eye, right_eye)
def calculate_head_width(keypoints: List[Dict]) -> float:
    """
    Measure the head width as the ear-to-ear distance.

    Serves as an alternative reference unit to the eye width.

    Args:
        keypoints: Pose keypoints list

    Returns:
        Head width in pixels, or 0.0 when either ear keypoint is missing
        or has a confidence below 0.1
    """
    ears = [
        get_keypoint_by_name(keypoints, side)
        for side in ('left_ear', 'right_ear')
    ]

    # Both ears must be present ...
    if any(ear is None for ear in ears):
        return 0.0

    # ... and both must clear the minimum confidence threshold.
    if any(ear.get('confidence', 0) < 0.1 for ear in ears):
        return 0.0

    return calculate_distance(ears[0], ears[1])
def get_best_reference_unit(keypoints: List[Dict]) -> Tuple[str, float]:
    """
    Pick the reference unit to use for proportion calculations.

    Candidates are tried in priority order
    shoulder_width > head_width > eye_width, and the first one with a
    positive measurement wins.

    Args:
        keypoints: Pose keypoints list

    Returns:
        Tuple of (unit_name, unit_value_in_pixels); ('none', 0.0) when no
        candidate yields a positive value
    """
    # Listed in priority order; all three are measured before choosing.
    candidates = (
        ('shoulder_width', calculate_should_width(keypoints)),
        ('head_width', calculate_head_width(keypoints)),
        ('eye_width', calculate_eye_width(keypoints)),
    )

    for unit_name, pixels in candidates:
        if pixels > 0:
            return (unit_name, pixels)

    return ('none', 0.0)
def calculate_body_height(keypoints: List[Dict], bbox: Optional[Dict] = None) -> float:
"""
Calculate full body height
@@ -393,36 +442,274 @@ def calculate_body_shape(keypoints: List[Dict]) -> Dict:
}
def estimate_real_height(keypoints: List[Dict], eye_width: float) -> Dict:
# Standard body proportions (adult average).
# These ratios are used to cross-validate and estimate missing values.
# They are kept mutually consistent with the reference sizes used for
# height estimation in this module (eye_width 6 cm, head_width 16 cm,
# shoulder_width 45 cm):
#   16 / 6 ≈ 2.67, 45 / 6 = 7.5, 45 / 16 ≈ 2.8, 45 × 3.8 ≈ 171 cm.
# NOTE: head_width is the ear-to-ear distance, so the classic
# "7.5 heads tall" rule (which counts head *heights*) does not apply;
# the height ratio must equal head_to_shoulder × shoulder_to_height
# (2.8 × 3.8 ≈ 10.6), which gives 16 cm × 10.6 ≈ 170 cm.
BODY_PROPORTIONS = {
    'eye_to_head': 2.67,         # head_width ≈ 2.67 × eye_width
    'eye_to_shoulder': 7.5,      # shoulder_width ≈ 7.5 × eye_width
    'head_to_shoulder': 2.8,     # shoulder_width ≈ 2.8 × head_width
    'head_to_height': 10.6,      # body_height ≈ 10.6 × head_width (ear to ear)
    'shoulder_to_height': 3.8,   # body_height ≈ 3.8 × shoulder_width
}
def detect_shot_type(keypoints: List[Dict], bbox: Optional[Dict] = None) -> Dict:
    """
    Detect shot type based on head position relative to bbox

    Uses the vertical position of the head (nose, falling back to the
    left then right eye) inside the bbox, together with the bbox aspect
    ratio (height / width), to classify the shot:
    - full_body: head in the top 15% of the bbox and aspect ratio > 2.0
    - medium_shot: head in the top 30% of the bbox and aspect ratio > 1.5
    - close_up: head below 30% of the bbox, or aspect ratio < 1.5

    Args:
        keypoints: Pose keypoints list
        bbox: Bbox {'x', 'y', 'width', 'height'}

    Returns:
        Dict with shot type and analysis. When classification is not
        possible the dict only holds 'shot_type': 'unknown' and a
        'reason' ('no bbox', 'invalid bbox width' or 'no head keypoints').
    """
    if bbox is None or bbox.get('height', 0) <= 0:
        return {'shot_type': 'unknown', 'reason': 'no bbox'}

    # A missing width keeps the previous default of 1, but a zero or
    # negative width would make the aspect ratio undefined (division by
    # zero) or meaningless, so bail out before dividing.
    bbox_width = bbox.get('width', 1)
    if bbox_width <= 0:
        return {'shot_type': 'unknown', 'reason': 'invalid bbox width'}

    # Get head position: first confident keypoint among nose / eyes.
    head_y = 0
    for name in ('nose', 'left_eye', 'right_eye'):
        keypoint = get_keypoint_by_name(keypoints, name)
        if keypoint and keypoint.get('confidence', 0) > 0.1:
            head_y = keypoint['y']
            break

    # A y coordinate of 0 or less is treated as "no usable head keypoint".
    if head_y <= 0:
        return {'shot_type': 'unknown', 'reason': 'no head keypoints'}

    bbox_top = bbox['y']
    bbox_height = bbox['height']
    bbox_bottom = bbox_top + bbox_height

    # Head position ratio (0 = top of bbox, 1 = bottom of bbox)
    head_position_ratio = (head_y - bbox_top) / bbox_height

    # Bbox aspect ratio (height / width)
    aspect_ratio = bbox_height / bbox_width

    if head_position_ratio < 0.15 and aspect_ratio > 2.0:
        shot_type = 'full_body'
        reason = f'head at {head_position_ratio:.1%} of bbox (top), aspect={aspect_ratio:.1f}'
    elif head_position_ratio < 0.30 and aspect_ratio > 1.5:
        shot_type = 'medium_shot'
        reason = f'head at {head_position_ratio:.1%} of bbox (upper), aspect={aspect_ratio:.1f}'
    elif head_position_ratio > 0.30 or aspect_ratio < 1.5:
        shot_type = 'close_up'
        reason = f'head at {head_position_ratio:.1%} of bbox (middle), aspect={aspect_ratio:.1f}'
    else:
        # Only reached on the exact threshold values (ratio == 0.30 or
        # aspect == 1.5) that none of the strict comparisons above match.
        shot_type = 'medium_shot'
        reason = f'head at {head_position_ratio:.1%} of bbox'

    return {
        'shot_type': shot_type,
        'head_position_ratio': round(head_position_ratio, 3),
        'aspect_ratio': round(aspect_ratio, 2),
        'reason': reason,
        'bbox_top': bbox_top,
        'bbox_bottom': bbox_bottom,
        'head_y': round(head_y, 2),
    }
def estimate_from_proportions(keypoints: List[Dict], bbox: Optional[Dict] = None) -> Dict:
    """
    Estimate measurements using body proportion ratios

    Measures every reference unit that is available, derives the missing
    ones from the best available unit (shoulder_width > head_width >
    eye_width) via BODY_PROPORTIONS, and then estimates the real height.

    Args:
        keypoints: Pose keypoints list
        bbox: Optional bbox, forwarded to calculate_body_height

    Returns:
        Dict with:
        - 'available_refs': measured values (pixels) that were positive
        - 'estimated_refs': reference units derived from the chosen unit
        - 'height_estimate': dict describing the height estimate, or None
          when no estimate could be made
    """
    eye_px = calculate_eye_width(keypoints)
    head_px = calculate_head_width(keypoints)
    shoulder_px = calculate_should_width(keypoints)
    height_px = calculate_body_height(keypoints, bbox)
    torso_px = calculate_torso_height(keypoints)

    # Record every measurement that was actually obtained.
    measured = {}
    for key, value in (
        ('eye_width_px', eye_px),
        ('head_width_px', head_px),
        ('shoulder_width_px', shoulder_px),
        ('body_height_px', height_px),
        ('torso_height_px', torso_px),
    ):
        if value > 0:
            measured[key] = round(value, 2)

    derived = {}
    result = {
        'available_refs': measured,
        'estimated_refs': derived,
        'height_estimate': None,
    }

    # Choose the reference unit and derive the other two units from it.
    if shoulder_px > 0:
        ref_unit, ref_px, ref_cm = 'shoulder_width', shoulder_px, 45.0
        derived['eye_width_px'] = round(
            shoulder_px / BODY_PROPORTIONS['eye_to_shoulder'], 2)
        derived['head_width_px'] = round(
            shoulder_px / BODY_PROPORTIONS['head_to_shoulder'], 2)
    elif head_px > 0:
        ref_unit, ref_px, ref_cm = 'head_width', head_px, 16.0
        derived['eye_width_px'] = round(
            head_px / BODY_PROPORTIONS['eye_to_head'], 2)
        derived['shoulder_width_px'] = round(
            head_px * BODY_PROPORTIONS['head_to_shoulder'], 2)
    elif eye_px > 0:
        ref_unit, ref_px, ref_cm = 'eye_width', eye_px, 6.0
        derived['head_width_px'] = round(
            eye_px * BODY_PROPORTIONS['eye_to_head'], 2)
        derived['shoulder_width_px'] = round(
            eye_px * BODY_PROPORTIONS['eye_to_shoulder'], 2)
    else:
        # No reference unit at all: nothing further can be estimated.
        return result

    if height_px > 0:
        # Body height is known: scale it by the reference unit's real size.
        ratio = height_px / ref_px
        result['height_estimate'] = {
            'estimated_height_cm': round(ratio * ref_cm, 1),
            'height_ratio': round(ratio, 2),
            'reference_unit': ref_unit,
            'measurement_type': 'body_height',
        }
        return result

    if torso_px > 0:
        # Only the torso is known: torso ≈ 45% of full height.
        ratio = torso_px / ref_px
        result['height_estimate'] = {
            'estimated_height_cm': round(ratio * ref_cm / 0.45, 1),
            'height_ratio': round(ratio, 2),
            'reference_unit': ref_unit,
            'measurement_type': 'torso_extrapolated',
        }
        return result

    # Neither body nor torso height: fall back to a pure proportion
    # estimate. Only head_width and shoulder_width have a height ratio;
    # an eye_width-only reference leaves 'height_estimate' as None.
    fallback = {
        'head_width': ('head_to_height', 'head_proportion'),
        'shoulder_width': ('shoulder_to_height', 'shoulder_proportion'),
    }.get(ref_unit)
    if fallback is not None:
        ratio_key, measurement_type = fallback
        to_height = BODY_PROPORTIONS[ratio_key]
        result['height_estimate'] = {
            'estimated_height_cm': round(ref_cm * to_height, 1),
            'estimated_height_px': round(ref_px * to_height, 2),
            'reference_unit': ref_unit,
            'measurement_type': measurement_type,
            'note': f'estimated from {ref_unit} without body keypoints',
        }

    return result
"""
Estimate real height using best available reference unit
Reference units (approximate real sizes for adult):
- shoulder_width ≈ 45 cm (most stable, available in medium shots)
- head_width (ear to ear) ≈ 16 cm (available in close-up to medium)
- eye_width ≈ 6 cm (only available in close-up)
Args:
keypoints: Pose keypoints list
bbox: Optional bbox for fallback
Returns:
Dict with estimated real height and reference unit used
"""
body_height = calculate_body_height(keypoints, bbox)
torso_height = calculate_torso_height(keypoints)
# Determine what we're measuring
has_full_body = body_height > 0 and any([
get_keypoint_by_name(keypoints, 'left_ankle'),
get_keypoint_by_name(keypoints, 'right_ankle'),
])
if body_height <= 0 and torso_height <= 0:
return {
'estimated_height_cm': 0,
'height_ratio': 0,
'reference_unit': 'none',
'measurement_type': 'none',
}
# Height ratio (body_height / eye_width)
height_ratio = body_height / eye_width
# Get best reference unit
ref_name, ref_value = get_best_reference_unit(keypoints)
# Estimate real height (assuming eye_width ≈ 6cm)
# estimated_height = height_ratio * 6 cm
estimated_height_cm = height_ratio * 6.0
if ref_value <= 0:
return {
'estimated_height_cm': 0,
'height_ratio': 0,
'reference_unit': 'none',
'measurement_type': 'none',
}
# Reference unit real size (cm)
REF_SIZES = {
'shoulder_width': 45.0,
'head_width': 16.0,
'eye_width': 6.0,
}
ref_size_cm = REF_SIZES.get(ref_name, 6.0)
# Use appropriate height measurement
if has_full_body and body_height > 0:
measure_height = body_height
measure_type = 'full_body'
elif torso_height > 0:
measure_height = torso_height
measure_type = 'torso'
else:
measure_height = body_height
measure_type = 'bbox_partial'
# Height ratio
height_ratio = measure_height / ref_value
# Estimate real height
estimated_height_cm = height_ratio * ref_size_cm
# For torso-only, estimate full body by typical proportion (torso ~45% of height)
if measure_type == 'torso':
full_body_estimate = estimated_height_cm / 0.45
estimated_height_cm = full_body_estimate
measure_type = 'torso_extrapolated'
# Height category
height_category = "unknown"
@@ -440,7 +727,10 @@ def estimate_real_height(keypoints: List[Dict], eye_width: float) -> Dict:
'height_ratio': round(height_ratio, 2),
'height_category': height_category,
'body_height_px': round(body_height, 2),
'eye_width_px': round(eye_width, 2),
'torso_height_px': round(torso_height, 2),
'reference_unit': ref_name,
f'{ref_name}_px': round(ref_value, 2),
'measurement_type': measure_type,
}
@@ -463,6 +753,9 @@ def calculate_proportions(keypoints: List, bbox: Optional[Dict] = None) -> Dict:
if keypoints and isinstance(keypoints[0], list):
keypoints = convert_mediapipe_to_named(keypoints)
# Detect shot type first
shot_info = detect_shot_type(keypoints, bbox)
eye_width = calculate_eye_width(keypoints)
body_height = calculate_body_height(keypoints, bbox)
torso_height = calculate_torso_height(keypoints)
@@ -470,6 +763,7 @@ def calculate_proportions(keypoints: List, bbox: Optional[Dict] = None) -> Dict:
shoulder_width = calculate_should_width(keypoints)
proportions = {
'shot_type': shot_info,
'eye_width': round(eye_width, 2),
'body_height': round(body_height, 2),
'torso_height': round(torso_height, 2),
@@ -491,10 +785,71 @@ def calculate_proportions(keypoints: List, bbox: Optional[Dict] = None) -> Dict:
body_shape = calculate_body_shape(keypoints)
proportions['body_shape'] = body_shape
# Estimate real height
height_estimate = estimate_real_height(keypoints, eye_width)
# Estimate height based on shot type
shot_type = shot_info.get('shot_type', 'unknown')
height_estimate = None
if shot_type == 'full_body':
# Full body visible - use body_height directly
height_estimate = estimate_from_proportions(keypoints, bbox)
elif shot_type == 'medium_shot':
# Medium shot - use torso or extrapolate
height_estimate = estimate_from_proportions(keypoints, bbox)
if height_estimate.get('height_estimate'):
height_estimate['height_estimate']['note'] = 'extrapolated from medium shot'
elif shot_type == 'close_up':
# Close-up - estimate from reference units using standard proportions
shoulder_width = calculate_should_width(keypoints)
head_width = calculate_head_width(keypoints)
if shoulder_width > 0:
# shoulder_width × 3.8 ≈ body_height
estimated_cm = 45.0 * BODY_PROPORTIONS['shoulder_to_height']
height_estimate = {
'estimated_height_cm': round(estimated_cm, 1),
'reference_unit': 'shoulder_width',
'shoulder_width_px': round(shoulder_width, 2),
'measurement_type': 'proportion_estimate',
'note': 'estimated from shoulder_width (close-up shot)',
}
elif head_width > 0:
# head_width × 7.5 ≈ body_height
estimated_cm = 16.0 * BODY_PROPORTIONS['head_to_height']
height_estimate = {
'estimated_height_cm': round(estimated_cm, 1),
'reference_unit': 'head_width',
'head_width_px': round(head_width, 2),
'measurement_type': 'proportion_estimate',
'note': 'estimated from head_width (close-up shot)',
}
else:
height_estimate = estimate_from_proportions(keypoints, bbox)
else:
height_estimate = estimate_from_proportions(keypoints, bbox)
proportions['height_estimate'] = height_estimate
# Add reference units info
head_width = calculate_head_width(keypoints)
proportions['reference_units'] = {
'eye_width': round(eye_width, 2),
'head_width': round(head_width, 2),
'shoulder_width': round(shoulder_width, 2),
}
# Proportion ratios validation
if eye_width > 0 and head_width > 0:
proportions['proportion_ratios'] = {
'head_to_eye': round(head_width / eye_width, 2), # Should be ~2.67
}
if head_width > 0 and shoulder_width > 0:
proportions['proportion_ratios'] = proportions.get('proportion_ratios', {})
proportions['proportion_ratios']['shoulder_to_head'] = round(shoulder_width / head_width, 2) # Should be ~2.8
if eye_width > 0 and shoulder_width > 0:
proportions['proportion_ratios'] = proportions.get('proportion_ratios', {})
proportions['proportion_ratios']['shoulder_to_eye'] = round(shoulder_width / eye_width, 2) # Should be ~7.5
return proportions