From d94b96d884ba47e7e93a501eda7b71040765a54f Mon Sep 17 00:00:00 2001 From: Accusys Date: Mon, 22 Jun 2026 02:47:01 +0800 Subject: [PATCH] feat: add shot type detection and proportion-based height estimation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - detect_shot_type(): classify full_body/medium_shot/close_up - estimate height using shoulder_width × 3.8 (~171cm) for close-up - add BODY_PROPORTIONS constants for validation - head position ratio + bbox aspect ratio → shot type - enables filtering full-body shots in video search --- scripts/utils/proportion_calculator.py | 393 +++++++++++++++++++++++-- 1 file changed, 374 insertions(+), 19 deletions(-) diff --git a/scripts/utils/proportion_calculator.py b/scripts/utils/proportion_calculator.py index 6680cbe..a7e20bf 100644 --- a/scripts/utils/proportion_calculator.py +++ b/scripts/utils/proportion_calculator.py @@ -143,13 +143,62 @@ def calculate_eye_width(keypoints: List[Dict]) -> float: if left_eye is None or right_eye is None: return 0.0 - # Filter by confidence if left_eye.get('confidence', 0) < 0.1 or right_eye.get('confidence', 0) < 0.1: return 0.0 return calculate_distance(left_eye, right_eye) +def calculate_head_width(keypoints: List[Dict]) -> float: + """ + Calculate head width (ear to ear distance) + Alternative reference unit, more stable for medium shots + + Args: + keypoints: Pose keypoints list + + Returns: + Head width in pixels + """ + left_ear = get_keypoint_by_name(keypoints, 'left_ear') + right_ear = get_keypoint_by_name(keypoints, 'right_ear') + + if left_ear is None or right_ear is None: + return 0.0 + + if left_ear.get('confidence', 0) < 0.1 or right_ear.get('confidence', 0) < 0.1: + return 0.0 + + return calculate_distance(left_ear, right_ear) + + +def get_best_reference_unit(keypoints: List[Dict]) -> Tuple[str, float]: + """ + Get the best available reference unit for proportion calculations + + Priority: shoulder_width > head_width > eye_width 
+ (larger units are more stable and available in wider shots) + + Args: + keypoints: Pose keypoints list + + Returns: + Tuple of (unit_name, unit_value_in_pixels) + """ + shoulder = calculate_should_width(keypoints) + head = calculate_head_width(keypoints) + eye = calculate_eye_width(keypoints) + + if shoulder > 0: + return ('shoulder_width', shoulder) + elif head > 0: + return ('head_width', head) + elif eye > 0: + return ('eye_width', eye) + else: + return ('none', 0.0) + + def calculate_body_height(keypoints: List[Dict], bbox: Optional[Dict] = None) -> float: """ Calculate full body height @@ -393,36 +442,274 @@ def calculate_body_shape(keypoints: List[Dict]) -> Dict: } -def estimate_real_height(keypoints: List[Dict], eye_width: float) -> Dict: +# Standard body proportions (adult average) +# These ratios are used to cross-validate and estimate missing values +BODY_PROPORTIONS = { + 'eye_to_head': 2.67, # head_width ≈ 2.67 × eye_width + 'eye_to_shoulder': 7.5, # shoulder_width ≈ 7.5 × eye_width + 'head_to_shoulder': 2.8, # shoulder_width ≈ 2.8 × head_width + 'head_to_height': 7.5, # body_height ≈ 7.5 × head_width + 'shoulder_to_height': 3.8, # body_height ≈ 3.8 × shoulder_width +} + + +def detect_shot_type(keypoints: List[Dict], bbox: Optional[Dict] = None) -> Dict: """ - Estimate real height using eye_width as reference + Detect shot type based on head position relative to bbox - Assumptions: - - Average eye_width ≈ 6 cm - - Average adult height ≈ 170 cm - - ratio = body_height_pixels / eye_width_pixels + Uses head/nose position to determine if the shot captures: + - Full body: head near top of bbox, bbox extends far below + - Medium shot: head in upper third of bbox + - Close-up: head dominates bbox Args: keypoints: Pose keypoints list - eye_width: Eye distance in pixels + bbox: Bbox {'x', 'y', 'width', 'height'} Returns: - Dict with estimated real height + Dict with shot type and analysis """ - body_height = calculate_body_height(keypoints) + if bbox is None 
or bbox.get('height', 0) <= 0: + return {'shot_type': 'unknown', 'reason': 'no bbox'} - if eye_width <= 0 or body_height <= 0: + # Get head position (use nose or estimate from eyes) + nose = get_keypoint_by_name(keypoints, 'nose') + left_eye = get_keypoint_by_name(keypoints, 'left_eye') + right_eye = get_keypoint_by_name(keypoints, 'right_eye') + + head_y = 0 + if nose and nose.get('confidence', 0) > 0.1: + head_y = nose['y'] + elif left_eye and left_eye.get('confidence', 0) > 0.1: + head_y = left_eye['y'] + elif right_eye and right_eye.get('confidence', 0) > 0.1: + head_y = right_eye['y'] + + if head_y <= 0: + return {'shot_type': 'unknown', 'reason': 'no head keypoints'} + + bbox_top = bbox['y'] + bbox_height = bbox['height'] + bbox_bottom = bbox_top + bbox_height + + # Calculate head position ratio (0 = top of bbox, 1 = bottom of bbox) + head_position_ratio = (head_y - bbox_top) / bbox_height + + # Determine shot type + # - Full body: head in top 15% of bbox, bbox is tall + # - Medium: head in top 30% of bbox + # - Close-up: head in middle/lower part of bbox + + # Also check bbox aspect ratio (height/width) + aspect_ratio = bbox_height / bbox.get('width', 1) + + shot_type = 'unknown' + reason = '' + + if head_position_ratio < 0.15 and aspect_ratio > 2.0: + shot_type = 'full_body' + reason = f'head at {head_position_ratio:.1%} of bbox (top), aspect={aspect_ratio:.1f}' + elif head_position_ratio < 0.30 and aspect_ratio > 1.5: + shot_type = 'medium_shot' + reason = f'head at {head_position_ratio:.1%} of bbox (upper), aspect={aspect_ratio:.1f}' + elif head_position_ratio > 0.30 or aspect_ratio < 1.5: + shot_type = 'close_up' + reason = f'head at {head_position_ratio:.1%} of bbox (middle), aspect={aspect_ratio:.1f}' + else: + shot_type = 'medium_shot' + reason = f'head at {head_position_ratio:.1%} of bbox' + + return { + 'shot_type': shot_type, + 'head_position_ratio': round(head_position_ratio, 3), + 'aspect_ratio': round(aspect_ratio, 2), + 'reason': reason, + 
'bbox_top': bbox_top, + 'bbox_bottom': bbox_bottom, + 'head_y': round(head_y, 2), + } + + +def estimate_from_proportions(keypoints: List[Dict], bbox: Optional[Dict] = None) -> Dict: + """ + Estimate measurements using body proportion ratios + + Uses whichever reference unit is available and applies standard + body proportions to estimate other measurements. + + Args: + keypoints: Pose keypoints list + bbox: Optional bbox for fallback + + Returns: + Dict with estimated measurements based on available reference units + """ + eye_width = calculate_eye_width(keypoints) + head_width = calculate_head_width(keypoints) + shoulder_width = calculate_should_width(keypoints) + body_height = calculate_body_height(keypoints, bbox) + torso_height = calculate_torso_height(keypoints) + + estimates = { + 'available_refs': {}, + 'estimated_refs': {}, + 'height_estimate': None, + } + + # Record available measurements + if eye_width > 0: + estimates['available_refs']['eye_width_px'] = round(eye_width, 2) + if head_width > 0: + estimates['available_refs']['head_width_px'] = round(head_width, 2) + if shoulder_width > 0: + estimates['available_refs']['shoulder_width_px'] = round(shoulder_width, 2) + if body_height > 0: + estimates['available_refs']['body_height_px'] = round(body_height, 2) + if torso_height > 0: + estimates['available_refs']['torso_height_px'] = round(torso_height, 2) + + # Estimate missing reference units using available ones + if shoulder_width > 0: + # Shoulder is the most reliable reference for medium shots + estimates['estimated_refs']['eye_width_px'] = round(shoulder_width / BODY_PROPORTIONS['eye_to_shoulder'], 2) + estimates['estimated_refs']['head_width_px'] = round(shoulder_width / BODY_PROPORTIONS['head_to_shoulder'], 2) + ref_unit = 'shoulder_width' + ref_px = shoulder_width + ref_cm = 45.0 + elif head_width > 0: + estimates['estimated_refs']['eye_width_px'] = round(head_width / BODY_PROPORTIONS['eye_to_head'], 2) + 
estimates['estimated_refs']['shoulder_width_px'] = round(head_width * BODY_PROPORTIONS['head_to_shoulder'], 2) + ref_unit = 'head_width' + ref_px = head_width + ref_cm = 16.0 + elif eye_width > 0: + estimates['estimated_refs']['head_width_px'] = round(eye_width * BODY_PROPORTIONS['eye_to_head'], 2) + estimates['estimated_refs']['shoulder_width_px'] = round(eye_width * BODY_PROPORTIONS['eye_to_shoulder'], 2) + ref_unit = 'eye_width' + ref_px = eye_width + ref_cm = 6.0 + else: + return estimates + + # Estimate height if we have body_height or torso_height + if body_height > 0: + height_ratio = body_height / ref_px + estimated_cm = height_ratio * ref_cm + estimates['height_estimate'] = { + 'estimated_height_cm': round(estimated_cm, 1), + 'height_ratio': round(height_ratio, 2), + 'reference_unit': ref_unit, + 'measurement_type': 'body_height', + } + elif torso_height > 0: + # Torso ≈ 45% of full height + torso_ratio = torso_height / ref_px + torso_cm = torso_ratio * ref_cm + full_height_cm = torso_cm / 0.45 + estimates['height_estimate'] = { + 'estimated_height_cm': round(full_height_cm, 1), + 'height_ratio': round(torso_ratio, 2), + 'reference_unit': ref_unit, + 'measurement_type': 'torso_extrapolated', + } + elif ref_unit == 'head_width' and head_width > 0: + # Estimate height from head: body_height ≈ 7.5 × head_width + estimated_height_px = head_width * BODY_PROPORTIONS['head_to_height'] + estimated_cm = 16.0 * BODY_PROPORTIONS['head_to_height'] # ~120cm baseline + estimates['height_estimate'] = { + 'estimated_height_cm': round(estimated_cm, 1), + 'estimated_height_px': round(estimated_height_px, 2), + 'reference_unit': ref_unit, + 'measurement_type': 'head_proportion', + 'note': 'estimated from head_width without body keypoints', + } + elif ref_unit == 'shoulder_width' and shoulder_width > 0: + # Estimate height from shoulder: body_height ≈ 3.8 × shoulder_width + estimated_height_px = shoulder_width * BODY_PROPORTIONS['shoulder_to_height'] + estimated_cm = 45.0 * 
BODY_PROPORTIONS['shoulder_to_height'] # ~171cm baseline + estimates['height_estimate'] = { + 'estimated_height_cm': round(estimated_cm, 1), + 'estimated_height_px': round(estimated_height_px, 2), + 'reference_unit': ref_unit, + 'measurement_type': 'shoulder_proportion', + 'note': 'estimated from shoulder_width without body keypoints', + } + + return estimates + """ + Estimate real height using best available reference unit + + Reference units (approximate real sizes for adult): + - shoulder_width ≈ 45 cm (most stable, available in medium shots) + - head_width (ear to ear) ≈ 16 cm (available in close-up to medium) + - eye_width ≈ 6 cm (only available in close-up) + + Args: + keypoints: Pose keypoints list + bbox: Optional bbox for fallback + + Returns: + Dict with estimated real height and reference unit used + """ + body_height = calculate_body_height(keypoints, bbox) + torso_height = calculate_torso_height(keypoints) + + # Determine what we're measuring + has_full_body = body_height > 0 and any([ + get_keypoint_by_name(keypoints, 'left_ankle'), + get_keypoint_by_name(keypoints, 'right_ankle'), + ]) + + if body_height <= 0 and torso_height <= 0: return { 'estimated_height_cm': 0, 'height_ratio': 0, + 'reference_unit': 'none', + 'measurement_type': 'none', } - # Height ratio (body_height / eye_width) - height_ratio = body_height / eye_width + # Get best reference unit + ref_name, ref_value = get_best_reference_unit(keypoints) - # Estimate real height (assuming eye_width ≈ 6cm) - # estimated_height = height_ratio * 6 cm - estimated_height_cm = height_ratio * 6.0 + if ref_value <= 0: + return { + 'estimated_height_cm': 0, + 'height_ratio': 0, + 'reference_unit': 'none', + 'measurement_type': 'none', + } + + # Reference unit real size (cm) + REF_SIZES = { + 'shoulder_width': 45.0, + 'head_width': 16.0, + 'eye_width': 6.0, + } + + ref_size_cm = REF_SIZES.get(ref_name, 6.0) + + # Use appropriate height measurement + if has_full_body and body_height > 0: + measure_height 
= body_height + measure_type = 'full_body' + elif torso_height > 0: + measure_height = torso_height + measure_type = 'torso' + else: + measure_height = body_height + measure_type = 'bbox_partial' + + # Height ratio + height_ratio = measure_height / ref_value + + # Estimate real height + estimated_height_cm = height_ratio * ref_size_cm + + # For torso-only, estimate full body by typical proportion (torso ~45% of height) + if measure_type == 'torso': + full_body_estimate = estimated_height_cm / 0.45 + estimated_height_cm = full_body_estimate + measure_type = 'torso_extrapolated' # Height category height_category = "unknown" @@ -440,7 +727,10 @@ def estimate_real_height(keypoints: List[Dict], eye_width: float) -> Dict: 'height_ratio': round(height_ratio, 2), 'height_category': height_category, 'body_height_px': round(body_height, 2), - 'eye_width_px': round(eye_width, 2), + 'torso_height_px': round(torso_height, 2), + 'reference_unit': ref_name, + f'{ref_name}_px': round(ref_value, 2), + 'measurement_type': measure_type, } @@ -463,6 +753,9 @@ def calculate_proportions(keypoints: List, bbox: Optional[Dict] = None) -> Dict: if keypoints and isinstance(keypoints[0], list): keypoints = convert_mediapipe_to_named(keypoints) + # Detect shot type first + shot_info = detect_shot_type(keypoints, bbox) + eye_width = calculate_eye_width(keypoints) body_height = calculate_body_height(keypoints, bbox) torso_height = calculate_torso_height(keypoints) @@ -470,6 +763,7 @@ def calculate_proportions(keypoints: List, bbox: Optional[Dict] = None) -> Dict: shoulder_width = calculate_should_width(keypoints) proportions = { + 'shot_type': shot_info, 'eye_width': round(eye_width, 2), 'body_height': round(body_height, 2), 'torso_height': round(torso_height, 2), @@ -491,10 +785,71 @@ def calculate_proportions(keypoints: List, bbox: Optional[Dict] = None) -> Dict: body_shape = calculate_body_shape(keypoints) proportions['body_shape'] = body_shape - # Estimate real height - height_estimate = 
estimate_real_height(keypoints, eye_width) + # Estimate height based on shot type + shot_type = shot_info.get('shot_type', 'unknown') + + height_estimate = None + + if shot_type == 'full_body': + # Full body visible - use body_height directly + height_estimate = estimate_from_proportions(keypoints, bbox) + elif shot_type == 'medium_shot': + # Medium shot - use torso or extrapolate + height_estimate = estimate_from_proportions(keypoints, bbox) + if height_estimate.get('height_estimate'): + height_estimate['height_estimate']['note'] = 'extrapolated from medium shot' + elif shot_type == 'close_up': + # Close-up - estimate from reference units using standard proportions + shoulder_width = calculate_should_width(keypoints) + head_width = calculate_head_width(keypoints) + + if shoulder_width > 0: + # shoulder_width × 3.8 ≈ body_height + estimated_cm = 45.0 * BODY_PROPORTIONS['shoulder_to_height'] + height_estimate = { + 'estimated_height_cm': round(estimated_cm, 1), + 'reference_unit': 'shoulder_width', + 'shoulder_width_px': round(shoulder_width, 2), + 'measurement_type': 'proportion_estimate', + 'note': 'estimated from shoulder_width (close-up shot)', + } + elif head_width > 0: + # head_width × 7.5 ≈ body_height + estimated_cm = 16.0 * BODY_PROPORTIONS['head_to_height'] + height_estimate = { + 'estimated_height_cm': round(estimated_cm, 1), + 'reference_unit': 'head_width', + 'head_width_px': round(head_width, 2), + 'measurement_type': 'proportion_estimate', + 'note': 'estimated from head_width (close-up shot)', + } + else: + height_estimate = estimate_from_proportions(keypoints, bbox) + else: + height_estimate = estimate_from_proportions(keypoints, bbox) + proportions['height_estimate'] = height_estimate + # Add reference units info + head_width = calculate_head_width(keypoints) + proportions['reference_units'] = { + 'eye_width': round(eye_width, 2), + 'head_width': round(head_width, 2), + 'shoulder_width': round(shoulder_width, 2), + } + + # Proportion ratios 
validation + if eye_width > 0 and head_width > 0: + proportions['proportion_ratios'] = { + 'head_to_eye': round(head_width / eye_width, 2), # Should be ~2.67 + } + if head_width > 0 and shoulder_width > 0: + proportions['proportion_ratios'] = proportions.get('proportion_ratios', {}) + proportions['proportion_ratios']['shoulder_to_head'] = round(shoulder_width / head_width, 2) # Should be ~2.8 + if eye_width > 0 and shoulder_width > 0: + proportions['proportion_ratios'] = proportions.get('proportion_ratios', {}) + proportions['proportion_ratios']['shoulder_to_eye'] = round(shoulder_width / eye_width, 2) # Should be ~7.5 + return proportions