feat: update Python processors and add utility scripts
- Update ASR, face, OCR, pose processors - Add release pre-flight check script - Add synonym generation, chunk processing scripts - Add face recognition, stamp search utilities
This commit is contained in:
169
scripts/find_kids_pose.py
Normal file
169
scripts/find_kids_pose.py
Normal file
@@ -0,0 +1,169 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
Find "Kids" in pose data based on Head-to-Body Ratio.

Heuristic: kids have a larger head relative to their body height
(approx. 1:5 to 1:6) than adults (approx. 1:7.5).
"""
|
||||
|
||||
import json
|
||||
import math
|
||||
import sys
|
||||
|
||||
# Configuration
# Default pose file analysed when the script is run without overrides.
POSE_JSON_PATH = "output/384b0ff44aaaa1f1/384b0ff44aaaa1f1.pose.json"

# Heuristic threshold on (estimated total height / estimated head height).
# Kids typically come out below ~6.0 and adults above ~6.5, so a detection
# whose ratio is strictly below this value is reported as a potential kid
# (a smaller ratio is "more kid-like" relative to head size).
BODY_TO_HEAD_RATIO_THRESHOLD = 5.8
|
||||
|
||||
def distance(p1, p2):
    """Return the Euclidean distance between two 2-D points.

    Args:
        p1: Mapping with numeric 'x' and 'y' entries.
        p2: Mapping with numeric 'x' and 'y' entries.

    Returns:
        The straight-line distance between the points as a float.

    Raises:
        KeyError: If either point lacks an 'x' or 'y' entry.
    """
    # math.hypot avoids the overflow/underflow of squaring very large or
    # very small deltas by hand.
    return math.hypot(p1['x'] - p2['x'], p1['y'] - p2['y'])
|
||||
|
||||
def get_midpoint(p1, p2):
    """Return the point halfway between two 2-D points.

    Args:
        p1: Mapping with numeric 'x' and 'y' entries.
        p2: Mapping with numeric 'x' and 'y' entries.

    Returns:
        A new dict holding only the averaged 'x' and 'y' values; any other
        entries of the inputs are not carried over.

    Raises:
        KeyError: If either point lacks an 'x' or 'y' entry.
    """
    return {
        'x': (p1['x'] + p2['x']) / 2,
        'y': (p1['y'] + p2['y']) / 2,
    }
|
||||
|
||||
def find_kids(pose_json_path=None, ratio_threshold=None):
    """Print pose detections whose height-to-head ratio looks child-like.

    The pose file is read as JSON with a top-level "frames" mapping of
    frame-index string -> {"timestamp": ..., "poses": [...]}. Each pose is
    a dict with a "keypoints" list (and optionally "person_id"); each
    keypoint is a dict with 'name', 'x' and 'y' entries.

    For every pose with a nose, at least one shoulder and both ankles, the
    function estimates a head height and a total height, and records the
    pose when total_height / head_height is below the threshold. It then
    prints summary counts followed by one line per unique timestamp
    (rounded to 0.1 s), sorted by timestamp.

    Args:
        pose_json_path: Path of the pose JSON file. Defaults to the
            module-level POSE_JSON_PATH when None.
        ratio_threshold: Poses with a ratio strictly below this value are
            reported. Defaults to the module-level
            BODY_TO_HEAD_RATIO_THRESHOLD when None.

    Returns:
        None. Results are written to stdout. If the file cannot be read or
        parsed, an error line is printed and the function returns early.
    """
    # Resolve defaults at call time so the module constants stay the single
    # source of truth for the no-argument call.
    if pose_json_path is None:
        pose_json_path = POSE_JSON_PATH
    if ratio_threshold is None:
        ratio_threshold = BODY_TO_HEAD_RATIO_THRESHOLD

    try:
        with open(pose_json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error loading JSON: {e}")
        return

    frames = data.get("frames", {})
    potential_kids = []

    # Counters for debugging
    total_poses = 0
    analyzed_poses = 0

    for frame_idx_str, frame_data in frames.items():
        timestamp = frame_data.get("timestamp", 0)

        for person in frame_data.get("poses", []):
            total_poses += 1

            # Map keypoints by name for easier access.
            kp_dict = {kp['name']: kp for kp in person.get("keypoints", [])}

            nose = kp_dict.get('nose')
            l_shoulder = kp_dict.get('left_shoulder')
            r_shoulder = kp_dict.get('right_shoulder')

            # NOTE(review): only keypoint *presence* is checked here; if the
            # processor also emits a confidence/visibility score, low-quality
            # keypoints still pass - confirm against the pose processor.
            if not nose or not (l_shoulder or r_shoulder):
                continue

            analyzed_poses += 1

            # Shoulder reference: midpoint when both are present, otherwise
            # whichever single shoulder exists (guaranteed by the guard above).
            if l_shoulder and r_shoulder:
                mid_shoulder = get_midpoint(l_shoulder, r_shoulder)
            else:
                mid_shoulder = l_shoulder or r_shoulder

            # Head height is approximated from the nose-to-shoulder distance
            # scaled by an empirical factor of 1.8.
            head_height_est = distance(nose, mid_shoulder) * 1.8

            # Too small/noisy to give a stable ratio.
            # Presumably coordinates are in pixels - TODO confirm.
            if head_height_est < 10:
                continue

            # Only full-body poses are judged: without both ankles (sitting,
            # crouching, cropped) the total height cannot be estimated.
            l_ankle = kp_dict.get('left_ankle')
            r_ankle = kp_dict.get('right_ankle')
            if not (l_ankle and r_ankle):
                continue

            # Total height ~= nose-to-ankle distance plus half a head for the
            # part of the head above the nose.
            mid_ankle = get_midpoint(l_ankle, r_ankle)
            total_height = distance(nose, mid_ankle) + (head_height_est / 2)

            # Reject implausible geometry where the whole body is no taller
            # than the head estimate (odd camera angles, bad keypoints).
            if total_height <= head_height_est:
                continue

            ratio = total_height / head_height_est
            if ratio < ratio_threshold:
                potential_kids.append({
                    "frame": frame_idx_str,
                    "timestamp": timestamp,
                    "ratio": round(ratio, 2),
                    "person_id": person.get("person_id", "?"),
                })

    print(f"Analyzed {analyzed_poses} poses out of {total_poses} total detections.")
    print(f"Found {len(potential_kids)} potential 'kids' (Ratio < {ratio_threshold}).")

    # Collapse detections that share a timestamp (rounded to 0.1 s), keeping
    # the first one seen, so the report lists each moment once.
    unique_kids = {}
    for k in potential_kids:
        unique_kids.setdefault(round(k['timestamp'], 1), k)

    # Sort by timestamp
    sorted_kids = sorted(unique_kids.values(), key=lambda x: x['timestamp'])

    print("\nUnique potential kid detections (timestamps):")
    for k in sorted_kids:
        print(f"  -> Timestamp: {k['timestamp']:.2f}s | Ratio: {k['ratio']}")
|
||||
|
||||
# Run the analysis with the module-level defaults when executed as a script.
if __name__ == "__main__":
    find_kids()
|
||||
Reference in New Issue
Block a user