#!/opt/homebrew/bin/python3.11
"""
Hand Processor Wrapper
Calls Swift Vision Framework hand pose (swift_hand) for gesture detection.
Uses VNDetectHumanHandPoseRequest with ANE acceleration.
"""

import argparse
import json
import os
import re
import subprocess
import sys
import threading
from typing import Optional

# Make the sibling redis_publisher module importable when run as a script.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from redis_publisher import RedisPublisher

# Primary and fallback locations of the swift_hand debug binary, both
# relative to this file's directory.
SWIFT_HAND_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "swift_processors/.build/debug/swift_hand"
)
SWIFT_HAND_ALT = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "swift_processors/.build/arm64-apple-macosx/debug/swift_hand"
)

# Matches progress lines emitted by the Swift binary on stdout.
SWIFT_HAND_PROGRESS_RE = re.compile(r"\[SwiftHand\] Progress:\s*(\d+)%")


def _empty_result() -> dict:
    """Return a fresh copy of the result reported when processing fails."""
    return {"frame_count": 0, "fps": 0.0, "frames": []}


def _drain_stream(stream, sink: list) -> None:
    """Read *stream* to EOF and append the text to *sink* (thread target)."""
    sink.append(stream.read())


def process_hand(
    video_path: str,
    output_path: str,
    uuid: str = "",
    sample_interval: int = 3,
    publisher: Optional[RedisPublisher] = None,
) -> dict:
    """Run the swift_hand binary on a video and return its JSON output.

    Args:
        video_path: Passed to the binary as its first positional argument.
        output_path: Passed as the second positional argument; the JSON
            result is read back from this path after the binary exits.
        uuid: Forwarded to the binary via ``--uuid``.
        sample_interval: Forwarded to the binary via ``--sample-interval``.
        publisher: Optional publisher that receives progress and error
            notifications; nothing is published when it is ``None``.

    Returns:
        The object decoded from the JSON at ``output_path``. When the binary
        cannot be found, exits non-zero, or leaves no output file, the
        empty result ``{"frame_count": 0, "fps": 0.0, "frames": []}`` is
        returned instead.

    Raises:
        json.JSONDecodeError: If the output file is not valid JSON.
    """
    swift_bin = SWIFT_HAND_PATH
    if not os.path.exists(swift_bin):
        swift_bin = SWIFT_HAND_ALT
    if not os.path.exists(swift_bin):
        print("[Hand] Swift binary not found", file=sys.stderr)
        if publisher:
            publisher.error("hand", "Swift binary not found")
        return _empty_result()

    cmd = [
        swift_bin, video_path, output_path,
        "--sample-interval", str(sample_interval),
        "--uuid", uuid,
    ]
    print("[Hand] Running Swift Hand (Vision Framework)", file=sys.stderr)

    stderr_chunks: list = []
    with subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    ) as proc:
        # Drain stderr concurrently: reading stdout to EOF first would
        # deadlock as soon as the child fills the stderr pipe buffer, since
        # it would block on stderr while we block on stdout.
        stderr_reader = threading.Thread(
            target=_drain_stream,
            args=(proc.stderr, stderr_chunks),
            daemon=True,
        )
        stderr_reader.start()

        last_pct = -1
        for raw_line in proc.stdout:
            line = raw_line.strip()
            m = SWIFT_HAND_PROGRESS_RE.search(line)
            if m:
                pct = int(m.group(1))
                # Only report strictly increasing percentages.
                if pct > last_pct:
                    last_pct = pct
                    print(f"[Hand] Progress: {pct}%", file=sys.stderr)
                    if publisher:
                        publisher.progress("hand", pct, 100, f"{pct}%")
            elif line:
                # Echo any other non-empty output from the binary.
                print(f" {line}", file=sys.stderr)

        stderr_reader.join()
        stderr_output = "".join(stderr_chunks)
        if stderr_output:
            print(stderr_output.strip(), file=sys.stderr)
        proc.wait()

    if proc.returncode != 0 or not os.path.exists(output_path):
        print(f"[Hand] Swift Hand failed (exit={proc.returncode})", file=sys.stderr)
        if publisher:
            publisher.error("hand", "Swift Hand failed")
        return _empty_result()

    with open(output_path) as f:
        return json.load(f)


def main() -> None:
    """Parse CLI arguments, run hand processing, and write the result."""
    parser = argparse.ArgumentParser(description="Hand Processor (Swift Vision)")
    parser.add_argument("video_path")
    parser.add_argument("output_path")
    parser.add_argument("--uuid", "-u", default="")
    parser.add_argument("--sample-interval", type=int, default=3)
    args = parser.parse_args()

    # Publishing is enabled only when a UUID is supplied.
    publisher = RedisPublisher(args.uuid) if args.uuid else None
    if publisher:
        publisher.info("hand", "HAND_START")

    result = process_hand(
        args.video_path, args.output_path, args.uuid, args.sample_interval, publisher
    )

    # Written unconditionally, so the output file exists (holding the empty
    # result) even when processing failed.
    with open(args.output_path, "w") as f:
        json.dump(result, f, indent=2)

    frame_total = len(result.get("frames", []))
    print(f"Hand: {frame_total} frames with hands")
    if publisher:
        publisher.complete("hand", f"{frame_total} frames")


if __name__ == "__main__":
    main()