"""YOLO judge: object detection matching against expected objects""" import cv2, numpy as np, re from ultralytics import YOLO MODEL_PATH = "/Users/accusys/momentry_core_0.1/yolov8s.mlpackage" COCO = [ "person","bicycle","car","motorbike","aeroplane","bus","train","truck","boat", "traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog", "horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella", "handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite", "baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle", "wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange", "broccoli","carrot","hot dog","pizza","donut","cake","chair","sofa","pottedplant", "bed","diningtable","toilet","tvmonitor","laptop","mouse","remote","keyboard", "cell phone","microwave","oven","toaster","sink","refrigerator","book","clock", "vase","scissors","teddy bear","hair drier","toothbrush", ] _model = None def load(): global _model if _model is None: try: _model = YOLO(MODEL_PATH, task="detect", verbose=False) except: _model = YOLO("yolov8s.pt") def score(frames, prompt): load() prompt_lower = prompt.lower() # Extract expected objects from prompt: check each COCO class (word boundary) expected = [c for c in COCO if re.search(r'\b' + re.escape(c) + r'\b', prompt_lower)] if not expected: expected = ["person"] # default fallback results = [] for img in frames: # Convert PIL to numpy arr = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR) dets = _model(arr, verbose=False, imgsz=640) found = [] if dets and len(dets) > 0 and dets[0].boxes is not None: for cls_id in dets[0].boxes.cls.int().tolist(): cls_name = COCO[cls_id] if cls_id < len(COCO) else f"cls_{cls_id}" found.append(cls_name) match = sum(1 for e in expected if e in found) results.append({"expected": expected, "found": found, "match_count": match, "total": len(expected)}) total_match = sum(r["match_count"] for r in results) total_expected = sum(r["total"] for r in results) or 1 score_val = int(100 * total_match / total_expected) return { "agent": "YOLO", "score": score_val, "reasoning": f"Found {total_match}/{total_expected} expected objects: {expected}", "details": {"frames": results} }