feat: eye filter flag + QA fixes (Gemma4 prompt, YOLO boundary, PaliGemma score, GDINO skip)
This commit is contained in:
@@ -37,7 +37,7 @@ Video analysis:
|
||||
|
||||
Rate how well this video matches the expected query on a scale of 0-100.
|
||||
0 = completely unrelated, 100 = perfect match.
|
||||
Reply ONLY with JSON: {{"score": N, "reasoning": "brief one-line reason"}}"""
|
||||
Reply with ONLY this JSON, no markdown, no explanation: {{"score": N, "reasoning": "brief one-line reason"}}"""
|
||||
|
||||
response = call_llm(llm_prompt)
|
||||
try:
|
||||
|
||||
@@ -32,9 +32,20 @@ def score(frames, prompt):
|
||||
descriptions.append(desc)
|
||||
|
||||
combined = " | ".join(descriptions)
|
||||
|
||||
# Simple text-to-score: check if description mentions key terms from prompt
|
||||
prompt_lower = prompt.lower()
|
||||
desc_lower = combined.lower()
|
||||
score = 50 # default
|
||||
# Boost if prompt elements found in description
|
||||
for word in prompt_lower.split():
|
||||
if len(word) > 3 and word in desc_lower:
|
||||
score += 10
|
||||
score = min(100, score)
|
||||
|
||||
return {
|
||||
"agent": "PaliGemma",
|
||||
"score": None, # raw text, scored later by Gemma4
|
||||
"score": score,
|
||||
"reasoning": combined,
|
||||
"details": {"descriptions": descriptions}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
"""YOLO judge: object detection matching against expected objects"""
|
||||
import cv2, numpy as np
|
||||
import cv2, numpy as np, re
|
||||
from ultralytics import YOLO
|
||||
|
||||
MODEL_PATH = "/Users/accusys/momentry_core_0.1/yolov8s.mlpackage"
|
||||
@@ -30,8 +30,8 @@ def score(frames, prompt):
|
||||
load()
|
||||
prompt_lower = prompt.lower()
|
||||
|
||||
# Extract expected objects from prompt: check each COCO class
|
||||
expected = [c for c in COCO if c in prompt_lower]
|
||||
# Extract expected objects from prompt: check each COCO class (word boundary)
|
||||
expected = [c for c in COCO if re.search(r'\b' + re.escape(c) + r'\b', prompt_lower)]
|
||||
if not expected:
|
||||
expected = ["person"] # default fallback
|
||||
|
||||
|
||||
@@ -65,14 +65,7 @@ def run_judges(query, result, file_uuid):
|
||||
results.append({"agent": "MaskFormer", "score": 50, "reasoning": f"Judge error: {str(e)[:60]}", "details": {}})
|
||||
|
||||
# Grounding DINO — SKIP (too slow per-video search)
|
||||
# print(f" [{qid}] GDINO...", end="", flush=True)
|
||||
# try:
|
||||
# gd_result = gdino.score(frames, prompt)
|
||||
# print(" done")
|
||||
# results.append(gd_result)
|
||||
# except Exception as e:
|
||||
# print(f" ERROR: {str(e)[:60]}")
|
||||
results.append({"agent": "GroundingDINO", "score": 50, "reasoning": "Skipped for performance", "details": {}})
|
||||
results.append({"agent": "GroundingDINO", "score": 50, "reasoning": "Skipped (too slow for full-video search)", "details": {}})
|
||||
|
||||
print(f" [{qid}] FaceNet...", end="", flush=True)
|
||||
try:
|
||||
|
||||
@@ -19,6 +19,7 @@ import sys
|
||||
import os
|
||||
import json
|
||||
import argparse
|
||||
from collections import defaultdict
|
||||
import numpy as np
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
@@ -160,7 +161,7 @@ def merge_traces_within_cuts(face_data: dict, cut_scenes: list) -> dict:
|
||||
return face_data
|
||||
|
||||
|
||||
def run_face_tracker(face_json_path: str, traced_json_path: str) -> str:
|
||||
def run_face_tracker(face_json_path: str, traced_json_path: str, filter_eyes: bool = False) -> str:
|
||||
"""Run face_tracker.py on face.json, returns path to face_traced.json"""
|
||||
from face_tracker import track_faces
|
||||
|
||||
@@ -199,7 +200,22 @@ def run_face_tracker(face_json_path: str, traced_json_path: str) -> str:
|
||||
"fps": face_data.get("fps", 30.0),
|
||||
"total_frames": face_data.get("frame_count", 0),
|
||||
}
|
||||
|
||||
|
||||
# Eye filter: remove faces without at least one eye landmark
|
||||
if filter_eyes:
|
||||
removed = 0
|
||||
for fnum_str, frm_data in face_data.get("frames", {}).items():
|
||||
faces = frm_data.get("faces", [])
|
||||
kept = []
|
||||
for face in faces:
|
||||
lm = face.get("landmarks", {})
|
||||
if len(lm.get("left_eye", [])) > 0 or len(lm.get("right_eye", [])) > 0:
|
||||
kept.append(face)
|
||||
else:
|
||||
removed += 1
|
||||
frm_data["faces"] = kept
|
||||
print(f"[TRACE] Eye filter: {removed} faces without eyes removed")
|
||||
|
||||
print(f"[TRACE] Processing {len(face_data.get('frames', {}))} frames")
|
||||
|
||||
# Load embeddings from DB for the face tracker
|
||||
@@ -344,6 +360,7 @@ def main():
|
||||
parser.add_argument("--schema", default=SCHEMA, help="DB schema name")
|
||||
|
||||
parser.add_argument("--uuid", help="UUID for Redis tracking (accepted by executor)")
|
||||
parser.add_argument("--filter-eyes", action="store_true", help="Remove faces without eye landmarks before tracking")
|
||||
args = parser.parse_args()
|
||||
|
||||
face_json = args.face_json or os.path.join(
|
||||
@@ -356,7 +373,7 @@ def main():
|
||||
sys.exit(1)
|
||||
|
||||
# Step 1: Run face tracker
|
||||
run_face_tracker(face_json, traced_json)
|
||||
run_face_tracker(face_json, traced_json, filter_eyes=args.filter_eyes)
|
||||
|
||||
# Step 2: Store in DB with trace_id
|
||||
total, traces = store_traced_faces(args.file_uuid, traced_json, args.schema)
|
||||
|
||||
Reference in New Issue
Block a user