feat: eye filter flag + QA fixes (Gemma4 prompt, YOLO boundary, PaliGemma score, GDINO skip)

2026-05-14 12:24:25 +08:00
parent f60a59b280
commit 39888ce3cc
5 changed files with 37 additions and 16 deletions
--- a/scripts/qa/judges/gemma4.py
+++ b/scripts/qa/judges/gemma4.py
@@ -37,7 +37,7 @@ Video analysis:

 Rate how well this video matches the expected query on a scale of 0-100.
 0 = completely unrelated, 100 = perfect match.
-Reply ONLY with JSON: {{"score": N, "reasoning": "brief one-line reason"}}"""
+Reply with ONLY this JSON, no markdown, no explanation: {{"score": N, "reasoning": "brief one-line reason"}}"""
    
    response = call_llm(llm_prompt)
    try:
--- a/scripts/qa/judges/paligemma.py
+++ b/scripts/qa/judges/paligemma.py
@@ -32,9 +32,20 @@ def score(frames, prompt):
        descriptions.append(desc)
    
    combined = " | ".join(descriptions)
+    
+    # Simple text-to-score heuristic: start at a neutral 50 and raise it when prompt words appear in the description (capped at 100)
+    prompt_lower = prompt.lower()
+    desc_lower = combined.lower()
+    score = 50  # default
+    # +10 for each prompt word longer than 3 characters that occurs as a substring of the description
+    for word in prompt_lower.split():
+        if len(word) > 3 and word in desc_lower:
+            score += 10
+    score = min(100, score)
+    
    return {
        "agent": "PaliGemma",
-        "score": None,  # raw text, scored later by Gemma4
+        "score": score,
        "reasoning": combined,
        "details": {"descriptions": descriptions}
    }
--- a/scripts/qa/judges/yolo.py
+++ b/scripts/qa/judges/yolo.py
@@ -1,5 +1,5 @@
 """YOLO judge: object detection matching against expected objects"""
-import cv2, numpy as np
+import cv2, numpy as np, re
 from ultralytics import YOLO

 MODEL_PATH = "/Users/accusys/momentry_core_0.1/yolov8s.mlpackage"
@@ -30,8 +30,8 @@ def score(frames, prompt):
    load()
    prompt_lower = prompt.lower()
    
-    # Extract expected objects from prompt: check each COCO class
-    expected = [c for c in COCO if c in prompt_lower]
+    # Extract expected objects from prompt: check each COCO class (word boundary)
+    expected = [c for c in COCO if re.search(r'\b' + re.escape(c) + r'\b', prompt_lower)]
    if not expected:
        expected = ["person"]  # default fallback
    
--- a/scripts/qa/pipeline.py
+++ b/scripts/qa/pipeline.py
@@ -65,14 +65,7 @@ def run_judges(query, result, file_uuid):
        results.append({"agent": "MaskFormer", "score": 50, "reasoning": f"Judge error: {str(e)[:60]}", "details": {}})
    
    # Grounding DINO — SKIP (too slow per-video search)
-    # print(f"  [{qid}] GDINO...", end="", flush=True)
-    # try:
-    #     gd_result = gdino.score(frames, prompt)
-    #     print(" done")
-    #     results.append(gd_result)
-    # except Exception as e:
-    #     print(f" ERROR: {str(e)[:60]}")
-    results.append({"agent": "GroundingDINO", "score": 50, "reasoning": "Skipped for performance", "details": {}})
+    results.append({"agent": "GroundingDINO", "score": 50, "reasoning": "Skipped (too slow for full-video search)", "details": {}})
    
    print(f"  [{qid}] FaceNet...", end="", flush=True)
    try: