diff --git a/scripts/qa/judges/gemma4.py b/scripts/qa/judges/gemma4.py
index 61f2cb6..606b126 100644
--- a/scripts/qa/judges/gemma4.py
+++ b/scripts/qa/judges/gemma4.py
@@ -37,7 +37,7 @@ Video analysis:
 
 Rate how well this video matches the expected query on a scale of 0-100.
 0 = completely unrelated, 100 = perfect match.
-Reply ONLY with JSON: {{"score": N, "reasoning": "brief one-line reason"}}"""
+Reply with ONLY this JSON, no markdown, no explanation: {{"score": N, "reasoning": "brief one-line reason"}}"""
     
     response = call_llm(llm_prompt)
     try:
diff --git a/scripts/qa/judges/paligemma.py b/scripts/qa/judges/paligemma.py
index 9835823..11b1ad3 100644
--- a/scripts/qa/judges/paligemma.py
+++ b/scripts/qa/judges/paligemma.py
@@ -32,9 +32,20 @@ def score(frames, prompt):
         descriptions.append(desc)
     
     combined = " | ".join(descriptions)
+    
+    # Simple text-to-score heuristic: start at 50, add 10 per prompt word (>3 chars) found as a substring of the description
+    prompt_lower = prompt.lower()
+    desc_lower = combined.lower()
+    score = 50  # default
+    # Boost if prompt elements found in description
+    for word in prompt_lower.split():
+        if len(word) > 3 and word in desc_lower:
+            score += 10
+    score = min(100, score)
+    
     return {
         "agent": "PaliGemma",
-        "score": None,  # raw text, scored later by Gemma4
+        "score": score,
         "reasoning": combined,
         "details": {"descriptions": descriptions}
     }
diff --git a/scripts/qa/judges/yolo.py b/scripts/qa/judges/yolo.py
index 94d34cc..161edfd 100644
--- a/scripts/qa/judges/yolo.py
+++ b/scripts/qa/judges/yolo.py
@@ -1,5 +1,5 @@
 """YOLO judge: object detection matching against expected objects"""
-import cv2, numpy as np
+import cv2, numpy as np, re
 from ultralytics import YOLO
 
 MODEL_PATH = "/Users/accusys/momentry_core_0.1/yolov8s.mlpackage"
@@ -30,8 +30,8 @@ def score(frames, prompt):
     load()
     prompt_lower = prompt.lower()
     
-    # Extract expected objects from prompt: check each COCO class
-    expected = [c for c in COCO if c in prompt_lower]
+    # Extract expected objects from prompt: keep each COCO class that appears as a whole word (regex \b boundaries)
+    expected = [c for c in COCO if re.search(r'\b' + re.escape(c) + r'\b', prompt_lower)]
     if not expected:
         expected = ["person"]  # default fallback
     
diff --git a/scripts/qa/pipeline.py b/scripts/qa/pipeline.py
index 16d745f..3caeca8 100644
--- a/scripts/qa/pipeline.py
+++ b/scripts/qa/pipeline.py
@@ -65,14 +65,7 @@ def run_judges(query, result, file_uuid):
         results.append({"agent": "MaskFormer", "score": 50, "reasoning": f"Judge error: {str(e)[:60]}", "details": {}})
     
     # Grounding DINO — SKIP (too slow per-video search)
-    # print(f"  [{qid}] GDINO...", end="", flush=True)
-    # try:
-    #     gd_result = gdino.score(frames, prompt)
-    #     print(" done")
-    #     results.append(gd_result)
-    # except Exception as e:
-    #     print(f" ERROR: {str(e)[:60]}")
-    results.append({"agent": "GroundingDINO", "score": 50, "reasoning": "Skipped for performance", "details": {}})
+    results.append({"agent": "GroundingDINO", "score": 50, "reasoning": "Skipped (too slow for full-video search)", "details": {}})
     
     print(f"  [{qid}] FaceNet...", end="", flush=True)
     try:
diff --git a/scripts/store_traced_faces.py b/scripts/store_traced_faces.py
index c3a86f6..26127ed 100644
--- a/scripts/store_traced_faces.py
+++ b/scripts/store_traced_faces.py
@@ -19,6 +19,7 @@ import sys
 import os
 import json
 import argparse
+from collections import defaultdict
 import numpy as np
 import psycopg2
 import psycopg2.extras
@@ -160,7 +161,7 @@ def merge_traces_within_cuts(face_data: dict, cut_scenes: list) -> dict:
     return face_data
 
 
-def run_face_tracker(face_json_path: str, traced_json_path: str) -> str:
+def run_face_tracker(face_json_path: str, traced_json_path: str, filter_eyes: bool = False) -> str:
     """Run face_tracker.py on face.json, returns path to face_traced.json"""
     from face_tracker import track_faces
 
@@ -199,7 +200,22 @@ def run_face_tracker(face_json_path: str, traced_json_path: str) -> str:
                 "fps": face_data.get("fps", 30.0),
                 "total_frames": face_data.get("frame_count", 0),
             }
-
+    
+    # Eye filter: remove faces without at least one eye landmark
+    if filter_eyes:
+        removed = 0
+        for fnum_str, frm_data in face_data.get("frames", {}).items():
+            faces = frm_data.get("faces", [])
+            kept = []
+            for face in faces:
+                lm = face.get("landmarks", {})
+                if len(lm.get("left_eye", [])) > 0 or len(lm.get("right_eye", [])) > 0:
+                    kept.append(face)
+                else:
+                    removed += 1
+            frm_data["faces"] = kept
+        print(f"[TRACE] Eye filter: {removed} faces without eyes removed")
+    
     print(f"[TRACE] Processing {len(face_data.get('frames', {}))} frames")
     
     # Load embeddings from DB for the face tracker
@@ -344,6 +360,7 @@ def main():
     parser.add_argument("--schema", default=SCHEMA, help="DB schema name")
 
     parser.add_argument("--uuid", help="UUID for Redis tracking (accepted by executor)")
+    parser.add_argument("--filter-eyes", action="store_true", help="Remove faces without eye landmarks before tracking")
     args = parser.parse_args()
 
     face_json = args.face_json or os.path.join(
@@ -356,7 +373,7 @@ def main():
         sys.exit(1)
 
     # Step 1: Run face tracker
-    run_face_tracker(face_json, traced_json)
+    run_face_tracker(face_json, traced_json, filter_eyes=args.filter_eyes)
 
     # Step 2: Store in DB with trace_id
     total, traces = store_traced_faces(args.file_uuid, traced_json, args.schema)