feat: QA self-check agent — 15 prompts, 5 judges, weighted scoring
This commit is contained in:
38
scripts/qa/judges/gdino.py
Normal file
38
scripts/qa/judges/gdino.py
Normal file
@@ -0,0 +1,38 @@
|
||||
"""Grounding DINO judge: zero-shot object detection from prompt keywords"""
|
||||
import requests, json, io
|
||||
from PIL import Image
|
||||
|
||||
# URL that score() POSTs its JSON search request to.
GDINO_URL = "http://localhost:5051/search"
|
||||
# Sent as the "file_uuid" field of every search request made by score().
# NOTE(review): hard-coded, and score() never consults its `frames` argument,
# so every call appears to query this same file -- confirm that is intended.
DEFAULT_UUID = "aeed71342a899fe4b4c57b7d41bcb692"
|
||||
|
||||
def score(frames, prompt):
    """Score a prompt with a single Grounding DINO text search.

    One search request (not one per frame) is POSTed to ``GDINO_URL`` for the
    file identified by ``DEFAULT_UUID``. The highest ``best_score`` among the
    returned hits is doubled, clamped to [0, 1] and scaled to an integer
    0-100 score.

    Args:
        frames: Unused; the search is issued once rather than per frame.
        prompt: Prompt text; lower-cased and sent as the search query.

    Returns:
        dict with keys ``agent``, ``score``, ``reasoning`` and ``details``.
        On any failure (bad prompt, network error, HTTP error status,
        malformed response) a neutral score of 50 with empty ``details`` is
        returned instead of raising.
    """
    try:
        # Lower-casing happens inside the try so that a None / non-string
        # prompt yields the neutral fallback instead of crashing the judge.
        prompt_lower = prompt.lower()

        resp = requests.post(
            GDINO_URL,
            json={
                "file_uuid": DEFAULT_UUID,
                "text": prompt_lower,
                "limit": 3,
                "start_time": 0,
                "end_time": 0,
            },
            timeout=30,
        )
        # Without this, a 4xx/5xx reply carrying a JSON body would be read as
        # "no hits" and scored 0 rather than treated as a service error.
        resp.raise_for_status()
        data = resp.json()

        hits = data.get("hits", [])
        n_hits = len(hits)
        best_score = max((h.get("best_score", 0) for h in hits), default=0)
        dets_found = [
            d.get("label", "")
            for h in hits
            for d in h.get("detections", [])
        ]

        # best_score is doubled before scaling; clamp on both sides so the
        # reported score always stays within 0..100.
        score_val = int(100 * min(1.0, max(0.0, best_score * 2)))

        return {
            "agent": "GroundingDINO",
            "score": score_val,
            "reasoning": f"{n_hits} hits, best_score={best_score:.2f}, labels={dets_found[:3]}",
            "details": {"n_hits": n_hits, "best_score": best_score},
        }
    except Exception as e:
        # Deliberate best-effort: any failure degrades to a neutral score so
        # this judge cannot take down the caller.
        return {"agent": "GroundingDINO", "score": 50, "reasoning": f"GDINO error: {str(e)[:80]}", "details": {}}
|
||||
Reference in New Issue
Block a user