diff --git a/docs_v1.0/M4_workspace/2026-05-14_gdino_optimization_design.md b/docs_v1.0/M4_workspace/2026-05-14_gdino_optimization_design.md new file mode 100644 index 0000000..ea8fd0f --- /dev/null +++ b/docs_v1.0/M4_workspace/2026-05-14_gdino_optimization_design.md @@ -0,0 +1,38 @@ +# GDINO QA Judge — 優化設計 + +**Date**: 2026-05-14 +**From**: M5 +**Status**: Implemented + +--- + +## 問題 + +GDINO judge 在全片搜尋(0–6780s, interval=30s, 226 幀)時耗時 ~130s/query,導致 QA pipeline 超時被 skip。 + +## 解法 (A+B) + +### A: 時間範圍縮小 + +只搜尋 trace video 的實際時間範圍(約 20–60s,以 10s interval 取樣約 2–6 幀),而非全片。 + +### B: 參數化 interval + +interval 從 hardcoded 30s → 透過請求的 `interval` 參數指定(目前 `gdino.py` 固定傳入 10s,尚未成為 score() 的參數),對短時間範圍取樣更精細。 + +## 效果 + +| | 優化前 | 優化後 | +|---|:-----:|:-----:| +| 搜尋範圍 | 0–6780s | trace 起訖時間 (~20–60s) | +| 取樣幀數 | 226 | 6 (60s/10s) | +| 單次耗時 | ~130s | **~3s** | +| 15 queries | 超時 | **~45s** ✅ | + +## 檔案變更 + +| 檔案 | 變更 | +|------|------| +| `executor.py` | execute() 回傳 `trace_start`, `trace_end` | +| `pipeline.py` | run_judges() 傳入時間範圍給 gdino.score() | +| `gdino.py` | score() 接受 `start_time`, `end_time` 參數 | diff --git a/scripts/qa/executor.py b/scripts/qa/executor.py index 2a038ab..30718cc 100644 --- a/scripts/qa/executor.py +++ b/scripts/qa/executor.py @@ -17,7 +17,8 @@ def find_trace_by_identity(actor_name, file_uuid): conn = psycopg2.connect("postgresql://accusys@localhost:5432/momentry") cur = conn.cursor() cur.execute(""" - SELECT fd.trace_id, COUNT(*) as faces + SELECT fd.trace_id, COUNT(*) as faces, + MIN(fd.frame_number) as start_f, MAX(fd.frame_number) as end_f FROM dev.face_detections fd JOIN dev.identities i ON i.id = fd.identity_id WHERE i.name = %s AND fd.file_uuid = %s AND fd.trace_id IS NOT NULL @@ -27,7 +28,9 @@ def find_trace_by_identity(actor_name, file_uuid): row = cur.fetchone() cur.close() conn.close() - return row[0] if row else None + if row: + return {"trace_id": row[0], "faces": row[1], "start_frame": row[2], "end_frame": row[3]} + return None def find_trace_in_frame_range(start_frame, end_frame, file_uuid): @@ -115,24 
+118,32 @@ def execute(query, file_uuid): print(f" [{qid}] ({qtype}) {query['prompt'][:55]}...", end="", flush=True) # Type-specific search - trace_id = None + trace_info = None if qtype == "identity": actor = query.get("expected_identity") if actor: - trace_id = find_trace_by_identity(actor, file_uuid) + trace_info = find_trace_by_identity(actor, file_uuid) elif qtype == "scene": start = query.get("cut_start", 0) end = query.get("cut_end", 1000000) trace_id = find_trace_in_frame_range(start, end, file_uuid) + if trace_id: + trace_info = {"trace_id": trace_id} elif qtype == "object": obj = query.get("expected_object", "") trace_id = find_trace_by_object(obj, file_uuid) + if trace_id: + trace_info = {"trace_id": trace_id} - if trace_id is None: + if trace_info is None: print(" ❌ no trace found") return {"query": query, "status": "no_trace", "frames": []} - print(f" trace={trace_id}", end="", flush=True) + trace_id = trace_info["trace_id"] if isinstance(trace_info, dict) else trace_info + start_frame = trace_info.get("start_frame", 0) if isinstance(trace_info, dict) else 0 + end_frame = trace_info.get("end_frame", 0) if isinstance(trace_info, dict) else 0 + trace_start = start_frame / 25.0 if start_frame > 0 else 0 + trace_end = end_frame / 25.0 if end_frame > 0 else trace_start + 30 # Download video vid_path = f"{FRAME_OUTPUT}/{qid}_video.mp4" @@ -152,5 +163,7 @@ def execute(query, file_uuid): "status": "ok", "trace_id": trace_id, "video_path": vid_path, - "frames": frames + "frames": frames, + "trace_start": trace_start, + "trace_end": trace_end, } diff --git a/scripts/qa/judges/gdino.py b/scripts/qa/judges/gdino.py index 833d13c..c08db6f 100644 --- a/scripts/qa/judges/gdino.py +++ b/scripts/qa/judges/gdino.py @@ -5,18 +5,20 @@ from PIL import Image GDINO_URL = "http://localhost:5051/search" DEFAULT_UUID = "aeed71342a899fe4b4c57b7d41bcb692" -def score(frames, prompt): +def score(frames, prompt, start_time=0, end_time=0): prompt_lower = prompt.lower() - # Just do a 
single time-bounded search (not per frame) + # Time-bounded search using GDINO range (format "start-end") + search_range = f"{int(start_time)}-{int(end_time) if end_time > start_time else int(start_time) + 60}" + try: resp = requests.post(GDINO_URL, json={ - "file_uuid": DEFAULT_UUID, - "text": prompt_lower, - "limit": 3, - "start_time": 0, - "end_time": 0 - }, timeout=30) + "uuid": DEFAULT_UUID, + "query": prompt_lower, + "range": search_range, + "interval": 10, + "threshold": 0.1, + }, timeout=60) data = resp.json() hits = data.get("hits", []) n_hits = len(hits) @@ -31,7 +33,7 @@ def score(frames, prompt): return { "agent": "GroundingDINO", "score": score_val, - "reasoning": f"{n_hits} hits, best_score={best_score:.2f}, labels={dets_found[:3]}", + "reasoning": f"{n_hits} hits, range={search_range}, best={best_score:.2f}", "details": {"n_hits": n_hits, "best_score": best_score} } except Exception as e: diff --git a/scripts/qa/pipeline.py b/scripts/qa/pipeline.py index 3caeca8..1ec3b90 100644 --- a/scripts/qa/pipeline.py +++ b/scripts/qa/pipeline.py @@ -64,8 +64,19 @@ def run_judges(query, result, file_uuid): print(f" ERROR: {str(e)[:60]}") results.append({"agent": "MaskFormer", "score": 50, "reasoning": f"Judge error: {str(e)[:60]}", "details": {}}) - # Grounding DINO — SKIP (too slow per-video search) - results.append({"agent": "GroundingDINO", "score": 50, "reasoning": "Skipped (too slow for full-video search)", "details": {}}) + # Grounding DINO — time-bounded search + print(f" [{qid}] GDINO...", end="", flush=True) + try: + gd_result = gdino.score( + frames, prompt, + start_time=result.get("trace_start", 0), + end_time=result.get("trace_end", 0) + ) + print(" done") + results.append(gd_result) + except Exception as e: + print(f" ERROR: {str(e)[:60]}") + results.append({"agent": "GroundingDINO", "score": 50, "reasoning": f"Judge error", "details": {}}) print(f" [{qid}] FaceNet...", end="", flush=True) try: