#!/opt/homebrew/bin/python3.11
"""
M5 QA Self-Check Agent

Usage: python3 pipeline.py --uuid aeed71342a899fe4b4c57b7d41bcb692
"""
import argparse
import os
import sys

# Make sibling modules and the judges package importable when run as a script.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "judges"))

from query_generator import generate
from executor import execute
from scorer import aggregate, generate_report

# Import judges
from judges import paligemma, gdino, maskformer, yolo, facenet, gemma4

JUDGE_WEIGHTS = {
    "PaliGemma": 0.25,
    "Gemma4": 0.35,
    "MaskFormer": 0.15,
    "YOLO": 0.15,
    "GroundingDINO": 0.05,
    "FaceNet": 0.05,
}

# Score recorded for a judge that raised instead of returning a verdict.
_FALLBACK_SCORE = 50
# Maximum number of characters of an exception message that is echoed/recorded.
_ERROR_CHARS = 60


def _fallback_result(agent, reasoning):
    """Build the placeholder result recorded when a judge fails."""
    return {
        "agent": agent,
        "score": _FALLBACK_SCORE,
        "reasoning": reasoning,
        "details": {},
    }


def _run_judge(qid, label, agent, score_fn, error_prefix="Judge error"):
    """Run one judge, printing progress, and never let its failure propagate.

    Args:
        qid: Query id, used only in the progress output.
        label: Name printed in the progress line.
        agent: Value of the ``"agent"`` key in the fallback result.
        score_fn: Zero-argument callable that performs the actual scoring.
        error_prefix: Prefix of the fallback ``"reasoning"`` text.

    Returns:
        A ``(result, ok)`` tuple. ``result`` is whatever ``score_fn`` returned,
        or a fallback dict if it raised; ``ok`` tells the two cases apart.
    """
    print(f" [{qid}] {label}...", end="", flush=True)
    try:
        outcome = score_fn()
    except Exception as e:  # best-effort: one broken judge must not stop the rest
        message = str(e)[:_ERROR_CHARS]
        print(f" ERROR: {message}")
        return _fallback_result(agent, f"{error_prefix}: {message}"), False
    print(" done")
    return outcome, True


def _first_frame_detections(yolo_result):
    """Return the ``"found"`` list of the first per-frame YOLO entry, or ``[]``.

    Tolerates a missing/empty ``details["frames"]`` list instead of raising
    ``IndexError`` on ``[0]``.
    """
    if not isinstance(yolo_result, dict):
        return []
    details = yolo_result.get("details")
    per_frame = details.get("frames") if isinstance(details, dict) else None
    if not per_frame or not isinstance(per_frame, (list, tuple)):
        return []
    first = per_frame[0]
    return first.get("found", []) if isinstance(first, dict) else []


def run_judges(query, result, file_uuid):
    """Run all judges on the extracted frames and prompt.

    Args:
        query: Query dict; its ``"prompt"`` and ``"id"`` keys are read.
        result: Execution result dict; ``"frames"``, ``"trace_start"`` and
            ``"trace_end"`` are read from it.
        file_uuid: Currently unused; kept so existing callers keep working.

    Returns:
        A list with one entry per judge, in the order PaliGemma, YOLO,
        MaskFormer, GroundingDINO, FaceNet, Gemma4. A judge that raised
        contributes a fallback dict with score 50. The list is empty when
        there are no frames.
    """
    frames = result.get("frames", [])
    prompt = query["prompt"]
    qid = query["id"]

    if not frames:
        print(f" [{qid}] No frames to judge")
        return []

    results = []

    # Run PaliGemma first (produces text needed by Gemma4)
    pg_result, _ = _run_judge(
        qid, "PaliGemma", "PaliGemma", lambda: paligemma.score(frames, prompt)
    )
    results.append(pg_result)

    # Run other judges
    yo_result, yo_ok = _run_judge(
        qid, "YOLO", "YOLO", lambda: yolo.score(frames, prompt)
    )
    results.append(yo_result)

    mf_result, mf_ok = _run_judge(
        qid, "MaskFormer", "MaskFormer", lambda: maskformer.score(frames, prompt)
    )
    results.append(mf_result)

    # Grounding DINO — time-bounded search
    gd_result, _ = _run_judge(
        qid,
        "GDINO",
        "GroundingDINO",
        lambda: gdino.score(
            frames,
            prompt,
            start_time=result.get("trace_start", 0),
            end_time=result.get("trace_end", 0),
        ),
    )
    results.append(gd_result)

    fn_result, _ = _run_judge(
        qid, "FaceNet", "FaceNet", lambda: facenet.score(frames, prompt)
    )
    results.append(fn_result)

    # Gemma4 — uses context from other judges
    def _score_gemma4():
        # Reasoning of the first PaliGemma entry (this is the fallback text
        # when PaliGemma itself failed, as before).
        pali_text = next(
            (
                r.get("reasoning", "")
                for r in results
                if isinstance(r, dict) and r.get("agent") == "PaliGemma"
            ),
            "",
        )
        # Only pass MaskFormer/YOLO output along when that judge succeeded.
        mask_text = ""
        if mf_ok and isinstance(mf_result, dict):
            mask_text = mf_result.get("reasoning", "")
        ctx = {
            "paligemma": pali_text,
            "maskformer": mask_text,
            "yolo": _first_frame_detections(yo_result) if yo_ok else [],
        }
        return gemma4.score(frames, prompt, context=ctx)

    gm_result, _ = _run_judge(
        qid, "Gemma4", "Gemma4", _score_gemma4, error_prefix="LLM error"
    )
    results.append(gm_result)

    return results


def main():
    """Command-line entry point: generate queries, execute, judge, report."""
    parser = argparse.ArgumentParser(description="QA Self-Check Agent")
    parser.add_argument("--uuid", required=True, help="File UUID")
    args = parser.parse_args()
    file_uuid = args.uuid

    print("=== QA Self-Check Agent ===")
    print(f"UUID: {file_uuid}")
    print()

    # Phase 1: Generate the test queries
    print("=== Phase 1: Generating queries ===")
    queries = generate(file_uuid)
    print(f" Generated {len(queries)} queries:")
    for q in queries:
        print(f" {q['id']} [{q['type']:>7}] {q['prompt'][:60]}")
    print()

    # Phase 2: Execute (API search + video download + frame extraction)
    print("=== Phase 2: Executing queries ===")
    results = [execute(q, file_uuid) for q in queries]
    print()

    # Phase 3: Run judges
    print("=== Phase 3: Running judges ===")
    for r in results:
        if r.get("status") != "ok" or not r.get("frames"):
            print(f" [{r['query']['id']}] Skipped (no video/frames)")
            r["judge_results"] = []
            continue
        r["judge_results"] = run_judges(r["query"], r, file_uuid)

    # Phase 4: Generate report
    print()
    print("=== Phase 4: Generating report ===")
    # Strip non-serializable data
    for r in results:
        r.pop("frames", None)
        # Strip PIL Image from judge details if any
        for jr in r.get("judge_results", []):
            details = jr.get("details") if isinstance(jr, dict) else None
            if isinstance(details, dict):
                details.pop("frames", None)

    generate_report(results, file_uuid)
    print()
    print("=== Done ===")


if __name__ == "__main__":
    main()