"""Scorer: Weighted aggregate all judge scores → report"""
import json
import os
import subprocess
from datetime import datetime
from typing import Any, Dict, List, Optional

import numpy as np


class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to plain Python."""

    def default(self, obj):
        """Return a JSON-serializable equivalent of *obj*.

        NumPy integers, floats, booleans and arrays are converted to
        ``int``, ``float``, ``bool`` and ``list``; anything else is
        delegated to the base class, which raises ``TypeError``.
        """
        if isinstance(obj, (np.integer,)):
            return int(obj)
        if isinstance(obj, (np.floating,)):
            return float(obj)
        if isinstance(obj, (np.bool_,)):
            return bool(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)


OUTPUT_DIR = "/Users/accusys/momentry/output_dev"

# Relative weight of each judge in the aggregate score.
WEIGHTS = {
    "Gemma4": 0.35,
    "PaliGemma": 0.25,
    "YOLO": 0.15,
    "MaskFormer": 0.15,
    "GroundingDINO": 0.05,
    "FaceNet": 0.05,
}

# Weight applied to a judge that has no entry in WEIGHTS.
_DEFAULT_WEIGHT = 0.1

# Score substituted by compute_scores() for a judge that reported none.
_DEFAULT_SCORE = 50

# Query types listed (in this order) in the Markdown summary table.
_SUMMARY_TYPES = ("identity", "scene", "object")


def get_build_info():
    """Return build metadata: short git hash, current timestamp and version.

    The git hash lookup is best-effort: if ``git`` cannot be run, times out,
    exits with an error or prints nothing, the hash is reported as
    ``"unknown"`` instead of raising.
    """
    try:
        repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        completed = subprocess.run(
            ["git", "rev-parse", "--short", "HEAD"],
            capture_output=True,
            text=True,
            timeout=5,
            cwd=repo_root,
        )
    except (OSError, subprocess.SubprocessError, NameError):
        # OSError: git missing / bad cwd; SubprocessError: timeout;
        # NameError: __file__ undefined (e.g. interactive session).
        git_hash = "unknown"
    else:
        # A failing git (e.g. not inside a repository) prints nothing to
        # stdout; report that as "unknown" rather than an empty hash.
        git_hash = completed.stdout.strip() if completed.returncode == 0 else ""
        git_hash = git_hash or "unknown"
    return {
        "build_git_hash": git_hash,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "version": "1.0.0",
    }


def compute_scores(judge_results):
    """Convert judge outputs to numeric scores.

    Args:
        judge_results: iterable of dicts, each with an ``"agent"`` key and
            an optional ``"score"`` key.

    Returns:
        Dict mapping agent name to its score; a missing or ``None`` score
        is replaced by the default of 50.
    """
    scores = {}
    for jr in judge_results:
        score = jr.get("score")
        # default for non-numeric judges
        scores[jr["agent"]] = _DEFAULT_SCORE if score is None else score
    return scores


def aggregate(scores):
    """Weighted aggregate across all judges.

    Args:
        scores: dict mapping agent name to a numeric score or ``None``.
            ``None`` entries are ignored; agents without an entry in
            ``WEIGHTS`` use a weight of 0.1.

    Returns:
        The weighted mean rounded to an integer, or 0 when no usable score
        is present.
    """
    total_weight = 0
    weighted_sum = 0
    for agent, score in scores.items():
        if score is None:
            continue
        w = WEIGHTS.get(agent, _DEFAULT_WEIGHT)
        weighted_sum += w * score
        total_weight += w
    return round(weighted_sum / total_weight) if total_weight > 0 else 0


def _numeric_scores(result: Dict[str, Any]) -> Dict[str, Any]:
    """Collect the non-None judge scores of one query result, keyed by agent."""
    return {
        jr["agent"]: jr["score"]
        for jr in result.get("judge_results", [])
        if jr.get("score") is not None
    }


def _rounded_mean(values: List[Any]) -> int:
    """Return the rounded arithmetic mean of *values*, or 0 when empty."""
    return round(sum(values) / len(values)) if values else 0


def _table_cell(text: Optional[str], limit: int = 80) -> str:
    """Truncate *text* and make it safe to embed in a Markdown table cell."""
    clipped = (text or "")[:limit]
    # Newlines would end the table row and a bare pipe would start a new
    # column, so flatten the former and escape the latter.
    return " ".join(clipped.splitlines()).replace("|", "\\|")


def generate_report(all_results, file_uuid, output_dir=None):
    """Generate qa_report.md + qa_report.json.

    Args:
        all_results: list of per-query result dicts. Each needs a
            ``"query"`` dict with ``"id"``, ``"prompt"`` and ``"type"``;
            ``"judge_results"`` (list of dicts with ``"agent"`` and optional
            ``"score"`` / ``"reasoning"``) and ``"status"`` are optional.
        file_uuid: identifier written into both reports.
        output_dir: directory to write the reports into; defaults to
            ``OUTPUT_DIR``. It is created if it does not exist.

    Side effects:
        Writes both report files (UTF-8) and prints their paths and the
        overall score to stdout.
    """
    target_dir = OUTPUT_DIR if output_dir is None else output_dir
    build = get_build_info()
    report_path = os.path.join(target_dir, "qa_report.md")
    json_path = os.path.join(target_dir, "qa_report.json")

    # Weighted score per query, computed once and reused for the summary
    # and for the per-query sections. A query without any numeric judge
    # score aggregates to 0 and still counts towards the averages.
    final_scores = [aggregate(_numeric_scores(r)) for r in all_results]
    by_type = {}
    for r, final in zip(all_results, final_scores):
        by_type.setdefault(r["query"]["type"], []).append(final)
    overall = _rounded_mean(final_scores)

    lines = [
        "# QA Self-Check Report",
        "",
        f"**UUID**: `{file_uuid}`",
        f"**Build**: {build['build_git_hash']}",
        f"**Timestamp**: {build['timestamp']}",
        f"**Version**: {build['version']}",
        "",
        "---",
        "",
        # Summary table
        "## Summary",
        "",
        "| Metric | Score |",
        "|--------|:----:|",
        f"| **Overall** | **{overall}/100** |",
    ]
    for qtype in _SUMMARY_TYPES:
        type_scores = by_type.get(qtype, [])
        if type_scores:
            avg = _rounded_mean(type_scores)
            lines.append(f"| {qtype.capitalize()} queries | {avg}/100 |")
    lines.append("")

    # Per-query details
    lines.append("## Per-Query Details")
    lines.append("")
    for r, final in zip(all_results, final_scores):
        q = r["query"]
        lines.append(f"### {q['id']}: {q['prompt']}")
        lines.append("")
        lines.append(f"| Type: {q['type']} | Status: {r.get('status', 'ok')} |")
        lines.append("|-----------------|-------------------|")
        lines.append("")
        # Judges
        lines.append("| Judge | Score | Reasoning |")
        lines.append("|-------|:-----:|-----------|")
        for jr in r.get("judge_results", []):
            s = jr.get("score")
            if s is None:
                s = "-"
            reasoning = _table_cell(jr.get("reasoning"))
            lines.append(f"| {jr['agent']} | {s} | {reasoning} |")
        lines.append(f"| **Weighted** | **{final}** | |")
        lines.append("")

    lines.append("---")
    lines.append(f"*Report generated by M5 QA Agent — {build['timestamp']}*")
    report_text = "\n".join(lines)

    os.makedirs(target_dir, exist_ok=True)
    # Explicit UTF-8: the report contains non-ASCII text and the JSON is
    # dumped with ensure_ascii=False, so the locale default is not safe.
    with open(report_path, "w", encoding="utf-8") as f:
        f.write(report_text)

    # JSON output
    json_output = {
        "build": build,
        "file_uuid": file_uuid,
        "overall_score": overall,
        "by_type": {t: _rounded_mean(s) for t, s in by_type.items() if s},
        "queries": all_results,
    }
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(json_output, f, indent=2, ensure_ascii=False, cls=NumpyEncoder)

    print(f"\n Report: {report_path}")
    print(f" JSON: {json_path}")
    print(f" Overall score: {overall}/100")