Files
momentry_core/scripts/render_offline_report.py
Accusys fff2af8ad1 fix: identity names now show in all trace tooltips (online + offline)
- Online: remove IDENTITY filter gating on identity_note — always show
- Offline: fix id_names scope bug — was overwritten by top10-only dict
- Both reports now show 'identity: PERSON_xxx' for all 2000 timeline traces
- All 5483 traces have identity mapping (verified in SQLite)
2026-05-13 03:19:26 +08:00

251 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/opt/homebrew/bin/python3.11
"""
Offline Report Generator — Uses SQLite file (no PostgreSQL needed).
Generates comprehensive HTML report with charts, heatmaps, and vector stats.
Usage:
    python3 render_offline_report.py <uuid>.sqlite [output.html]
    python3 render_offline_report.py <uuid>.sqlite --identity <id>
"""
import sys, json, sqlite3, os, argparse
from collections import defaultdict
# --- Command-line interface --------------------------------------------------
# Positional arguments: the SQLite file to read and an optional output HTML
# path. --identity/-i restricts the report queries to a single identity_id.
parser = argparse.ArgumentParser()
parser.add_argument("sqlite_path", help="Path to the .sqlite file")
parser.add_argument("output", nargs="?", default=None, help="Output HTML path")
parser.add_argument("--identity", "-i", type=int, default=None, help="Filter by identity_id")
args = parser.parse_args()

SQLITE_PATH = args.sqlite_path
# Derive the default output name from the path stem instead of str.replace():
# replace() returned the input path unchanged when it contained no ".sqlite"
# (so the HTML report overwrote the database itself) and also rewrote any
# ".sqlite" occurring inside a directory name. For "<uuid>.sqlite" the result
# is still "<uuid>_report.html".
OUT = args.output or os.path.splitext(SQLITE_PATH)[0] + "_report.html"
IDENTITY = args.identity

if not os.path.exists(SQLITE_PATH):
    print(f"ERROR: {SQLITE_PATH} not found")
    sys.exit(1)
# Locate the optional sqlite-vec extension: first next to this script, then
# in /tmp. The first existing candidate wins.
# NOTE(review): /tmp is typically world-writable, so loading a native
# extension from there lets another local user inject code — confirm this
# fallback is intended.
VEC_DYLIB = None
for path in [
    os.path.join(os.path.dirname(os.path.abspath(__file__)), "vec0.dylib"),
    "/tmp/vec0.dylib",
]:
    if os.path.exists(path):
        VEC_DYLIB = path
        break

conn = sqlite3.connect(SQLITE_PATH)
if VEC_DYLIB:
    # Loading the extension is best-effort: the report still renders without
    # it (vector table counts then fall back to 0). Only the expected failure
    # modes are caught — sqlite3 errors, or an interpreter whose sqlite3
    # module lacks extension support — rather than a bare `except`.
    try:
        conn.enable_load_extension(True)
        try:
            conn.load_extension(VEC_DYLIB)
        finally:
            # Always switch extension loading back off, even on failure.
            conn.enable_load_extension(False)
    except (sqlite3.Error, AttributeError) as exc:
        print("WARNING: could not load {}: {}".format(VEC_DYLIB, exc), file=sys.stderr)
c = conn.cursor()
# Video metadata: only the first row of `videos` is consulted.
c.execute("SELECT file_uuid, file_name, duration, fps FROM videos LIMIT 1")
row = c.fetchone()
if row is None:
    print("No video data found")
    sys.exit(1)

file_uuid = row[0]
video_name = row[1]
# NULL or zero values fall back to hard-coded defaults (6785 s, 25 fps).
duration = float(row[2] or 6785)
fps = float(row[3] or 25.0)

# Reported sample rate is fps / sample_interval (about 8 Hz at 25 fps).
# NOTE(review): presumably face detection runs on every 3rd frame — confirm.
sample_interval = 3
hz = fps / sample_interval
# Optional identity restriction, appended to the WHERE clause of each of the
# three aggregate queries below together with its bound parameter.
if IDENTITY is None:
    identity_filter, identity_params = "", []
else:
    identity_filter, identity_params = " AND identity_id = ?", [IDENTITY]

# Trace spans: one row per trace_id with first/last frame, first/last
# timestamp and detection count, ordered by first timestamp.
trace_query = f"SELECT trace_id, MIN(frame_number), MAX(frame_number), MIN(timestamp_secs), MAX(timestamp_secs), COUNT(*) FROM face_detections WHERE trace_id IS NOT NULL{identity_filter} GROUP BY trace_id ORDER BY MIN(timestamp_secs)"
trace_spans = c.execute(trace_query, identity_params).fetchall()

# Density: detections per 5-second bucket, keyed by bucket index.
density_query = f"SELECT CAST(FLOOR(timestamp_secs/5) AS INTEGER) as bkt, COUNT(*) as cnt FROM face_detections WHERE trace_id IS NOT NULL{identity_filter} GROUP BY bkt ORDER BY bkt"
density = dict(c.execute(density_query, identity_params).fetchall())

# Total detections (this count does not require a trace_id).
total_detections = c.execute(
    f"SELECT COUNT(*) FROM face_detections WHERE 1=1{identity_filter}", identity_params
).fetchone()[0]
# Trace-to-identity mapping (for tooltips). Built over ALL traces, regardless
# of the --identity filter. If a trace carries more than one identity_id, the
# last row returned wins.
c.execute("SELECT DISTINCT trace_id, identity_id FROM face_detections WHERE trace_id IS NOT NULL AND identity_id IS NOT NULL")
trace_to_identity = {tid: iid for tid, iid in c.fetchall()}

# Identity names for every identity referenced by a trace.
# The ids are looked up in batches: a single "IN (?,?,...)" with one
# placeholder per identity can exceed SQLite's bound-parameter limit
# (999 on older builds) and fail with "too many SQL variables".
_ID_BATCH = 500
id_names = {}
unique_ids = list(set(trace_to_identity.values()))
for _start in range(0, len(unique_ids), _ID_BATCH):
    _batch = unique_ids[_start:_start + _ID_BATCH]
    placeholders = ",".join("?" * len(_batch))
    c.execute(f"SELECT id, name FROM identities WHERE id IN ({placeholders})", _batch)
    id_names.update((r[0], r[1]) for r in c.fetchall())
# Identity info.
# With --identity: load that identity's detail row (identity_info stays None
# when the id is not present in `identities`).
# Without it: load the ten identities with the most face detections as
# (identity_id, face count, distinct trace count) rows.
identity_info = None
# Always defined, so later code may reference it in either mode. Previously
# it only existed in the unfiltered branch, and `--identity 0` (falsy but not
# None) triggered a NameError when the report was rendered.
top_identities = []
if IDENTITY is not None:
    c.execute("SELECT id, name, identity_type, source, status FROM identities WHERE id=?", [IDENTITY])
    r = c.fetchone()
    if r:
        identity_info = {"id": r[0], "name": r[1], "type": r[2], "source": r[3], "status": r[4]}
else:
    c.execute("SELECT identity_id, COUNT(*) as fc, COUNT(DISTINCT trace_id) as tc FROM face_detections WHERE identity_id IS NOT NULL GROUP BY identity_id ORDER BY fc DESC LIMIT 10")
    top_identities = c.fetchall()
# TKG table statistics: total node count, node count per node_type, and
# total edge count.
tkg_nodes = c.execute("SELECT COUNT(*) FROM tkg_nodes").fetchone()[0]
tkg_types = {
    node_type: node_count
    for node_type, node_count in c.execute(
        "SELECT node_type, COUNT(*) FROM tkg_nodes GROUP BY node_type"
    )
}
tkg_edges = c.execute("SELECT COUNT(*) FROM tkg_edges").fetchone()[0]
# Vector counts: row count of each embedding table, or 0 when the table
# cannot be queried.
vec_counts = {}
for tbl in ["chunk_embeddings", "face_embeddings", "voice_embeddings"]:
    try:
        c.execute(f"SELECT COUNT(*) FROM {tbl}")
        vec_counts[tbl] = c.fetchone()[0]
    except sqlite3.Error:
        # Catch only database errors (e.g. the table is missing, or its
        # virtual-table module was not loaded) instead of a bare `except`,
        # which would also swallow KeyboardInterrupt and programming errors.
        vec_counts[tbl] = 0

# All data is now in memory; the report is rendered without the database.
c.close()
conn.close()
# Density bucket width in seconds; the density query groups by
# timestamp_secs/5, so this value has to stay in step with that SQL.
BUCKET = 5
# Number of buckets needed to cover the whole video duration.
num_buckets = 1 + int(duration / BUCKET)
# Highest per-bucket detection count; 1 when there are no detections at all.
max_density = max(density.values(), default=1)
def build_html():
    """Render the offline report as one self-contained HTML document.

    Reads the module-level data gathered earlier in the script (video_name,
    duration, fps, hz, trace_spans, density, total_detections,
    trace_to_identity, id_names, identity_info, top_identities, tkg_* and
    vec_counts) and produces, in order: header, optional identity card,
    stats row, database summary, TKG node breakdown, density histogram,
    trace timeline (first 2000 traces) and, in unfiltered mode, the top
    identities table.

    Returns:
        str: the complete HTML page, lines joined with newlines.
    """
    # Local import: the module-level name `html` is reused by the script for
    # the rendered report string, so the stdlib module is not imported there.
    from html import escape

    def esc(value):
        """HTML-escape any value for element text or a quoted attribute."""
        return escape(str(value), quote=True)

    # Identity id 0 is falsy, so mode checks must compare against None
    # rather than rely on truthiness.
    unfiltered = IDENTITY is None
    # file_name may be NULL in the database; slicing None would raise.
    safe_video_name = str(video_name or "")

    h = []
    # Database-sourced strings are truncated first and escaped afterwards so
    # that an entity such as "&amp;" is never cut in half.
    h.append('<!DOCTYPE html><html><head><meta charset="utf-8"><title>Offline Report — {}</title>'.format(esc(safe_video_name[:50])))
    h.append('<style>')
    h.append('body{font-family:-apple-system,BlinkMacSystemFont,sans-serif;margin:20px;background:#0d1117;color:#c9d1d9}')
    h.append('h1,h2{color:#e94560}')
    h.append('.stats{display:flex;gap:12px;margin:8px 0;flex-wrap:wrap}')
    h.append('.stat{background:#161b22;padding:6px 14px;border-radius:6px}')
    h.append('.stat .num{font-size:20px;font-weight:bold;color:#e94560}')
    h.append('.stat .label{font-size:10px;color:#8b949e}')
    h.append('.viz{position:relative;background:#0d1117;border:1px solid #30363d;margin:8px 0;overflow:hidden}')
    h.append('.bar{display:block;position:absolute;height:3px;background:#e94560;opacity:0.7;border-radius:1px}')
    h.append('.bar:hover{height:8px;opacity:1}')
    h.append('table{border-collapse:collapse;width:100%;color:#c9d1d9}')
    h.append('th{background:#161b22;text-align:left;padding:6px 10px}')
    h.append('td{padding:4px 10px;border-bottom:1px solid #21262d}')
    h.append('</style></head><body>')
    sub = " (identity: {})".format(esc(identity_info["name"])) if identity_info else ""
    h.append('<h1>📊 Offline Report — {}{}</h1>'.format(esc(safe_video_name[:60]), sub))
    h.append('<div style="color:#666;font-size:11px;margin-bottom:10px">Source: {} | Generated: offline (SQLite)</div>'.format(esc(os.path.basename(SQLITE_PATH))))

    # Identity card (only when --identity matched a row in `identities`)
    if identity_info:
        h.append('<div style="background:#161b22;border:1px solid #30363d;border-radius:8px;padding:16px;margin:12px 0">')
        h.append('<h3 style="margin:0;color:#e94560">Identity Details</h3>')
        h.append('<table><tr><td style="color:#8b949e;width:80px">ID</td><td>{}</td></tr>'.format(esc(identity_info["id"])))
        h.append('<tr><td style="color:#8b949e">Name</td><td style="font-weight:bold">{}</td></tr>'.format(esc(identity_info["name"])))
        h.append('<tr><td style="color:#8b949e">Type</td><td>{}</td></tr>'.format(esc(identity_info["type"])))
        h.append('<tr><td style="color:#8b949e">Source</td><td>{}</td></tr>'.format(esc(identity_info["source"])))
        h.append('<tr><td style="color:#8b949e">Status</td><td>{}</td></tr>'.format(esc(identity_info["status"])))
        h.append('</table></div>')

    # Stats row
    h.append('<div class="stats">')
    h.append('<div class="stat"><div class="num">{:,}</div><div class="label">traces</div></div>'.format(len(trace_spans)))
    h.append('<div class="stat"><div class="num">{:,}</div><div class="label">detections</div></div>'.format(total_detections))
    h.append('<div class="stat"><div class="num">{:.0f}s</div><div class="label">duration</div></div>'.format(duration))
    h.append('<div class="stat"><div class="num">{}</div><div class="label">max/{}s</div></div>'.format(max_density, BUCKET))
    h.append('<div class="stat"><div class="num">{:.0f}fps</div><div class="label">video fps</div></div>'.format(fps))
    h.append('<div class="stat"><div class="num">{:.0f}Hz</div><div class="label">sample rate</div></div>'.format(hz))
    h.append('<div class="stat"><div class="num">{:,}</div><div class="label">{}s buckets</div></div>'.format(num_buckets, BUCKET))
    h.append('</div>')

    # Database summary
    # NOTE(review): the "chunk" row shows the number of traces and the
    # "identities" row shows the number of named identities referenced by
    # traces — neither is a real row count of that table. Confirm intent.
    h.append('<h2>Database Contents</h2>')
    h.append('<table>')
    h.append('<tr><th>Table</th><th style="text-align:right">Rows</th><th>Type</th></tr>')
    for name, count in [
        ("videos", 1), ("chunk", len(trace_spans)),
        ("face_detections", total_detections), ("identities", len(id_names) if unfiltered else 1),
        ("tkg_nodes", tkg_nodes), ("tkg_edges", tkg_edges),
    ]:
        h.append('<tr><td>{}</td><td style="text-align:right">{:,}</td><td>flat</td></tr>'.format(name, count))
    for name, dim in [("chunk_embeddings", 768), ("face_embeddings", 512), ("voice_embeddings", 192)]:
        count = vec_counts.get(name, 0)
        h.append('<tr><td>{}</td><td style="text-align:right">{:,}</td><td>vec0 ({}D)</td></tr>'.format(name, count, dim))
    h.append('</table>')

    # TKG breakdown: one stat tile per node type
    if tkg_types:
        h.append('<h2>TKG Nodes</h2>')
        h.append('<div class="stats">')
        for ntype, cnt in sorted(tkg_types.items()):
            h.append('<div class="stat"><div class="num">{:,}</div><div class="label">{}</div></div>'.format(cnt, esc(ntype)))
        h.append('</div>')

    # 1. Density histogram: one 2px-wide column per bucket, 10px side margins
    h.append('<h2>Face Density Over Time</h2>')
    w_px = num_buckets * 2 + 20
    h.append('<div class="viz" style="width:{}px;height:80px">'.format(w_px))
    for b in range(num_buckets):
        v = density.get(b, 0)
        # Column height is scaled against 60% of the peak bucket.
        h_px = max(2, int(60 * v / max(1, max_density * 0.6))) if v > 0 else 0
        if v == 0:
            color = "#0d1117"
        else:
            # Blend from the dark background colour to the accent colour;
            # intensity saturates at 50% of the peak bucket.
            i = min(v / (max(1, max_density * 0.5)), 1.0)
            r = int(233 * i + 13 * (1 - i))
            g = int(69 * i + 13 * (1 - i))
            bv = int(96 * i + 23 * (1 - i))
            color = "rgb({},{},{})".format(r, g, bv)
        h.append('<span style="position:absolute;left:{}px;bottom:0;width:2px;height:{}px;background:{}" title="{}s: {} faces"></span>'.format(b*2+10, h_px, color, b*BUCKET, v))
    h.append('</div>')

    # 2. Trace timeline: one thin bar per trace, capped at the first 2000
    h.append('<h2>Trace Timeline</h2>')
    show_traces = min(len(trace_spans), 2000)
    bar_h = 2
    chart_height = show_traces * (bar_h + 1) + 10
    h.append('<div class="viz" style="width:{}px;height:{}px">'.format(w_px, chart_height))
    for i, (tid, fn0, fn1, t0, t1, cnt) in enumerate(trace_spans[:show_traces]):
        left = int(t0 / duration * (w_px - 20)) + 10
        width = max(3, int((t1 - t0) / duration * (w_px - 20)))
        top = i * (bar_h + 1) + 5
        # Traces with five or fewer detections are drawn faded.
        opacity = 1.0 if cnt > 5 else 0.3
        identity_note = ""
        iid = trace_to_identity.get(tid)
        # Compare against None so an identity with id 0 is still labelled.
        if iid is not None and iid in id_names:
            identity_note = ", identity: {}".format(esc(id_names[iid]))
        # Start and end time are separated by an en dash; previously they ran
        # together (e.g. "12s45s").
        h.append('<span class="bar" style="left:{}px;top:{}px;width:{}px;height:{}px;opacity:{}" title="T{}: {:.0f}s–{:.0f}s, {} faces{}"></span>'.format(
            left, top, width, bar_h, opacity, esc(tid), t0, t1, cnt, identity_note))
    h.append('</div>')

    # 3. Top identities (unfiltered mode only)
    if unfiltered and top_identities:
        h.append('<h2>Top Identities</h2>')
        h.append('<table>')
        h.append('<tr><th>ID</th><th>Name</th><th style="text-align:right">Faces</th><th style="text-align:right">Traces</th></tr>')
        for iid, fc, tc in top_identities:
            # Fall back to "#<id>" when the identity is unknown OR its name
            # is NULL/empty (a NULL name used to crash on the slice).
            name = str(id_names.get(iid) or "#{}".format(iid))[:50]
            h.append('<tr><td style="color:#8b949e">{}</td><td>{}</td><td style="text-align:right">{:,}</td><td style="text-align:right">{}</td></tr>'.format(esc(iid), esc(name), fc, tc))
        h.append('</table>')

    h.append('</body></html>')
    return '\n'.join(h)
# Render the report, write it to OUT and print a short summary.
html = build_html()
# The page declares <meta charset="utf-8"> and contains non-ASCII characters,
# so write it as UTF-8 explicitly instead of relying on the locale default
# (which can raise UnicodeEncodeError or produce a mislabelled file).
with open(OUT, 'w', encoding='utf-8') as f:
    f.write(html)
print("Saved: {}".format(OUT))
print("Traces: {}, Detections: {}, Duration: {:.0f}s, Sample: {:.0f}Hz".format(len(trace_spans), total_detections, duration, hz))
# Report the size in bytes as written to disk, not in characters.
print("Size: {:.0f}KB".format(len(html.encode('utf-8')) / 1024))