- Online: remove IDENTITY filter gating on identity_note — always show - Offline: fix id_names scope bug — was overwritten by top10-only dict - Both reports now show 'identity: PERSON_xxx' for all 2000 timeline traces - All 5483 traces have identity mapping (verified in SQLite)
251 lines
11 KiB
Python
251 lines
11 KiB
Python
#!/opt/homebrew/bin/python3.11
|
||
"""
|
||
Offline Report Generator — Uses SQLite file (no PostgreSQL needed).
|
||
Generates comprehensive HTML report with charts, heatmaps, and vector stats.
|
||
|
||
Usage:
|
||
python3 render_offline_report.py <uuid>.sqlite [output.html]
|
||
python3 render_offline_report.py <uuid>.sqlite --identity <id>
|
||
"""
|
||
import sys, json, sqlite3, os, argparse
|
||
from collections import defaultdict
|
||
|
||
# Command-line interface: a required SQLite path, an optional output path and
# an optional identity filter.
parser = argparse.ArgumentParser()
parser.add_argument("sqlite_path", help="Path to the .sqlite file")
parser.add_argument("output", nargs="?", default=None, help="Output HTML path")
parser.add_argument("--identity", "-i", type=int, default=None, help="Filter by identity_id")
args = parser.parse_args()

SQLITE_PATH = args.sqlite_path
IDENTITY = args.identity

if args.output:
    OUT = args.output
else:
    # Default output is "<input minus .sqlite>_report.html". Only a trailing
    # ".sqlite" is stripped: a plain str.replace() would leave the path
    # unchanged for inputs such as "foo.db", so the report would be written
    # over the database itself, and it would also rewrite ".sqlite" occurring
    # in a directory name.
    _suffix = ".sqlite"
    _stem = SQLITE_PATH[:-len(_suffix)] if SQLITE_PATH.endswith(_suffix) else SQLITE_PATH
    OUT = _stem + "_report.html"

if not os.path.exists(SQLITE_PATH):
    print(f"ERROR: {SQLITE_PATH} not found")
    sys.exit(1)
|
||
|
||
# Look for an optional sqlite-vec extension next to this script, then in /tmp.
VEC_DYLIB = None
for path in [
    os.path.join(os.path.dirname(os.path.abspath(__file__)), "vec0.dylib"),
    "/tmp/vec0.dylib",
]:
    if os.path.exists(path):
        VEC_DYLIB = path
        break

conn = sqlite3.connect(SQLITE_PATH)
if VEC_DYLIB:
    # Loading the extension is best-effort: the report still renders without
    # it. Only the expected failures are caught: AttributeError when this
    # Python build lacks loadable-extension support, sqlite3.Error when the
    # library itself cannot be loaded.
    try:
        conn.enable_load_extension(True)
        try:
            conn.load_extension(VEC_DYLIB)
        finally:
            # Always switch extension loading back off, even after a failure.
            conn.enable_load_extension(False)
    except (AttributeError, sqlite3.Error) as exc:
        print("WARNING: could not load {}: {}".format(VEC_DYLIB, exc), file=sys.stderr)
c = conn.cursor()
|
||
|
||
# Read video metadata (only the first row of `videos` is used).
c.execute("SELECT file_uuid, file_name, duration, fps FROM videos LIMIT 1")
row = c.fetchone()
if row is None:
    print("No video data found")
    sys.exit(1)

file_uuid = row[0]
# file_name is sliced and formatted later on; fall back to the database file
# name so a NULL column does not crash the renderer.
video_name = row[1] or os.path.basename(SQLITE_PATH)
# NOTE(review): 6785 s is used when duration is NULL or 0 — it looks like the
# length of one specific video; confirm this default is intended.
duration = float(row[2] or 6785)
fps = float(row[3] or 25.0)

sample_interval = 3  # every 3rd frame; about 8 Hz at the default 25 fps
hz = fps / sample_interval
|
||
|
||
# Build the optional identity filter: a SQL fragment plus its bound parameter,
# appended to every face_detections query below.
identity_filter = ""
identity_params = []
if IDENTITY is not None:
    identity_filter = " AND identity_id = ?"
    identity_params = [IDENTITY]

# Trace spans: one row per trace with first/last frame, first/last timestamp
# and detection count, ordered by start time.
trace_query = (
    "SELECT trace_id, MIN(frame_number), MAX(frame_number), "
    "MIN(timestamp_secs), MAX(timestamp_secs), COUNT(*) "
    "FROM face_detections WHERE trace_id IS NOT NULL" + identity_filter +
    " GROUP BY trace_id ORDER BY MIN(timestamp_secs)"
)
c.execute(trace_query, identity_params)
trace_spans = c.fetchall()

# Density: detections per 5-second bucket. CAST(... AS INTEGER) replaces
# FLOOR(), which exists only in SQLite builds with the math functions
# enabled; both give the same bucket for non-negative timestamps
# (assumes timestamp_secs is never negative — TODO confirm).
density_query = (
    "SELECT CAST(timestamp_secs/5 AS INTEGER) as bkt, COUNT(*) as cnt "
    "FROM face_detections WHERE trace_id IS NOT NULL" + identity_filter +
    " GROUP BY bkt ORDER BY bkt"
)
c.execute(density_query, identity_params)
density = {r[0]: r[1] for r in c.fetchall()}

# Total detections. Unlike the two queries above, this also counts rows whose
# trace_id is NULL.
c.execute(f"SELECT COUNT(*) FROM face_detections WHERE 1=1{identity_filter}", identity_params)
total_detections = c.fetchone()[0]
|
||
|
||
# Trace-to-identity mapping (for tooltips). Built over ALL traces, regardless
# of any --identity filter. If a trace carries several identities, the last
# row returned wins.
c.execute("SELECT DISTINCT trace_id, identity_id FROM face_detections WHERE trace_id IS NOT NULL AND identity_id IS NOT NULL")
trace_to_identity = dict(c.fetchall())

# Identity names for every identity referenced by at least one trace.
# The ids are looked up in batches: a single "IN (?, ?, ...)" with one
# placeholder per identity can exceed SQLite's bound-variable limit on
# databases with many identities.
id_names = {}
unique_ids = set(trace_to_identity.values())
_id_list = list(unique_ids)
_ID_BATCH = 500
for _start in range(0, len(_id_list), _ID_BATCH):
    _batch = _id_list[_start:_start + _ID_BATCH]
    placeholders = ",".join("?" * len(_batch))
    c.execute(f"SELECT id, name FROM identities WHERE id IN ({placeholders})", _batch)
    id_names.update(c.fetchall())
|
||
|
||
# Identity info: details of the selected identity when --identity is given,
# otherwise the ten identities with the most detections.
identity_info = None
# Always defined, so the renderer can read it no matter which branch runs.
# Previously it existed only in the unfiltered branch, and `--identity 0`
# hit a NameError in build_html.
top_identities = []
if IDENTITY is not None:
    c.execute("SELECT id, name, identity_type, source, status FROM identities WHERE id=?", [IDENTITY])
    r = c.fetchone()
    if r:
        identity_info = {"id": r[0], "name": r[1], "type": r[2], "source": r[3], "status": r[4]}
else:
    c.execute("SELECT identity_id, COUNT(*) as fc, COUNT(DISTINCT trace_id) as tc FROM face_detections WHERE identity_id IS NOT NULL GROUP BY identity_id ORDER BY fc DESC LIMIT 10")
    top_identities = c.fetchall()
|
||
|
||
# TKG stats: total nodes, nodes per node_type, total edges.
c.execute("SELECT COUNT(*) FROM tkg_nodes")
tkg_nodes = c.fetchone()[0]
c.execute("SELECT node_type, COUNT(*) FROM tkg_nodes GROUP BY node_type")
tkg_types = dict(c.fetchall())
c.execute("SELECT COUNT(*) FROM tkg_edges")
tkg_edges = c.fetchone()[0]

# Vector counts. A table that cannot be queried (missing table, or the vector
# extension was not loaded) is reported as 0 rows. Only database errors are
# treated this way; anything else is a real bug and should surface.
vec_counts = {}
for tbl in ["chunk_embeddings", "face_embeddings", "voice_embeddings"]:
    try:
        c.execute(f"SELECT COUNT(*) FROM {tbl}")
        vec_counts[tbl] = c.fetchone()[0]
    except sqlite3.Error:
        vec_counts[tbl] = 0
|
||
|
||
# All queries are done; release the cursor and the connection.
c.close()
conn.close()

# Histogram geometry: bucket width in seconds, number of buckets covering the
# whole video, and the busiest bucket's count (1 when there is no data).
BUCKET = 5
num_buckets = 1 + int(duration / BUCKET)
max_density = max(density.values(), default=1)
|
||
|
||
def build_html():
    """Render the offline report as a single HTML document.

    Reads the module-level query results (video metadata, trace spans, density
    buckets, identity maps, TKG and vector counts) and emits, in order: page
    header, optional identity card, stats row, database summary, TKG node
    breakdown, density histogram, trace timeline (first 2000 traces) and,
    when no identity filter is active, the top-identities table.

    Every value that originates from the database or the file system is
    HTML-escaped before it is placed in element text or a ``title`` attribute.

    Returns:
        str: the complete HTML page, one appended fragment per line.
    """
    # Imported locally: the module-level name `html` is rebound to the
    # rendered report string further down the script.
    from html import escape

    def esc(value):
        """Escape an arbitrary value for HTML text and quoted attributes."""
        return escape(str(value), quote=True)

    # `IDENTITY is not None` rather than truthiness: identity id 0 is valid.
    filtered = IDENTITY is not None
    title = "" if video_name is None else str(video_name)

    h = []
    h.append('<!DOCTYPE html><html><head><meta charset="utf-8"><title>Offline Report — {}</title>'.format(esc(title[:50])))
    h.append('<style>')
    h.append('body{font-family:-apple-system,BlinkMacSystemFont,sans-serif;margin:20px;background:#0d1117;color:#c9d1d9}')
    h.append('h1,h2{color:#e94560}')
    h.append('.stats{display:flex;gap:12px;margin:8px 0;flex-wrap:wrap}')
    h.append('.stat{background:#161b22;padding:6px 14px;border-radius:6px}')
    h.append('.stat .num{font-size:20px;font-weight:bold;color:#e94560}')
    h.append('.stat .label{font-size:10px;color:#8b949e}')
    h.append('.viz{position:relative;background:#0d1117;border:1px solid #30363d;margin:8px 0;overflow:hidden}')
    h.append('.bar{display:block;position:absolute;height:3px;background:#e94560;opacity:0.7;border-radius:1px}')
    h.append('.bar:hover{height:8px;opacity:1}')
    h.append('table{border-collapse:collapse;width:100%;color:#c9d1d9}')
    h.append('th{background:#161b22;text-align:left;padding:6px 10px}')
    h.append('td{padding:4px 10px;border-bottom:1px solid #21262d}')
    h.append('</style></head><body>')

    sub = " (identity: {})".format(esc(identity_info["name"])) if identity_info else ""
    h.append('<h1>📊 Offline Report — {}{}</h1>'.format(esc(title[:60]), sub))
    h.append('<div style="color:#666;font-size:11px;margin-bottom:10px">Source: {} | Generated: offline (SQLite)</div>'.format(esc(os.path.basename(SQLITE_PATH))))

    # Identity card (only when the selected identity exists in the database).
    if identity_info:
        h.append('<div style="background:#161b22;border:1px solid #30363d;border-radius:8px;padding:16px;margin:12px 0">')
        h.append('<h3 style="margin:0;color:#e94560">Identity Details</h3>')
        h.append('<table><tr><td style="color:#8b949e;width:80px">ID</td><td>{}</td></tr>'.format(esc(identity_info["id"])))
        h.append('<tr><td style="color:#8b949e">Name</td><td style="font-weight:bold">{}</td></tr>'.format(esc(identity_info["name"])))
        h.append('<tr><td style="color:#8b949e">Type</td><td>{}</td></tr>'.format(esc(identity_info["type"])))
        h.append('<tr><td style="color:#8b949e">Source</td><td>{}</td></tr>'.format(esc(identity_info["source"])))
        h.append('<tr><td style="color:#8b949e">Status</td><td>{}</td></tr>'.format(esc(identity_info["status"])))
        h.append('</table></div>')

    # Stats row
    h.append('<div class="stats">')
    h.append('<div class="stat"><div class="num">{:,}</div><div class="label">traces</div></div>'.format(len(trace_spans)))
    h.append('<div class="stat"><div class="num">{:,}</div><div class="label">detections</div></div>'.format(total_detections))
    h.append('<div class="stat"><div class="num">{:.0f}s</div><div class="label">duration</div></div>'.format(duration))
    h.append('<div class="stat"><div class="num">{}</div><div class="label">max/{}s</div></div>'.format(max_density, BUCKET))
    h.append('<div class="stat"><div class="num">{:.0f}fps</div><div class="label">video fps</div></div>'.format(fps))
    h.append('<div class="stat"><div class="num">{:.0f}Hz</div><div class="label">sample rate</div></div>'.format(hz))
    h.append('<div class="stat"><div class="num">{:,}</div><div class="label">{}s buckets</div></div>'.format(num_buckets, BUCKET))
    h.append('</div>')

    # Database summary
    h.append('<h2>Database Contents</h2>')
    h.append('<table>')
    h.append('<tr><th>Table</th><th style="text-align:right">Rows</th><th>Type</th></tr>')
    # NOTE(review): the "chunk" row shows the number of trace spans and the
    # "videos" row is hard-coded to 1; neither is an actual row count.
    for name, count in [
        ("videos", 1), ("chunk", len(trace_spans)),
        ("face_detections", total_detections), ("identities", 1 if filtered else len(id_names)),
        ("tkg_nodes", tkg_nodes), ("tkg_edges", tkg_edges),
    ]:
        h.append('<tr><td>{}</td><td style="text-align:right">{:,}</td><td>flat</td></tr>'.format(name, count))

    for name, dim in [("chunk_embeddings", 768), ("face_embeddings", 512), ("voice_embeddings", 192)]:
        count = vec_counts.get(name, 0)
        h.append('<tr><td>{}</td><td style="text-align:right">{:,}</td><td>vec0 ({}D)</td></tr>'.format(name, count, dim))
    h.append('</table>')

    # TKG breakdown
    if tkg_types:
        h.append('<h2>TKG Nodes</h2>')
        h.append('<div class="stats">')
        # Sort on the string form so a NULL node_type cannot break ordering.
        for ntype, cnt in sorted(tkg_types.items(), key=lambda kv: str(kv[0])):
            h.append('<div class="stat"><div class="num">{:,}</div><div class="label">{}</div></div>'.format(cnt, esc(ntype)))
        h.append('</div>')

    # 1. Density histogram: one 2px-wide column per bucket.
    h.append('<h2>Face Density Over Time</h2>')
    w_px = num_buckets * 2 + 20
    h.append('<div class="viz" style="width:{}px;height:80px">'.format(w_px))
    for b in range(num_buckets):
        v = density.get(b, 0)
        h_px = max(2, int(60 * v / max(1, max_density * 0.6))) if v > 0 else 0
        if v == 0:
            color = "#0d1117"
        else:
            # Blend from the background colour to the accent colour; the
            # blend saturates at half of the maximum density.
            mix = min(v / (max(1, max_density * 0.5)), 1.0)
            r = int(233 * mix + 13 * (1 - mix))
            g = int(69 * mix + 13 * (1 - mix))
            bv = int(96 * mix + 23 * (1 - mix))
            color = "rgb({},{},{})".format(r, g, bv)
        h.append('<span style="position:absolute;left:{}px;bottom:0;width:2px;height:{}px;background:{}" title="{}s: {} faces"></span>'.format(b*2+10, h_px, color, b*BUCKET, v))
    h.append('</div>')

    # 2. Trace timeline: one thin bar per trace, capped at 2000 traces.
    h.append('<h2>Trace Timeline</h2>')
    show_traces = min(len(trace_spans), 2000)
    bar_h = 2
    chart_height = show_traces * (bar_h + 1) + 10
    h.append('<div class="viz" style="width:{}px;height:{}px">'.format(w_px, chart_height))
    for idx, (tid, fn0, fn1, t0, t1, cnt) in enumerate(trace_spans[:show_traces]):
        left = int(t0 / duration * (w_px - 20)) + 10
        width = max(3, int((t1 - t0) / duration * (w_px - 20)))
        top = idx * (bar_h + 1) + 5
        opacity = 1.0 if cnt > 5 else 0.3  # short traces are drawn faded
        identity_note = ""
        iid = trace_to_identity.get(tid)
        if iid is not None and iid in id_names:
            label = id_names[iid]
            if label is None:
                label = "#{}".format(iid)
            identity_note = ", identity: {}".format(esc(label))
        h.append('<span class="bar" style="left:{}px;top:{}px;width:{}px;height:{}px;opacity:{}" title="T{}: {:.0f}s–{:.0f}s, {} faces{}"></span>'.format(
            left, top, width, bar_h, opacity, esc(tid), t0, t1, cnt, identity_note))
    h.append('</div>')

    # 3. Top identities (unfiltered reports only). `filtered` is tested first,
    # so `top_identities` is never read when an identity filter is active.
    if not filtered and top_identities:
        h.append('<h2>Top Identities</h2>')
        h.append('<table>')
        h.append('<tr><th>ID</th><th>Name</th><th style="text-align:right">Faces</th><th style="text-align:right">Traces</th></tr>')
        for iid, fc, tc in top_identities:
            name = id_names.get(iid)
            if name is None:
                name = "#{}".format(iid)
            # Truncate before escaping so an entity is never cut in half.
            h.append('<tr><td style="color:#8b949e">{}</td><td>{}</td><td style="text-align:right">{:,}</td><td style="text-align:right">{}</td></tr>'.format(esc(iid), esc(str(name)[:50]), fc, tc))
        h.append('</table>')

    h.append('</body></html>')
    return '\n'.join(h)
|
||
|
||
# Render the report and write it out. The page declares charset utf-8 and
# contains non-ASCII characters, so the file is written as UTF-8 explicitly
# instead of relying on the platform's default encoding.
html = build_html()
with open(OUT, "w", encoding="utf-8") as f:
    f.write(html)

print("Saved: {}".format(OUT))
print("Traces: {}, Detections: {}, Duration: {:.0f}s, Sample: {:.0f}Hz".format(len(trace_spans), total_detections, duration, hz))
# Report the size in bytes as written, not the number of characters.
print("Size: {:.0f}KB".format(len(html.encode("utf-8")) / 1024))
|