feat: offline report from SQLite, no PostgreSQL needed

- Add render_offline_report.py — reads .sqlite directly
- Reports include: DB contents, TKG breakdown, density histogram,
  trace timeline, top identities, identity details card
- Supports --identity filter (like online mode)
- Add release visualize-offline <sqlite> [-i identity] [-o output]
- Works with exported .sqlite from export_sqlite.py
- Uses sqlite-vec vec0 tables for vector metadata
This commit is contained in:
Accusys
2026-05-13 03:10:59 +08:00
parent 007fe10c2e
commit bbf8e64752
2 changed files with 274 additions and 0 deletions

View File

@@ -0,0 +1,239 @@
#!/opt/homebrew/bin/python3.11
"""
Offline Report Generator — Uses SQLite file (no PostgreSQL needed).
Generates comprehensive HTML report with charts, heatmaps, and vector stats.
Usage:
python3 render_offline_report.py <uuid>.sqlite [output.html]
python3 render_offline_report.py <uuid>.sqlite --identity <id>
"""
import sys, json, sqlite3, os, argparse
from collections import defaultdict
# Command-line interface: positional SQLite path, optional positional output
# path, optional --identity/-i filter.
parser = argparse.ArgumentParser()
parser.add_argument("sqlite_path", help="Path to the .sqlite file")
parser.add_argument("output", nargs="?", default=None, help="Output HTML path")
parser.add_argument("--identity", "-i", type=int, default=None, help="Filter by identity_id")
args = parser.parse_args()

SQLITE_PATH = args.sqlite_path
# Derive the default output name from the path stem. A plain
# str.replace(".sqlite", ...) returns the input path unchanged when the file
# has another extension (e.g. "data.db"), which would make the report
# overwrite the database, and it also rewrites ".sqlite" inside directory names.
OUT = args.output or os.path.splitext(SQLITE_PATH)[0] + "_report.html"
IDENTITY = args.identity

if not os.path.exists(SQLITE_PATH):
    print(f"ERROR: {SQLITE_PATH} not found")
    sys.exit(1)
# Refuse to clobber the input even when the output path was given explicitly.
if os.path.abspath(OUT) == os.path.abspath(SQLITE_PATH):
    print(f"ERROR: output path {OUT} would overwrite the input database")
    sys.exit(1)
# Load sqlite-vec extension if available.
# Candidates are probed in order: next to this script first, then /tmp.
# NOTE(review): /tmp is world-writable on most systems; loading a native
# library from there lets any local user inject code — confirm this fallback
# is really wanted.
VEC_DYLIB = None
for candidate in (
    os.path.join(os.path.dirname(os.path.abspath(__file__)), "vec0.dylib"),
    "/tmp/vec0.dylib",
):
    if os.path.exists(candidate):
        VEC_DYLIB = candidate
        break

conn = sqlite3.connect(SQLITE_PATH)
if VEC_DYLIB:
    # Best effort: the report still works without the extension, so a failure
    # is reported and then ignored. AttributeError covers Python builds whose
    # sqlite3 module lacks loadable-extension support; sqlite3.Error covers a
    # library that fails to load.
    try:
        conn.enable_load_extension(True)
        try:
            conn.load_extension(VEC_DYLIB)
        finally:
            # Always switch extension loading back off, even if the load failed.
            conn.enable_load_extension(False)
    except (sqlite3.Error, AttributeError) as exc:
        print(f"WARNING: could not load sqlite-vec from {VEC_DYLIB}: {exc}", file=sys.stderr)
c = conn.cursor()
# Read video metadata (only the first row of `videos` is used).
c.execute("SELECT file_uuid, file_name, duration, fps FROM videos LIMIT 1")
row = c.fetchone()
if not row:
    print("No video data found")
    sys.exit(1)

file_uuid, video_name, _raw_duration, _raw_fps = row
# NULL/zero columns fall back to fixed defaults.
# NOTE(review): 6785 s looks like a value taken from one specific video — confirm.
duration = float(_raw_duration or 6785)
fps = float(_raw_fps or 25.0)

# Every 3rd frame is assumed to be sampled (about 8 Hz at the default 25 fps).
sample_interval = 3
hz = fps / sample_interval
# Build identity filter: an optional extra WHERE clause plus its bound
# parameter, shared by every face_detections query below.
identity_filter = ""
identity_params = []
if IDENTITY is not None:
    identity_filter = " AND identity_id = ?"
    identity_params = [IDENTITY]

# Query trace spans: one row per trace with its first/last frame, first/last
# timestamp and detection count, ordered by first appearance.
trace_query = (
    "SELECT trace_id, MIN(frame_number), MAX(frame_number), "
    "MIN(timestamp_secs), MAX(timestamp_secs), COUNT(*) "
    f"FROM face_detections WHERE trace_id IS NOT NULL{identity_filter} "
    "GROUP BY trace_id ORDER BY MIN(timestamp_secs)"
)
c.execute(trace_query, identity_params)
trace_spans = c.fetchall()

# Query density: detections per 5-second bucket (the 5 must stay in sync with
# BUCKET, which is defined further down).
# CAST(... AS INTEGER) is used without FLOOR(): SQLite's math functions exist
# only in builds compiled with SQLITE_ENABLE_MATH_FUNCTIONS, while CAST works
# everywhere and yields the same bucket for non-negative timestamps.
density_query = (
    "SELECT CAST(timestamp_secs/5 AS INTEGER) as bkt, COUNT(*) as cnt "
    f"FROM face_detections WHERE trace_id IS NOT NULL{identity_filter} "
    "GROUP BY bkt ORDER BY bkt"
)
c.execute(density_query, identity_params)
density = {bkt: cnt for bkt, cnt in c.fetchall()}

# Total detections (includes rows without a trace_id).
c.execute(f"SELECT COUNT(*) FROM face_detections WHERE 1=1{identity_filter}", identity_params)
total_detections = c.fetchone()[0]
# Identity info
identity_info = None
# Always bind these so later reads cannot raise NameError when the filtered
# branch is taken — notably `--identity 0`, which is not None but is falsy.
top_identities = []
id_names = {}
if IDENTITY is not None:
    # Filtered mode: fetch the details of the one requested identity
    # (identity_info stays None when the id does not exist).
    c.execute("SELECT id, name, identity_type, source, status FROM identities WHERE id=?", [IDENTITY])
    r = c.fetchone()
    if r:
        identity_info = {"id": r[0], "name": r[1], "type": r[2], "source": r[3], "status": r[4]}
else:
    # Unfiltered mode: the ten identities with the most detections, as
    # (identity_id, face_count, trace_count) rows.
    c.execute("SELECT identity_id, COUNT(*) as fc, COUNT(DISTINCT trace_id) as tc FROM face_detections WHERE identity_id IS NOT NULL GROUP BY identity_id ORDER BY fc DESC LIMIT 10")
    top_identities = c.fetchall()
    # Get identity names
    if top_identities:
        ids = [r[0] for r in top_identities]
        placeholders = ",".join("?" for _ in ids)
        c.execute(f"SELECT id, name FROM identities WHERE id IN ({placeholders})", ids)
        id_names = {r[0]: r[1] for r in c.fetchall()}
# TKG stats: total node count, node count per node_type, total edge count.
c.execute("SELECT COUNT(*) FROM tkg_nodes")
tkg_nodes = c.fetchone()[0]
c.execute("SELECT node_type, COUNT(*) FROM tkg_nodes GROUP BY node_type")
tkg_types = dict(c.fetchall())
c.execute("SELECT COUNT(*) FROM tkg_edges")
tkg_edges = c.fetchone()[0]

# Vector counts
vec_counts = {}
for tbl in ("chunk_embeddings", "face_embeddings", "voice_embeddings"):
    try:
        c.execute(f"SELECT COUNT(*) FROM {tbl}")
        vec_counts[tbl] = c.fetchone()[0]
    except sqlite3.Error:
        # The table may be missing or unreadable (for example when the vec0
        # extension was not loaded); report it as empty instead of failing.
        # Only database errors are swallowed, so Ctrl-C and programming
        # errors still surface.
        vec_counts[tbl] = 0

# All data is in memory from here on; release the database.
c.close()
conn.close()
# Width of one density-histogram bucket, in seconds.
BUCKET = 5
# Enough buckets to cover every timestamp from 0 through `duration`.
num_buckets = 1 + int(duration / BUCKET)
# Highest per-bucket detection count; 1 when there are no detections at all.
max_density = max(density.values(), default=1)
def build_html():
    """Render the offline report as one self-contained HTML page.

    Takes no arguments; reads the module-level query results and layout
    values computed earlier in the script: ``video_name``, ``identity_info``,
    ``trace_spans``, ``density``, ``total_detections``, ``tkg_nodes``,
    ``tkg_types``, ``tkg_edges``, ``vec_counts``, ``duration``, ``fps``,
    ``hz``, ``BUCKET``, ``num_buckets``, ``max_density``, ``IDENTITY`` and
    ``SQLITE_PATH``. ``top_identities`` and ``id_names`` are read only when
    no identity filter is active (``IDENTITY is None``).

    Every database- or path-derived string is HTML-escaped before it is
    embedded in markup.

    Returns:
        str: The complete document; the appended fragments joined by newlines.
    """
    # Imported locally: the script rebinds the module-level name `html` to the
    # rendered page, so a top-level `import html` would be shadowed.
    from html import escape

    def esc(value):
        """Convert any value to text and escape it for HTML text/attributes."""
        return escape(str(value), quote=True)

    # file_name may be NULL in the database; slicing None would raise.
    title = video_name or ""
    # "No filter" must be tested with `is None`: identity id 0 is a valid
    # filter value but is falsy.
    unfiltered = IDENTITY is None

    h = []
    h.append('<!DOCTYPE html><html><head><meta charset="utf-8"><title>Offline Report — {}</title>'.format(esc(title[:50])))
    h.append('<style>')
    h.append('body{font-family:-apple-system,BlinkMacSystemFont,sans-serif;margin:20px;background:#0d1117;color:#c9d1d9}')
    h.append('h1,h2{color:#e94560}')
    h.append('.stats{display:flex;gap:12px;margin:8px 0;flex-wrap:wrap}')
    h.append('.stat{background:#161b22;padding:6px 14px;border-radius:6px}')
    h.append('.stat .num{font-size:20px;font-weight:bold;color:#e94560}')
    h.append('.stat .label{font-size:10px;color:#8b949e}')
    h.append('.viz{position:relative;background:#0d1117;border:1px solid #30363d;margin:8px 0;overflow:hidden}')
    h.append('.bar{display:block;position:absolute;height:3px;background:#e94560;opacity:0.7;border-radius:1px}')
    h.append('.bar:hover{height:8px;opacity:1}')
    h.append('table{border-collapse:collapse;width:100%;color:#c9d1d9}')
    h.append('th{background:#161b22;text-align:left;padding:6px 10px}')
    h.append('td{padding:4px 10px;border-bottom:1px solid #21262d}')
    h.append('</style></head><body>')

    sub = " (identity: {})".format(esc(identity_info["name"])) if identity_info else ""
    h.append('<h1>📊 Offline Report — {}{}</h1>'.format(esc(title[:60]), sub))
    h.append('<div style="color:#666;font-size:11px;margin-bottom:10px">Source: {} | Generated: offline (SQLite)</div>'.format(esc(os.path.basename(SQLITE_PATH))))

    # Identity card (only when the requested identity was found)
    if identity_info:
        h.append('<div style="background:#161b22;border:1px solid #30363d;border-radius:8px;padding:16px;margin:12px 0">')
        h.append('<h3 style="margin:0;color:#e94560">Identity Details</h3>')
        h.append('<table><tr><td style="color:#8b949e;width:80px">ID</td><td>{}</td></tr>'.format(esc(identity_info["id"])))
        h.append('<tr><td style="color:#8b949e">Name</td><td style="font-weight:bold">{}</td></tr>'.format(esc(identity_info["name"])))
        h.append('<tr><td style="color:#8b949e">Type</td><td>{}</td></tr>'.format(esc(identity_info["type"])))
        h.append('<tr><td style="color:#8b949e">Source</td><td>{}</td></tr>'.format(esc(identity_info["source"])))
        h.append('<tr><td style="color:#8b949e">Status</td><td>{}</td></tr>'.format(esc(identity_info["status"])))
        h.append('</table></div>')

    # Stats row
    h.append('<div class="stats">')
    h.append('<div class="stat"><div class="num">{:,}</div><div class="label">traces</div></div>'.format(len(trace_spans)))
    h.append('<div class="stat"><div class="num">{:,}</div><div class="label">detections</div></div>'.format(total_detections))
    h.append('<div class="stat"><div class="num">{:.0f}s</div><div class="label">duration</div></div>'.format(duration))
    h.append('<div class="stat"><div class="num">{}</div><div class="label">max/{}s</div></div>'.format(max_density, BUCKET))
    h.append('<div class="stat"><div class="num">{:.0f}fps</div><div class="label">video fps</div></div>'.format(fps))
    h.append('<div class="stat"><div class="num">{:.0f}Hz</div><div class="label">sample rate</div></div>'.format(hz))
    h.append('<div class="stat"><div class="num">{:,}</div><div class="label">{}s buckets</div></div>'.format(num_buckets, BUCKET))
    h.append('</div>')

    # Database summary
    # NOTE(review): several "Rows" values are stand-ins, not table counts:
    # "videos" is hard-coded to 1, "chunk" shows the number of trace spans,
    # and "identities" shows the number of named top identities (or 1 when
    # filtering). Confirm whether real COUNT(*) values were intended.
    h.append('<h2>Database Contents</h2>')
    h.append('<table>')
    h.append('<tr><th>Table</th><th style="text-align:right">Rows</th><th>Type</th></tr>')
    flat_rows = [
        ("videos", 1),
        ("chunk", len(trace_spans)),
        ("face_detections", total_detections),
        ("identities", len(id_names) if unfiltered else 1),
        ("tkg_nodes", tkg_nodes),
        ("tkg_edges", tkg_edges),
    ]
    for name, count in flat_rows:
        h.append('<tr><td>{}</td><td style="text-align:right">{:,}</td><td>flat</td></tr>'.format(name, count))
    for name, dim in [("chunk_embeddings", 768), ("face_embeddings", 512), ("voice_embeddings", 192)]:
        count = vec_counts.get(name, 0)
        h.append('<tr><td>{}</td><td style="text-align:right">{:,}</td><td>vec0 ({}D)</td></tr>'.format(name, count, dim))
    h.append('</table>')

    # TKG breakdown
    if tkg_types:
        h.append('<h2>TKG Nodes</h2>')
        h.append('<div class="stats">')
        # Sort on the text form so a NULL node_type cannot break the ordering.
        for ntype, cnt in sorted(tkg_types.items(), key=lambda kv: str(kv[0])):
            h.append('<div class="stat"><div class="num">{:,}</div><div class="label">{}</div></div>'.format(cnt, esc(ntype)))
        h.append('</div>')

    # 1. Density histogram: one 2px-wide column per bucket, 10px side margins.
    h.append('<h2>Face Density Over Time</h2>')
    w_px = num_buckets * 2 + 20
    h.append('<div class="viz" style="width:{}px;height:80px">'.format(w_px))
    for b in range(num_buckets):
        v = density.get(b, 0)
        # Height scales against 60% of the peak, so the tallest columns
        # overflow the 80px box and are clipped by `overflow:hidden`.
        h_px = max(2, int(60 * v / max(1, max_density * 0.6))) if v > 0 else 0
        if v == 0:
            color = "#0d1117"
        else:
            # Blend from a dark base colour to the accent colour; the blend
            # saturates at half of the peak density.
            intensity = min(v / (max(1, max_density * 0.5)), 1.0)
            r = int(233 * intensity + 13 * (1 - intensity))
            g = int(69 * intensity + 13 * (1 - intensity))
            bv = int(96 * intensity + 23 * (1 - intensity))
            color = "rgb({},{},{})".format(r, g, bv)
        h.append('<span style="position:absolute;left:{}px;bottom:0;width:2px;height:{}px;background:{}" title="{}s: {} faces"></span>'.format(b*2+10, h_px, color, b*BUCKET, v))
    h.append('</div>')

    # 2. Trace timeline: one thin bar per trace, capped at the first 2000.
    h.append('<h2>Trace Timeline</h2>')
    show_traces = min(len(trace_spans), 2000)
    bar_h = 2
    chart_height = show_traces * (bar_h + 1) + 10
    h.append('<div class="viz" style="width:{}px;height:{}px">'.format(w_px, chart_height))
    for row_idx, (tid, _fn0, _fn1, t0, t1, cnt) in enumerate(trace_spans[:show_traces]):
        if t0 is None or t1 is None:
            # A trace whose detections all lack timestamps cannot be placed.
            continue
        left = int(t0 / duration * (w_px - 20)) + 10
        width = max(3, int((t1 - t0) / duration * (w_px - 20)))
        top = row_idx * (bar_h + 1) + 5
        # Traces with 5 or fewer detections are drawn faded.
        opacity = 1.0 if cnt > 5 else 0.3
        h.append('<span class="bar" style="left:{}px;top:{}px;width:{}px;height:{}px;opacity:{}" title="T{}: {:.0f}s–{:.0f}s, {} faces"></span>'.format(left, top, width, bar_h, opacity, esc(tid), t0, t1, cnt))
    h.append('</div>')

    # 3. Top identities (unfiltered reports only)
    if unfiltered and top_identities:
        h.append('<h2>Top Identities</h2>')
        h.append('<table>')
        h.append('<tr><th>ID</th><th>Name</th><th style="text-align:right">Faces</th><th style="text-align:right">Traces</th></tr>')
        for iid, fc, tc in top_identities:
            # Fall back to "#<id>" when the identity has no row or no name.
            name = (id_names.get(iid) or "#{}".format(iid))[:50]
            h.append('<tr><td style="color:#8b949e">{}</td><td>{}</td><td style="text-align:right">{:,}</td><td style="text-align:right">{}</td></tr>'.format(esc(iid), esc(name), fc, tc))
        h.append('</table>')

    h.append('</body></html>')
    return '\n'.join(h)
# Render the page and write it out.
html = build_html()
# The page declares charset utf-8 and contains non-ASCII characters, so the
# file must be written as UTF-8 and not in the platform's default encoding.
with open(OUT, "w", encoding="utf-8") as f:
    f.write(html)

print("Saved: {}".format(OUT))
print("Traces: {}, Detections: {}, Duration: {:.0f}s, Sample: {:.0f}Hz".format(len(trace_spans), total_detections, duration, hz))
# Report the encoded size in bytes; len(html) would count characters.
print("Size: {:.0f}KB".format(len(html.encode("utf-8")) / 1024))