diff --git a/scripts/render_offline_report.py b/scripts/render_offline_report.py new file mode 100644 index 0000000..07095cf --- /dev/null +++ b/scripts/render_offline_report.py @@ -0,0 +1,239 @@ +#!/opt/homebrew/bin/python3.11 +""" +Offline Report Generator — Uses SQLite file (no PostgreSQL needed). +Generates comprehensive HTML report with charts, heatmaps, and vector stats. + +Usage: + python3 render_offline_report.py .sqlite [output.html] + python3 render_offline_report.py .sqlite --identity +""" +import sys, json, sqlite3, os, argparse +from collections import defaultdict + +parser = argparse.ArgumentParser() +parser.add_argument("sqlite_path", help="Path to the .sqlite file") +parser.add_argument("output", nargs="?", default=None, help="Output HTML path") +parser.add_argument("--identity", "-i", type=int, default=None, help="Filter by identity_id") +args = parser.parse_args() + +SQLITE_PATH = args.sqlite_path +OUT = args.output or SQLITE_PATH.replace(".sqlite", "_report.html") +IDENTITY = args.identity + +if not os.path.exists(SQLITE_PATH): + print(f"ERROR: {SQLITE_PATH} not found") + sys.exit(1) + +# Load sqlite-vec extension if available +VEC_DYLIB = None +for path in [ + os.path.join(os.path.dirname(os.path.abspath(__file__)), "vec0.dylib"), + "/tmp/vec0.dylib", +]: + if os.path.exists(path): + VEC_DYLIB = path + break + +conn = sqlite3.connect(SQLITE_PATH) +if VEC_DYLIB: + conn.enable_load_extension(True) + try: + conn.load_extension(VEC_DYLIB) + except: + pass + conn.enable_load_extension(False) +c = conn.cursor() + +# Read video metadata +c.execute("SELECT file_uuid, file_name, duration, fps FROM videos LIMIT 1") +row = c.fetchone() +if not row: + print("No video data found") + sys.exit(1) +file_uuid, video_name, duration, fps = row[0], row[1], float(row[2] or 6785), float(row[3] or 25.0) +sample_interval = 3 # 8Hz face detection +hz = fps / sample_interval + +# Build identity filter +identity_filter = "" +identity_params = [] +if IDENTITY is not None: + identity_filter = " AND identity_id = ?" + identity_params = [IDENTITY] + +# Query trace spans +trace_query = f"SELECT trace_id, MIN(frame_number), MAX(frame_number), MIN(timestamp_secs), MAX(timestamp_secs), COUNT(*) FROM face_detections WHERE trace_id IS NOT NULL{identity_filter} GROUP BY trace_id ORDER BY MIN(timestamp_secs)" +c.execute(trace_query, identity_params) +trace_spans = c.fetchall() + +# Query density +density_query = f"SELECT CAST(FLOOR(timestamp_secs/5) AS INTEGER) as bkt, COUNT(*) as cnt FROM face_detections WHERE trace_id IS NOT NULL{identity_filter} GROUP BY bkt ORDER BY bkt" +c.execute(density_query, identity_params) +density = {r[0]: r[1] for r in c.fetchall()} + +# Total detections +c.execute(f"SELECT COUNT(*) FROM face_detections WHERE 1=1{identity_filter}", identity_params) +total_detections = c.fetchone()[0] + +# Identity info +identity_info = None +if IDENTITY is not None: + c.execute("SELECT id, name, identity_type, source, status FROM identities WHERE id=?", [IDENTITY]) + r = c.fetchone() + if r: + identity_info = {"id": r[0], "name": r[1], "type": r[2], "source": r[3], "status": r[4]} +else: + c.execute("SELECT identity_id, COUNT(*) as fc, COUNT(DISTINCT trace_id) as tc FROM face_detections WHERE identity_id IS NOT NULL GROUP BY identity_id ORDER BY fc DESC LIMIT 10") + top_identities = c.fetchall() + # Get identity names + id_names = {} + if top_identities: + ids = [r[0] for r in top_identities] + placeholders = ",".join(["?" for _ in ids]) + c.execute(f"SELECT id, name FROM identities WHERE id IN ({placeholders})", ids) + id_names = {r[0]: r[1] for r in c.fetchall()} + +# TKG stats +c.execute("SELECT COUNT(*) FROM tkg_nodes") +tkg_nodes = c.fetchone()[0] +c.execute("SELECT node_type, COUNT(*) FROM tkg_nodes GROUP BY node_type") +tkg_types = dict(c.fetchall()) +c.execute("SELECT COUNT(*) FROM tkg_edges") +tkg_edges = c.fetchone()[0] + +# Vector counts +vec_counts = {} +for tbl in ["chunk_embeddings", "face_embeddings", "voice_embeddings"]: + try: + c.execute(f"SELECT COUNT(*) FROM {tbl}") + vec_counts[tbl] = c.fetchone()[0] + except: + vec_counts[tbl] = 0 + +c.close() +conn.close() + +BUCKET = 5 +num_buckets = int(duration / BUCKET) + 1 +max_density = max(density.values()) if density else 1 + +def build_html(): + h = [] + h.append('Offline Report — {}'.format(video_name[:50])) + h.append('') + + sub = " (identity: {})".format(identity_info["name"]) if identity_info else "" + h.append('

šŸ“Š Offline Report — {}{}

'.format(video_name[:60], sub)) + h.append('
Source: {} | Generated: offline (SQLite)
'.format(os.path.basename(SQLITE_PATH))) + + # Identity card + if identity_info: + h.append('
') + h.append('

Identity Details

') + h.append(''.format(identity_info["id"])) + h.append(''.format(identity_info["name"])) + h.append(''.format(identity_info["type"])) + h.append(''.format(identity_info["source"])) + h.append(''.format(identity_info["status"])) + h.append('
ID{}
Name{}
Type{}
Source{}
Status{}
') + + # Stats row + h.append('
') + h.append('
{:,}
traces
'.format(len(trace_spans))) + h.append('
{:,}
detections
'.format(total_detections)) + h.append('
{:.0f}s
duration
'.format(duration)) + h.append('
{}
max/{}s
'.format(max_density, BUCKET)) + h.append('
{:.0f}fps
video fps
'.format(fps)) + h.append('
{:.0f}Hz
sample rate
'.format(hz)) + h.append('
{:,}
{}s buckets
'.format(num_buckets, BUCKET)) + h.append('
') + + # Database summary + h.append('

Database Contents

') + h.append('') + h.append('') + for name, count in [ + ("videos", 1), ("chunk", len(trace_spans)), + ("face_detections", total_detections), ("identities", len(id_names) if not IDENTITY else 1), + ("tkg_nodes", tkg_nodes), ("tkg_edges", tkg_edges), + ]: + h.append(''.format(name, count)) + + for name, dim in [("chunk_embeddings", 768), ("face_embeddings", 512), ("voice_embeddings", 192)]: + count = vec_counts.get(name, 0) + h.append(''.format(name, count, dim)) + h.append('
TableRowsType
{}{:,}flat
{}{:,}vec0 ({}D)
') + + # TKG breakdown + if tkg_types: + h.append('

TKG Nodes

') + h.append('
') + for ntype, cnt in sorted(tkg_types.items()): + h.append('
{:,}
{}
'.format(cnt, ntype)) + h.append('
') + + # 1. Density histogram + h.append('

Face Density Over Time

') + w_px = num_buckets * 2 + 20 + h.append('
'.format(w_px)) + for b in range(num_buckets): + v = density.get(b, 0) + h_px = max(2, int(60 * v / max(1, max_density * 0.6))) if v > 0 else 0 + if v == 0: + color = "#0d1117" + else: + i = min(v / (max(1, max_density * 0.5)), 1.0) + r = int(233 * i + 13 * (1 - i)) + g = int(69 * i + 13 * (1 - i)) + bv = int(96 * i + 23 * (1 - i)) + color = "rgb({},{},{})".format(r, g, bv) + h.append(''.format(b*2+10, h_px, color, b*BUCKET, v)) + h.append('
') + + # 2. Trace timeline + h.append('

Trace Timeline

') + show_traces = min(len(trace_spans), 2000) + bar_h = 2 + chart_height = show_traces * (bar_h + 1) + 10 + h.append('
'.format(w_px, chart_height)) + for i, (tid, fn0, fn1, t0, t1, cnt) in enumerate(trace_spans[:show_traces]): + left = int(t0 / duration * (w_px - 20)) + 10 + width = max(3, int((t1 - t0) / duration * (w_px - 20))) + top = i * (bar_h + 1) + 5 + opacity = 1.0 if cnt > 5 else 0.3 + h.append(''.format(left, top, width, bar_h, opacity, tid, t0, t1, cnt)) + h.append('
') + + # 3. Top identities + if not IDENTITY and top_identities: + h.append('

Top Identities

') + h.append('') + h.append('') + for iid, fc, tc in top_identities: + name = id_names.get(iid, "#{}".format(iid))[:50] + h.append(''.format(iid, name, fc, tc)) + h.append('
IDNameFacesTraces
{}{}{:,}{}
') + + h.append('') + return '\n'.join(h) + +html = build_html() +with open(OUT, 'w') as f: + f.write(html) + +print("Saved: {}".format(OUT)) +print("Traces: {}, Detections: {}, Duration: {:.0f}s, Sample: {:.0f}Hz".format(len(trace_spans), total_detections, duration, hz)) +print("Size: {:.0f}KB".format(len(html) / 1024)) diff --git a/src/bin/release.rs b/src/bin/release.rs index e140a02..97a9e08 100644 --- a/src/bin/release.rs +++ b/src/bin/release.rs @@ -62,6 +62,17 @@ enum Commands { #[arg(short = 'i', long)] identity: Option, }, + /// Generate offline report from .sqlite file (no PostgreSQL needed) + VisualizeOffline { + /// Path to .sqlite file + sqlite_path: String, + /// Output path + #[arg(short, long)] + output: Option, + /// Filter by identity_id + #[arg(short = 'i', long)] + identity: Option, + }, } /// Run psql command and return stdout @@ -469,6 +480,29 @@ async fn cmd_package(db: &PostgresDb, uuid: &str) -> Result<()> { Ok(()) } +fn cmd_visualize_offline(sqlite_path: &str, output: Option<&str>, identity: Option) -> Result<()> { + let outpath = match output { + Some(p) => p.to_string(), + None => sqlite_path.replace(".sqlite", "_report.html"), + }; + let script = "/Users/accusys/momentry_core_0.1/scripts/render_offline_report.py"; + let mut args: Vec = vec![script.to_string(), sqlite_path.to_string(), outpath.clone()]; + if let Some(id) = identity { + args.push("--identity".to_string()); + args.push(id.to_string()); + } + let output = Command::new("/opt/homebrew/bin/python3.11") + .args(&args) + .output() + .context("Offline report script failed")?; + if !output.status.success() { + anyhow::bail!("Offline report: {}", String::from_utf8_lossy(&output.stderr)); + } + println!("{}", String::from_utf8_lossy(&output.stdout)); + println!("\n Open: {}", outpath); + Ok(()) +} + // ---- Visualize ---- fn cmd_visualize(uuid: &str, typ: &str, output: Option<&str>, identity: Option) -> Result<()> { @@ -613,6 +647,7 @@ async fn main() -> Result<()> { Commands::Package { uuid } => cmd_package(&db, &uuid).await?, Commands::Stats => cmd_stats()?, Commands::Visualize { uuid, typ, output, identity } => cmd_visualize(&uuid, &typ, output.as_deref(), identity)?, + Commands::VisualizeOffline { sqlite_path, output, identity } => cmd_visualize_offline(&sqlite_path, output.as_deref(), identity)?, } Ok(()) }