#!/opt/homebrew/bin/python3.11
"""
Export a single file's data to SQL file (COPY format).

Usage:
    python3 export_file_package.py [file_uuid] [output_dir]

Both arguments are optional; see ``main`` for the defaults.
"""
import json
import os
import re
import shutil
import subprocess
import sys

PG_BIN = "/Users/accusys/pgsql/18.3/bin"
DB_URL = "postgresql://accusys@localhost:5432/momentry"

# (qualified table name, column that holds the file identifier)
TABLES = [
    ("dev.videos", "file_uuid"),
    ("dev.chunk", "file_uuid"),
    ("dev.chunk_vectors", "uuid"),
    ("dev.face_detections", "file_uuid"),
    ("dev.tkg_nodes", "file_uuid"),
    ("dev.tkg_edges", "file_uuid"),
]

# The file identifier is interpolated into SQL statements, into "--" comments
# of the generated data.sql and into an output file name, so it must not be
# able to carry quotes, whitespace, newlines or path separators.
_SAFE_UUID = re.compile(r"[A-Za-z0-9_-]+")


def _psql(sql, *, tuples_only=False, timeout=15):
    """Run one statement through psql and return its stdout.

    Args:
        sql: statement passed to ``psql -c``.
        tuples_only: add ``-t -A`` so only raw, unaligned values are printed.
        timeout: seconds before ``subprocess.TimeoutExpired`` is raised.

    Returns:
        The captured stdout, or ``""`` when psql exits non-zero (a warning
        with psql's stderr is printed in that case so the failure is visible
        instead of silently yielding an incomplete package).
    """
    cmd = [f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry"]
    if tuples_only:
        cmd += ["-t", "-A"]
    cmd += ["-c", sql]
    r = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    if r.returncode != 0:
        print(
            f" WARNING: psql exited with {r.returncode}: {r.stderr.strip()}",
            file=sys.stderr,
        )
        return ""
    return r.stdout


def _column_list(schema, table):
    """Return the comma-separated updatable columns of ``schema.table``."""
    return _psql(
        "SELECT string_agg(column_name, ', ' ORDER BY ordinal_position) "
        "FROM information_schema.columns "
        f"WHERE table_schema='{schema}' AND table_name='{table}' "
        "AND is_updatable='YES'",
        tuples_only=True,
    ).strip()


def _write_copy_section(f, table, select_sql):
    """Append a ``COPY ... FROM STDIN`` section for ``select_sql`` to ``f``.

    Nothing is written when the query returns no output.
    """
    schema, name = table.split(".")
    cols = _column_list(schema, name)
    data = _psql(f"COPY ({select_sql}) TO STDOUT WITH CSV HEADER", timeout=60)
    if not data.strip():
        return
    if cols:
        f.write(f"COPY {table} ({cols}) FROM STDIN WITH CSV HEADER;\n")
    else:
        # An empty "()" column list is invalid SQL and would abort the whole
        # import transaction; the data comes from SELECT *, so the table's
        # default column order applies.
        print(
            f" WARNING: no column list for {table}; writing COPY without one",
            file=sys.stderr,
        )
        f.write(f"COPY {table} FROM STDIN WITH CSV HEADER;\n")
    f.write(data)
    if not data.endswith("\n"):
        f.write("\n")
    f.write("\\.\n\n")


def main():
    """Build a file package (data.sql, video copy, JSON metadata) for one file.

    Reads the file identifier from ``sys.argv[1]`` (default
    ``aeed71342a899fe4b4c57b7d41bcb692``) and the output directory from
    ``sys.argv[2]`` (default ``/tmp/file_pkg``).

    Raises:
        SystemExit: if the identifier contains anything other than ASCII
            letters, digits, ``_`` or ``-``.
    """
    uuid = sys.argv[1] if len(sys.argv) > 1 else "aeed71342a899fe4b4c57b7d41bcb692"
    outdir = sys.argv[2] if len(sys.argv) > 2 else "/tmp/file_pkg"

    # Validate before touching the filesystem or the database.
    if not _SAFE_UUID.fullmatch(uuid):
        sys.exit(
            f"error: invalid file uuid {uuid!r} "
            "(only letters, digits, '_' and '-' are allowed)"
        )

    os.makedirs(outdir, exist_ok=True)

    sql_path = os.path.join(outdir, "data.sql")
    print(f"Exporting {uuid} → {sql_path}")

    with open(sql_path, "w") as f:
        f.write(f"-- File package: {uuid}\nBEGIN;\n\n")

        for tbl, col in TABLES:
            f.write(f"-- {tbl} WHERE {col} = '{uuid}'\n")
            _write_copy_section(
                f, tbl, f"SELECT * FROM {tbl} WHERE {col} = '{uuid}'"
            )

        # Export identities for this file (by file_uuid column) plus global identities
        # Global: tmdb + merged + user_defined (exclude inactive auto)
        f.write(
            f"-- dev.identities (WHERE file_uuid='{uuid}' OR global tmdb/merged/user_defined)\n"
        )
        _write_copy_section(
            f,
            "dev.identities",
            f"SELECT * FROM dev.identities WHERE file_uuid = '{uuid}' "
            "OR (file_uuid IS NULL AND source IN ('tmdb', 'merged', 'user_defined'))",
        )

        # Export identity_bindings for identities referenced by this file.
        # EXISTS (rather than a JOIN against face_detections) keeps each
        # binding row exactly once even when many detections reference it.
        f.write(
            f"-- dev.identity_bindings (for identities in face_detections WHERE file_uuid='{uuid}')\n"
        )
        _write_copy_section(
            f,
            "dev.identity_bindings",
            "SELECT ib.* FROM dev.identity_bindings ib "
            "WHERE EXISTS (SELECT 1 FROM dev.face_detections fd "
            "WHERE fd.identity_id = ib.identity_id AND fd.trace_id IS NOT NULL "
            f"AND fd.file_uuid = '{uuid}') "
            "AND ib.identity_value IN (SELECT DISTINCT trace_id::text "
            f"FROM dev.face_detections WHERE file_uuid = '{uuid}' "
            "AND trace_id IS NOT NULL)",
        )

        f.write("COMMIT;\n")

    size = os.path.getsize(sql_path)
    print(f" {sql_path} ({size/1024/1024:.1f} MB)")

    # Copy video file to package
    video_path = _psql(
        f"SELECT file_path FROM dev.videos WHERE file_uuid='{uuid}'",
        tuples_only=True,
    ).strip()
    if video_path and os.path.exists(video_path):
        video_name = os.path.basename(video_path)
        dest = os.path.join(outdir, video_name)
        shutil.copy2(video_path, dest)
        vsize = os.path.getsize(dest)
        print(f" {video_name} ({vsize/1024/1024:.0f} MB)")
    else:
        print(f" WARNING: video file not found at {video_path}")

    # file_info.json
    info_json = _psql(
        "SELECT json_build_object('file_uuid', file_uuid, 'file_name', file_name, "
        "'duration', duration, 'fps', fps, 'width', width, 'height', height, "
        "'total_frames', total_frames, 'status', status) "
        f"FROM dev.videos WHERE file_uuid='{uuid}'",
        tuples_only=True,
    ).strip()
    if info_json:
        info = json.loads(info_json)
        info["momentry_version"] = "1.0.0"  # keep in sync with Cargo.toml version
        info["momentry_build"] = subprocess.run(
            ["git", "rev-parse", "--short", "HEAD"],
            capture_output=True, text=True, timeout=5,
        ).stdout.strip()
        with open(os.path.join(outdir, "file_info.json"), "w") as f:
            json.dump(info, f, indent=2)
        print(" file_info.json")

    # Export identities.json (for offline analysis) via the sibling script;
    # best-effort: a failure there only means the file is absent below.
    id_path = os.path.join(outdir, f"{uuid}.identities.json")
    subprocess.run(
        [
            "/opt/homebrew/bin/python3.11",
            os.path.join(
                os.path.dirname(os.path.abspath(__file__)), "export_identities.py"
            ),
            uuid,
            id_path,
        ],
        check=False,
        timeout=60,
    )
    if os.path.exists(id_path):
        print(f" {uuid}.identities.json ({os.path.getsize(id_path)/1024:.0f}KB)")


if __name__ == "__main__":
    main()