- Fix swift_pose/swift_ocr Y-flip bugs (BUG-003~006) - Add heuristic_scene module + post-processing trigger (replaces Places365) - YOLOv5nu → YOLOv8s CoreML (+33% detections, +390% scene indicators) - Per-table SQL export (split 4.7GB single file → 478MB max per table) - Version/build check in deploy.sh (compare /health vs file_info.json) - Add file_uuid column to identities table + backfill - Identity pre-clean step in deploy (avoids UNIQUE conflicts on re-deploy) - Stranger_xxx naming fix with UUID context - Add DETECTOR_REGISTRY.md (25 detectors), DETECTOR_SELECTION_SOP.md - Update SPATIAL_COORDINATE_REGISTRY.md (P layer, 6-layer architecture) - New IDENTITY_LIFECYCLE.md - M4 response docs for deploy_script_fix and 111614 test report
138 lines
6.8 KiB
Python
138 lines
6.8 KiB
Python
#!/opt/homebrew/bin/python3.11
|
|
"""
|
|
Export a single file's data to SQL file (COPY format).
|
|
Usage: python3 export_file_package.py <file_uuid> <output_dir>
|
|
"""
|
|
import json, os, sys, subprocess
|
|
|
|
# Directory containing the PostgreSQL client binaries; main() runs
# f"{PG_BIN}/psql" for every query.
PG_BIN = "/Users/accusys/pgsql/18.3/bin"

# Connection URL for the momentry database.
# NOTE(review): not referenced by the code visible in this file (psql is
# invoked with -U/-d instead) -- confirm whether anything still reads it.
DB_URL = "postgresql://accusys@localhost:5432/momentry"

# (schema-qualified table, filter column) pairs. main() exports the rows of
# each table whose filter column equals the requested file UUID.
TABLES = [
    ("dev.videos", "file_uuid"),
    ("dev.chunk", "file_uuid"),
    ("dev.chunk_vectors", "uuid"),
    ("dev.face_detections", "file_uuid"),
    ("dev.tkg_nodes", "file_uuid"),
    ("dev.tkg_edges", "file_uuid"),
]
|
|
|
|
def _psql(sql, *, tuples_only=False, timeout=60):
    """Run one SQL command through psql and return its stdout.

    Args:
        sql: Command passed to ``psql -c``.
        tuples_only: Add ``-t -A`` so the output is unaligned rows without
            headers or footers.
        timeout: Seconds before subprocess.run aborts the call.

    Raises:
        RuntimeError: psql exited with a non-zero status.
        subprocess.TimeoutExpired: psql did not finish within *timeout*.
    """
    cmd = [f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry"]
    if tuples_only:
        cmd += ["-t", "-A"]
    cmd += ["-c", sql]
    proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    if proc.returncode != 0:
        # Fail loudly: a skipped section would otherwise produce a package
        # that looks complete but is missing rows.
        raise RuntimeError(
            f"psql exited with status {proc.returncode}: {proc.stderr.strip()}"
        )
    return proc.stdout


def _export_table(out, tbl, where_sql):
    """Append a ``COPY ... FROM STDIN`` section for *tbl* to the file *out*.

    Args:
        out: Open, writable text file receiving the SQL.
        tbl: Schema-qualified table name, e.g. ``"dev.videos"``.
        where_sql: Row filter; it must refer to the table through the
            alias ``t``.

    Nothing is written when the table has no columns in information_schema
    (a warning is printed to stderr) or when psql returns no output.
    """
    schema, table = tbl.split(".")
    listing = _psql(
        "SELECT quote_ident(column_name::text) FROM information_schema.columns "
        f"WHERE table_schema='{schema}' AND table_name='{table}' "
        "AND is_updatable='YES' ORDER BY ordinal_position",
        tuples_only=True,
        timeout=15,
    )
    columns = [line for line in listing.splitlines() if line]
    if not columns:
        print(f" WARNING: no columns found for {tbl}; section skipped",
              file=sys.stderr)
        return

    # Select exactly the columns named in the COPY header below so the data
    # and the column list can never disagree.
    select_list = ", ".join(f"t.{name}" for name in columns)
    data = _psql(
        f"COPY (SELECT {select_list} FROM {tbl} AS t WHERE {where_sql}) "
        "TO STDOUT WITH CSV HEADER"
    )
    if not data.strip():
        return

    out.write(f"COPY {tbl} ({', '.join(columns)}) FROM STDIN WITH CSV HEADER;\n")
    out.write(data)
    if not data.endswith("\n"):
        out.write("\n")
    # End-of-data marker for COPY FROM STDIN.
    out.write("\\.\n\n")


def main():
    """Export one file's database rows, video and metadata into a directory.

    Reads the file UUID from ``sys.argv[1]`` and the output directory from
    ``sys.argv[2]``; both fall back to built-in defaults when omitted.
    Writes ``data.sql`` (COPY-format dump wrapped in BEGIN/COMMIT), a copy
    of the video file when it exists on disk, ``file_info.json`` and, via
    export_identities.py, ``<uuid>.identities.json``.

    Raises:
        ValueError: the UUID contains characters other than letters,
            digits, ``_`` and ``-``.
        RuntimeError: a psql command failed.
        subprocess.TimeoutExpired: a subprocess exceeded its timeout.
    """
    import re
    import shutil

    uuid = sys.argv[1] if len(sys.argv) > 1 else "aeed71342a899fe4b4c57b7d41bcb692"
    outdir = sys.argv[2] if len(sys.argv) > 2 else "/tmp/file_pkg"

    # The UUID is spliced into SQL text, SQL comments and a file name, so
    # refuse anything that could break out of a quoted literal or a path.
    if not re.fullmatch(r"[0-9A-Za-z_-]+", uuid):
        raise ValueError(f"invalid file_uuid: {uuid!r}")
    uuid_lit = f"'{uuid}'"

    os.makedirs(outdir, exist_ok=True)
    sql_path = os.path.join(outdir, "data.sql")

    print(f"Exporting {uuid} → {sql_path}")
    with open(sql_path, "w") as f:
        f.write(f"-- File package: {uuid}\nBEGIN;\n\n")

        for tbl, col in TABLES:
            f.write(f"-- {tbl} WHERE {col} = '{uuid}'\n")
            _export_table(f, tbl, f"t.{col} = {uuid_lit}")

        # Identities owned by this file, plus global ones
        # (tmdb / merged / user_defined with no file_uuid).
        f.write(f"-- dev.identities (WHERE file_uuid='{uuid}' OR global tmdb/merged/user_defined)\n")
        _export_table(
            f,
            "dev.identities",
            f"t.file_uuid = {uuid_lit} OR (t.file_uuid IS NULL "
            "AND t.source IN ('tmdb', 'merged', 'user_defined'))",
        )

        # Bindings for identities referenced by this file's face detections.
        # EXISTS instead of a JOIN: a join repeats each binding once per
        # matching detection, and the duplicates collide on import.
        f.write(f"-- dev.identity_bindings (for identities in face_detections WHERE file_uuid='{uuid}')\n")
        _export_table(
            f,
            "dev.identity_bindings",
            "EXISTS (SELECT 1 FROM dev.face_detections fd "
            "WHERE fd.identity_id = t.identity_id AND fd.trace_id IS NOT NULL "
            f"AND fd.file_uuid = {uuid_lit}) "
            "AND t.identity_value IN (SELECT trace_id::text FROM dev.face_detections "
            f"WHERE file_uuid = {uuid_lit} AND trace_id IS NOT NULL)",
        )

        f.write("COMMIT;\n")

    size = os.path.getsize(sql_path)
    print(f" {sql_path} ({size/1024/1024:.1f} MB)")

    # Copy video file to package (best effort: a missing file only warns).
    video_path = _psql(
        f"SELECT file_path FROM dev.videos WHERE file_uuid={uuid_lit}",
        tuples_only=True,
        timeout=15,
    ).strip()
    if video_path and os.path.exists(video_path):
        video_name = os.path.basename(video_path)
        dest = os.path.join(outdir, video_name)
        shutil.copy2(video_path, dest)
        vsize = os.path.getsize(dest)
        print(f" {video_name} ({vsize/1024/1024:.0f} MB)")
    else:
        print(f" WARNING: video file not found at {video_path}")

    # file_info.json
    info_json = _psql(
        "SELECT json_build_object('file_uuid', file_uuid, 'file_name', file_name, "
        "'duration', duration, 'fps', fps, 'width', width, 'height', height, "
        "'total_frames', total_frames, 'status', status) "
        f"FROM dev.videos WHERE file_uuid={uuid_lit}",
        tuples_only=True,
        timeout=15,
    ).strip()
    if info_json:
        info = json.loads(info_json)
        info["momentry_version"] = "1.0.0"  # keep in sync with Cargo.toml version
        info["momentry_build"] = subprocess.run(
            ["git", "rev-parse", "--short", "HEAD"],
            capture_output=True, text=True, timeout=5).stdout.strip()
        with open(os.path.join(outdir, "file_info.json"), "w") as f:
            json.dump(info, f, indent=2)
        print(" file_info.json")

    # Export identities.json (for offline analysis). check=False keeps this
    # step best effort: a failure here does not abort the package.
    id_path = os.path.join(outdir, f"{uuid}.identities.json")
    exporter = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "export_identities.py")
    subprocess.run(
        ["/opt/homebrew/bin/python3.11", exporter, uuid, id_path],
        check=False, timeout=60)
    if os.path.exists(id_path):
        print(f" {uuid}.identities.json ({os.path.getsize(id_path)/1024:.0f}KB)")
|
|
|
|
# Script entry point: run the export only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|