Files
momentry_core/scripts/export_file_package.py
Accusys ffc30d7377 M4 handover: coordinate fixes, detector registry, deploy v2, YOLOv8s, identity lifecycle
- Fix swift_pose/swift_ocr Y-flip bugs (BUG-003~006)
- Add heuristic_scene module + post-processing trigger (replaces Places365)
- YOLOv5nu → YOLOv8s CoreML (+33% detections, +390% scene indicators)
- Per-table SQL export (split 4.7GB single file → 478MB max per table)
- Version/build check in deploy.sh (compare /health vs file_info.json)
- Add file_uuid column to identities table + backfill
- Identity pre-clean step in deploy (avoids UNIQUE conflicts on re-deploy)
- Stranger_xxx naming fix with UUID context
- Add DETECTOR_REGISTRY.md (25 detectors), DETECTOR_SELECTION_SOP.md
- Update SPATIAL_COORDINATE_REGISTRY.md (P layer, 6-layer architecture)
- New IDENTITY_LIFECYCLE.md
- M4 response docs for deploy_script_fix and 111614 test report
2026-05-13 20:00:47 +08:00

138 lines
6.8 KiB
Python

#!/opt/homebrew/bin/python3.11
"""
Export a single file's data to a SQL file (COPY format), together with its
video file, file_info.json and an identities JSON, into one output directory.
Usage: python3 export_file_package.py <file_uuid> <output_dir>
"""
import json, os, sys, subprocess
# Directory containing the PostgreSQL client binaries; psql is invoked from here.
PG_BIN = "/Users/accusys/pgsql/18.3/bin"

# Connection URL for the momentry database.
# NOTE(review): not referenced by the code visible in this script (psql is
# called with explicit -U/-d flags) -- confirm whether other modules import it.
DB_URL = "postgresql://accusys@localhost:5432/momentry"

# Tables exported per file, as (schema-qualified table, column compared with
# the file UUID) pairs. Note that dev.chunk_vectors is filtered on "uuid",
# not "file_uuid".
TABLES = [
    ("dev.videos", "file_uuid"),
    ("dev.chunk", "file_uuid"),
    ("dev.chunk_vectors", "uuid"),
    ("dev.face_detections", "file_uuid"),
    ("dev.tkg_nodes", "file_uuid"),
    ("dev.tkg_edges", "file_uuid"),
]
def _psql_output(sql, *, tuples_only=False, timeout=15):
    """Run one SQL command through psql and return its stdout.

    Args:
        sql: Command string passed to ``psql -c``.
        tuples_only: Add ``-t -A`` so only unaligned row values are printed.
        timeout: Seconds to wait; ``subprocess.TimeoutExpired`` propagates.

    Returns:
        psql's stdout, or ``""`` when psql exits with a non-zero status. The
        failure is reported on stderr instead of being silently ignored, so
        the export stays best-effort without hiding errors.
    """
    cmd = [f"{PG_BIN}/psql", "-U", "accusys", "-d", "momentry"]
    if tuples_only:
        cmd += ["-t", "-A"]
    cmd += ["-c", sql]
    proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    if proc.returncode != 0:
        print(
            f" WARNING: psql exited with status {proc.returncode}: "
            f"{proc.stderr.strip()}",
            file=sys.stderr,
        )
        return ""
    return proc.stdout


def _write_copy_section(out, table, select_sql):
    """Append a ``COPY <table> (...) FROM STDIN`` section to *out*.

    Args:
        out: Open text file receiving the SQL package.
        table: Schema-qualified table name, e.g. ``"dev.videos"``.
        select_sql: SELECT statement producing the rows to export.

    Nothing is written when the SELECT produces no output. The section is
    also skipped (with a warning) when the column list cannot be determined,
    because ``COPY tbl () FROM STDIN`` would make the whole package unloadable.
    """
    schema, name = table.split(".")
    columns = _psql_output(
        "SELECT string_agg(column_name, ', ' ORDER BY ordinal_position) "
        "FROM information_schema.columns "
        f"WHERE table_schema='{schema}' AND table_name='{name}' "
        "AND is_updatable='YES'",
        tuples_only=True,
        timeout=15,
    ).strip()
    rows = _psql_output(
        f"COPY ({select_sql}) TO STDOUT WITH CSV HEADER", timeout=60
    )
    if not rows.strip():
        return
    if not columns:
        print(
            f" WARNING: no column list for {table}; section skipped",
            file=sys.stderr,
        )
        return
    out.write(f"COPY {table} ({columns}) FROM STDIN WITH CSV HEADER;\n")
    out.write(rows)
    if not rows.endswith("\n"):
        out.write("\n")
    # End-of-data marker for COPY ... FROM STDIN.
    out.write("\\.\n\n")


def _copy_video(file_uuid, outdir):
    """Copy the video referenced by dev.videos.file_path into *outdir*."""
    import shutil  # local import: the module's import line does not include it

    video_path = _psql_output(
        f"SELECT file_path FROM dev.videos WHERE file_uuid='{file_uuid}'",
        tuples_only=True,
        timeout=15,
    ).strip()
    if video_path and os.path.exists(video_path):
        video_name = os.path.basename(video_path)
        dest = os.path.join(outdir, video_name)
        shutil.copy2(video_path, dest)
        vsize = os.path.getsize(dest)
        print(f" {video_name} ({vsize/1024/1024:.0f} MB)")
    else:
        print(f" WARNING: video file not found at {video_path}")


def _write_file_info(file_uuid, outdir):
    """Write file_info.json (video metadata plus version/build) to *outdir*."""
    raw = _psql_output(
        "SELECT json_build_object('file_uuid', file_uuid, 'file_name', file_name, "
        "'duration', duration, 'fps', fps, 'width', width, 'height', height, "
        "'total_frames', total_frames, 'status', status) "
        f"FROM dev.videos WHERE file_uuid='{file_uuid}'",
        tuples_only=True,
        timeout=15,
    ).strip()
    if not raw:
        return
    info = json.loads(raw)
    info["momentry_version"] = "1.0.0"  # keep in sync with Cargo.toml version
    # Short commit hash of the repository found from the current directory.
    info["momentry_build"] = subprocess.run(
        ["git", "rev-parse", "--short", "HEAD"],
        capture_output=True, text=True, timeout=5,
    ).stdout.strip()
    with open(os.path.join(outdir, "file_info.json"), "w") as f:
        json.dump(info, f, indent=2)
    print(" file_info.json")


def _export_identities_json(file_uuid, outdir):
    """Run the sibling export_identities.py to produce <uuid>.identities.json."""
    id_path = os.path.join(outdir, f"{file_uuid}.identities.json")
    script = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "export_identities.py"
    )
    # Best-effort (check=False): a failing helper must not abort the package.
    subprocess.run(
        ["/opt/homebrew/bin/python3.11", script, file_uuid, id_path],
        check=False, timeout=60)
    if os.path.exists(id_path):
        print(f" {file_uuid}.identities.json ({os.path.getsize(id_path)/1024:.0f}KB)")


def main():
    """Export one file's rows, video and metadata into a package directory.

    Command-line arguments (read from ``sys.argv``):
        1. file UUID (defaults to a built-in sample UUID)
        2. output directory (defaults to ``/tmp/file_pkg``)

    Writes ``data.sql`` (one COPY section per table inside BEGIN/COMMIT),
    a copy of the video file, ``file_info.json`` and, via
    ``export_identities.py``, ``<uuid>.identities.json``.

    Raises:
        SystemExit: if the UUID contains anything other than ASCII letters,
            digits, ``-`` or ``_``.
    """
    file_uuid = sys.argv[1] if len(sys.argv) > 1 else "aeed71342a899fe4b4c57b7d41bcb692"
    outdir = sys.argv[2] if len(sys.argv) > 2 else "/tmp/file_pkg"

    # The UUID is interpolated into SQL statements, SQL comment lines and a
    # file name, so reject anything that could break out of those contexts
    # before touching the filesystem or the database.
    if not (file_uuid.isascii()
            and file_uuid.replace("-", "").replace("_", "").isalnum()):
        sys.exit(
            f"ERROR: invalid file_uuid {file_uuid!r}: "
            "expected only letters, digits, '-' or '_'"
        )

    os.makedirs(outdir, exist_ok=True)
    sql_path = os.path.join(outdir, "data.sql")
    print(f"Exporting {file_uuid} -> {sql_path}")

    with open(sql_path, "w") as f:
        f.write(f"-- File package: {file_uuid}\nBEGIN;\n\n")

        for tbl, col in TABLES:
            f.write(f"-- {tbl} WHERE {col} = '{file_uuid}'\n")
            _write_copy_section(
                f, tbl, f"SELECT * FROM {tbl} WHERE {col} = '{file_uuid}'"
            )

        # Identities for this file (by file_uuid column) plus global identities.
        # Global: tmdb + merged + user_defined (exclude inactive auto).
        f.write(f"-- dev.identities (WHERE file_uuid='{file_uuid}' OR global tmdb/merged/user_defined)\n")
        _write_copy_section(
            f,
            "dev.identities",
            f"SELECT * FROM dev.identities WHERE file_uuid = '{file_uuid}' "
            "OR (file_uuid IS NULL AND source IN ('tmdb', 'merged', 'user_defined'))",
        )

        # identity_bindings for identities referenced by this file. The
        # face_detections match is written as IN (subquery) rather than a
        # join so a binding is exported once, not once per matching detection.
        f.write(f"-- dev.identity_bindings (for identities in face_detections WHERE file_uuid='{file_uuid}')\n")
        _write_copy_section(
            f,
            "dev.identity_bindings",
            "SELECT ib.* FROM dev.identity_bindings ib "
            "WHERE ib.identity_id IN (SELECT identity_id FROM dev.face_detections "
            f"WHERE file_uuid = '{file_uuid}' AND trace_id IS NOT NULL) "
            "AND ib.identity_value IN (SELECT DISTINCT trace_id::text FROM dev.face_detections "
            f"WHERE file_uuid = '{file_uuid}' AND trace_id IS NOT NULL)",
        )

        f.write("COMMIT;\n")

    size = os.path.getsize(sql_path)
    print(f" {sql_path} ({size/1024/1024:.1f} MB)")

    _copy_video(file_uuid, outdir)
    _write_file_info(file_uuid, outdir)
    # Identities JSON is for offline analysis.
    _export_identities_json(file_uuid, outdir)


if __name__ == "__main__":
    main()