Files
momentry_core/scripts/import_file_package.py
Accusys 48c3b13c37 fix: restore identity_id after face_dedup, rebuild package v20260512
- Re-ran identity_bind.py to restore identity_id on face_detections
- Dedup cleanup had removed rows with identity_id, kept NULL rows
- 70691 face_detections now have identity_id, 428 identities
- Full package rebuild: 169MB sqlite, 1358MB tar.gz
- identities.json: 428 identities + 5483 bindings + 5483 trace maps
- TMDB matching complete: Audrey Hepburn 843 traces, Cary Grant 482
2026-05-13 04:30:18 +08:00

114 lines
3.7 KiB
Python

#!/opt/homebrew/bin/python3.11
"""
Import a File Content Package into the DB.
Usage: python3 import_file_package.py --uuid <file_uuid> --package <path>
"""
import json, os, sys, subprocess, argparse, csv
# Directory containing the PostgreSQL client binaries; the psql executable
# is invoked as f"{PG_BIN}/psql".
PG_BIN = "/Users/accusys/pgsql/18.3/bin"
# Database role handed to psql through its -U option.
DB_USER = "accusys"
# Database name handed to psql through its -d option.
DB_NAME = "momentry"
def psql(sql, timeout=30):
    """Run one SQL command string through the psql CLI and report success.

    The outcome is printed as an ``OK:`` line (first 100 characters of
    psql's stdout) or an ``ERROR:`` line (first 200 characters of stderr).

    Args:
        sql: SQL text passed to ``psql -c``.
        timeout: Seconds to wait for psql before giving up. Defaults to 30.

    Returns:
        True when psql exits with status 0. False when it exits non-zero,
        cannot be started, or does not finish within *timeout*.
    """
    cmd = [f"{PG_BIN}/psql", "-U", DB_USER, "-d", DB_NAME, "-c", sql]
    try:
        r = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        # A slow statement must count as a failed step, not kill the importer.
        print(f" ERROR: psql timed out after {timeout}s")
        return False
    except OSError as exc:
        # Raised when the psql binary is missing or not executable.
        print(f" ERROR: cannot run psql: {exc}")
        return False
    if r.returncode != 0:
        print(f" ERROR: {r.stderr[:200]}")
        return False
    print(f" OK: {r.stdout.strip()[:100]}")
    return True
def _sql_literal(value):
    """Return *value* as a single-quoted SQL string literal, quotes doubled."""
    return "'" + str(value).replace("'", "''") + "'"


def _sql_number(value, default):
    """Return *value* as a finite int/float safe to embed in SQL, else *default*."""
    if isinstance(value, bool):
        return default
    if isinstance(value, int):
        return value
    try:
        number = float(value)
    except (TypeError, ValueError):
        return default
    # NaN and +/-inf would not render as valid numeric SQL literals.
    if number != number or number in (float("inf"), float("-inf")):
        return default
    return number


def _staged_copy_sql(table, columns, csv_path, conflict_target, constants=None):
    """Build SQL that loads a CSV into *table*, skipping conflicting rows.

    PostgreSQL's COPY has no ON CONFLICT clause, so the CSV is first copied
    into a session-local temp table and then moved into *table* with
    INSERT ... ON CONFLICT DO NOTHING. *constants* maps extra target columns
    to SQL expressions that are inserted for every row.
    """
    constants = constants or {}
    stage = "import_stage"
    col_list = ", ".join(columns)
    insert_cols = ", ".join([*constants, *columns])
    select_exprs = ", ".join([*constants.values(), *columns])
    # COPY ... FROM 'file' is read by the server process, so hand it an
    # absolute path instead of one relative to this script's directory.
    path = _sql_literal(os.path.abspath(csv_path))
    return (
        # CREATE TABLE AS copies column types but no constraints, so rows
        # can be staged before conflicts are resolved. The temp table goes
        # away when the psql session that runs this string ends.
        f"CREATE TEMP TABLE {stage} AS "
        f"SELECT {col_list} FROM {table} WITH NO DATA; "
        f"COPY {stage} ({col_list}) FROM {path} WITH CSV HEADER; "
        f"INSERT INTO {table} ({insert_cols}) "
        f"SELECT {select_exprs} FROM {stage} "
        f"ON CONFLICT ({conflict_target}) DO NOTHING"
    )


def _csv_has_rows(path):
    """Return True when the CSV at *path* has at least one row after the header."""
    with open(path, newline="", encoding="utf-8") as f:
        return next(csv.DictReader(f), None) is not None


def _count_rows(table, column, value):
    """Return count(*) of *table* rows where *column* equals *value*, as text.

    Returns a string starting with ``ERROR:`` when psql fails, cannot be
    started, or times out, so verification never raises.
    """
    sql = f"SELECT count(*) FROM {table} WHERE {column}={_sql_literal(value)}"
    try:
        r = subprocess.run(
            [f"{PG_BIN}/psql", "-U", DB_USER, "-d", DB_NAME, "-t", "-A", "-c", sql],
            capture_output=True, text=True, timeout=10)
    except (OSError, subprocess.TimeoutExpired) as exc:
        return f"ERROR: {exc}"
    if r.returncode != 0:
        return f"ERROR: {r.stderr.strip()[:200]}"
    return r.stdout.strip()


def main():
    """Import a file content package directory into the database.

    Command-line arguments:
        --uuid     file UUID the package belongs to.
        --package  path of the package directory.

    Each of the five steps runs only when its input file exists in the
    package: metadata.json, identities.csv, face_detections.csv, chunks.csv
    and chunk_vectors.csv. A failing step does not stop the later steps,
    but the process exits with status 1 if any step failed or the package
    path does not exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--uuid", required=True)
    parser.add_argument("--package", required=True)
    args = parser.parse_args()
    uuid = args.uuid
    pkg = args.package.rstrip("/")
    if not os.path.exists(pkg):
        print(f"Package not found: {pkg}")
        sys.exit(1)
    print(f"Importing package {uuid} from {pkg}")
    print()

    uuid_sql = _sql_literal(uuid)
    failed = []

    def run(step, sql):
        # Remember failed steps so the exit status reflects them.
        if not psql(sql):
            failed.append(step)

    # 1. Update video registration (mark as processed)
    print("[1/5] Update video registration...")
    meta_path = os.path.join(pkg, "metadata.json")
    if os.path.exists(meta_path):
        try:
            with open(meta_path, encoding="utf-8") as f:
                meta = json.load(f)
            if not isinstance(meta, dict):
                raise ValueError("top-level JSON value is not an object")
        except (OSError, ValueError) as exc:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            print(f" ERROR: cannot read {meta_path}: {exc}")
            failed.append("metadata")
        else:
            # Coerce to numbers so file content can never inject SQL.
            fps = _sql_number(meta.get("fps", 24.0), 24.0)
            dur = _sql_number(meta.get("duration", 0), 0)
            run(
                "metadata",
                f"UPDATE dev.videos SET status='ready', duration={dur}, fps={fps} "
                f"WHERE file_uuid={uuid_sql}",
            )

    # 2. Import identities
    print("[2/5] Import identities...")
    id_path = os.path.join(pkg, "identities.csv")
    if os.path.exists(id_path):
        try:
            has_rows = _csv_has_rows(id_path)
        except (OSError, ValueError, csv.Error) as exc:
            print(f" ERROR: cannot read {id_path}: {exc}")
            failed.append("identities")
            has_rows = False
        # A header-only file is skipped without running any SQL.
        if has_rows:
            run("identities", _staged_copy_sql(
                "dev.identities",
                ["uuid", "name", "identity_type", "source", "status", "metadata"],
                id_path,
                "name",
            ))

    # 3. Import face detections
    print("[3/5] Import face detections...")
    fd_path = os.path.join(pkg, "face_detections.csv")
    if os.path.exists(fd_path):
        run("face_detections", _staged_copy_sql(
            "dev.face_detections",
            ["id", "file_uuid", "frame_number", "timestamp_secs", "face_id",
             "x", "y", "width", "height", "confidence", "trace_id",
             "identity_id"],
            fd_path,
            "id",
        ))

    # 4. Import chunks
    print("[4/5] Import chunks...")
    ch_path = os.path.join(pkg, "chunks.csv")
    if os.path.exists(ch_path):
        # The CSV column list has no file_uuid, yet the conflict target and
        # the verification query below both key on it, so the --uuid value
        # is supplied for every inserted row.
        # NOTE(review): confirm dev.chunk.file_uuid is meant to hold --uuid.
        run("chunks", _staged_copy_sql(
            "dev.chunk",
            ["chunk_id", "chunk_type", "start_frame", "end_frame",
             "start_time", "end_time", "fps", "text_content"],
            ch_path,
            "file_uuid, chunk_id",
            constants={"file_uuid": uuid_sql},
        ))

    # 5. Import vectors
    print("[5/5] Import chunk_vectors...")
    vec_path = os.path.join(pkg, "chunk_vectors.csv")
    if os.path.exists(vec_path):
        # No conflict handling is requested for vectors, so a plain
        # server-side COPY is sufficient.
        vec_sql = _sql_literal(os.path.abspath(vec_path))
        run(
            "chunk_vectors",
            f"COPY dev.chunk_vectors (chunk_id, embedding) "
            f"FROM {vec_sql} WITH CSV HEADER",
        )

    # Verify
    print()
    print("=== Verification ===")
    print(f" Chunks: {_count_rows('dev.chunk', 'file_uuid', uuid)}")
    # NOTE(review): the COPY above fills only chunk_id and embedding, so this
    # count depends on how dev.chunk_vectors.uuid gets populated - confirm.
    print(f" Vectors: {_count_rows('dev.chunk_vectors', 'uuid', uuid)}")
    print()
    if failed:
        print(f"=== Done with errors: {', '.join(failed)} ===")
        sys.exit(1)
    print("=== Done ===")
# Run the importer only when this file is executed as a script.
if __name__ == "__main__":
    main()