fix: restore identity_id after face_dedup, rebuild package v20260512
- Re-ran identity_bind.py to restore identity_id on face_detections
- Dedup cleanup had removed the rows with identity_id and kept the NULL rows
- 70691 face_detections now have identity_id, 428 identities
- Full package rebuild: 169MB sqlite, 1358MB tar.gz
- identities.json: 428 identities + 5483 bindings + 5483 trace maps
- TMDB matching complete: Audrey Hepburn 843 traces, Cary Grant 482
This commit is contained in:
113
scripts/import_file_package.py
Normal file
113
scripts/import_file_package.py
Normal file
@@ -0,0 +1,113 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Import a File Content Package into the DB.
|
||||
Usage: python3 import_file_package.py --uuid <file_uuid> --package <path>
|
||||
"""
|
||||
import json, os, sys, subprocess, argparse, csv
|
||||
|
||||
# Directory that holds the PostgreSQL client binaries; psql is run from here.
PG_BIN = "/Users/accusys/pgsql/18.3/bin"
# Role passed to psql via -U.
DB_USER = "accusys"
# Database passed to psql via -d.
DB_NAME = "momentry"
|
||||
|
||||
def psql(sql, timeout=30):
    """Run one SQL command string through the psql CLI and print the outcome.

    Args:
        sql: SQL text handed to ``psql -c``.
        timeout: Seconds to wait for psql before giving up (default 30,
            the value previously hard-coded).

    Returns:
        True when psql exits with status 0; False when it exits non-zero,
        times out, or cannot be started at all.
    """
    cmd = [f"{PG_BIN}/psql", "-U", DB_USER, "-d", DB_NAME, "-c", sql]
    try:
        r = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        # Keep the True/False contract instead of crashing the caller with a
        # traceback when a long-running statement exceeds the timeout.
        print(f" ERROR: psql timed out after {timeout}s")
        return False
    except OSError as exc:
        # Raised e.g. when the psql binary is missing or not executable.
        print(f" ERROR: could not run psql: {exc}")
        return False
    if r.returncode != 0:
        print(f" ERROR: {r.stderr[:200]}")
        return False
    print(f" OK: {r.stdout.strip()[:100]}")
    return True
|
||||
|
||||
def _sql_literal(value):
    """Return value as a single-quoted SQL string literal, doubling quotes."""
    return "'" + str(value).replace("'", "''") + "'"


def _copy_sql(table, columns, csv_path, conflict=None):
    """Build the SQL that loads a CSV file (with header) into table.

    COPY has no ON CONFLICT clause, so when ``conflict`` is given the rows
    are copied into a temporary staging table first and then inserted with
    ``INSERT ... ON CONFLICT (...) DO NOTHING``. All statements are returned
    as one string so that ``psql -c`` sends them in a single request.
    """
    cols = ", ".join(columns)
    # COPY ... FROM 'file' is read by the server process, which resolves a
    # relative path against its own working directory, so send an absolute one.
    src = _sql_literal(os.path.abspath(csv_path))
    if conflict is None:
        return f"COPY {table} ({cols}) FROM {src} WITH CSV HEADER"
    return (
        f"CREATE TEMP TABLE _pkg_stage ON COMMIT DROP AS "
        f"SELECT {cols} FROM {table} WITH NO DATA; "
        f"COPY _pkg_stage ({cols}) FROM {src} WITH CSV HEADER; "
        f"INSERT INTO {table} ({cols}) SELECT {cols} FROM _pkg_stage "
        f"ON CONFLICT ({conflict}) DO NOTHING"
    )


def _count_rows(table, column, value):
    """Return psql's count(*) output for rows of table where column = value."""
    sql = f"SELECT count(*) FROM {table} WHERE {column}={_sql_literal(value)}"
    try:
        r = subprocess.run(
            [f"{PG_BIN}/psql", "-U", DB_USER, "-d", DB_NAME, "-t", "-A", "-c", sql],
            capture_output=True, text=True, timeout=10)
    except (subprocess.TimeoutExpired, OSError) as exc:
        return f"ERROR: {exc}"
    if r.returncode != 0:
        return f"ERROR: {r.stderr.strip()[:200]}"
    return r.stdout.strip()


def main():
    """Import the CSV/JSON files of a file content package into the database.

    Reads ``--uuid`` and ``--package`` from the command line, then, for each
    file that exists in the package directory:

    1. metadata.json      -> UPDATE dev.videos (status, duration, fps)
    2. identities.csv     -> dev.identities (rows conflicting on name skipped)
    3. face_detections.csv-> dev.face_detections (rows conflicting on id skipped)
    4. chunks.csv         -> dev.chunk
    5. chunk_vectors.csv  -> dev.chunk_vectors

    Finally prints row counts for the given uuid. Every step is attempted
    even if an earlier one fails; the process exits with status 1 when the
    package directory is missing or when any step failed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--uuid", required=True)
    parser.add_argument("--package", required=True)
    args = parser.parse_args()

    uuid = args.uuid
    pkg = args.package.rstrip("/")

    if not os.path.exists(pkg):
        print(f"Package not found: {pkg}")
        sys.exit(1)

    print(f"Importing package {uuid} from {pkg}")
    print()

    failed = []  # human-readable names of the steps that did not succeed

    # 1. Update video registration (mark as processed)
    print("[1/5] Update video registration...")
    meta_path = os.path.join(pkg, "metadata.json")
    if os.path.exists(meta_path):
        try:
            with open(meta_path, encoding="utf-8") as f:
                meta = json.load(f)
            # Coerce to float so only numeric text can reach the SQL string.
            fps = float(meta.get("fps", 24.0))
            dur = float(meta.get("duration", 0))
        except (OSError, ValueError, TypeError, AttributeError) as exc:
            print(f" ERROR: unusable metadata.json: {exc}")
            failed.append("video registration")
        else:
            ok = psql(
                f"UPDATE dev.videos SET status='ready', duration={dur}, fps={fps} "
                f"WHERE file_uuid={_sql_literal(uuid)}"
            )
            if not ok:
                failed.append("video registration")

    # 2. Import identities
    print("[2/5] Import identities...")
    id_path = os.path.join(pkg, "identities.csv")
    if os.path.exists(id_path):
        with open(id_path, newline="") as f:
            count = sum(1 for _ in csv.DictReader(f))
        # Skip the round-trip entirely for a header-only file.
        if count > 0:
            ok = psql(_copy_sql(
                "dev.identities",
                ["uuid", "name", "identity_type", "source", "status", "metadata"],
                id_path,
                conflict="name",
            ))
            if not ok:
                failed.append("identities")

    # 3. Import face detections
    print("[3/5] Import face detections...")
    fd_path = os.path.join(pkg, "face_detections.csv")
    if os.path.exists(fd_path):
        ok = psql(_copy_sql(
            "dev.face_detections",
            ["id", "file_uuid", "frame_number", "timestamp_secs", "face_id",
             "x", "y", "width", "height", "confidence", "trace_id",
             "identity_id"],
            fd_path,
            conflict="id",
        ))
        if not ok:
            failed.append("face detections")

    # 4. Import chunks
    print("[4/5] Import chunks...")
    ch_path = os.path.join(pkg, "chunks.csv")
    if os.path.exists(ch_path):
        # NOTE(review): the conflict target names file_uuid, but file_uuid is
        # not in the imported column list -- confirm against the dev.chunk
        # schema and the chunks.csv layout.
        ok = psql(_copy_sql(
            "dev.chunk",
            ["chunk_id", "chunk_type", "start_frame", "end_frame",
             "start_time", "end_time", "fps", "text_content"],
            ch_path,
            conflict="file_uuid, chunk_id",
        ))
        if not ok:
            failed.append("chunks")

    # 5. Import vectors
    print("[5/5] Import chunk_vectors...")
    vec_path = os.path.join(pkg, "chunk_vectors.csv")
    if os.path.exists(vec_path):
        # Plain COPY, no conflict handling, as before.
        ok = psql(_copy_sql(
            "dev.chunk_vectors", ["chunk_id", "embedding"], vec_path))
        if not ok:
            failed.append("chunk_vectors")

    # Verify
    print()
    print("=== Verification ===")
    print(f" Chunks: {_count_rows('dev.chunk', 'file_uuid', uuid)}")
    # NOTE(review): this filters on "uuid" while the chunk query filters on
    # "file_uuid" -- confirm the dev.chunk_vectors column name.
    print(f" Vectors: {_count_rows('dev.chunk_vectors', 'uuid', uuid)}")

    print()
    if failed:
        print(f"=== Failed: {', '.join(failed)} ===")
        sys.exit(1)
    print("=== Done ===")
|
||||
|
||||
# Run the import only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user