#!/opt/homebrew/bin/python3.11
"""
Migrate manual identities' file_uuid to file_identities table.

After migration:
- All identities use file_identities table for file linkage
- identities.file_uuid column becomes deprecated

Usage:
    python3 scripts/migrate_manual_file_identities.py --schema public
    python3 scripts/migrate_manual_file_identities.py --schema dev
"""
# Provenance: scripts/migrate_manual_file_identities.py, added by commit
# bd7d8c77bfbd7ad4e58ad044b54747009c20dba3
# ("feat: add migrate_manual_file_identities.py").

import argparse
import json
import os
from datetime import datetime, timezone

import psycopg2
import psycopg2.extras

# Name of the per-row savepoint; it is released or rolled back after every
# row, so reusing a single name is safe.
_ROW_SAVEPOINT = "migrate_file_identity"


def _link_identity(cur, file_identities_table: str, identity, migrated_at: str) -> bool:
    """Insert one file_identities row for *identity* unless it already exists.

    Args:
        cur: Open cursor used for both the existence check and the insert.
        file_identities_table: Target table name, already schema-qualified
            where needed.
        identity: Row from the identities table; must provide ``id`` and
            ``file_uuid`` and may provide ``source``.
        migrated_at: ISO-8601 timestamp stored as ``created_at`` and in the
            row metadata.

    Returns:
        True when a new row was inserted, False when the
        (file_uuid, identity_id) pair was already present.

    Raises:
        psycopg2.Error: If either statement fails; the caller is expected to
            roll back to its savepoint.
    """
    file_uuid = identity["file_uuid"]
    identity_id = identity["id"]

    # Skip pairs that were linked by an earlier run, so the script can be
    # re-run without creating duplicates.
    cur.execute(f"""
        SELECT 1 FROM {file_identities_table}
        WHERE file_uuid = %s AND identity_id = %s
    """, (file_uuid, identity_id))
    if cur.fetchone():
        return False

    cur.execute(f"""
        INSERT INTO {file_identities_table} (
            file_uuid, identity_id, confidence, metadata, created_at
        ) VALUES (%s, %s, %s, %s, %s)
    """, (
        file_uuid,
        identity_id,
        1.0,
        psycopg2.extras.Json({
            "source": identity.get("source") or "manual",
            "migrated_from": "identities.file_uuid",
            "migrated_at": migrated_at,
        }),
        migrated_at,
    ))
    return True


def main() -> None:
    """Copy every identities.file_uuid link into the file_identities table.

    Reads ``--schema`` and ``--db`` from the command line, inserts one
    file_identities row per identity that has a file_uuid (skipping pairs
    that already exist), commits, and prints a short verification summary.
    A row that fails with a database error is reported and skipped; the
    other rows are still committed.
    """
    parser = argparse.ArgumentParser(description="Migrate manual identities to file_identities")
    parser.add_argument("--schema", default="public", help="Database schema")
    parser.add_argument("--db", default=os.getenv("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry"))
    args = parser.parse_args()

    schema = args.schema
    # Tables in "public" are referenced unqualified; any other schema is
    # prefixed explicitly.
    identities_table = f"{schema}.identities" if schema != "public" else "identities"
    file_identities_table = f"{schema}.file_identities" if schema != "public" else "file_identities"

    conn = psycopg2.connect(args.db)
    try:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            # Get identities with file_uuid. `source` is selected so the
            # migrated metadata records the real origin instead of always
            # falling back to "manual".
            cur.execute(f"""
                SELECT id, uuid, name, status, source, file_uuid, metadata, created_at
                FROM {identities_table}
                WHERE file_uuid IS NOT NULL
            """)
            identities = cur.fetchall()

            print(f"[Migration] Found {len(identities)} identities with file_uuid")

            migrated = 0
            now = datetime.now(timezone.utc).isoformat()

            for identity in identities:
                # A failed statement aborts the whole PostgreSQL transaction.
                # Isolating each row in a savepoint keeps one bad row from
                # poisoning the rows before and after it.
                cur.execute(f"SAVEPOINT {_ROW_SAVEPOINT}")
                try:
                    created = _link_identity(cur, file_identities_table, identity, now)
                except psycopg2.Error as e:
                    cur.execute(f"ROLLBACK TO SAVEPOINT {_ROW_SAVEPOINT}")
                    print(f" [WARN] Failed for {identity['name']}: {e}")
                else:
                    cur.execute(f"RELEASE SAVEPOINT {_ROW_SAVEPOINT}")
                    if created:
                        migrated += 1

            conn.commit()
            print(f"[Migration] Created {migrated} new file_identities entries")

            # Verify
            cur.execute(f"""
                SELECT source, COUNT(*) as total,
                       COUNT(file_uuid) as has_file_uuid
                FROM {identities_table}
                GROUP BY source
            """)
            print()
            print("[Migration] Verification:")
            for r in cur.fetchall():
                print(f" {r['source'] or 'NULL':15} total={r['total']}, file_uuid={r['has_file_uuid']}")

            cur.execute(f"SELECT COUNT(*) FROM {file_identities_table}")
            count = cur.fetchone()["count"]
            print(f" file_identities total: {count}")
    finally:
        # Closing without a commit discards any uncommitted work, so an
        # unexpected error never leaves a half-open transaction behind.
        conn.close()


if __name__ == "__main__":
    main()