feat: add migrate_manual_file_identities.py

Migrate identities.file_uuid to file_identities table for consistent structure
This commit is contained in:
Accusys
2026-06-26 13:55:10 +08:00
parent 6f1a560d06
commit bd7d8c77bf

View File

@@ -0,0 +1,106 @@
#!/opt/homebrew/bin/python3.11
"""
Migrate manual identities' file_uuid to file_identities table.
After migration:
- All identities use file_identities table for file linkage
- identities.file_uuid column becomes deprecated
Usage:
python3 scripts/migrate_manual_file_identities.py --schema public
python3 scripts/migrate_manual_file_identities.py --schema dev
"""
import argparse
import os
import psycopg2
import psycopg2.extras
import json
from datetime import datetime, timezone
def main():
    """Copy every ``identities.file_uuid`` link into the ``file_identities`` table.

    Command-line options:
        --schema  Schema that holds both tables. The default ``public`` is
                  referenced without a schema prefix.
        --db      PostgreSQL connection string. Defaults to ``$DATABASE_URL``
                  or, if unset, a local ``momentry`` database.

    For each identity whose ``file_uuid`` is not NULL, one ``file_identities``
    row with confidence 1.0 is inserted unless a row for the same
    (file_uuid, identity_id) pair already exists, so a re-run does not
    duplicate rows. Rows that fail are reported and skipped; everything else
    is committed in a single transaction. A per-``source`` summary and the
    final ``file_identities`` row count are printed afterwards.
    """
    parser = argparse.ArgumentParser(description="Migrate manual identities to file_identities")
    parser.add_argument("--schema", default="public", help="Database schema")
    parser.add_argument("--db", default=os.getenv("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry"))
    args = parser.parse_args()

    schema = args.schema
    # NOTE(review): the schema name is interpolated into the SQL text as-is.
    # It comes from the operator's command line, but it is not quoted or
    # validated; consider psycopg2.sql.Identifier if this ever takes
    # untrusted input.
    identities_table = f"{schema}.identities" if schema != "public" else "identities"
    file_identities_table = f"{schema}.file_identities" if schema != "public" else "file_identities"

    conn = psycopg2.connect(args.db)
    try:
        # Closing the connection without a commit discards uncommitted work,
        # so an unexpected error never leaves a half-applied migration.
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            migrated = _migrate_identities(cur, identities_table, file_identities_table)
            conn.commit()
            print(f"[Migration] Created {migrated} new file_identities entries")
            _print_verification(cur, identities_table, file_identities_table)
    finally:
        conn.close()


def _migrate_identities(cur, identities_table, file_identities_table):
    """Insert the missing ``file_identities`` rows and return how many were created."""
    # `source` must be selected here: it is copied into the new row's metadata.
    cur.execute(f"""
        SELECT id, uuid, name, status, source, file_uuid, metadata, created_at
        FROM {identities_table}
        WHERE file_uuid IS NOT NULL
    """)
    identities = cur.fetchall()
    print(f"[Migration] Found {len(identities)} identities with file_uuid")

    migrated = 0
    now = datetime.now(timezone.utc).isoformat()
    for identity in identities:
        file_uuid = identity["file_uuid"]
        identity_id = identity["id"]

        # A failed statement aborts the whole PostgreSQL transaction. The
        # savepoint confines the damage to this one row so the remaining
        # rows can still be processed and committed.
        cur.execute("SAVEPOINT migrate_file_identity")
        try:
            # Skip identities that already have a matching link row.
            cur.execute(f"""
                SELECT 1 FROM {file_identities_table}
                WHERE file_uuid = %s AND identity_id = %s
            """, (file_uuid, identity_id))
            if cur.fetchone() is None:
                cur.execute(f"""
                    INSERT INTO {file_identities_table} (
                        file_uuid, identity_id, confidence, metadata, created_at
                    ) VALUES (%s, %s, %s, %s, %s)
                """, (
                    file_uuid,
                    identity_id,
                    1.0,
                    psycopg2.extras.Json({
                        "source": identity.get("source") or "manual",
                        "migrated_from": "identities.file_uuid",
                        "migrated_at": now,
                    }),
                    now,
                ))
                migrated += 1
        except psycopg2.Error as e:
            cur.execute("ROLLBACK TO SAVEPOINT migrate_file_identity")
            print(f" [WARN] Failed for {identity['name']}: {e}")
        cur.execute("RELEASE SAVEPOINT migrate_file_identity")
    return migrated


def _print_verification(cur, identities_table, file_identities_table):
    """Print identity counts per ``source`` and the total ``file_identities`` row count."""
    cur.execute(f"""
        SELECT source, COUNT(*) as total,
               COUNT(file_uuid) as has_file_uuid
        FROM {identities_table}
        GROUP BY source
    """)
    print()
    print("[Migration] Verification:")
    for r in cur.fetchall():
        print(f" {r['source'] or 'NULL':15} total={r['total']}, file_uuid={r['has_file_uuid']}")

    cur.execute(f"SELECT COUNT(*) FROM {file_identities_table}")
    # PostgreSQL names an unaliased COUNT(*) column "count".
    count = cur.fetchone()["count"]
    print(f" file_identities total: {count}")
# Run the migration only when executed as a script, not when imported.
if __name__ == "__main__":
    main()