feat: add migrate_manual_file_identities.py
Migrate identities.file_uuid to file_identities table for consistent structure
This commit is contained in:
106
scripts/migrate_manual_file_identities.py
Normal file
106
scripts/migrate_manual_file_identities.py
Normal file
@@ -0,0 +1,106 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Migrate manual identities' file_uuid to file_identities table.
|
||||
|
||||
After migration:
|
||||
- All identities use file_identities table for file linkage
|
||||
- identities.file_uuid column becomes deprecated
|
||||
|
||||
Usage:
|
||||
python3 scripts/migrate_manual_file_identities.py --schema public
|
||||
python3 scripts/migrate_manual_file_identities.py --schema dev
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
|
||||
|
||||
def main():
    """Copy every ``identities.file_uuid`` link into the ``file_identities`` table.

    Command-line options:
        --schema  Database schema holding both tables (default ``public``).
        --db      PostgreSQL connection URL (default ``$DATABASE_URL``, else a
                  local ``momentry`` database).

    For each identity row with a non-NULL ``file_uuid``, a ``file_identities``
    row with confidence 1.0 is inserted unless the (file_uuid, identity_id)
    pair is already present, so re-running the script does not duplicate rows.
    A failing row is reported and skipped without discarding the rows that
    succeeded. After committing, per-source totals and the overall
    ``file_identities`` row count are printed for verification.
    """
    parser = argparse.ArgumentParser(description="Migrate manual identities to file_identities")
    parser.add_argument("--schema", default="public", help="Database schema")
    parser.add_argument("--db", default=os.getenv("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry"))
    args = parser.parse_args()

    schema = args.schema
    # NOTE(review): the schema name is interpolated straight into the SQL text
    # below, so --schema must only ever come from a trusted operator.
    identities_table = f"{schema}.identities" if schema != "public" else "identities"
    file_identities_table = f"{schema}.file_identities" if schema != "public" else "file_identities"

    conn = psycopg2.connect(args.db)
    try:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            # Get identities with file_uuid. `source` is fetched as well so the
            # migrated metadata records the real origin instead of always
            # falling back to "manual".
            cur.execute(f"""
                SELECT id, name, source, file_uuid
                FROM {identities_table}
                WHERE file_uuid IS NOT NULL
            """)
            identities = cur.fetchall()

            print(f"[Migration] Found {len(identities)} identities with file_uuid")

            migrated = 0
            now = datetime.now(timezone.utc).isoformat()

            for identity in identities:
                file_uuid = identity["file_uuid"]
                identity_id = identity["id"]

                # A failed statement aborts the whole PostgreSQL transaction;
                # a per-row savepoint confines the damage to this one row so
                # later rows still run and the final commit keeps the successes.
                cur.execute("SAVEPOINT migrate_identity")
                try:
                    # Check if already exists in file_identities
                    cur.execute(f"""
                        SELECT 1 FROM {file_identities_table}
                        WHERE file_uuid = %s AND identity_id = %s
                    """, (file_uuid, identity_id))
                    if cur.fetchone() is None:
                        # Insert into file_identities
                        cur.execute(f"""
                            INSERT INTO {file_identities_table} (
                                file_uuid, identity_id, confidence, metadata, created_at
                            ) VALUES (%s, %s, %s, %s, %s)
                        """, (
                            file_uuid,
                            identity_id,
                            1.0,
                            psycopg2.extras.Json({
                                "source": identity.get("source") or "manual",
                                "migrated_from": "identities.file_uuid",
                                "migrated_at": now,
                            }),
                            now,
                        ))
                        migrated += 1
                except psycopg2.Error as e:
                    # Undo only this row's work and keep going (best effort).
                    cur.execute("ROLLBACK TO SAVEPOINT migrate_identity")
                    print(f" [WARN] Failed for {identity['name']}: {e}")
                # Drop the savepoint on both paths so they do not pile up.
                cur.execute("RELEASE SAVEPOINT migrate_identity")

            conn.commit()
            print(f"[Migration] Created {migrated} new file_identities entries")

            # Verify
            cur.execute(f"""
                SELECT source, COUNT(*) as total,
                       COUNT(file_uuid) as has_file_uuid
                FROM {identities_table}
                GROUP BY source
            """)
            print()
            print("[Migration] Verification:")
            for r in cur.fetchall():
                print(f" {r['source'] or 'NULL':15} total={r['total']}, file_uuid={r['has_file_uuid']}")

            cur.execute(f"SELECT COUNT(*) FROM {file_identities_table}")
            count = cur.fetchone()["count"]
            print(f" file_identities total: {count}")
    finally:
        # Always release the connection, even when a query above raised.
        conn.close()
|
||||
|
||||
|
||||
# Run the migration only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user