#!/opt/homebrew/bin/python3.11
"""
Identity Binding: cluster face traces → identity bindings.
Averages the face embeddings of each trace in face_detections, clusters the
per-trace mean vectors, and creates one identity per cluster plus one binding
per trace.
"""
import json, sys, time
import psycopg2
import numpy as np
from sklearn.cluster import AgglomerativeClustering

# File UUID to process: first command-line argument, or the built-in default
# when the script is run without arguments.
try:
    UUID = sys.argv[1]
except IndexError:
    UUID = "23b1c872379d4ec06479e5ed39eef4c5"

# Connection string handed to psycopg2.connect().
DB = "dbname=momentry user=accusys"

# Cosine-distance cut-off given to AgglomerativeClustering as distance_threshold.
DISTANCE_THRESHOLD = 0.55

print(f"=== Identity Binding for {UUID} ===")

# A single connection and cursor are shared by every step below.
conn = psycopg2.connect(DB)
cur = conn.cursor()

# Step 1: Load every detection of this file that has both a trace and an embedding
print("Loading face trace data...")
detections_sql = """
    SELECT trace_id, embedding
    FROM dev.face_detections
    WHERE file_uuid = %s AND trace_id IS NOT NULL AND embedding IS NOT NULL
    ORDER BY trace_id, id
"""
cur.execute(detections_sql, (UUID,))
rows = cur.fetchall()
print(f"Face detections with embeddings: {len(rows)}")

# Bucket the embeddings under the trace they belong to
trace_embs = {}
for trace_id, emb in rows:
    trace_embs.setdefault(trace_id, []).append(emb)

print(f"Unique traces: {len(trace_embs)}")

# One unit-length mean vector per trace, in ascending trace_id order.
# trace_ids[i] is the trace represented by trace_vectors[i].
trace_ids = sorted(trace_embs)
trace_vectors = []
for tid in trace_ids:
    centroid = np.mean(trace_embs[tid], axis=0)
    # The small epsilon keeps the division defined when the mean has zero norm.
    trace_vectors.append(centroid / (np.linalg.norm(centroid) + 1e-10))

X = np.array(trace_vectors)
print(f"Trace vectors shape: {X.shape}")

# Step 2: Cluster traces
# Produces `labels` (one cluster label per row of X, aligned with trace_ids)
# and `n_clusters` (the number of distinct labels).
print("Clustering traces...")
if len(X) > 1:
    # n_clusters=None lets distance_threshold decide how many clusters form.
    clustering = AgglomerativeClustering(
        n_clusters=None,
        distance_threshold=DISTANCE_THRESHOLD,
        metric='cosine',
        linkage='average'
    )
    labels = clustering.fit_predict(X)
else:
    # The fit is skipped for fewer than two traces (presumably because the
    # estimator needs at least two samples - confirm against the sklearn docs).
    # A single trace becomes cluster 0. With no traces there must be no labels
    # at all, otherwise the next step would create an identity that no trace
    # belongs to.
    labels = [0] * len(X)

n_clusters = len(set(labels))
print(f"Clusters/identities: {n_clusters}")

# Step 3: Get or create identity records
# Builds cluster_to_identity: cluster label -> dev.identities.id.
print("Creating identity records...")

# Map cluster -> identity_id
cluster_to_identity = {}
for cluster_id in sorted(set(labels)):
    # The name is derived from the file UUID and the cluster label, so running
    # the script again for the same file hits ON CONFLICT (name) and reuses the
    # existing row (re-activating it and filling file_uuid only if it was NULL)
    # instead of inserting a duplicate. RETURNING id yields the row id in both
    # the insert and the update case.
    cur.execute("""
        INSERT INTO dev.identities (name, identity_type, source, status, created_at, file_uuid)
        VALUES (%s, 'face', 'auto', 'active', NOW(), %s)
        ON CONFLICT (name) DO UPDATE SET status = 'active', file_uuid = COALESCE(dev.identities.file_uuid, %s)
        RETURNING id
    """, (f"stranger_{UUID}_{cluster_id}", UUID, UUID))
    identity_id = cur.fetchone()[0]
    cluster_to_identity[cluster_id] = identity_id
    print(f"  Cluster {cluster_id}: new identity {identity_id} (stranger_{UUID}_{cluster_id})")

# Step 4: Create identity bindings
# For every trace: record a 'trace' binding to its cluster's identity and stamp
# that identity on all of the trace's face detections, then commit everything.
print("Creating identity bindings...")
bindings = 0
for tid, label in zip(trace_ids, labels):
    identity_id = cluster_to_identity[label]

    # Create binding (the trace id is stored as text in identity_value)
    cur.execute("""
        INSERT INTO dev.identity_bindings (identity_id, identity_type, identity_value, confidence, created_at)
        VALUES (%s, 'trace', %s, 0.8, NOW())
        ON CONFLICT DO NOTHING
    """, (identity_id, str(tid)))
    # rowcount is 0 when ON CONFLICT DO NOTHING skipped the row, so only
    # bindings that were actually inserted are counted.
    bindings += cur.rowcount

    # Also update face_detection with identity_id
    cur.execute("""
        UPDATE dev.face_detections SET identity_id = %s
        WHERE file_uuid = %s AND trace_id = %s
    """, (identity_id, UUID, tid))

# Nothing above is persisted until this commit.
conn.commit()
print(f"Created {bindings} identity bindings for {n_clusters} identities")

# Summary: table-wide totals (not limited to the file processed in this run)
print("\n=== Summary ===")
summary_queries = (
    ("Total auto-generated identities",
     "SELECT COUNT(*) FROM dev.identities WHERE source = 'auto'"),
    ("Total identity bindings",
     "SELECT COUNT(*) FROM dev.identity_bindings"),
)
for caption, count_sql in summary_queries:
    cur.execute(count_sql)
    total = cur.fetchone()[0]
    print(f"{caption}: {total}")

# Release the cursor first, then the connection.
cur.close()
conn.close()
print("=== Done ===")