feat: service inventory, ERP reports, sqlite-vec integration, visualize tool
- Add SERVICE_INVENTORY_V1.0.0.md (25 source-verified tools, 3.7GB)
- Add ERP_SELECTION_REPORT.md (Odoo CE vs ERPNext comparison)
- Add SFTPGO_ODOO_REPLACEMENT.md (SFTPGo migration plan)
- Add SERVICE_GO_GITEA_BUILD.md (Go compiler + Gitea build report)
- Add release visualize command (face trace heatmap + identity filter)
- Add sqlite-vec integration (160MB SQLite with vec0 vector tables)
- Add export_identities.py, export_sqlite.py, render_face_heatmap.py
- Add Go, Gitea, Rust/Cargo, Swift, yt-dlp, SQLite, sqlite-vec to service CLI
- Fix package to include identities and identity_bindings in data.sql
- Update release list to show all deployed video stats
- Add V1.0.0 YAML frontmatter to all docs (DOCS_STANDARD compliant)
This commit is contained in:
129
scripts/identity_bind.py
Normal file
129
scripts/identity_bind.py
Normal file
@@ -0,0 +1,129 @@
|
||||
#!/opt/homebrew/bin/python3.11
|
||||
"""
|
||||
Identity Binding: cluster face traces → identity bindings.
|
||||
Uses face embeddings from face_detections, clusters per trace, creates identities.
|
||||
"""
|
||||
import json, sys, time
|
||||
import psycopg2
|
||||
import numpy as np
|
||||
from sklearn.cluster import AgglomerativeClustering
|
||||
|
||||
# File UUID to process: first CLI argument, else a built-in default.
UUID = sys.argv[1] if len(sys.argv) > 1 else "23b1c872379d4ec06479e5ed39eef4c5"
# libpq connection string passed to psycopg2.connect().
DB = "dbname=momentry user=accusys"
DISTANCE_THRESHOLD = 0.55  # Cosine distance threshold for clustering


def mean_trace_vectors(rows):
    """Group embeddings by trace and return one unit-length mean per trace.

    Args:
        rows: iterable of ``(trace_id, embedding)`` pairs.
            NOTE(review): assumes each embedding arrives as a numeric
            sequence of equal length (e.g. a Postgres float array); a
            pgvector column without a registered adapter would arrive as
            text and need parsing first -- confirm against the schema.

    Returns:
        ``(trace_ids, vectors)``: ``trace_ids`` is the sorted list of
        distinct trace ids and ``vectors`` is a numpy array whose i-th row
        is the L2-normalised mean embedding of ``trace_ids[i]``. For empty
        input the result is ``([], <empty array>)``.
    """
    grouped = {}
    for trace_id, emb in rows:
        grouped.setdefault(trace_id, []).append(emb)

    trace_ids = sorted(grouped)
    vectors = []
    for tid in trace_ids:
        mean_emb = np.mean(grouped[tid], axis=0)
        # The epsilon keeps an all-zero mean from dividing by zero.
        vectors.append(mean_emb / (np.linalg.norm(mean_emb) + 1e-10))
    return trace_ids, np.array(vectors)


def cluster_traces(vectors, distance_threshold=DISTANCE_THRESHOLD):
    """Assign a cluster label to every trace vector.

    Uses average-linkage agglomerative clustering on cosine distance, with
    the number of clusters decided by ``distance_threshold``.

    Args:
        vectors: 2-D array-like with one row per trace.
        distance_threshold: linkage distance at or above which clusters
            are not merged.

    Returns:
        A list of int labels, one per row of ``vectors``. Empty input
        yields ``[]`` and a single row yields ``[0]``; the clusterer is
        only invoked when there are at least two rows.
    """
    count = len(vectors)
    if count == 0:
        return []
    if count == 1:
        return [0]
    clustering = AgglomerativeClustering(
        n_clusters=None,
        distance_threshold=distance_threshold,
        metric='cosine',
        linkage='average',
    )
    # Convert numpy integers to plain ints so labels behave like the
    # single-trace case everywhere downstream.
    return [int(label) for label in clustering.fit_predict(vectors)]


def load_trace_rows(cur):
    """Fetch ``(trace_id, embedding)`` rows for UUID from dev.face_detections."""
    cur.execute("""
        SELECT trace_id, embedding
        FROM dev.face_detections
        WHERE file_uuid = %s AND trace_id IS NOT NULL AND embedding IS NOT NULL
        ORDER BY trace_id, id
    """, (UUID,))
    return cur.fetchall()


def upsert_identities(cur, labels):
    """Ensure one dev.identities row per cluster; return {label: identity_id}.

    Identity names are ``PERSON_<first 8 chars of UUID>_<cluster label>``.
    An existing row with the same name is reused and set back to 'active'
    rather than duplicated.
    """
    cluster_to_identity = {}
    for cluster_id in sorted(set(labels)):
        name = f"PERSON_{UUID[:8]}_{cluster_id}"
        cur.execute("""
            INSERT INTO dev.identities (name, identity_type, source, status, created_at)
            VALUES (%s, 'face', 'auto', 'active', NOW())
            ON CONFLICT (name) DO UPDATE SET status = 'active'
            RETURNING id
        """, (name,))
        identity_id = cur.fetchone()[0]
        cluster_to_identity[cluster_id] = identity_id
        # Report the name actually stored; the row may pre-date this run.
        print(f"  Cluster {cluster_id}: identity {identity_id} ({name})")
    return cluster_to_identity


def bind_traces(cur, trace_ids, labels, cluster_to_identity):
    """Bind each trace to its cluster's identity and tag its detections.

    Returns:
        The number of dev.identity_bindings rows actually inserted
        (bindings skipped by ON CONFLICT are not counted).
    """
    created = 0
    for tid, label in zip(trace_ids, labels):
        identity_id = cluster_to_identity[label]
        cur.execute("""
            INSERT INTO dev.identity_bindings (identity_id, identity_type, identity_value, confidence, created_at)
            VALUES (%s, 'trace', %s, 0.8, NOW())
            ON CONFLICT DO NOTHING
        """, (identity_id, str(tid)))
        # rowcount is 0 when the binding already existed, so re-runs do
        # not inflate the reported total.
        created += cur.rowcount

        # Also update face_detection with identity_id
        cur.execute("""
            UPDATE dev.face_detections SET identity_id = %s
            WHERE file_uuid = %s AND trace_id = %s
        """, (identity_id, UUID, tid))
    return created


def print_summary(cur):
    """Print table-wide totals for auto identities and identity bindings."""
    print("\n=== Summary ===")
    cur.execute("SELECT COUNT(*) FROM dev.identities WHERE source = 'auto'")
    print(f"Total auto-generated identities: {cur.fetchone()[0]}")
    cur.execute("SELECT COUNT(*) FROM dev.identity_bindings")
    print(f"Total identity bindings: {cur.fetchone()[0]}")


def bind_identities(conn, cur):
    """Run load -> cluster -> upsert -> bind for UUID and commit the result."""
    # Step 1: Get trace embeddings from face_detections
    print("Loading face trace data...")
    rows = load_trace_rows(cur)
    print(f"Face detections with embeddings: {len(rows)}")

    trace_ids, vectors = mean_trace_vectors(rows)
    print(f"Unique traces: {len(trace_ids)}")
    if not trace_ids:
        # Without this guard an identity with no bindings would be created.
        print("No face traces with embeddings found; nothing to bind.")
        return
    print(f"Trace vectors shape: {vectors.shape}")

    # Step 2: Cluster traces
    print("Clustering traces...")
    labels = cluster_traces(vectors)
    n_clusters = len(set(labels))
    print(f"Clusters/identities: {n_clusters}")

    # Step 3: Get or create identity records
    print("Creating identity records...")
    cluster_to_identity = upsert_identities(cur, labels)

    # Step 4: Create identity bindings
    print("Creating identity bindings...")
    bindings = bind_traces(cur, trace_ids, labels, cluster_to_identity)

    conn.commit()
    print(f"Created {bindings} identity bindings for {n_clusters} identities")

    print_summary(cur)


def main():
    """Script entry point: open the database, bind identities, clean up."""
    print(f"=== Identity Binding for {UUID} ===")
    conn = psycopg2.connect(DB)
    try:
        cur = conn.cursor()
        try:
            bind_identities(conn, cur)
        finally:
            cur.close()
    finally:
        # Closing without a commit discards any partial work if a step failed.
        conn.close()
    print("=== Done ===")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user