- Add database migrations (006-028) for face recognition, identity, file_uuid - Add test scripts for ASR, face, search, processing - Add portal frontend (Tauri) - Add config, benchmark, and monitoring utilities - Add model checkpoints and pretrained model references
331 lines
11 KiB
SQL
331 lines
11 KiB
SQL
-- Migration 023: Extend identities table for multi-dimensional embeddings
|
|
-- Date: 2026-04-28
|
|
-- Purpose: Add identity_type, source, status, face_embedding, voice_embedding, identity_embedding, reference_data
|
|
-- Reference: docs_v1.0/ARCHITECTURE/MOMENTRY_CORE_ARCHITECTURE_V2.md
|
|
-- Strategy: Add columns to existing table (preserve existing data)
|
|
|
|
-- ============================================
|
|
-- Part 0: Ensure uuid column exists (primary key alternative)
|
|
-- ============================================
|
|
|
|
-- public schema: add uuid if not exists
|
|
ALTER TABLE public.identities
|
|
ADD COLUMN IF NOT EXISTS uuid UUID DEFAULT gen_random_uuid();
|
|
|
|
-- dev schema: uuid already exists
|
|
|
|
-- ============================================
|
|
-- Part 1: Rename embedding → face_embedding (if exists)
|
|
-- ============================================
|
|
|
|
-- public schema
|
|
DO $$
|
|
BEGIN
|
|
IF EXISTS (
|
|
SELECT 1 FROM information_schema.columns
|
|
WHERE table_schema = 'public'
|
|
AND table_name = 'identities'
|
|
AND column_name = 'embedding'
|
|
) THEN
|
|
-- Rename column
|
|
ALTER TABLE public.identities RENAME COLUMN embedding TO face_embedding;
|
|
|
|
-- Change dimension to 512 (if currently 768)
|
|
-- Note: We cannot easily change vector dimension, so we keep as is and will fix later
|
|
-- For now, just add comment
|
|
EXECUTE 'COMMENT ON COLUMN public.identities.face_embedding IS ''InsightFace 512-dim ArcFace embedding (or 768 legacy)''';
|
|
END IF;
|
|
END $$;
|
|
|
|
-- dev schema
|
|
DO $$
|
|
BEGIN
|
|
IF EXISTS (
|
|
SELECT 1 FROM information_schema.columns
|
|
WHERE table_schema = 'dev'
|
|
AND table_name = 'identities'
|
|
AND column_name = 'embedding'
|
|
) THEN
|
|
-- Rename column
|
|
ALTER TABLE dev.identities RENAME COLUMN embedding TO face_embedding;
|
|
|
|
-- Comment
|
|
EXECUTE 'COMMENT ON COLUMN dev.identities.face_embedding IS ''InsightFace 512-dim ArcFace embedding''';
|
|
END IF;
|
|
END $$;
|
|
|
|
-- ============================================
|
|
-- Part 2: Add identity_type VARCHAR(30)
|
|
-- ============================================
|
|
|
|
-- public schema
|
|
ALTER TABLE public.identities
|
|
ADD COLUMN IF NOT EXISTS identity_type VARCHAR(30) DEFAULT 'people';
|
|
|
|
-- dev schema
|
|
ALTER TABLE dev.identities
|
|
ADD COLUMN IF NOT EXISTS identity_type VARCHAR(30) DEFAULT 'people';
|
|
|
|
COMMENT ON COLUMN public.identities.identity_type IS
|
|
'Identity type: people, brand, object, concept, logo, symbol, scene, sound, animal, environmental';
|
|
|
|
COMMENT ON COLUMN dev.identities.identity_type IS
|
|
'Identity type: people, brand, object, concept, logo, symbol, scene, sound, animal, environmental';
|
|
|
|
-- ============================================
|
|
-- Part 3: Add source VARCHAR(20)
|
|
-- ============================================
|
|
|
|
-- public schema
|
|
ALTER TABLE public.identities
|
|
ADD COLUMN IF NOT EXISTS source VARCHAR(20) DEFAULT 'manual';
|
|
|
|
-- dev schema
|
|
ALTER TABLE dev.identities
|
|
ADD COLUMN IF NOT EXISTS source VARCHAR(20) DEFAULT 'manual';
|
|
|
|
COMMENT ON COLUMN public.identities.source IS
|
|
'Identity source: manual, tmdb, agent_suggested, ai_detection';
|
|
|
|
COMMENT ON COLUMN dev.identities.source IS
|
|
'Identity source: manual, tmdb, agent_suggested, ai_detection';
|
|
|
|
-- ============================================
|
|
-- Part 4: Add status VARCHAR(20)
|
|
-- ============================================
|
|
|
|
-- public schema
|
|
ALTER TABLE public.identities
|
|
ADD COLUMN IF NOT EXISTS status VARCHAR(20) DEFAULT 'pending';
|
|
|
|
-- dev schema
|
|
ALTER TABLE dev.identities
|
|
ADD COLUMN IF NOT EXISTS status VARCHAR(20) DEFAULT 'pending';
|
|
|
|
COMMENT ON COLUMN public.identities.status IS
|
|
'Identity status: pending, confirmed, skipped';
|
|
|
|
COMMENT ON COLUMN dev.identities.status IS
|
|
'Identity status: pending, confirmed, skipped';
|
|
|
|
-- ============================================
|
|
-- Part 5: Add voice_embedding VECTOR(192)
|
|
-- ============================================
|
|
|
|
-- public schema
|
|
ALTER TABLE public.identities
|
|
ADD COLUMN IF NOT EXISTS voice_embedding VECTOR(192);
|
|
|
|
-- dev schema
|
|
ALTER TABLE dev.identities
|
|
ADD COLUMN IF NOT EXISTS voice_embedding VECTOR(192);
|
|
|
|
COMMENT ON COLUMN public.identities.voice_embedding IS
|
|
'ECAPA-TDNN 192-dim voice embedding';
|
|
|
|
COMMENT ON COLUMN dev.identities.voice_embedding IS
|
|
'ECAPA-TDNN 192-dim voice embedding';
|
|
|
|
-- ============================================
|
|
-- Part 6: Add identity_embedding VECTOR(768)
|
|
-- ============================================
|
|
|
|
-- public schema
|
|
ALTER TABLE public.identities
|
|
ADD COLUMN IF NOT EXISTS identity_embedding VECTOR(768);
|
|
|
|
-- dev schema
|
|
ALTER TABLE dev.identities
|
|
ADD COLUMN IF NOT EXISTS identity_embedding VECTOR(768);
|
|
|
|
COMMENT ON COLUMN public.identities.identity_embedding IS
|
|
'CLIP ViT-L/14 768-dim embedding for logo/symbol/object identity';
|
|
|
|
COMMENT ON COLUMN dev.identities.identity_embedding IS
|
|
'CLIP ViT-L/14 768-dim embedding for logo/symbol/object identity';
|
|
|
|
-- ============================================
|
|
-- Part 7: Add reference_data JSONB (1-to-many embeddings)
|
|
-- ============================================
|
|
|
|
-- public schema
|
|
ALTER TABLE public.identities
|
|
ADD COLUMN IF NOT EXISTS reference_data JSONB DEFAULT '{}';
|
|
|
|
-- dev schema
|
|
ALTER TABLE dev.identities
|
|
ADD COLUMN IF NOT EXISTS reference_data JSONB DEFAULT '{}';
|
|
|
|
COMMENT ON COLUMN public.identities.reference_data IS
|
|
'JSONB: {face_embeddings[], voice_embeddings[], identity_embeddings[], sound_embeddings[], image_urls[]}';
|
|
|
|
COMMENT ON COLUMN dev.identities.reference_data IS
|
|
'JSONB: {face_embeddings[], voice_embeddings[], identity_embeddings[], sound_embeddings[], image_urls[]}';
|
|
|
|
-- ============================================
|
|
-- Part 8: Add created_at and updated_at (if not exists)
|
|
-- ============================================
|
|
|
|
-- public schema: add created_at
|
|
ALTER TABLE public.identities
|
|
ADD COLUMN IF NOT EXISTS created_at TIMESTAMPTZ DEFAULT NOW();
|
|
|
|
-- public schema: add updated_at
|
|
ALTER TABLE public.identities
|
|
ADD COLUMN IF NOT EXISTS updated_at TIMESTAMPTZ DEFAULT NOW();
|
|
|
|
-- dev schema: add updated_at (created_at already exists)
|
|
ALTER TABLE dev.identities
|
|
ADD COLUMN IF NOT EXISTS updated_at TIMESTAMPTZ DEFAULT NOW();
|
|
|
|
-- ============================================
|
|
-- Part 9: Add TMDB integration fields
|
|
-- ============================================
|
|
|
|
-- TMDB specific fields
|
|
ALTER TABLE public.identities
|
|
ADD COLUMN IF NOT EXISTS tmdb_id INTEGER;
|
|
|
|
ALTER TABLE public.identities
|
|
ADD COLUMN IF NOT EXISTS tmdb_profile TEXT;
|
|
|
|
ALTER TABLE dev.identities
|
|
ADD COLUMN IF NOT EXISTS tmdb_id INTEGER;
|
|
|
|
ALTER TABLE dev.identities
|
|
ADD COLUMN IF NOT EXISTS tmdb_profile TEXT;
|
|
|
|
COMMENT ON COLUMN public.identities.tmdb_id IS
|
|
'TMDB person ID';
|
|
|
|
COMMENT ON COLUMN dev.identities.tmdb_id IS
|
|
'TMDB person ID';
|
|
|
|
COMMENT ON COLUMN public.identities.tmdb_profile IS
|
|
'TMDB profile image URL';
|
|
|
|
COMMENT ON COLUMN dev.identities.tmdb_profile IS
|
|
'TMDB profile image URL';
|
|
|
|
-- ============================================
|
|
-- Part 10: Create vector indexes
|
|
-- ============================================
|
|
|
|
-- face_embedding index
|
|
CREATE INDEX IF NOT EXISTS idx_identities_face_embedding
|
|
ON public.identities USING ivfflat (face_embedding vector_cosine_ops)
|
|
WITH (lists = 100);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_dev_identities_face_embedding
|
|
ON dev.identities USING ivfflat (face_embedding vector_cosine_ops)
|
|
WITH (lists = 100);
|
|
|
|
-- voice_embedding index
|
|
CREATE INDEX IF NOT EXISTS idx_identities_voice_embedding
|
|
ON public.identities USING ivfflat (voice_embedding vector_cosine_ops)
|
|
WITH (lists = 50);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_dev_identities_voice_embedding
|
|
ON dev.identities USING ivfflat (voice_embedding vector_cosine_ops)
|
|
WITH (lists = 50);
|
|
|
|
-- identity_embedding index
|
|
CREATE INDEX IF NOT EXISTS idx_identities_identity_embedding
|
|
ON public.identities USING ivfflat (identity_embedding vector_cosine_ops)
|
|
WITH (lists = 100);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_dev_identities_identity_embedding
|
|
ON dev.identities USING ivfflat (identity_embedding vector_cosine_ops)
|
|
WITH (lists = 100);
|
|
|
|
-- reference_data JSONB index (GIN)
|
|
CREATE INDEX IF NOT EXISTS idx_identities_reference_data
|
|
ON public.identities USING GIN (reference_data);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_dev_identities_reference_data
|
|
ON dev.identities USING GIN (reference_data);
|
|
|
|
-- uuid index
|
|
CREATE INDEX IF NOT EXISTS idx_identities_uuid
|
|
ON public.identities (uuid);
|
|
|
|
CREATE INDEX IF NOT EXISTS idx_dev_identities_uuid
|
|
ON dev.identities (uuid);
|
|
|
|
-- ============================================
|
|
-- Part 11: Add identity_type check constraint
|
|
-- ============================================
|
|
|
|
-- Update identity_type constraint to include new types
|
|
ALTER TABLE public.identities
|
|
DROP CONSTRAINT IF EXISTS identities_identity_type_check;
|
|
|
|
ALTER TABLE public.identities
|
|
ADD CONSTRAINT identities_identity_type_check
|
|
CHECK (
|
|
identity_type IN (
|
|
'people', 'brand', 'object', 'concept', 'logo', 'symbol',
|
|
'scene', 'sound', 'animal', 'environmental'
|
|
)
|
|
);
|
|
|
|
ALTER TABLE dev.identities
|
|
DROP CONSTRAINT IF EXISTS identities_identity_type_check;
|
|
|
|
ALTER TABLE dev.identities
|
|
ADD CONSTRAINT identities_identity_type_check
|
|
CHECK (
|
|
identity_type IN (
|
|
'people', 'brand', 'object', 'concept', 'logo', 'symbol',
|
|
'scene', 'sound', 'animal', 'environmental'
|
|
)
|
|
);
|
|
|
|
-- ============================================
|
|
-- Part 12: Drop old embedding index (if exists)
|
|
-- ============================================
|
|
|
|
DROP INDEX IF EXISTS public.idx_identities_embedding;
|
|
DROP INDEX IF EXISTS dev.idx_identities_embedding;
|
|
|
|
-- ============================================
|
|
-- Verification
|
|
-- ============================================
|
|
|
|
-- Verify table structure
|
|
DO $$
|
|
DECLARE
|
|
col_count INTEGER;
|
|
BEGIN
|
|
SELECT COUNT(*) INTO col_count
|
|
FROM information_schema.columns
|
|
WHERE table_schema = 'public' AND table_name = 'identities'
|
|
AND column_name IN (
|
|
'uuid', 'identity_type', 'source', 'status',
|
|
'face_embedding', 'voice_embedding', 'identity_embedding',
|
|
'reference_data', 'tmdb_id', 'tmdb_profile',
|
|
'created_at', 'updated_at'
|
|
);
|
|
|
|
IF col_count < 12 THEN
|
|
RAISE NOTICE 'Migration 023: Some columns missing in public.identities (count=%, expected=12)', col_count;
|
|
ELSE
|
|
RAISE NOTICE 'Migration 023: All columns added successfully to public.identities';
|
|
END IF;
|
|
|
|
SELECT COUNT(*) INTO col_count
|
|
FROM information_schema.columns
|
|
WHERE table_schema = 'dev' AND table_name = 'identities'
|
|
AND column_name IN (
|
|
'uuid', 'identity_type', 'source', 'status',
|
|
'face_embedding', 'voice_embedding', 'identity_embedding',
|
|
'reference_data', 'tmdb_id', 'tmdb_profile',
|
|
'created_at', 'updated_at'
|
|
);
|
|
|
|
IF col_count < 12 THEN
|
|
RAISE NOTICE 'Migration 023: Some columns missing in dev.identities (count=%, expected=12)', col_count;
|
|
ELSE
|
|
RAISE NOTICE 'Migration 023: All columns added successfully to dev.identities';
|
|
END IF;
|
|
END $$; |