feat: add migrations, test scripts, and utility tools
- Add database migrations (006-028) for face recognition, identity, file_uuid - Add test scripts for ASR, face, search, processing - Add portal frontend (Tauri) - Add config, benchmark, and monitoring utilities - Add model checkpoints and pretrained model references
This commit is contained in:
77
migrations/008_person_identity_binding.sql
Normal file
77
migrations/008_person_identity_binding.sql
Normal file
@@ -0,0 +1,77 @@
|
||||
-- Migration: 008_person_identity_binding.sql
|
||||
-- Purpose: 建立聲紋 (Speaker ID)、人臉 (Face ID) 與真實身份 (Identity) 的綁定系統
|
||||
-- Date: 2026-04-10
|
||||
|
||||
-- 1. 擴展 chunks 表,增加聲音與面孔的觀測值陣列
|
||||
ALTER TABLE chunks
|
||||
ADD COLUMN IF NOT EXISTS speaker_ids TEXT[] DEFAULT '{}', -- e.g. ['speaker_3', 'speaker_5']
|
||||
ADD COLUMN IF NOT EXISTS face_ids TEXT[] DEFAULT '{}'; -- e.g. ['face_1']
|
||||
|
||||
-- 2. 建立真實身份表 (Talents / Persons)
|
||||
-- 存儲現實世界中的人員資訊 (演員、配音員、真實人物)
|
||||
CREATE TABLE IF NOT EXISTS talents (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
real_name TEXT NOT NULL, -- 真實姓名 (e.g. "Tom Cruise")
|
||||
actor_name TEXT, -- 藝名/別名
|
||||
voice_embedding VECTOR(192), -- 聲紋參考向量 (ECAPA-TDNN)
|
||||
face_embedding VECTOR(512), -- 人臉參考向量 (ArcFace)
|
||||
metadata JSONB DEFAULT '{}', -- 其他屬性 (性別、年齡等)
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
UNIQUE(real_name)
|
||||
);
|
||||
|
||||
-- 建立向量索引
|
||||
CREATE INDEX IF NOT EXISTS idx_talent_voice ON talents USING hnsw (voice_embedding vector_cosine_ops);
|
||||
CREATE INDEX IF NOT EXISTS idx_talent_face ON talents USING hnsw (face_embedding vector_cosine_ops);
|
||||
|
||||
-- 3. 建立身份綁定映射表 (Identity Bindings)
|
||||
-- 負責將機器生成的 ID (face_x, speaker_y) 映射到 talent_id
|
||||
CREATE TABLE IF NOT EXISTS identity_bindings (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
talent_id BIGINT REFERENCES talents(id) ON DELETE CASCADE,
|
||||
|
||||
-- 綁定類型與機器 ID
|
||||
binding_type VARCHAR(32) NOT NULL, -- 'face' 或 'speaker'
|
||||
binding_value VARCHAR(64) NOT NULL, -- e.g. "face_1", "speaker_3"
|
||||
|
||||
-- 綁定來源與狀態
|
||||
source TEXT DEFAULT 'auto', -- 'auto' (自動聚類) 或 'manual' (人工綁定)
|
||||
confidence FLOAT DEFAULT 0.0,
|
||||
is_active BOOLEAN DEFAULT TRUE,
|
||||
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
|
||||
-- 每個機器 ID 只能綁定一個 Talent
|
||||
UNIQUE(binding_type, binding_value)
|
||||
);
|
||||
|
||||
-- 索引優化:加速由機器 ID 查找 Talent
|
||||
CREATE INDEX IF NOT EXISTS idx_bindings_lookup ON identity_bindings(binding_type, binding_value);
|
||||
CREATE INDEX IF NOT EXISTS idx_bindings_talent ON identity_bindings(talent_id);
|
||||
|
||||
-- 4. (選填) 建立角色表 (Characters) - 用於動畫/多語系場景
|
||||
CREATE TABLE IF NOT EXISTS characters (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
video_uuid TEXT NOT NULL,
|
||||
name TEXT NOT NULL, -- 角色名 (e.g. "Batman")
|
||||
language_track TEXT DEFAULT 'original', -- 語言軌道 (original, dub_zh_tw)
|
||||
is_voice_only BOOLEAN DEFAULT FALSE, -- 是否為無臉角色 (旁白/AI)
|
||||
metadata JSONB DEFAULT '{}',
|
||||
UNIQUE(video_uuid, name, language_track)
|
||||
);
|
||||
|
||||
-- 5. (選填) 建立飾演關係表 (Castings)
|
||||
-- 定義 Talent 在特定視頻中飾演哪個 Character
|
||||
CREATE TABLE IF NOT EXISTS castings (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
character_id BIGINT REFERENCES characters(id) ON DELETE CASCADE,
|
||||
talent_id BIGINT REFERENCES talents(id) ON DELETE CASCADE,
|
||||
track_type VARCHAR(32) DEFAULT 'original', -- 對應音軌版本
|
||||
role_type VARCHAR(32) DEFAULT 'both', -- 'voice', 'face', 'both'
|
||||
UNIQUE(character_id, talent_id, track_type)
|
||||
);
|
||||
|
||||
COMMENT ON TABLE talents IS '真實人物/演員/配音員資訊庫';
|
||||
COMMENT ON TABLE identity_bindings IS '機器 ID (Face/Speaker) 與真實 Talent 的映射關係';
|
||||
COMMENT ON TABLE characters IS '視頻中的劇中角色';
|
||||
COMMENT ON TABLE castings is 'Talent 與 Character 的飾演關係';
|
||||
Reference in New Issue
Block a user