feat: update Python processors and add utility scripts

- Update ASR, face, OCR, pose processors
- Add release pre-flight check script
- Add synonym generation, chunk processing scripts
- Add face recognition, stamp search utilities
This commit is contained in:
Warren
2026-04-30 15:07:49 +08:00
parent f4697396e4
commit 8f05a7c188
256 changed files with 60505 additions and 299 deletions

View File

@@ -0,0 +1,36 @@
-- P0: Core API Infrastructure
-- 1. Update the videos table for frame tracking.
--    The table is schema-qualified as dev.videos to match the foreign keys
--    declared further down in this migration (REFERENCES dev.videos(uuid)),
--    so these statements do not depend on the session search_path.
--    Both statements are guarded with IF NOT EXISTS and are safe to re-run.
ALTER TABLE dev.videos
    ADD COLUMN IF NOT EXISTS total_frames BIGINT DEFAULT 0;
ALTER TABLE dev.videos
    ADD COLUMN IF NOT EXISTS processing_status VARCHAR(20) DEFAULT 'REGISTERED';
-- 2. Jobs table for scheduling and tracking.
--    id has no default, so the inserting side must supply the UUID.
--    Rows are removed automatically when the referenced dev.videos row is
--    deleted (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS jobs (
    id                    UUID,
    asset_uuid            VARCHAR(32) NOT NULL,
    processor_list        TEXT[],
    assigned_processor_id UUID,
    rule                  VARCHAR(20),
    status                VARCHAR(20) DEFAULT 'QUEUED',
    total_frames          BIGINT DEFAULT 0,
    processed_frames      BIGINT DEFAULT 0,
    error_message         TEXT,
    created_at            TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at            TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    PRIMARY KEY (id),
    FOREIGN KEY (asset_uuid) REFERENCES dev.videos (uuid) ON DELETE CASCADE
);
-- 3. chunks_rule1 table for sentence-level chunking.
--    Each row holds one chunk of text (content) for a video, bounded by
--    start_frame and end_frame. id is generated by gen_random_uuid() when the
--    inserter does not provide one. Rows are deleted together with the
--    referenced dev.videos row (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS chunks_rule1 (
    id          UUID DEFAULT gen_random_uuid(),
    asset_uuid  VARCHAR(32) NOT NULL,
    start_frame BIGINT NOT NULL,
    end_frame   BIGINT NOT NULL,
    content     TEXT NOT NULL,
    speaker_id  VARCHAR(50),
    created_at  TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    PRIMARY KEY (id),
    FOREIGN KEY (asset_uuid) REFERENCES dev.videos (uuid) ON DELETE CASCADE
);

-- Lookup of all chunks belonging to one video.
CREATE INDEX IF NOT EXISTS idx_chunks_rule1_asset
    ON chunks_rule1 (asset_uuid);

-- Lookup by frame range.
CREATE INDEX IF NOT EXISTS idx_chunks_rule1_frames
    ON chunks_rule1 (start_frame, end_frame);
-- 4. Secondary indexes on jobs: one for filtering by status, one for finding
--    the jobs of a given video. Both are skipped if they already exist.
CREATE INDEX IF NOT EXISTS idx_jobs_status
    ON jobs (status);
CREATE INDEX IF NOT EXISTS idx_jobs_asset
    ON jobs (asset_uuid);

View File

@@ -0,0 +1,20 @@
-- P1: Align processor_results with Worker expectations
-- This table tracks processor execution per job.
-- Every column is added in its own statement guarded by IF NOT EXISTS, so a
-- re-run skips columns that are already present.

-- Owning job; must match an existing dev.monitor_jobs.id when set.
ALTER TABLE dev.processor_results
    ADD COLUMN IF NOT EXISTS job_id INT REFERENCES dev.monitor_jobs (id);

ALTER TABLE dev.processor_results
    ADD COLUMN IF NOT EXISTS processor VARCHAR(64);

ALTER TABLE dev.processor_results
    ADD COLUMN IF NOT EXISTS output_path TEXT;

-- Progress counters, both starting at 0.
ALTER TABLE dev.processor_results
    ADD COLUMN IF NOT EXISTS progress_total INT DEFAULT 0;

ALTER TABLE dev.processor_results
    ADD COLUMN IF NOT EXISTS progress_current INT DEFAULT 0;

ALTER TABLE dev.processor_results
    ADD COLUMN IF NOT EXISTS last_checkpoint TIMESTAMPTZ;

ALTER TABLE dev.processor_results
    ADD COLUMN IF NOT EXISTS updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP;

ALTER TABLE dev.processor_results
    ADD COLUMN IF NOT EXISTS duration_secs DOUBLE PRECISION;
-- Backfill: copy the legacy processor_type value into processor for rows
-- where processor has not been set yet. Rows whose processor is already
-- populated, or whose processor_type is NULL, are left untouched.
UPDATE dev.processor_results
SET processor = processor_type
WHERE processor_type IS NOT NULL
  AND processor IS NULL;
-- Add unique constraint for upsert logic.
-- ALTER TABLE ... ADD CONSTRAINT has no IF NOT EXISTS form, so the constraint
-- is added inside a DO block that first checks pg_constraint. This keeps the
-- migration re-runnable like the guarded statements around it.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'uq_processor_results_job_processor'
          AND conrelid = 'dev.processor_results'::regclass
    ) THEN
        ALTER TABLE dev.processor_results
            ADD CONSTRAINT uq_processor_results_job_processor
            UNIQUE (job_id, processor);
    END IF;
END
$$;

-- Secondary indexes for lookups by job and by status.
CREATE INDEX IF NOT EXISTS idx_processor_results_job
    ON dev.processor_results (job_id);
CREATE INDEX IF NOT EXISTS idx_processor_results_status
    ON dev.processor_results (status);

View File

@@ -0,0 +1,33 @@
-- P2: Person Identity & Talent Management
-- 1. Talents table (Global Identities / TMDB Actors).
--    real_name is mandatory and unique across the table. The two embedding
--    columns are stored as plain TEXT; metadata defaults to an empty JSON
--    object.
CREATE TABLE IF NOT EXISTS talents (
    id              BIGSERIAL,
    real_name       VARCHAR(255) NOT NULL,
    actor_name      VARCHAR(255),
    voice_embedding TEXT,
    face_embedding  TEXT,
    metadata        JSONB DEFAULT '{}',
    created_at      TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id),
    UNIQUE (real_name)
);
-- 2. Identity Bindings (maps machine IDs to Talents).
--    A binding is removed together with its talent (ON DELETE CASCADE).
--    The combination (talent_id, binding_type, binding_value) is unique.
--    NOTE(review): talent_id is nullable, and NULLs are treated as distinct
--    by a UNIQUE constraint, so rows without a talent are not de-duplicated.
--    Confirm whether unbound rows are intended.
CREATE TABLE IF NOT EXISTS identity_bindings (
    id            BIGSERIAL,
    talent_id     BIGINT,
    binding_type  VARCHAR(20) NOT NULL,  -- 'face', 'speaker'
    binding_value VARCHAR(100) NOT NULL,
    source        VARCHAR(50) DEFAULT 'manual',
    confidence    DOUBLE PRECISION DEFAULT 1.0,
    created_at    TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id),
    FOREIGN KEY (talent_id) REFERENCES talents (id) ON DELETE CASCADE,
    UNIQUE (talent_id, binding_type, binding_value)
);

-- All bindings of one talent.
CREATE INDEX IF NOT EXISTS idx_identity_bindings_talent
    ON identity_bindings (talent_id);

-- Reverse lookup: find bindings from a (type, value) pair.
CREATE INDEX IF NOT EXISTS idx_identity_bindings_value
    ON identity_bindings (binding_type, binding_value);
-- 3. Extend person_identities with a character name, a link to talents, and
--    temporal-overlap / confidence / match-strategy fields. Each column is
--    added only if it is not already present.
ALTER TABLE person_identities
    ADD COLUMN IF NOT EXISTS character_name VARCHAR(255);

-- No ON DELETE action is given, so the default (NO ACTION) applies: a talent
-- that is still referenced here cannot be deleted.
-- NOTE(review): identity_bindings cascades on talent deletion; confirm the
-- stricter behavior here is intended.
ALTER TABLE person_identities
    ADD COLUMN IF NOT EXISTS global_person_id BIGINT REFERENCES talents (id);

ALTER TABLE person_identities
    ADD COLUMN IF NOT EXISTS temporal_overlap_score DOUBLE PRECISION;

ALTER TABLE person_identities
    ADD COLUMN IF NOT EXISTS audio_visual_confidence DOUBLE PRECISION;

ALTER TABLE person_identities
    ADD COLUMN IF NOT EXISTS match_strategy VARCHAR(30);