feat: trace quality agent selection report, identity clustering runner_v2 DB write, age/gender CoreML selection, updated experiment config UUID

This commit is contained in:
Warren
2026-05-06 14:41:48 +08:00
parent 74b6182eba
commit 65a1f77e65
1048 changed files with 103499 additions and 0 deletions

View File

@@ -0,0 +1,19 @@
-- Migration: 029_add_trace_id_to_face_detections.sql
-- Date: 2026-05-04
-- Purpose: Add trace_id for cross-frame face tracking (TKG temporal graph)
-- trace_id links same person across multiple frames
BEGIN;
-- Step 1: nullable tracking id on each detection. Per the migration header,
-- rows that share a trace_id are meant to be the same person across frames.
ALTER TABLE face_detections
    ADD COLUMN IF NOT EXISTS trace_id INTEGER;

-- Step 2: partial index for lookups by trace; rows without a trace (NULL)
-- are left out of the index.
-- NOTE(review): the composite index below uses the same predicate and has
-- trace_id as its leading column, so this index may be redundant — confirm
-- with EXPLAIN before removing it.
CREATE INDEX IF NOT EXISTS idx_face_detections_trace_id
    ON face_detections (trace_id)
    WHERE trace_id IS NOT NULL;

-- Step 3: partial composite index for frame-range scans within one trace
-- (TKG spatial-temporal export).
CREATE INDEX IF NOT EXISTS idx_face_detections_trace_time
    ON face_detections (trace_id, frame_number)
    WHERE trace_id IS NOT NULL;
COMMIT;

View File

@@ -0,0 +1,62 @@
-- Migration: 030_create_tkg_graph_tables.sql
-- Date: 2026-05-04
-- Purpose: Temporal Knowledge Graph modeled with plain PostgreSQL tables (node/edge adjacency pattern)
-- Nodes = entities (face traces, objects, speakers)
-- Edges = temporal-spatial relationships
--
-- Graph Model:
-- (FaceTrace) -[:APPEARS_IN]-> (Frame)
-- (YoloObject) -[:APPEARS_IN]-> (Frame)
-- (FaceTrace) -[:CO_OCCURS_WITH]-> (YoloObject) -- same frame
-- (FaceTrace) -[:SPEAKS_AS]-> (Speaker) -- temporal overlap
BEGIN;
-- 1. Graph Nodes: typed entities with properties.
--    The UNIQUE constraint rejects a second node with the same
--    (file_uuid, node_type, external_id).
CREATE TABLE IF NOT EXISTS tkg_nodes (
    id BIGSERIAL PRIMARY KEY,
    node_type VARCHAR(64) NOT NULL, -- 'face_trace', 'yolo_object', 'speaker', 'frame'
    external_id VARCHAR(256) NOT NULL, -- trace_id, object_class, speaker_id
    file_uuid VARCHAR(64) NOT NULL,
    label VARCHAR(512), -- display name
    properties JSONB NOT NULL DEFAULT '{}', -- position, confidence, etc.
    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
    UNIQUE (file_uuid, node_type, external_id)
);
-- IF NOT EXISTS keeps the index creation re-runnable, matching the
-- CREATE TABLE IF NOT EXISTS above; without it a second run of this
-- migration aborts with "relation already exists".
CREATE INDEX IF NOT EXISTS idx_tkg_nodes_type ON tkg_nodes (node_type);
CREATE INDEX IF NOT EXISTS idx_tkg_nodes_file ON tkg_nodes (file_uuid);
-- 2. Graph Edges: typed relationships with temporal data.
--    Both endpoints reference tkg_nodes(id) with ON DELETE CASCADE, so
--    deleting a node also deletes every edge that touches it.
--    The UNIQUE constraint allows at most one edge of a given type between
--    an ordered (source, target) pair within a file.
CREATE TABLE IF NOT EXISTS tkg_edges (
    id BIGSERIAL PRIMARY KEY,
    edge_type VARCHAR(64) NOT NULL, -- 'APPEARS_IN', 'CO_OCCURS_WITH', 'NEAR', 'SPEAKS_AS'
    source_node_id BIGINT NOT NULL REFERENCES tkg_nodes(id) ON DELETE CASCADE,
    target_node_id BIGINT NOT NULL REFERENCES tkg_nodes(id) ON DELETE CASCADE,
    file_uuid VARCHAR(64) NOT NULL,
    properties JSONB NOT NULL DEFAULT '{}', -- temporal data: {start_frame, end_frame, overlap_ratio, distance}
    created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
    UNIQUE (file_uuid, edge_type, source_node_id, target_node_id)
);
-- IF NOT EXISTS keeps the index creation re-runnable, matching the
-- CREATE TABLE IF NOT EXISTS above; without it a second run of this
-- migration aborts with "relation already exists".
CREATE INDEX IF NOT EXISTS idx_tkg_edges_type ON tkg_edges (edge_type);
CREATE INDEX IF NOT EXISTS idx_tkg_edges_source ON tkg_edges (source_node_id);
CREATE INDEX IF NOT EXISTS idx_tkg_edges_target ON tkg_edges (target_node_id);
CREATE INDEX IF NOT EXISTS idx_tkg_edges_file ON tkg_edges (file_uuid);
-- 3. Materialized co-occurrence: face_trace <-> yolo_object in the same frame.
--    Core TKG question: "Who was near what, when?"
--    Currently only the face side is selected; the three object-side columns
--    are typed NULL placeholders to be filled from yolo data later.
--    Created WITH NO DATA, so the view is unpopulated until it is refreshed
--    (REFRESH MATERIALIZED VIEW tkg_co_occurrence).
CREATE MATERIALIZED VIEW IF NOT EXISTS tkg_co_occurrence AS
    SELECT
        fd.file_uuid,
        fd.trace_id,
        fd.frame_number,
        fd.bbox              AS face_bbox,
        CAST(NULL AS jsonb)  AS yolo_bbox,     -- placeholder: will be populated from yolo data
        CAST(NULL AS text)   AS object_class,  -- placeholder
        CAST(NULL AS float8) AS confidence     -- placeholder
    FROM face_detections AS fd
    WHERE fd.trace_id IS NOT NULL
WITH NO DATA;
COMMIT;

View File

@@ -0,0 +1,25 @@
-- Migration: 031_add_chunk_search_trigger.sql
-- Date: 2026-05-05
-- Purpose: Add search_vector tsvector column + auto-update trigger for BM25 search
BEGIN;
-- Remove any earlier trg_chunk_search_vector so the CREATE TRIGGER later in
-- this migration does not collide with an existing definition.
-- Both the schema-qualified table and the unqualified name (resolved through
-- the session search_path) are covered.
DROP TRIGGER IF EXISTS trg_chunk_search_vector
    ON dev.chunks;
DROP TRIGGER IF EXISTS trg_chunk_search_vector
    ON chunks;
-- Trigger function (has to exist before the trigger that calls it).
-- Rebuilds NEW.search_vector from NEW.text_content using the 'english'
-- text search configuration, then hands the modified row back so the
-- write proceeds with the refreshed vector.
CREATE OR REPLACE FUNCTION update_chunk_search_vector()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $fn$
DECLARE
    source_text TEXT;
BEGIN
    -- A NULL text_content is treated as the empty string, which yields an
    -- empty tsvector rather than NULL.
    source_text := COALESCE(NEW.text_content, '');
    NEW.search_vector := to_tsvector('english', source_text);
    RETURN NEW;
END;
$fn$;
-- Ensure the column written by update_chunk_search_vector() exists.
-- The migration header promises this column, and without it every
-- INSERT/UPDATE on dev.chunks would fail inside the trigger function.
-- IF NOT EXISTS makes this a no-op when the column is already present.
ALTER TABLE dev.chunks
    ADD COLUMN IF NOT EXISTS search_vector tsvector;
-- Create trigger on dev.chunks: recompute search_vector for every inserted
-- or updated row. Rows written before this trigger existed keep their
-- current search_vector value until they are next updated.
CREATE TRIGGER trg_chunk_search_vector
    BEFORE INSERT OR UPDATE ON dev.chunks
    FOR EACH ROW
    EXECUTE FUNCTION update_chunk_search_vector();
COMMIT;

View File

@@ -0,0 +1,59 @@
-- Migration: 032_processor_version_tracking.sql
-- Date: 2026-05-05
-- Purpose: Processor/Agent version tracking for lifecycle management
-- Enables stale detection and targeted re-processing
BEGIN;
-- 1. Processor version registry: one row per processor/agent name holding
--    the model version it is recorded at and the names it depends on.
-- NOTE(review): the primary key is processor alone, so a global row
-- (file_uuid NULL) and a per-file override for the same processor cannot
-- coexist — confirm whether the key should also include file_uuid.
CREATE TABLE IF NOT EXISTS dev.processor_versions (
    processor      VARCHAR(64),
    model_version  VARCHAR(128) NOT NULL,
    processor_type VARCHAR(32)  NOT NULL DEFAULT 'processor',  -- 'processor' or 'agent'
    dependencies   TEXT[]       DEFAULT '{}',
    updated_at     TIMESTAMPTZ  DEFAULT CURRENT_TIMESTAMP,
    file_uuid      VARCHAR(64),  -- NULL = global version, set = per-file override
    PRIMARY KEY (processor)
);
-- 2. Initial version seeding (current Charade pipeline).
--    Re-runnable upsert: when a processor row already exists, every seeded
--    attribute (model_version, processor_type, dependencies) is brought in
--    line with the values below and updated_at is bumped. The WHERE clause
--    skips rows that already match, so an unchanged re-run rewrites nothing
--    and leaves updated_at alone.
INSERT INTO dev.processor_versions AS pv
    (processor, model_version, processor_type, dependencies)
VALUES
    ('cut', 'pyscenedetect/default', 'processor', '{}'),
    ('asr', 'faster-whisper/small/v1', 'processor', '{}'),
    ('asrx', 'speechbrain/ecapa-tdnn/v1', 'processor', '{asr}'),
    ('ocr', 'apple-vision/v1', 'processor', '{}'),
    ('yolo', 'yolov5-coreml/v2', 'processor', '{}'),
    ('face_detection', 'apple-vision/v2', 'processor', '{}'),
    ('face_embedding', 'coreml-facenet/v2', 'processor', '{}'),
    ('pose', 'apple-vision/v1', 'processor', '{}'),
    ('face_trace', 'iou+embedding/v1', 'processor', '{face_detection,face_embedding}'),
    ('speaker_binding', 'mar-lip/v1', 'agent', '{asrx,face_detection}'),
    ('identity_clustering', 'cosine-threshold/v1', 'agent', '{face_trace,speaker_binding}'),
    ('tmdb_agent', 'tmdb-api/v1', 'agent', '{}'),
    ('story_agent', 'template/v2.0', 'agent', '{asr,asrx,cut,face_trace,identity_clustering,yolo}'),
    ('embedding_agent', 'nomic-embed-768d/v1', 'agent', '{story_agent}')
ON CONFLICT (processor) DO UPDATE
SET model_version  = EXCLUDED.model_version,
    processor_type = EXCLUDED.processor_type,
    dependencies   = EXCLUDED.dependencies,
    updated_at     = CURRENT_TIMESTAMP
WHERE pv.model_version  IS DISTINCT FROM EXCLUDED.model_version
   OR pv.processor_type IS DISTINCT FROM EXCLUDED.processor_type
   OR pv.dependencies   IS DISTINCT FROM EXCLUDED.dependencies;
-- 3. Stale detection function.
--    Compares the versions recorded in dev.processor_versions against the
--    versions supplied by the caller and returns one row per mismatch.
--
--    Parameters:
--      p_file_uuid        - rows with file_uuid NULL are always considered;
--                           rows with a file_uuid are considered only when it
--                           equals this value.
--      p_current_versions - JSONB object keyed by processor name, with the
--                           version string as value.
--    Returns:
--      (agent_name, reason) for every considered row whose stored
--      model_version differs from p_current_versions->>processor. A key that
--      is missing from the JSON counts as a mismatch (NULL IS DISTINCT FROM
--      the stored value) and format() prints it as an empty string.
--      Rows are ordered by processor name for deterministic output.
--
--    Marked STABLE because the function only reads from the table.
CREATE OR REPLACE FUNCTION dev.check_stale_agents(
    p_file_uuid VARCHAR(64),
    p_current_versions JSONB
) RETURNS TABLE(agent_name VARCHAR(64), reason TEXT)
LANGUAGE plpgsql
STABLE
AS $$
BEGIN
    -- Set-based: a single query replaces the former row-by-row loop.
    RETURN QUERY
    SELECT
        pv.processor,
        format('Version mismatch: current=%s, stored=%s',
               p_current_versions ->> pv.processor, pv.model_version)
    FROM dev.processor_versions AS pv
    WHERE (pv.file_uuid IS NULL OR pv.file_uuid = p_file_uuid)
      AND (p_current_versions ->> pv.processor) IS DISTINCT FROM pv.model_version
    ORDER BY pv.processor;
END;
$$;
COMMIT;