feat: trace quality agent selection report, identity clustering runner_v2 DB write, age/gender CoreML selection, updated experiment config UUID
This commit is contained in:
19
migrations/029_add_trace_id_to_face_detections.sql
Normal file
19
migrations/029_add_trace_id_to_face_detections.sql
Normal file
@@ -0,0 +1,19 @@
|
||||
-- Migration: 029_add_trace_id_to_face_detections.sql
|
||||
-- Date: 2026-05-04
|
||||
-- Purpose: Add trace_id for cross-frame face tracking (TKG temporal graph)
|
||||
-- trace_id links same person across multiple frames
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- 1. Add trace_id column
|
||||
-- The column is nullable with no default, so rows that already exist keep trace_id = NULL.
-- IF NOT EXISTS makes the statement safe to re-run.
ALTER TABLE face_detections ADD COLUMN IF NOT EXISTS trace_id INTEGER;
|
||||
|
||||
-- 2. Index for trace queries
|
||||
-- Partial b-tree index for lookups by trace; rows without a trace (trace_id IS NULL)
-- are left out of the index.
-- NOTE(review): idx_face_detections_trace_time shares the same leading column and
-- predicate, so this index may be redundant -- confirm with EXPLAIN before dropping it.
CREATE INDEX IF NOT EXISTS idx_face_detections_trace_id
    ON face_detections USING btree (trace_id)
    WHERE trace_id IS NOT NULL;
|
||||
|
||||
-- 3. Composite index for frame-range queries (TKG spatial-temporal export)
|
||||
-- Partial b-tree index on (trace_id, frame_number): supports reading one trace's
-- detections by frame range. Untraced rows (trace_id IS NULL) are excluded.
CREATE INDEX IF NOT EXISTS idx_face_detections_trace_time
    ON face_detections USING btree (trace_id, frame_number)
    WHERE trace_id IS NOT NULL;
|
||||
|
||||
COMMIT;
|
||||
62
migrations/030_create_tkg_graph_tables.sql
Normal file
62
migrations/030_create_tkg_graph_tables.sql
Normal file
@@ -0,0 +1,62 @@
|
||||
-- Migration: 030_create_tkg_graph_tables.sql
|
||||
-- Date: 2026-05-04
|
||||
-- Purpose: Temporal Knowledge Graph using PostgreSQL native graph pattern
|
||||
-- Nodes = entities (face traces, objects, speakers)
|
||||
-- Edges = temporal-spatial relationships
|
||||
--
|
||||
-- Graph Model:
|
||||
-- (FaceTrace) -[:APPEARS_IN]-> (Frame)
|
||||
-- (YoloObject) -[:APPEARS_IN]-> (Frame)
|
||||
-- (FaceTrace) -[:CO_OCCURS_WITH]-> (YoloObject) -- same frame
|
||||
-- (FaceTrace) -[:SPEAKS_AS]-> (Speaker) -- temporal overlap
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- 1. Graph Nodes: typed entities with properties
|
||||
-- Graph nodes: one row per typed entity within a file.
CREATE TABLE IF NOT EXISTS tkg_nodes (
    -- Surrogate key; tkg_edges references it from both endpoints.
    id          BIGSERIAL    PRIMARY KEY,
    -- Entity kind: 'face_trace', 'yolo_object', 'speaker', 'frame'.
    node_type   VARCHAR(64)  NOT NULL,
    -- Identifier in the originating data: trace_id, object_class, speaker_id.
    external_id VARCHAR(256) NOT NULL,
    file_uuid   VARCHAR(64)  NOT NULL,
    -- Display name (optional).
    label       VARCHAR(512),
    -- Free-form attributes: position, confidence, etc.
    properties  JSONB        NOT NULL DEFAULT '{}',
    created_at  TIMESTAMPTZ  DEFAULT CURRENT_TIMESTAMP,
    -- An entity appears at most once per file and type.
    UNIQUE (file_uuid, node_type, external_id)
);
|
||||
|
||||
-- Secondary indexes on tkg_nodes.
-- IF NOT EXISTS keeps the migration re-runnable, in line with the
-- CREATE TABLE IF NOT EXISTS it follows; a plain CREATE INDEX would abort the
-- whole transaction with "relation already exists" on a second run.
CREATE INDEX IF NOT EXISTS idx_tkg_nodes_type ON tkg_nodes (node_type);
-- NOTE(review): the UNIQUE (file_uuid, node_type, external_id) constraint already
-- provides an index led by file_uuid, so this one may be redundant -- confirm.
CREATE INDEX IF NOT EXISTS idx_tkg_nodes_file ON tkg_nodes (file_uuid);
|
||||
|
||||
-- 2. Graph Edges: typed relationships with temporal data
|
||||
-- Graph edges: typed, directed relationships between two tkg_nodes rows.
CREATE TABLE IF NOT EXISTS tkg_edges (
    id             BIGSERIAL   PRIMARY KEY,
    -- Relationship kind: 'APPEARS_IN', 'CO_OCCURS_WITH', 'NEAR', 'SPEAKS_AS'.
    edge_type      VARCHAR(64) NOT NULL,
    -- Both endpoints point at tkg_nodes; deleting a node deletes its edges.
    source_node_id BIGINT      NOT NULL REFERENCES tkg_nodes (id) ON DELETE CASCADE,
    target_node_id BIGINT      NOT NULL REFERENCES tkg_nodes (id) ON DELETE CASCADE,
    file_uuid      VARCHAR(64) NOT NULL,
    -- Temporal data: {start_frame, end_frame, overlap_ratio, distance}.
    properties     JSONB       NOT NULL DEFAULT '{}',
    created_at     TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP,
    -- At most one edge of a given type per ordered node pair within a file.
    UNIQUE (file_uuid, edge_type, source_node_id, target_node_id)
);
|
||||
|
||||
-- Secondary indexes on tkg_edges.
-- IF NOT EXISTS keeps the migration re-runnable, in line with the
-- CREATE TABLE IF NOT EXISTS it follows; a plain CREATE INDEX would abort the
-- whole transaction with "relation already exists" on a second run.
CREATE INDEX IF NOT EXISTS idx_tkg_edges_type   ON tkg_edges (edge_type);
-- Endpoint indexes: support traversal in either direction.
CREATE INDEX IF NOT EXISTS idx_tkg_edges_source ON tkg_edges (source_node_id);
CREATE INDEX IF NOT EXISTS idx_tkg_edges_target ON tkg_edges (target_node_id);
-- NOTE(review): the UNIQUE (file_uuid, edge_type, source_node_id, target_node_id)
-- constraint already provides an index led by file_uuid, so this one may be
-- redundant -- confirm.
CREATE INDEX IF NOT EXISTS idx_tkg_edges_file   ON tkg_edges (file_uuid);
|
||||
|
||||
-- 3. Materialized Co-occurrence: face_trace ↔ yolo_object in same frame
|
||||
-- This is the core TKG query: "Who was near what, when?"
|
||||
-- Placeholder co-occurrence view: one row per face detection that has a trace_id.
-- The three object columns are typed NULLs for now.
-- Created WITH NO DATA, so it must be populated with REFRESH MATERIALIZED VIEW
-- before it can be queried.
CREATE MATERIALIZED VIEW IF NOT EXISTS tkg_co_occurrence AS
SELECT
    det.file_uuid,
    det.trace_id,
    det.frame_number,
    det.bbox             AS face_bbox,
    CAST(NULL AS jsonb)  AS yolo_bbox,     -- placeholder: will be populated from yolo data
    CAST(NULL AS text)   AS object_class,  -- placeholder
    CAST(NULL AS float8) AS confidence     -- placeholder
FROM face_detections AS det
WHERE det.trace_id IS NOT NULL
WITH NO DATA;
|
||||
|
||||
COMMIT;
|
||||
25
migrations/031_add_chunk_search_trigger.sql
Normal file
25
migrations/031_add_chunk_search_trigger.sql
Normal file
@@ -0,0 +1,25 @@
|
||||
-- Migration: 031_add_chunk_search_trigger.sql
|
||||
-- Date: 2026-05-05
|
||||
-- Purpose: Add search_vector tsvector column + auto-update trigger for BM25 search
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- Drop old trigger if exists
|
||||
-- Remove any existing trigger of this name on dev.chunks so the CREATE TRIGGER
-- later in this migration cannot collide with it.
DROP TRIGGER IF EXISTS trg_chunk_search_vector ON dev.chunks;
|
||||
-- Same trigger name on the unqualified table, i.e. whichever "chunks" the session
-- search_path resolves to.
-- NOTE(review): unless that resolves to dev.chunks, this migration does not recreate
-- the trigger on that table -- presumably a legacy copy; confirm.
DROP TRIGGER IF EXISTS trg_chunk_search_vector ON chunks;
|
||||
|
||||
-- Create trigger function (must be created before trigger)
|
||||
-- Trigger function: rebuilds NEW.search_vector from NEW.text_content using the
-- 'english' text-search configuration. A NULL text_content is treated as ''.
-- Declared without a schema qualifier, so it is created in the current schema.
CREATE OR REPLACE FUNCTION update_chunk_search_vector()
RETURNS trigger
LANGUAGE plpgsql
AS $func$
BEGIN
    NEW.search_vector := to_tsvector('english', coalesce(NEW.text_content, ''));
    RETURN NEW;
END;
$func$;
|
||||
|
||||
-- Create trigger on dev.chunks
|
||||
-- The trigger function assigns NEW.search_vector, so the column has to exist before
-- any row is written. No other statement in this migration creates it; add it here
-- (a no-op when the column is already present).
ALTER TABLE dev.chunks ADD COLUMN IF NOT EXISTS search_vector tsvector;

-- Recompute search_vector for every row inserted into or updated in dev.chunks.
-- NOTE(review): if the column is newly added, rows that already exist keep
-- search_vector = NULL until they are next updated; a one-off backfill may be needed.
CREATE TRIGGER trg_chunk_search_vector
    BEFORE INSERT OR UPDATE ON dev.chunks
    FOR EACH ROW
    EXECUTE FUNCTION update_chunk_search_vector();
|
||||
|
||||
COMMIT;
|
||||
59
migrations/032_processor_version_tracking.sql
Normal file
59
migrations/032_processor_version_tracking.sql
Normal file
@@ -0,0 +1,59 @@
|
||||
-- Migration: 032_processor_version_tracking.sql
|
||||
-- Date: 2026-05-05
|
||||
-- Purpose: Processor/Agent version tracking for lifecycle management
|
||||
-- Enables stale detection and targeted re-processing
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- 1. Processor version registry
|
||||
-- Registry of the model version recorded for each processor/agent.
CREATE TABLE IF NOT EXISTS dev.processor_versions (
    -- One row per processor name.
    processor      VARCHAR(64)  PRIMARY KEY,
    model_version  VARCHAR(128) NOT NULL,
    -- 'processor' or 'agent'.
    processor_type VARCHAR(32)  NOT NULL DEFAULT 'processor',
    -- Names of the processors this one depends on.
    dependencies   TEXT[]       DEFAULT '{}',
    updated_at     TIMESTAMPTZ  DEFAULT CURRENT_TIMESTAMP,
    -- NULL = global version, set = per-file override.
    -- NOTE(review): processor alone is the primary key, so a global row and a
    -- per-file override for the same processor cannot coexist -- confirm intended.
    file_uuid      VARCHAR(64)
);
|
||||
|
||||
-- 2. Initial version seeding (current Charade pipeline)
|
||||
-- Seed / refresh the registry. On conflict every seeded column is refreshed, not
-- just model_version, so a changed processor_type or dependency list is not
-- silently ignored on re-run. updated_at is bumped only when a seeded value
-- actually changed (the WHERE clause skips no-op updates).
INSERT INTO dev.processor_versions AS pv
    (processor, model_version, processor_type, dependencies)
VALUES
    ('cut',                 'pyscenedetect/default',     'processor', '{}'),
    ('asr',                 'faster-whisper/small/v1',   'processor', '{}'),
    ('asrx',                'speechbrain/ecapa-tdnn/v1', 'processor', '{asr}'),
    ('ocr',                 'apple-vision/v1',           'processor', '{}'),
    ('yolo',                'yolov5-coreml/v2',          'processor', '{}'),
    ('face_detection',      'apple-vision/v2',           'processor', '{}'),
    ('face_embedding',      'coreml-facenet/v2',         'processor', '{}'),
    ('pose',                'apple-vision/v1',           'processor', '{}'),
    ('face_trace',          'iou+embedding/v1',          'processor', '{face_detection,face_embedding}'),
    ('speaker_binding',     'mar-lip/v1',                'agent',     '{asrx,face_detection}'),
    ('identity_clustering', 'cosine-threshold/v1',       'agent',     '{face_trace,speaker_binding}'),
    ('tmdb_agent',          'tmdb-api/v1',               'agent',     '{}'),
    ('story_agent',         'template/v2.0',             'agent',     '{asr,asrx,cut,face_trace,identity_clustering,yolo}'),
    ('embedding_agent',     'nomic-embed-768d/v1',       'agent',     '{story_agent}')
ON CONFLICT (processor) DO UPDATE
SET model_version  = EXCLUDED.model_version,
    processor_type = EXCLUDED.processor_type,
    dependencies   = EXCLUDED.dependencies,
    updated_at     = CURRENT_TIMESTAMP
WHERE (pv.model_version, pv.processor_type, pv.dependencies)
      IS DISTINCT FROM
      (EXCLUDED.model_version, EXCLUDED.processor_type, EXCLUDED.dependencies);
|
||||
|
||||
-- 3. Stale detection function
|
||||
-- Reports every registry entry whose stored model_version differs from the version
-- supplied by the caller.
--
-- Parameters:
--   p_file_uuid         file to check; rows with file_uuid NULL or equal to this
--                       value are considered.
--   p_current_versions  JSONB object mapping processor name -> current version string.
--
-- Returns one row (agent_name, reason) per mismatch. A processor that is missing
-- from p_current_versions compares as NULL and is therefore reported; in the
-- reason text a NULL version renders as an empty string.
--
-- Set-based: a single query replaces the former row-by-row loop. The function only
-- reads data, hence STABLE.
CREATE OR REPLACE FUNCTION dev.check_stale_agents(
    p_file_uuid        VARCHAR(64),
    p_current_versions JSONB
) RETURNS TABLE(agent_name VARCHAR(64), reason TEXT) AS $$
BEGIN
    RETURN QUERY
    SELECT
        -- Explicit casts keep the result shape fixed even if the pre-existing
        -- table was created with slightly different column types.
        pv.processor::VARCHAR(64),
        format('Version mismatch: current=%s, stored=%s',
               p_current_versions ->> pv.processor,
               pv.model_version)::TEXT
    FROM dev.processor_versions AS pv
    WHERE (pv.file_uuid IS NULL OR pv.file_uuid = p_file_uuid)
      AND (p_current_versions ->> pv.processor) IS DISTINCT FROM pv.model_version;
END;
$$ LANGUAGE plpgsql STABLE;
|
||||
|
||||
COMMIT;
|
||||
Reference in New Issue
Block a user