-- ============================================================================
-- Migration 017: Create pre_chunks table (Processor Output)
-- ============================================================================
-- Purpose:
--   1. Move raw processor outputs (YOLO frames, Face detections, etc.)
--      from the 'chunks' table to a dedicated 'pre_chunks' table.
--   2. Support coordinate_type (frame for video, time for audio, page for
--      PDF, etc.) to allow future expansion to non-video files.
--   3. Support Identity linking directly on pre_chunks (Face -> Identity).
-- ============================================================================

-- 0. Clean up existing conflicting table (if any).
--    WARNING: this is destructive. Any existing pre_chunks rows are lost, and
--    CASCADE also drops objects that depend on the table (e.g. views, foreign
--    keys pointing at it).
DROP TABLE IF EXISTS pre_chunks CASCADE;

-- 1. Create pre_chunks table: one row per raw processor output.
CREATE TABLE pre_chunks (
    id               BIGSERIAL PRIMARY KEY,
    file_uuid        UUID NOT NULL,
    processor_type   VARCHAR(32) NOT NULL,          -- 'yolo', 'face', 'asr', 'ocr', 'pose'...

    -- Coordinate system (supports Video, Audio, Text...)
    -- NOTE(review): coordinate_type has a default but is still nullable, so an
    -- explicit NULL can be inserted -- confirm whether that is intended.
    coordinate_type  VARCHAR(20) DEFAULT 'frame',   -- 'frame', 'time', 'page'
    coordinate_index BIGINT NOT NULL,               -- Frame number, or paragraph index
    timestamp        FLOAT,                         -- Time in seconds

    data             JSONB NOT NULL,                -- Raw processor output (objects, bboxes, etc.)

    -- Identity linkage (Face -> Identity, or Speaker -> Identity).
    -- If NULL, this Face/Speaker is a "Candidate".
    -- Note: FK removed temporarily due to schema migration in progress.
    identity_id      UUID,
    confidence       FLOAT,                         -- Match confidence

    created_at       TIMESTAMPTZ DEFAULT NOW()
);

-- 2. Indexes
-- NOTE(review): idx_pre_chunks_file indexes the leading column of
-- idx_pre_chunks_coord, so it may be redundant; kept because other
-- migrations or tooling may reference it by name.
CREATE INDEX idx_pre_chunks_file ON pre_chunks(file_uuid);
CREATE INDEX idx_pre_chunks_processor ON pre_chunks(processor_type);
CREATE INDEX idx_pre_chunks_identity ON pre_chunks(identity_id);
CREATE INDEX idx_pre_chunks_coord ON pre_chunks(file_uuid, processor_type, coordinate_index);

-- 3. Comments
-- The table comment literal intentionally spans two lines; the embedded line
-- break is part of the stored comment text.
COMMENT ON TABLE pre_chunks IS 'Raw output from Processors (Frames, Segments). 
Candidates are rows where identity_id IS NULL.';
COMMENT ON COLUMN pre_chunks.coordinate_type IS 'Coordinate unit: frame (Video), time (Audio), page (PDF)...';