feat: ASRX hybrid pipeline, identity history, worker fixes, checkpoint system

This commit is contained in:
Accusys
2026-06-02 07:13:23 +08:00
parent e3066c3f49
commit e1572907ae
198 changed files with 43705 additions and 8910 deletions

View File

@@ -0,0 +1,28 @@
-- Migration: Create identity_history table for undo/redo support
-- Description: Stores PATCH operation history for identity undo/redo functionality
-- Date: 2026-05-28
-- Create identity_history table
-- One row per recorded operation on an identity. Rows are deleted together
-- with their identity (ON DELETE CASCADE).
-- is_undone and created_at are NOT NULL: both already had defaults, and a NULL
-- flag would silently fall outside the partial index defined on
-- "WHERE NOT is_undone", while a NULL timestamp has no defined place in the
-- newest-first ordering used by the history indexes.
CREATE TABLE IF NOT EXISTS identity_history (
    id BIGSERIAL PRIMARY KEY,
    identity_id INTEGER NOT NULL REFERENCES identities(id) ON DELETE CASCADE,
    operation VARCHAR(20) NOT NULL,              -- 'update', 'create', 'delete'
    before_snapshot JSONB,                       -- full state before the operation
    after_snapshot JSONB,                        -- full state after the operation
    is_undone BOOLEAN NOT NULL DEFAULT FALSE,    -- whether this entry has been undone
    undone_at TIMESTAMPTZ,                       -- time of the undo
    user_id VARCHAR(100),                        -- who performed the operation
    user_source VARCHAR(50),                     -- 'wordpress', 'api', 'cli'
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Create indexes for efficient undo/redo operations
-- Full history of one identity, newest entry first.
CREATE INDEX IF NOT EXISTS idx_identity_history_identity_time
    ON identity_history USING btree (identity_id, created_at DESC);

-- Same key, restricted to entries whose is_undone flag is false; rows where
-- the flag is true (or NULL) are not stored in this index.
CREATE INDEX IF NOT EXISTS idx_identity_history_not_undone
    ON identity_history USING btree (identity_id, created_at DESC)
    WHERE NOT is_undone;
-- Add comment
-- Catalog description of the table. The 256-record limit mentioned in the text
-- is not enforced by any constraint in this migration.
-- NOTE(review): presumably trimmed by the application -- confirm.
COMMENT ON TABLE identity_history
    IS 'Stores identity PATCH operation history for undo/redo support. Max 256 records per identity.';

View File

@@ -0,0 +1,314 @@
-- ============================================================
-- 3002/3003 Schema Separation: Create pipeline tables in public
-- Generated: 2026-05-17
-- ============================================================
-- Run (example; adjust the psql path and connection string to your environment):
--   /Users/accusys/pgsql/18.3/bin/psql "postgres://accusys@localhost:5432/momentry" -f migrations/3002_public_schema_pipeline_tables.sql
-- ============================================================
-- Open a transaction: every statement up to the closing COMMIT is applied as
-- one unit, so an error part-way through leaves none of the objects behind.
BEGIN;
-- ============================================================
-- 1. videos
-- ============================================================
-- Id generator for public.videos. It is created before the table because the
-- id default references it, and is tied to the column afterwards (OWNED BY)
-- so that dropping the column or table drops the sequence as well.
CREATE SEQUENCE IF NOT EXISTS public.videos_id_seq AS integer START WITH 1;

-- File-level records: file_uuid, file_path and file_name are mandatory, every
-- other column is optional or defaulted.
-- NOTE(review): parent_uuid presumably refers to another row's file_uuid; no
-- foreign key enforces that here -- confirm.
CREATE TABLE IF NOT EXISTS public.videos (
    id integer DEFAULT nextval('public.videos_id_seq') NOT NULL,
    file_uuid character varying(32) NOT NULL,
    file_path text NOT NULL,
    file_name text NOT NULL,
    duration double precision,
    width integer,
    height integer,
    fps double precision,
    probe_json jsonb,
    created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
    updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
    -- Boolean flags, all defaulting to false.
    fs_video boolean DEFAULT false,
    fs_json boolean DEFAULT false,
    psql_chunk boolean DEFAULT false,
    pobject_chunk boolean DEFAULT false,
    mobject_chunk boolean DEFAULT false,
    pvector_chunk boolean DEFAULT false,
    qvector_chunk boolean DEFAULT false,
    status character varying(20) DEFAULT 'pending'::character varying,
    user_id bigint,
    job_id integer,
    registration_time timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
    file_type character varying(20),
    processing_status jsonb DEFAULT '{}'::jsonb,
    birth_registration jsonb,
    total_frames bigint DEFAULT 0,
    parent_uuid character varying(32),
    cut_done boolean DEFAULT false,
    scene_done boolean DEFAULT false,
    audio_tracks jsonb DEFAULT '[]'::jsonb,
    cut_count integer DEFAULT 0,
    cut_max_duration double precision DEFAULT 0,
    content_hash text,
    -- Without a declared key nothing stops two rows from sharing an id, and no
    -- foreign key can reference this table by id.
    CONSTRAINT videos_pkey PRIMARY KEY (id)
);

ALTER SEQUENCE public.videos_id_seq OWNED BY public.videos.id;
-- ============================================================
-- 2. chunk (with pgvector support)
-- ============================================================
-- Id generator for public.chunk. The sequence keeps the plural name
-- "chunks_id_seq" although the table is named "chunk".
CREATE SEQUENCE IF NOT EXISTS public.chunks_id_seq AS integer START WITH 1;

-- Time-ranged chunk rows belonging to a file (file_uuid). The time range,
-- the content and chunk_id are mandatory.
-- The embedding column uses the type public.vector, which this file does not
-- create. It must already exist in schema public or the CREATE TABLE fails
-- and the whole transaction rolls back.
-- NOTE(review): presumably supplied by the pgvector extension -- confirm it is
-- installed in schema public before running.
-- NOTE(review): created_at is "without time zone" while updated_at is "with
-- time zone"; left unchanged because callers may depend on the column types.
CREATE TABLE IF NOT EXISTS public.chunk (
    id integer DEFAULT nextval('public.chunks_id_seq') NOT NULL,
    file_uuid character varying(32) NOT NULL,
    chunk_type character varying(32) NOT NULL,
    start_time double precision NOT NULL,
    end_time double precision NOT NULL,
    content jsonb NOT NULL,
    vector_id character varying(64),
    created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
    file_id integer,
    text_content text,
    frame_count integer DEFAULT 0,
    pre_chunk_ids integer[],
    parent_chunk_id character varying(64),
    child_chunk_ids text[],
    search_vector tsvector,
    fps double precision DEFAULT 24.0,
    start_frame bigint DEFAULT 0,
    end_frame bigint DEFAULT 0,
    metadata jsonb,
    updated_at timestamp with time zone DEFAULT now(),
    visual_stats jsonb,
    summary_text text,
    chunk_id character varying(128) NOT NULL,
    embedding public.vector,
    old_chunk_id character varying(128),
    chunk_index integer DEFAULT 0,
    unique_key character varying,
    -- Without a declared key nothing stops two rows from sharing an id.
    CONSTRAINT chunk_pkey PRIMARY KEY (id)
);

ALTER SEQUENCE public.chunks_id_seq OWNED BY public.chunk.id;
-- ============================================================
-- 3. chunk_vectors
-- ============================================================
-- Id generator for public.chunk_vectors; owned by the id column (see the
-- ALTER SEQUENCE below) so it is dropped together with it.
CREATE SEQUENCE IF NOT EXISTS public.chunk_vectors_id_seq AS integer START WITH 1;

-- Embeddings stored as jsonb, addressed by chunk_id and uuid; chunk_type
-- defaults to 'sentence'.
-- NOTE(review): chunk_id is limited to 64 characters here while
-- public.chunk.chunk_id allows 128 -- confirm the two are meant to hold the
-- same values.
CREATE TABLE IF NOT EXISTS public.chunk_vectors (
    id integer DEFAULT nextval('public.chunk_vectors_id_seq') NOT NULL,
    chunk_id character varying(64) NOT NULL,
    uuid character varying(64) NOT NULL,
    chunk_type character varying(32) DEFAULT 'sentence'::character varying NOT NULL,
    embedding jsonb,
    created_at timestamp with time zone DEFAULT now(),
    -- Without a declared key nothing stops two rows from sharing an id.
    CONSTRAINT chunk_vectors_pkey PRIMARY KEY (id)
);

ALTER SEQUENCE public.chunk_vectors_id_seq OWNED BY public.chunk_vectors.id;
-- ============================================================
-- 4. cuts
-- ============================================================
-- Id generator for public.cuts; owned by the id column (see the ALTER SEQUENCE
-- below) so it is dropped together with it.
CREATE SEQUENCE IF NOT EXISTS public.cuts_id_seq AS integer START WITH 1;

-- Numbered cuts of a file: the frame range is mandatory, the matching time
-- range and fps are optional.
CREATE TABLE IF NOT EXISTS public.cuts (
    id integer DEFAULT nextval('public.cuts_id_seq') NOT NULL,
    file_uuid character varying(32) NOT NULL,
    cut_number integer NOT NULL,
    start_frame bigint NOT NULL,
    end_frame bigint NOT NULL,
    start_time double precision,
    end_time double precision,
    fps double precision,
    metadata jsonb DEFAULT '{}'::jsonb,
    created_at timestamp with time zone DEFAULT now(),
    -- Without a declared key nothing stops two rows from sharing an id.
    CONSTRAINT cuts_pkey PRIMARY KEY (id)
);

ALTER SEQUENCE public.cuts_id_seq OWNED BY public.cuts.id;
-- ============================================================
-- 5. frames
-- ============================================================
-- Id generator for public.frames; owned by the id column (see the ALTER
-- SEQUENCE below) so it is dropped together with it.
CREATE SEQUENCE IF NOT EXISTS public.frames_id_seq AS integer START WITH 1;

-- Per-frame rows addressed by file_id and frame_number, with optional jsonb
-- payloads (yolo_objects, ocr_results, face_results) and an optional path.
-- NOTE(review): file_id presumably refers to public.videos.id; no foreign key
-- enforces that here -- confirm.
CREATE TABLE IF NOT EXISTS public.frames (
    id integer DEFAULT nextval('public.frames_id_seq') NOT NULL,
    file_id integer NOT NULL,
    frame_number bigint NOT NULL,
    timestamp double precision NOT NULL,
    fps double precision DEFAULT 24.0,
    yolo_objects jsonb,
    ocr_results jsonb,
    face_results jsonb,
    frame_path text,
    created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
    -- Without a declared key nothing stops two rows from sharing an id.
    CONSTRAINT frames_pkey PRIMARY KEY (id)
);

ALTER SEQUENCE public.frames_id_seq OWNED BY public.frames.id;
-- ============================================================
-- 6. monitor_jobs
-- ============================================================
-- Id generator for public.monitor_jobs; owned by the id column (see the ALTER
-- SEQUENCE below) so it is dropped together with it.
CREATE SEQUENCE IF NOT EXISTS public.monitor_jobs_id_seq AS integer START WITH 1;

-- Job rows identified by uuid, with a status (default 'pending'), progress
-- and error counters, and three text[] lists of processor names.
-- NOTE(review): started_at is "without time zone" while updated_at and
-- created_at are "with time zone"; left unchanged because callers may depend
-- on the column types.
CREATE TABLE IF NOT EXISTS public.monitor_jobs (
    id integer DEFAULT nextval('public.monitor_jobs_id_seq') NOT NULL,
    uuid character varying(32) NOT NULL,
    video_path character varying(512),
    status character varying(20) DEFAULT 'pending'::character varying NOT NULL,
    current_processor character varying(20),
    progress_total integer DEFAULT 0,
    progress_current integer DEFAULT 0,
    error_count integer DEFAULT 0,
    last_error text,
    started_at timestamp without time zone,
    updated_at timestamp with time zone,
    created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
    video_id bigint,
    user_id bigint,
    processors text[],
    completed_processors text[],
    failed_processors text[],
    -- Without a declared key nothing stops two rows from sharing an id.
    CONSTRAINT monitor_jobs_pkey PRIMARY KEY (id)
);

ALTER SEQUENCE public.monitor_jobs_id_seq OWNED BY public.monitor_jobs.id;
-- ============================================================
-- 7. processor_results
-- ============================================================
-- Id generator for public.processor_results; owned by the id column (see the
-- ALTER SEQUENCE below) so it is dropped together with it.
CREATE SEQUENCE IF NOT EXISTS public.processor_results_id_seq AS integer START WITH 1;

-- Per-processor result rows: status is the only mandatory column besides id
-- (default 'pending'); counters default to 0.
-- NOTE(review): job_id presumably refers to public.monitor_jobs.id and
-- video_id to public.videos.id; no foreign keys enforce that here -- confirm.
CREATE TABLE IF NOT EXISTS public.processor_results (
    id integer DEFAULT nextval('public.processor_results_id_seq') NOT NULL,
    job_id integer,
    video_id bigint,
    processor character varying(20),
    status character varying(20) DEFAULT 'pending'::character varying NOT NULL,
    output_path text,
    started_at timestamp with time zone,
    completed_at timestamp with time zone,
    error_message text,
    progress_total integer DEFAULT 0,
    progress_current integer DEFAULT 0,
    last_checkpoint jsonb,
    created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
    updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
    duration_secs double precision,
    chunks_produced integer DEFAULT 0,
    frames_processed integer DEFAULT 0,
    output_size_bytes bigint DEFAULT 0,
    file_uuid character varying(32),
    result jsonb,
    output_data jsonb,
    retry_count integer DEFAULT 0,
    processor_type character varying(64),
    uuid character varying(255),
    -- Without a declared key nothing stops two rows from sharing an id.
    CONSTRAINT processor_results_pkey PRIMARY KEY (id)
);

ALTER SEQUENCE public.processor_results_id_seq OWNED BY public.processor_results.id;
-- ============================================================
-- 8. processor_versions
-- ============================================================
-- Records a model_version per processor (both mandatory). processor_type
-- falls back to 'processor' and dependencies to an empty array.
-- This table has no id column, and no key or index is declared for it in
-- this migration.
CREATE TABLE IF NOT EXISTS public.processor_versions (
    processor       varchar(64)  NOT NULL,
    model_version   varchar(128) NOT NULL,
    processor_type  varchar(32)  NOT NULL DEFAULT 'processor'::varchar,
    dependencies    text[]       DEFAULT '{}'::text[],
    updated_at      timestamptz  DEFAULT CURRENT_TIMESTAMP,
    file_uuid       varchar(64)
);
-- ============================================================
-- 9. parent_chunks
-- ============================================================
-- Id generator for public.parent_chunks; owned by the id column (see the
-- ALTER SEQUENCE below) so it is dropped together with it.
CREATE SEQUENCE IF NOT EXISTS public.parent_chunks_id_seq AS integer START WITH 1;

-- Summary rows keyed by uuid, with an optional chunk_id, the summary text and
-- a tsvector column for it.
-- NOTE(review): nothing in this file populates summary_tsvector (no trigger
-- or generated column) -- presumably written by the application; confirm.
CREATE TABLE IF NOT EXISTS public.parent_chunks (
    id integer DEFAULT nextval('public.parent_chunks_id_seq') NOT NULL,
    uuid character varying(32) NOT NULL,
    chunk_id character varying(64),
    summary_text text,
    summary_tsvector tsvector,
    metadata jsonb DEFAULT '{}'::jsonb,
    -- Without a declared key nothing stops two rows from sharing an id.
    CONSTRAINT parent_chunks_pkey PRIMARY KEY (id)
);

ALTER SEQUENCE public.parent_chunks_id_seq OWNED BY public.parent_chunks.id;
-- ============================================================
-- 10. tkg_edges
-- ============================================================
-- Id generator for public.tkg_edges (bigint, unlike the integer sequences used
-- by the other tables in this file); owned by the id column.
CREATE SEQUENCE IF NOT EXISTS public.tkg_edges_id_seq AS bigint START WITH 1;

-- Typed edges between two node ids within a file. All columns except
-- created_at are mandatory, and properties defaults to an empty JSON object.
-- NOTE(review): source_node_id / target_node_id presumably refer to
-- public.tkg_nodes.id; no foreign keys enforce that here -- confirm.
CREATE TABLE IF NOT EXISTS public.tkg_edges (
    id bigint DEFAULT nextval('public.tkg_edges_id_seq') NOT NULL,
    edge_type character varying(64) NOT NULL,
    source_node_id bigint NOT NULL,
    target_node_id bigint NOT NULL,
    file_uuid character varying(64) NOT NULL,
    properties jsonb NOT NULL DEFAULT '{}'::jsonb,
    created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
    -- Without a declared key nothing stops two rows from sharing an id.
    CONSTRAINT tkg_edges_pkey PRIMARY KEY (id)
);

ALTER SEQUENCE public.tkg_edges_id_seq OWNED BY public.tkg_edges.id;
-- ============================================================
-- 11. tkg_nodes
-- ============================================================
-- Id generator for public.tkg_nodes (bigint); owned by the id column (see the
-- ALTER SEQUENCE below) so it is dropped together with it.
CREATE SEQUENCE IF NOT EXISTS public.tkg_nodes_id_seq AS bigint START WITH 1;

-- Typed nodes within a file, each carrying a mandatory external_id, an
-- optional label and a properties object that defaults to empty.
CREATE TABLE IF NOT EXISTS public.tkg_nodes (
    id bigint DEFAULT nextval('public.tkg_nodes_id_seq') NOT NULL,
    node_type character varying(64) NOT NULL,
    external_id character varying(256) NOT NULL,
    file_uuid character varying(64) NOT NULL,
    label character varying(512),
    properties jsonb NOT NULL DEFAULT '{}'::jsonb,
    created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
    -- Without a declared key nothing stops two rows from sharing an id, and
    -- no foreign key can reference a node by id.
    CONSTRAINT tkg_nodes_pkey PRIMARY KEY (id)
);

ALTER SEQUENCE public.tkg_nodes_id_seq OWNED BY public.tkg_nodes.id;
-- ============================================================
-- Indexes & Constraints
-- ============================================================
-- videos
-- At most one videos row per file_uuid.
CREATE UNIQUE INDEX IF NOT EXISTS idx_videos_file_uuid
    ON public.videos USING btree (file_uuid);
-- chunk
-- Lookup indexes by file, by type, by parent chunk, and by (file, type).
CREATE INDEX IF NOT EXISTS idx_chunk_file_uuid
    ON public.chunk USING btree (file_uuid);
CREATE INDEX IF NOT EXISTS idx_chunk_type
    ON public.chunk USING btree (chunk_type);
CREATE INDEX IF NOT EXISTS idx_chunk_parent
    ON public.chunk USING btree (parent_chunk_id);
CREATE INDEX IF NOT EXISTS idx_chunk_file_type
    ON public.chunk USING btree (file_uuid, chunk_type);

-- Within one file an old_chunk_id may appear only once; rows whose
-- old_chunk_id is NULL are left out of the index and therefore unconstrained.
CREATE UNIQUE INDEX IF NOT EXISTS idx_chunk_file_old_id
    ON public.chunk USING btree (file_uuid, old_chunk_id)
    WHERE old_chunk_id IS NOT NULL;
-- chunk_vectors
-- Lookup indexes on the two addressing columns, uuid and chunk_id.
CREATE INDEX IF NOT EXISTS idx_chunk_vec_uuid
    ON public.chunk_vectors USING btree (uuid);
CREATE INDEX IF NOT EXISTS idx_chunk_vec_chunk
    ON public.chunk_vectors USING btree (chunk_id);
-- cuts
-- Find the cuts of a file by file_uuid.
CREATE INDEX IF NOT EXISTS idx_cuts_file_uuid
    ON public.cuts USING btree (file_uuid);
-- frames
-- Find the frames of a file by file_id.
CREATE INDEX IF NOT EXISTS idx_frames_file_id
    ON public.frames USING btree (file_id);
-- monitor_jobs
-- At most one job row per uuid.
CREATE UNIQUE INDEX IF NOT EXISTS idx_monitor_jobs_uuid
    ON public.monitor_jobs USING btree (uuid);

-- Filter jobs by status.
CREATE INDEX IF NOT EXISTS idx_monitor_jobs_status
    ON public.monitor_jobs USING btree (status);
-- processor_results
-- Lookup indexes by job_id and by uuid.
CREATE INDEX IF NOT EXISTS idx_pr_job_id
    ON public.processor_results USING btree (job_id);
CREATE INDEX IF NOT EXISTS idx_pr_uuid
    ON public.processor_results USING btree (uuid);

-- One result row per (job_id, processor_type). Both columns are nullable, so
-- rows with a NULL in either column are not restricted by this index.
CREATE UNIQUE INDEX IF NOT EXISTS idx_pr_job_processor_type
    ON public.processor_results USING btree (job_id, processor_type);
-- parent_chunks
-- Find parent chunk rows by uuid.
CREATE INDEX IF NOT EXISTS idx_parent_chunks_uuid
    ON public.parent_chunks USING btree (uuid);
-- tkg_edges
-- Lookup indexes by file and by edge type. The endpoint columns
-- (source_node_id, target_node_id) are not indexed by this migration.
CREATE INDEX IF NOT EXISTS idx_tkg_edges_file_uuid
    ON public.tkg_edges USING btree (file_uuid);
CREATE INDEX IF NOT EXISTS idx_tkg_edges_type
    ON public.tkg_edges USING btree (edge_type);
-- tkg_nodes
-- Lookup indexes by file, by node type, and by external_id (non-unique).
CREATE INDEX IF NOT EXISTS idx_tkg_nodes_file_uuid
    ON public.tkg_nodes USING btree (file_uuid);
CREATE INDEX IF NOT EXISTS idx_tkg_nodes_type
    ON public.tkg_nodes USING btree (node_type);
CREATE INDEX IF NOT EXISTS idx_tkg_nodes_external
    ON public.tkg_nodes USING btree (external_id);
-- Close the transaction opened by BEGIN at the top of this migration, making
-- all sequences, tables and indexes created above permanent in one step.
COMMIT;