diff --git a/docs_v1.0/API_WORKSPACE/modules/05_process.md b/docs_v1.0/API_WORKSPACE/modules/05_process.md index f08b1a9..a3dd3e6 100644 --- a/docs_v1.0/API_WORKSPACE/modules/05_process.md +++ b/docs_v1.0/API_WORKSPACE/modules/05_process.md @@ -51,8 +51,8 @@ curl -s -X POST "$API/api/v1/file/$FILE_UUID/process" \ | `success` | boolean | Always true on 200 | | `job_id` | integer | Monitor job ID (for job tracking) | | `file_uuid` | string | 32-char hex UUID of the file | -| `status` | string | `"processing"` | -| `pids` | integer[] | Process IDs of started processors | +| `status` | string | `"queued"` — file enters the FIFO queue | +| `pids` | integer[] | Process IDs of started processors (empty for queued) | | `message` | string | Human-readable status | #### Error Responses @@ -237,6 +237,105 @@ curl -s "$API/api/v1/jobs" -H "X-API-Key: $KEY" | jq '{count, jobs: [.jobs[] | { | `page` | integer | Current page number | | `page_size` | integer | Jobs per page | +### `GET /api/v1/job/:uuid` + +**Auth**: Required +**Scope**: file-level + +Get detailed information about a specific processing job, including its queue position. 
+ +#### Response (200) + +```json +{ + "id": 51, + "uuid": "c36f35685177c981aa139b66bbbccc5b", + "status": "queued", + "current_processor": null, + "progress_current": 0, + "progress_total": 0, + "processors": [], + "created_at": "2026-06-22 23:08:48.497018", + "started_at": null, + "updated_at": null, + "queue_position": 3 +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `id` | integer | Monitor job ID | +| `uuid` | string | File UUID | +| `status` | string | `"pending"`, `"queued"`, `"running"`, `"completed"`, `"failed"` | +| `current_processor` | string | Currently active processor, or null | +| `progress_current` | integer | Current progress count | +| `progress_total` | integer | Total progress count | +| `processors` | array | Processor list | +| `created_at` | string | Job creation timestamp | +| `started_at` | string | Processing start timestamp, or null | +| `updated_at` | string | Last update timestamp, or null | +| `queue_position` | integer | Position in FIFO queue (null if not pending/queued) | + +--- + +### Status Lifecycle + +``` +register ──→ pending + │ + trigger (POST /process) + │ + queued ←── queue_position counts jobs ahead + │ + worker picks up + │ + processing + │ + ┌────────┴────────┐ + ▼ ▼ + completed failed + │ + checkin ──→ indexed + checkout ──→ checked_out +``` + +| Status | Meaning | +|--------|---------| +| `pending` | File registered, not yet triggered | +| `queued` | Triggered, waiting for worker in FIFO queue | +| `processing` | Worker actively processing | +| `completed` | All processors finished successfully | +| `failed` | One or more essential processors failed | +| `indexed` | Post-processing checkin complete | +| `checked_out` | User checked out the file | + +Queue order is FIFO (`created_at ASC`). The `GET /api/v1/job/:uuid` endpoint returns `queue_position` showing how many jobs are ahead. + +### Frontend Status Mapping + +When displaying file status in the frontend list (e.g. 
after `GET /api/v1/files/scan`), map the `status` field as follows: + +| DB Status | Status Label | Filter: 待處理 | Filter: 處理中 | Count: pendingCount | Count: processingCount | +|-----------|-------------|----------------|----------------|---------------------|-----------------------| +| `unregistered` | 未註冊 | No | No | No | No | +| `registered` | 待處理 | **Yes** | No | **Yes** | No | +| `pending` | 待處理 | **Yes** | No | **Yes** | No | +| `queued` | 排隊中 | **Yes** | **Yes** | **Yes** | **Yes** | +| `processing` | 處理中 | No | **Yes** | No | **Yes** | +| `completed` | 已完成 | No | No | No | No | +| `failed` | 處理失敗 | No | No | No | No | +| `indexed` | 已入庫 | No | No | No | No | + +**`queued` 的特殊處理**: +- `statusLabel` → 顯示「排隊中」,加 `ms-badge-warn` 樣式(黃色) +- `filterPending` → 應包含 `queued`,讓它在「待處理」filter 可見 +- `pendingCount` + `processingCount` → 兩者都應包含 `queued`,因它既是「待處理」也是「正在排隊」 +- 在 `refreshAllStatus` / `loadFiles` 中,如果檔案狀態是 `queued`,應顯示簡單的排隊訊息(無需 polling progress) +- 當 worker pickup 後,狀態會變為 `processing`,此時 `refreshAllStatus` 會自動偵測到並開始 polling progress +- 也可以提供一個「queue_position」顯示:呼叫 `GET /api/v1/job/:uuid` 取得排在第幾位 + +--- + ### `GET /api/v1/file/:file_uuid/processor-counts` **Auth**: Required @@ -407,4 +506,4 @@ curl -s -X POST "$API/api/v1/file/$FILE_UUID/complete" \ Phase 1 (`/phase1`) combines store-asrx + rule1 + vectorize into one call. --- -*Updated: 2026-06-20 12:00:00* +*Updated: 2026-06-23 — Added queued status, FIFO queue order, queue_position in job detail, frontend status mapping table* diff --git a/docs_v1.0/doc/05_process.html b/docs_v1.0/doc/05_process.html index c250249..17120e8 100644 --- a/docs_v1.0/doc/05_process.html +++ b/docs_v1.0/doc/05_process.html @@ -119,12 +119,12 @@ curl -s -X status string -"processing" +"queued" — file enters the FIFO queue pids integer[] -Process IDs of started processors +Process IDs of started processors (empty for queued) message @@ -507,6 +507,239 @@ curl -s -X +

GET /api/v1/job/:uuid

+

Auth: Required +Scope: file-level

+

Get detailed information about a specific processing job, including its queue position.

+

Response (200)

+
{
+  "id": 51,
+  "uuid": "c36f35685177c981aa139b66bbbccc5b",
+  "status": "queued",
+  "current_processor": null,
+  "progress_current": 0,
+  "progress_total": 0,
+  "processors": [],
+  "created_at": "2026-06-22 23:08:48.497018",
+  "started_at": null,
+  "updated_at": null,
+  "queue_position": 3
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
idintegerMonitor job ID
uuidstringFile UUID
statusstring"pending", "queued", "running", "completed", "failed"
current_processorstringCurrently active processor, or null
progress_currentintegerCurrent progress count
progress_totalintegerTotal progress count
processorsarrayProcessor list
created_atstringJob creation timestamp
started_atstringProcessing start timestamp, or null
updated_atstringLast update timestamp, or null
queue_positionintegerNumber of jobs ahead in the FIFO queue (null if not pending/queued)
+
+

Status Lifecycle

+
register ──→ pending
+                  
+            trigger (POST /process)
+                  
+              queued ←── queue_position counts jobs ahead
+                  
+            worker picks up
+                  
+            processing
+                  
+         ┌────────┴────────┐
+                           
+    completed             failed
+         
+    checkin ──→ indexed
+    checkout ──→ checked_out
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
StatusMeaning
pendingFile registered, not yet triggered
queuedTriggered, waiting for worker in FIFO queue
processingWorker actively processing
completedAll processors finished successfully
failedOne or more essential processors failed
indexedPost-processing checkin complete
checked_outUser checked out the file
+

Queue order is FIFO (created_at ASC, oldest job first). The GET /api/v1/job/:uuid endpoint returns queue_position, the number of jobs ahead of this one in the queue.

+

Frontend Status Mapping

+

When displaying file status in the frontend list (e.g. after GET /api/v1/files/scan), map the status field as follows:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DB StatusStatus LabelFilter: 待處理Filter: 處理中Count: pendingCountCount: processingCount
unregistered未註冊NoNoNoNo
registered待處理YesNoYesNo
pending待處理YesNoYesNo
queued排隊中YesYesYesYes
processing處理中NoYesNoYes
completed已完成NoNoNoNo
failed處理失敗NoNoNoNo
indexed已入庫NoNoNoNo
+

queued 的特殊處理: +- statusLabel → 顯示「排隊中」,加 ms-badge-warn 樣式(黃色) +- filterPending → 應包含 queued,讓它在「待處理」filter 可見 +- pendingCount + processingCount → 兩者都應包含 queued,因它既是「待處理」也是「正在排隊」 +- 在 refreshAllStatus / loadFiles 中,如果檔案狀態是 queued,應顯示簡單的排隊訊息(無需 polling progress) +- 當 worker pickup 後,狀態會變為 processing,此時 refreshAllStatus 會自動偵測到並開始 polling progress +- 也可以提供一個「queue_position」顯示:呼叫 GET /api/v1/job/:uuid 取得排在第幾位

+

GET /api/v1/file/:file_uuid/processor-counts

Auth: Required Scope: file-level

@@ -652,7 +885,7 @@ curl -s -X

Phase 1 (/phase1) combines store-asrx + rule1 + vectorize into one call.


-

Updated: 2026-06-20 12:00:00

+

Updated: 2026-06-23 — Added queued status, FIFO queue order, queue_position in job detail, frontend status mapping table

\ No newline at end of file diff --git a/docs_v1.0/doc_wasm/modules/05_process.md b/docs_v1.0/doc_wasm/modules/05_process.md index f08b1a9..a3dd3e6 100644 --- a/docs_v1.0/doc_wasm/modules/05_process.md +++ b/docs_v1.0/doc_wasm/modules/05_process.md @@ -51,8 +51,8 @@ curl -s -X POST "$API/api/v1/file/$FILE_UUID/process" \ | `success` | boolean | Always true on 200 | | `job_id` | integer | Monitor job ID (for job tracking) | | `file_uuid` | string | 32-char hex UUID of the file | -| `status` | string | `"processing"` | -| `pids` | integer[] | Process IDs of started processors | +| `status` | string | `"queued"` — file enters the FIFO queue | +| `pids` | integer[] | Process IDs of started processors (empty for queued) | | `message` | string | Human-readable status | #### Error Responses @@ -237,6 +237,105 @@ curl -s "$API/api/v1/jobs" -H "X-API-Key: $KEY" | jq '{count, jobs: [.jobs[] | { | `page` | integer | Current page number | | `page_size` | integer | Jobs per page | +### `GET /api/v1/job/:uuid` + +**Auth**: Required +**Scope**: file-level + +Get detailed information about a specific processing job, including its queue position. 
+ +#### Response (200) + +```json +{ + "id": 51, + "uuid": "c36f35685177c981aa139b66bbbccc5b", + "status": "queued", + "current_processor": null, + "progress_current": 0, + "progress_total": 0, + "processors": [], + "created_at": "2026-06-22 23:08:48.497018", + "started_at": null, + "updated_at": null, + "queue_position": 3 +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `id` | integer | Monitor job ID | +| `uuid` | string | File UUID | +| `status` | string | `"pending"`, `"queued"`, `"running"`, `"completed"`, `"failed"` | +| `current_processor` | string | Currently active processor, or null | +| `progress_current` | integer | Current progress count | +| `progress_total` | integer | Total progress count | +| `processors` | array | Processor list | +| `created_at` | string | Job creation timestamp | +| `started_at` | string | Processing start timestamp, or null | +| `updated_at` | string | Last update timestamp, or null | +| `queue_position` | integer | Position in FIFO queue (null if not pending/queued) | + +--- + +### Status Lifecycle + +``` +register ──→ pending + │ + trigger (POST /process) + │ + queued ←── queue_position counts jobs ahead + │ + worker picks up + │ + processing + │ + ┌────────┴────────┐ + ▼ ▼ + completed failed + │ + checkin ──→ indexed + checkout ──→ checked_out +``` + +| Status | Meaning | +|--------|---------| +| `pending` | File registered, not yet triggered | +| `queued` | Triggered, waiting for worker in FIFO queue | +| `processing` | Worker actively processing | +| `completed` | All processors finished successfully | +| `failed` | One or more essential processors failed | +| `indexed` | Post-processing checkin complete | +| `checked_out` | User checked out the file | + +Queue order is FIFO (`created_at ASC`). The `GET /api/v1/job/:uuid` endpoint returns `queue_position` showing how many jobs are ahead. + +### Frontend Status Mapping + +When displaying file status in the frontend list (e.g. 
after `GET /api/v1/files/scan`), map the `status` field as follows: + +| DB Status | Status Label | Filter: 待處理 | Filter: 處理中 | Count: pendingCount | Count: processingCount | +|-----------|-------------|----------------|----------------|---------------------|-----------------------| +| `unregistered` | 未註冊 | No | No | No | No | +| `registered` | 待處理 | **Yes** | No | **Yes** | No | +| `pending` | 待處理 | **Yes** | No | **Yes** | No | +| `queued` | 排隊中 | **Yes** | **Yes** | **Yes** | **Yes** | +| `processing` | 處理中 | No | **Yes** | No | **Yes** | +| `completed` | 已完成 | No | No | No | No | +| `failed` | 處理失敗 | No | No | No | No | +| `indexed` | 已入庫 | No | No | No | No | + +**`queued` 的特殊處理**: +- `statusLabel` → 顯示「排隊中」,加 `ms-badge-warn` 樣式(黃色) +- `filterPending` → 應包含 `queued`,讓它在「待處理」filter 可見 +- `pendingCount` + `processingCount` → 兩者都應包含 `queued`,因它既是「待處理」也是「正在排隊」 +- 在 `refreshAllStatus` / `loadFiles` 中,如果檔案狀態是 `queued`,應顯示簡單的排隊訊息(無需 polling progress) +- 當 worker pickup 後,狀態會變為 `processing`,此時 `refreshAllStatus` 會自動偵測到並開始 polling progress +- 也可以提供一個「queue_position」顯示:呼叫 `GET /api/v1/job/:uuid` 取得排在第幾位 + +--- + ### `GET /api/v1/file/:file_uuid/processor-counts` **Auth**: Required @@ -407,4 +506,4 @@ curl -s -X POST "$API/api/v1/file/$FILE_UUID/complete" \ Phase 1 (`/phase1`) combines store-asrx + rule1 + vectorize into one call. 
--- -*Updated: 2026-06-20 12:00:00* +*Updated: 2026-06-23 — Added queued status, FIFO queue order, queue_position in job detail, frontend status mapping table* diff --git a/scripts/face_mediapipe_test.py b/scripts/face_mediapipe_test.py deleted file mode 100644 index ea0307b..0000000 --- a/scripts/face_mediapipe_test.py +++ /dev/null @@ -1,200 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -POC: MediaPipe Face Detection vs Apple Vision Framework vs InsightFace - -Tests face detection on video frames and reports: -- Detection count -- Bounding box quality -- Landmarks (468 face mesh) -- Processing speed -""" -import sys -import json -import os -import time -import subprocess -import argparse - -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) - - -def extract_frames(video_path, sample_interval=30, max_frames=50): - """Extract frames using ffmpeg""" - import tempfile - tmpdir = tempfile.mkdtemp(prefix="face_test_") - pattern = os.path.join(tmpdir, "frame_%05d.jpg") - cmd = ["ffmpeg", "-y", "-v", "quiet", "-i", video_path, - "-vf", f"select=not(mod(n\\,{sample_interval}))", - "-vsync", "vfr", "-q:v", "5", pattern] - subprocess.run(cmd, check=True) - files = sorted([f for f in os.listdir(tmpdir) if f.endswith(".jpg")])[:max_frames] - return tmpdir, [os.path.join(tmpdir, f) for f in files] - - -def test_mediapipe(frame_paths, fps): - """MediaPipe Face Detection + Face Mesh""" - try: - from mediapipe.tasks import vision - from mediapipe.tasks.python.core.base_options import BaseOptions - from mediapipe.tasks.python.vision.face_detector import FaceDetector, FaceDetectorOptions - from mediapipe.tasks.python.vision.face_landmarker import FaceLandmarker, FaceLandmarkerOptions - except ImportError: - print("[MediaPipe] Not available, skipping") - return None - - model_dir = os.path.join(os.path.dirname(__file__), "models") - os.makedirs(model_dir, exist_ok=True) - - # Check model files - MediaPipe downloads automatically via the API - base_opts_detect = 
BaseOptions(model_asset_path="") - detect_opts = FaceDetectorOptions(base_options=BaseOptions()) - - t0 = time.time() - total_faces = 0 - frames_with_faces = 0 - landmarks_total = 0 - - # MediaPipe Face Detector - try: - detector = vision.FaceDetector.create_from_options( - FaceDetectorOptions( - base_options=BaseOptions(model_asset_buffer=None), - running_mode=vision.RunningMode.IMAGE - ) - ) - except: - # Download model first - import urllib.request - model_url = "https://storage.googleapis.com/mediapipe-models/face_detector/blaze_face_short_range/float16/latest/face_detector.task" - model_path = os.path.join(model_dir, "face_detector.task") - if not os.path.exists(model_path): - print(f"[MediaPipe] Downloading model: {model_url}") - urllib.request.urlretrieve(model_url, model_path) - - detector = vision.FaceDetector.create_from_options( - FaceDetectorOptions( - base_options=BaseOptions(model_asset_path=model_path), - running_mode=vision.RunningMode.IMAGE - ) - ) - - import cv2 - for path in frame_paths: - img = cv2.imread(path) - if img is None: - continue - h, w = img.shape[:2] - - mp_img = mp.Image(image_format=mp.ImageFormat.SRGB, data=img) - result = detector.detect(mp_img) - - if result.detections: - frames_with_faces += 1 - for det in result.detections: - total_faces += 1 - bbox = det.bounding_box - # bbox is [x, y, width, height] in pixels - - elapsed = time.time() - t0 - print(f"[MediaPipe] Detection: {len(frame_paths)} frames, {frames_with_faces} with faces, {total_faces} faces, {elapsed:.2f}s") - - # Face Landmarker (468 points) - landmark_path = os.path.join(model_dir, "face_landmarker.task") - if not os.path.exists(landmark_path): - model_url = "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/latest/face_landmarker.task" - print(f"[MediaPipe] Downloading landmark model...") - import urllib.request - urllib.request.urlretrieve(model_url, landmark_path) - - landmarker = vision.FaceLandmarker.create_from_options( 
- FaceLandmarkerOptions( - base_options=BaseOptions(model_asset_path=landmark_path), - running_mode=vision.RunningMode.IMAGE, - output_face_blendshapes=False, - output_facial_transformation_matrixes=False, - ) - ) - - t1 = time.time() - for path in frame_paths[:10]: # Only test 10 frames for landmarks - img = cv2.imread(path) - if img is None: - continue - mp_img = mp.Image(image_format=mp.ImageFormat.SRGB, data=img) - result = landmarker.detect(mp_img) - if result.face_landmarks: - for face in result.face_landmarks: - landmarks_total += len(face) - - elapsed2 = time.time() - t1 - print(f"[MediaPipe] Face Mesh (10 frames): {landmarks_total} total landmarks (~{landmarks_total//max(len(result.face_landmarks),1)} per face)") - - return { - "frames_processed": len(frame_paths), - "frames_with_faces": frames_with_faces, - "total_faces": total_faces, - "time_sec": elapsed, - "landmarks_per_face": 468, - } - - -def test_vision_framework(frame_paths, fps): - """Apple Vision Framework face detection via swift binary""" - # Use the existing swift binary - swift_bin = os.path.join(os.path.dirname(__file__), - "swift_processors/.build/debug/swift_ocr") - # swift_ocr doesn't do face detection, use the face_compare_test - swift_face = os.path.join(os.path.dirname(__file__), - "swift_processors/.build/debug/face_compare_test") - - if not os.path.exists(swift_face): - print("[Vision] Binary not found, skipping") - return None - - print(f"[Vision] Running face compare test...") - t0 = time.time() - result = subprocess.run( - [swift_face, frame_paths[0].rsplit("/", 2)[0].replace("/frames", ""), # This won't work for single files - "--sample-interval", "1", "--max-frames", str(len(frame_paths))], - capture_output=True, text=True, timeout=120 - ) - elapsed = time.time() - t0 - print(result.stdout[-500:]) - return {"time_sec": elapsed} - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("video_path") - parser.add_argument("--sample-interval", type=int, 
default=30) - parser.add_argument("--max-frames", type=int, default=50) - args = parser.parse_args() - - print(f"Testing: {args.video_path}") - - # Extract frames - tmpdir, frames = extract_frames(args.video_path, args.sample_interval, args.max_frames) - print(f"Extracted {len(frames)} frames") - - # MediaPipe - print("\n=== MediaPipe ===") - mp_result = test_mediapipe(frames, 24) - - # Vision Framework - print("\n=== Apple Vision Framework ===") - vf_result = test_vision_framework(frames, 24) - - # Summary - print("\n=== Comparison ===") - if mp_result: - print(f"MediaPipe: {mp_result['total_faces']} faces in {mp_result['frames_with_faces']} frames, {mp_result['time_sec']:.2f}s") - print(f" Landmarks: {mp_result['landmarks_per_face']} per face") - print(f"Vision Framework: (see above)") - - # Cleanup - import shutil - shutil.rmtree(tmpdir, ignore_errors=True) - - -if __name__ == "__main__": - main() diff --git a/scripts/face_mediapipe_test_v1.11.py b/scripts/face_mediapipe_test_v1.11.py deleted file mode 120000 index d45d8b1..0000000 --- a/scripts/face_mediapipe_test_v1.11.py +++ /dev/null @@ -1 +0,0 @@ -../v1.1/scripts/face_mediapipe_test_v1.11.py \ No newline at end of file diff --git a/scripts/face_processor.py b/scripts/face_processor.py index e0fd164..ee47d1f 100644 --- a/scripts/face_processor.py +++ b/scripts/face_processor.py @@ -225,8 +225,9 @@ class FaceProcessorVision: if face_img.size == 0: continue - # CoreML embedding - emb = self.extract_face_embedding(face_img) + # CoreML embedding - TODO: push to Qdrant _faces collection instead + # emb = self.extract_face_embedding(face_img) + emb = None if emb is not None: embed_count += 1 @@ -240,7 +241,6 @@ class FaceProcessorVision: faces.append({ "x": x, "y": y, "width": w, "height": h, "confidence": face.get("confidence", 0.5), - "embedding": emb, "pose_angle": { "angle": pose_angle, "roll": pose_info.get("roll", 0), @@ -262,20 +262,17 @@ class FaceProcessorVision: if len(face_data["frames"]) % 100 == 0: 
elapsed = time.time() - t0 - print(f"[FACE_V2] {len(face_data['frames'])} frames, {embed_count} embeddings, {elapsed:.0f}s") + print(f"[FACE_V2] {len(face_data['frames'])} frames, {elapsed:.0f}s") if self.publisher: pct = int(len(face_data["frames"]) * 100 / max(len(frames), 1)) if pct > last_pct: last_pct = pct self.publisher.progress("face", len(face_data["frames"]), len(frames), - f"{embed_count} faces", embed_count, "faces") + "", 0, "faces") self.video.release() - # Finalize face_data["metadata"]["status"] = "completed" - face_data["metadata"]["total_embeddings"] = embed_count - face_data["metadata"]["embedder"] = "coreml_facenet" # Convert dict frames to list for Rust FaceResult format frames_list = [] diff --git a/scripts/generate_parent_chunks_gemma4.py b/scripts/generate_parent_chunks_gemma4.py deleted file mode 100644 index d0aa584..0000000 --- a/scripts/generate_parent_chunks_gemma4.py +++ /dev/null @@ -1,228 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -Regenerate ALL parent chunks for 384b0ff44aaaa1f1 using gemma4 -Groups ASR chunks into ~17 logical scenes and generates summaries. 
-""" - -import json -import subprocess -import psycopg2 -import psycopg2.extras - -DB_CONFIG = {"host": "localhost", "user": "accusys", "dbname": "momentry"} -UUID = "384b0ff44aaaa1f1" -OLLAMA_URL = "http://localhost:11434/api/generate" -MODEL = "gemma4:latest" - -# Target ~17 scenes across 6865s = ~400s per scene -# But use natural breaks (gaps in dialogue) to split -SCENE_TARGET_COUNT = 17 - - -def get_chunks(): - conn = psycopg2.connect(**DB_CONFIG) - cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) - cur.execute( - """ - SELECT id, chunk_id, start_time, end_time, start_frame, end_frame, - text_content, fps - FROM chunks - WHERE uuid = %s AND chunk_type = 'sentence' - ORDER BY start_time - """, - (UUID,), - ) - chunks = cur.fetchall() - cur.close() - conn.close() - return chunks - - -def call_gemma4(prompt, max_tokens=300): - payload = { - "model": MODEL, - "prompt": prompt, - "stream": False, - "options": {"temperature": 0.3, "num_predict": max_tokens}, - } - try: - resp = subprocess.run( - ["curl", "-s", OLLAMA_URL, "-d", json.dumps(payload)], - capture_output=True, - text=True, - timeout=180, - ) - if resp.returncode == 0: - result = json.loads(resp.stdout) - return result.get("response", "").strip() - except Exception as e: - print(f" ⚠️ Ollama error: {e}") - return "" - - -def find_scene_boundaries(chunks, target_count=SCENE_TARGET_COUNT): - """Find optimal scene boundaries based on dialogue gaps""" - if not chunks: - return [] - - # Calculate gaps between consecutive chunks - gaps = [] - for i in range(1, len(chunks)): - gap = chunks[i]["start_time"] - chunks[i - 1]["end_time"] - gaps.append((i, gap)) - - # Sort by gap size, take top (target_count - 1) gaps - gaps.sort(key=lambda x: x[1], reverse=True) - split_indices = sorted([g[0] for g in gaps[: target_count - 1]]) - - # Create scenes - scenes = [] - start = 0 - for split in split_indices: - scenes.append(chunks[start:split]) - start = split - scenes.append(chunks[start:]) - - return 
scenes - - -def generate_summary(scene_chunks, scene_num): - """Generate summary for a scene using gemma4""" - texts = [c["text_content"] for c in scene_chunks if c["text_content"]] - if not texts: - return f"Scene {scene_num}: No dialogue" - - combined = " ".join(texts)[:3000] - duration = scene_chunks[-1]["end_time"] - scene_chunks[0]["start_time"] - - prompt = f"""You are a professional film scene analyst. Given the following dialogue transcript from a movie scene, write a concise one-sentence English summary. - -Duration: {duration:.0f} seconds -Dialogue: -{combined} - -Provide ONLY the summary sentence, nothing else. Focus on plot events and character actions.""" - - summary = call_gemma4(prompt, max_tokens=250) - if not summary: - # Fallback: use first few words of dialogue - summary = f"Scene {scene_num}: {' '.join(texts[:3])[:80]}..." - return summary - - -def insert_parent_chunks(scenes): - """Insert parent chunks and update child relationships""" - conn = psycopg2.connect(**DB_CONFIG) - cur = conn.cursor() - - inserted = 0 - for i, scene_chunks in enumerate(scenes): - start_time = scene_chunks[0]["start_time"] - end_time = scene_chunks[-1]["end_time"] - start_frame = int(scene_chunks[0]["start_frame"]) - end_frame = int(scene_chunks[-1]["end_frame"]) - fps = float(scene_chunks[0]["fps"]) if scene_chunks[0]["fps"] else 59.94 - chunk_count = len(scene_chunks) - - print( - f" Scene {i}: {start_time:.0f}s-{end_time:.0f}s ({chunk_count} chunks, {end_time - start_time:.0f}s)" - ) - - # Generate summary - summary = generate_summary(scene_chunks, i) - print(f" 📝 {summary[:100]}...") - - # Insert parent chunk - cur.execute( - """ - INSERT INTO parent_chunks ( - uuid, scene_order, start_time, end_time, - start_frame, end_frame, fps, summary_text, - metadata, rule_3_markers, created_at - ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW()) - RETURNING id - """, - ( - UUID, - i, - start_time, - end_time, - start_frame, - end_frame, - fps, - summary, - 
json.dumps({"auto_generated_by": "gemma4", "chunk_count": chunk_count}), - json.dumps({}), - ), - ) - parent_id = cur.fetchone()[0] - - # Update chunks with parent_chunk_id - chunk_ids = [c["chunk_id"] for c in scene_chunks] - child_ids_array = chunk_ids # Store all child chunk IDs - - cur.execute( - """ - UPDATE chunks - SET parent_chunk_id = %s::varchar - WHERE uuid = %s AND chunk_id = ANY(%s) - """, - (str(parent_id), UUID, chunk_ids), - ) - - inserted += 1 - if i % 5 == 4 or i == len(scenes) - 1: - conn.commit() - print(f" ✅ Committed scenes 0-{i}") - - conn.commit() - cur.close() - conn.close() - return inserted - - -def main(): - print(f"🎬 Regenerating parent chunks for {UUID}") - print(f" Using model: {MODEL}") - print("=" * 70) - - # Step 1: Get all chunks - print("\n📥 Fetching ASR chunks...") - chunks = get_chunks() - print(f" Found {len(chunks)} sentence chunks") - if chunks: - print(f" Time range: 0-{chunks[-1]['end_time']:.0f}s") - - # Step 2: Find scene boundaries - print(f"\n🔍 Finding {SCENE_TARGET_COUNT} scene boundaries...") - scenes = find_scene_boundaries(chunks, SCENE_TARGET_COUNT) - print(f" Created {len(scenes)} scenes") - for i, s in enumerate(scenes): - print( - f" Scene {i}: {s[0]['start_time']:.0f}s-{s[-1]['end_time']:.0f}s ({len(s)} chunks)" - ) - - # Step 3: Generate summaries and insert - print("\n🤖 Generating summaries with gemma4...") - inserted = insert_parent_chunks(scenes) - - print(f"\n{'=' * 70}") - print(f"✅ Created {inserted} parent chunks") - - # Step 4: Verify - print("\n📊 Verification:") - conn = psycopg2.connect(**DB_CONFIG) - cur = conn.cursor() - cur.execute("SELECT COUNT(*) FROM parent_chunks WHERE uuid = %s", (UUID,)) - print(f" parent_chunks: {cur.fetchone()[0]}") - cur.execute( - "SELECT COUNT(*) FROM chunks WHERE uuid = %s AND parent_chunk_id IS NULL AND chunk_type = 'sentence'", - (UUID,), - ) - print(f" orphan chunks: {cur.fetchone()[0]}") - cur.close() - conn.close() - - -if __name__ == "__main__": - main() diff 
--git a/scripts/generate_parent_chunks_gemma4_v1.11.py b/scripts/generate_parent_chunks_gemma4_v1.11.py deleted file mode 120000 index 1b6873a..0000000 --- a/scripts/generate_parent_chunks_gemma4_v1.11.py +++ /dev/null @@ -1 +0,0 @@ -../v1.1/scripts/generate_parent_chunks_gemma4_v1.11.py \ No newline at end of file diff --git a/scripts/mediapipe_holistic_processor.py b/scripts/mediapipe_holistic_processor.py deleted file mode 100644 index 43ae634..0000000 --- a/scripts/mediapipe_holistic_processor.py +++ /dev/null @@ -1,711 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -MediaPipe Holistic Processor - Full body keypoint extraction - -Purpose: -1. Extract Face Mesh (468 keypoints) → eye/mouth actions -2. Extract Pose (33 keypoints) → arm/leg/feet actions -3. Extract Hands (21 keypoints × 2) → hand gestures - -Output structure: -{ - "metadata": {...}, - "frames": { - "frame_num": { - "persons": [ - { - "person_id": 0, - "bbox": {...}, - "face_mesh": { - "landmarks": [[x,y,z], ...], # 468 points - "eye_features": {...}, - "mouth_features": {...}, - }, - "pose": { - "landmarks": [[x,y,z,visibility], ...], # 33 points - "arm_features": {...}, - "leg_features": {...}, - }, - "hands": { - "left": { - "landmarks": [[x,y,z], ...], # 21 points - "gesture": "...", - }, - "right": { - "landmarks": [[x,y,z], ...], # 21 points - "gesture": "...", - }, - }, - } - ] - } - } -} -""" - -import json -import argparse -import cv2 -import numpy as np -import mediapipe as mp -from typing import Dict - - -class MediaPipeHolisticProcessor: - """ - Process video with MediaPipe Holistic (Face + Pose + Hands) - """ - - def __init__( - self, - model_complexity: int = 1, # 0, 1, 2 - refine_face_landmarks: bool = True, - enable_segmentation: bool = False, - min_detection_confidence: float = 0.5, - min_tracking_confidence: float = 0.5, - ): - """ - Initialize MediaPipe Holistic - - Args: - model_complexity: 0 (lite), 1 (full), 2 (heavy) - refine_face_landmarks: Enable iris detection - 
enable_segmentation: Enable segmentation mask - min_detection_confidence: Detection confidence threshold - min_tracking_confidence: Tracking confidence threshold - """ - self.mp_holistic = mp.solutions.holistic - self.mp_drawing = mp.solutions.drawing_utils - self.mp_drawing_styles = mp.solutions.drawing_styles - - self.holistic = self.mp_holistic.Holistic( - static_image_mode=False, # Video mode - model_complexity=model_complexity, - smooth_landmarks=True, # Smooth landmarks across frames - enable_segmentation=enable_segmentation, - smooth_segmentation=True, - refine_face_landmarks=refine_face_landmarks, - min_detection_confidence=min_detection_confidence, - min_tracking_confidence=min_tracking_confidence, - ) - - # Eye landmark indices (Face Mesh) - self.LEFT_EYE_INDICES = [33, 133, 159, 145, 158, 144] # 6 points - self.RIGHT_EYE_INDICES = [362, 263, 386, 374, 385, 373] - - # Iris indices - self.LEFT_IRIS_CENTER = 468 - self.RIGHT_IRIS_CENTER = 473 - - # Mouth indices - self.MOUTH_TOP = 13 - self.MOUTH_BOTTOM = 14 - self.MOUTH_LEFT = 61 - self.MOUTH_RIGHT = 291 - - # Pose key indices - self.POSE_KEYPOINTS = { - "nose": 0, - "left_shoulder": 11, - "right_shoulder": 12, - "left_elbow": 13, - "right_elbow": 14, - "left_wrist": 15, - "right_wrist": 16, - "left_hip": 23, - "right_hip": 24, - "left_knee": 25, - "right_knee": 26, - "left_ankle": 27, - "right_ankle": 28, - } - - # Hand key indices - self.HAND_KEYPOINTS = { - "wrist": 0, - "thumb_cmc": 1, - "thumb_mcp": 2, - "thumb_ip": 3, - "thumb_tip": 4, - "index_mcp": 5, - "index_pip": 6, - "index_dip": 7, - "index_tip": 8, - "middle_mcp": 9, - "middle_pip": 10, - "middle_dip": 11, - "middle_tip": 12, - "ring_mcp": 13, - "ring_pip": 14, - "ring_dip": 15, - "ring_tip": 16, - "pinky_mcp": 17, - "pinky_pip": 18, - "pinky_dip": 19, - "pinky_tip": 20, - } - - def process_frame(self, frame: np.ndarray) -> Dict: - """ - Process single frame - - Args: - frame: BGR image - - Returns: - Dict with face_mesh, pose, hands data - 
""" - frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) - - results = self.holistic.process(frame_rgb) - - person_data = { - "person_id": 0, - "bbox": None, - "face_mesh": None, - "pose": None, - "hands": {"left": None, "right": None}, - } - -# Extract face mesh - height, width = frame.shape[:2] - if results.face_landmarks: - person_data["face_mesh"] = self._extract_face_mesh(results.face_landmarks, width, height) - - # Extract pose - if results.pose_landmarks: - person_data["pose"] = self._extract_pose(results.pose_landmarks, width, height) - - # Extract hands - if results.left_hand_landmarks: - person_data["hands"]["left"] = self._extract_hand(results.left_hand_landmarks, "left", width, height) - - if results.right_hand_landmarks: - person_data["hands"]["right"] = self._extract_hand(results.right_hand_landmarks, "right", width, height) - - # Calculate bbox from pose landmarks - if results.pose_landmarks: - landmarks = results.pose_landmarks.landmark - x_coords = [lm.x for lm in landmarks if lm.visibility > 0.5] - y_coords = [lm.y for lm in landmarks if lm.visibility > 0.5] - - if x_coords and y_coords: - x_min, x_max = min(x_coords), max(x_coords) - y_min, y_max = min(y_coords), max(y_coords) - - person_data["bbox"] = { - "x": int(x_min * width), - "y": int(y_min * height), - "width": int((x_max - x_min) * width), - "height": int((y_max - y_min) * height), - } - - return person_data - - def _extract_face_mesh(self, face_landmarks, width: int, height: int) -> Dict: - """ - Extract face mesh landmarks and calculate features - - Args: - face_landmarks: MediaPipe face landmarks - width: Frame width in pixels - height: Frame height in pixels - - Returns: - Dict with landmarks (in pixels), eye_features, mouth_features - """ - landmarks = [] - for lm in face_landmarks.landmark: - landmarks.append([int(lm.x * width), int(lm.y * height), lm.z]) - - # Eye Aspect Ratio (EAR) - def calculate_ear(eye_indices): - # Get eye points - p1 = face_landmarks.landmark[eye_indices[0]] 
- p2 = face_landmarks.landmark[eye_indices[1]] - p3 = face_landmarks.landmark[eye_indices[2]] - p4 = face_landmarks.landmark[eye_indices[3]] - p5 = face_landmarks.landmark[eye_indices[4]] - p6 = face_landmarks.landmark[eye_indices[5]] - - # Vertical distances - vertical_1 = np.linalg.norm([p3.x - p5.x, p3.y - p5.y]) - vertical_2 = np.linalg.norm([p4.x - p6.x, p4.y - p6.y]) - - # Horizontal distance - horizontal = np.linalg.norm([p1.x - p2.x, p1.y - p2.y]) - - ear = (vertical_1 + vertical_2) / (2 * horizontal) if horizontal > 0 else 0 - return ear - - left_ear = calculate_ear(self.LEFT_EYE_INDICES) - right_ear = calculate_ear(self.RIGHT_EYE_INDICES) - avg_ear = (left_ear + right_ear) / 2 - - # Iris position (if refined landmarks enabled) - left_iris_x = None - right_iris_x = None - - if len(face_landmarks.landmark) > 477: - left_iris = face_landmarks.landmark[self.LEFT_IRIS_CENTER] - right_iris = face_landmarks.landmark[self.RIGHT_IRIS_CENTER] - - # Normalize iris position relative to eye - left_eye_center_x = (face_landmarks.landmark[33].x + face_landmarks.landmark[133].x) / 2 - right_eye_center_x = (face_landmarks.landmark[362].x + face_landmarks.landmark[263].x) / 2 - - left_eye_width = abs(face_landmarks.landmark[33].x - face_landmarks.landmark[133].x) - right_eye_width = abs(face_landmarks.landmark[362].x - face_landmarks.landmark[263].x) - - left_iris_x = (left_iris.x - left_eye_center_x) / left_eye_width if left_eye_width > 0 else 0 - right_iris_x = (right_iris.x - right_eye_center_x) / right_eye_width if right_eye_width > 0 else 0 - - # Eye action detection - eye_action = "unknown" - if avg_ear < 0.15: - eye_action = "closed" - elif avg_ear > 0.4: - eye_action = "wide_open" - elif 0.15 <= avg_ear < 0.25: - eye_action = "squint" - else: - eye_action = "normal" - - # Gaze direction - gaze_direction = "center" - if left_iris_x and right_iris_x: - avg_iris_x = (left_iris_x + right_iris_x) / 2 - if avg_iris_x < -0.2: - gaze_direction = "left" - elif avg_iris_x > 
0.2: - gaze_direction = "right" - - # Mouth Aspect Ratio (MAR) - mouth_top = face_landmarks.landmark[self.MOUTH_TOP] - mouth_bottom = face_landmarks.landmark[self.MOUTH_BOTTOM] - mouth_left = face_landmarks.landmark[self.MOUTH_LEFT] - mouth_right = face_landmarks.landmark[self.MOUTH_RIGHT] - - mouth_height = np.linalg.norm([mouth_top.x - mouth_bottom.x, mouth_top.y - mouth_bottom.y]) - mouth_width = np.linalg.norm([mouth_left.x - mouth_right.x, mouth_left.y - mouth_right.y]) - - mar = mouth_height / mouth_width if mouth_width > 0 else 0 - - # Mouth corner distance (for smile detection) - mouth_center_y = (mouth_top.y + mouth_bottom.y) / 2 - corner_lift = (mouth_center_y - mouth_left.y) + (mouth_center_y - mouth_right.y) - - # Mouth action detection - mouth_action = "unknown" - if mar > 0.7: - mouth_action = "yawn" - elif mar > 0.5: - mouth_action = "open" - elif mar < 0.2: - if corner_lift > 0.02: - mouth_action = "smile" - else: - mouth_action = "closed" - else: - mouth_action = "slightly_open" - - return { - "landmarks": landmarks, - "num_landmarks": len(landmarks), - "eye_features": { - "left_ear": round(left_ear, 4), - "right_ear": round(right_ear, 4), - "avg_ear": round(avg_ear, 4), - "left_iris_x": round(left_iris_x, 4) if left_iris_x else None, - "right_iris_x": round(right_iris_x, 4) if right_iris_x else None, - "eye_action": eye_action, - "gaze_direction": gaze_direction, - }, - "mouth_features": { - "mar": round(mar, 4), - "mouth_height": round(mouth_height, 4), - "mouth_width": round(mouth_width, 4), - "corner_lift": round(corner_lift, 4), - "mouth_action": mouth_action, - }, - } - - def _extract_pose(self, pose_landmarks, width: int, height: int) -> Dict: - """ - Extract pose landmarks and calculate features - - Args: - pose_landmarks: MediaPipe pose landmarks - width: Frame width in pixels - height: Frame height in pixels - - Returns: - Dict with landmarks (in pixels), arm_features, leg_features - """ - landmarks = [] - for lm in 
pose_landmarks.landmark: - landmarks.append([int(lm.x * width), int(lm.y * height), lm.z, lm.visibility]) - - # Helper function to calculate angle - def calculate_angle(p1_idx, p2_idx, p3_idx): - p1 = pose_landmarks.landmark[p1_idx] - p2 = pose_landmarks.landmark[p2_idx] - p3 = pose_landmarks.landmark[p3_idx] - - v1 = np.array([p1.x, p1.y]) - np.array([p2.x, p2.y]) - v2 = np.array([p3.x, p3.y]) - np.array([p2.x, p2.y]) - - angle = np.arccos(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))) - return np.degrees(angle) - - # Arm features - left_elbow_angle = calculate_angle(11, 13, 15) # shoulder-elbow-wrist - right_elbow_angle = calculate_angle(12, 14, 16) - - # Check if arms raised - left_wrist = pose_landmarks.landmark[15] - left_elbow = pose_landmarks.landmark[13] - left_shoulder = pose_landmarks.landmark[11] - - right_wrist = pose_landmarks.landmark[16] - right_elbow = pose_landmarks.landmark[14] - right_shoulder = pose_landmarks.landmark[12] - - left_arm_raised = left_wrist.y < left_elbow.y < left_shoulder.y - right_arm_raised = right_wrist.y < right_elbow.y < right_shoulder.y - - # Arm action detection - left_arm_action = "unknown" - if left_arm_raised: - left_arm_action = "raise_left" - elif left_elbow_angle > 150: - left_arm_action = "extend_left" - elif left_elbow_angle < 90: - left_arm_action = "fold_left" - else: - left_arm_action = "neutral_left" - - right_arm_action = "unknown" - if right_arm_raised: - right_arm_action = "raise_right" - elif right_elbow_angle > 150: - right_arm_action = "extend_right" - elif right_elbow_angle < 90: - right_arm_action = "fold_right" - else: - right_arm_action = "neutral_right" - - # Cross arms detection - cross_arms = False - if left_wrist.x > right_wrist.x and right_wrist.x < left_shoulder.x: - cross_arms = True - - # Leg features - left_knee_angle = calculate_angle(23, 25, 27) # hip-knee-ankle - right_knee_angle = calculate_angle(24, 26, 28) - - # Check standing/sitting - left_hip = pose_landmarks.landmark[23] 
- left_knee = pose_landmarks.landmark[25] - left_ankle = pose_landmarks.landmark[27] - - right_hip = pose_landmarks.landmark[24] - right_knee = pose_landmarks.landmark[26] - right_ankle = pose_landmarks.landmark[28] - - hip_avg_y = (left_hip.y + right_hip.y) / 2 - knee_avg_y = (left_knee.y + right_knee.y) / 2 - - # Standing: hip < knee < ankle (y increases downward) - standing = left_hip.y < left_knee.y < left_ankle.y and right_hip.y < right_knee.y < right_ankle.y - - # Sitting: hip ≈ knee height - sitting = abs(hip_avg_y - knee_avg_y) < 0.1 - - # Leg action detection - leg_action = "unknown" - if sitting: - leg_action = "sit" - elif standing: - if left_knee_angle < 120 or right_knee_angle < 120: - leg_action = "knee_bend" - else: - leg_action = "stand" - - return { - "landmarks": landmarks, - "num_landmarks": len(landmarks), - "arm_features": { - "left_elbow_angle": round(left_elbow_angle, 2), - "right_elbow_angle": round(right_elbow_angle, 2), - "left_arm_raised": left_arm_raised, - "right_arm_raised": right_arm_raised, - "left_arm_action": left_arm_action, - "right_arm_action": right_arm_action, - "cross_arms": cross_arms, - }, - "leg_features": { - "left_knee_angle": round(left_knee_angle, 2), - "right_knee_angle": round(right_knee_angle, 2), - "standing": standing, - "sitting": sitting, - "leg_action": leg_action, - }, - } - - def _extract_hand(self, hand_landmarks, hand_type: str, width: int, height: int) -> Dict: - """ - Extract hand landmarks and detect gesture - - Args: - hand_landmarks: MediaPipe hand landmarks - hand_type: "left" or "right" - width: Frame width in pixels - height: Frame height in pixels - - Returns: - Dict with landmarks (in pixels), gesture - """ - landmarks = [] - for lm in hand_landmarks.landmark: - landmarks.append([int(lm.x * width), int(lm.y * height), lm.z]) - - # Check finger extensions - def is_finger_extended(tip_idx, pip_idx): - tip = hand_landmarks.landmark[tip_idx] - pip = hand_landmarks.landmark[pip_idx] - - # Finger is 
extended if tip is higher (lower y) than pip - return tip.y < pip.y - - thumb_extended = is_finger_extended(4, 3) - index_extended = is_finger_extended(8, 6) - middle_extended = is_finger_extended(12, 10) - ring_extended = is_finger_extended(16, 14) - pinky_extended = is_finger_extended(20, 18) - - extensions = { - "thumb": thumb_extended, - "index": index_extended, - "middle": middle_extended, - "ring": ring_extended, - "pinky": pinky_extended, - } - - # Gesture detection - gesture = "unknown" - - num_extended = sum(extensions.values()) - - if num_extended == 5: - gesture = "open_hand" - elif num_extended == 0: - gesture = "fist" - elif thumb_extended and num_extended == 1: - gesture = "thumbs_up" - elif index_extended and middle_extended and num_extended == 2: - gesture = "peace_sign" - elif index_extended and num_extended == 1: - gesture = "pointing" - elif thumb_extended and index_extended and not any([middle_extended, ring_extended, pinky_extended]): - # Check thumb-index distance for OK gesture - thumb_tip = hand_landmarks.landmark[4] - index_tip = hand_landmarks.landmark[8] - - distance = np.linalg.norm([thumb_tip.x - index_tip.x, thumb_tip.y - index_tip.y]) - - if distance < 0.05: - gesture = "ok_sign" - else: - gesture = "grab" - - return { - "landmarks": landmarks, - "num_landmarks": len(landmarks), - "finger_extensions": extensions, - "num_fingers_extended": num_extended, - "gesture": gesture, - "hand_type": hand_type, - } - - def process_video( - self, - video_path: str, - output_path: str, - sample_interval: int = 1, - uuid: str = "", - ) -> Dict: - """ - Process entire video - - Args: - video_path: Path to video file - output_path: Path to output JSON - sample_interval: Process every N frames - uuid: UUID for progress reporting - - Returns: - Dict with all processed data - """ - cap = cv2.VideoCapture(video_path) - - if not cap.isOpened(): - print(f"MEDIAPIPE_ERROR:Cannot open video: {video_path}", file=sys.stderr) - return {} - - fps = 
cap.get(cv2.CAP_PROP_FPS) - width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - - print(f"MEDIAPIPE_START", file=sys.stderr) - print(f"MEDIAPIPE_INFO:FPS={fps},total={total_frames},interval={sample_interval}", file=sys.stderr) - - output_data = { - "metadata": { - "video_path": video_path, - "fps": fps, - "width": width, - "height": height, - "total_frames": total_frames, - "sample_interval": sample_interval, - "processor": "mediapipe_holistic", - "model_complexity": 1, - "refine_face_landmarks": True, - }, - "frames": {}, - } - - frame_count = 0 - processed_count = 0 - - while True: - ret, frame = cap.read() - if not ret: - break - - frame_count += 1 - - if frame_count % sample_interval != 0: - continue - - # Process frame - person_data = self.process_frame(frame) - - # Only save if landmarks detected - if person_data["face_mesh"] or person_data["pose"] or person_data["hands"]["left"] or person_data["hands"]["right"]: - timestamp = frame_count / fps if fps > 0 else 0 - - output_data["frames"][str(frame_count)] = { - "frame_number": frame_count, - "timestamp": round(timestamp, 3), - "persons": [person_data], - } - - processed_count += 1 - - if processed_count % 100 == 0: - print(f"MEDIAPIPE_FRAME:{processed_count}", file=sys.stderr) - - cap.release() - - # Update metadata - output_data["metadata"]["processed_frames"] = processed_count - - # Save output - with open(output_path, "w") as f: - json.dump(output_data, f, indent=2) - - print(f"MEDIAPIPE_COMPLETE:{processed_count}", file=sys.stderr) - - return output_data - - def close(self): - """Close MediaPipe model""" - self.holistic.close() - - -def main(): - parser = argparse.ArgumentParser(description="MediaPipe Holistic Processor") - parser.add_argument("video_path", nargs="?", help="Path to video file (positional)") - parser.add_argument("output_path", nargs="?", help="Path to output JSON (positional)") - 
parser.add_argument("--video", help="Path to video file") - parser.add_argument("--output", help="Path to output JSON") - parser.add_argument("--sample-interval", type=int, default=1, help="Process every N frames") - parser.add_argument("--model-complexity", type=int, default=1, choices=[0, 1, 2], help="Model complexity") - parser.add_argument("--test-frame", type=int, help="Test single frame only") - parser.add_argument("--uuid", default="", help="UUID for progress reporting") - args = parser.parse_args() - - # Resolve positional vs flagged args - video_path = args.video or args.video_path - output_path = args.output or args.output_path - if not video_path or not output_path: - parser.error("video_path and output_path are required") - - print("=" * 70) - print("MediaPipe Holistic Processor") - print("=" * 70) - - processor = MediaPipeHolisticProcessor( - model_complexity=args.model_complexity, - refine_face_landmarks=True, - ) - - if args.test_frame: - # Test single frame - print(f"\nTesting frame {args.test_frame}...") - - cap = cv2.VideoCapture(video_path) - cap.set(cv2.CAP_PROP_POS_FRAMES, args.test_frame - 1) - - ret, frame = cap.read() - cap.release() - - if ret: - person_data = processor.process_frame(frame) - - print("\n=== Results ===") - - if person_data["face_mesh"]: - face = person_data["face_mesh"] - print(f"\nFace Mesh: {face['num_landmarks']} landmarks") - print(f" Eye: {face['eye_features']['eye_action']} (EAR: {face['eye_features']['avg_ear']})") - print(f" Gaze: {face['eye_features']['gaze_direction']}") - print(f" Mouth: {face['mouth_features']['mouth_action']} (MAR: {face['mouth_features']['mar']})") - - if person_data["pose"]: - pose = person_data["pose"] - print(f"\nPose: {pose['num_landmarks']} keypoints") - print(f" Left arm: {pose['arm_features']['left_arm_action']} (angle: {pose['arm_features']['left_elbow_angle']}°)") - print(f" Right arm: {pose['arm_features']['right_arm_action']} (angle: {pose['arm_features']['right_elbow_angle']}°)") - 
print(f" Cross arms: {pose['arm_features']['cross_arms']}") - print(f" Leg: {pose['leg_features']['leg_action']}") - - if person_data["hands"]["left"]: - hand = person_data["hands"]["left"] - print(f"\nLeft hand: {hand['num_landmarks']} keypoints") - print(f" Gesture: {hand['gesture']}") - print(f" Fingers extended: {hand['num_fingers_extended']}") - - if person_data["hands"]["right"]: - hand = person_data["hands"]["right"] - print(f"\nRight hand: {hand['num_landmarks']} keypoints") - print(f" Gesture: {hand['gesture']}") - print(f" Fingers extended: {hand['num_fingers_extended']}") - else: - print("❌ Cannot read frame") - else: - # Process entire video - processor.process_video( - video_path, - output_path, - args.sample_interval, - uuid=args.uuid, - ) - - processor.close() - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/scripts/mediapipe_holistic_processor_v1.11.py b/scripts/mediapipe_holistic_processor_v1.11.py deleted file mode 120000 index 59af35b..0000000 --- a/scripts/mediapipe_holistic_processor_v1.11.py +++ /dev/null @@ -1 +0,0 @@ -../v1.1/scripts/mediapipe_holistic_processor_v1.11.py \ No newline at end of file diff --git a/scripts/mediapipe_processor_v1.11.py b/scripts/mediapipe_processor_v1.11.py deleted file mode 120000 index 06701d0..0000000 --- a/scripts/mediapipe_processor_v1.11.py +++ /dev/null @@ -1 +0,0 @@ -../v1.1/scripts/mediapipe_processor_v1.11.py \ No newline at end of file diff --git a/scripts/parent_chunk_5w1h.py b/scripts/parent_chunk_5w1h.py deleted file mode 100644 index 1afa8c9..0000000 --- a/scripts/parent_chunk_5w1h.py +++ /dev/null @@ -1,381 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -Story Processor V2.0 — Dual Pipeline: Story-based + LLM-based Parent-Child Summarization - -Pipeline 1 (Story): Template-based, instant, no LLM cost - → Parent story summary + Child story summary - → Embedding (Ollama nomic-embed) → pgvector - → BM25 (PostgreSQL tsvector) → full-text search - -Pipeline 2 (LLM): 
LLM-based summarization (Gemma4/Qwen when resources allow) - → Parent LLM summary + Child LLM summary - → Embedding → pgvector + BM25 - -Both pipelines store into chunks table with distinct chunk_types: - story_parent, story_child, llm_parent, llm_child - -Usage: - python parent_chunk_5w1h.py --file-uuid --mode story [--embed] - python parent_chunk_5w1h.py --file-uuid --mode llm [--embed] -""" - -import json, os, sys, argparse, time, requests, psycopg2 -from collections import defaultdict -from typing import Dict, List, Optional - -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) - -DB_URL = os.getenv("DATABASE_URL", "postgresql://accusys@localhost:5432/momentry") -SCHEMA = os.getenv("DATABASE_SCHEMA", "dev") -OUTPUT_DIR = os.getenv("MOMENTRY_OUTPUT_DIR", "/Users/accusys/momentry/output_dev") -EMBEDDING_URL = os.getenv("EMBEDDING_URL", "http://localhost:11436/v1/embeddings") - -def load_speaker_map(file_uuid: str) -> dict: - """Load speaker→identity mapping from DB (generalized, not hardcoded)""" - try: - conn = psycopg2.connect(DB_URL) - cur = conn.cursor() - cur.execute("SET search_path TO %s, public", (SCHEMA,)) - cur.execute( - "SELECT metadata->>'speaker_id', name FROM identities " - "WHERE metadata->>'speaker_id' IS NOT NULL" - ) - spk_map = {} - for spk_id, name in cur.fetchall(): - spk_map[spk_id] = (name, 0.85) # default confidence from MAR - cur.close(); conn.close() - return spk_map if spk_map else DEFAULT_SPEAKER_MAP - except Exception: - return DEFAULT_SPEAKER_MAP - -# Default fallback (used when DB has no speaker mapping) -DEFAULT_SPEAKER_MAP = {} - -CURRENT_VERSIONS = { - "asr": "faster-whisper/small/v1", - "asrx": "speechbrain/ecapa-tdnn/v1", - "cut": "pyscenedetect/default", - "yolo": "yolov5-coreml/v2", - "face_detection": "apple-vision/v2", - "face_embedding": "coreml-facenet/v2", - "speaker_binding": "mar-lip/v1", - "identity_clustering": "cosine-threshold/v1", - "story_agent": "template/v2.0", - "embedding_agent": 
"nomic-embed-768d/v1", -} - -LLM_URL = os.getenv("MOMENTRY_LLM_URL", os.getenv("MOMENTRY_LLM_SUMMARY_URL", "http://127.0.0.1:8082/v1/chat/completions")) -LLM_MODEL = os.getenv("MOMENTRY_LLM_SUMMARY_MODEL", "gemma4") - - -def load_data(file_uuid: str) -> dict: - data = {} - for name in ["asr", "asrx", "cut"]: - path = os.path.join(OUTPUT_DIR, f"{file_uuid}.{name}.json") - data[name] = json.load(open(path)) if os.path.exists(path) else None - return data - - -def build_child_chunks(data: dict, file_uuid: str) -> List[dict]: - """Group ASR sentences by CUT scene boundaries → parent/child structure.""" - asr_segs = data["asr"].get("segments", []) if data["asr"] else [] - asrx_segs = data["asrx"].get("segments", []) if data["asrx"] else [] - cut_scenes = data["cut"].get("scenes", []) if data["cut"] else [] - - # Dynamically load speaker→identity mapping from DB - speaker_map = load_speaker_map(file_uuid) - - if not cut_scenes: - max_t = max( - (asr_segs[-1].get("end", 0) if asr_segs else 0), - (asrx_segs[-1].get("end_time", 0) if asrx_segs else 0), - ) - cut_scenes = [{"start_time": t, "end_time": min(t + 60, max_t)} for t in range(0, int(max_t) + 60, 60)] - - scenes = [] - for cs in cut_scenes: - s, e = cs["start_time"], cs["end_time"] - - children = [] - for seg_idx, seg in enumerate(asr_segs): - st, en = seg.get("start", 0), seg.get("end", 0) - text = seg.get("text", "").strip() - if st < s or en > e or not text: continue - - spk_id = "unknown" - for ax in asrx_segs: - if ax["start_time"] <= st and ax["end_time"] >= en: - spk_id = ax.get("speaker_id", "unknown"); break - - spk_info = speaker_map.get(spk_id) - if spk_info: - character, spk_conf = spk_info - else: - character, spk_conf = spk_id, 0.0 - - children.append({ - "start": st, "end": en, "text": text, - "speaker_id": spk_id, "speaker_name": character, - "speaker_confidence": spk_conf, - "chunk_id": f"{file_uuid}_{seg_idx}", - }) - - # Boundary overlap: even empty scenes get partial children - for seg_idx, seg 
in enumerate(asr_segs): - st, en = seg.get("start", 0), seg.get("end", 0) - text = seg.get("text", "").strip() - if not text: continue - if st >= s and en <= e: continue - if not (st < e and en > s): continue - - spk_id = "unknown" - for ax in asrx_segs: - if ax["start_time"] <= st and ax["end_time"] >= en: - spk_id = ax.get("speaker_id", "unknown"); break - spk_info = speaker_map.get(spk_id) - if spk_info: - character, spk_conf = spk_info - else: - character, spk_conf = spk_id, 0.0 - children.append({ - "start": st, "end": en, "text": text, - "speaker_id": spk_id, "speaker_name": character, - "speaker_confidence": spk_conf, - "chunk_id": f"{file_uuid}_{seg_idx}", - "overlap_type": "partial", - }) - - if children: - scenes.append({ - "start_time": s, "end_time": e, "duration": e - s, - "children": children, "child_count": len(children), - }) - return scenes - - -# ===== Pipeline 1: Story (Template) Summaries ===== - -def generate_story_parent_summary(scene: dict) -> str: - children = scene["children"] - characters = sorted(set(c["speaker_name"] for c in children)) - total_words = sum(len(c["text"].split()) for c in children) - by_speaker = defaultdict(list) - for c in children: by_speaker[c["speaker_name"]].append(c["text"]) - speakers = [] - for char, texts in sorted(by_speaker.items()): - speakers.append(f"{char} ({len(texts)} lines)") - - return ( - f"[{scene['start_time']:.0f}s-{scene['end_time']:.0f}s, {scene['duration']:.0f}s] " - f"Cast: {', '.join(characters)}. Total: {len(children)} lines, {total_words} words. 
" - f"Speakers: {' | '.join(speakers[:3])}" - ) - - -def generate_story_child_summary(child: dict, parent_summary: str) -> str: - return ( - f"[{child['start']:.0f}s-{child['end']:.0f}s] " - f"{child['speaker_name']}: \"{child['text']}\"" - ) - - -# ===== Pipeline 2: LLM Summaries (requires LLM server) ===== - -def generate_llm_parent_summary(scene: dict, max_scenes_processed: int) -> Optional[str]: - """LLM-based parent summary""" - if not LLM_URL: return None - children = scene["children"] - dialogue = "\n".join( - f"[{c['start']:.0f}s] {c['speaker_name']}: {c['text'][:150]}" - for c in children[:15] - ) - prompt = ( - "You are a film analyst. Summarize this scene in one flowing paragraph (60-100 words). " - "Include: who is present, what they discuss, tone/mood.\n\n" - f"Scene: {scene['start_time']:.0f}s - {scene['end_time']:.0f}s\n" - f"Dialogue:\n{dialogue}\n\nSummary:" - ) - try: - resp = requests.post(LLM_URL, json={ - "model": LLM_MODEL, - "messages": [{"role": "user", "content": prompt}], - "max_tokens": 200, "temperature": 0.3, - }, timeout=60) - return resp.json()["choices"][0]["message"]["content"].strip() - except Exception as e: - print(f" ⚠️ LLM parent summary failed: {e}") - return None - - -def generate_llm_child_summary(child: dict, parent_summary: str) -> Optional[str]: - """LLM-based child (sentence) summary""" - return f"[{child['start']:.0f}s-{child['end']:.0f}s] {child['speaker_name']}: \"{child['text']}\"" - - -# ===== Embedding (Ollama nomic-embed) ===== - -def embed_text(text: str, max_retries: int = 3) -> Optional[List[float]]: - """Get embedding via EmbeddingGemma server""" - for attempt in range(max_retries): - try: - resp = requests.post(EMBEDDING_URL, json={ - "input": [text], - }, timeout=30) - if resp.status_code == 200: - data = resp.json() - items = data.get("data", []) - if items: - return items[0]["embedding"] - except Exception as e: - if attempt == max_retries - 1: - print(f" ⚠️ Embedding failed: {e}") - return None - 
time.sleep(1) - return None - - -# ===== DB Store (chunks table with embedding + BM25) ===== - -def store_chunks(file_uuid: str, scenes: List[dict], mode: str, do_embed: bool, conn): - """Store parent + child summaries into chunks table.""" - cur = conn.cursor() - parent_type = f"{mode}_parent" - child_type = f"{mode}_child" - - parent_count = 0 - child_count = 0 - - # Get base chunk_index - cur.execute( - f"SELECT COALESCE(MAX(chunk_index), 0) FROM {SCHEMA}.chunk WHERE file_uuid = %s", - (file_uuid,), - ) - next_index = (cur.fetchone()[0] or 0) + 1 - - for scene in scenes: - parent_text = generate_story_parent_summary(scene) if mode == "story" else generate_llm_parent_summary(scene, parent_count) - if not parent_text: continue - - parent_id = f"{mode}_parent_{file_uuid}_{scene['start_time']:.0f}_{scene['end_time']:.0f}" - - parent_embedding = embed_text(parent_text) if do_embed else None - if do_embed and parent_embedding: - cur.execute( - f""" - INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index, - start_time, end_time, content, text_content, parent_chunk_id, embedding) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s, %s::vector) - ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE - SET content = EXCLUDED.content, text_content = EXCLUDED.text_content, - embedding = EXCLUDED.embedding - """, - (parent_id, parent_id, file_uuid, parent_type, next_index, - scene["start_time"], scene["end_time"], - json.dumps({"summary": parent_text, "mode": mode, "type": "parent", - "source_versions": CURRENT_VERSIONS}), - parent_text, None, parent_embedding), - ) - else: - cur.execute( - f""" - INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index, - start_time, end_time, content, text_content, parent_chunk_id) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s) - ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE - SET content = EXCLUDED.content, text_content = EXCLUDED.text_content - """, - (parent_id, 
parent_id, file_uuid, parent_type, next_index, - scene["start_time"], scene["end_time"], - json.dumps({"summary": parent_text, "mode": mode, "type": "parent", - "source_versions": CURRENT_VERSIONS}), - parent_text, None), - ) - next_index += 1 - parent_count += 1 - - for child in scene["children"]: - child_id = child["chunk_id"] - child_text = generate_story_child_summary(child, parent_text) if mode == "story" else generate_llm_child_summary(child, parent_text) - - child_embedding = embed_text(child_text) if do_embed else None - if do_embed and child_embedding: - cur.execute( - f""" - INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index, - start_time, end_time, content, text_content, parent_chunk_id, embedding) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s, %s::vector) - ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE - SET content = EXCLUDED.content, text_content = EXCLUDED.text_content, - parent_chunk_id = EXCLUDED.parent_chunk_id, - embedding = EXCLUDED.embedding - """, - (child_id, child_id, file_uuid, child_type, next_index, - child["start"], child["end"], - json.dumps({"speaker": child["speaker_name"], "text": child["text"], "mode": mode, - "speaker_confidence": child.get("speaker_confidence", 0), - "source_versions": CURRENT_VERSIONS}), - child_text, parent_id, child_embedding), - ) - else: - cur.execute( - f""" - INSERT INTO {SCHEMA}.chunk (chunk_id, old_chunk_id, file_uuid, chunk_type, chunk_index, - start_time, end_time, content, text_content, parent_chunk_id) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s) - ON CONFLICT (file_uuid, old_chunk_id) DO UPDATE - SET content = EXCLUDED.content, text_content = EXCLUDED.text_content, - parent_chunk_id = EXCLUDED.parent_chunk_id - """, - (child_id, child_id, file_uuid, child_type, next_index, - child["start"], child["end"], - json.dumps({"speaker": child["speaker_name"], "text": child["text"], "mode": mode, - "speaker_confidence": 
child.get("speaker_confidence", 0), - "source_versions": CURRENT_VERSIONS}), - child_text, parent_id), - ) - next_index += 1 - child_count += 1 - - conn.commit() - cur.close() - return parent_count, child_count - - -def main(): - parser = argparse.ArgumentParser(description="Story Processor V2.0") - parser.add_argument("--file-uuid", required=True) - parser.add_argument("--mode", choices=["story", "llm"], default="story") - parser.add_argument("--max-scenes", type=int, default=99999) - parser.add_argument("--embed", action="store_true", help="Generate embeddings (Ollama)") - parser.add_argument("--no-db", action="store_true", help="Skip DB storage") - args = parser.parse_args() - - file_uuid = args.file_uuid - print(f"[STORY] Mode: {args.mode}, Embed: {args.embed}") - - data = load_data(file_uuid) - if not data["asr"]: - print("[STORY] ❌ No ASR data"); return - - scenes = build_child_chunks(data, file_uuid)[:args.max_scenes] - total_children = sum(s["child_count"] for s in scenes) - print(f"[STORY] {len(scenes)} scenes, {total_children} child chunks") - - if not args.no_db: - conn = psycopg2.connect(DB_URL) - try: - pc, cc = store_chunks(file_uuid, scenes, args.mode, args.embed, conn) - print(f"[STORY] DB: {pc} parent, {cc} child chunks ({args.mode})") - finally: - conn.close() - - # Save JSON output - out_path = os.path.join(OUTPUT_DIR, f"{file_uuid}.story_{args.mode}.json") - out_data = {"file_uuid": file_uuid, "mode": args.mode, "scenes": scenes} - with open(out_path, "w") as f: - json.dump(out_data, f, indent=2, ensure_ascii=False, default=str) - print(f"[STORY] ✅ {out_path}") - - -if __name__ == "__main__": - main() diff --git a/scripts/parent_chunk_5w1h_v1.11.py b/scripts/parent_chunk_5w1h_v1.11.py deleted file mode 120000 index c2c77b2..0000000 --- a/scripts/parent_chunk_5w1h_v1.11.py +++ /dev/null @@ -1 +0,0 @@ -../v1.1/scripts/parent_chunk_5w1h_v1.11.py \ No newline at end of file diff --git a/scripts/rebuild_story_content.py 
b/scripts/rebuild_story_content.py deleted file mode 100644 index 0b5ba66..0000000 --- a/scripts/rebuild_story_content.py +++ /dev/null @@ -1,320 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -Rebuild story chunk text_content and regenerates summaries using new ASRX speaker assignments. -Then updates Qdrant momentry_dev_stories and sentence_story/sentence_summary collections. -""" - -import json, sys, time, urllib.request -from urllib.request import Request, urlopen -import psycopg2 - -UUID = "aeed71342a899fe4b4c57b7d41bcb692" -DB_URL = "postgresql://accusys@localhost:5432/momentry?host=/tmp" -QDRANT_URL = "http://localhost:6333" -LLM_URL = "http://localhost:8082/v1/chat/completions" -EMBED_URL = "http://localhost:11436/v1/embeddings" - -def call_llm(dialogue_text): - prompt = f"Dialogue:\n{dialogue_text}\n\n50-word summary:" - body = json.dumps({"model": "google_gemma-4-26B-A4B-it-Q5_K_M.gguf", - "messages": [{"role": "user", "content": prompt}], - "temperature": 0.1, "max_tokens": 100}).encode() - req = Request(LLM_URL, data=body, headers={"Content-Type": "application/json"}) - try: - resp = urlopen(req, timeout=120) - return json.loads(resp.read())["choices"][0]["message"]["content"].strip() - except Exception as e: - print(f" LLM error: {e}") - return "" - -def call_embed(text): - body = json.dumps({"input": text}).encode() - req = Request(EMBED_URL, data=body, headers={"Content-Type": "application/json"}) - try: - resp = urlopen(req, timeout=30) - return json.loads(resp.read())["data"][0]["embedding"] - except Exception as e: - print(f" Embed error: {e}") - return [0.0] * 768 - -print("=== Step 1: Load sentence chunks with new speaker info ===") -conn = psycopg2.connect(DB_URL) -cur = conn.cursor() - -cur.execute(""" - SELECT chunk_index, text_content, metadata->>'new_speaker_name', - metadata->>'speaker_name', content - FROM dev.chunks - WHERE file_uuid = %s AND chunk_type = 'sentence' - ORDER BY chunk_index -""", (UUID,)) -sentence_rows = cur.fetchall() 
-print(f"Loaded {len(sentence_rows)} sentence chunks") - -# Build lookup -sentences = {} -for r in sentence_rows: - idx, old_text, new_name, old_name, content = r - sentences[idx] = { - "old_text": old_text or "", - "new_name": new_name or old_name or "Unknown", - "old_name": old_name or "Unknown", - "content": content or {}, - } - -# Rebuild sentence text_content with new speaker names -print("\n=== Step 2: Rebuild sentence text_content ===") -updated_sentences = 0 -for r in sentence_rows: - idx, old_text, new_name, old_name, content = r - new_name = new_name or old_name or "Unknown" - - # Extract the text part (remove old speaker prefix if exists) - raw_text = "" - if content and isinstance(content, dict): - raw_text = content.get("data", {}).get("text", "") - if not raw_text and old_text: - # Parse old format: [Speaker] text - import re - m = re.search(r'\]\s*(.*)', old_text) - if m: - raw_text = m.group(1) - else: - raw_text = old_text - - new_text = f"[{new_name}] {raw_text}" - - cur.execute(""" - UPDATE dev.chunks - SET text_content = %s, updated_at = NOW() - WHERE file_uuid = %s AND chunk_type = 'sentence' AND chunk_index = %s - """, (new_text, UUID, idx)) - updated_sentences += 1 - -conn.commit() -print(f"Updated {updated_sentences} sentence chunks text_content") - -print("\n=== Step 3: Rebuild story chunk text_content ===") -cur.execute(""" - SELECT id, chunk_id, chunk_index, child_chunk_ids, start_time, end_time, - text_content, summary_text - FROM dev.chunks - WHERE file_uuid = %s AND chunk_type = 'story' - ORDER BY chunk_index -""", (UUID,)) -story_rows = cur.fetchall() -print(f"Loaded {len(story_rows)} story chunks") - -# Build child text per story chunk -story_dialogue_texts = [] -for r in story_rows: - db_id, cid, idx, child_ids, st, et, old_text, old_summary = r - - dialogue_parts = [] - for child_cid in (child_ids or []): - parts = child_cid.split("_") - child_idx = int(parts[-1]) - if child_idx in sentences: - s = sentences[child_idx] - raw = "" - 
if s["content"] and isinstance(s["content"], dict): - raw = s["content"].get("data", {}).get("text", "") - if not raw: - import re - m = re.search(r'\]\s*(.*)', s["old_text"]) - if m: - raw = m.group(1) - else: - raw = s["old_text"] - if raw: - dialogue_parts.append(f'({s["new_name"]}) {raw}') - - dialogue_text = " ".join(dialogue_parts) - story_dialogue_texts.append((db_id, cid, idx, st, et, dialogue_text, old_summary)) - -print(f"Built {len(story_dialogue_texts)} story dialogue texts") - -# Update DB with new text_content (dialogue only, not summary yet) -for item in story_dialogue_texts: - db_id, cid, idx, st, et, dialogue_text, old_summary = item - cur.execute(""" - UPDATE dev.chunks - SET text_content = %s, updated_at = NOW() - WHERE id = %s - """, (dialogue_text, db_id)) - -conn.commit() -print("Updated story chunk dialogue texts") - -print("\n=== Step 4: Generate LLM summaries (all 228 stories) ===") -summaries = [] -for i, item in enumerate(story_dialogue_texts): - db_id, cid, idx, st, et, dialogue_text, old_summary = item - - if len(dialogue_text) < 10: - summary = "[no dialogue]" - embedding = [0.0] * 768 - else: - print(f" [{i+1}/{len(story_dialogue_texts)}] {cid}: {len(dialogue_text)} chars", end="") - try: - summary = call_llm(dialogue_text[:3000]) - print(f" -> {len(summary)} chars") - time.sleep(0.3) - embedding = call_embed(summary) - except Exception as e: - print(f" ERROR: {e}") - summary = "[error]" - embedding = [0.0] * 768 - - # Update DB - s_esc = summary.replace("'", "''") - cur.execute(f""" - UPDATE dev.chunks - SET summary_text = '{s_esc}', updated_at = NOW() - WHERE id = {db_id} - """) - - summaries.append({ - "db_id": db_id, - "chunk_id": cid, - "chunk_index": idx, - "start_time": st, - "end_time": et, - "dialogue": dialogue_text, - "summary": summary, - "embedding": embedding, - }) - -conn.commit() -print(f"\nGenerated {len(summaries)} summaries") - -print("\n=== Step 5: Rebuild Qdrant momentry_dev_stories ===") -# Delete existing -req = 
Request(f"{QDRANT_URL}/collections/momentry_dev_stories", method="DELETE") -try: - urlopen(req) - time.sleep(0.3) -except: - pass - -# Recreate -req = Request(f"{QDRANT_URL}/collections/momentry_dev_stories", - data=json.dumps({"vectors": {"size": 768, "distance": "Cosine"}}).encode(), - headers={"Content-Type": "application/json"}, method="PUT") -urlopen(req) -time.sleep(0.3) - -# Upload dialogue points (0..227) and summary points (228..455) -dialogue_points = [] -summary_points = [] -for s in summaries: - idx = s["chunk_index"] - dialogue_points.append({ - "id": idx + 1, - "vector": [0.0] * 768, - "payload": { - "chunk_id": s["chunk_id"], - "file_uuid": UUID, - "start_time": s["start_time"], - "end_time": s["end_time"], - "type": "story_dialogue", - "text": s["dialogue"][:500], - } - }) - summary_points.append({ - "id": idx + 1 + 228, - "vector": s["embedding"], - "payload": { - "chunk_id": s["chunk_id"], - "file_uuid": UUID, - "start_time": s["start_time"], - "end_time": s["end_time"], - "type": "story_summary", - "summary": s["summary"], - } - }) - -all_story_points = dialogue_points + summary_points - -batch_size = 100 -for start in range(0, len(all_story_points), batch_size): - batch = all_story_points[start:start+batch_size] - req = Request(f"{QDRANT_URL}/collections/momentry_dev_stories/points?wait=true", - data=json.dumps({"points": batch}).encode(), - headers={"Content-Type": "application/json"}, method="PUT") - try: - urlopen(req) - except Exception as e: - print(f" Batch {start}: {e}") - if (start // batch_size) % 3 == 0: - print(f" Uploaded {start + len(batch)}/{len(all_story_points)}") - -print(f"Uploaded {len(all_story_points)} points to momentry_dev_stories") - -print("\n=== Step 6: Populate sentence_story and sentence_summary ===") -# These are the per-sentence template + summary collections -# sentence_story: 3417 points, 768D, template payloads -# sentence_summary: 3417 points, 768D, LLM summary payloads - -for col_name in ["sentence_story", 
"sentence_summary"]: - req = Request(f"{QDRANT_URL}/collections/{col_name}", method="DELETE") - try: - urlopen(req) - time.sleep(0.2) - except: - pass - - req = Request(f"{QDRANT_URL}/collections/{col_name}", - data=json.dumps({"vectors": {"size": 768, "distance": "Cosine"}}).encode(), - headers={"Content-Type": "application/json"}, method="PUT") - urlopen(req) - time.sleep(0.2) - -# Build points for sentence_story and sentence_summary -story_sentence_points = [] -summary_sentence_points = [] -for idx in sorted(sentences.keys()): - s = sentences[idx] - raw_text = "" - if s["content"] and isinstance(s["content"], dict): - raw_text = s["content"].get("data", {}).get("text", "") - - dialog_line = f'({s["new_name"]}) {raw_text}' - - story_sentence_points.append({ - "id": idx + 1, - "vector": [0.0] * 768, - "payload": { - "chunk_id": f"{UUID}_{idx}", - "file_uuid": UUID, - "start_time": 0, - "end_time": 0, - "text": dialog_line, - "speaker_name": s["new_name"], - "chunk_type": "sentence", - } - }) - -# Upload sentence_story (dialogue template) -batch_size = 200 -for start in range(0, len(story_sentence_points), batch_size): - batch = story_sentence_points[start:start+batch_size] - req = Request(f"{QDRANT_URL}/collections/sentence_story/points?wait=true", - data=json.dumps({"points": batch}).encode(), - headers={"Content-Type": "application/json"}, method="PUT") - try: - urlopen(req) - except Exception as e: - print(f" sentence_story batch {start}: {e}") - if (start // batch_size) % 5 == 0: - print(f" Uploaded {start + len(batch)}/3417 sentence_story") - -print("Uploaded sentence_story points") - -# sentence_summary will be populated when we generate per-sentence summaries -# For now, mark as TODO -print("sentence_summary: SKIPPED (needs per-sentence LLM summaries)") - -cur.close() -conn.close() -print("\n=== Done ===") diff --git a/scripts/rebuild_story_content_v1.11.py b/scripts/rebuild_story_content_v1.11.py deleted file mode 120000 index 876d33d..0000000 --- 
a/scripts/rebuild_story_content_v1.11.py +++ /dev/null @@ -1 +0,0 @@ -../v1.1/scripts/rebuild_story_content_v1.11.py \ No newline at end of file diff --git a/scripts/regenerate_parent_5w1h.py b/scripts/regenerate_parent_5w1h.py deleted file mode 100644 index 9f3cad0..0000000 --- a/scripts/regenerate_parent_5w1h.py +++ /dev/null @@ -1,197 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -Regenerate parent chunk summaries using 5W1H multi-dimensional structure via gemma4. - -5W1H Structure: -- Who: Main characters/people involved -- What: Key actions/events -- When: Temporal context (sequence in story) -- Where: Location/setting -- Why: Motivation/conflict driving the scene -- How: Emotional tone/manner of events -""" - -import json -import requests -import psycopg2 -import psycopg2.extras - -DB_CONFIG = {"host": "localhost", "user": "accusys", "dbname": "momentry"} -UUID = "384b0ff44aaaa1f1" -LLAMA_URL = "http://127.0.0.1:8081/v1/chat/completions" - - -def get_parent_with_children(): - """Get all parent chunks with their child chunk texts""" - conn = psycopg2.connect(**DB_CONFIG) - cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) - - cur.execute( - """ - SELECT pc.id, pc.scene_order, pc.start_time, pc.end_time, - pc.start_frame, pc.end_frame, pc.fps, pc.summary_text as old_summary, - pc.metadata, - ARRAY_AGG(c.text_content ORDER BY c.start_time) as child_texts - FROM parent_chunks pc - LEFT JOIN chunks c ON c.parent_chunk_id = pc.id::varchar - WHERE pc.uuid = %s - GROUP BY pc.id, pc.scene_order, pc.start_time, pc.end_time, - pc.start_frame, pc.end_frame, pc.fps, pc.summary_text, pc.metadata - ORDER BY pc.scene_order - """, - (UUID,), - ) - - parents = cur.fetchall() - cur.close() - conn.close() - return parents - - -def call_gemma4(prompt, max_tokens=1500): - """Call Gemma4 via llama-server OpenAI-compatible API""" - payload = { - "messages": [{"role": "user", "content": prompt}], - "max_tokens": max_tokens, - "temperature": 0.3, - "min_p": 0.1, - } - 
try: - resp = requests.post(LLAMA_URL, json=payload, timeout=180) - if resp.status_code == 200: - result = resp.json() - content = ( - result.get("choices", [{}])[0] - .get("message", {}) - .get("content", "") - .strip() - ) - return content - except Exception as e: - print(f" ⚠️ llama-server error: {e}") - return "" - - -def generate_5w1h_summary(parent, scene_num): - """Generate 5W1H structured summary using gemma4""" - texts = [t for t in (parent["child_texts"] or []) if t] - if not texts: - return None - - # Use only first 3 and last 3 dialogue lines for context (much faster) - sample_texts = texts[:3] + ["..."] + texts[-3:] if len(texts) > 6 else texts - combined = "\n".join(sample_texts)[:1500] - duration = parent["end_time"] - parent["start_time"] - - prompt = f"""You are a film scene analyst. Analyze this scene and provide 5W1H analysis. - -Scene {scene_num}/17 | {duration:.0f}s | {len(texts)} dialogue lines - -Key dialogue: -{combined} - -Respond with ONLY this JSON: -{{"summary_5lines":"...","who":"...","what":"...","when":"...","where":"...","why":"...","how":"...","characters":[],"tone":[],"key_events":[]}} -IMPORTANT: "summary_5lines" must be EXACTLY 5 lines describing the scene. 
Each line should be a complete sentence separated by \\n.""" - - response = call_gemma4(prompt, max_tokens=2000) - - if not response: - return None - - # Simple JSON extraction: find first { and last } - try: - start = response.find("{") - end = response.rfind("}") + 1 - if start >= 0 and end > start: - return json.loads(response[start:end]) - except Exception: - pass - - return None - - -def update_parent_chunk(parent, analysis): - """Update parent chunk with 5W1H structured data""" - if not analysis: - return False - - conn = psycopg2.connect(**DB_CONFIG) - cur = conn.cursor() - - # Create structured summary text (5 lines) - structured_text = f"{analysis.get('summary_5lines', '')}" - - # Update metadata with full 5W1H structure - metadata = parent["metadata"] if parent["metadata"] else {} - metadata["auto_generated_by"] = "gemma4" - metadata["chunk_count"] = len(parent["child_texts"] or []) - metadata["structured_summary"] = { - "summary_5lines": analysis.get("summary_5lines", ""), - "who": analysis.get("who", ""), - "what": analysis.get("what", ""), - "when": analysis.get("when", ""), - "where": analysis.get("where", ""), - "why": analysis.get("why", ""), - "how": analysis.get("how", ""), - "characters": analysis.get("characters", []), - "tone": analysis.get("tone", []), - "key_events": analysis.get("key_events", []), - } - - cur.execute( - """ - UPDATE parent_chunks - SET summary_text = %s, - metadata = %s::jsonb - WHERE id = %s - """, - (structured_text, json.dumps(metadata, ensure_ascii=False), parent["id"]), - ) - - conn.commit() - cur.close() - conn.close() - return True - - -def main(): - print(f"🎬 Regenerating 5W1H summaries for {UUID}") - print(f" Using llama.cpp server at {LLAMA_URL}") - print("=" * 70) - - parents = get_parent_with_children() - print(f"📥 Found {len(parents)} parent chunks") - - success_count = 0 - for i, parent in enumerate(parents): - duration = parent["end_time"] - parent["start_time"] - text_count = len(parent["child_texts"] or []) 
- print( - f"\n🎬 Scene {parent['scene_order']}: {parent['start_time']:.0f}s-{parent['end_time']:.0f}s ({duration:.0f}s, {text_count} chunks)" - ) - if parent["old_summary"]: - print(f" Old: {parent['old_summary'][:80]}...") - - analysis = generate_5w1h_summary(parent, parent["scene_order"]) - - if analysis: - summary = analysis.get("summary_5lines", "N/A") - print(f" ✅ Summary: {summary[:100]}...") - print(f" 👤 Who: {analysis.get('who', 'N/A')[:60]}") - print(f" 📍 Where: {analysis.get('where', 'N/A')[:60]}") - print(f" 💡 Why: {analysis.get('why', 'N/A')[:60]}") - - if update_parent_chunk(parent, analysis): - success_count += 1 - else: - print(" ❌ Failed to generate analysis") - - print(f"\n{'=' * 70}") - print( - f"✅ Updated {success_count}/{len(parents)} parent chunks with 5W1H summaries" - ) - - -if __name__ == "__main__": - main() diff --git a/scripts/regenerate_parent_5w1h_v1.11.py b/scripts/regenerate_parent_5w1h_v1.11.py deleted file mode 120000 index 422dd8a..0000000 --- a/scripts/regenerate_parent_5w1h_v1.11.py +++ /dev/null @@ -1 +0,0 @@ -../v1.1/scripts/regenerate_parent_5w1h_v1.11.py \ No newline at end of file diff --git a/scripts/store_traced_faces.py b/scripts/store_traced_faces.py index 54f3174..4650ef3 100644 --- a/scripts/store_traced_faces.py +++ b/scripts/store_traced_faces.py @@ -39,140 +39,8 @@ def get_conn(): def merge_traces_within_cuts(face_data: dict, cut_scenes: list) -> dict: - """Merge traces within the same cut if they have similar embeddings (same person re-appeared).""" - frames = face_data.get("frames", {}) - if not frames: - return face_data - - # Map each frame to its scene/cut number - frame_to_scene = {} - for s in cut_scenes: - for f in range(s["start_frame"], s["end_frame"] + 1): - frame_to_scene[f] = s["scene_number"] - - # Collect per-trace data: scene numbers, embeddings, face positions - trace_frames = defaultdict(list) - trace_embeddings = defaultdict(list) - trace_poses = {} - - for fnum_str, frm_data in frames.items(): - 
fnum = int(fnum_str) - for face in frm_data.get("faces", []): - tid = face.get("trace_id") - if tid is None: - continue - trace_frames[tid].append(fnum) - emb = face.get("embedding") - if emb is not None: - trace_embeddings[tid].append(emb) - if tid not in trace_poses: - trace_poses[tid] = ( - face.get("x", 0), - face.get("y", 0), - face.get("width", 0), - face.get("height", 0), - ) - - if len(trace_embeddings) < 2: - return face_data - - # Compute centroid per trace - trace_centroids = {} - for tid, embs in trace_embeddings.items(): - centroid = np.mean(embs, axis=0) - norm = np.linalg.norm(centroid) - trace_centroids[tid] = centroid / norm if norm > 0 else centroid - - # Determine which scene each trace belongs to (majority of frames) - trace_scene = {} - for tid, fns in trace_frames.items(): - scene_votes = defaultdict(int) - for fn in fns: - scene = frame_to_scene.get(fn, -1) - scene_votes[scene] += 1 - trace_scene[tid] = max(scene_votes, key=scene_votes.get) if scene_votes else -1 - - # Within each scene, merge traces with similar centroids - scene_traces = defaultdict(list) - for tid, scene in trace_scene.items(): - if scene >= 0 and tid in trace_centroids: - scene_traces[scene].append(tid) - - merged = 0 - next_new_id = max(trace_frames.keys()) + 1 if trace_frames else 0 - SIMILARITY_THRESHOLD = 0.75 - - for scene, tids in scene_traces.items(): - if len(tids) < 2: - continue - used = set() - for i in range(len(tids)): - if tids[i] in used: - continue - keep_tid = tids[i] - for j in range(i + 1, len(tids)): - if tids[j] in used: - continue - sim = float(np.dot(trace_centroids[tids[i]], trace_centroids[tids[j]])) - if sim >= SIMILARITY_THRESHOLD: - # Merge tids[j] into keep_tid - for fnum_str, frm_data in frames.items(): - for face in frm_data.get("faces", []): - if face.get("trace_id") == tids[j]: - face["trace_id"] = keep_tid - used.add(tids[j]) - merged += 1 - - # If any merges happened, rebuild trace metadata - if merged > 0: - # Rebuild traces dict - 
new_traces = {} - new_trace_frames = defaultdict(list) - for fnum_str, frm_data in frames.items(): - fnum = int(fnum_str) - for face in frm_data.get("faces", []): - tid = face.get("trace_id") - if tid is not None: - new_trace_frames[tid].append( - { - "frame": fnum, - "face_index": 0, - "bbox": { - "x": face.get("x", 0), - "y": face.get("y", 0), - "width": face.get("width", 0), - "height": face.get("height", 0), - }, - "confidence": face.get("confidence", 0.0), - } - ) - - for tid, path in new_trace_frames.items(): - if len(path) >= 1: - frames_sorted = sorted(set(p["frame"] for p in path)) - new_traces[str(tid)] = { - "trace_id": tid, - "start_frame": frames_sorted[0], - "end_frame": frames_sorted[-1], - "duration_frames": frames_sorted[-1] - frames_sorted[0] + 1, - "duration_seconds": (frames_sorted[-1] - frames_sorted[0]) - / face_data.get("metadata", {}).get("fps", 25.0), - "total_appearances": len(path), - "path": path, - } - - face_data["traces"] = new_traces - face_data["metadata"]["trace_stats"] = { - "total_traces": len(new_traces), - "active_traces": len(new_traces), - "long_traces": len( - [t for t in new_traces.values() if t["duration_frames"] >= 2] - ), - } - print( - f"[TRACE] Post-merge: {merged} traces merged, {len(new_traces)} total traces" - ) - + """Merge traces within the same cut - DISABLED (no embeddings).""" + # TODO: Reimplement with Qdrant _faces collection return face_data @@ -235,57 +103,12 @@ def run_face_tracker( print(f"[TRACE] Processing {len(face_data.get('frames', {}))} frames") - # Load embeddings from DB for the face tracker + # Embeddings no longer loaded from DB - use IoU-only tracking file_uuid = ( face_json_path.split("/")[-1] .replace(".face.json", "") .replace("_traced.json", "") ) - try: - conn = get_conn() - cur = conn.cursor() - cur.execute( - f""" - SELECT frame_number, x, y, width, height, embedding - FROM {SCHEMA}.face_detections - WHERE file_uuid = %s AND embedding IS NOT NULL - """, - (file_uuid,), - ) - emb_rows = 
cur.fetchall() - conn.close() - # Build lookup: frame_number → list of (bbox, embedding) - emb_map = {} - for fn, x, y, w, h, emb in emb_rows: - emb_map.setdefault(fn, []).append(((x, y, w, h), emb)) - print(f"[TRACE] Loaded {len(emb_rows)} embeddings from DB") - - # Attach embeddings to face data - attached = 0 - for fnum_str, frm_data in face_data.get("frames", {}).items(): - fnum = int(fnum_str) - for face in frm_data.get("faces", []): - x, y, w, h = ( - face.get("x", 0), - face.get("y", 0), - face.get("width", 0), - face.get("height", 0), - ) - candidates = emb_map.get(fnum, []) - # Find matching embedding by bbox proximity - for (ex, ey, ew, eh), emb in candidates: - if ( - abs(x - ex) < 10 - and abs(y - ey) < 10 - and abs(w - ew) < 10 - and abs(h - eh) < 10 - ): - face["embedding"] = emb - attached += 1 - break - print(f"[TRACE] Attached {attached} embeddings to faces") - except Exception as e: - print(f"[TRACE] WARNING: Could not load embeddings: {e}") # Load cut boundaries from cut.json (same directory as face.json) cut_boundaries = None @@ -301,7 +124,7 @@ def run_face_tracker( print(f"[TRACE] Loaded {len(cut_boundaries)} cut boundaries") face_data = track_faces( - face_data, use_embedding=True, cut_boundaries=cut_boundaries + face_data, use_embedding=False, cut_boundaries=cut_boundaries ) # Merge traces within same cut (same person re-appearing after occlusion/pose change) @@ -309,7 +132,7 @@ def run_face_tracker( face_data = merge_traces_within_cuts(face_data, cut_scenes) metadata = face_data.get("metadata", {}) - metadata["tracking_method"] = "iou_embedding" + metadata["tracking_method"] = "iou_only" metadata["tracked_at"] = datetime.now().isoformat() face_data["metadata"] = metadata @@ -350,22 +173,19 @@ def store_traced_faces(file_uuid: str, traced_json_path: str, schema: str = SCHE if face_id is None: face_id = f"face_{trace_id}" attributes = face.get("attributes") - embedding = face.get("embedding") bbox = json.dumps({"x": x, "y": y, "width": w, 
"height": h}) - embed_vec = embedding if embedding and len(embedding) > 0 else None try: cur.execute( f""" UPDATE {schema}.face_detections - SET trace_id = %s, embedding = %s, face_id = %s + SET trace_id = %s, face_id = %s WHERE file_uuid = %s AND frame_number = %s AND x = %s AND y = %s AND width = %s AND height = %s """, ( trace_id, - embed_vec, face_id, file_uuid, frame_num, diff --git a/scripts/story_embed.py b/scripts/story_embed.py deleted file mode 100644 index c3626ad..0000000 --- a/scripts/story_embed.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -Story Embedding Pipeline: -1. Read story chunks → LLM summary (Gemma4) -2. Embed summary (EmbeddingGemma) -3. Store in chunks table + Qdrant -""" - -import json, urllib.request, subprocess, sys, time, os - -UUID = "aeed71342a899fe4b4c57b7d41bcb692" -PSQL = ["/Users/accusys/pgsql/18.3/bin/psql", "-U", "accusys", "-d", "momentry", "-t", "-A"] -LLM_URL = "http://localhost:8082/v1/chat/completions" -EMBED_URL = "http://localhost:11436/v1/embeddings" -QDRANT_URL = "http://localhost:6333" -QDRANT_COL = "momentry_dev_stories" - -def psql(sql): - r = subprocess.run(PSQL + ["-c", sql], capture_output=True, text=True, timeout=30) - return r.stdout.strip() - -def call_llm(dialogue): - prompt = f"Dialogue: {dialogue}\n\n50-word summary:" - body = json.dumps({"model": "google_gemma-4-26B-A4B-it-Q5_K_M.gguf", - "messages": [{"role": "user", "content": prompt}], - "temperature": 0.1, "max_tokens": 100}).encode() - req = urllib.request.Request(LLM_URL, data=body, headers={"Content-Type": "application/json"}) - resp = urllib.request.urlopen(req, timeout=120) - return json.loads(resp.read())["choices"][0]["message"]["content"].strip() - -def call_embed(text): - body = json.dumps({"input": text}).encode() - req = urllib.request.Request(EMBED_URL, data=body, headers={"Content-Type": "application/json"}) - resp = urllib.request.urlopen(req, timeout=30) - return json.loads(resp.read())["data"][0]["embedding"] - 
-# Step 0: Ensure Qdrant collection exists (768 dims) -subprocess.run(["curl", "-s", "-X", "PUT", f"{QDRANT_URL}/collections/{QDRANT_COL}", - "-H", "Content-Type: application/json", - "-d", '{"vectors":{"size":768,"distance":"Cosine"}}'], capture_output=True) - -# Step 1: Get all story chunks that need summaries -lines = [l for l in psql(f"SELECT chunk_id, chunk_index, start_time, end_time, text_content FROM dev.chunks WHERE file_uuid='{UUID}' AND chunk_type='story' AND (summary_text IS NULL OR summary_text = '') ORDER BY chunk_index").split('\n') if l.strip() and '|' in l] - -print(f"Chunks to process: {len(lines)}") -total = len(lines) -errors = 0 - -for i, line in enumerate(lines): - parts = line.split('|', 4) - cid, idx, st, et, dialogue = parts[0].strip(), int(parts[1]), float(parts[2]), float(parts[3]), parts[4] if len(parts) > 4 else "" - - if len(dialogue) < 10: - summary = "[no dialogue]" - embedding = [0.0] * 768 - else: - try: - summary = call_llm(dialogue) - time.sleep(0.3) - embedding = call_embed(summary) - except Exception as e: - print(f"[{i+1}/{total}] Error: {cid} - {e}") - errors += 1 - summary = "[error]" - embedding = [0.0] * 768 - - # Update DB - s_esc = summary.replace("'", "''") - psql(f"UPDATE dev.chunks SET summary_text='{s_esc}', updated_at=CURRENT_TIMESTAMP WHERE chunk_id='{cid}'") - - # Store in Qdrant - point = json.dumps({"points": [{"id": idx + 1, "vector": embedding, - "payload": {"chunk_id": cid, "file_uuid": UUID, "start_time": st, "end_time": et, - "summary": summary, "type": "story_summary"} - }]}).encode() - req = urllib.request.Request(f"{QDRANT_URL}/collections/{QDRANT_COL}/points?wait=true", - data=point, headers={"Content-Type": "application/json"}, method="PUT") - try: - urllib.request.urlopen(req, timeout=10) - except: - pass - - if (i+1) % 20 == 0: - print(f"[{i+1}/{total}] {errors} errors so far") - -print(f"\nDone. 
Processed: {total}, Errors: {errors}") -print(f"Qdrant: {QDRANT_COL}") diff --git a/scripts/story_embed_v1.11.py b/scripts/story_embed_v1.11.py deleted file mode 120000 index 1711551..0000000 --- a/scripts/story_embed_v1.11.py +++ /dev/null @@ -1 +0,0 @@ -../v1.1/scripts/story_embed_v1.11.py \ No newline at end of file diff --git a/scripts/story_pipeline_full.py b/scripts/story_pipeline_full.py deleted file mode 100644 index 6c6f20d..0000000 --- a/scripts/story_pipeline_full.py +++ /dev/null @@ -1,230 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -Story Pipeline Full — Speaker + Story + Summary -Step 1: Update sentence chunks with speaker name -Step 2: Rebuild story chunks + re-embed -Step 3: LLM summary × 228 + embed -""" - -import json, urllib.request, subprocess, sys, time, os - -UUID = "aeed71342a899fe4b4c57b7d41bcb692" -DIR = "/Users/accusys/momentry/output_dev" -PSQL = ["/Users/accusys/pgsql/18.3/bin/psql", "-U", "accusys", "-d", "momentry", "-t", "-A"] -LLM_URL = "http://localhost:8082/v1/chat/completions" -EMBED_URL = "http://localhost:11436/v1/embeddings" -QDRANT_URL = "http://localhost:6333/collections/momentry_dev_stories/points" - -def psql(sql): - r = subprocess.run(PSQL + ["-c", sql], capture_output=True, text=True, timeout=30) - return r.stdout.strip() - -def psql_file(path): - r = subprocess.run(PSQL + ["-f", path], capture_output=True, text=True, timeout=60) - if r.stderr and "ERROR" in r.stderr: - print(f"SQL Error: {r.stderr[:200]}") - return r.returncode - -def embed_text(text): - body = json.dumps({"input": text[:1024]}).encode() - req = urllib.request.Request(EMBED_URL, data=body, headers={"Content-Type": "application/json"}) - return json.loads(urllib.request.urlopen(req, timeout=30).read())["data"][0]["embedding"] - -def llm_summary(dialogue): - body = json.dumps({ - "model": "google_gemma-4-26B-A4B-it-Q5_K_M.gguf", - "messages": [{"role": "user", "content": f"Summarize concisely:\n{dialogue}\n\n50-word summary:"}], - "temperature": 0.1, 
"max_tokens": 100, - }).encode() - req = urllib.request.Request(LLM_URL, data=body, headers={"Content-Type": "application/json"}) - return json.loads(urllib.request.urlopen(req, timeout=120).read())["choices"][0]["message"]["content"].strip() - -fps = 25.0 -FILE_ID = 242 - -# ═══════════════════════════════════════════════════ -# Step 0: Load ASR + ASRX + speaker map -# ═══════════════════════════════════════════════════ -print("=" * 60) -print("Step 0: Loading data...") -asr = json.load(open(f"{DIR}/{UUID}.asr.json")) -segs = asr["segments"] -asrx = json.load(open(f"{DIR}/{UUID}.asrx.json")) -asrx_segs = asrx["segments"] - -# Speaker map from identity_bindings -r = psql("SELECT ib.identity_value, i.name FROM dev.identity_bindings ib JOIN dev.identities i ON i.id=ib.identity_id WHERE ib.identity_type='speaker'") -speaker_map = {} -for line in r.strip().split('\n'): - if line.strip() and '|' in line: - p = line.split('|') - speaker_map[p[0].strip()] = p[1].strip() -speaker_map["SPEAKER_0"] = "Speaker_0" # Fallback for unbounded - -# ═══════════════════════════════════════════════════ -# Step 1: Update sentence chunks with speaker -# ═══════════════════════════════════════════════════ -print("\n" + "=" * 60) -print("Step 1: Updating sentence chunks with speaker...") - -sql = ["BEGIN;"] -chunk_meta = {} # idx → {speaker_id, speaker_name} - -for idx, seg in enumerate(segs): - st, et = seg["start"], seg["end"] - text = seg["text"].strip() - if not text: - continue - - # Find overlapping ASRX segment → speaker_id - spk_id = "SPEAKER_0" - for ax in asrx_segs: - if ax.get("start_time", 0) <= st and ax.get("end_time", 0) >= et: - spk_id = ax.get("speaker_id", "SPEAKER_0") - break - - spk_name = speaker_map.get(spk_id, spk_id) - new_text = f"[{spk_name}] {text}" - meta = json.dumps({"speaker_id": spk_id, "speaker_name": spk_name}) - esc = new_text.replace("'", "''") - - sql.append(f"UPDATE dev.chunks SET text_content='{esc}', metadata='{meta}'::jsonb WHERE file_uuid='{UUID}' 
AND chunk_id='{UUID}_{idx}';") - chunk_meta[idx] = {"speaker_id": spk_id, "speaker_name": spk_name} - -sql.append("COMMIT;") -with open("/tmp/s1_speaker.sql", "w") as f: - f.write("\n".join(sql)) - -psql_file("/tmp/s1_speaker.sql") -print(f" Updated {len(chunk_meta)} sentence chunks with speaker") - -# ═══════════════════════════════════════════════════ -# Step 2: Rebuild story chunks + re-embed -# ═══════════════════════════════════════════════════ -print("\n" + "=" * 60) -print("Step 2: Rebuilding story chunks...") - -# Delete old story chunks -psql(f"DELETE FROM dev.chunks WHERE file_uuid='{UUID}' AND chunk_type='story';") - -# Recreate -CHUNK_SIZE = 15 -sql2 = ["BEGIN;"] -story_meta = [] - -for i in range(0, len(segs), CHUNK_SIZE): - group = segs[i:i+CHUNK_SIZE] - st, et = group[0]["start"], group[-1]["end"] - idx = i // CHUNK_SIZE - chunk_id = f"{UUID}_story_{idx}" - - # Build speaker text from individual sentences - texts = [] - speakers_used = {} - for j, seg in enumerate(group): - seg_idx = i + j - if seg_idx in chunk_meta: - cm = chunk_meta[seg_idx] - text = seg["text"].strip() - if text: - texts.append(f"[{cm['speaker_name']}] {text}") - speakers_used[cm['speaker_name']] = speakers_used.get(cm['speaker_name'], 0) + 1 - - dialogue = " ".join(texts) - child_ids = ", ".join([f"'{UUID}_{j}'" for j in range(i, min(i+CHUNK_SIZE, len(segs)))]) - words = sum(len(t.split()) for t in texts) - - meta = json.dumps({"method": "fixed_15", "seg_count": len(group), "words": words, "speakers": speakers_used}) - esc = dialogue.replace("'", "''") - - sql2.append(f"""INSERT INTO dev.chunks (file_id,file_uuid,chunk_id,old_chunk_id,chunk_index,chunk_type,start_time,end_time,fps,start_frame,end_frame,text_content,content,metadata,frame_count,child_chunk_ids) - VALUES ({FILE_ID},'{UUID}','{chunk_id}','{chunk_id}',{idx},'story',{st},{et},{fps},{int(st*fps)},{int(et*fps)},'{esc}','{{"type":"story_parent"}}'::jsonb,'{meta}'::jsonb,{int((et-st)*fps)},ARRAY[{child_ids}]);""") - - 
story_meta.append({"idx": idx, "st": st, "et": et, "dialogue": dialogue, "words": words, "speakers": speakers_used}) - -sql2.append("COMMIT;") -with open("/tmp/s2_story.sql", "w") as f: - f.write("\n".join(sql2)) -psql_file("/tmp/s2_story.sql") -print(f" Created {len(story_meta)} story chunks") - -# Embed + upsert to Qdrant -print("\n Embedding story chunks...") -points_dialogue = [] -for sm in story_meta: - if len(sm["dialogue"]) < 10: - continue - vec = embed_text(sm["dialogue"]) - points_dialogue.append({"id": sm["idx"] + 1, "vector": vec, "payload": { - "chunk_id": f"{UUID}_story_{sm['idx']}", "file_uuid": UUID, - "start_time": sm["st"], "end_time": sm["et"], "type": "story_dialogue" - }}) - -for i in range(0, len(points_dialogue), 100): - batch = points_dialogue[i:i+100] - data = json.dumps({"points": batch, "wait": True}).encode() - req = urllib.request.Request(f"{QDRANT_URL}?wait=true", data=data, headers={"Content-Type": "application/json"}, method="PUT") - urllib.request.urlopen(req, timeout=30) -print(f" Qdrant: {len(points_dialogue)} dialogue vectors") - -# ═══════════════════════════════════════════════════ -# Step 3: LLM summaries + embed -# ═══════════════════════════════════════════════════ -print("\n" + "=" * 60) -print("Step 3: LLM summaries...") - -points_summary = [] -summary_sql = ["BEGIN;"] - -for i, sm in enumerate(story_meta): - if len(sm["dialogue"]) < 10: - continue - - try: - summary = llm_summary(sm["dialogue"]) - time.sleep(0.3) - vec = embed_text(summary) - time.sleep(0.1) - except Exception as e: - print(f" Error on story {sm['idx']}: {e}") - summary = "[error]" - vec = [0.0] * 768 - - s_esc = summary.replace("'", "''") - summary_sql.append(f"UPDATE dev.chunks SET summary_text='{s_esc}', updated_at=CURRENT_TIMESTAMP WHERE file_uuid='{UUID}' AND chunk_id='{UUID}_story_{sm['idx']}';") - - points_summary.append({"id": 100000 + sm["idx"] + 1, "vector": vec, "payload": { - "chunk_id": f"{UUID}_story_{sm['idx']}", "file_uuid": UUID, - 
"start_time": sm["st"], "end_time": sm["et"], - "summary": summary, "type": "story_summary" - }}) - - if (i + 1) % 50 == 0: - print(f" {i+1}/{len(story_meta)}") - -# Update DB with summaries -summary_sql.append("COMMIT;") -with open("/tmp/s3_summary.sql", "w") as f: - f.write("\n".join(summary_sql)) -psql_file("/tmp/s3_summary.sql") - -# Upsert summary vectors to Qdrant -for i in range(0, len(points_summary), 100): - batch = points_summary[i:i+100] - data = json.dumps({"points": batch, "wait": True}).encode() - req = urllib.request.Request(f"{QDRANT_URL}?wait=true", data=data, headers={"Content-Type": "application/json"}, method="PUT") - urllib.request.urlopen(req, timeout=30) - -print(f" Qdrant: {len(points_summary)} summary vectors") - -# ═══════════════════════════════════════════════════ -# Step 4: Verify -# ═══════════════════════════════════════════════════ -print("\n" + "=" * 60) -print("Done.") -r1 = psql(f"SELECT count(*) FROM dev.chunks WHERE file_uuid='{UUID}' AND chunk_type='sentence' AND text_content LIKE '[%'") -r2 = psql(f"SELECT count(*) FROM dev.chunks WHERE file_uuid='{UUID}' AND chunk_type='story'") -r3 = psql(f"SELECT count(*) FROM dev.chunks WHERE file_uuid='{UUID}' AND chunk_type='story' AND summary_text IS NOT NULL") -print(f"Sentence chunks with speaker: {r1}") -print(f"Story chunks: {r2}") -print(f"Story chunks with summary: {r3}") diff --git a/scripts/story_pipeline_full_v1.11.py b/scripts/story_pipeline_full_v1.11.py deleted file mode 120000 index 96bc9b3..0000000 --- a/scripts/story_pipeline_full_v1.11.py +++ /dev/null @@ -1 +0,0 @@ -../v1.1/scripts/story_pipeline_full_v1.11.py \ No newline at end of file diff --git a/scripts/story_processor.py b/scripts/story_processor.py deleted file mode 100755 index 7fe418f..0000000 --- a/scripts/story_processor.py +++ /dev/null @@ -1,325 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -Story Processor - Generate parent-child chunk hierarchy for RAG -Uses LOCAL video analysis (ASR, YOLO, OCR, Scene) to 
create parent chunks. -NO cloud API calls - fully offline processing -""" - -import sys -import json -import os -import argparse -from typing import Dict, List, Any - -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -from redis_publisher import RedisPublisher - - -def extract_video_metadata(video_path: str) -> Dict[str, Any]: - """Extract basic video metadata using ffprobe""" - import subprocess - - try: - cmd = [ - "ffprobe", - "-v", - "quiet", - "-print_format", - "json", - "-show_format", - "-show_streams", - video_path, - ] - result = subprocess.run(cmd, capture_output=True, text=True) - if result.returncode == 0: - return json.loads(result.stdout) - except Exception: - pass - return {} - - -def generate_parent_child_chunks( - asr_data: Dict, - cut_data: Dict, - yolo_data: Dict, - ocr_data: Dict, - scene_data: Dict, - parent_chunk_size: int = 5, -) -> Dict: - """ - Generate parent-child chunk hierarchy using LOCAL data only. - No LLM/API calls - uses template-based narrative generation. 
- """ - child_chunks = [] - parent_chunks = [] - - # Create child chunks from ASR - for seg in asr_data.get("segments", []): - child_chunks.append( - { - "chunk_id": f"asr_{seg.get('start', 0):.1f}_{seg.get('end', 0):.1f}", - "chunk_type": "asr", - "source": "asr", - "start_time": seg.get("start", 0), - "end_time": seg.get("end", 0), - "text_content": seg.get("text", ""), - "content": { - "text": seg.get("text", ""), - "confidence": seg.get("confidence", 0), - }, - "child_chunk_ids": [], - "parent_chunk_id": None, - } - ) - - # Create child chunks from CUT scenes - for scene in cut_data.get("scenes", []): - child_chunks.append( - { - "chunk_id": f"cut_{scene.get('scene_number', 0)}", - "chunk_type": "cut", - "source": "cut", - "start_time": scene.get("start_time", 0), - "end_time": scene.get("end_time", 0), - "text_content": f"Scene {scene.get('scene_number', 0)}", - "content": { - "scene_number": scene.get("scene_number", 0), - "duration": scene.get("duration", 0), - }, - "child_chunk_ids": [], - "parent_chunk_id": None, - } - ) - - asr_child_ids = [c["chunk_id"] for c in child_chunks if c["source"] == "asr"] - cut_child_ids = [c["chunk_id"] for c in child_chunks if c["source"] == "cut"] - - yolo_frames = yolo_data.get("frames", []) - ocr_frames = ocr_data.get("frames", []) - - # Group ASR segments into parent chunks - for i in range(0, len(asr_child_ids), parent_chunk_size): - batch = asr_child_ids[i : i + parent_chunk_size] - if not batch: - continue - - batch_texts = [] - batch_objects = [] - batch_times = [] - - for child_id in batch: - for child in child_chunks: - if child["chunk_id"] == child_id: - if child["text_content"]: - batch_texts.append(child["text_content"]) - batch_times.append((child["start_time"], child["end_time"])) - break - - start_time = batch_times[0][0] if batch_times else 0 - end_time = batch_times[-1][1] if batch_times else 0 - - # Find objects in this time range - for frame in yolo_frames[:50]: - ts = frame.get("timestamp", 0) - if 
start_time <= ts <= end_time: - for obj in frame.get("objects", []): - batch_objects.append(obj.get("class_name", "unknown")) - - narrative = generate_narrative(batch_texts, batch_objects, start_time, end_time) - - parent_chunk = { - "chunk_id": f"story_asr_{i // parent_chunk_size:04d}", - "chunk_type": "story", - "source": "story_asr", - "start_time": start_time, - "end_time": end_time, - "text_content": narrative, - "content": { - "description": narrative, - "child_count": len(batch), - "speech_preview": " ".join(batch_texts[:3]) if batch_texts else None, - "detected_objects": list(set(batch_objects))[:5], - }, - "child_chunk_ids": batch, - "parent_chunk_id": None, - } - parent_chunks.append(parent_chunk) - - for child_id in batch: - for child in child_chunks: - if child["chunk_id"] == child_id: - child["parent_chunk_id"] = parent_chunk["chunk_id"] - break - - # Group CUT scenes into parent chunks - for i in range(0, len(cut_child_ids), parent_chunk_size): - batch = cut_child_ids[i : i + parent_chunk_size] - if not batch: - continue - - batch_times = [] - batch_objects = [] - - for child_id in batch: - for child in child_chunks: - if child["chunk_id"] == child_id: - batch_times.append((child["start_time"], child["end_time"])) - break - - start_time = batch_times[0][0] if batch_times else 0 - end_time = batch_times[-1][1] if batch_times else 0 - - for frame in yolo_frames[:50]: - ts = frame.get("timestamp", 0) - if start_time <= ts <= end_time: - for obj in frame.get("objects", []): - batch_objects.append(obj.get("class_name", "unknown")) - - narrative = generate_scene_narrative( - batch_objects, start_time, end_time, len(batch) - ) - - parent_chunk = { - "chunk_id": f"story_cut_{i // parent_chunk_size:04d}", - "chunk_type": "story", - "source": "story_cut", - "start_time": start_time, - "end_time": end_time, - "text_content": narrative, - "content": { - "description": narrative, - "child_count": len(batch), - "scenes": batch, - "detected_objects": 
list(set(batch_objects))[:5], - }, - "child_chunk_ids": batch, - "parent_chunk_id": None, - } - parent_chunks.append(parent_chunk) - - for child_id in batch: - for child in child_chunks: - if child["chunk_id"] == child_id: - child["parent_chunk_id"] = parent_chunk["chunk_id"] - break - - return { - "child_chunks": child_chunks, - "parent_chunks": parent_chunks, - "stats": { - "total_child_chunks": len(child_chunks), - "total_parent_chunks": len(parent_chunks), - "asr_children": len(asr_child_ids), - "cut_children": len(cut_child_ids), - }, - } - - -def generate_narrative( - texts: List[str], objects: List[str], start: float, end: float -) -> str: - """Generate narrative description from LOCAL text snippets and objects""" - if not texts and not objects: - return f"Video segment from {start:.1f}s to {end:.1f}s" - - parts = [] - if texts: - combined = " ".join(texts[:5]) - if len(combined) > 150: - combined = combined[:150] + "..." - parts.append(f"Speech: {combined}") - - if objects: - unique_objs = list(set(objects))[:5] - parts.append(f"Visuals: {', '.join(unique_objs)}") - - return f"[{start:.0f}s-{end:.0f}s] {' | '.join(parts)}" - - -def generate_scene_narrative( - objects: List[str], start: float, end: float, scene_count: int -) -> str: - """Generate scene narrative from LOCAL detected objects""" - unique_objects = list(set(objects))[:5] - - if unique_objects: - obj_str = ", ".join(unique_objects) - return f"[{start:.0f}s-{end:.0f}s] {scene_count} scenes. Visuals: {obj_str}." - else: - return f"[{start:.0f}s-{end:.0f}s] {scene_count} video scenes." 
- - -def run_story( - video_path: str, output_path: str, uuid: str = "", parent_chunk_size: int = 5 -): - publisher = RedisPublisher(uuid) if uuid else None - if publisher: - publisher.info("story", "STORY_START") - - base_path = os.path.dirname(output_path) - uuid_name = os.path.basename(output_path).split(".")[0] - - asr_data = {"segments": []} - cut_data = {"scenes": []} - yolo_data = {"frames": []} - ocr_data = {"frames": []} - scene_data = {"scenes": []} - - for name, data_var in [ - ("asr", asr_data), - ("cut", cut_data), - ("yolo", yolo_data), - ("ocr", ocr_data), - ("scene", scene_data), - ]: - path = os.path.join(base_path, f"{uuid_name}.{name}.json") - if os.path.exists(path): - with open(path) as f: - data_var.update(json.load(f)) - - result = generate_parent_child_chunks( - asr_data, cut_data, yolo_data, ocr_data, scene_data, parent_chunk_size - ) - - result["video_metadata"] = extract_video_metadata(video_path) - result["processing"] = { - "method": "local_aggregation", - "cloud_api_used": False, - "parent_chunk_size": parent_chunk_size, - } - - with open(output_path, "w") as f: - json.dump(result, f, indent=2, ensure_ascii=False) - - if publisher: - publisher.complete( - "story", - f"{result['stats']['total_parent_chunks']} parent, {result['stats']['total_child_chunks']} child chunks (LOCAL)", - ) - - return result - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Story Processor - Parent-Child Chunk Hierarchy (LOCAL ONLY)" - ) - parser.add_argument("video_path", help="Path to video file") - parser.add_argument("output_path", help="Output JSON path") - parser.add_argument("--uuid", help="UUID for progress tracking", default="") - parser.add_argument( - "--parent-chunk-size", - type=int, - default=5, - help="Number of child chunks per parent", - ) - - args = parser.parse_args() - - result = run_story( - args.video_path, args.output_path, args.uuid, args.parent_chunk_size - ) - print( - f"Story generated: 
{result['stats']['total_parent_chunks']} parent, " - f"{result['stats']['total_child_chunks']} child chunks (LOCAL)" - ) diff --git a/scripts/story_processor_contract_v1.py b/scripts/story_processor_contract_v1.py deleted file mode 100644 index 730daaf..0000000 --- a/scripts/story_processor_contract_v1.py +++ /dev/null @@ -1,848 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -Story Processor - AI-Driven Processor Contract Version 1.0 - -Compliant with AI-Driven Processor Contract v1.0 -Effective Date: 2025-03-27 - -Features: -1. Standardized command-line interface -2. Redis progress reporting -3. Signal handling (SIGTERM, SIGINT) -4. Health check mode -5. Resource monitoring -6. Contract-compliant JSON output -7. Unified configuration -""" - -import sys -import json -import os -import argparse -import signal -import time -import traceback -from datetime import datetime -from typing import Dict, Any, List - -# Redis Publisher for progress reporting -try: - sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) - from redis_publisher import RedisPublisher - - REDIS_AVAILABLE = True -except ImportError: - REDIS_AVAILABLE = False - print( - "WARNING: RedisPublisher not available, progress reporting disabled", - file=sys.stderr, - ) - -# Contract version -CONTRACT_VERSION = "1.0" -PROCESSOR_NAME = ( - "/Users/accusys/momentry_core_0.1/scripts/story_processor_contract_v1.py" -) -PROCESSOR_VERSION = "1.0.0" -MODEL_NAME = "gpt-4" -MODEL_VERSION = "latest" - -# Unified configuration defaults -DEFAULT_TIMEOUT = 3600 # 1 hour for story generation -DEFAULT_PARENT_CHUNK_SIZE = 5 -DEFAULT_MIN_CHILD_CHUNKS = 3 -DEFAULT_MAX_CHILD_CHUNKS = 10 -DEFAULT_SUMMARY_LENGTH = 150 -DEFAULT_MODEL = "openai" # openai, local, or template -DEFAULT_MODEL_NAME = "gpt-4" -DEFAULT_TEMPERATURE = 0.7 -DEFAULT_MAX_TOKENS = 500 - - -# Signal handling with timeout support -class SignalHandler: - """Handle system signals for graceful shutdown""" - - def __init__(self): - self.should_exit = False - 
self.exit_code = 0 - signal.signal(signal.SIGTERM, self.handle_signal) - signal.signal(signal.SIGINT, self.handle_signal) - - def handle_signal(self, signum, frame): - """Handle termination signals""" - print(f"\n收到信号 {signum},正在优雅关闭...") - self.should_exit = True - self.exit_code = 128 + signum - - def should_stop(self): - """Check if should stop processing""" - return self.should_exit - - -# Timeout manager -class TimeoutManager: - """Manage processing timeouts""" - - def __init__(self, timeout_seconds: int): - self.timeout_seconds = timeout_seconds - self.start_time = time.time() - self.timer = None - - def check_timeout(self) -> bool: - """Check if timeout has been reached""" - elapsed = time.time() - self.start_time - return elapsed > self.timeout_seconds - - def get_remaining_time(self) -> float: - """Get remaining time in seconds""" - elapsed = time.time() - self.start_time - return max(0, self.timeout_seconds - elapsed) - - def format_remaining_time(self) -> str: - """Format remaining time as HH:MM:SS""" - remaining = self.get_remaining_time() - hours = int(remaining // 3600) - minutes = int((remaining % 3600) // 60) - seconds = int(remaining % 60) - return f"{hours:02d}:{minutes:02d}:{seconds:02d}" - - -# Health check functions -def check_environment() -> Dict[str, Any]: - """Check environment and dependencies""" - checks = [] - - # Check 1: OpenAI API (optional) - try: - import openai - - checks.append( - { - "name": "openai", - "status": "available", - "version": openai.__version__, - } - ) - except ImportError: - checks.append({"name": "openai", "status": "optional", "version": None}) - - # Check 2: Redis (optional) - checks.append( - { - "name": "redis", - "status": "available" if REDIS_AVAILABLE else "optional", - "version": None, - } - ) - - # Check 3: Python version - checks.append( - { - "name": "python", - "status": "available", - "version": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}", - } - ) - - return { - 
"timestamp": datetime.now().isoformat(), - "processor_name": PROCESSOR_NAME, - "processor_version": PROCESSOR_VERSION, - "contract_version": CONTRACT_VERSION, - "model_name": MODEL_NAME, - "model_version": MODEL_VERSION, - "checks": checks, - } - - -def check_input_files(input_files: Dict[str, str]) -> Dict[str, Any]: - """Check input files exist and are valid JSON""" - results = {} - - for file_type, file_path in input_files.items(): - if not file_path: - results[file_type] = { - "exists": False, - "valid": False, - "error": "No path provided", - } - continue - - if not os.path.exists(file_path): - results[file_type] = { - "exists": False, - "valid": False, - "error": "File not found", - } - continue - - try: - with open(file_path, "r") as f: - data = json.load(f) - - # Basic validation based on file type - if file_type == "asr": - valid = isinstance(data, dict) and "segments" in data - elif file_type == "cut": - valid = isinstance(data, dict) and "scenes" in data - elif file_type == "yolo": - valid = isinstance(data, dict) and "detections" in data - elif file_type == "ocr": - valid = isinstance(data, dict) and "texts" in data - else: - valid = isinstance(data, dict) - - results[file_type] = { - "exists": True, - "valid": valid, - "size": os.path.getsize(file_path), - "data_keys": list(data.keys()) if isinstance(data, dict) else [], - } - - except json.JSONDecodeError as e: - results[file_type] = { - "exists": True, - "valid": False, - "error": f"Invalid JSON: {e}", - } - except Exception as e: - results[file_type] = {"exists": True, "valid": False, "error": str(e)} - - return results - - -def load_input_data(input_files: Dict[str, str]) -> Dict[str, Any]: - """Load input data from JSON files""" - data = {} - - for file_type, file_path in input_files.items(): - if not file_path or not os.path.exists(file_path): - data[file_type] = None - continue - - try: - with open(file_path, "r") as f: - data[file_type] = json.load(f) - except: - data[file_type] = None - - 
return data - - -def generate_parent_child_chunks( - asr_data: Dict, - cut_data: Dict, - yolo_data: Dict, - ocr_data: Dict, - parent_chunk_size: int = DEFAULT_PARENT_CHUNK_SIZE, - min_child_chunks: int = DEFAULT_MIN_CHILD_CHUNKS, - max_child_chunks: int = DEFAULT_MAX_CHILD_CHUNKS, - summary_length: int = DEFAULT_SUMMARY_LENGTH, - model: str = DEFAULT_MODEL, - **kwargs, -) -> List[Dict[str, Any]]: - """Generate parent-child chunk hierarchy for RAG""" - - parent_chunks = [] - - # Extract ASR segments - asr_segments = asr_data.get("segments", []) if asr_data else [] - - # Extract scenes from CUT data - scenes = cut_data.get("scenes", []) if cut_data else [] - - # Extract detections from YOLO data - yolo_detections = yolo_data.get("detections", []) if yolo_data else [] - - # Extract OCR texts - ocr_texts = ocr_data.get("texts", []) if ocr_data else [] - - # If we have scenes, use them to group content - if scenes: - for scene in scenes: - scene_start = scene.get("start_time", 0) - scene_end = scene.get("end_time", 0) - scene_duration = scene.get("duration", 0) - - # Find ASR segments in this scene - scene_asr_segments = [] - for segment in asr_segments: - seg_start = segment.get("start", 0) - if scene_start <= seg_start <= scene_end: - scene_asr_segments.append(segment) - - # Find YOLO detections in this scene - scene_yolo_detections = [] - for detection in yolo_detections: - det_time = detection.get("timestamp", 0) - if scene_start <= det_time <= scene_end: - scene_yolo_detections.append(detection) - - # Find OCR texts in this scene - scene_ocr_texts = [] - for text in ocr_texts: - text_time = text.get("timestamp", 0) - if scene_start <= text_time <= scene_end: - scene_ocr_texts.append(text) - - # Create child chunks - child_chunks = [] - - # Add ASR segments as child chunks - for segment in scene_asr_segments[:max_child_chunks]: - child_chunks.append( - { - "type": "asr", - "content": segment.get("text", ""), - "start_time": segment.get("start", 0), - "end_time": 
segment.get("end", 0), - "confidence": segment.get("confidence", 0), - "metadata": {"speaker": segment.get("speaker")}, - } - ) - - # Add YOLO detections as child chunks - for detection in scene_yolo_detections[:max_child_chunks]: - child_chunks.append( - { - "type": "yolo", - "content": f"Detected {detection.get('class', 'object')} with confidence {detection.get('confidence', 0):.2f}", - "timestamp": detection.get("timestamp", 0), - "confidence": detection.get("confidence", 0), - "metadata": { - "class": detection.get("class"), - "bbox": detection.get("bbox"), - }, - } - ) - - # Add OCR texts as child chunks - for text in scene_ocr_texts[:max_child_chunks]: - child_chunks.append( - { - "type": "ocr", - "content": text.get("text", ""), - "timestamp": text.get("timestamp", 0), - "confidence": text.get("confidence", 0), - "metadata": { - "bbox": text.get("bbox"), - "language": text.get("language"), - }, - } - ) - - # Skip if not enough child chunks - if len(child_chunks) < min_child_chunks: - continue - - # Generate parent summary - if model == "openai": - parent_summary = generate_openai_summary(child_chunks, scene, **kwargs) - elif model == "local": - parent_summary = generate_local_summary(child_chunks, scene, **kwargs) - else: - parent_summary = generate_template_summary(child_chunks, scene) - - # Create parent chunk - parent_chunks.append( - { - "parent_id": len(parent_chunks) + 1, - "scene_id": scene.get("scene_id", 0), - "start_time": scene_start, - "end_time": scene_end, - "duration": scene_duration, - "summary": parent_summary[:summary_length] - if summary_length > 0 - else parent_summary, - "child_count": len(child_chunks), - "child_types": list(set(chunk["type"] for chunk in child_chunks)), - "child_chunks": child_chunks[ - :parent_chunk_size - ], # Limit child chunks in output - } - ) - - # If no scenes, create chunks based on time windows - elif asr_segments: - # Group ASR segments by time windows - time_window = 30 # seconds - current_window = 0 - - 
while current_window * time_window < ( - asr_segments[-1].get("end", 0) if asr_segments else 0 - ): - window_start = current_window * time_window - window_end = (current_window + 1) * time_window - - # Find segments in this window - window_segments = [] - for segment in asr_segments: - seg_start = segment.get("start", 0) - if window_start <= seg_start < window_end: - window_segments.append(segment) - - if len(window_segments) >= min_child_chunks: - # Create child chunks - child_chunks = [] - for segment in window_segments[:max_child_chunks]: - child_chunks.append( - { - "type": "asr", - "content": segment.get("text", ""), - "start_time": segment.get("start", 0), - "end_time": segment.get("end", 0), - "confidence": segment.get("confidence", 0), - "metadata": {"speaker": segment.get("speaker")}, - } - ) - - # Generate parent summary - parent_summary = generate_template_summary( - child_chunks, - { - "start_time": window_start, - "end_time": window_end, - "duration": time_window, - }, - ) - - # Create parent chunk - parent_chunks.append( - { - "parent_id": len(parent_chunks) + 1, - "time_window": current_window, - "start_time": window_start, - "end_time": window_end, - "duration": time_window, - "summary": parent_summary[:summary_length] - if summary_length > 0 - else parent_summary, - "child_count": len(child_chunks), - "child_types": ["asr"], - "child_chunks": child_chunks[:parent_chunk_size], - } - ) - - current_window += 1 - - return parent_chunks - - -def generate_openai_summary(child_chunks: List[Dict], scene: Dict, **kwargs) -> str: - """Generate summary using OpenAI""" - try: - import openai - - # Prepare context from child chunks - context_parts = [] - for chunk in child_chunks[:10]: # Limit context size - if chunk["type"] == "asr": - context_parts.append(f"Speech: {chunk['content']}") - elif chunk["type"] == "yolo": - context_parts.append(f"Visual: {chunk['content']}") - elif chunk["type"] == "ocr": - context_parts.append(f"Text: {chunk['content']}") - - 
context = "\n".join(context_parts) - - # Prepare prompt - prompt = f"""Summarize this video scene ({scene.get("duration", 0):.1f} seconds) based on the following elements: - -{context} - -Provide a concise narrative summary that connects the speech, visual elements, and text into a coherent description.""" - - # Call OpenAI API - response = openai.chat.completions.create( - model=kwargs.get("model_name", DEFAULT_MODEL_NAME), - messages=[ - { - "role": "system", - "content": "You are a video analysis assistant that creates coherent narrative summaries from multiple data sources.", - }, - {"role": "user", "content": prompt}, - ], - max_tokens=kwargs.get("max_tokens", DEFAULT_MAX_TOKENS), - temperature=kwargs.get("temperature", DEFAULT_TEMPERATURE), - ) - - return response.choices[0].message.content - - except ImportError: - return "OpenAI not available for summary generation" - except Exception as e: - return f"Summary generation error: {str(e)}" - - -def generate_local_summary(child_chunks: List[Dict], scene: Dict, **kwargs) -> str: - """Generate summary using local model (placeholder)""" - # This is a placeholder for local model implementation - asr_count = sum(1 for chunk in child_chunks if chunk["type"] == "asr") - yolo_count = sum(1 for chunk in child_chunks if chunk["type"] == "yolo") - ocr_count = sum(1 for chunk in child_chunks if chunk["type"] == "ocr") - - return f"Scene ({scene.get('duration', 0):.1f}s) with {asr_count} speech segments, {yolo_count} visual detections, and {ocr_count} text elements. Local summary model not implemented." 
- - -def generate_template_summary(child_chunks: List[Dict], scene: Dict) -> str: - """Generate summary using template""" - asr_count = sum(1 for chunk in child_chunks if chunk["type"] == "asr") - yolo_count = sum(1 for chunk in child_chunks if chunk["type"] == "yolo") - ocr_count = sum(1 for chunk in child_chunks if chunk["type"] == "ocr") - - # Extract some sample content - asr_samples = [ - chunk["content"][:50] for chunk in child_chunks if chunk["type"] == "asr" - ][:2] - yolo_classes = list( - set( - chunk["metadata"].get("class", "object") - for chunk in child_chunks - if chunk["type"] == "yolo" - ) - ) - - summary_parts = [f"Scene duration: {scene.get('duration', 0):.1f} seconds."] - - if asr_count > 0: - summary_parts.append(f"Contains {asr_count} speech segments.") - if asr_samples: - summary_parts.append(f"Sample speech: {'; '.join(asr_samples)}...") - - if yolo_count > 0: - summary_parts.append( - f"Detected {yolo_count} objects including: {', '.join(yolo_classes[:3])}." - ) - - if ocr_count > 0: - summary_parts.append(f"Extracted {ocr_count} text elements from the video.") - - return " ".join(summary_parts) - - -# Main processing function -def process_story( - asr_path: str, - cut_path: str, - yolo_path: str, - ocr_path: str, - output_path: str, - uuid: str = "", - parent_chunk_size: int = DEFAULT_PARENT_CHUNK_SIZE, - min_child_chunks: int = DEFAULT_MIN_CHILD_CHUNKS, - max_child_chunks: int = DEFAULT_MAX_CHILD_CHUNKS, - summary_length: int = DEFAULT_SUMMARY_LENGTH, - model: str = DEFAULT_MODEL, - model_name: str = DEFAULT_MODEL_NAME, - temperature: float = DEFAULT_TEMPERATURE, - max_tokens: int = DEFAULT_MAX_TOKENS, - timeout: int = DEFAULT_TIMEOUT, -) -> Dict[str, Any]: - """Process video analysis data to create parent-child chunk hierarchy""" - - # Initialize - signal_handler = SignalHandler() - timeout_manager = TimeoutManager(timeout) - publisher = None - if REDIS_AVAILABLE and uuid: - try: - publisher = RedisPublisher(uuid) - except: - publisher = 
None - - def publish(stage: str, message: str, data: Dict = None): - if publisher: - publisher.info(PROCESSOR_NAME, stage, message, data) - - if publisher: - publish("STORY_START", "开始生成故事层次结构") - - result = { - "processor_name": PROCESSOR_NAME, - "processor_version": PROCESSOR_VERSION, - "contract_version": CONTRACT_VERSION, - "model_name": MODEL_NAME, - "model_version": MODEL_VERSION, - "input_files": { - "asr": asr_path, - "cut": cut_path, - "yolo": yolo_path, - "ocr": ocr_path, - }, - "output_path": output_path, - "uuid": uuid, - "timestamp": datetime.now().isoformat(), - "parameters": { - "parent_chunk_size": parent_chunk_size, - "min_child_chunks": min_child_chunks, - "max_child_chunks": max_child_chunks, - "summary_length": summary_length, - "model": model, - "model_name": model_name, - "temperature": temperature, - "max_tokens": max_tokens, - "timeout": timeout, - }, - "success": False, - "error": None, - "parent_chunks": [], - "chunk_statistics": {}, - "processing_time": 0, - "resource_usage": {}, - } - - start_time = time.time() - - try: - # Check timeout - if timeout_manager.check_timeout(): - raise TimeoutError(f"超时 ({timeout} 秒)") - - # Check if should exit - if signal_handler.should_stop(): - raise KeyboardInterrupt("收到停止信号") - - # Check input files - if publisher: - publish("STORY_CHECK_FILES", "检查输入文件") - - input_files = { - "asr": asr_path, - "cut": cut_path, - "yolo": yolo_path, - "ocr": ocr_path, - } - - file_checks = check_input_files(input_files) - result["file_checks"] = file_checks - - # Check if we have at least ASR data - if not file_checks.get("asr", {}).get("valid", False): - raise ValueError("缺少有效的 ASR 数据文件") - - if publisher: - publish("STORY_FILES_VALID", "输入文件检查通过") - - # Load input data - if publisher: - publish("STORY_LOAD_DATA", "加载输入数据") - - input_data = load_input_data(input_files) - - if publisher: - publish("STORY_DATA_LOADED", "数据加载完成") - - # Generate parent-child chunks - if publisher: - publish("STORY_GENERATE_CHUNKS", 
"生成父-子块层次结构") - - parent_chunks = generate_parent_child_chunks( - asr_data=input_data.get("asr"), - cut_data=input_data.get("cut"), - yolo_data=input_data.get("yolo"), - ocr_data=input_data.get("ocr"), - parent_chunk_size=parent_chunk_size, - min_child_chunks=min_child_chunks, - max_child_chunks=max_child_chunks, - summary_length=summary_length, - model=model, - model_name=model_name, - temperature=temperature, - max_tokens=max_tokens, - ) - - result["parent_chunks"] = parent_chunks - result["parent_chunk_count"] = len(parent_chunks) - - # Calculate statistics - total_child_chunks = sum(chunk.get("child_count", 0) for chunk in parent_chunks) - child_types = {} - for chunk in parent_chunks: - for child_type in chunk.get("child_types", []): - child_types[child_type] = child_types.get(child_type, 0) + 1 - - result["chunk_statistics"] = { - "total_parent_chunks": len(parent_chunks), - "total_child_chunks": total_child_chunks, - "avg_children_per_parent": total_child_chunks / len(parent_chunks) - if parent_chunks - else 0, - "child_type_distribution": child_types, - } - - result["success"] = True - - if publisher: - publish("STORY_COMPLETE", f"完成: {len(parent_chunks)} 个父块") - - except TimeoutError as e: - result["error"] = f"处理超时: {e}" - if publisher: - publish("STORY_TIMEOUT", f"超时: {e}") - except KeyboardInterrupt: - result["error"] = "处理被用户中断" - if publisher: - publish("STORY_INTERRUPTED", "处理被中断") - except ImportError as e: - result["error"] = f"依赖缺失: {e}" - if publisher: - publish("STORY_MISSING_DEPS", f"缺少依赖: {e}") - except Exception as e: - result["error"] = f"处理错误: {str(e)}" - if publisher: - publish("STORY_ERROR", f"错误: {str(e)}") - traceback.print_exc() - - # Calculate processing time - processing_time = time.time() - start_time - result["processing_time"] = processing_time - - # Add resource usage - try: - import psutil - - process = psutil.Process() - memory_info = process.memory_info() - result["resource_usage"] = { - "cpu_percent": process.cpu_percent(), - 
"memory_mb": memory_info.rss / (1024 * 1024), - "user_time": process.cpu_times().user, - "system_time": process.cpu_times().system, - } - except ImportError: - result["resource_usage"] = {"error": "psutil not available"} - - # Save result - try: - with open(output_path, "w") as f: - json.dump(result, f, indent=2, ensure_ascii=False) - if publisher: - publish("STORY_SAVED", f"结果保存到: {output_path}") - except Exception as e: - result["error"] = f"保存结果失败: {str(e)}" - if publisher: - publish("STORY_SAVE_ERROR", f"保存失败: {str(e)}") - - return result - - -def main(): - """Main entry point""" - parser = argparse.ArgumentParser( - description=f"{PROCESSOR_NAME.upper()} Processor v{PROCESSOR_VERSION} - Parent-Child Chunk Generation" - ) - parser.add_argument("--asr", help="Path to ASR JSON file", required=True) - parser.add_argument("--cut", help="Path to CUT JSON file", default="") - parser.add_argument("--yolo", help="Path to YOLO JSON file", default="") - parser.add_argument("--ocr", help="Path to OCR JSON file", default="") - parser.add_argument("--output", help="Path to output JSON file", required=True) - parser.add_argument("--uuid", help="UUID for progress tracking", default="") - parser.add_argument( - "--parent-chunk-size", - help=f"Maximum child chunks per parent (default: {DEFAULT_PARENT_CHUNK_SIZE})", - type=int, - default=DEFAULT_PARENT_CHUNK_SIZE, - ) - parser.add_argument( - "--min-child-chunks", - help=f"Minimum child chunks to create parent (default: {DEFAULT_MIN_CHILD_CHUNKS})", - type=int, - default=DEFAULT_MIN_CHILD_CHUNKS, - ) - parser.add_argument( - "--max-child-chunks", - help=f"Maximum child chunks per parent (default: {DEFAULT_MAX_CHILD_CHUNKS})", - type=int, - default=DEFAULT_MAX_CHILD_CHUNKS, - ) - parser.add_argument( - "--summary-length", - help=f"Maximum summary length in characters (default: {DEFAULT_SUMMARY_LENGTH})", - type=int, - default=DEFAULT_SUMMARY_LENGTH, - ) - parser.add_argument( - "--model", - help=f"Summary model to use (default: 
{DEFAULT_MODEL})", - default=DEFAULT_MODEL, - choices=["openai", "local", "template"], - ) - parser.add_argument( - "--model-name", - help=f"Model name for OpenAI (default: {DEFAULT_MODEL_NAME})", - default=DEFAULT_MODEL_NAME, - ) - parser.add_argument( - "--temperature", - help=f"Temperature for generation (default: {DEFAULT_TEMPERATURE})", - type=float, - default=DEFAULT_TEMPERATURE, - ) - parser.add_argument( - "--max-tokens", - help=f"Maximum tokens per summary (default: {DEFAULT_MAX_TOKENS})", - type=int, - default=DEFAULT_MAX_TOKENS, - ) - parser.add_argument( - "--timeout", - help=f"Timeout in seconds (default: {DEFAULT_TIMEOUT})", - type=int, - default=DEFAULT_TIMEOUT, - ) - parser.add_argument( - "--health-check", - help="Run health check and exit", - action="store_true", - ) - - args = parser.parse_args() - - # Health check mode - if args.health_check: - health = check_environment() - print(json.dumps(health, indent=2, ensure_ascii=False)) - return ( - 0 - if all(c["status"] in ["available", "optional"] for c in health["checks"]) - else 1 - ) - - # Normal processing mode - result = process_story( - asr_path=args.asr, - cut_path=args.cut, - yolo_path=args.yolo, - ocr_path=args.ocr, - output_path=args.output, - uuid=args.uuid, - parent_chunk_size=args.parent_chunk_size, - min_child_chunks=args.min_child_chunks, - max_child_chunks=args.max_child_chunks, - summary_length=args.summary_length, - model=args.model, - model_name=args.model_name, - temperature=args.temperature, - max_tokens=args.max_tokens, - timeout=args.timeout, - ) - - # Print result summary - if result.get("success", False): - print(f"✅ {PROCESSOR_NAME.upper()} 处理成功") - print(f" 父块数: {result.get('parent_chunk_count', 0)}") - stats = result.get("chunk_statistics", {}) - print(f" 子块总数: {stats.get('total_child_chunks', 0)}") - print(f" 平均子块/父块: {stats.get('avg_children_per_parent', 0):.1f}") - print(f" 处理时间: {result.get('processing_time', 0):.1f} 秒") - print(f" 输出文件: {args.output}") - return 0 - 
else: - print(f"❌ {PROCESSOR_NAME.upper()} 处理失败") - print(f" 错误: {result.get('error', '未知错误')}") - return 1 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/scripts/story_processor_contract_v1_v1.11.py b/scripts/story_processor_contract_v1_v1.11.py deleted file mode 120000 index 264a33f..0000000 --- a/scripts/story_processor_contract_v1_v1.11.py +++ /dev/null @@ -1 +0,0 @@ -../v1.1/scripts/story_processor_contract_v1_v1.11.py \ No newline at end of file diff --git a/scripts/story_processor_v1.11.py b/scripts/story_processor_v1.11.py deleted file mode 120000 index 7ac110b..0000000 --- a/scripts/story_processor_v1.11.py +++ /dev/null @@ -1 +0,0 @@ -../v1.1/scripts/story_processor_v1.11.py \ No newline at end of file diff --git a/scripts/test_parent_chunk_generation.py b/scripts/test_parent_chunk_generation.py deleted file mode 100644 index 842c5ef..0000000 --- a/scripts/test_parent_chunk_generation.py +++ /dev/null @@ -1,121 +0,0 @@ -#!/opt/homebrew/bin/python3.11 -""" -Test Parent Chunk Summary Generation (Gemma 4) -""" - -import json -import ollama -import time - -# Configuration -UUID = "384b0ff44aaaa1f1" -ASR_PATH = f"output/{UUID}/{UUID}.asr.json" -MODEL = "gemma4:latest" - -# The Prompt Template -PARENT_SUMMARY_PROMPT = """ -You are an expert film analyst. Analyze the following movie dialogue segment (approx 60 seconds). -Your task is to generate a structured JSON summary containing: -1. **narrative_summary**: A one-sentence summary of the main event/plot point. -2. **entities**: Key information extracted: - - `who`: List of characters involved. - - `where`: Inferred location (e.g., "Apartment", "Train"). - - `objects`: Key props mentioned (e.g., "Ticket", "Money"). -3. **emotional_arc**: The emotional transition: - - `start_mood`: Mood at the beginning. - - `end_mood`: Mood at the end. -4. **plot_sequence**: - - `scene_type`: Type of scene (e.g., "Confrontation", "Romance", "Discovery"). - - `key_action`: The main action taking place. 
- -**IMPORTANT RULES:** -- Output **ONLY** valid JSON. -- Do NOT include "Thinking Process" or markdown formatting. -- If information is unknown, use "Unknown". -- Context: This is from the movie "Charade" (1963). - -Dialogue: -{context} -""" - - -def load_sample(start_index, count=20): - """Load a slice of dialogue to simulate a Parent Chunk""" - try: - with open(ASR_PATH, "r") as f: - data = json.load(f) - - segments = data.get("segments", []) - selected = segments[start_index : start_index + count] - text = " ".join([s.get("text", "") for s in selected]) - print(f"📂 Loaded Sample {start_index}: {len(selected)} segments.") - return text - except Exception as e: - return f"Error: {e}" - - -def run_test(name, context_text): - print(f"\n🧪 Testing: {name}") - print("-" * 50) - print(f"📖 Input Preview: {context_text[:100]}...") - - prompt = PARENT_SUMMARY_PROMPT.format(context=context_text) - - try: - start = time.time() - response = ollama.chat( - model=MODEL, messages=[{"role": "user", "content": prompt}] - ) - duration = time.time() - start - - content = response["message"]["content"] - - # Clean up thinking tags if present - if "```json" in content: - content = content.split("```json")[1].split("```")[0] - elif "Thinking..." in content: - # crude cleanup for demo - content = content.split("...")[-1] - - # Attempt parse - try: - result = json.loads(content.strip()) - print(f"✅ Success ({duration:.2f}s)") - print(json.dumps(result, indent=2)) - return True - except json.JSONDecodeError: - print(f"⚠️ JSON Parse Failed ({duration:.2f}s)") - print(content[:500]) - return False - - except Exception as e: - print(f"❌ API Error: {e}") - return False - - -def main(): - print(f"🚀 Starting Parent Chunk Summary Tests on '{UUID}'") - - # Test 1: Early Dialogue (Entities & Narrative Focus) - # "possessed a ticket of passage..." 
- txt1 = load_sample(start_index=10) - res1 = run_test("Test 1: Early Plot (Entities & Narrative)", txt1) - - time.sleep(2) # Cool down - - # Test 2: Middle Conflict (Emotional Arc Focus) - # "where did he keep his money..." (From previous context) - txt2 = load_sample(start_index=50) - res2 = run_test("Test 2: Conflict (Emotional Arc)", txt2) - - time.sleep(2) # Cool down - - # Test 3: Later Dialogue (Plot Sequence Focus) - # Looking for a scene involving a conclusion or death aftermath - # Let's pick a later section to test robustness - txt3 = load_sample(start_index=150) - res3 = run_test("Test 3: Late Plot (Sequence)", txt3) - - -if __name__ == "__main__": - main() diff --git a/scripts/test_parent_chunk_generation_v1.11.py b/scripts/test_parent_chunk_generation_v1.11.py deleted file mode 120000 index f037471..0000000 --- a/scripts/test_parent_chunk_generation_v1.11.py +++ /dev/null @@ -1 +0,0 @@ -../v1.1/scripts/test_parent_chunk_generation_v1.11.py \ No newline at end of file diff --git a/src/api/files.rs b/src/api/files.rs index ef147c6..b7caac2 100644 --- a/src/api/files.rs +++ b/src/api/files.rs @@ -12,7 +12,7 @@ use std::collections::HashMap; use super::types::AppState; use crate::core::config; use crate::core::db::schema; -use crate::core::db::{Database, PostgresDb, QdrantDb, RedisClient}; +use crate::core::db::{Database, PostgresDb, QdrantDb, QdrantWorkspace, RedisClient}; use crate::core::storage::content_hash; use crate::FileManager; @@ -463,7 +463,6 @@ async fn register_single_file( .execute(db.pool()).await; let mut cut_done = false; - let mut scene_done = false; if has_video && total_frames > 0 && fps > 0.0 { let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR") .unwrap_or_else(|_| "/Users/accusys/momentry/output_dev".to_string()); @@ -511,31 +510,6 @@ async fn register_single_file( } } - let scene_path = - std::path::Path::new(&output_dir).join(format!("{}.scene.json", file_uuid)); - if !scene_path.exists() { - let scene_script = 
std::path::Path::new(&scripts_dir).join("scene_classifier.py"); - if scene_script.exists() { - let scene_output = std::process::Command::new(&python_path) - .arg(&scene_script) - .arg(&canonical_path) - .arg(&scene_path) - .arg("--sample-interval") - .arg("2") - .output(); - if let Ok(output) = scene_output { - if output.status.success() { - scene_done = true; - tracing::info!( - "[REGISTER] Scene classification completed for {}", - file_uuid - ); - } - } - } - } else { - scene_done = true; - } } let audio_tracks: Vec = temp_probe_json @@ -584,9 +558,9 @@ async fn register_single_file( } } let _ = sqlx::query( - &format!("UPDATE {} SET cut_done = $1, scene_done = $2, audio_tracks = $3, cut_count = $4, cut_max_duration = $5 WHERE file_uuid = $6", videos_table) + &format!("UPDATE {} SET cut_done = $1, scene_done = false, audio_tracks = $3, cut_count = $4, cut_max_duration = $5 WHERE file_uuid = $6", videos_table) ) - .bind(cut_done).bind(scene_done).bind(&audio_tracks_json).bind(cut_count).bind(cut_max_duration).bind(&file_uuid) + .bind(cut_done).bind(&audio_tracks_json).bind(cut_count).bind(cut_max_duration).bind(&file_uuid) .execute(db.pool()).await; if let Some(json_val) = probe_json { @@ -599,41 +573,6 @@ async fn register_single_file( let _ = std::fs::write(&probe_path, json_str); } - if final_file_type.as_deref() == Some("video") { - let auto_file_uuid = file_uuid.clone(); - let auto_db = db.clone(); - tokio::spawn(async move { - let identities_dir = - std::path::Path::new(&*crate::core::config::OUTPUT_DIR).join("identities"); - let index_path = identities_dir.join("_index.json"); - let cache_path = format!( - "{}/{}.tmdb.json", - *crate::core::config::OUTPUT_DIR, - auto_file_uuid - ); - let cache_file = std::path::Path::new(&cache_path); - - if index_path.exists() && cache_file.exists() { - tracing::info!( - "[AUTO-TMDB] Offline cache found for {}, running probe", - auto_file_uuid - ); - if let Err(e) = - crate::core::tmdb::probe::probe_from_cache(&auto_db, 
&auto_file_uuid).await - { - tracing::warn!("[AUTO-TMDB] Probe failed for {}: {}", auto_file_uuid, e); - } else { - tracing::info!("[AUTO-TMDB] Probe completed for {}", auto_file_uuid); - } - } else { - tracing::info!( - "[AUTO-TMDB] No offline cache for {}, skipping", - auto_file_uuid - ); - } - }); - } - RegisterFileResponse { success: true, file_uuid, @@ -978,8 +917,16 @@ struct UnregisterResponse { deleted_chunks: u64, deleted_tkg_nodes: u64, deleted_qdrant_vectors: Option, + deleted_qdrant_workspace: Option, deleted_redis_keys: Option, deleted_output_files: u64, + deleted_file_identities: u64, + deleted_speaker_detections: u64, + deleted_face_clusters: u64, + deleted_face_recognition_results: u64, + deleted_characters: u64, + deleted_chunks_rule1: u64, + deleted_processor_alerts: u64, } #[derive(Debug, Deserialize)] @@ -1011,6 +958,15 @@ fn delete_output_files(uuid: &str) -> u64 { } } } + + let workspace_sqlite = format!("{}.workspace.sqlite", uuid); + for output_dir in &output_dirs { + let path = std::path::Path::new(output_dir).join(&workspace_sqlite); + if path.exists() && std::fs::remove_file(&path).is_ok() { + deleted_count += 1; + tracing::info!("[UNREGISTER] Deleted workspace SQLite: {}", path.display()); + } + } deleted_count } @@ -1037,6 +993,13 @@ async fn unregister( let chunk_vectors_table = schema::table_name("chunk_vectors"); let monitor_jobs_table = schema::table_name("monitor_jobs"); let frames_table = schema::table_name("frames"); + let file_identities_table = schema::table_name("file_identities"); + let speaker_detections_table = schema::table_name("speaker_detections"); + let face_clusters_table = schema::table_name("face_clusters"); + let face_recognition_results_table = schema::table_name("face_recognition_results"); + let characters_table = schema::table_name("characters"); + let chunks_rule1_table = schema::table_name("chunks_rule1"); + let processor_alerts_table = schema::table_name("processor_alerts"); let mut tx = 
state.db.pool().begin().await.map_err(|e| { tracing::error!("[unregister] Failed to start transaction: {}", e); @@ -1082,6 +1045,21 @@ async fn unregister( })? .rows_affected() as i64; + let deleted_file_identities = + delete_safe!(file_identities_table, "file_uuid = $1", &uuid, "file identities"); + let deleted_speaker_detections = + delete_safe!(speaker_detections_table, "file_uuid = $1", &uuid, "speaker detections"); + let deleted_face_clusters = + delete_safe!(face_clusters_table, "file_uuid = $1", &uuid, "face clusters"); + let deleted_face_recognition = + delete_safe!(face_recognition_results_table, "file_uuid = $1", &uuid, "face recognition results"); + let deleted_characters = + delete_safe!(characters_table, "file_uuid = $1", &uuid, "characters"); + let deleted_chunks_rule1 = + delete_safe!(chunks_rule1_table, "uuid = $1", &uuid, "chunks rule1"); + let deleted_processor_alerts = + delete_safe!(processor_alerts_table, "file_uuid = $1", &uuid, "processor alerts"); + sqlx::query(&format!( "DELETE FROM {} WHERE file_uuid = $1", videos_table @@ -1100,10 +1078,13 @@ async fn unregister( })?; tracing::info!( - "[UNREGISTER] Deleted: {} faces, {} processors, {} parent_chunks, {} chunks, {} pre_chunks, {} tkg_nodes, {} cuts, {} strangers, {} chunk_vectors, {} monitor_jobs, {} frames", + "[UNREGISTER] Deleted: {} faces, {} processors, {} parent_chunks, {} chunks, {} pre_chunks, {} tkg_nodes, {} cuts, {} strangers, {} chunk_vectors, {} monitor_jobs, {} frames, {} file_identities, {} speaker_detections, {} face_clusters, {} face_recognition_results, {} characters, {} chunks_rule1, {} processor_alerts", deleted_faces, deleted_processors, deleted_parent_chunks, deleted_chunks, deleted_pre_chunks, deleted_tkg_nodes, deleted_cuts, deleted_strangers, - deleted_chunk_vectors, deleted_monitor_jobs, deleted_frames + deleted_chunk_vectors, deleted_monitor_jobs, deleted_frames, + deleted_file_identities, deleted_speaker_detections, deleted_face_clusters, + 
deleted_face_recognition, deleted_characters, deleted_chunks_rule1, + deleted_processor_alerts ); let deleted_output_files = delete_output_files(&uuid); @@ -1141,6 +1122,20 @@ async fn unregister( } }; + let deleted_qdrant_workspace = { + let workspace = QdrantWorkspace::new(); + match workspace.delete_by_file_uuid(&uuid).await { + Ok(_) => { + tracing::info!("[UNREGISTER] Deleted Qdrant workspace vectors for {}", uuid); + Some(1) + } + Err(e) => { + tracing::warn!("[UNREGISTER] Failed to delete Qdrant workspace vectors: {}", e); + None + } + } + }; + Ok(Json(UnregisterResponse { success: true, message: format!("File {} unregistered successfully.", uuid), @@ -1150,8 +1145,16 @@ async fn unregister( deleted_chunks: (deleted_chunks + deleted_parent_chunks + deleted_pre_chunks) as u64, deleted_tkg_nodes: deleted_tkg_nodes as u64, deleted_qdrant_vectors, + deleted_qdrant_workspace, deleted_redis_keys, deleted_output_files, + deleted_file_identities: deleted_file_identities as u64, + deleted_speaker_detections: deleted_speaker_detections as u64, + deleted_face_clusters: deleted_face_clusters as u64, + deleted_face_recognition_results: deleted_face_recognition as u64, + deleted_characters: deleted_characters as u64, + deleted_chunks_rule1: deleted_chunks_rule1 as u64, + deleted_processor_alerts: deleted_processor_alerts as u64, })) } diff --git a/src/api/five_w1h_agent_api.rs b/src/api/five_w1h_agent_api.rs deleted file mode 100644 index 23d3aa7..0000000 --- a/src/api/five_w1h_agent_api.rs +++ /dev/null @@ -1,807 +0,0 @@ -use axum::{ - extract::State, - http::StatusCode, - response::Json, - routing::{get, post}, - Router, -}; -use serde::{Deserialize, Serialize}; - -use crate::core::llm::function_calling::LLM_CLIENT; -use sqlx::Row; - -use crate::api::types::AppState; -use crate::core::db::qdrant_db::QdrantDb; -use crate::core::db::schema; -use crate::core::db::{PostgresDb, VectorPayload}; -use crate::core::embedding::Embedder; - -pub fn five_w1h_agent_routes() -> Router 
{ - Router::new() - .route("/api/v1/agents/5w1h/analyze", post(analyze_5w1h)) - .route("/api/v1/agents/5w1h/batch", post(batch_analyze_5w1h)) - .route("/api/v1/agents/5w1h/status", get(get_5w1h_status)) -} - -// ── Data Structures ── - -#[derive(Debug, Deserialize)] -pub struct Analyze5W1HRequest { - pub file_uuid: String, -} - -#[derive(Debug, Serialize)] -pub struct Analyze5W1HResponse { - pub success: bool, - pub file_uuid: String, - pub scenes_processed: usize, - pub scenes_total: usize, -} - -#[derive(Debug, Deserialize)] -pub struct BatchAnalyze5W1HRequest { - pub file_uuids: Vec, -} - -#[derive(Debug, Serialize)] -pub struct BatchAnalyze5W1HResponse { - pub success: bool, - pub jobs: Vec, -} - -#[derive(Debug, Serialize)] -pub struct BatchJobStatus { - pub file_uuid: String, - pub status: String, - pub message: String, -} - -#[derive(Debug, Clone)] -struct CutScene { - chunk_id: String, - start_frame: i64, - end_frame: i64, - fps: f64, - start_time: f64, - end_time: f64, - content: serde_json::Value, - metadata: serde_json::Value, - summary_text: Option, -} - -#[derive(Debug, Clone)] -struct SentenceChunk { - chunk_id: String, - text: String, - start_time: f64, - end_time: f64, - start_frame: i64, - end_frame: i64, - content: serde_json::Value, -} - -#[derive(Debug)] -struct ChildSummary { - chunk_id: String, - enhanced: String, - five_w1h: serde_json::Value, -} - -#[derive(Debug)] -struct SceneSummaryResult { - parent_summary: String, - five_w1h: serde_json::Value, - child_summaries: Vec, -} - -// ── LLM Endpoint ── - -fn llm_base_url() -> String { - crate::core::config::llm::SUMMARY_URL.clone() -} - -fn llm_model() -> String { - crate::core::config::llm::SUMMARY_MODEL.clone() -} - -// ── Data Fetching ── - -async fn fetch_cut_scenes(db: &PostgresDb, file_uuid: &str) -> anyhow::Result> { - let table = schema::table_name("chunk"); - sqlx::query_as::<_, (String, i64, i64, f64, Option, Option, serde_json::Value, Option, Option)>(&format!( - r#"SELECT chunk_id, 
start_frame, end_frame, fps, start_time, end_time, content, metadata, summary_text - FROM {} WHERE file_uuid = $1 AND chunk_type = 'cut' ORDER BY start_frame"#, table - )) - .bind(file_uuid) - .fetch_all(db.pool()).await? - .into_iter().map(|r| Ok(CutScene { - chunk_id: r.0, start_frame: r.1, end_frame: r.2, - fps: r.3, start_time: r.4.unwrap_or(0.0), end_time: r.5.unwrap_or(0.0), - content: r.6, metadata: r.7.unwrap_or(serde_json::json!({})), summary_text: r.8, - })).collect() -} - -async fn fetch_sentences_in_scene( - db: &PostgresDb, - file_uuid: &str, - cut: &CutScene, -) -> anyhow::Result> { - let table = schema::table_name("chunk"); - sqlx::query_as::<_, (String, String, Option, Option, i64, i64, serde_json::Value)>(&format!( - r#"SELECT chunk_id, COALESCE(text_content,''), start_time, end_time, start_frame, end_frame, content - FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' - AND start_time >= $2 AND end_time <= $3 ORDER BY start_time"#, table - )) - .bind(file_uuid).bind(cut.start_time).bind(cut.end_time) - .fetch_all(db.pool()).await? 
- .into_iter().map(|r| Ok(SentenceChunk { - chunk_id: r.0, text: r.1, start_time: r.2.unwrap_or(0.0), end_time: r.3.unwrap_or(0.0), - start_frame: r.4, end_frame: r.5, content: r.6, - })).collect() -} - -/// Fetch actor names present in this scene from face_detections + identity_bindings + identities -async fn fetch_identity_names_for_scene( - db: &PostgresDb, - file_uuid: &str, - cut: &CutScene, -) -> anyhow::Result> { - let fd_table = schema::table_name("face_detections"); - let ib_table = schema::table_name("identity_bindings"); - let id_table = schema::table_name("identities"); - let rows = sqlx::query_scalar::<_, String>(&format!( - r#"SELECT DISTINCT i.name - FROM {} fd - JOIN {} ib ON ib.identity_value = fd.trace_id::text AND ib.identity_type = 'trace' - JOIN {} i ON i.id = ib.identity_id - WHERE fd.file_uuid = $1 AND fd.frame_number >= $2 AND fd.frame_number <= $3 - AND fd.trace_id IS NOT NULL - ORDER BY i.name"#, - fd_table, ib_table, id_table - )) - .bind(file_uuid) - .bind(cut.start_frame) - .bind(cut.end_frame) - .fetch_all(db.pool()) - .await?; - Ok(rows) -} - -/// Fetch YOLO object labels detected in this scene from pre_chunks -async fn fetch_yolo_objects_for_scene( - db: &PostgresDb, - file_uuid: &str, - cut: &CutScene, -) -> anyhow::Result> { - let table = schema::table_name("pre_chunks"); - let rows = sqlx::query_scalar::<_, String>(&format!( - r#"SELECT DISTINCT data->>'label' - FROM {} WHERE file_uuid = $1 AND processor_type = 'yolo' - AND frame_number >= $2 AND frame_number <= $3 - AND data->>'label' IS NOT NULL - ORDER BY data->>'label'"#, - table - )) - .bind(file_uuid) - .bind(cut.start_frame) - .bind(cut.end_frame) - .fetch_all(db.pool()) - .await?; - Ok(rows) -} - -/// Fetch active speakers + their actor names for a scene's frame range -/// Uses identity_bindings to map SPEAKER_X to actor names -async fn fetch_speakers_for_scene( - db: &PostgresDb, - file_uuid: &str, - cut: &CutScene, -) -> anyhow::Result> { - let pc_table = 
schema::table_name("pre_chunks"); - let speakers = sqlx::query_scalar::<_, String>(&format!( - r#"SELECT DISTINCT data->>'speaker_id' - FROM {} WHERE file_uuid = $1 AND processor_type = 'asrx' - AND data->>'speaker_id' IS NOT NULL - AND start_frame <= $3 AND end_frame >= $2 - ORDER BY data->>'speaker_id'"#, - pc_table - )) - .bind(file_uuid) - .bind(cut.start_frame) - .bind(cut.end_frame) - .fetch_all(db.pool()) - .await?; - - if speakers.is_empty() { - return Ok(vec![]); - } - - // Map speaker_ids to actor names via identity_bindings - let ib_table = schema::table_name("identity_bindings"); - let id_table = schema::table_name("identities"); - let mut result = Vec::new(); - for spk in &speakers { - let name: Option = sqlx::query_scalar(&format!( - r#"SELECT i.name FROM {} ib JOIN {} i ON i.id = ib.identity_id - WHERE ib.identity_type = 'speaker' AND ib.identity_value = $1 AND i.name IS NOT NULL - LIMIT 1"#, - ib_table, id_table - )) - .bind(spk) - .fetch_optional(db.pool()) - .await?; - match name { - Some(n) => result.push(format!("{} ({})", spk, n)), - None => result.push(spk.clone()), - } - } - Ok(result) -} - -/// Fetch trace IDs with identity names for a scene's frame range -async fn fetch_trace_info( - db: &PostgresDb, - file_uuid: &str, - cut: &CutScene, -) -> anyhow::Result> { - let fd_table = schema::table_name("face_detections"); - let ib_table = schema::table_name("identity_bindings"); - let id_table = schema::table_name("identities"); - let rows = sqlx::query_as::<_, (i32, Option)>(&format!( - r#"SELECT DISTINCT fd.trace_id, i.name - FROM {} fd - LEFT JOIN {} ib ON ib.identity_value = fd.trace_id::text AND ib.identity_type = 'trace' - LEFT JOIN {} i ON i.id = ib.identity_id - WHERE fd.file_uuid = $1 AND fd.frame_number >= $2 AND fd.frame_number <= $3 - AND fd.trace_id IS NOT NULL - ORDER BY fd.trace_id"#, - fd_table, ib_table, id_table - )) - .bind(file_uuid) - .bind(cut.start_frame) - .bind(cut.end_frame) - .fetch_all(db.pool()) - .await?; - - Ok(rows 
- .iter() - .map(|(trace, name)| { - if let Some(n) = name { - format!("trace_{} ({})", trace, n) - } else { - format!("trace_{}", trace) - } - }) - .collect()) -} - -// ── LLM Prompt (Embedding-Optimized) ── - -async fn summarize_one_scene( - db: &PostgresDb, - file_uuid: &str, - cut: &CutScene, - sentences: &[SentenceChunk], - prev_context: &str, -) -> anyhow::Result { - if sentences.is_empty() { - return Ok(SceneSummaryResult { - parent_summary: String::new(), - five_w1h: serde_json::Value::Null, - child_summaries: vec![], - }); - } - - let faces = fetch_identity_names_for_scene(db, file_uuid, cut) - .await - .unwrap_or_default(); - let objects = fetch_yolo_objects_for_scene(db, file_uuid, cut) - .await - .unwrap_or_default(); - let traces = fetch_trace_info(db, file_uuid, cut) - .await - .unwrap_or_default(); - let speakers = fetch_speakers_for_scene(db, file_uuid, cut) - .await - .unwrap_or_default(); - - let mut dialogue = String::new(); - for (i, s) in sentences.iter().enumerate() { - let t = s.text.trim(); - if !t.is_empty() { - dialogue.push_str(&format!("[{}] {}\n", i + 1, t)); - } - } - - let story_so_far = if prev_context.is_empty() { - String::new() - } else { - format!("\nStory so far (previous scenes):\n{}\n", prev_context) - }; - - let prompt = format!( - r#"Analyze this movie scene and produce a structured summary. Be specific — quote actual dialogue. Avoid template phrases like "within the established dramatic setting." - -Scene time: {:.0}s–{:.0}s - -Dialogue: -{}Actors: {} -Objects: {} -Face traces: {} -Speakers: {} -{} -Output EXACTLY this JSON format: -{{ - "scene_summary": "5 flowing sentences: who+what+where+when+why+how. 
Quote actual lines.", - "5w1h": {{ - "who": "1 sentence with actor/character name", - "what": "1 sentence describing the action, quote the line", - "where": "1 sentence about setting", - "when": "1 sentence about timing in story", - "why": "1 sentence explaining why this moment matters", - "how": "1 sentence about delivery, emotion, tone" - }}, - "sentences": [ - {{ - "index": 1, - "who": "1 sentence", - "what": "1 sentence referencing the actual line", - "where": "1 sentence", - "when": "1 sentence", - "why": "1 sentence why this is said", - "how": "1 sentence describing delivery", - "enhanced": "1 sentence with actual dialogue, self-contained for search" - }} - ] -}} - -Rules: -- scene_summary: 5 sentences, natural paragraph. Use quotes. No template phrases. -- Each 5w1h field: exactly 1 sentence. Specific details. Character names. Quotes. -- Each sentence.enhanced: self-contained for search, include actual spoken words. -- Return ONLY valid JSON. No markdown. -- A short scene with 1-2 lines should have a short summary."#, - cut.start_time, - cut.end_time, - dialogue, - faces.join(", "), - objects.join(", "), - traces.join(", "), - speakers.join(", "), - story_so_far, - ); - - let body = serde_json::json!({ - "model": llm_model(), - "messages": [ - {"role": "system", "content": "You output JSON only. Be specific. Quote actual dialogue. Avoid template phrases."}, - {"role": "user", "content": prompt} - ], - "temperature": 0.1, - "max_tokens": 4096, - "stream": false - }); - - let resp = LLM_CLIENT - .post(llm_base_url()) - .json(&body) - .timeout(std::time::Duration::from_secs(180)) - .send() - .await? 
- .json::() - .await?; - - let content = resp["choices"][0]["message"]["content"] - .as_str() - .unwrap_or("{}"); - // Strip markdown code fences if present - let cleaned = content - .trim_start_matches("```json") - .trim_start_matches("```") - .trim_end_matches("```") - .trim(); - let parsed: serde_json::Value = - serde_json::from_str(cleaned).unwrap_or(serde_json::Value::Null); - - let parent_summary = parsed["scene_summary"].as_str().unwrap_or("").to_string(); - let five_w1h = parsed - .get("5w1h") - .cloned() - .unwrap_or(serde_json::Value::Null); - let mut child_summaries = Vec::new(); - - if let Some(arr) = parsed["sentences"].as_array() { - for entry in arr { - let idx = entry["index"].as_u64().unwrap_or(0).saturating_sub(1) as usize; - if let Some(enhanced) = entry["enhanced"].as_str() { - if idx < sentences.len() { - let child_5w1h = serde_json::json!({ - "who": entry["who"].as_str().unwrap_or(""), - "what": entry["what"].as_str().unwrap_or(""), - "where": entry["where"].as_str().unwrap_or(""), - "when": entry["when"].as_str().unwrap_or(""), - "why": entry["why"].as_str().unwrap_or(""), - "how": entry["how"].as_str().unwrap_or(""), - }); - child_summaries.push(ChildSummary { - chunk_id: sentences[idx].chunk_id.clone(), - enhanced: enhanced.to_string(), - five_w1h: child_5w1h, - }); - } - } - } - } - - // Fallback - if child_summaries.is_empty() && !parent_summary.is_empty() { - for s in sentences { - let text = s.text.trim(); - if !text.is_empty() { - child_summaries.push(ChildSummary { - chunk_id: s.chunk_id.clone(), - enhanced: format!("{} Scene: {}", text, parent_summary), - five_w1h: serde_json::Value::Null, - }); - } - } - } - - Ok(SceneSummaryResult { - parent_summary, - five_w1h, - child_summaries, - }) -} - -// ── DB Storage ── - -async fn store_parent_summary( - db: &PostgresDb, - cut_chunk_id: &str, - file_uuid: &str, - summary: &str, - five_w1h: &serde_json::Value, - sentences: &[SentenceChunk], -) -> anyhow::Result<()> { - let table = 
schema::table_name("chunk"); - let meta = serde_json::json!({ - "5w1h": five_w1h, - "sentence_ids": sentences.iter().map(|s| s.chunk_id.clone()).collect::>(), - "sentence_count": sentences.len(), - }); - sqlx::query(&format!( - r#"UPDATE {} SET summary_text = $1, metadata = jsonb_deep_merge(COALESCE(metadata, '{{}}'::jsonb), $2::jsonb) - WHERE chunk_id = $3 AND file_uuid = $4"#, - table - )) - .bind(summary) - .bind(&meta) - .bind(cut_chunk_id) - .bind(file_uuid) - .execute(db.pool()) - .await?; - Ok(()) -} - -async fn store_child_summaries( - db: &PostgresDb, - file_uuid: &str, - children: &[ChildSummary], -) -> anyhow::Result<()> { - let table = schema::table_name("chunk"); - for c in children { - let text = c.enhanced.trim(); - if text.is_empty() || text.len() < 10 { - continue; - } - // Update text_content (for embedding) + merge 5w1h into content - let merge = serde_json::json!({ "5w1h": c.five_w1h }); - sqlx::query(&format!( - r#"UPDATE {} SET text_content = $1, content = content || $2::jsonb, embedding = NULL - WHERE chunk_id = $3 AND file_uuid = $4"#, - table - )) - .bind(text) - .bind(&merge) - .bind(&c.chunk_id) - .bind(file_uuid) - .execute(db.pool()) - .await?; - } - Ok(()) -} - -// ── API Handlers ── - -async fn analyze_5w1h( - State(state): State, - Json(req): Json, -) -> Result, (StatusCode, String)> { - let db = PostgresDb::from_pool(state.db.pool().clone()); - - let cuts = fetch_cut_scenes(&db, &req.file_uuid) - .await - .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; - - let total = cuts.len(); - let mut processed = 0usize; - let mut prev_context: Vec = Vec::new(); - - for cut in &cuts { - // Skip already-summarized scenes but preserve context - if let Some(ref t) = cut.summary_text { - if t.len() > 20 { - processed += 1; - prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t)); - continue; - } - } - - let sentences = match fetch_sentences_in_scene(&db, &req.file_uuid, cut).await { - Ok(s) => s, - Err(e) => { - 
tracing::error!("[5W1H] fetch sentences failed: {}", e); - continue; - } - }; - if sentences.is_empty() { - continue; - } - - let context = prev_context.join("\n"); - let result = match summarize_one_scene(&db, &req.file_uuid, cut, &sentences, &context).await - { - Ok(r) => r, - Err(e) => { - tracing::error!("[5W1H] scene {} failed: {}", cut.chunk_id, e); - processed += 1; - continue; - } - }; - - if !result.parent_summary.is_empty() { - if let Err(e) = store_parent_summary( - &db, - &cut.chunk_id, - &req.file_uuid, - &result.parent_summary, - &result.five_w1h, - &sentences, - ) - .await - { - tracing::error!("[5W1H] parent: {}", e); - } - if let Err(e) = - store_child_summaries(&db, &req.file_uuid, &result.child_summaries).await - { - tracing::error!("[5W1H] child: {}", e); - } - prev_context.push(format!( - "Scene (t={:.0}s): {}", - cut.start_time, result.parent_summary - )); - } - processed += 1; - } - - Ok(Json(Analyze5W1HResponse { - success: true, - file_uuid: req.file_uuid, - scenes_processed: processed, - scenes_total: total, - })) -} - -async fn batch_analyze_5w1h( - State(state): State, - Json(req): Json, -) -> Result, (StatusCode, String)> { - let db = PostgresDb::from_pool(state.db.pool().clone()); - let mut jobs = Vec::new(); - - for uuid in &req.file_uuids { - let cuts = fetch_cut_scenes(&db, uuid).await.unwrap_or_default(); - let total = cuts.len(); - let mut processed = 0usize; - let mut prev_context: Vec = Vec::new(); - - for cut in &cuts { - if let Some(ref t) = cut.summary_text { - if t.len() > 20 { - processed += 1; - prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t)); - continue; - } - } - let sentences = fetch_sentences_in_scene(&db, uuid, cut) - .await - .unwrap_or_default(); - if sentences.is_empty() { - continue; - } - let context = prev_context.join("\n"); - if let Ok(result) = summarize_one_scene(&db, uuid, cut, &sentences, &context).await { - if !result.parent_summary.is_empty() { - let _ = store_parent_summary( - &db, 
- &cut.chunk_id, - uuid, - &result.parent_summary, - &result.five_w1h, - &sentences, - ) - .await; - let _ = store_child_summaries(&db, uuid, &result.child_summaries).await; - prev_context.push(format!( - "Scene (t={:.0}s): {}", - cut.start_time, result.parent_summary - )); - } - } - processed += 1; - } - - jobs.push(BatchJobStatus { - file_uuid: uuid.clone(), - status: if processed > 0 { - "completed".to_string() - } else { - "no_cut_scenes".to_string() - }, - message: format!("{}/{} scenes processed", processed, total), - }); - } - - Ok(Json(BatchAnalyze5W1HResponse { - success: true, - jobs, - })) -} - -async fn get_5w1h_status( - State(state): State, -) -> Result, (StatusCode, String)> { - let table = schema::table_name("videos"); - let rows = sqlx::query(&format!( - r#"SELECT file_uuid, processing_status->'agents'->'five_w1h' as s - FROM {} WHERE processing_status->'agents'->'five_w1h' IS NOT NULL - ORDER BY updated_at DESC LIMIT 50"#, - table - )) - .fetch_all(state.db.pool()) - .await - .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; - - let videos: Vec = rows - .iter() - .map(|r| { - serde_json::json!({ - "uuid": r.try_get::("file_uuid").unwrap_or_default(), - "five_w1h_status": r.try_get::,_>("s").ok().flatten(), - }) - }) - .collect(); - - Ok(Json( - serde_json::json!({ "success": true, "videos": videos }), - )) -} - -/// Pipeline-triggered entry point: run 5W1H agent for a file. 
-pub async fn run_5w1h_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Result<()> { - let cuts = fetch_cut_scenes(db, file_uuid).await?; - let total = cuts.len(); - let mut processed = 0usize; - let mut prev_context: Vec = Vec::new(); - - for cut in &cuts { - if let Some(ref t) = cut.summary_text { - if t.len() > 20 { - processed += 1; - prev_context.push(format!("Scene (t={:.0}s): {}", cut.start_time, t)); - continue; - } - } - let sentences = fetch_sentences_in_scene(db, file_uuid, cut).await?; - if sentences.is_empty() { - continue; - } - - let context = prev_context.join("\n"); - match summarize_one_scene(db, file_uuid, cut, &sentences, &context).await { - Ok(result) => { - if !result.parent_summary.is_empty() { - let _ = store_parent_summary( - db, - &cut.chunk_id, - file_uuid, - &result.parent_summary, - &result.five_w1h, - &sentences, - ) - .await; - let _ = store_child_summaries(db, file_uuid, &result.child_summaries).await; - prev_context.push(format!( - "Scene (t={:.0}s): {}", - cut.start_time, result.parent_summary - )); - } - processed += 1; - } - Err(e) => tracing::error!("[5W1H] Scene {} failed: {}", cut.chunk_id, e), - } - } - - tracing::info!( - "[5W1H] Done for {}: {}/{} scenes", - file_uuid, - processed, - total - ); - - // Auto-vectorize sentences with EmbeddingGemma (768D) - tracing::info!("[5W1H] Starting vectorize for sentence chunks..."); - let embedder = Embedder::new("embeddinggemma-300m".to_string()); - let qdrant = QdrantDb::new(); - qdrant.init_collection(768).await?; - - let chunk_table = schema::table_name("chunk"); - let rows = sqlx::query_as::<_, (String, String, String, i64, i64, f64, f64)>(&format!( - "SELECT chunk_id, chunk_type, text_content, start_frame, end_frame, start_time, end_time \ - FROM {} WHERE file_uuid = $1 AND chunk_type = 'sentence' AND embedding IS NULL \ - AND (text_content IS NOT NULL AND text_content != '') ORDER BY id", - chunk_table - )) - .bind(file_uuid) - .fetch_all(db.pool()) - .await?; - - let 
total_vec = rows.len(); - let mut stored = 0usize; - for (chunk_id, _ctype, text, start_frame, end_frame, start_time, end_time) in &rows { - let text = text.trim(); - if text.is_empty() || text.len() < 5 { - continue; - } - match embedder.embed_document(text).await { - Ok(vector) => { - if let Err(e) = sqlx::query(&format!( - "UPDATE {} SET embedding = $1::vector WHERE chunk_id = $2 AND file_uuid = $3", - chunk_table - )) - .bind(&vector as &[f32]) - .bind(chunk_id) - .bind(file_uuid) - .execute(db.pool()) - .await - { - tracing::error!("[Vectorize] PG failed for {}: {}", chunk_id, e); - continue; - } - let payload = VectorPayload { - file_uuid: file_uuid.to_string(), - chunk_id: chunk_id.clone(), - chunk_type: "sentence".to_string(), - start_frame: *start_frame, - end_frame: *end_frame, - start_time: *start_time, - end_time: *end_time, - text: Some(text.to_string()), - }; - if let Err(e) = qdrant.upsert_vector(chunk_id, &vector, payload).await { - tracing::error!("[Vectorize] Qdrant failed for {}: {}", chunk_id, e); - continue; - } - stored += 1; - if stored % 50 == 0 { - tracing::info!("[Vectorize] {}/{}", stored, total_vec); - } - } - Err(e) => tracing::error!("[Vectorize] Embed failed for {}: {}", chunk_id, e), - } - } - tracing::info!("[5W1H] Vectorize done: {}/{} stored", stored, total_vec); - Ok(()) -} diff --git a/src/api/identities.rs b/src/api/identities.rs index ce00ad4..2cd2e78 100644 --- a/src/api/identities.rs +++ b/src/api/identities.rs @@ -180,11 +180,11 @@ async fn list_identities( })?; let sql = format!( - "SELECT id::int, uuid, name, metadata FROM {} WHERE status IS NULL OR status != 'merged' ORDER BY id DESC LIMIT $1 OFFSET $2", + "SELECT id::int, uuid, name, metadata, status, starred FROM {} WHERE status IS NULL OR status != 'merged' ORDER BY id DESC LIMIT $1 OFFSET $2", id_table ); - let rows: Vec<(i32, uuid::Uuid, String, Option)> = match sqlx::query_as(&sql) + let rows: Vec<(i32, uuid::Uuid, String, Option, Option, Option)> = match 
sqlx::query_as(&sql) .bind(page_size as i64) .bind(offset) .fetch_all(db.pool()) @@ -201,11 +201,16 @@ let sql = format!( let identities: Vec = rows .into_iter() - .map(|r| IdentityResponse { - id: r.0, - identity_uuid: r.1.to_string().replace('-', ""), - name: r.2, - metadata: r.3, + .map(|r| { + IdentityResponse { + id: r.0, + identity_uuid: r.1.to_string().replace('-', ""), + name: r.2, + metadata: r.3, + status: r.4, + starred: r.5.unwrap_or(false), + file_uuids: vec![], // Removed N+1 query + } }) .collect(); @@ -281,6 +286,9 @@ pub struct IdentityResponse { pub identity_uuid: String, pub name: String, pub metadata: Option, + pub status: Option, + pub starred: bool, + pub file_uuids: Vec, } #[derive(Debug, Serialize)] diff --git a/src/api/identity_agent_api.rs b/src/api/identity_agent_api.rs index 290ceee..a9170e9 100644 --- a/src/api/identity_agent_api.rs +++ b/src/api/identity_agent_api.rs @@ -661,597 +661,21 @@ fn average_embeddings<'a>(embeddings: impl Iterator>) -> Vec /// Unknown: greedy stranger clustering (TH=0.40) /// Writes identity_ref/stranger_ref to Qdrant payload, TKG nodes, and face_detections. 
async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Result { - use crate::core::db::face_embedding_db::FaceEmbeddingDb; - use std::collections::HashMap; - - let face_db = FaceEmbeddingDb::new(); - - // Step 1: Load seeds from Qdrant (type=identity_seed) - let seeds = face_db.get_seed_embeddings().await?; - tracing::info!( - "[FaceMatch] Loaded {} seeds from Qdrant", - seeds.len() - ); - - // Step 2: Preload identity internal IDs (uuid → (id, name)) - let id_table = schema::table_name("identities"); - let seed_identity_map: HashMap = if !seeds.is_empty() { - let uuids: Vec = seeds.iter().map(|(uuid, _, _)| uuid.clone()).collect(); - if uuids.is_empty() { - HashMap::new() - } else { - let rows = sqlx::query_as::<_, (i32, String, String)>(&format!( - "SELECT id, uuid::text, name FROM {} WHERE uuid::text = ANY($1)", - id_table - )) - .bind(&uuids) - .fetch_all(pool) - .await? - .into_iter() - .map(|(id, uuid, name)| (uuid, (id, name))) - .collect(); - rows - } - } else { - HashMap::new() - }; - - // Step 3: Load face embeddings from Qdrant for this file - let qdrant_embeddings = face_db.get_all_embeddings_for_file(file_uuid).await?; - - if qdrant_embeddings.is_empty() { - tracing::warn!("[FaceMatch] No face embeddings in Qdrant for {}", file_uuid); - return Ok(0); - } - - // Step 4: Group embeddings by trace_id, keeping confidence - let mut trace_faces: HashMap, f64)>> = HashMap::new(); - for (_, emb, payload) in &qdrant_embeddings { - trace_faces - .entry(payload.trace_id) - .or_default() - .push((payload.frame, emb.clone(), payload.confidence)); - } - - // Step 5: Progressive multi-round matching with derived seeds - // Each round: choose a face with best seed sim for matching; separately, - // collect the highest-confidence face per trace for building derived seeds. 
- const TH_MIN: f32 = 0.35; - const DERIVED_CONF: f64 = 0.90; - const MAX_DERIVED_PER_ID: usize = 9; - const MAX_FACES_PER_TRACE: usize = 3; - const ANGLE_SIM_THRESHOLD: f32 = 0.90; - const TH_STRANGER: f32 = 0.40; - - let total_traces = trace_faces.len(); - let total_embeddings: usize = trace_faces.values().map(|v| v.len()).sum(); - tracing::info!( - "[FaceMatch] Loaded {} traces ({} face embeddings) from Qdrant for {}", - total_traces, - total_embeddings, + tracing::warn!( + "[FaceMatch] Face matching disabled - FaceEmbeddingDb removed. \ + TODO: Reimplement with _faces collection for {}", file_uuid ); - - let mut matched: HashMap = HashMap::new(); - let mut trace_face_count: HashMap = HashMap::new(); - - // All reference embeddings: start with original TMDb seeds - let mut all_refs: Vec<(String, String, Vec)> = seeds.clone(); - let thresholds = [0.55f32, 0.50, 0.45, 0.40, 0.35]; - let mut prev_total = 0usize; - - for (round_idx, &th) in thresholds.iter().enumerate() { - if th < TH_MIN { - break; - } - - let mut new_matches: HashMap = HashMap::new(); - let mut seed_candidates: Vec<(i32, String, i32, Vec, f64)> = Vec::new(); - - for (&tid, faces) in &trace_faces { - if matched.contains_key(&tid) { - continue; - } - trace_face_count.entry(tid).or_insert(faces.len()); - - let mut best_sim = 0.0f32; - let mut best_name = String::new(); - let mut best_id = 0i32; - // Collect all high-confidence faces in this trace for derived seeds - let mut trace_candidates: Vec<(Vec, f64)> = Vec::new(); - - for (_, emb, conf) in faces { - for (ref_uuid, ref_name, ref_emb) in &all_refs { - let s = cosine_similarity(emb, ref_emb); - if s > best_sim { - best_sim = s; - best_name = ref_name.clone(); - if let Some(id_str) = ref_uuid.strip_prefix("derived:") { - if let Ok(parsed) = id_str.parse::() { - best_id = parsed; - } - } else if let Some((id, _)) = seed_identity_map.get(ref_uuid) { - best_id = *id; - } - } - } - if *conf >= DERIVED_CONF { - trace_candidates.push((emb.clone(), 
*conf)); - } - } - - if best_sim >= th && best_id > 0 { - new_matches.insert(tid, (best_name.clone(), best_id)); - - // Top MAX_FACES_PER_TRACE highest-confidence faces with angular diversity - trace_candidates.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap()); - let mut selected: Vec> = Vec::new(); - for (emb, conf) in trace_candidates { - if selected.len() >= MAX_FACES_PER_TRACE { - break; - } - if selected.iter().any(|e| cosine_similarity(e, &emb) >= ANGLE_SIM_THRESHOLD) { - continue; - } - selected.push(emb.clone()); - seed_candidates.push((best_id, best_name.clone(), tid, emb, conf)); - } - } - } - - let new_count = new_matches.len(); - if new_count == 0 && round_idx > 0 { - break; - } - - matched.extend(new_matches); - - // Build derived seeds: pick up to MAX_DERIVED_PER_ID per identity - // (max MAX_FACES_PER_TRACE from each trace), sorted by confidence descending - seed_candidates.sort_by(|a, b| b.4.partial_cmp(&a.4).unwrap()); - let mut per_id: HashMap = HashMap::new(); - let mut trace_used_faces: HashMap = HashMap::new(); - let mut added_seeds = 0usize; - for (id, name, tid, emb, _) in &seed_candidates { - let cnt = per_id.entry(*id).or_insert(0); - if *cnt >= MAX_DERIVED_PER_ID { - continue; - } - let trace_cnt = trace_used_faces.entry(*tid).or_insert(0); - if *trace_cnt >= MAX_FACES_PER_TRACE { - continue; - } - *trace_cnt += 1; - *cnt += 1; - all_refs.push((format!("derived:{}", id), name.clone(), emb.clone())); - added_seeds += 1; - } - - tracing::info!( - "[FaceMatch] Round {}: matched {}+{}={} total (TH={}, {} new derived seeds)", - round_idx + 1, - prev_total, - new_count, - matched.len(), - th, - added_seeds - ); - - prev_total = matched.len(); - } - - // Step 7: Stranger clustering for unmatched traces - let unmatched_ids: Vec = trace_faces - .keys() - .filter(|tid| !matched.contains_key(tid)) - .copied() - .collect(); - - let mut stranger_map: HashMap = HashMap::new(); - let mut assigned_stranger: std::collections::HashSet = 
std::collections::HashSet::new(); - let mut stranger_count = 0usize; - - // Sort by face count descending (most reliable first) - let mut sorted_unmatched: Vec = unmatched_ids.clone(); - sorted_unmatched.sort_by(|a, b| { - trace_face_count - .get(b) - .unwrap_or(&0) - .cmp(trace_face_count.get(a).unwrap_or(&0)) - }); - - for &tid in &sorted_unmatched { - if assigned_stranger.contains(&tid) { - continue; - } - let centroid_a = if let Some(faces) = trace_faces.get(&tid) { - average_embeddings(faces.iter().map(|(_, emb, _)| emb)) - } else { - continue; - }; - stranger_count += 1; - let stranger_id = format!("{}:stranger_{}", file_uuid, stranger_count); - assigned_stranger.insert(tid); - stranger_map.insert(tid, stranger_id.clone()); - - for &other_tid in &sorted_unmatched { - if assigned_stranger.contains(&other_tid) || other_tid == tid { - continue; - } - if let Some(faces_b) = trace_faces.get(&other_tid) { - let centroid_b = average_embeddings(faces_b.iter().map(|(_, emb, _)| emb)); - let s = cosine_similarity(¢roid_a, ¢roid_b); - if s >= TH_STRANGER { - assigned_stranger.insert(other_tid); - stranger_map.insert(other_tid, stranger_id.clone()); - } - } - } - } - - let stranger_trace_count = stranger_map.len(); - tracing::info!( - "[FaceMatch] Stranger clusters: {} groups, {} traces", - stranger_count, - stranger_trace_count - ); - - // Step 8: Write results to TKG nodes + Qdrant payload + face_detections - let fd_table = schema::table_name("face_detections"); - let nodes_table = schema::table_name("tkg_nodes"); - let mut pg_updated = 0usize; - - // Clear old identity assignments before writing new ones - let _ = sqlx::query(&format!( - "UPDATE {} SET identity_id = NULL WHERE file_uuid = $1", - fd_table - )) - .bind(file_uuid) - .execute(pool) - .await; - - // 8a: Matched traces → identity_ref - for (&tid, (name, identity_id)) in &matched { - // Skip if identity_id is invalid (FK constraint would fail) - if *identity_id <= 0 { - tracing::warn!( - "[FaceMatch] 
Skipping trace {}: invalid identity_id={}", - tid, identity_id - ); - continue; - } - - let identity_ref = format!("{}:{}", file_uuid, identity_id); - - // TKG node - let external_id = format!("face_track_{}", tid); - if let Err(e) = sqlx::query(&format!( - "UPDATE {} SET properties = jsonb_set(\ - jsonb_set(properties, '{{identity_ref}}', to_jsonb($1), true),\ - '{{identity_name}}', to_jsonb($2), true)\ - WHERE file_uuid = $3 AND node_type = 'face_track' AND external_id = $4", - nodes_table - )) - .bind(&identity_ref) - .bind(name) - .bind(file_uuid) - .bind(&external_id) - .execute(pool) - .await - { - tracing::warn!("[FaceMatch] TKG update failed for trace {}: {:?}", tid, e); - } - - // Qdrant payload - let _ = face_db - .update_identity_ref_by_trace(file_uuid, tid, &identity_ref) - .await; - - // PostgreSQL face_detections (backward compat) - let rows = sqlx::query(&format!( - "UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND trace_id = $3", - fd_table - )) - .bind(identity_id) - .bind(file_uuid) - .bind(tid) - .execute(pool) - .await - .map(|r| r.rows_affected()) - .unwrap_or(0); - pg_updated += rows as usize; - } - - // 8b: Stranger traces → stranger_ref - for (&tid, stranger_ref) in &stranger_map { - // TKG node - let external_id = format!("face_track_{}", tid); - if let Err(e) = sqlx::query(&format!( - "UPDATE {} SET properties = jsonb_set(\ - properties, '{{stranger_ref}}', to_jsonb($1), true)\ - WHERE file_uuid = $2 AND node_type = 'face_track' AND external_id = $3", - nodes_table - )) - .bind(stranger_ref) - .bind(file_uuid) - .bind(&external_id) - .execute(pool) - .await - { - tracing::warn!("[FaceMatch] TKG stranger update failed for trace {}: {:?}", tid, e); - } - - // Qdrant payload - let _ = face_db - .update_stranger_ref_by_trace(file_uuid, tid, stranger_ref) - .await; - } - - tracing::info!( - "[FaceMatch] Done: {} matched, {} strangers — {} face_detections updated", - matched.len(), - stranger_trace_count, - pg_updated - ); - 
Ok(pg_updated) + Ok(0) } -/// Fallback: PostgreSQL-based matching (original implementation) +/// Fallback: PostgreSQL-based matching (disabled - embedding column removed) async fn match_faces_iterative_pg(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Result { - // Step 1: 載入 TMDb identities (source='tmdb' 且有 face_embedding) - let identities_table = schema::table_name("identities"); - let tmdb_rows = sqlx::query_as::<_, (i32, String, Vec)>( - &format!("SELECT id, name, face_embedding::real[] FROM {} WHERE source='tmdb' AND face_embedding IS NOT NULL", identities_table) - ) - .fetch_all(pool).await?; - - if tmdb_rows.is_empty() { - tracing::warn!("[FaceMatch-PG] No TMDb identities with face embeddings"); - return Ok(0); - } - tracing::info!( - "[FaceMatch-PG] Loaded {} TMDb seed identities", - tmdb_rows.len() + tracing::warn!( + "[FaceMatch-PG] PostgreSQL matching disabled - embedding column removed for {}", + file_uuid ); - - // Step 2: 載入所有 face_detections(含 frame_number),按 trace_id 分組 - let fd_table = schema::table_name("face_detections"); - let fd_rows = sqlx::query_as::<_, (i32, i64, Vec)>(&format!( - "SELECT trace_id, frame_number, embedding FROM {} \ - WHERE file_uuid=$1 AND trace_id IS NOT NULL AND embedding IS NOT NULL \ - ORDER BY trace_id, frame_number", - fd_table - )) - .bind(file_uuid) - .fetch_all(pool) - .await?; - - if fd_rows.is_empty() { - tracing::warn!("[FaceMatch-PG] No face detections with embeddings"); - return Ok(0); - } - - // 分組:trace_id → (frame_number, embedding) - use std::collections::HashMap; - let mut face_track_faces_raw: HashMap)>> = HashMap::new(); - for (tid, frame, emb) in &fd_rows { - face_track_faces_raw - .entry(*tid) - .or_insert_with(Vec::new) - .push((*frame, emb.clone())); - } - - // 從每個 trace 選取不同角度的 3 個 face embedding - let mut face_track_samples: HashMap>> = HashMap::new(); - for (tid, mut faces) in face_track_faces_raw { - faces.sort_by_key(|(frame, _)| *frame); - let n = faces.len(); - let indices = if n <= 3 { - 
(0..n).collect() - } else { - let mid = n / 2; - vec![0, mid, n - 1] - }; - let samples: Vec> = indices.iter().map(|&i| faces[i].1.clone()).collect(); - face_track_samples.insert(tid, samples); - } - - let total_traces = face_track_samples.len(); - let sample_count: usize = face_track_samples.values().map(|v| v.len()).sum(); - tracing::info!( - "[FaceMatch-PG] Loaded {} traces, sampled {} embeddings (3-angle)", - total_traces, - sample_count - ); - - // Step 3: 建立 TMDb 查找表 - let tmdb_seeds: Vec<(i32, String, Vec)> = tmdb_rows; - - // Step 4: 迭代匹配 - const TH: f32 = 0.50; - let mut matched: HashMap = HashMap::new(); // trace_id → identity_name - - // Round 1: 用 3-angle samples 比對 TMDb - for (&tid, samples) in &face_track_samples { - let mut best_name = String::new(); - let mut best_sim = 0.0f32; - for (_, ref name, ref tmdb_emb) in &tmdb_seeds { - for face_emb in samples { - let s = cosine_similarity(face_emb, tmdb_emb); - if s > best_sim { - best_sim = s; - best_name = name.clone(); - } - } - } - if best_sim >= TH { - matched.insert(tid, best_name); - } - } - tracing::info!( - "[FaceMatch] Round 1: {} matched ({}%) — writing to DB", - matched.len(), - matched.len() * 100 / total_traces - ); - - // Step 5: 寫入 DB — Round 1 結果先存 (Phase 3: update both face_detections AND tkg_nodes) - let identities_table = schema::table_name("identities"); - let strangers_table = schema::table_name("strangers"); - let fd_table = schema::table_name("face_detections"); - let nodes_table = schema::table_name("tkg_nodes"); - let mut updated = 0usize; - for (tid, name) in &matched { - let id_opt = sqlx::query_scalar::<_, Option>(&format!( - "SELECT id FROM {} WHERE name=$1 AND source='tmdb'", - identities_table - )) - .bind(name) - .fetch_optional(pool) - .await?; - if let Some(identity_id) = id_opt { - let _ = sqlx::query(&format!( - "UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3", - fd_table - )) - .bind(identity_id) - .bind(file_uuid) - .bind(tid) - .execute(pool) - 
.await; - - // Phase 3: Also update TKG node - let external_id = format!("face_track_{}", tid); - let _ = sqlx::query(&format!( - "UPDATE {} SET properties = jsonb_set(\ - jsonb_set(properties, '{{identity_id}}', $1::jsonb, false),\ - '{{identity_name}}', $2::jsonb, false)\ - WHERE file_uuid = $3 AND node_type = 'face_track' AND external_id = $4", - nodes_table - )) - .bind(identity_id) - .bind(name.as_str()) - .bind(file_uuid) - .bind(&external_id) - .execute(pool) - .await; - - updated += 1; - } - } - tracing::info!("[FaceMatch] Round 1: updated {} face_detections", updated); - - // Round 2+: 用已匹配的 face 作為 seed 傳播(剩餘未匹配的 trace) - let initial_matched = matched.len(); - for round_n in 2..=5 { - let prev = matched.len(); - // 建立 seed pool: name → Vec - let mut seed_pool: HashMap>> = HashMap::new(); - for (&tid, name) in &matched { - if let Some(samples) = face_track_samples.get(&tid) { - seed_pool - .entry(name.clone()) - .or_default() - .extend(samples.iter()); - } - } - - let mut new_matches: Vec<(i32, String)> = Vec::new(); - for (&tid, samples) in &face_track_samples { - if matched.contains_key(&tid) { - continue; - } - let mut best_name = String::new(); - let mut best_sim = 0.0f32; - if samples.is_empty() { - continue; - } - // 用 3-angle samples 分別比對 seed,取最高 similarity - for (name, seed_faces) in &seed_pool { - for face_emb in samples { - for seed in seed_faces { - let s = cosine_similarity(face_emb, seed); - if s > best_sim { - best_sim = s; - best_name = name.clone(); - } - } - } - } - if best_sim >= TH { - new_matches.push((tid, best_name)); - } - } - for (tid, name) in new_matches { - matched.insert(tid, name); - } - let new = matched.len() - prev; - tracing::info!( - "[FaceMatch] Round {}: +{} matched (total {}, {}%)", - round_n, - new, - matched.len(), - matched.len() * 100 / total_traces - ); - if new < 5 { - break; - } - } - - // Step 6: 未匹配的 trace 設 stranger_id = strangers.id (FK) - // First: ensure strangers records exist - let _ = 
sqlx::query(&format!( - "INSERT INTO {} (file_uuid, trace_id) \ - SELECT $1, fd.trace_id FROM {} fd \ - WHERE fd.file_uuid = $1 AND fd.trace_id IS NOT NULL \ - AND fd.identity_id IS NULL \ - ON CONFLICT (file_uuid, trace_id) DO NOTHING", - strangers_table, fd_table - )) - .bind(file_uuid) - .execute(pool) - .await?; - - // Then: update face_detections.stranger_id = strangers.id - let stranger_update = sqlx::query(&format!( - "UPDATE {} fd SET stranger_id = s.id \ - FROM {} s \ - WHERE s.file_uuid = fd.file_uuid AND s.trace_id = fd.trace_id \ - AND fd.file_uuid = $1 AND fd.identity_id IS NULL \ - AND fd.trace_id IS NOT NULL AND fd.stranger_id IS NULL", - fd_table, strangers_table - )) - .bind(file_uuid) - .execute(pool) - .await?; - let stranger_count = stranger_update.rows_affected(); - - // Step 7: Save identity files for all affected identities - let affected = sqlx::query_scalar::<_, uuid::Uuid>(&format!( - "SELECT DISTINCT i.uuid FROM {} i \ - JOIN {} fd ON fd.identity_id = i.id \ - WHERE fd.file_uuid=$1 AND fd.identity_id IS NOT NULL", - identities_table, fd_table - )) - .bind(file_uuid) - .fetch_all(pool) - .await - .unwrap_or_default(); - for uuid in &affected { - let us = uuid.to_string().replace('-', ""); - if let Err(e) = crate::core::identity::storage::save_identity_file_by_pool(pool, &us).await - { - tracing::warn!("[FaceMatch] Failed to save identity file {}: {}", us, e); - } - } - tracing::info!( - "[FaceMatch] Done: {}/{} traces matched ({}%), {} strangers, {} identity files", - matched.len(), - total_traces, - matched.len() * 100 / total_traces, - stranger_count, - affected.len() - ); - Ok(updated) + Ok(0) } /// Bind ASRX speakers to face traces based on temporal overlap. @@ -1589,126 +1013,9 @@ async fn run_identity_handler( /// Read all TMDb identities with profile photos, extract face embeddings, store in Qdrant as seeds. 
pub async fn generate_seed_embeddings(db: &PostgresDb) -> anyhow::Result { - use crate::core::db::face_embedding_db::FaceEmbeddingDb; - use std::path::Path; - - let pool = db.pool(); - let id_table = schema::table_name("identities"); - - let rows = sqlx::query_as::<_, (i32, String, String, i32, String)>(&format!( - "SELECT id, name, uuid::text, tmdb_id, tmdb_profile FROM {} \ - WHERE source='tmdb' AND tmdb_profile IS NOT NULL", - id_table - )) - .fetch_all(pool) - .await?; - - if rows.is_empty() { - tracing::warn!("[GenerateSeeds] No TMDb identities with profile photos"); - return Ok(0); - } - - let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR") - .unwrap_or_else(|_| "/Users/accusys/momentry_core_0.1/scripts".to_string()); - let python_path = std::env::var("MOMENTRY_PYTHON_PATH") - .unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string()); - - let extract_script = Path::new(&scripts_dir).join("extract_face_embedding.py"); - let face_db = FaceEmbeddingDb::new(); - - let mut success = 0usize; - for (id, name, uuid, tmdb_id, profile_url) in &rows { - tracing::info!("[GenerateSeeds] Processing {} ({})", name, uuid); - - // Download profile image - let client = reqwest::Client::builder() - .timeout(std::time::Duration::from_secs(30)) - .build() - .unwrap_or_else(|_| reqwest::Client::new()); - let resp = client.get(profile_url).send().await; - let image_bytes = match resp { - Ok(r) if r.status().is_success() => r.bytes().await.unwrap_or_default(), - _ => { - tracing::warn!("[GenerateSeeds] Failed to download: {} from {}", name, profile_url); - continue; - } - }; - - if image_bytes.is_empty() { - tracing::warn!("[GenerateSeeds] Empty image for {}", name); - continue; - } - - // Save to temp file - let temp_dir = std::env::temp_dir().join("momentry_seed_faces"); - std::fs::create_dir_all(&temp_dir)?; - let temp_img = temp_dir.join(format!("{}.jpg", uuid)); - std::fs::write(&temp_img, &image_bytes)?; - - // Extract embedding with timeout - use 
tokio::time::timeout; - let output = timeout( - std::time::Duration::from_secs(180), - tokio::process::Command::new(&python_path) - .arg(&extract_script) - .arg(&temp_img) - .output(), - ) - .await - .map_err(|_| anyhow::anyhow!("Extract embedding timed out for {}", name))??; - - let _ = std::fs::remove_file(&temp_img); - - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - tracing::warn!( - "[GenerateSeeds] Extraction failed for {}: {}", - name, - stderr.trim() - ); - continue; - } - - let stdout = String::from_utf8_lossy(&output.stdout); - let extract_result: serde_json::Value = match serde_json::from_str(&stdout) { - Ok(v) => v, - Err(e) => { - tracing::warn!("[GenerateSeeds] Parse error for {}: {}", name, e); - continue; - } - }; - - let embedding: Vec = match serde_json::from_value( - extract_result.get("embedding").ok_or_else(|| anyhow::anyhow!("No embedding"))?.clone(), - ) { - Ok(v) => v, - Err(e) => { - tracing::warn!("[GenerateSeeds] Embedding format error for {}: {}", name, e); - continue; - } - }; - - let embedding_f32: Vec = embedding.into_iter().map(|v| v as f32).collect(); - - // Store in Qdrant - match face_db - .upsert_seed_embedding(uuid, name, *tmdb_id, &embedding_f32) - .await - { - Ok(_) => { - success += 1; - tracing::info!("[GenerateSeeds] Stored seed for {}", name); - } - Err(e) => { - tracing::warn!("[GenerateSeeds] Qdrant error for {}: {}", name, e); - } - } - } - - tracing::info!( - "[GenerateSeeds] Done: {}/{} seeds generated", - success, - rows.len() + tracing::warn!( + "[GenerateSeeds] Seed embedding generation disabled - FaceEmbeddingDb removed. 
\ + TODO: Reimplement with _faces collection" ); - Ok(success) + Ok(0) } diff --git a/src/api/identity_binding.rs b/src/api/identity_binding.rs index 4789ae8..1195138 100644 --- a/src/api/identity_binding.rs +++ b/src/api/identity_binding.rs @@ -67,11 +67,13 @@ pub async fn bind_identity( Path(identity_uuid): Path, Json(req): Json, ) -> Result>, (StatusCode, Json)> { + tracing::info!("[bind_identity] req: {:?}", req); let table = crate::core::db::schema::table_name("face_detections"); let id_table = crate::core::db::schema::table_name("identities"); let history_table = crate::core::db::schema::table_name("identity_history"); let uuid_clean = identity_uuid.replace('-', ""); + tracing::info!("[bind_identity] uuid_clean={}, expand_to_trace={:?}", uuid_clean, req.expand_to_trace); let identity_row: Option<(i32, String)> = sqlx::query_as(&format!( "SELECT id, name FROM {} WHERE REPLACE(uuid::text, '-', '') = $1", id_table @@ -188,21 +190,32 @@ pub async fn bind_identity( })? .flatten(); - // Update Qdrant + TKG if trace_id exists - if let Some(tid) = trace_id { - // 1. 
Update Qdrant payload - let face_db = crate::core::db::FaceEmbeddingDb::new(); - if let Err(e) = face_db - .update_identity_by_trace(&req.file_uuid, tid, &uuid_clean) - .await - { - tracing::warn!( - "[bind] Failed to update Qdrant identity_uuid for trace {}: {}", - tid, e - ); + // Expand to entire trace if requested + tracing::info!("[bind_identity] trace_id={:?}, expand_to_trace={:?}", trace_id, req.expand_to_trace); + if req.expand_to_trace.unwrap_or(false) && trace_id.is_some() { + let tid = trace_id.unwrap(); + tracing::info!("[bind_identity] Expanding to trace {} for file {}", tid, req.file_uuid); + let expand_result = sqlx::query(&format!( + "UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND trace_id = $3", + table + )) + .bind(identity_id) + .bind(&req.file_uuid) + .bind(tid) + .execute(state.db.pool()) + .await; + if let Ok(r) = expand_result { + tracing::info!("[bind] Expanded to trace {}: {} rows", tid, r.rows_affected()); + } else { + tracing::error!("[bind] Failed to expand to trace {}: {:?}", tid, expand_result.err()); } + } else { + tracing::info!("[bind_identity] NOT expanding: expand_to_trace={:?}, trace_id={:?}", req.expand_to_trace, trace_id); + } - // 2. Update TKG face_track node (dual-field design) + // Update TKG if trace_id exists + if let Some(tid) = trace_id { + // Update TKG face_track node (dual-field design) let tkg_table = crate::core::db::schema::table_name("tkg_nodes"); let ext_id = format!("face_track_{}", tid); let identity_ref = format!("{}:identity_{}", req.file_uuid, identity_id); @@ -380,21 +393,9 @@ pub async fn unbind_identity( })? .flatten(); - // Clear Qdrant + TKG if trace_id exists + // Clear TKG if trace_id exists if let Some(tid) = trace_id { - // 1. 
Clear Qdrant payload - let face_db = crate::core::db::FaceEmbeddingDb::new(); - if let Err(e) = face_db - .clear_identity_by_trace(&req.file_uuid, tid) - .await - { - tracing::warn!( - "[unbind] Failed to clear Qdrant identity_uuid for trace {}: {}", - tid, e - ); - } - - // 2. Update TKG face_track node (restore stranger_ref) + // Update TKG face_track node (restore stranger_ref) let tkg_table = crate::core::db::schema::table_name("tkg_nodes"); let ext_id = format!("face_track_{}", tid); let stranger_ref = format!("{}:stranger_trace_{}", req.file_uuid, tid); @@ -2199,8 +2200,10 @@ pub async fn list_pending_persons( let fd_table = crate::core::db::schema::table_name("face_detections"); let rows: Vec<(i32, String, String, chrono::NaiveDateTime)> = sqlx::query_as(&format!( - "SELECT id, uuid::text, name, created_at FROM {} WHERE file_uuid = $1 AND status = 'pending' ORDER BY created_at DESC", - id_table + "SELECT DISTINCT i.id, i.uuid::text, i.name, i.created_at FROM {} i \ + JOIN {} fd ON fd.identity_id = i.id \ + WHERE fd.file_uuid = $1 AND i.status = 'pending' ORDER BY i.created_at DESC", + id_table, fd_table )) .bind(&file_uuid) .fetch_all(state.db.pool()) diff --git a/src/api/mod.rs b/src/api/mod.rs index 7d1dbb4..ab70792 100644 --- a/src/api/mod.rs +++ b/src/api/mod.rs @@ -4,7 +4,6 @@ pub mod auth; pub mod checkin_api; pub mod docs; pub mod files; -pub mod five_w1h_agent_api; pub mod health; pub mod identities; pub mod identity_agent_api; diff --git a/src/api/processing.rs b/src/api/processing.rs index f33bfa9..f751176 100644 --- a/src/api/processing.rs +++ b/src/api/processing.rs @@ -260,7 +260,25 @@ async fn trigger_processing( .await .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; - if existing_id.is_none() { + if let Some(job_id) = existing_id { + // Clean up stale processor_results from previous runs + // Old entries with status='running' from a dead worker session + // would block the worker from actually running processors. 
+ let pr_table = schema::table_name("processor_results"); + sqlx::query(&format!("DELETE FROM {pr_table} WHERE job_id = $1")) + .bind(job_id) + .execute(state.db.pool()) + .await + .map_err(|e| { + tracing::error!( + "[TRIGGER] Failed to clean processor_results for job {}: {}", + job_id, + e + ); + StatusCode::INTERNAL_SERVER_ERROR + })?; + tracing::info!("[TRIGGER] Cleaned processor_results for job {}", job_id); + } else { state .db .create_monitor_job(&file_uuid, Some(&file_path)) diff --git a/src/api/server.rs b/src/api/server.rs index c718ba1..ddba95e 100644 --- a/src/api/server.rs +++ b/src/api/server.rs @@ -14,7 +14,6 @@ use super::auth; use super::checkin_api; use super::docs; use super::files; -use super::five_w1h_agent_api; use super::health; use super::identities; use super::identity_agent_api; @@ -116,7 +115,6 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> { .merge(agent_search::agent_search_routes()) .merge(processing::processing_routes()) .merge(identity_agent_api::identity_agent_routes()) - .merge(five_w1h_agent_api::five_w1h_agent_routes()) .merge(media_api::bbox_routes()) .merge(media_api::media_proxy_routes()) .merge(trace_agent_api::trace_agent_routes()) diff --git a/src/api/tmdb_api.rs b/src/api/tmdb_api.rs index 49618d7..db88f5d 100644 --- a/src/api/tmdb_api.rs +++ b/src/api/tmdb_api.rs @@ -608,122 +608,17 @@ async fn tmdb_match_handler( )); } - // Get all TMDb identities with face_embedding - let tmdb_rows = sqlx::query_as::<_, (i32, String, Vec)>( - &format!( - "SELECT id, name, face_embedding::real[] FROM {} WHERE source='tmdb' AND face_embedding IS NOT NULL", - crate::core::db::schema::table_name("identities") - ) - ) - .fetch_all(state.db.pool()) - .await - .map_err(|e| { - (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"error": e.to_string()}))) - })?; - - if tmdb_rows.is_empty() { - return Ok(Json(TmdbMatchResponse { - success: true, - file_uuid, - bindings_created: 0, - tmdb_identities_available: 
0, - message: "No TMDb identities with face embeddings".to_string(), - })); - } - - let face_collection = format!( - "{}_faces", - crate::core::config::REDIS_KEY_PREFIX - .as_str() - .trim_end_matches(':') + tracing::warn!( + "[TKG-MATCH] TMDb matching disabled - sync_trace_embeddings removed. \ + TODO: Reimplement with _faces collection for {}", + file_uuid ); - let qdrant = QdrantDb::new(); - let _ = qdrant.ensure_collection(&face_collection, 512).await; - - let trace_collection = format!( - "{}_traces", - crate::core::config::REDIS_KEY_PREFIX - .as_str() - .trim_end_matches(':') - ); - let _ = qdrant.ensure_collection(&trace_collection, 512).await; - - // Sync trace embeddings (idempotent) - if let Err(e) = crate::core::db::qdrant_db::sync_trace_embeddings(&file_uuid).await { - tracing::error!("[TKG-MATCH] Trace sync failed: {}", e); - } - - let mut total_bindings = 0usize; - - for (tmdb_id, tmdb_name, tmdb_embedding) in &tmdb_rows { - // Search Qdrant trace collection with this TMDb embedding - let results = match qdrant - .search_face_collection( - &trace_collection, - tmdb_embedding, - 100, - "source", - "tmdb", - Some(&file_uuid), - ) - .await - { - Ok(r) => r, - Err(e) => { - tracing::warn!("[TKG-MATCH] Qdrant search failed for {}: {}", tmdb_name, e); - continue; - } - }; - - // Filter results by threshold and file_uuid - let filtered: Vec<_> = results - .into_iter() - .filter(|(score, payload)| { - *score >= 0.50 - && payload.get("file_uuid").and_then(|v| v.as_str()) == Some(&file_uuid) - }) - .collect(); - - if filtered.is_empty() { - continue; - } - - // Bind matched traces directly - let mut bound_count = 0usize; - for (_score, payload) in &filtered { - if let Some(tid) = payload.get("trace_id").and_then(|v| v.as_i64()) { - let r = sqlx::query(&format!( - "UPDATE {} SET identity_id=$1 WHERE file_uuid=$2 AND trace_id=$3", - crate::core::db::schema::table_name("face_detections") - )) - .bind(tmdb_id) - .bind(&file_uuid) - .bind(tid as i32) - 
.execute(state.db.pool()) - .await; - if let Ok(result) = r { - bound_count += result.rows_affected() as usize; - } - } - } - - if bound_count > 0 { - tracing::info!( - "[TKG-MATCH] {}: bound {} traces to TMDb identity {}", - tmdb_name, - bound_count, - tmdb_id - ); - } - total_bindings += bound_count; - } - Ok(Json(TmdbMatchResponse { success: true, file_uuid, - bindings_created: total_bindings, - tmdb_identities_available: tmdb_rows.len(), - message: format!("{} traces matched to TMDb identities", total_bindings), + bindings_created: 0, + tmdb_identities_available: 0, + message: "TMDb matching disabled - needs reimplementation with _faces collection".to_string(), })) } diff --git a/src/cli/args.rs b/src/cli/args.rs index 2d16e87..fbbf0e5 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -45,11 +45,6 @@ pub enum Commands { /// File UUID uuid: String, }, - /// Generate story for cut scenes - Story { - /// UUID - uuid: String, - }, /// Detect objects in an image using CLIP or Qwen3-VL Detect { /// Image path diff --git a/src/core/checkin.rs b/src/core/checkin.rs index 59f8988..159db36 100644 --- a/src/core/checkin.rs +++ b/src/core/checkin.rs @@ -145,42 +145,6 @@ pub async fn checkin(db: &PostgresDb, file_uuid: &str) -> Result } } } - - // Traces → production traces collection - let traces_coll = format!( - "{}_traces", - crate::core::config::REDIS_KEY_PREFIX - .as_str() - .trim_end_matches(':') - ); - for point in &ws_data.traces { - if let Some(ref vector) = point.vector { - let payload_val: serde_json::Value = - serde_json::to_value(&point.payload).unwrap_or(serde_json::Value::Null); - let point_id: u64 = match point.id.parse::() { - Ok(id) => id, - Err(_) => { - use std::hash::{Hash, Hasher}; - let mut hasher = std::collections::hash_map::DefaultHasher::new(); - point.id.hash(&mut hasher); - hasher.finish() - } - }; - if let Err(e) = qdrant - .upsert_vector_to_collection( - &traces_coll, - point_id, - vector, - Some(payload_val), - ) - .await - { - 
warn!("Failed to checkin trace vector {}: {}", point.id, e); - } else { - vectors_moved += 1; - } - } - } } Err(e) => { warn!("Failed to scroll Qdrant workspace for {}: {}", file_uuid, e); @@ -297,10 +261,9 @@ pub async fn checkout(db: &PostgresDb, file_uuid: &str) -> Result, - #[serde(skip_serializing_if = "Option::is_none")] - pub identity_ref: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub stranger_ref: Option, - #[serde(skip_serializing_if = "Option::is_none", rename = "type")] - pub r#type: Option, -} - -#[derive(Debug, Clone, Deserialize)] -pub struct FaceEmbeddingPoint { - pub id: String, - pub vector: Vec, - pub payload: FaceEmbeddingPayload, - pub score: f64, -} - -impl FaceEmbeddingDb { - pub fn new() -> Self { - let schema = std::env::var("DATABASE_SCHEMA").unwrap_or_else(|_| "dev".to_string()); - let collection_name = format!("{}_face_embeddings", schema); - - let base_url = - std::env::var("QDRANT_URL").unwrap_or_else(|_| "http://localhost:6333".to_string()); - let api_key = std::env::var("QDRANT_API_KEY") - .unwrap_or_else(|_| "Test3200Test3200Test3200".to_string()); - - Self { - client: Client::new(), - base_url, - api_key, - collection_name, - } - } - - pub async fn init_collection(&self) -> Result<()> { - let url = format!("{}/collections/{}", self.base_url, self.collection_name); - - let response = self - .client - .get(&url) - .header("api-key", &self.api_key) - .send() - .await?; - - if response.status().is_success() { - tracing::info!( - "[FaceEmbedding] Collection {} already exists", - self.collection_name - ); - return Ok(()); - } - - let create_url = format!("{}/collections/{}", self.base_url, self.collection_name); - let body = serde_json::json!({ - "vectors": { - "size": 512, - "distance": "Cosine" - } - }); - - self.client - .put(&create_url) - .header("api-key", &self.api_key) - .header("Content-Type", "application/json") - .json(&body) - .send() - .await - .context("Failed to create face embeddings collection")?; - - 
tracing::info!( - "[FaceEmbedding] Created collection {} (dim=512)", - self.collection_name - ); - Ok(()) - } - - pub async fn upsert_embedding( - &self, - point_id: &str, - embedding: &[f32], - payload: &FaceEmbeddingPayload, - ) -> Result<()> { - let url = format!( - "{}/collections/{}/points?wait=true", - self.base_url, self.collection_name - ); - - let body = serde_json::json!({ - "points": [{ - "id": point_id, - "vector": embedding, - "payload": payload - }] - }); - - let response = self - .client - .put(&url) - .header("api-key", &self.api_key) - .header("Content-Type", "application/json") - .json(&body) - .send() - .await - .context("Failed to upsert face embedding")?; - - if !response.status().is_success() { - let text = response.text().await.unwrap_or_default(); - anyhow::bail!("Qdrant upsert failed: {}", text); - } - - Ok(()) - } - - pub async fn batch_upsert( - &self, - points: Vec<(String, Vec, FaceEmbeddingPayload)>, - ) -> Result { - if points.is_empty() { - return Ok(0); - } - - let url = format!( - "{}/collections/{}/points?wait=true", - self.base_url, self.collection_name - ); - - let body = serde_json::json!({ - "points": points.iter().map(|(id, vec, payload)| { - // Parse id as u64 for Qdrant (requires integer or UUID) - let id_num: u64 = id.parse().unwrap_or(0); - serde_json::json!({ - "id": id_num, - "vector": vec, - "payload": payload - }) - }).collect::>() - }); - - let response = self - .client - .put(&url) - .header("api-key", &self.api_key) - .header("Content-Type", "application/json") - .json(&body) - .send() - .await - .context("Failed to batch upsert face embeddings")?; - - if !response.status().is_success() { - let status = response.status(); - let text = response.text().await.unwrap_or_default(); - anyhow::bail!("Qdrant batch upsert failed (HTTP {}): {}", status, text); - } - - Ok(points.len()) - } - - pub async fn update_identity_by_trace( - &self, - file_uuid: &str, - trace_id: i32, - identity_uuid: &str, - ) -> Result { - let url = 
format!( - "{}/collections/{}/points", - self.base_url, self.collection_name - ); - - let body = serde_json::json!({ - "filter": { - "must": [ - { - "key": "file_uuid", - "match": { "value": file_uuid } - }, - { - "key": "trace_id", - "match": { "value": trace_id } - } - ] - }, - "payload": { - "identity_uuid": identity_uuid - } - }); - - let response = self - .client - .post(&url) - .header("api-key", &self.api_key) - .header("Content-Type", "application/json") - .json(&body) - .send() - .await - .context("Failed to update identity_uuid in Qdrant")?; - - if !response.status().is_success() { - let text = response.text().await.unwrap_or_default(); - anyhow::bail!("Qdrant identity update failed: {}", text); - } - - tracing::info!( - "[FaceEmbedding] Updated identity_uuid={} for file={}, trace={}", - identity_uuid, file_uuid, trace_id - ); - - Ok(1) - } - - pub async fn clear_identity_by_trace( - &self, - file_uuid: &str, - trace_id: i32, - ) -> Result { - let url = format!( - "{}/collections/{}/points", - self.base_url, self.collection_name - ); - - let body = serde_json::json!({ - "filter": { - "must": [ - { - "key": "file_uuid", - "match": { "value": file_uuid } - }, - { - "key": "trace_id", - "match": { "value": trace_id } - } - ] - }, - "payload": { - "identity_uuid": null - } - }); - - let response = self - .client - .post(&url) - .header("api-key", &self.api_key) - .header("Content-Type", "application/json") - .json(&body) - .send() - .await - .context("Failed to clear identity_uuid in Qdrant")?; - - if !response.status().is_success() { - let text = response.text().await.unwrap_or_default(); - anyhow::bail!("Qdrant identity clear failed: {}", text); - } - - tracing::info!( - "[FaceEmbedding] Cleared identity_uuid for file={}, trace={}", - file_uuid, trace_id - ); - - Ok(1) - } - - pub async fn search_similar( - &self, - query_embedding: &[f32], - file_uuid: Option<&str>, - limit: usize, - threshold: f64, - ) -> Result> { - let url = format!( - 
"{}/collections/{}/points/search", - self.base_url, self.collection_name - ); - - let mut filter = serde_json::json!({}); - if let Some(fu) = file_uuid { - filter = serde_json::json!({ - "must": [{ - "key": "file_uuid", - "match": { "value": fu } - }] - }); - } - - let body = serde_json::json!({ - "vector": query_embedding, - "limit": limit, - "with_payload": true, - "with_vector": false, - "filter": filter - }); - - let response = self - .client - .post(&url) - .header("api-key", &self.api_key) - .header("Content-Type", "application/json") - .json(&body) - .send() - .await - .context("Failed to search face embeddings")?; - - let status = response.status(); - let text = response.text().await.unwrap_or_default(); - - if !status.is_success() { - anyhow::bail!("Qdrant search failed: {} - {}", status, text); - } - - #[derive(Deserialize)] - struct SearchResult { - result: Vec, - } - - #[derive(Deserialize)] - struct PointResult { - id: serde_json::Value, - score: f64, - payload: HashMap, - } - - let parsed: SearchResult = - serde_json::from_str(&text).context("Failed to parse Qdrant search response")?; - - let results: Vec = parsed - .result - .into_iter() - .filter(|r| r.score >= threshold) - .map(|r| { - let id = match r.id { - serde_json::Value::String(s) => s, - serde_json::Value::Number(n) => n.to_string(), - _ => "unknown".to_string(), - }; - let payload = FaceEmbeddingPayload { - file_uuid: r - .payload - .get("file_uuid") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(), - trace_id: r - .payload - .get("trace_id") - .and_then(|v| v.as_i64()) - .unwrap_or(0) as i32, - frame: r.payload.get("frame").and_then(|v| v.as_i64()).unwrap_or(0), - bbox_x: r - .payload - .get("bbox_x") - .and_then(|v| v.as_f64()) - .unwrap_or(0.0), - bbox_y: r - .payload - .get("bbox_y") - .and_then(|v| v.as_f64()) - .unwrap_or(0.0), - bbox_w: r - .payload - .get("bbox_w") - .and_then(|v| v.as_f64()) - .unwrap_or(0.0), - bbox_h: r - .payload - .get("bbox_h") - .and_then(|v| 
v.as_f64()) - .unwrap_or(0.0), - confidence: r - .payload - .get("confidence") - .and_then(|v| v.as_f64()) - .unwrap_or(0.0), - yaw: r.payload.get("yaw").and_then(|v| v.as_f64()).unwrap_or(0.0), - pitch: r - .payload - .get("pitch") - .and_then(|v| v.as_f64()) - .unwrap_or(0.0), - roll: r - .payload - .get("roll") - .and_then(|v| v.as_f64()) - .unwrap_or(0.0), - identity_uuid: r - .payload - .get("identity_uuid") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()), - identity_ref: r - .payload - .get("identity_ref") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()), - stranger_ref: r - .payload - .get("stranger_ref") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()), - r#type: r - .payload - .get("type") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()), - }; - FaceEmbeddingPoint { - id, - vector: vec![], // Not returned with_vector=false - payload, - score: r.score, - } - }) - .collect(); - - Ok(results) - } - - pub async fn get_embeddings_by_trace( - &self, - file_uuid: &str, - trace_id: i32, - ) -> Result)>> { - let url = format!( - "{}/collections/{}/points/scroll", - self.base_url, self.collection_name - ); - - let body = serde_json::json!({ - "limit": 1000, - "with_payload": true, - "with_vector": true, - "filter": { - "must": [ - {"key": "file_uuid", "match": { "value": file_uuid }}, - {"key": "trace_id", "match": { "value": trace_id }} - ] - } - }); - - let response = self - .client - .post(&url) - .header("api-key", &self.api_key) - .header("Content-Type", "application/json") - .json(&body) - .send() - .await - .context("Failed to scroll face embeddings")?; - - let status = response.status(); - let text = response.text().await.unwrap_or_default(); - - if !status.is_success() { - anyhow::bail!("Qdrant scroll failed: {} - {}", status, text); - } - - #[derive(Deserialize)] - struct ScrollResult { - result: ScrollPoints, - } - - #[derive(Deserialize)] - struct ScrollPoints { - points: Vec, - } - - #[derive(Deserialize)] - struct PointResult { - 
id: serde_json::Value, - vector: Vec, - } - - let parsed: ScrollResult = - serde_json::from_str(&text).context("Failed to parse Qdrant scroll response")?; - - let results: Vec<(String, Vec)> = parsed - .result - .points - .into_iter() - .map(|r| { - let id = match r.id { - serde_json::Value::String(s) => s, - serde_json::Value::Number(n) => n.to_string(), - _ => "unknown".to_string(), - }; - (id, r.vector) - }) - .collect(); - - Ok(results) - } - - pub async fn get_all_embeddings_for_file( - &self, - file_uuid: &str, - ) -> Result, FaceEmbeddingPayload)>> { - let url = format!( - "{}/collections/{}/points/scroll", - self.base_url, self.collection_name - ); - - let body = serde_json::json!({ - "limit": 10000, - "with_payload": true, - "with_vector": true, - "filter": { - "must": [ - {"key": "file_uuid", "match": { "value": file_uuid }} - ] - } - }); - - let response = self - .client - .post(&url) - .header("api-key", &self.api_key) - .header("Content-Type", "application/json") - .json(&body) - .send() - .await - .context("Failed to scroll face embeddings")?; - - let status = response.status(); - let text = response.text().await.unwrap_or_default(); - - if !status.is_success() { - anyhow::bail!("Qdrant scroll failed: {} - {}", status, text); - } - - #[derive(Deserialize)] - struct ScrollResult { - result: ScrollPoints, - } - - #[derive(Deserialize)] - struct ScrollPoints { - points: Vec, - } - - #[derive(Deserialize)] - struct PointResult { - id: serde_json::Value, - vector: Vec, - payload: HashMap, - } - - let parsed: ScrollResult = - serde_json::from_str(&text).context("Failed to parse Qdrant scroll response")?; - - let results: Vec<(String, Vec, FaceEmbeddingPayload)> = parsed - .result - .points - .into_iter() - .map(|r| { - let id = match r.id { - serde_json::Value::String(s) => s, - serde_json::Value::Number(n) => n.to_string(), - _ => "unknown".to_string(), - }; - let payload = FaceEmbeddingPayload { - file_uuid: r - .payload - .get("file_uuid") - 
.and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(), - trace_id: r - .payload - .get("trace_id") - .and_then(|v| v.as_i64()) - .unwrap_or(0) as i32, - frame: r.payload.get("frame").and_then(|v| v.as_i64()).unwrap_or(0), - bbox_x: r - .payload - .get("bbox_x") - .and_then(|v| v.as_f64()) - .unwrap_or(0.0), - bbox_y: r - .payload - .get("bbox_y") - .and_then(|v| v.as_f64()) - .unwrap_or(0.0), - bbox_w: r - .payload - .get("bbox_w") - .and_then(|v| v.as_f64()) - .unwrap_or(0.0), - bbox_h: r - .payload - .get("bbox_h") - .and_then(|v| v.as_f64()) - .unwrap_or(0.0), - confidence: r - .payload - .get("confidence") - .and_then(|v| v.as_f64()) - .unwrap_or(0.0), - yaw: r.payload.get("yaw").and_then(|v| v.as_f64()).unwrap_or(0.0), - pitch: r - .payload - .get("pitch") - .and_then(|v| v.as_f64()) - .unwrap_or(0.0), - roll: r - .payload - .get("roll") - .and_then(|v| v.as_f64()) - .unwrap_or(0.0), - identity_uuid: r - .payload - .get("identity_uuid") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()), - identity_ref: r - .payload - .get("identity_ref") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()), - stranger_ref: r - .payload - .get("stranger_ref") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()), - r#type: r - .payload - .get("type") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()), - }; - (id, r.vector, payload) - }) - .collect(); - - Ok(results) - } - - pub async fn delete_file_embeddings(&self, file_uuid: &str) -> Result { - let url = format!( - "{}/collections/{}/points/delete?wait=true", - self.base_url, self.collection_name - ); - - let body = serde_json::json!({ - "filter": { - "must": [ - {"key": "file_uuid", "match": { "value": file_uuid }} - ] - } - }); - - let response = self - .client - .post(&url) - .header("api-key", &self.api_key) - .header("Content-Type", "application/json") - .json(&body) - .send() - .await - .context("Failed to delete face embeddings")?; - - if !response.status().is_success() { - let text = 
response.text().await.unwrap_or_default(); - anyhow::bail!("Qdrant delete failed: {}", text); - } - - Ok(0) - } - - pub async fn upsert_seed_embedding( - &self, - identity_uuid: &str, - identity_name: &str, - tmdb_id: i32, - embedding: &[f32], - ) -> Result<()> { - let url = format!( - "{}/collections/{}/points?wait=true", - self.base_url, self.collection_name - ); - - let point_id = identity_uuid.to_string(); - let payload = serde_json::json!({ - "file_uuid": "", - "trace_id": 0, - "frame": 0, - "bbox_x": 0.0, - "bbox_y": 0.0, - "bbox_w": 0.0, - "bbox_h": 0.0, - "confidence": 0.0, - "yaw": 0.0, - "pitch": 0.0, - "roll": 0.0, - "identity_uuid": identity_uuid, - "identity_ref": serde_json::Value::Null, - "stranger_ref": serde_json::Value::Null, - "identity_name": identity_name, - "tmdb_id": tmdb_id, - "type": "identity_seed", - }); - - let body = serde_json::json!({ - "points": [{ - "id": point_id, - "vector": embedding, - "payload": payload - }] - }); - - let response = self - .client - .put(&url) - .header("api-key", &self.api_key) - .header("Content-Type", "application/json") - .json(&body) - .send() - .await - .context("Failed to upsert seed embedding")?; - - if !response.status().is_success() { - let text = response.text().await.unwrap_or_default(); - anyhow::bail!("Qdrant seed upsert failed: {}", text); - } - - tracing::info!( - "[SeedEmbedding] Stored seed for identity_uuid={}, name={}", - identity_uuid, identity_name - ); - - Ok(()) - } - - pub async fn get_seed_embeddings( - &self, - ) -> Result)>> { - let url = format!( - "{}/collections/{}/points/scroll", - self.base_url, self.collection_name - ); - - let body = serde_json::json!({ - "limit": 10000, - "with_payload": true, - "with_vector": true, - "filter": { - "must": [ - {"key": "type", "match": { "value": "identity_seed" }} - ] - } - }); - - let response = self - .client - .post(&url) - .header("api-key", &self.api_key) - .header("Content-Type", "application/json") - .json(&body) - .send() - .await - 
.context("Failed to scroll seed embeddings")?; - - let status = response.status(); - let text = response.text().await.unwrap_or_default(); - - if !status.is_success() { - anyhow::bail!("Qdrant scroll failed: {} - {}", status, text); - } - - #[derive(Deserialize)] - struct ScrollResult { - result: ScrollPoints, - } - - #[derive(Deserialize)] - struct ScrollPoints { - points: Vec, - } - - #[derive(Deserialize)] - struct PointResult { - id: serde_json::Value, - vector: Vec, - payload: HashMap, - } - - let parsed: ScrollResult = - serde_json::from_str(&text).context("Failed to parse Qdrant scroll response")?; - - let results: Vec<(String, String, Vec)> = parsed - .result - .points - .into_iter() - .filter_map(|r| { - let identity_uuid = r - .payload - .get("identity_uuid") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(); - let identity_name = r - .payload - .get("identity_name") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(); - if identity_uuid.is_empty() { - None - } else { - Some((identity_uuid, identity_name, r.vector)) - } - }) - .collect(); - - Ok(results) - } - - pub async fn update_identity_ref_by_trace( - &self, - file_uuid: &str, - trace_id: i32, - identity_ref: &str, - ) -> Result { - let url = format!( - "{}/collections/{}/points/payload", - self.base_url, self.collection_name - ); - - let body = serde_json::json!({ - "filter": { - "must": [ - { - "key": "file_uuid", - "match": { "value": file_uuid } - }, - { - "key": "trace_id", - "match": { "value": trace_id } - } - ] - }, - "payload": { - "identity_ref": identity_ref - } - }); - - let response = self - .client - .post(&url) - .header("api-key", &self.api_key) - .header("Content-Type", "application/json") - .json(&body) - .send() - .await - .context("Failed to update identity_ref in Qdrant")?; - - if !response.status().is_success() { - let text = response.text().await.unwrap_or_default(); - anyhow::bail!("Qdrant identity_ref update failed: {}", text); - } - - tracing::info!( - 
"[FaceEmbedding] Updated identity_ref={} for file={}, trace={}", - identity_ref, file_uuid, trace_id - ); - - Ok(1) - } - - pub async fn update_stranger_ref_by_trace( - &self, - file_uuid: &str, - trace_id: i32, - stranger_ref: &str, - ) -> Result { - let url = format!( - "{}/collections/{}/points/payload", - self.base_url, self.collection_name - ); - - let body = serde_json::json!({ - "filter": { - "must": [ - { - "key": "file_uuid", - "match": { "value": file_uuid } - }, - { - "key": "trace_id", - "match": { "value": trace_id } - } - ] - }, - "payload": { - "stranger_ref": stranger_ref - } - }); - - let response = self - .client - .post(&url) - .header("api-key", &self.api_key) - .header("Content-Type", "application/json") - .json(&body) - .send() - .await - .context("Failed to update stranger_ref in Qdrant")?; - - if !response.status().is_success() { - let text = response.text().await.unwrap_or_default(); - anyhow::bail!("Qdrant stranger_ref update failed: {}", text); - } - - tracing::info!( - "[FaceEmbedding] Updated stranger_ref={} for file={}, trace={}", - stranger_ref, file_uuid, trace_id - ); - - Ok(1) - } -} - -impl Default for FaceEmbeddingDb { - fn default() -> Self { - Self::new() - } -} diff --git a/src/core/db/mod.rs b/src/core/db/mod.rs index 98a9e53..46c3d1e 100644 --- a/src/core/db/mod.rs +++ b/src/core/db/mod.rs @@ -32,14 +32,12 @@ pub trait VectorStore: Send + Sync { async fn search(&self, query_vector: &[f32], limit: usize) -> Result>; } -pub mod face_embedding_db; pub mod identity_merge_history; pub mod mongodb_db; pub mod postgres_db; pub mod qdrant_db; pub mod redis_client; pub mod redis_db; -pub use face_embedding_db::{FaceEmbeddingDb, FaceEmbeddingPayload, FaceEmbeddingPoint}; pub use identity_merge_history::{ AliasEntry, FacesTransferred, IdentityMergeHistory, IdentityMergeHistoryStore, IdentitySnapshot, MergeHistoryEntry, MergeHistoryQuery, MergeParams, TargetIdentitySnapshot, diff --git a/src/core/db/postgres_db.rs 
b/src/core/db/postgres_db.rs index abd15f5..8e4bfb5 100644 --- a/src/core/db/postgres_db.rs +++ b/src/core/db/postgres_db.rs @@ -448,10 +448,7 @@ pub enum ProcessorType { Hand, Asrx, Scene, - Story, - FiveW1H, Appearance, - MediaPipe, FaceCluster, } @@ -488,10 +485,7 @@ impl ProcessorType { ProcessorType::Hand => "hand", ProcessorType::Asrx => "asrx", ProcessorType::Scene => "scene", - ProcessorType::Story => "story", - ProcessorType::FiveW1H => "5w1h", ProcessorType::Appearance => "appearance", - ProcessorType::MediaPipe => "mediapipe", ProcessorType::FaceCluster => "face_cluster", } } @@ -507,10 +501,7 @@ impl ProcessorType { "hand" => Some(ProcessorType::Hand), "asrx" => Some(ProcessorType::Asrx), "scene" => Some(ProcessorType::Scene), - "story" => Some(ProcessorType::Story), - "5w1h" => Some(ProcessorType::FiveW1H), "appearance" => Some(ProcessorType::Appearance), - "mediapipe" => Some(ProcessorType::MediaPipe), "face_cluster" => Some(ProcessorType::FaceCluster), _ => None, } @@ -527,10 +518,7 @@ impl ProcessorType { ProcessorType::Hand => 0.4, ProcessorType::Asrx => 0.8, ProcessorType::Scene => 0.3, - ProcessorType::Story => 0.1, - ProcessorType::FiveW1H => 0.1, ProcessorType::Appearance => 0.3, - ProcessorType::MediaPipe => 0.3, ProcessorType::FaceCluster => 0.7, } } @@ -538,7 +526,6 @@ impl ProcessorType { pub fn uses_gpu(&self) -> bool { match self { ProcessorType::Yolo | ProcessorType::Face | ProcessorType::Pose | ProcessorType::Hand => true, - ProcessorType::MediaPipe | ProcessorType::FaceCluster => false, _ => false, } } @@ -554,10 +541,7 @@ impl ProcessorType { ProcessorType::Hand => 1024, ProcessorType::Asrx => 2048, ProcessorType::Scene => 512, - ProcessorType::Story => 256, - ProcessorType::FiveW1H => 256, ProcessorType::Appearance => 512, - ProcessorType::MediaPipe => 1024, ProcessorType::FaceCluster => 1024, } } @@ -573,10 +557,7 @@ impl ProcessorType { ProcessorType::Hand => Some("vision/hand_pose"), ProcessorType::Asrx => 
Some("speechbrain/ecapa-tdnn"), ProcessorType::Scene => Some("places365"), - ProcessorType::Story => None, - ProcessorType::FiveW1H => Some("gemma4"), ProcessorType::Appearance => None, - ProcessorType::MediaPipe => Some("mediapipe/holistic"), ProcessorType::FaceCluster => Some("sklearn/agglomerative"), } } @@ -585,17 +566,8 @@ impl ProcessorType { match self { ProcessorType::Asrx => vec![ProcessorType::Cut, ProcessorType::Asr], ProcessorType::Scene => vec![ProcessorType::Cut], - ProcessorType::Story => vec![ - ProcessorType::Asrx, - ProcessorType::Cut, - ProcessorType::Yolo, - ProcessorType::Face, - ], - ProcessorType::FiveW1H => vec![ProcessorType::Story], ProcessorType::Appearance => vec![ProcessorType::Pose], ProcessorType::FaceCluster => vec![ProcessorType::Face], - ProcessorType::Hand => vec![], - ProcessorType::MediaPipe => vec![], _ => vec![], } } @@ -623,15 +595,12 @@ impl ProcessorType { | ProcessorType::Pose | ProcessorType::Hand | ProcessorType::Appearance - | ProcessorType::MediaPipe | ProcessorType::FaceCluster => PipelineType::Frame, ProcessorType::Cut | ProcessorType::Asr | ProcessorType::Asrx - | ProcessorType::Scene - | ProcessorType::Story - | ProcessorType::FiveW1H => PipelineType::Time, + | ProcessorType::Scene => PipelineType::Time, } } } @@ -2612,76 +2581,32 @@ sqlx::query( Ok(results) } - /// Face clustering: group unregistered faces within same trace by embedding similarity + /// Face clustering: disabled - embedding column no longer used pub async fn cluster_face_embeddings( &self, file_uuid: &str, - similarity_threshold: f64, + _similarity_threshold: f64, ) -> Result> { - let table = schema::table_name("face_detections"); - let rows = sqlx::query_as::<_, (String, i64)>(&format!( - r#" - SELECT trace_id::text, COUNT(DISTINCT frame_number) as frame_count - FROM {} - WHERE file_uuid = $1 - AND embedding IS NOT NULL - AND identity_id IS NULL - GROUP BY trace_id - ORDER BY frame_count DESC - "#, - table - )) - .bind(file_uuid) - 
.fetch_all(&self.pool) - .await?; - - Ok(rows - .into_iter() - .map(|(trace_id, frame_count)| FaceClusterGroup { - trace_id, - frame_count: frame_count as i32, - }) - .collect()) + tracing::warn!( + "[cluster_face_embeddings] Disabled - embedding column removed for {}", + file_uuid + ); + Ok(Vec::new()) } - /// Search similar faces by embedding via pgvector cosine distance + /// Search similar faces: disabled - embedding column no longer used pub async fn search_similar_faces( &self, - query_embedding: &[f32], + _query_embedding: &[f32], file_uuid: &str, - limit: i64, - threshold: f64, + _limit: i64, + _threshold: f64, ) -> Result> { - let table = schema::table_name("face_detections"); - let rows = sqlx::query_as::<_, (i32, i32, f64)>(&format!( - r#" - SELECT id, trace_id, - 1 - (embedding::vector <=> $1::vector) as similarity - FROM {} - WHERE file_uuid = $2 - AND embedding IS NOT NULL - AND 1 - (embedding::vector <=> $1::vector) >= $3 - ORDER BY embedding::vector <=> $1::vector - LIMIT $4 - "#, - table - )) - .bind(query_embedding) - .bind(file_uuid) - .bind(threshold) - .bind(limit) - .fetch_all(&self.pool) - .await?; - - Ok(rows - .into_iter() - .map(|(id, trace_id, similarity)| SimilarFaceResult { - id, - trace_id, - similarity, - bbox: String::new(), - }) - .collect()) + tracing::warn!( + "[search_similar_faces] Disabled - embedding column removed for {}", + file_uuid + ); + Ok(Vec::new()) } // ========================================== diff --git a/src/core/db/qdrant_db.rs b/src/core/db/qdrant_db.rs index 1485468..fb780f0 100644 --- a/src/core/db/qdrant_db.rs +++ b/src/core/db/qdrant_db.rs @@ -768,45 +768,6 @@ impl QdrantDb { Ok(result.result.points_count) } - /// Store face embedding with trace_id + frame_number payload - pub async fn upsert_face_embedding( - &self, - point_id: u64, - vector: &[f32], - file_uuid: &str, - trace_id: i32, - frame_number: i64, - ) -> Result<()> { - let url = format!( - "{}/collections/{}/points?wait=true", - self.base_url, 
self.collection_name - ); - let mut payload_map = std::collections::HashMap::new(); - payload_map.insert("file_uuid".to_string(), serde_json::json!(file_uuid)); - payload_map.insert("trace_id".to_string(), serde_json::json!(trace_id)); - payload_map.insert("frame_number".to_string(), serde_json::json!(frame_number)); - payload_map.insert("type".to_string(), serde_json::json!("face_embedding")); - - let point = serde_json::json!({ - "points": [{ - "id": point_id, - "vector": vector, - "payload": payload_map - }] - }); - let resp = self - .client - .put(&url) - .header("api-key", &self.api_key) - .json(&point) - .send() - .await?; - if !resp.status().is_success() { - anyhow::bail!("Qdrant upsert face failed: {}", resp.status()); - } - Ok(()) - } - /// Store chunk embedding with parent-child metadata pub async fn upsert_chunk_embedding( &self, @@ -883,113 +844,3 @@ impl VectorStore for QdrantDb { self.search(query_vector, limit).await } } - -pub async fn sync_trace_embeddings(file_uuid: &str) -> Result<()> { - use crate::core::config::DATABASE_URL; - use sqlx::Row; - - let pool = sqlx::PgPool::connect(&DATABASE_URL).await?; - let table = crate::core::db::schema::table_name("face_detections"); - let qdrant = QdrantDb::new(); - - let collection = format!( - "{}_traces", - crate::core::config::REDIS_KEY_PREFIX - .as_str() - .trim_end_matches(':') - ); - qdrant.ensure_collection(&collection, 512).await?; - - // Read all face_detections with embeddings, grouped by trace_id in Rust - let rows = sqlx::query(&format!( - "SELECT trace_id, embedding FROM {} \ - WHERE file_uuid = $1 AND embedding IS NOT NULL AND trace_id IS NOT NULL \ - AND ((metadata->>'qc_ok')::boolean IS NULL OR (metadata->>'qc_ok')::boolean = true)", - table - )) - .bind(file_uuid) - .fetch_all(&pool) - .await?; - - let mut trace_faces: std::collections::HashMap>> = - std::collections::HashMap::new(); - let mut trace_stats: std::collections::HashMap = - std::collections::HashMap::new(); // (count, min_frame, 
max_frame) - - for row in &rows { - let tid: Option = row.get(0); - let emb: Option> = row.get(1); - if let (Some(tid), Some(emb)) = (tid, emb) { - trace_faces.entry(tid).or_default().push(emb); - let entry = trace_stats.entry(tid).or_insert((0, i64::MAX, i64::MIN)); - entry.0 += 1; - } - } - - // Compute average embedding per trace - struct AvgTrace { - tid: i32, - avg_emb: Vec, - frame_count: i64, - } - - let mut trace_avgs: Vec = Vec::new(); - - for (&tid, faces) in &trace_faces { - let dim = faces[0].len(); - let mut avg = vec![0.0f32; dim]; - for face in faces { - for (i, &v) in face.iter().enumerate() { - avg[i] += v; - } - } - let n = faces.len() as f32; - for v in &mut avg { - *v /= n; - } - - let stats = trace_stats.get(&tid).unwrap_or(&(0, 0, 0)); - trace_avgs.push(AvgTrace { - tid, - avg_emb: avg, - frame_count: stats.0, - }); - } - - // Push to Qdrant in batches - // Point ID: hash(file_uuid + trace_id) for global uniqueness - for chunk in trace_avgs.chunks(500) { - let batch: Vec<(u64, &[f32], Option)> = chunk - .iter() - .map(|t| { - let point_id = { - use sha2::{Digest, Sha256}; - let mut hasher = Sha256::new(); - hasher.update(file_uuid.as_bytes()); - hasher.update(b"_"); - hasher.update(t.tid.to_string().as_bytes()); - let hash = hasher.finalize(); - u64::from_be_bytes(hash[0..8].try_into().unwrap()) - }; - ( - point_id, - t.avg_emb.as_slice(), - Some(serde_json::json!({ - "trace_id": t.tid, - "file_uuid": file_uuid, - "frame_count": t.frame_count, - "source": "trace", - })), - ) - }) - .collect(); - qdrant.upsert_vectors_batch(&collection, &batch).await?; - } - - tracing::info!( - "Synced {} trace embeddings to Qdrant for {}", - trace_faces.len(), - file_uuid - ); - Ok(()) -} diff --git a/src/core/db/qdrant_workspace.rs b/src/core/db/qdrant_workspace.rs index b20451a..b553262 100644 --- a/src/core/db/qdrant_workspace.rs +++ b/src/core/db/qdrant_workspace.rs @@ -187,34 +187,13 @@ impl QdrantWorkspace { .await } - pub async fn upsert_face_embedding( 
- &self, - point_id: u64, - vector: &[f32], - file_uuid: &str, - trace_id: i32, - frame_number: i64, - ) -> Result<()> { - let payload = serde_json::json!({ - "file_uuid": file_uuid, - "trace_id": trace_id, - "frame_number": frame_number, - "type": "face_embedding", - }); - self.upsert_vector(&self.traces_collection(), point_id, vector, Some(payload)) - .await - } - /// Scroll all points for a file from all workspace collections. /// Used during checkin to read vectors before moving to production. pub async fn scroll_by_file_uuid(&self, file_uuid: &str) -> Result { let chunks = self .scroll_collection(&self.chunks_collection(), file_uuid) .await?; - let traces = self - .scroll_collection(&self.traces_collection(), file_uuid) - .await?; - Ok(WorkspaceScrollResult { chunks, traces }) + Ok(WorkspaceScrollResult { chunks, traces: Vec::new() }) } async fn scroll_collection( diff --git a/src/core/llm/client.rs b/src/core/llm/client.rs index ac2da84..9b03c19 100644 --- a/src/core/llm/client.rs +++ b/src/core/llm/client.rs @@ -1,7 +1,7 @@ use anyhow::Result; use serde::{Deserialize, Serialize}; use std::time::Duration; -use tracing::{debug, error, warn}; +use tracing::{debug, error}; use crate::core::config; use crate::core::llm::function_calling::LLM_CLIENT; @@ -31,44 +31,17 @@ struct Choice { message: ChatMessage, } -/// Generates a 5W1H+ summary for a given scene context. -/// Context should include the combined text of all sentences in the scene. -pub async fn generate_5w1h_summary(scene_text: &str) -> Result { - if !*config::llm::SUMMARY_ENABLED { - warn!("LLM Summary is disabled via config"); - return Ok("LLM Disabled".to_string()); - } - - let prompt = format!( - r#"Analyze the following video scene transcript and provide a concise 5W1H+ summary in JSON format. - Focus on: Who, What, Where, When, Why, How, and Key Objects/Actions. 
- - Transcript: - "{}" - - Output format: - {{ - "who": "...", - "what": "...", - "where": "...", - "when": "...", - "why": "...", - "how": "...", - "summary": "..." - }}"#, - scene_text - ); - +pub async fn ask_llm(prompt: &str, system_prompt: &str) -> Result { let req = ChatRequest { model: (*config::llm::SUMMARY_MODEL).clone(), messages: vec![ ChatMessage { role: "system".to_string(), - content: "You are an expert video analyst assistant.".to_string(), + content: system_prompt.to_string(), }, ChatMessage { role: "user".to_string(), - content: prompt, + content: prompt.to_string(), }, ], temperature: 0.1, @@ -76,7 +49,7 @@ pub async fn generate_5w1h_summary(scene_text: &str) -> Result { stream: false, }; - debug!("Calling LLM for summary: {}", *config::llm::SUMMARY_URL); + debug!("Calling LLM: {}", *config::llm::SUMMARY_URL); let res = LLM_CLIENT .post(&*config::llm::SUMMARY_URL) diff --git a/src/core/person_identity.rs b/src/core/person_identity.rs index 2023b53..2508a11 100644 --- a/src/core/person_identity.rs +++ b/src/core/person_identity.rs @@ -71,6 +71,7 @@ pub struct BindIdentityRequest { pub file_uuid: String, pub face_id: Option, pub id: Option, + pub expand_to_trace: Option, } #[derive(Debug, Clone, Deserialize, Serialize)] diff --git a/src/core/processor/face.rs b/src/core/processor/face.rs index b437416..b9b1d46 100644 --- a/src/core/processor/face.rs +++ b/src/core/processor/face.rs @@ -103,6 +103,7 @@ mod tests { confidence: 0.95, embedding: Some(vec![0.1, 0.2, 0.3]), landmarks: Some(serde_json::json!([[10.0, 20.0], [30.0, 40.0]])), + pose_angle: None, attributes: Some(FaceAttributes { age: Some(30), gender: Some("male".to_string()), @@ -174,6 +175,7 @@ mod tests { confidence: 0.5, embedding: None, landmarks: None, + pose_angle: None, attributes: None, }; assert!(face.confidence >= 0.0 && face.confidence <= 1.0); @@ -190,6 +192,7 @@ mod tests { confidence: 0.95, embedding: Some(vec![0.1; 512]), landmarks: None, + pose_angle: None, attributes: 
Some(FaceAttributes { age: Some(35), gender: Some("male".to_string()), diff --git a/src/core/processor/mediapipe.rs b/src/core/processor/mediapipe.rs deleted file mode 100644 index 6ad1a65..0000000 --- a/src/core/processor/mediapipe.rs +++ /dev/null @@ -1,96 +0,0 @@ -use anyhow::{Context, Result}; -use serde::{Deserialize, Serialize}; -use std::time::Duration; - -use super::executor::PythonExecutor; - -const MEDIAPIPE_TIMEOUT: Duration = Duration::from_secs(7200); - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MediaPipeResult { - pub frame_count: u64, - pub fps: f64, - pub frames: Vec, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MediaPipeFrame { - pub frame: u64, - pub timestamp: f64, - pub persons: Vec, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MediaPipePerson { - pub person_id: u64, - pub pose: Option, - pub left_hand: Option, - pub right_hand: Option, - pub face_mesh: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MediaPipePose { - pub landmarks: Vec>, - pub keypoints_33: Option>, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MediaPipeHand { - pub landmarks: Vec>, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MediaPipeFaceMesh { - pub landmarks: Vec>, -} - -pub async fn process_mediapipe( - video_path: &str, - output_path: &str, - uuid: Option<&str>, -) -> Result { - // If mediapipe.json already exists (written by face_processor), skip - if std::path::Path::new(output_path).exists() { - let json_str = std::fs::read_to_string(output_path).context("Failed to read MEDIAPIPE output")?; - let result: MediaPipeResult = - serde_json::from_str(&json_str).context("Failed to parse MEDIAPIPE output")?; - tracing::info!("[MEDIAPIPE] Skipping (already exists): {} frames", result.frames.len()); - return Ok(result); - } - let executor = PythonExecutor::new()?; - let script_name = "mediapipe_processor_v1.11.py"; - let script_path = 
executor.script_path(script_name); - - tracing::info!("[MEDIAPIPE] Starting MediaPipe Holistic: {}", video_path); - - if !script_path.exists() { - tracing::warn!("[MEDIAPIPE] Script not found, returning empty result"); - return Ok(MediaPipeResult { - frame_count: 0, - fps: 0.0, - frames: vec![], - }); - } - - executor - .run( - script_name, - &[video_path, output_path], - uuid, - "MEDIAPIPE", - Some(MEDIAPIPE_TIMEOUT), - ) - .await - .with_context(|| format!("Failed to run {:?}", script_path))?; - - let json_str = - std::fs::read_to_string(output_path).context("Failed to read MEDIAPIPE output")?; - - let result: MediaPipeResult = - serde_json::from_str(&json_str).context("Failed to parse MEDIAPIPE output")?; - - tracing::info!("[MEDIAPIPE] Result: {} frames", result.frames.len()); - - Ok(result) -} diff --git a/src/core/processor/mediapipe_v2.rs b/src/core/processor/mediapipe_v2.rs deleted file mode 100644 index 1ce22b2..0000000 --- a/src/core/processor/mediapipe_v2.rs +++ /dev/null @@ -1,203 +0,0 @@ -use anyhow::{Context, Result}; -use serde::{Deserialize, Serialize}; -use std::collections::HashMap; -use std::time::Duration; -use tokio::process::Command; -use tokio::time::timeout; - -use super::executor::PythonExecutor; - -const MEDIAPIPE_TIMEOUT: Duration = Duration::from_secs(7200); - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MediaPipeResult { - pub metadata: MediaPipeMetadata, - pub frames: HashMap, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MediaPipeMetadata { - pub fps: f64, - pub total_frames: i64, - pub processed_frames: i64, - pub sample_interval: i64, - pub width: i64, - pub height: i64, - pub processor: String, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MediaPipeDictEntry { - pub frame_number: i64, - pub timestamp: f64, - pub persons: Vec, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MediaPipePerson { - pub person_id: i64, - #[serde(default)] - pub bbox: Option, - pub 
face_mesh: Option, - pub pose: Option, - pub hands: MediaPipeHands, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MediaPipeBBox { - pub x: i64, - pub y: i64, - pub width: i64, - pub height: i64, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct MediaPipeHands { - pub left: Option, - pub right: Option, -} - -pub async fn process_mediapipe_v2( - video_path: &str, - output_path: &str, - uuid: Option<&str>, - frames: Option<&[i64]>, -) -> Result { - let executor = PythonExecutor::new()?; - let script_path = executor.script_path("mediapipe_holistic_processor.py"); - - tracing::info!("[MEDIAPIPE] Starting MediaPipe Holistic: {}", video_path); - - if !script_path.exists() { - anyhow::bail!("mediapipe_holistic_processor.py not found"); - } - - let mut cmd = Command::new(executor.python_path()); - cmd.arg(&script_path).arg(video_path).arg(output_path); - - // Use explicit frame list if provided, otherwise calculate sample_interval for ~8Hz - if let Some(frames) = frames { - let frames_str = frames - .iter() - .map(|f| f.to_string()) - .collect::>() - .join(","); - cmd.arg("--frames").arg(&frames_str); - tracing::info!("[MEDIAPIPE] 8Hz sampling: {} frames", frames.len()); - } else { - let sample_interval = calculate_sample_interval(video_path).await; - cmd.arg("--sample-interval") - .arg(sample_interval.to_string()); - } - - if let Some(u) = uuid { - cmd.arg("--uuid").arg(u); - } - - cmd.stdout(std::process::Stdio::piped()) - .stderr(std::process::Stdio::piped()); - - let child = cmd.spawn().context("Failed to run MEDIAPIPE processor")?; - - let output = match timeout(MEDIAPIPE_TIMEOUT, child.wait_with_output()).await { - Ok(Ok(output)) => output, - Ok(Err(e)) => return Err(e).context("Failed to run MEDIAPIPE processor"), - Err(_) => anyhow::bail!( - "MEDIAPIPE processing timed out after {:?}", - MEDIAPIPE_TIMEOUT - ), - }; - - let stderr = String::from_utf8_lossy(&output.stderr); - - for line in stderr.lines() { - let trimmed = 
line.trim(); - if trimmed.starts_with("MEDIAPIPE_START") { - tracing::info!("[MEDIAPIPE] Loading model..."); - } else if trimmed.starts_with("MEDIAPIPE_FRAME:") { - let count = trimmed.trim_start_matches("MEDIAPIPE_FRAME:"); - tracing::info!("[MEDIAPIPE] Processed {} frames...", count); - } else if trimmed.starts_with("MEDIAPIPE_COMPLETE:") { - let count = trimmed.trim_start_matches("MEDIAPIPE_COMPLETE:"); - tracing::info!("[MEDIAPIPE] Completed! Total: {} frames", count); - } else if trimmed.starts_with("MEDIAPIPE_INFO:") { - let info = trimmed.trim_start_matches("MEDIAPIPE_INFO:"); - tracing::info!("[MEDIAPIPE] {}", info); - } else if trimmed.starts_with("MEDIAPIPE_ERROR:") { - let err = trimmed.trim_start_matches("MEDIAPIPE_ERROR:"); - tracing::error!("[MEDIAPIPE] {}", err); - } - } - tracing::info!("[MEDIAPIPE] stderr output:\n{}", stderr); - - if !output.status.success() { - anyhow::bail!("MEDIAPIPE failed: {}", stderr); - } - - let json_str = - std::fs::read_to_string(output_path).context("Failed to read MEDIAPIPE output")?; - - let result: MediaPipeResult = - serde_json::from_str(&json_str).context("Failed to parse MEDIAPIPE output")?; - - tracing::info!("[MEDIAPIPE] Result: {} frames", result.frames.len()); - - Ok(result) -} - -async fn calculate_sample_interval(video_path: &str) -> i64 { - // Try ffprobe to get FPS, calculate sample_interval for ~8Hz - let probe_cmd = Command::new("ffprobe") - .args([ - "-v", - "quiet", - "-print_format", - "json", - "-show_streams", - video_path, - ]) - .output() - .await; - - if let Ok(output) = probe_cmd { - if output.status.success() { - if let Ok(json_str) = String::from_utf8(output.stdout) { - if let Ok(probe_data) = serde_json::from_str::(&json_str) { - if let Some(streams) = probe_data["streams"].as_array() { - for stream in streams { - if stream["codec_type"] == "video" { - if let Some(fps_str) = stream["r_frame_rate"].as_str() { - // Parse "30000/1001" style fps - if let Some(fps) = parse_fractional_fps(fps_str) 
{ - let interval = (fps / 8.0).round() as i64; - return interval.max(1); - } - } - if let Some(fps_val) = stream["avg_frame_rate"].as_str() { - if let Some(fps) = parse_fractional_fps(fps_val) { - let interval = (fps / 8.0).round() as i64; - return interval.max(1); - } - } - } - } - } - } - } - } - } - - 4 // Default: assume 30fps / 8 = ~4 -} - -fn parse_fractional_fps(s: &str) -> Option { - let parts: Vec<&str> = s.split('/').collect(); - if parts.len() == 2 { - let num: f64 = parts[0].parse().ok()?; - let den: f64 = parts[1].parse().ok()?; - if den > 0.0 { - return Some(num / den); - } - } - s.parse::().ok() -} diff --git a/src/core/processor/mod.rs b/src/core/processor/mod.rs index b64b7c7..b79bcbb 100644 --- a/src/core/processor/mod.rs +++ b/src/core/processor/mod.rs @@ -11,11 +11,9 @@ pub mod face_clustering; pub mod face_recognition; pub mod hand; pub mod heuristic_scene; -pub mod mediapipe_v2; pub mod ocr; pub mod pose; pub mod scene_classification; -pub mod story; pub mod tkg; pub mod yolo; @@ -48,17 +46,12 @@ pub use heuristic_scene::{ build_heuristic_scene_meta, generate_scene_meta, CrowdSize, HeuristicSceneMeta, SceneSegmentMeta, }; -pub use mediapipe_v2::{ - process_mediapipe_v2, MediaPipeBBox, MediaPipeDictEntry, MediaPipeHands, MediaPipeMetadata, - MediaPipePerson, MediaPipeResult, -}; pub use ocr::{process_ocr, OcrFrame, OcrResult, OcrText}; pub use pose::{process_pose, Bbox, Keypoint, PersonPose, PoseFrame, PoseResult}; pub use scene_classification::{ load_scene_from_file, process_scene_classification, SceneClassificationResult, ScenePrediction, SceneSegment, }; -pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats}; pub use tkg::{ build_tkg, query_auto_representative_frame, FrameTraceInfo, MainIdentityInfo, RepresentativeFrameResult, TkgResult, diff --git a/src/core/processor/story.rs b/src/core/processor/story.rs deleted file mode 100644 index aa803ee..0000000 --- a/src/core/processor/story.rs +++ /dev/null @@ 
-1,690 +0,0 @@ -use anyhow::{Context, Result}; -use serde::{Deserialize, Serialize}; -use std::path::Path; -use std::time::Duration; - -use super::executor::PythonExecutor; - -const STORY_TIMEOUT: Duration = Duration::from_secs(3600); - -// ── Input data structs (from JSON files) ────────────────────────── - -#[derive(Debug, Deserialize)] -struct AsrData { - segments: Vec, -} - -#[derive(Debug, Deserialize)] -struct AsrSegmentInput { - #[serde(default, alias = "start")] - start_time: f64, - #[serde(default, alias = "end")] - end_time: f64, - #[serde(default)] - text: String, - #[serde(default)] - confidence: f64, -} - -#[derive(Debug, Deserialize)] -struct CutData { - scenes: Vec, -} - -#[derive(Debug, Deserialize)] -struct CutSceneInput { - scene_number: Option, - #[allow(dead_code)] - start_frame: Option, - #[allow(dead_code)] - end_frame: Option, - start_time: Option, - end_time: Option, -} - -// ── Output data structs ─────────────────────────────────────────── - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct StoryResult { - pub child_chunks: Vec, - pub parent_chunks: Vec, - pub stats: StoryStats, - #[serde(default)] - pub metadata: serde_json::Value, - #[serde(default)] - pub parent_chunk_size: usize, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct StoryStats { - pub total_child_chunks: usize, - pub total_parent_chunks: usize, - pub asr_children: usize, - pub cut_children: usize, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct StoryChildChunk { - pub chunk_id: String, - pub chunk_type: String, - pub source: String, - pub start_time: f64, - pub end_time: f64, - #[serde(skip_serializing_if = "Option::is_none")] - pub text_content: Option, - pub content: serde_json::Value, - #[serde(default)] - pub child_chunk_ids: Vec, - pub parent_chunk_id: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct StoryParentChunk { - pub chunk_id: String, - pub chunk_type: String, - pub source: String, - pub 
start_time: f64, - pub end_time: f64, - pub text_content: String, - pub content: serde_json::Value, - #[serde(default)] - pub child_chunk_ids: Vec, - pub parent_chunk_id: Option, -} - -// ── Public API ──────────────────────────────────────────────────── - -pub async fn process_story( - video_path: &str, - output_path: &str, - uuid: Option<&str>, -) -> Result { - // Try native Rust implementation first - let result = try_native_story(video_path, output_path, uuid); - if let Ok(r) = result { - return Ok(r); - } - - // Fallback: Python script - tracing::warn!( - "[STORY] Native impl failed, falling back to Python: {:?}", - result.err() - ); - let executor = PythonExecutor::new()?; - let script_path = executor.script_path("story_processor.py"); - - if !script_path.exists() { - return Ok(StoryResult { - child_chunks: vec![], - parent_chunks: vec![], - stats: StoryStats { - total_child_chunks: 0, - total_parent_chunks: 0, - asr_children: 0, - cut_children: 0, - }, - metadata: serde_json::json!({}), - parent_chunk_size: 5, - }); - } - - executor - .run( - "story_processor.py", - &[video_path, output_path], - uuid, - "STORY", - Some(STORY_TIMEOUT), - ) - .await - .with_context(|| format!("Failed to run {:?}", script_path))?; - - let json_str = std::fs::read_to_string(output_path).context("Failed to read STORY output")?; - let result: StoryResult = - serde_json::from_str(&json_str).context("Failed to parse STORY output")?; - - Ok(result) -} - -// ── Native implementation ───────────────────────────────────────── - -fn try_native_story( - _video_path: &str, - output_path: &str, - _uuid: Option<&str>, -) -> Result { - let output_dir = Path::new(output_path).parent().unwrap_or(Path::new(".")); - let basename = Path::new(output_path) - .file_stem() - .and_then(|s| s.to_str()) - .and_then(|s| s.split('.').next()) - .unwrap_or("unknown"); - - let asr_path = output_dir.join(format!("{}.asr.json", basename)); - let cut_path = output_dir.join(format!("{}.cut.json", basename)); - - 
// ASR data is required; CUT is optional - let asr_data: AsrData = if asr_path.exists() { - let content = std::fs::read_to_string(&asr_path) - .with_context(|| format!("Failed to read {:?}", asr_path))?; - serde_json::from_str(&content).with_context(|| format!("Failed to parse {:?}", asr_path))? - } else { - AsrData { segments: vec![] } - }; - - let cut_data: CutData = if cut_path.exists() { - let content = std::fs::read_to_string(&cut_path) - .with_context(|| format!("Failed to read {:?}", cut_path))?; - serde_json::from_str(&content).with_context(|| format!("Failed to parse {:?}", cut_path))? - } else { - CutData { scenes: vec![] } - }; - - let parent_chunk_size: usize = 5; - - // ── Build child chunks ──────────────────────────────────────── - let mut child_chunks: Vec = Vec::new(); - - // ASR child chunks - for seg in &asr_data.segments { - let chunk_id = format!("asr_{:.1}_{:.1}", seg.start_time, seg.end_time); - child_chunks.push(StoryChildChunk { - chunk_id, - chunk_type: "asr".to_string(), - source: "asr".to_string(), - start_time: seg.start_time, - end_time: seg.end_time, - text_content: Some(seg.text.clone()), - content: serde_json::json!({ - "text": seg.text, - "confidence": seg.confidence, - }), - child_chunk_ids: vec![], - parent_chunk_id: None, - }); - } - - // CUT child chunks - for scene in &cut_data.scenes { - let scene_num = scene.scene_number.unwrap_or(0); - let start_time = scene.start_time.unwrap_or(0.0); - let end_time = scene.end_time.unwrap_or(0.0); - let chunk_id = format!("cut_{}", scene_num); - child_chunks.push(StoryChildChunk { - chunk_id, - chunk_type: "cut".to_string(), - source: "cut".to_string(), - start_time, - end_time, - text_content: Some(format!("Scene {}", scene_num)), - content: serde_json::json!({ - "scene_number": scene_num, - "start_time": start_time, - "end_time": end_time, - }), - child_chunk_ids: vec![], - parent_chunk_id: None, - }); - } - - let asr_child_ids: Vec = child_chunks - .iter() - .filter(|c| c.source == 
"asr") - .map(|c| c.chunk_id.clone()) - .collect(); - - let cut_child_ids: Vec = child_chunks - .iter() - .filter(|c| c.source == "cut") - .map(|c| c.chunk_id.clone()) - .collect(); - - // ── Build parent chunks from ASR ────────────────────────────── - let mut parent_chunks: Vec = Vec::new(); - - for (i, batch) in asr_child_ids.chunks(parent_chunk_size).enumerate() { - if batch.is_empty() { - continue; - } - - let mut texts: Vec = Vec::new(); - let mut times: Vec<(f64, f64)> = Vec::new(); - - for child_id in batch { - if let Some(child) = child_chunks.iter().find(|c| &c.chunk_id == child_id) { - if let Some(ref t) = child.text_content { - texts.push(t.clone()); - } - times.push((child.start_time, child.end_time)); - } - } - - let start_time = times.first().map(|t| t.0).unwrap_or(0.0); - let end_time = times.last().map(|t| t.1).unwrap_or(0.0); - - let narrative = generate_narrative(&texts, &[], start_time, end_time); - - let chunk_id = format!("story_asr_{:04}", i); - parent_chunks.push(StoryParentChunk { - chunk_id: chunk_id.clone(), - chunk_type: "story".to_string(), - source: "story_asr".to_string(), - start_time, - end_time, - text_content: narrative.clone(), - content: serde_json::json!({ - "description": narrative, - "child_count": batch.len(), - "speech_preview": texts.iter().take(3).cloned().collect::>().join(" "), - }), - child_chunk_ids: batch.to_vec(), - parent_chunk_id: None, - }); - - // Link children to parent - for child in &mut child_chunks { - if batch.contains(&child.chunk_id) { - child.parent_chunk_id = Some(chunk_id.clone()); - } - } - } - - // ── Build parent chunks from CUT ────────────────────────────── - for (i, batch) in cut_child_ids.chunks(parent_chunk_size).enumerate() { - if batch.is_empty() { - continue; - } - - let mut times: Vec<(f64, f64)> = Vec::new(); - for child_id in batch { - if let Some(child) = child_chunks.iter().find(|c| &c.chunk_id == child_id) { - times.push((child.start_time, child.end_time)); - } - } - - let start_time 
= times.first().map(|t| t.0).unwrap_or(0.0); - let end_time = times.last().map(|t| t.1).unwrap_or(0.0); - - let narrative = generate_scene_narrative(&[], start_time, end_time, batch.len()); - - let chunk_id = format!("story_cut_{:04}", i); - parent_chunks.push(StoryParentChunk { - chunk_id: chunk_id.clone(), - chunk_type: "story".to_string(), - source: "story_cut".to_string(), - start_time, - end_time, - text_content: narrative.clone(), - content: serde_json::json!({ - "description": narrative, - "child_count": batch.len(), - "scenes": batch, - }), - child_chunk_ids: batch.to_vec(), - parent_chunk_id: None, - }); - - for child in &mut child_chunks { - if batch.contains(&child.chunk_id) { - child.parent_chunk_id = Some(chunk_id.clone()); - } - } - } - - // ── Build result ────────────────────────────────────────────── - let total_child = asr_child_ids.len() + cut_child_ids.len(); - let total_parent = parent_chunks.len(); - let asr_count = asr_child_ids.len(); - let cut_count = cut_child_ids.len(); - - let result = StoryResult { - child_chunks, - parent_chunks, - stats: StoryStats { - total_child_chunks: total_child, - total_parent_chunks: total_parent, - asr_children: asr_count, - cut_children: cut_count, - }, - metadata: serde_json::json!({}), - parent_chunk_size, - }; - - // Write output (for compatibility with Python path) - let json_str = serde_json::to_string_pretty(&result)?; - std::fs::write(output_path, &json_str) - .with_context(|| format!("Failed to write {:?}", output_path))?; - - Ok(result) -} - -// ── Narrative generation (matching Python logic) ────────────────── - -fn generate_narrative(texts: &[String], objects: &[String], start: f64, end: f64) -> String { - if texts.is_empty() && objects.is_empty() { - return format!("Video segment from {:.1}s to {:.1}s", start, end); - } - - let mut parts: Vec = Vec::new(); - - if !texts.is_empty() { - let combined = texts.join(" "); - let truncated = if combined.len() > 150 { - format!("{}...", &combined[..150]) - 
} else { - combined - }; - parts.push(format!("Speech: {}", truncated)); - } - - if !objects.is_empty() { - let mut unique: Vec<&String> = objects.iter().collect(); - unique.sort(); - unique.dedup(); - let objs = unique - .iter() - .take(5) - .map(|s| (*s).as_str()) - .collect::>() - .join(", "); - parts.push(format!("Visuals: {}", objs)); - } - - format!("[{:.0}s-{:.0}s] {}", start, end, parts.join(" | ")) -} - -fn generate_scene_narrative( - objects: &[String], - start: f64, - end: f64, - scene_count: usize, -) -> String { - let mut unique: Vec<&String> = objects.iter().collect(); - unique.sort(); - unique.dedup(); - let top5: Vec<&String> = unique.iter().take(5).cloned().collect(); - - if !top5.is_empty() { - let obj_str = top5 - .iter() - .map(|s| s.as_str()) - .collect::>() - .join(", "); - format!( - "[{:.0}s-{:.0}s] {} scenes. Visuals: {}.", - start, end, scene_count, obj_str - ) - } else { - format!("[{:.0}s-{:.0}s] {} video scenes.", start, end, scene_count) - } -} - -// ── Tests ───────────────────────────────────────────────────────── - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_generate_narrative_with_text() { - let text = generate_narrative( - &["Hello world".to_string()], - &["person".to_string()], - 0.0, - 5.0, - ); - assert!(text.contains("[0s-5s]")); - assert!(text.contains("Speech:")); - assert!(text.contains("Visuals:")); - } - - #[test] - fn test_generate_narrative_empty() { - let text = generate_narrative(&[], &[], 10.0, 20.0); - assert!(text.contains("10.0s to 20.0s")); - } - - #[test] - fn test_generate_scene_narrative() { - let text = generate_scene_narrative(&["person".to_string()], 0.0, 10.0, 3); - assert!(text.contains("3 scenes")); - assert!(text.contains("person")); - } - - #[test] - fn test_generate_scene_narrative_empty() { - let text = generate_scene_narrative(&[], 0.0, 10.0, 1); - assert!(text.contains("1 video scenes")); - } - - #[test] - fn test_narrative_truncation() { - let long_text = "a".repeat(200); - 
let text = generate_narrative(&[long_text], &[], 0.0, 5.0); - assert!(text.len() < 200 + 50); // truncated with "..." - assert!(text.ends_with("...")); - } - - #[test] - fn test_story_result_serialization() { - let result = StoryResult { - child_chunks: vec![StoryChildChunk { - chunk_id: "asr_0001".to_string(), - chunk_type: "sentence".to_string(), - source: "asr".to_string(), - start_time: 0.0, - end_time: 5.0, - text_content: Some("Hello world".to_string()), - content: serde_json::json!({}), - child_chunk_ids: vec![], - parent_chunk_id: Some("story_asr_0000".to_string()), - }], - parent_chunks: vec![StoryParentChunk { - chunk_id: "story_asr_0000".to_string(), - chunk_type: "story".to_string(), - source: "story_asr".to_string(), - start_time: 0.0, - end_time: 25.0, - text_content: "[0s-25s] Hello world...".to_string(), - content: serde_json::json!({ - "description": "[0s-25s] Hello world...", - "child_count": 5 - }), - child_chunk_ids: vec!["asr_0001".to_string()], - parent_chunk_id: None, - }], - stats: StoryStats { - total_child_chunks: 10, - total_parent_chunks: 2, - asr_children: 10, - cut_children: 0, - }, - metadata: serde_json::json!({}), - parent_chunk_size: 5, - }; - - let json = serde_json::to_string(&result).unwrap(); - assert!(json.contains("asr_0001")); - assert!(json.contains("story_asr_0000")); - assert!(json.contains("Hello world")); - } - - #[test] - fn test_story_result_deserialization() { - let json = r#"{ - "child_chunks": [{ - "chunk_id": "asr_0001", - "chunk_type": "sentence", - "source": "asr", - "start_time": 0.0, - "end_time": 5.0, - "text_content": "Hello", - "content": {}, - "child_chunk_ids": [], - "parent_chunk_id": null - }], - "parent_chunks": [{ - "chunk_id": "story_asr_0000", - "chunk_type": "story", - "source": "story_asr", - "start_time": 0.0, - "end_time": 5.0, - "text_content": "Hello segment", - "content": {"description": "Hello segment"}, - "child_chunk_ids": ["asr_0001"], - "parent_chunk_id": null - }], - "stats": { - 
"total_child_chunks": 1, - "total_parent_chunks": 1, - "asr_children": 1, - "cut_children": 0 - }, - "metadata": {}, - "parent_chunk_size": 5 - }"#; - - let result: StoryResult = serde_json::from_str(json).unwrap(); - assert_eq!(result.child_chunks.len(), 1); - assert_eq!(result.parent_chunks.len(), 1); - assert_eq!(result.stats.total_child_chunks, 1); - } - - #[test] - fn test_parent_child_relationship() { - let result = StoryResult { - child_chunks: vec![ - StoryChildChunk { - chunk_id: "asr_0001".to_string(), - chunk_type: "sentence".to_string(), - source: "asr".to_string(), - start_time: 0.0, - end_time: 5.0, - text_content: Some("First".to_string()), - content: serde_json::json!({}), - child_chunk_ids: vec![], - parent_chunk_id: Some("story_asr_0000".to_string()), - }, - StoryChildChunk { - chunk_id: "asr_0002".to_string(), - chunk_type: "sentence".to_string(), - source: "asr".to_string(), - start_time: 5.0, - end_time: 10.0, - text_content: Some("Second".to_string()), - content: serde_json::json!({}), - child_chunk_ids: vec![], - parent_chunk_id: Some("story_asr_0000".to_string()), - }, - ], - parent_chunks: vec![StoryParentChunk { - chunk_id: "story_asr_0000".to_string(), - chunk_type: "story".to_string(), - source: "story_asr".to_string(), - start_time: 0.0, - end_time: 10.0, - text_content: "Combined narrative".to_string(), - content: serde_json::json!({}), - child_chunk_ids: vec!["asr_0001".to_string(), "asr_0002".to_string()], - parent_chunk_id: None, - }], - stats: StoryStats { - total_child_chunks: 2, - total_parent_chunks: 1, - asr_children: 2, - cut_children: 0, - }, - metadata: serde_json::json!({}), - parent_chunk_size: 5, - }; - - assert_eq!(result.parent_chunks[0].child_chunk_ids.len(), 2); - assert!(result - .child_chunks - .iter() - .all(|c| c.parent_chunk_id.is_some())); - assert!(result.parent_chunks[0].parent_chunk_id.is_none()); - } - - #[test] - fn test_native_story_empty_data() { - // Write empty ASR and CUT files, then test 
try_native_story - let dir = std::env::temp_dir().join("story_test_empty"); - let _ = std::fs::create_dir_all(&dir); - - let basename = "test_video"; - let asr_path = dir.join(format!("{}.asr.json", basename)); - let cut_path = dir.join(format!("{}.cut.json", basename)); - let out_path = dir.join(format!("{}.story.json", basename)); - - std::fs::write(&asr_path, r#"{"segments":[]}"#).unwrap(); - std::fs::write(&cut_path, r#"{"scenes":[]}"#).unwrap(); - - let result = try_native_story("/dummy.mp4", out_path.to_str().unwrap(), None).unwrap(); - - assert_eq!(result.stats.total_child_chunks, 0); - assert_eq!(result.stats.total_parent_chunks, 0); - - let _ = std::fs::remove_dir_all(&dir); - } - - #[test] - fn test_native_story_with_data() { - let dir = std::env::temp_dir().join("story_test_data"); - let _ = std::fs::create_dir_all(&dir); - - let basename = "test_video"; - let asr_path = dir.join(format!("{}.asr.json", basename)); - let cut_path = dir.join(format!("{}.cut.json", basename)); - let out_path = dir.join(format!("{}.story.json", basename)); - - std::fs::write( - &asr_path, - r#"{ - "segments": [ - {"start": 0.0, "end": 2.5, "text": "Hello", "confidence": 0.95}, - {"start": 2.5, "end": 5.0, "text": "World", "confidence": 0.92}, - {"start": 5.0, "end": 7.5, "text": "Foo", "confidence": 0.90} - ] - }"#, - ) - .unwrap(); - - std::fs::write(&cut_path, r#"{ - "scenes": [ - {"scene_number": 1, "start_frame": 0, "end_frame": 150, "start_time": 0.0, "end_time": 5.0}, - {"scene_number": 2, "start_frame": 150, "end_frame": 300, "start_time": 5.0, "end_time": 10.0} - ] - }"#).unwrap(); - - let result = try_native_story("/dummy.mp4", out_path.to_str().unwrap(), None).unwrap(); - - assert_eq!(result.stats.asr_children, 3); - assert_eq!(result.stats.cut_children, 2); - assert_eq!(result.stats.total_child_chunks, 5); - - // 3 ASR segments, parent_chunk_size=5 → 1 parent - // 2 CUT scenes, parent_chunk_size=5 → 1 parent - assert_eq!(result.stats.total_parent_chunks, 2); - - 
// Verify child-parent linking - for child in &result.child_chunks { - if child.source == "asr" { - assert!(child.parent_chunk_id.is_some()); - assert!(child - .parent_chunk_id - .as_ref() - .unwrap() - .starts_with("story_asr_")); - } - } - - // Verify output file was written - assert!(out_path.exists()); - let content = std::fs::read_to_string(&out_path).unwrap(); - assert!(content.contains("Hello")); - assert!(content.contains("World")); - - let _ = std::fs::remove_dir_all(&dir); - } -} diff --git a/src/core/processor/tkg.rs b/src/core/processor/tkg.rs index 7ca3d83..7f3d6d4 100644 --- a/src/core/processor/tkg.rs +++ b/src/core/processor/tkg.rs @@ -102,91 +102,6 @@ async fn populate_face_detections_from_face_json( } } -// ── Phase 1: Populate face embeddings to Qdrant ──────────────────────────────────────────────── - -async fn populate_face_embeddings_to_qdrant( - pool: &PgPool, - output_dir: &str, - file_uuid: &str, -) -> Result { - use crate::core::db::face_embedding_db::{FaceEmbeddingDb, FaceEmbeddingPayload}; - use tracing::info; - - let face_db = FaceEmbeddingDb::new(); - face_db.init_collection().await?; - - // Check if embeddings already exist - let existing = face_db.get_all_embeddings_for_file(file_uuid).await?; - if !existing.is_empty() { - info!( - "[TKG-Phase1] {} embeddings already in Qdrant for {}", - existing.len(), - file_uuid - ); - return Ok(existing.len()); - } - - // Load from face_detections table - let fd_table = t("face_detections"); - let rows: Vec<(i32, i64, f64, f64, f64, f64, f64, Option>)> = sqlx::query_as(&format!( - "SELECT trace_id::int, frame_number::bigint, x::float8, y::float8, width::float8, height::float8, confidence::float8, embedding::float4[] \ - FROM {} WHERE file_uuid = $1 AND trace_id IS NOT NULL AND embedding IS NOT NULL", - fd_table - )) - .bind(file_uuid) - .fetch_all(pool) - .await?; - - if rows.is_empty() { - info!("[TKG-Phase1] No traced embeddings for {}", file_uuid); - return Ok(0); - } - - // Load pose data for 
yaw/pitch/roll - let pose_data = load_face_pose_data(output_dir, file_uuid).unwrap_or_default(); - - let mut points: Vec<(String, Vec, FaceEmbeddingPayload)> = Vec::new(); - for (trace_id, frame, x, y, w, h, confidence, embedding) in &rows { - if let Some(emb) = embedding { - let (yaw, pitch, roll) = - get_pose_for_face(*frame, *x, *y, *w, *h, &pose_data).unwrap_or((0.0, 0.0, 0.0)); - - // Generate unique numeric point ID (trace_id * 100000 + frame) - let point_id = format!("{}", (*trace_id as u64) * 100000 + (*frame as u64)); - let payload = FaceEmbeddingPayload { - file_uuid: file_uuid.to_string(), - trace_id: *trace_id, - frame: *frame, - bbox_x: *x, - bbox_y: *y, - bbox_w: *w, - bbox_h: *h, - confidence: *confidence, - yaw, - pitch, - roll, - identity_uuid: None, - identity_ref: None, - stranger_ref: None, - r#type: None, - }; - points.push((point_id, emb.clone(), payload)); - } - } - - info!( - "[TKG-Phase1] Attempting to store {} face embeddings in Qdrant for {}", - points.len(), - file_uuid - ); - let count = face_db.batch_upsert(points).await?; - info!( - "[TKG-Phase1] Stored {} face embeddings in Qdrant for {}", - count, file_uuid - ); - Ok(count) -} - // ── Gaze Direction ──────────────────────────────────────────────── #[derive(Debug, Clone, PartialEq)] @@ -576,14 +491,6 @@ pub async fn build_tkg(db: &PostgresDb, file_uuid: &str, output_dir: &str) -> Re ); } - // Phase 1: Populate face embeddings to Qdrant (for TKG-only migration) - if let Err(e) = populate_face_embeddings_to_qdrant(pool, output_dir, file_uuid).await { - tracing::warn!( - "[TKG-Phase1] populate_face_embeddings failed: {} (continuing)", - e - ); - } - let pose_data = load_face_pose_data(output_dir, file_uuid) .map_err(|e| { tracing::error!("[TKG] Failed to load face pose data: {}", e); @@ -644,135 +551,9 @@ async fn build_face_track_nodes( file_uuid: &str, pose_data: &[FacePose], ) -> Result { - use crate::core::db::face_embedding_db::FaceEmbeddingDb; - - // Try Qdrant first (Phase 2: 
TKG-only) - let face_db = FaceEmbeddingDb::new(); - let qdrant_embeddings = face_db.get_all_embeddings_for_file(file_uuid).await?; - - if !qdrant_embeddings.is_empty() { - tracing::info!( - "[TKG-Phase2] Building face_track nodes from Qdrant ({} embeddings)", - qdrant_embeddings.len() - ); - return build_face_track_nodes_from_qdrant(pool, file_uuid, pose_data, qdrant_embeddings) - .await; - } - - // Fallback to face.json (Phase 2.5: Direct from face.json) - tracing::info!("[TKG-Phase2.5] No Qdrant embeddings, trying face.json"); build_face_track_nodes_from_face_json(pool, file_uuid, pose_data).await } -async fn build_face_track_nodes_from_qdrant( - pool: &PgPool, - file_uuid: &str, - pose_data: &[FacePose], - qdrant_embeddings: Vec<( - String, - Vec, - crate::core::db::face_embedding_db::FaceEmbeddingPayload, - )>, -) -> Result { - use crate::core::db::face_embedding_db::FaceEmbeddingPayload; - let nodes_table = t("tkg_nodes"); - - // Group by trace_id - let mut trace_frames: HashMap> = HashMap::new(); - for (_, _, payload) in &qdrant_embeddings { - trace_frames - .entry(payload.trace_id as i64) - .or_default() - .push(( - payload.frame, - payload.bbox_x, - payload.bbox_y, - payload.bbox_w, - payload.bbox_h, - )); - } - - if trace_frames.is_empty() { - tracing::warn!("[TKG-Phase2] No trace data in Qdrant"); - return Ok(0); - } - - // Build aggregates - let mut count = 0; - for (tid, frames) in &trace_frames { - let external_id = format!("face_track_{}", tid); - let label = format!("Face Trace {}", tid); - - let frame_count = frames.len() as i64; - let start_f = frames.iter().map(|(f, _, _, _, _)| *f).min().unwrap_or(0); - let end_f = frames.iter().map(|(f, _, _, _, _)| *f).max().unwrap_or(0); - let avg_x = frames.iter().map(|(_, x, _, _, _)| *x).sum::() / frame_count as f64; - let avg_y = frames.iter().map(|(_, _, y, _, _)| *y).sum::() / frame_count as f64; - let avg_w = frames.iter().map(|(_, _, _, w, _)| *w).sum::() / frame_count as f64; - let avg_h = 
frames.iter().map(|(_, _, _, _, h)| *h).sum::() / frame_count as f64; - - // Compute average pose - let mut yaw_sum = 0.0f64; - let mut pitch_sum = 0.0f64; - let mut roll_sum = 0.0f64; - let mut pose_count = 0i64; - - for (frame, x, y, w, h) in frames { - if let Some((yaw, pitch, roll)) = get_pose_for_face(*frame, *x, *y, *w, *h, pose_data) { - yaw_sum += yaw; - pitch_sum += pitch; - roll_sum += roll; - pose_count += 1; - } - } - - let (avg_yaw, avg_pitch, avg_roll) = if pose_count > 0 { - ( - yaw_sum / pose_count as f64, - pitch_sum / pose_count as f64, - roll_sum / pose_count as f64, - ) - } else { - (0.0, 0.0, 0.0) - }; - - let props = serde_json::json!({ - "trace_id": tid, - "frame_count": frame_count, - "first_frame": start_f, - "last_frame": end_f, - "avg_x": avg_x, - "avg_y": avg_y, - "avg_width": avg_w, - "avg_height": avg_h, - "pose_count": pose_count, - "avg_yaw": avg_yaw, - "avg_pitch": avg_pitch, - "avg_roll": avg_roll, - }); - - sqlx::query(&format!( - "INSERT INTO {} (file_uuid, node_type, external_id, label, properties) \ - VALUES ($1, $2, $3, $4, $5::jsonb) \ - ON CONFLICT (file_uuid, node_type, external_id) \ - DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties)", - nodes_table - )) - .bind(file_uuid) - .bind("face_track") - .bind(&external_id) - .bind(&label) - .bind(serde_json::to_string(&props)?) 
- .execute(pool) - .await?; - - count += 1; - } - - tracing::info!("[TKG-Phase2] Built {} face_track nodes from Qdrant", count); - Ok(count) -} - async fn build_face_track_nodes_from_pg( pool: &PgPool, file_uuid: &str, @@ -1213,152 +994,9 @@ async fn build_co_occurrence_edges( file_uuid: &str, output_dir: &str, ) -> Result { - use crate::core::db::face_embedding_db::FaceEmbeddingDb; - - let face_db = FaceEmbeddingDb::new(); - let qdrant_embeddings = face_db.get_all_embeddings_for_file(file_uuid).await?; - - if !qdrant_embeddings.is_empty() { - tracing::info!( - "[TKG-Phase2.6.1] Building co_occurrence edges from Qdrant ({} embeddings)", - qdrant_embeddings.len() - ); - return build_co_occurrence_edges_from_qdrant( - pool, - file_uuid, - output_dir, - qdrant_embeddings, - ) - .await; - } - - tracing::info!("[TKG-Phase2.6.1] No Qdrant embeddings, falling back to PostgreSQL"); build_co_occurrence_edges_from_pg(pool, file_uuid, output_dir).await } -async fn build_co_occurrence_edges_from_qdrant( - pool: &PgPool, - file_uuid: &str, - output_dir: &str, - qdrant_embeddings: Vec<( - String, - Vec, - crate::core::db::face_embedding_db::FaceEmbeddingPayload, - )>, -) -> Result { - use crate::core::db::face_embedding_db::FaceEmbeddingPayload; - - let yolo_path = Path::new(output_dir).join(format!("{}.yolo.json", file_uuid)); - if !yolo_path.exists() { - return Ok(0); - } - - let content = std::fs::read_to_string(&yolo_path)?; - let yolo: YoloJson = serde_json::from_str(&content)?; - - let nodes_table = t("tkg_nodes"); - let edges_table = t("tkg_edges"); - - let mut frame_faces: HashMap> = HashMap::new(); - for (_, _, payload) in &qdrant_embeddings { - let frame = payload.frame; - let trace_id = payload.trace_id as i64; - frame_faces.entry(frame).or_default().push(( - trace_id, - payload.bbox_x, - payload.bbox_y, - payload.bbox_w, - payload.bbox_h, - )); - } - -let mut edge_count = 0; -for (frame, faces) in frame_faces.iter() { -let yolo_frame = match 
yolo.frames.get(&frame.to_string()) { - Some(f) => f, - None => continue, -}; - - let dets = if !yolo_frame.detections.is_empty() { - &yolo_frame.detections - } else { - &yolo_frame.objects - }; - - if dets.is_empty() { - continue; - } - - for (trace_id, _, _, _, _) in faces { - let external_id = format!("face_track_{}", trace_id); - let face_node: Option<(i64,)> = sqlx::query_as(&format!( - "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_track' AND external_id=$2", - nodes_table - )) - .bind(file_uuid) - .bind(&external_id) - .fetch_optional(pool) - .await?; - - let face_node_id = match face_node { - Some((id,)) => id, - None => continue, - }; - - for det in dets { - let obj_node: Option<(i64,)> = sqlx::query_as(&format!( - "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='object' AND external_id=$2", - nodes_table - )) - .bind(file_uuid) - .bind(&det.class_name) - .fetch_optional(pool) - .await?; - - let obj_node_id = match obj_node { - Some((id,)) => id, - None => continue, - }; - - let edge_props = serde_json::json!({ - "frame": *frame, - "object_confidence": det.confidence, - }); - - if let Err(e) = sqlx::query(&format!( - r#" - INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties) - VALUES ($1, $2, $3, $4, $5::jsonb) - ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id) - DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties) - "#, - edges_table - )) - .bind("CO_OCCURS_WITH") - .bind(face_node_id) - .bind(obj_node_id) - .bind(file_uuid) - .bind(serde_json::to_string(&edge_props)?) 
- .execute(pool) - .await - { - tracing::warn!( - "[TKG-Phase2.6.1] Edge insert failed (trace={}, obj={}): {}", - trace_id, - det.class_name, - e - ); - continue; - } - - edge_count += 1; - } - } - } - - Ok(edge_count) -} - async fn build_co_occurrence_edges_from_pg( pool: &PgPool, file_uuid: &str, @@ -1476,159 +1114,9 @@ async fn build_speaker_face_edges( file_uuid: &str, output_dir: &str, ) -> Result { - use crate::core::db::face_embedding_db::FaceEmbeddingDb; - - let face_db = FaceEmbeddingDb::new(); - let qdrant_embeddings = face_db.get_all_embeddings_for_file(file_uuid).await?; - - if !qdrant_embeddings.is_empty() { - tracing::info!( - "[TKG-Phase2.6.3] Building speaker_face edges from Qdrant ({} embeddings)", - qdrant_embeddings.len() - ); - return build_speaker_face_edges_from_qdrant( - pool, - file_uuid, - output_dir, - qdrant_embeddings, - ) - .await; - } - - tracing::info!("[TKG-Phase2.6.3] No Qdrant embeddings, falling back to PostgreSQL"); build_speaker_face_edges_from_pg(pool, file_uuid, output_dir).await } -async fn build_speaker_face_edges_from_qdrant( - pool: &PgPool, - file_uuid: &str, - output_dir: &str, - qdrant_embeddings: Vec<( - String, - Vec, - crate::core::db::face_embedding_db::FaceEmbeddingPayload, - )>, -) -> Result { - use crate::core::db::face_embedding_db::FaceEmbeddingPayload; - - let asrx_path = Path::new(output_dir).join(format!("{}.asrx.json", file_uuid)); - if !asrx_path.exists() { - return Ok(0); - } - - let content = std::fs::read_to_string(&asrx_path)?; - let asrx: AsrxJson = serde_json::from_str(&content)?; - - if asrx.segments.is_empty() { - return Ok(0); - } - - let nodes_table = t("tkg_nodes"); - let edges_table = t("tkg_edges"); - - let mut trace_ranges: HashMap = HashMap::new(); - for (_, _, payload) in &qdrant_embeddings { - let trace_id = payload.trace_id as i64; - let frame = payload.frame; - let entry = trace_ranges.entry(trace_id).or_insert((frame, frame)); - entry.0 = entry.0.min(frame); - entry.1 = 
entry.1.max(frame); - } - - let last = asrx.segments.last().unwrap(); - let fps = if last.end > 0.0 { - last.end_frame as f64 / last.end - } else { - 30.0 - }; - - let mut edge_count = 0; - - for (tid, (sf, ef)) in &trace_ranges { - let face_ext_id = format!("face_track_{}", tid); - let face_node: Option<(i64,)> = sqlx::query_as(&format!( - "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_track' AND external_id=$2", - nodes_table - )) - .bind(file_uuid) - .bind(&face_ext_id) - .fetch_optional(pool) - .await?; - - let face_node_id = match face_node { - Some((id,)) => id, - None => continue, - }; - - let face_start_sec = *sf as f64 / fps; - let face_end_sec = *ef as f64 / fps; - - for seg in &asrx.segments { - let seg_start = seg.start; - let seg_end = seg.end; - let overlap_start = face_start_sec.max(seg_start); - let overlap_end = face_end_sec.min(seg_end); - - if overlap_start >= overlap_end { - continue; - } - - let overlap_dur = overlap_end - overlap_start; - let face_dur = face_end_sec - face_start_sec; - if face_dur <= 0.0 { - continue; - } - let overlap_ratio = overlap_dur / face_dur; - - if overlap_ratio < 0.3 { - continue; - } - - let speaker_node: Option<(i64,)> = sqlx::query_as(&format!( - "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='speaker' AND external_id=$2", - nodes_table - )) - .bind(file_uuid) - .bind(&seg.speaker_id) - .fetch_optional(pool) - .await?; - - let speaker_node_id = match speaker_node { - Some((id,)) => id, - None => continue, - }; - - let edge_props = serde_json::json!({ - "overlap_ratio": (overlap_ratio * 1000.0).round() / 1000.0, - "overlap_duration_s": (overlap_dur * 10.0).round() / 10.0, - "face_time_range": format!("{:.1}-{:.1}s", face_start_sec, face_end_sec), - "speaker_time_range": format!("{:.1}-{:.1}s", seg_start, seg_end), - }); - - sqlx::query(&format!( - r#" - INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, properties) - VALUES ($1, $2, $3, $4, $5::jsonb) - ON CONFLICT (file_uuid, 
edge_type, source_node_id, target_node_id) - DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties) - "#, - edges_table - )) - .bind("SPEAKS_AS") - .bind(face_node_id) - .bind(speaker_node_id) - .bind(file_uuid) - .bind(serde_json::to_string(&edge_props)?) - .execute(pool) - .await?; - - edge_count += 1; - } - } - - Ok(edge_count) -} - async fn build_speaker_face_edges_from_pg( pool: &PgPool, file_uuid: &str, @@ -1759,206 +1247,9 @@ async fn build_face_face_edges( file_uuid: &str, pose_data: &[FacePose], ) -> Result { - use crate::core::db::face_embedding_db::FaceEmbeddingDb; - - let face_db = FaceEmbeddingDb::new(); - let qdrant_embeddings = face_db.get_all_embeddings_for_file(file_uuid).await?; - - if !qdrant_embeddings.is_empty() { - tracing::info!( - "[TKG-Phase2.6.2] Building face_face edges from Qdrant ({} embeddings)", - qdrant_embeddings.len() - ); - return build_face_face_edges_from_qdrant(pool, file_uuid, pose_data, qdrant_embeddings) - .await; - } - - tracing::info!("[TKG-Phase2.6.2] No Qdrant embeddings, falling back to PostgreSQL"); build_face_face_edges_from_pg(pool, file_uuid, pose_data).await } -async fn build_face_face_edges_from_qdrant( - pool: &PgPool, - file_uuid: &str, - pose_data: &[FacePose], - qdrant_embeddings: Vec<( - String, - Vec, - crate::core::db::face_embedding_db::FaceEmbeddingPayload, - )>, -) -> Result { - use crate::core::db::face_embedding_db::FaceEmbeddingPayload; - - let nodes_table = t("tkg_nodes"); - let edges_table = t("tkg_edges"); - - let mut frame_faces: HashMap> = HashMap::new(); - for (_, _, payload) in &qdrant_embeddings { - frame_faces - .entry(payload.frame) - .or_default() - .push(payload.clone()); - } - - let mut frame_map: HashMap<(i64, i64), (f64, f64, f64, f64)> = HashMap::new(); - for (_, _, payload) in &qdrant_embeddings { - let trace_id = payload.trace_id as i64; - let frame = payload.frame; - frame_map.insert( - (trace_id, frame), - ( - payload.bbox_x, - payload.bbox_y, - 
payload.bbox_w, - payload.bbox_h, - ), - ); - } - - let mut rows: Vec<(i64, i64, i64)> = Vec::new(); - for (frame, faces) in frame_faces.iter() { - for i in 0..faces.len() { - for j in (i + 1)..faces.len() { - let tid_a = faces[i].trace_id as i64; - let tid_b = faces[j].trace_id as i64; - let min_tid = tid_a.min(tid_b); - let max_tid = tid_a.max(tid_b); - rows.push((min_tid, max_tid, *frame)); - } - } - } - - let mut pair_frames: HashMap<(i64, i64), Vec<(i64, bool)>> = HashMap::new(); - for (tid_a, tid_b, frame) in &rows { - let key = (*tid_a.min(tid_b), *tid_a.max(tid_b)); - let bbox_a = frame_map.get(&(*tid_a, *frame)); - let bbox_b = frame_map.get(&(*tid_b, *frame)); - - let gaze = match (bbox_a, bbox_b) { - (Some(&(xa, ya, wa, ha)), Some(&(xb, yb, wb, hb))) => { - get_pose_for_face(*frame, xa, ya, wa, ha, pose_data) - .and_then(|(yaw_a, _, _)| { - get_pose_for_face(*frame, xb, yb, wb, hb, pose_data).map(|(yaw_b, _, _)| { - detect_mutual_gaze(xa, wa, yaw_a, xb, wb, yaw_b, 0.05) - }) - }) - .unwrap_or(false) - } - _ => false, - }; - pair_frames.entry(key).or_default().push((*frame, gaze)); - } - - let mut edge_count = 0; - let mut node_id_cache: HashMap = HashMap::new(); - for ((tid_a, tid_b), frame_data) in &pair_frames { - let ext_a = format!("face_track_{}", tid_a); - let ext_b = format!("face_track_{}", tid_b); - - let n_a_id = match node_id_cache.get(tid_a) { - Some(id) => *id, - None => { - if let Some((id,)) = sqlx::query_as::<_, (i64,)>(&format!( - "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_track' AND external_id=$2", - nodes_table - )) - .bind(file_uuid).bind(&ext_a).fetch_optional(pool).await? 
- { - node_id_cache.insert(*tid_a, id); - id - } else { continue; } - } - }; - - let n_b_id = match node_id_cache.get(tid_b) { - Some(id) => *id, - None => { - if let Some((id,)) = sqlx::query_as::<_, (i64,)>(&format!( - "SELECT id FROM {} WHERE file_uuid=$1 AND node_type='face_track' AND external_id=$2", - nodes_table - )) - .bind(file_uuid).bind(&ext_b).fetch_optional(pool).await? - { - node_id_cache.insert(*tid_b, id); - id - } else { continue; } - } - }; - - let frames: Vec = frame_data.iter().map(|(f, _)| *f).collect(); - let gaze_frames: Vec = frame_data - .iter() - .filter(|(_, g)| *g) - .map(|(f, _)| *f) - .collect(); - let gaze_count = gaze_frames.len() as i64; - let has_gaze = gaze_count > 0; - - let edge_props = if has_gaze { - let mut yaw_a_sum = 0.0f64; - let mut yaw_b_sum = 0.0f64; - let mut gaze_sample = 0i64; - for (frame, _) in frame_data.iter().filter(|(_, g)| *g) { - let bbox_a = frame_map.get(&(*tid_a, *frame)); - let bbox_b = frame_map.get(&(*tid_b, *frame)); - if let (Some(&(xa, ya, wa, ha)), Some(&(xb, yb, wb, hb))) = (bbox_a, bbox_b) { - let pose_a = get_pose_for_face(*frame, xa, ya, wa, ha, pose_data); - let pose_b = get_pose_for_face(*frame, xb, yb, wb, hb, pose_data); - if let (Some((ya, _, _)), Some((yb, _, _))) = (pose_a, pose_b) { - yaw_a_sum += ya; - yaw_b_sum += yb; - gaze_sample += 1; - } - } - } - let (avg_ya, avg_yb) = if gaze_sample > 0 { - ( - yaw_a_sum / gaze_sample as f64, - yaw_b_sum / gaze_sample as f64, - ) - } else { - (0.0, 0.0) - }; - - serde_json::json!({ - "first_frame": frames[0], - "frame_count": frames.len() as i64, - "mutual_gaze": true, - "gaze_frame_count": gaze_count, - "yaw_a_avg": (avg_ya * 1000.0).round() / 1000.0, - "yaw_b_avg": (avg_yb * 1000.0).round() / 1000.0, - }) - } else { - serde_json::json!({ - "first_frame": frames[0], - "frame_count": frames.len() as i64, - "mutual_gaze": false, - }) - }; - - sqlx::query(&format!( - r#" - INSERT INTO {} (edge_type, source_node_id, target_node_id, file_uuid, 
properties) - VALUES ($1, $2, $3, $4, $5::jsonb) - ON CONFLICT (file_uuid, edge_type, source_node_id, target_node_id) - DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_edges.properties) - "#, - edges_table - )) - .bind("CO_OCCURS_WITH") - .bind(n_a_id) - .bind(n_b_id) - .bind(file_uuid) - .bind(serde_json::to_string(&edge_props)?) - .execute(pool) - .await?; - - edge_count += 1; - } - - Ok(edge_count) -} - async fn build_face_face_edges_from_pg( pool: &PgPool, file_uuid: &str, @@ -2141,24 +1432,7 @@ async fn build_gaze_track_nodes( file_uuid: &str, pose_data: &[FacePose], ) -> Result { - use crate::core::db::face_embedding_db::FaceEmbeddingDb; - - // Phase 2.5.1: Try Qdrant first - let face_db = FaceEmbeddingDb::new(); - let qdrant_embeddings = face_db.get_all_embeddings_for_file(file_uuid).await?; - - if !qdrant_embeddings.is_empty() { - tracing::info!( - "[TKG-Phase2.5] Building gaze_track nodes from Qdrant ({} embeddings)", - qdrant_embeddings.len() - ); - return build_gaze_track_nodes_from_qdrant(pool, file_uuid, pose_data, qdrant_embeddings) - .await; - } - - tracing::info!("[TKG-Phase2.5] No Qdrant embeddings, trying face.json"); - - // Try face.json first (方案 B) + // Try face.json first let count = build_gaze_track_nodes_from_face_json(pool, file_uuid, pose_data).await?; if count > 0 { return Ok(count); @@ -2169,158 +1443,6 @@ async fn build_gaze_track_nodes( build_gaze_track_nodes_from_pg(pool, file_uuid, pose_data).await } -async fn build_gaze_track_nodes_from_qdrant( - pool: &PgPool, - file_uuid: &str, - pose_data: &[FacePose], - qdrant_embeddings: Vec<( - String, - Vec, - crate::core::db::face_embedding_db::FaceEmbeddingPayload, - )>, -) -> Result { - use crate::core::db::face_embedding_db::FaceEmbeddingPayload; - let nodes_table = t("tkg_nodes"); - - // Group by trace_id - let mut trace_frames: HashMap> = HashMap::new(); - for (_, _, payload) in &qdrant_embeddings { - trace_frames - .entry(payload.trace_id as i64) - .or_default() - .push(( 
- payload.frame, - payload.bbox_x, - payload.bbox_y, - payload.bbox_w, - payload.bbox_h, - )); - } - - if trace_frames.is_empty() { - tracing::warn!("[TKG-Phase2.5] No trace data in Qdrant"); - return Ok(0); - } - -let mut count = 0; -for (tid, frames) in &trace_frames { -let external_id = format!("gaze_{}", tid); - -// Phase 2.7: Query face_track identity_id -let face_ext_id = format!("face_track_{}", tid); -let face_identity_id: Option = sqlx::query_scalar::<_, Option>(&format!( -"SELECT (properties->>'identity_id')::bigint FROM {} -WHERE file_uuid=$1 AND node_type='face_track' AND external_id=$2", -nodes_table -)) -.bind(file_uuid) -.bind(&face_ext_id) -.fetch_optional(pool) -.await? -.flatten(); - -let mut frame_count = 0i64; - let mut first_frame = i64::MAX; - let mut last_frame = i64::MIN; - let mut yaw_sum = 0.0f64; - let mut pitch_sum = 0.0f64; - let mut roll_sum = 0.0f64; - let mut gaze_dir_counts: HashMap<&str, i64> = HashMap::new(); - let mut blink_candidates = 0i64; - let mut prev_openness = 0.0f64; - - for (frame, x, y, w, h) in frames { - if let Some((yaw, pitch, roll)) = get_pose_for_face(*frame, *x, *y, *w, *h, pose_data) { - frame_count += 1; - if *frame < first_frame { - first_frame = *frame; - } - if *frame > last_frame { - last_frame = *frame; - } - yaw_sum += yaw; - pitch_sum += pitch; - roll_sum += roll; - - let gaze_dir = GazeDirection::from_yaw_pitch(yaw, pitch); - *gaze_dir_counts.entry(gaze_dir.as_str()).or_default() += 1; - - let openness = (pitch.abs() * 10.0).min(1.0); - if prev_openness > 0.5 && openness < 0.2 { - blink_candidates += 1; - } - prev_openness = openness; - } - } - - if frame_count == 0 { - continue; - } - - let avg_yaw = yaw_sum / frame_count as f64; - let avg_pitch = pitch_sum / frame_count as f64; - let avg_roll = roll_sum / frame_count as f64; - let dominant_gaze = gaze_dir_counts - .iter() - .max_by_key(|(_, &c)| c) - .map(|(&d, _)| d) - .unwrap_or("unknown"); - - // Compute eye openness and blink rate - let 
blink_rate = if frame_count > 1 { - blink_candidates as f64 / (frame_count as f64 / 30.0) // per second at 30fps - } else { - 0.0 - }; - - let (gaze_dx, gaze_dy) = compute_gaze_vector(avg_yaw, avg_pitch); - - let props = serde_json::json!({ - "trace_id": tid, - "identity_id": face_identity_id, - "frame_count": frame_count, - "start_frame": first_frame, - "end_frame": last_frame, - "avg_yaw": (avg_yaw * 1000.0).round() / 1000.0, - "avg_pitch": (avg_pitch * 1000.0).round() / 1000.0, - "avg_roll": (avg_roll * 1000.0).round() / 1000.0, - "head_direction": dominant_gaze, - "gaze_direction": GazeDirection::from_yaw_pitch(avg_yaw, avg_pitch).as_str(), - "gaze_vector": {"dx": (gaze_dx * 1000.0).round() / 1000.0, "dy": (gaze_dy * 1000.0).round() / 1000.0}, - "eye_openness": (prev_openness * 100.0).round() / 100.0, - "blink_count": blink_candidates, - "blink_rate": (blink_rate * 100.0).round() / 100.0, - }); - - sqlx::query(&format!( - r#" - INSERT INTO {} (node_type, external_id, file_uuid, label, properties) - VALUES ($1, $2, $3, $4, $5::jsonb) - ON CONFLICT (file_uuid, node_type, external_id) - DO UPDATE SET - properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties), - label = COALESCE(NULLIF(EXCLUDED.label, ''), tkg_nodes.label) - "#, - nodes_table - )) - .bind("gaze_track") - .bind(&external_id) - .bind(file_uuid) - .bind(&external_id) - .bind(serde_json::to_string(&props)?) 
- .execute(pool) - .await?; - - count += 1; - } - - tracing::info!( - "[TKG-Phase2.5] Built {} gaze_track nodes from Qdrant", - count - ); - Ok(count) -} - async fn build_gaze_track_nodes_from_face_json( pool: &PgPool, file_uuid: &str, @@ -2730,27 +1852,7 @@ async fn build_lip_track_nodes( output_dir: &str, pose_data: &[FacePose], ) -> Result { - use crate::core::db::face_embedding_db::FaceEmbeddingDb; - - // Phase 2.5.2: Try Qdrant first for trace_id mapping - let face_db = FaceEmbeddingDb::new(); - let qdrant_embeddings = face_db.get_all_embeddings_for_file(file_uuid).await?; - - if !qdrant_embeddings.is_empty() { - tracing::info!("[TKG-Phase2.5] Building lip_track nodes from Qdrant + face.json"); - return build_lip_track_nodes_from_qdrant( - pool, - file_uuid, - output_dir, - pose_data, - qdrant_embeddings, - ) - .await; - } - - tracing::info!("[TKG-Phase2.5] No Qdrant embeddings, trying face.json"); - - // Try face.json first (方案 B) + // Try face.json first let count = build_lip_track_nodes_from_face_json(pool, file_uuid, pose_data).await?; if count > 0 { return Ok(count); @@ -2761,218 +1863,6 @@ async fn build_lip_track_nodes( build_lip_track_nodes_from_pg(pool, file_uuid, output_dir, pose_data).await } -async fn build_lip_track_nodes_from_qdrant( - pool: &PgPool, - file_uuid: &str, - output_dir: &str, - pose_data: &[FacePose], - qdrant_embeddings: Vec<( - String, - Vec, - crate::core::db::face_embedding_db::FaceEmbeddingPayload, - )>, -) -> Result { - use crate::core::db::face_embedding_db::FaceEmbeddingPayload; - let nodes_table = t("tkg_nodes"); - - // Load lip data from face.json - let path = Path::new(output_dir).join(format!("{}.face.json", file_uuid)); - if !path.exists() { - return Ok(0); - } - - let content = std::fs::read_to_string(&path) - .with_context(|| format!("Failed to read face.json: {}", path.display()))?; - let json: serde_json::Value = serde_json::from_str(&content)?; - - // Build trace_id mapping from Qdrant: frame → Vec<(trace_id, bbox)> 
- let mut frame_trace_map: HashMap> = HashMap::new(); - for (_, _, payload) in &qdrant_embeddings { - frame_trace_map.entry(payload.frame).or_default().push(( - payload.trace_id as i64, - payload.bbox_x, - payload.bbox_y, - payload.bbox_w, - payload.bbox_h, - )); - } - - // Helper function to match trace_id by bbox distance - let match_trace_id = |frame: i64, x: f64, y: f64, w: f64, h: f64| -> Option { - let traces = frame_trace_map.get(&frame)?; - if traces.is_empty() { - return None; - } - - // Find closest by bbox center distance - let mut best: Option<(i64, f64)> = None; - for (tid, tx, ty, tw, th) in traces { - let cx = x + w / 2.0; - let cy = y + h / 2.0; - let tcx = tx + tw / 2.0; - let tcy = ty + th / 2.0; - let dist = ((cx - tcx).powi(2) + (cy - tcy).powi(2)).sqrt(); - if best.is_none() || dist < best.unwrap().1 { - best = Some((*tid, dist)); - } - } - best.map(|(tid, _)| tid) - }; - - // Group by trace_id: trace_id → Vec<(frame, inner_lips_area, outer_lips_area)> - let mut lip_data: HashMap> = HashMap::new(); - - if let Some(frames) = json.get("frames").and_then(|v| v.as_array()) { - for frame_entry in frames { - let frame_num = frame_entry - .get("frame") - .and_then(|v| v.as_i64()) - .unwrap_or(0); - if let Some(faces) = frame_entry.get("faces").and_then(|v| v.as_array()) { - for face in faces { - // face.json has x, y, width, height (not bbox object) - let x = face.get("x").and_then(|v| v.as_f64()).unwrap_or(0.0); - let y = face.get("y").and_then(|v| v.as_f64()).unwrap_or(0.0); - let w = face.get("width").and_then(|v| v.as_f64()).unwrap_or(0.0); - let h = face.get("height").and_then(|v| v.as_f64()).unwrap_or(0.0); - - // Get trace_id from Qdrant mapping - let trace_id = match match_trace_id(frame_num, x, y, w, h) { - Some(tid) => tid, - None => continue, - }; - - // Extract lip landmarks - let lips = face.get("lips"); - if let Some(lips_obj) = lips.and_then(|v| v.as_object()) { - let inner_area = compute_lip_area(lips_obj.get("inner_lips")); - let 
outer_area = compute_lip_area(lips_obj.get("outer_lips")); - if inner_area > 0.0 || outer_area > 0.0 { - lip_data - .entry(trace_id) - .or_default() - .push((frame_num, inner_area, outer_area)); - } - } - } - } - } - } - - if lip_data.is_empty() { - tracing::warn!("[TKG-Phase2.5] No lip data matched"); - return Ok(0); - } - -let mut count = 0; -for (tid, frames) in &lip_data { -let external_id = format!("lip_{}", tid); - -// Phase 2.7: Query face_track identity_id -let face_ext_id = format!("face_track_{}", tid); -let face_identity_id: Option = sqlx::query_scalar::<_, Option>(&format!( -"SELECT (properties->>'identity_id')::bigint FROM {} -WHERE file_uuid=$1 AND node_type='face_track' AND external_id=$2", -nodes_table -)) -.bind(file_uuid) -.bind(&face_ext_id) -.fetch_optional(pool) -.await? -.flatten(); - -let frame_count = frames.len() as i64; - let first_frame = frames.iter().map(|(f, _, _)| *f).min().unwrap_or(0); - let last_frame = frames.iter().map(|(f, _, _)| *f).max().unwrap_or(0); - - let avg_inner = frames.iter().map(|(_, i, _)| *i).sum::() / frame_count as f64; - let avg_outer = frames.iter().map(|(_, _, o)| *o).sum::() / frame_count as f64; - let avg_openness = if avg_outer > 0.0 { - avg_inner / avg_outer - } else { - 0.0 - }; - - let openness_values: Vec = frames - .iter() - .map(|(_, i, o)| if *o > 0.0 { i / o } else { 0.0 }) - .collect(); - let mean_openness = openness_values.iter().sum::() / openness_values.len() as f64; - let variance = openness_values - .iter() - .map(|&v| (v - mean_openness).powi(2)) - .sum::() - / openness_values.len() as f64; - - let speaking_threshold = avg_openness * 1.2; - let speaking_frames = frames - .iter() - .filter(|(_, i, o)| { - if *o > 0.0 { - i / o > speaking_threshold - } else { - false - } - }) - .count() as i64; - - let (avg_yaw, avg_pitch) = if let Some((y, p, _)) = frames - .iter() - .filter_map(|(f, _, _)| { - pose_data - .iter() - .find(|fp| fp.frame == *f) - .map(|fp| (fp.yaw, fp.pitch, fp.roll)) - }) - 
.next() - { - (y, p) - } else { - (0.0, 0.0) - }; - - let props = serde_json::json!({ - "trace_id": tid, - "identity_id": face_identity_id, - "frame_count": frame_count, - "start_frame": first_frame, - "end_frame": last_frame, - "avg_openness": (avg_openness * 1000.0).round() / 1000.0, - "avg_inner_area": (avg_inner * 100.0).round() / 100.0, - "avg_outer_area": (avg_outer * 100.0).round() / 100.0, - "movement_variance": (variance * 1000.0).round() / 1000.0, - "speaking_frames": speaking_frames, - "silent_frames": frame_count - speaking_frames, - "avg_yaw": (avg_yaw * 1000.0).round() / 1000.0, - "avg_pitch": (avg_pitch * 1000.0).round() / 1000.0, - }); - - sqlx::query(&format!( - r#" - INSERT INTO {} (node_type, external_id, file_uuid, label, properties) - VALUES ($1, $2, $3, $4, $5::jsonb) - ON CONFLICT (file_uuid, node_type, external_id) - DO UPDATE SET - properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties), - label = COALESCE(NULLIF(EXCLUDED.label, ''), tkg_nodes.label) - "#, - nodes_table - )) - .bind("lip_track") - .bind(&external_id) - .bind(file_uuid) - .bind(&format!("Lip Trace {}", tid)) - .bind(serde_json::to_string(&props)?) 
- .execute(pool) - .await?; - - count += 1; - } - - tracing::info!("[TKG-Phase2.5] Built {} lip_track nodes from Qdrant", count); - Ok(count) -} - async fn build_lip_track_nodes_from_face_json( pool: &PgPool, file_uuid: &str, @@ -4231,7 +3121,7 @@ mod tests { #[test] fn test_tkg_result() { - let r = TkgResult { +let r = TkgResult { face_track_nodes: 5, gaze_track_nodes: 5, lip_track_nodes: 4, @@ -4239,6 +3129,7 @@ text_region_nodes: 20, appearance_trace_nodes: 3, accessory_nodes: 0, object_nodes: 10, + hand_nodes: 0, speaker_nodes: 3, co_occurrence_edges: 20, speaker_face_edges: 8, @@ -4247,6 +3138,7 @@ accessory_nodes: 0, lip_sync_edges: 15, has_appearance_edges: 3, wears_edges: 0, + hand_object_edges: 0, }; assert_eq!(r.face_track_nodes, 5); assert_eq!(r.object_nodes, 10); diff --git a/src/main.rs b/src/main.rs index a24c504..b8e6a1a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -49,9 +49,6 @@ async fn main() -> Result<()> { Commands::StoreAsrx { uuid } => { handle_store_asrx(&uuid).await?; } - Commands::Story { uuid } => { - handle_story(&uuid).await?; - } Commands::Vectorize { uuid } => { handle_vectorize(&uuid).await?; } @@ -169,13 +166,6 @@ async fn handle_chunk(uuid: &str) -> Result<()> { } /// Handle story command -async fn handle_story(uuid: &str) -> Result<()> { - println!("Generating story for: {}", uuid); - - // TODO: Implement story logic - Ok(()) -} - /// Handle vectorize command async fn handle_vectorize(uuid: &str) -> Result<()> { println!("Vectorizing chunks for: {}", uuid); diff --git a/src/playground.rs b/src/playground.rs index b51bf0b..c27fd0f 100644 --- a/src/playground.rs +++ b/src/playground.rs @@ -633,44 +633,6 @@ async fn process_appearance_module( Ok(()) } -async fn process_story_module( - story_path: &Path, - video_path: &str, - uuid: &str, - progress_state: &Arc>, - ui: &Arc>>, -) -> anyhow::Result<()> { - { - let mut state = progress_state.lock().unwrap(); - state.get_processor(ProcessorType::Story).start(1); - } - let story_result = 
momentry_core::core::processor::process_story( - video_path, - story_path.to_str().unwrap(), - Some(uuid), - ) - .await?; - let story_json = serde_json::to_string_pretty(&story_result)?; - std::fs::write(story_path, &story_json)?; - let output_dir = OutputDir::new(); - let _ = output_dir.backup_file(uuid, "story.json"); - println!( - " ✓ Story saved: {} parent chunks, {} child chunks", - story_result.stats.total_parent_chunks, story_result.stats.total_child_chunks - ); - { - let mut state = progress_state.lock().unwrap(); - state.get_processor(ProcessorType::Story).complete(&format!( - "{} parents, {} children", - story_result.stats.total_parent_chunks, story_result.stats.total_child_chunks - )); - } - if let Some(ref mut ui) = *ui.lock().unwrap() { - let _ = ui.render(); - } - Ok(()) -} - async fn process_caption_module( caption_path: &Path, video_path: &str, @@ -745,11 +707,6 @@ enum Commands { /// UUID uuid: String, }, - /// Generate story for cut scenes - Story { - /// UUID - uuid: String, - }, /// Vectorize chunks Vectorize { /// UUID (or 'all' for all) @@ -2382,150 +2339,6 @@ Ok(()) Ok(()) } - Commands::Story { uuid } => { - println!("Generating story for: {}", uuid); - - let db = PostgresDb::init().await?; - let video = db - .get_video_by_uuid(&uuid) - .await? - .ok_or_else(|| anyhow::anyhow!("Video not found: {}", uuid))?; - - let file_id = video.id; - let _fps = video.fps; - let duration = video.duration; - - // Get all chunks - let all_chunks = db.get_chunks_by_uuid(&uuid).await?; - - // Try cut chunks first, fall back to sentence chunks - let mut story_chunks: Vec<&Chunk> = all_chunks - .iter() - .filter(|c| c.chunk_type == ChunkType::Cut) - .collect(); - - let story_type = if story_chunks.is_empty() { - story_chunks = all_chunks - .iter() - .filter(|c| c.chunk_type == ChunkType::Sentence && c.text_content.is_some()) - .collect(); - "sentence" - } else { - "cut" - }; - - if story_chunks.is_empty() { - println!("No story chunks found. 
Run 'chunk' command first."); - return Ok(()); - } - - println!("Found {} {} scenes", story_chunks.len(), story_type); - - for (i, story_chunk) in story_chunks.iter().enumerate() { - println!("\n=== Scene {} ===", i + 1); - println!( - "Time: {:.2}s - {:.2}s", - story_chunk.start_time().seconds(), - story_chunk.end_time().seconds() - ); - - let context_start = (story_chunk.start_time().seconds() - 5.0).max(0.0); - let context_end = (story_chunk.end_time().seconds() + 5.0).min(duration); - - let context_chunks = db - .get_chunks_by_time_range(&uuid, context_start, context_end) - .await?; - - let context_frames = db - .get_frames_by_time_range(&uuid, context_start, context_end) - .await?; - - let mut story = String::new(); - story.push_str(&format!( - "Scene {} ({:.1}s - {:.1}s)\n\n", - i + 1, - story_chunk.start_time().seconds(), - story_chunk.end_time().seconds() - )); - - let sentence_chunks: Vec<&serde_json::Value> = context_chunks - .iter() - .filter(|c| c["chunk_type"] == "sentence") - .collect(); - - if !sentence_chunks.is_empty() { - story.push_str("【Speech】\n"); - for sc in &sentence_chunks { - if let Some(text) = sc["text_content"].as_str() { - story.push_str(&format!(" - {}\n", text)); - } - } - story.push('\n'); - } - - let mut all_objects: std::collections::HashMap = - std::collections::HashMap::new(); - for frame in &context_frames { - if let Some(objects) = frame["yolo_objects"].as_array() { - for obj in objects { - if let Some(class_name) = obj.get("class_name").and_then(|v| v.as_str()) - { - *all_objects.entry(class_name.to_string()).or_insert(0) += 1; - } - } - } - } - - if !all_objects.is_empty() { - story.push_str("【Objects】\n"); - let mut sorted_objects: Vec<_> = all_objects.iter().collect(); - sorted_objects.sort_by(|a, b| b.1.cmp(a.1)); - for (obj, count) in sorted_objects.iter().take(10) { - story.push_str(&format!(" - {} ({} frames)\n", obj, count)); - } - story.push('\n'); - } - - let mut all_texts: Vec = Vec::new(); - for frame in 
&context_frames { - if let Some(texts) = frame["ocr_results"].as_array() { - for txt in texts { - if let Some(text) = txt.get("text").and_then(|v| v.as_str()) { - if !text.is_empty() && text.len() > 2 { - all_texts.push(text.to_string()); - } - } - } - } - } - - if !all_texts.is_empty() { - story.push_str("【Text in video】\n"); - for txt in all_texts.iter().take(10) { - story.push_str(&format!(" - {}\n", txt)); - } - story.push('\n'); - } - - let mut face_count = 0; - for frame in &context_frames { - if let Some(faces) = frame["face_results"].as_array() { - face_count += faces.len(); - } - } - - if face_count > 0 { - story.push_str(&format!( - "【Faces】\n - {} face(s) detected\n\n", - face_count - )); - } - - println!("{}", story); - } - - Ok(()) - } Commands::Vectorize { uuid } => { println!("Vectorizing: {}", uuid); diff --git a/src/processing/modules/mod.rs b/src/processing/modules/mod.rs index 300055a..71151f1 100644 --- a/src/processing/modules/mod.rs +++ b/src/processing/modules/mod.rs @@ -8,7 +8,6 @@ pub mod cut; pub mod face; pub mod ocr; pub mod pose; -pub mod story; pub mod yolo; pub use appearance::*; @@ -19,5 +18,4 @@ pub use cut::*; pub use face::*; pub use ocr::*; pub use pose::*; -pub use story::*; pub use yolo::*; diff --git a/src/processing/modules/story.rs b/src/processing/modules/story.rs deleted file mode 100644 index 5054dd5..0000000 --- a/src/processing/modules/story.rs +++ /dev/null @@ -1,53 +0,0 @@ -//! 
Story generation processing module - -use anyhow::Result; -use momentry_core::ui::progress::{ProcessorType, ProgressState, ProgressUi}; -use momentry_core::OutputDir; -use std::path::Path; -use std::sync::{Arc, Mutex}; - -/// Process Story module -pub async fn process_story_module( - story_path: &Path, - video_path: &str, - uuid: &str, - progress_state: &Arc>, - ui: &Arc>>, -) -> Result<()> { - { - let mut state = progress_state.lock().unwrap(); - state.get_processor(ProcessorType::Story).start(1); - } - - let story_result = momentry_core::core::processor::process_story( - video_path, - story_path.to_str().unwrap(), - Some(uuid), - ) - .await?; - - let story_json = serde_json::to_string_pretty(&story_result)?; - std::fs::write(story_path, &story_json)?; - - let output_dir = OutputDir::new(); - let _ = output_dir.backup_file(uuid, "story.json"); - - println!( - " ✓ Story saved: {} parent chunks, {} child chunks", - story_result.stats.total_parent_chunks, story_result.stats.total_child_chunks - ); - - { - let mut state = progress_state.lock().unwrap(); - state.get_processor(ProcessorType::Story).complete(&format!( - "{} parents, {} children", - story_result.stats.total_parent_chunks, story_result.stats.total_child_chunks - )); - } - - if let Some(ref mut ui) = *ui.lock().unwrap() { - let _ = ui.render(); - } - - Ok(()) -} diff --git a/src/ui/progress/mod.rs b/src/ui/progress/mod.rs index 7e223fe..e4589e3 100644 --- a/src/ui/progress/mod.rs +++ b/src/ui/progress/mod.rs @@ -21,7 +21,6 @@ pub enum ProcessorType { Face, Pose, Hand, - Story, Caption, } @@ -37,7 +36,6 @@ impl ProcessorType { ProcessorType::Face => "Face", ProcessorType::Pose => "Pose", ProcessorType::Hand => "Hand", - ProcessorType::Story => "Story", ProcessorType::Caption => "Caption", } } @@ -145,7 +143,6 @@ impl ProgressState { ProcessorProgress::new(ProcessorType::Face), ProcessorProgress::new(ProcessorType::Pose), ProcessorProgress::new(ProcessorType::Hand), - 
ProcessorProgress::new(ProcessorType::Story), ProcessorProgress::new(ProcessorType::Caption), ], video_name: video_name.to_string(), @@ -201,7 +198,6 @@ impl ProgressState { "OCR" => ProcessorType::Ocr, "FACE" => ProcessorType::Face, "POSE" => ProcessorType::Pose, - "STORY" => ProcessorType::Story, "CAPTION" => ProcessorType::Caption, _ => return, }; diff --git a/src/verification/schema.rs b/src/verification/schema.rs index 8c1c4d7..d183513 100644 --- a/src/verification/schema.rs +++ b/src/verification/schema.rs @@ -209,48 +209,6 @@ pub const PROCESSOR_SCHEMAS: &[ProcessorJsonSchema] = &[ ], min_data_threshold: 1, }, - ProcessorJsonSchema { - processor: ProcessorType::Story, - required_fields: &[ - RequiredField { - path: "child_chunks", - field_type: FieldType::Array, - allow_empty: true, - }, - RequiredField { - path: "parent_chunks", - field_type: FieldType::Array, - allow_empty: true, - }, - RequiredField { - path: "stats", - field_type: FieldType::Object, - allow_empty: false, - }, - ], - min_data_threshold: 0, - }, - ProcessorJsonSchema { - processor: ProcessorType::MediaPipe, - required_fields: &[ - RequiredField { - path: "frame_count", - field_type: FieldType::PositiveNumber, - allow_empty: false, - }, - RequiredField { - path: "fps", - field_type: FieldType::PositiveNumber, - allow_empty: false, - }, - RequiredField { - path: "frames", - field_type: FieldType::Array, - allow_empty: true, - }, - ], - min_data_threshold: 0, - }, ]; /// Get schema for a processor diff --git a/src/verification/verifier.rs b/src/verification/verifier.rs index df860c0..712e0f4 100644 --- a/src/verification/verifier.rs +++ b/src/verification/verifier.rs @@ -161,24 +161,6 @@ fn count_data_items(processor: &ProcessorType, value: &serde_json::Value) -> usi .and_then(|v| v.as_array()) .map(|a| a.len()) .unwrap_or(0), - ProcessorType::Story => { - let child = value - .get("child_chunks") - .and_then(|v| v.as_array()) - .map(|a| a.len()) - .unwrap_or(0); - let parent = value - 
.get("parent_chunks") - .and_then(|v| v.as_array()) - .map(|a| a.len()) - .unwrap_or(0); - child + parent - } - ProcessorType::MediaPipe => value - .get("frames") - .and_then(|v| v.as_array()) - .map(|a| a.len()) - .unwrap_or(0), _ => 0, } } @@ -318,23 +300,6 @@ fn check_reasonableness( } } - // Story-specific: check chunk count vs cut scene count - if *processor == ProcessorType::Story { - if let Some(cut_value) = all_values.get("cut") { - let story_chunks = count_data_items(processor, value); - let cut_scenes = count_data_items(&ProcessorType::Cut, cut_value); - if story_chunks > 0 && cut_scenes > 0 { - // Story chunks should be >= cut scenes (one chunk per scene minimum) - if story_chunks < cut_scenes / 2 { - issues.push(format!( - "story chunk count ({}) much less than cut scene count ({})", - story_chunks, cut_scenes - )); - } - } - } - } - // ASR-specific: check segments vs cut scenes if *processor == ProcessorType::Asr { if let Some(cut_value) = all_values.get("cut") { @@ -499,11 +464,6 @@ fn build_data_summary(processor: &ProcessorType, value: &serde_json::Value) -> s summary["speaker_count"] = serde_json::json!(speakers.len()); } } - ProcessorType::Story => { - if let Some(stats) = value.get("stats") { - summary["stats"] = stats.clone(); - } - } _ => {} } @@ -538,10 +498,7 @@ pub fn verify_file(file_uuid: &str) -> FileVerificationReport { let mut all_values: HashMap = HashMap::new(); for processor in &processors { let proc_name = processor.as_str(); - let filename = match processor { - ProcessorType::Story => format!("{}.story_story.json", full_uuid), - _ => format!("{}.{}.json", full_uuid, proc_name), - }; + let filename = format!("{}.{}.json", full_uuid, proc_name); let path = PathBuf::from(OUTPUT_DIR.as_str()).join(&filename); if let Ok(content) = std::fs::read_to_string(&path) { @@ -639,10 +596,7 @@ pub fn verify_file(file_uuid: &str) -> FileVerificationReport { /// Legacy verification function (backward compatible) pub fn verify_output(processor: 
&ProcessorType, file_uuid: &str) -> VerificationResult { let proc_name = processor.as_str(); - let filename = match processor { - ProcessorType::Story => format!("{}.story_story.json", file_uuid), - _ => format!("{}.{}.json", file_uuid, proc_name), - }; + let filename = format!("{}.{}.json", file_uuid, proc_name); let output_path = PathBuf::from(OUTPUT_DIR.as_str()).join(&filename); if !output_path.exists() { diff --git a/src/worker/processor.rs b/src/worker/processor.rs index ff891f2..9d3e9aa 100644 --- a/src/worker/processor.rs +++ b/src/worker/processor.rs @@ -14,9 +14,7 @@ struct ProcessorCleanupGuard { running_count: Arc>, frame_count: Arc>, time_count: Arc>, - best_effort_count: Arc>, pipeline: PipelineType, - is_best_effort: bool, } impl Drop for ProcessorCleanupGuard { @@ -32,30 +30,22 @@ impl Drop for ProcessorCleanupGuard { *guard -= 1; } } - if self.is_best_effort { - if let Ok(mut guard) = self.best_effort_count.try_write() { - if *guard > 0 { - *guard -= 1; - } - } - } else { - match self.pipeline { - PipelineType::Frame => { - if let Ok(mut guard) = self.frame_count.try_write() { - if *guard > 0 { - *guard -= 1; - } + match self.pipeline { + PipelineType::Frame => { + if let Ok(mut guard) = self.frame_count.try_write() { + if *guard > 0 { + *guard -= 1; } } - PipelineType::Time => { - if let Ok(mut guard) = self.time_count.try_write() { - if *guard > 0 { - *guard -= 1; - } + } + PipelineType::Time => { + if let Ok(mut guard) = self.time_count.try_write() { + if *guard > 0 { + *guard -= 1; } } - PipelineType::Cross => {} // cross pipeline not tracked in slot counts } + PipelineType::Cross => {} } } } @@ -106,8 +96,6 @@ pub struct ProcessorTask { const FRAME_SLOT_MAX: usize = 2; /// Time pipeline max concurrent processors (audio is heavy, run 1 at a time). const TIME_SLOT_MAX: usize = 1; -/// Best-effort slot (used by low-priority processors like MediaPipe). 
-const BEST_EFFORT_SLOT_MAX: usize = 1; pub struct ProcessorPool { db: Arc, @@ -117,7 +105,6 @@ pub struct ProcessorPool { running_count: Arc>, running_frame_count: Arc>, running_time_count: Arc>, - running_best_effort_count: Arc>, } impl ProcessorPool { @@ -130,7 +117,6 @@ impl ProcessorPool { running_count: Arc::new(RwLock::new(0)), running_frame_count: Arc::new(RwLock::new(0)), running_time_count: Arc::new(RwLock::new(0)), - running_best_effort_count: Arc::new(RwLock::new(0)), } } @@ -240,22 +226,16 @@ impl ProcessorPool { *count += 1; } // 遞增產線專屬 slot - let is_best_effort = processor_type == ProcessorType::MediaPipe; - if is_best_effort { - *self.running_best_effort_count.write().await += 1; - } else { - match pipeline { - PipelineType::Frame => *self.running_frame_count.write().await += 1, - PipelineType::Time => *self.running_time_count.write().await += 1, - PipelineType::Cross => {} // cross pipeline uses global slot - } + match pipeline { + PipelineType::Frame => *self.running_frame_count.write().await += 1, + PipelineType::Time => *self.running_time_count.write().await += 1, + PipelineType::Cross => {} // cross pipeline uses global slot } let running = self.running.clone(); let running_count = self.running_count.clone(); let running_frame_count = self.running_frame_count.clone(); let running_time_count = self.running_time_count.clone(); - let running_best_effort_count = self.running_best_effort_count.clone(); let child_pid: Arc>> = Arc::new(RwLock::new(None)); running.write().await.insert( job_id, @@ -287,9 +267,7 @@ impl ProcessorPool { running_count: running_count.clone(), frame_count: running_frame_count.clone(), time_count: running_time_count.clone(), - best_effort_count: running_best_effort_count.clone(), pipeline, - is_best_effort, }; info!("Starting processor {} for job {}", processor_name, job.uuid); @@ -528,10 +506,7 @@ impl ProcessorPool { // Generate output path let output_dir = PathBuf::from(OUTPUT_DIR.as_str()); - let suffix = match 
processor_type { - ProcessorType::Story => format!("{}.story_story", job.uuid), - _ => format!("{}.{}", job.uuid, processor_type.as_str()), - }; + let suffix = format!("{}.{}", job.uuid, processor_type.as_str()); let output_path = output_dir.join(format!("{}.json", suffix)); // Ensure output directory exists @@ -1052,80 +1027,6 @@ impl ProcessorPool { pid: 0, }) } - ProcessorType::Story => { - let executor = crate::core::processor::PythonExecutor::new()?; - let _ = executor - .run( - "parent_chunk_5w1h.py", - &["--file-uuid", &job.uuid, "--embed"], - uuid, - "STORY", - Some(std::time::Duration::from_secs(300)), - ) - .await; - let narratives_path = output_dir.join(format!("{}.narratives.json", job.uuid)); - let chunks_produced = if narratives_path.exists() { - let content = std::fs::read_to_string(&narratives_path).unwrap_or_default(); - let count: i32 = serde_json::from_str::>(&content) - .map(|v| v.len() as i32) - .unwrap_or(0); - tracing::info!("Story generated {} narratives for {}", count, job.uuid); - count - } else { - 0 - }; - Ok(ProcessorOutput { - data: serde_json::Value::Null, - chunks_produced, - frames_processed: total_frames, - total_frames, - retry_count: 0, - pid: 0, - }) - } - ProcessorType::FiveW1H => { - let executor = crate::core::processor::PythonExecutor::new()?; - let _ = executor - .run( - "parent_chunk_5w1h.py", - &["--file-uuid", &job.uuid, "--embed", "--mode", "llm"], - uuid, - "5W1H", - Some(std::time::Duration::from_secs(300)), - ) - .await; - Ok(ProcessorOutput { - data: serde_json::Value::Null, - chunks_produced: 0, - frames_processed: total_frames, - total_frames, - retry_count: 0, - pid: 0, - }) - } - ProcessorType::MediaPipe => { - let result = processor::process_mediapipe_v2( - video_path, - output_path.to_str().unwrap(), - uuid, - Some(&sample_frames), - ) - .await?; - let chunks_produced = result.frames.len() as i32; - tracing::info!( - "MEDIAPIPE completed, {} frames for {}", - chunks_produced, - job.uuid - ); - 
Ok(ProcessorOutput { - data: serde_json::to_value(result)?, - chunks_produced, - frames_processed: total_frames, - total_frames, - retry_count: 0, - pid: 0, - }) - } } }