diff --git a/Cargo.lock b/Cargo.lock index a9cb697..09fd487 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -636,6 +636,8 @@ dependencies = [ "compression-core", "flate2", "memchr", + "zstd", + "zstd-safe", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index b712fa6..1522602 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,7 +55,7 @@ sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "sqlite", "js mongodb = { version = "2", features = ["tokio-runtime"] } bson = { version = "2", features = ["chrono-0_4"] } qdrant-client = "1.7" -reqwest = { version = "0.12", features = ["json", "gzip"] } +reqwest = { version = "0.12", features = ["json", "gzip", "zstd"] } pgvector = { version = "0.3", features = ["sqlx"] } # HTTP Server diff --git a/docs_v1.0/API_WORKSPACE/modules/07_identity.md b/docs_v1.0/API_WORKSPACE/modules/07_identity.md index 0416276..188946e 100644 --- a/docs_v1.0/API_WORKSPACE/modules/07_identity.md +++ b/docs_v1.0/API_WORKSPACE/modules/07_identity.md @@ -923,6 +923,128 @@ curl -s "$API/api/v1/identity/$IDENTITY_UUID/json" \ --- +--- + +### `POST /api/v1/file/:file_uuid/pending-person` + +**Auth**: Required +**Scope**: file-level + +Create a manually managed "pending person" under a specific file. A pending person is an identity with `status='pending'` and `source='manual'`, used for unmatched traces that the user wants to manually label before a full identity resolution. + +Optionally binds a list of trace IDs to this new identity. + +#### Request + +```json +{ + "trace_ids": [100, 150, 200], + "name": "Mystery Man #1" +} +``` + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `trace_ids` | array[int] | No | `[]` | Trace IDs to bind to this pending person | +| `name` | string | No | `"Person N"` | Human-readable name. 
Auto-generated if omitted | + +#### Example + +```bash +# Create pending person with name and no traces +curl -s -X POST "$API/api/v1/file/$FILE_UUID/pending-person" \ + -H "X-API-Key: $KEY" \ + -H "Content-Type: application/json" \ + -d '{"name": "Unknown Woman #2", "trace_ids": []}' + +# Create pending person with auto-name and bind traces +curl -s -X POST "$API/api/v1/file/$FILE_UUID/pending-person" \ + -H "X-API-Key: $KEY" \ + -H "Content-Type: application/json" \ + -d '{"trace_ids": [100, 150, 200]}' +``` + +#### Response (200) + +```json +{ + "success": true, + "message": "Created pending person: Mystery Man #1 (uuid: 4d96b25b-68f0-4c52-b238-d69f7dfd588b)", + "data": { + "identity_uuid": "4d96b25b-68f0-4c52-b238-d69f7dfd588b", + "identity_id": 55, + "name": "Mystery Man #1", + "bound_traces": 0 + } +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `identity_uuid` | string | UUID of the newly created pending identity | +| `identity_id` | integer | Internal ID of the new identity | +| `name` | string | Display name | +| `bound_traces` | integer | Number of traces bound | + +#### Side Effects + +- Creates an `identities` row with `status='pending'`, `source='manual'`, `file_uuid=` +- If `trace_ids` provided: `UPDATE face_detections SET identity_id = ...` for matching traces +- If `trace_ids` provided: TKG face_track nodes get `identity_id` / `identity_name` in properties +- Identity JSON file synced to disk + +--- + +### `GET /api/v1/file/:file_uuid/pending-persons` + +**Auth**: Required +**Scope**: file-level + +List all pending persons for a file. 
+ +#### Example + +```bash +curl -s "$API/api/v1/file/$FILE_UUID/pending-persons" \ + -H "X-API-Key: $KEY" +``` + +#### Response (200) + +```json +{ + "success": true, + "message": "Found 2 pending persons for c36f35685177c981aa139b66bbbccc5b", + "data": [ + { + "identity_uuid": "232ecd08-a2bf-4bd0-bd25-0bd8fb7a7dae", + "identity_id": 56, + "name": "Person 2", + "created_at": "2026-06-23 17:13:23", + "trace_count": 3, + "bound_traces": null + } + ] +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `identity_uuid` | string | Identity UUID | +| `identity_id` | integer | Internal identity ID | +| `name` | string | Display name | +| `created_at` | string | Creation timestamp | +| `trace_count` | integer | Number of face traces bound to this pending person | +| `bound_traces` | array[int] | List of bound trace IDs (currently null, reserved for future expansion) | + +#### Notes + +- Pending persons are normal `identities` rows with `status='pending'` — they can be promoted to confirmed via `PATCH /api/v1/identity/:identity_uuid` (`{"status": "confirmed"}`) +- They can be merged into known identities via `POST /api/v1/identity/:identity_uuid/mergeinto` +- Use `GET /api/v1/identity/:identity_uuid/traces` to get detailed trace info for each pending person + +--- + ## Alias System (BCP 47 Locale Tags) Identity aliases support multilingual display names. Aliases are stored in `metadata.aliases` as an array of `{locale, name}` objects. diff --git a/docs_v1.0/doc/04_lookup.html b/docs_v1.0/doc/04_lookup.html index 27073fd..cdd6d21 100644 --- a/docs_v1.0/doc/04_lookup.html +++ b/docs_v1.0/doc/04_lookup.html @@ -32,7 +32,7 @@ a { color: #0066cc; } Logout - +

File Lookup

@@ -137,6 +137,537 @@ curl -s "
+
+

File Listing

+

GET /api/v1/files

+

Auth: Required +Scope: system-level

+

List all registered files with pagination. Optionally filter by status or fetch a specific file by UUID.

+

Query Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
pageintegerNo1Page number
page_sizeintegerNo20Items per page
statusstringNoFilter by status: registered, processing, completed, failed, indexed, checked_out
file_uuidstringNoFetch a specific file (returns as single-item list)
+

Example

+
# List all files (paginated)
+curl -s "$API/api/v1/files?page=1&page_size=10" \
+  -H "X-API-Key: $KEY"
+
+# Filter by status
+curl -s "$API/api/v1/files?status=completed" \
+  -H "X-API-Key: $KEY"
+
+# Fetch specific file
+curl -s "$API/api/v1/files?file_uuid=$FILE_UUID" \
+  -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "success": true,
+  "total": 42,
+  "page": 1,
+  "page_size": 10,
+  "data": [
+    {
+      "file_uuid": "d3f9ae8e471a1fc4d47022c66091b920",
+      "file_name": "video.mp4",
+      "file_path": "/path/to/video.mp4",
+      "status": "completed"
+    }
+  ]
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
successbooleanAlways true on 200
totalintegerTotal file count
pageintegerCurrent page
page_sizeintegerItems per page
dataarrayArray of file items
data[].file_uuidstring32-char hex UUID
data[].file_namestringRegistered file name
data[].file_pathstringFull filesystem path
data[].statusstringProcessing status
+
+

GET /api/v1/file/:file_uuid

+

Auth: Required +Scope: file-level

+

Get detailed info for a specific registered file including metadata, duration, FPS, and probe data.

+

Example

+
curl -s "$API/api/v1/file/$FILE_UUID" \
+  -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "success": true,
+  "file_uuid": "d3f9ae8e471a1fc4d47022c66091b920",
+  "file_name": "video.mp4",
+  "file_path": "/path/to/video.mp4",
+  "status": "completed",
+  "duration": 120.5,
+  "fps": 24.0,
+  "metadata": {
+    "format": {"duration": "120.5", "size": "794863677"},
+    "streams": [{"codec_name": "h264", "width": 1920, "height": 1080}]
+  },
+  "created_at": "2026-05-16T12:00:00Z"
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
successbooleanAlways true on 200
file_uuidstring32-char hex UUID
file_namestringRegistered file name
file_pathstringFull filesystem path
statusstringProcessing status
durationfloatDuration in seconds
fpsfloatFrames per second
metadataobjectFull ffprobe metadata (probe.json)
created_atstringRegistration timestamp (ISO 8601)
+

Error Codes

+ + + + + + + + + + + + + +
HTTPWhen
404File UUID not found
+
+

GET /api/v1/file/:file_uuid/identities

+

Auth: Required +Scope: file-level

+

Get all identities present in a specific file with pagination.

+

Query Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
pageintegerNo1Page number
page_sizeintegerNo20Items per page
+

Example

+
curl -s "$API/api/v1/file/$FILE_UUID/identities?page=1&page_size=50" \
+  -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "success": true,
+  "file_uuid": "d3f9ae8e471a1fc4d47022c66091b920",
+  "fps": 24.0,
+  "total": 5,
+  "page": 1,
+  "page_size": 20,
+  "data": [
+    {
+      "identity_id": 1,
+      "identity_uuid": "a9a90105-6d6b-46ff-92da-0c3c1a57dff4",
+      "name": "Audrey Hepburn",
+      "metadata": {"source": "tmdb", "tmdb_id": 1234},
+      "face_count": 142,
+      "speaker_count": 8,
+      "start_frame": 100,
+      "end_frame": 5000,
+      "start_time": 4.17,
+      "end_time": 208.33,
+      "confidence": 0.87
+    }
+  ]
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
data[].identity_idintegerDatabase identity ID
data[].identity_uuidstring/nullGlobal identity UUID (null if unbound)
data[].namestringIdentity name
data[].metadataobjectSource metadata (TMDb, etc.)
data[].face_countinteger/nullNumber of face detections
data[].speaker_countinteger/nullNumber of speaker segments
data[].start_frameinteger/nullFirst appearance frame
data[].end_frameinteger/nullLast appearance frame
data[].start_timefloat/nullFirst appearance time (seconds)
data[].end_timefloat/nullLast appearance time (seconds)
data[].confidencefloat/nullAverage detection confidence
+
+

GET /api/v1/file/:file_uuid/faces

+

Auth: Required +Scope: file-level

+

List all face detections in a specific file with pagination.

+

Query Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
pageintegerNo1Page number
page_sizeintegerNo50Items per page
+

Example

+
curl -s "$API/api/v1/file/$FILE_UUID/faces?page=1&page_size=100" \
+  -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "success": true,
+  "file_uuid": "d3f9ae8e471a1fc4d47022c66091b920",
+  "total": 1420,
+  "page": 1,
+  "page_size": 50,
+  "data": [
+    {
+      "face_id": "face_100",
+      "frame_number": 1200,
+      "timestamp": 50.0,
+      "bbox": [100, 50, 300, 400],
+      "confidence": 0.95,
+      "identity_id": 1,
+      "identity_uuid": "a9a90105-6d6b-46ff-92da-0c3c1a57dff4",
+      "trace_id": 2
+    }
+  ]
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
data[].face_idstringFace detection ID
data[].frame_numberintegerFrame number in video
data[].timestampfloatTimestamp in seconds
data[].bboxarrayBounding box [x1, y1, x2, y2]
data[].confidencefloatDetection confidence
data[].identity_idinteger/nullBound identity ID (null if unbound)
data[].identity_uuidstring/nullBound identity UUID (null if unbound)
data[].trace_idinteger/nullFace trace ID (null if not traced)
+
+

POST /api/v1/file/:file_uuid/json/:processor

+

Auth: Required +Scope: file-level

+

Download raw JSON output for a specific processor.

+

Path Parameters

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDescription
file_uuidstringYesFile UUID
processorstringYesProcessor name: cut, asrx, yolo, ocr, face, pose, story, etc.
+

Example

+
curl -s -X POST "$API/api/v1/file/$FILE_UUID/json/face" \
+  -H "X-API-Key: $KEY" | jq '.frames | length'
+
+ +

Response (200)

+

Returns the raw JSON output of the specified processor. Structure varies by processor type.

+

Error Codes

+ + + + + + + + + + + + + + + + + +
HTTPWhen
404JSON file not found
500Failed to parse JSON
+

Unregister

POST /api/v1/unregister

Auth: Required @@ -293,7 +824,7 @@ curl -s -X


-

Updated: 2026-05-19 12:49:24

+

Updated: 2026-06-20 — Added file listing, file detail, file identities, file faces, and JSON download endpoints

\ No newline at end of file diff --git a/docs_v1.0/doc/05_process.html b/docs_v1.0/doc/05_process.html index b8f15af..c250249 100644 --- a/docs_v1.0/doc/05_process.html +++ b/docs_v1.0/doc/05_process.html @@ -260,10 +260,11 @@ curl -s -X
-

GET /api/v1/progress/:file_uuid

+

POST /api/v1/progress/:file_uuid

Auth: Required Scope: file-level

Get real-time processing progress for a file via Redis pub/sub. Includes per-processor status, current/total frames, ETA, and system resource stats.

+

Note: This endpoint uses the POST method, not GET. Progress data is stored in Redis as a hash, and the POST request returns its latest state.

Pipeline Order

@@ -339,7 +340,7 @@ curl -s -X

All processors except story and 5w1h run concurrently when their dependencies are met. Story and 5W1H run sequentially after their prerequisites.

Example

-
curl -s "$API/api/v1/progress/$FILE_UUID" -H "X-API-Key: $KEY" | jq '{overall_progress, processors: [.processors[] | {processor_type, status}]}'
+
curl -s -X POST "$API/api/v1/progress/$FILE_UUID" -H "X-API-Key: $KEY" | jq '{overall_progress, processors: [.processors[] | {name, status}]}'
 

Response (200)

@@ -506,8 +507,152 @@ curl -s -X +

GET /api/v1/file/:file_uuid/processor-counts

+

Auth: Required +Scope: file-level

+

Get counts of processor JSON output files. See 15_tkg.md for full documentation.


-

Updated: 2026-05-19 12:49:24

+

Pipeline Steps (Manual)

+

These endpoints execute individual pipeline steps. They are typically called by the worker automatically, but can be invoked manually for debugging or re-processing.

+

POST /api/v1/file/:file_uuid/store-asrx

+

Auth: Required +Scope: file-level

+

Store ASRX diarization results as chunk records in the database. Converts ASRX segments into searchable chunk entries.

+

Example

+
curl -s -X POST "$API/api/v1/file/$FILE_UUID/store-asrx" \
+  -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "success": true,
+  "message": "ASRX chunks stored",
+  "file_uuid": "3a6c1865..."
+}
+
+ +
+

POST /api/v1/file/:file_uuid/rule1

+

Auth: Required +Scope: file-level

+

Execute Rule 1 pipeline step. Applies rule-based chunking to create structured chunk records from processor outputs.

+

Example

+
curl -s -X POST "$API/api/v1/file/$FILE_UUID/rule1" \
+  -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "success": true,
+  "message": "Rule 1 complete: 45 chunks",
+  "file_uuid": "3a6c1865...",
+  "chunks": 45
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
successbooleanAlways true on 200
messagestringHuman-readable completion message
file_uuidstring32-char hex UUID
chunksintegerNumber of chunks produced
+
+

POST /api/v1/file/:file_uuid/vectorize

+

Auth: Required +Scope: file-level

+

Generate vector embeddings for all chunks of a file and store them in Qdrant for semantic search.

+

Example

+
curl -s -X POST "$API/api/v1/file/$FILE_UUID/vectorize" \
+  -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "success": true,
+  "message": "Vectorization complete",
+  "file_uuid": "3a6c1865..."
+}
+
+ +
+

POST /api/v1/file/:file_uuid/phase1

+

Auth: Required +Scope: file-level

+

Execute Phase 1 of the post-processing pipeline. Combines store-asrx, rule1, and vectorize into a single step.

+

Example

+
curl -s -X POST "$API/api/v1/file/$FILE_UUID/phase1" \
+  -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "success": true,
+  "message": "Phase 1 complete",
+  "file_uuid": "3a6c1865..."
+}
+
+ +
+

POST /api/v1/file/:file_uuid/complete

+

Auth: Required +Scope: file-level

+

Mark a video as fully processed. Updates the video status to completed and finalizes all pipeline state.

+

Example

+
curl -s -X POST "$API/api/v1/file/$FILE_UUID/complete" \
+  -H "X-API-Key: $KEY"
+
+ +

Response (200)

+
{
+  "success": true,
+  "message": "Video marked as completed",
+  "file_uuid": "3a6c1865..."
+}
+
+ +
+

Pipeline Step Order

+
  process (trigger)
+    │
+    ├─→ cut, yolo, ocr, face, pose, asrx (parallel processors)
+    │
+    ├─→ store-asrx  (store diarization as chunks)
+    │
+    ├─→ rule1       (rule-based chunking)
+    │
+    ├─→ vectorize   (embed chunks to Qdrant)
+    │
+    └─→ complete    (mark done)
+
+ +

Phase 1 (/phase1) combines store-asrx + rule1 + vectorize into one call.

+
+

Updated: 2026-06-20 12:00:00

\ No newline at end of file diff --git a/docs_v1.0/doc/06_search.html b/docs_v1.0/doc/06_search.html index 86f5f8b..c33983a 100644 --- a/docs_v1.0/doc/06_search.html +++ b/docs_v1.0/doc/06_search.html @@ -32,7 +32,7 @@ a { color: #0066cc; } Logout - +

Search APIs

@@ -282,9 +282,251 @@ a { color: #0066cc; }

POST /api/v1/search/frames

Auth: Required Scope: global / file-level

-

Search face detection frames by identity name or trace ID.

+

Search frames by YOLO object class, OCR text, or face ID, optionally limited to a time range. Matching frames are returned with the objects, OCR text, faces, and pose-detected persons found in them during processing.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
file_uuidstringNoRestrict to specific file
object_classstringNoFilter by YOLO object class (e.g., person, car, dog)
ocr_textstringNoFilter by OCR text content (ILIKE match)
face_idstringNoFilter by face detection ID
time_range[float, float]NoFilter by time range [start_secs, end_secs]
limitintegerNo100Max results
+

Example

+
# Search for frames containing "person" objects
+curl -s -X POST "$API/api/v1/search/frames" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_uuid": "'"$FILE_UUID"'", "object_class": "person", "limit": 20}'
+
+# Search for frames with specific OCR text
+curl -s -X POST "$API/api/v1/search/frames" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"file_uuid": "'"$FILE_UUID"'", "ocr_text": "hello", "time_range": [10.0, 30.0]}'
+
+ +

Response (200)

+
{
+  "frames": [
+    {
+      "frame_number": 1200,
+      "timestamp": 50.0,
+      "file_uuid": "d3f9ae8e471a1fc4d47022c66091b920",
+      "objects": [{"class": "person", "confidence": 0.95, "bbox": [100, 50, 300, 400]}],
+      "ocr_texts": ["Hello World"],
+      "faces": [{"face_id": "face_42", "confidence": 0.88}],
+      "pose_persons": [{"trace_id": 2, "bbox": [120, 60, 280, 380]}]
+    }
+  ],
+  "total": 15
+}
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
framesarrayArray of matching frame objects
frames[].frame_numberintegerFrame number in video
frames[].timestampfloatTimestamp in seconds
frames[].file_uuidstringFile UUID
frames[].objectsarray/nullYOLO detections in this frame
frames[].ocr_textsarray/nullOCR text strings in this frame
frames[].facesarray/nullFace detections in this frame
frames[].pose_personsarray/nullPose-detected persons in this frame
totalintegerTotal matching frame count

-

GET /api/v1/search/identity_text

+

POST /api/v1/search/llm-smart

+

Auth: Required +Scope: global / file-level

+

Smart search with LLM re-ranking. First fetches candidate results via RRF (Reciprocal Rank Fusion) using the existing smart search, then uses an LLM (Gemma4 on port 8000) to re-rank candidates by relevance to the query.

+

Request Parameters

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeRequiredDefaultDescription
querystringYesSearch text
file_uuidstringNoFile UUID to search within
limitintegerNo10Max results to return
+

Pipeline

+
  1. smart_search  fetch N candidates (limit × 3, clamped 10-20)
+  2. LLM rerank    re-order by relevance using Gemma4
+  3. trim          return top `limit` results
+
+ +

Example

+
curl -s -X POST "$API/api/v1/search/llm-smart" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: $KEY" \
+  -d '{"query": "two people having a conversation about business", "limit": 5}'
+
+ +

Response (200)

+
{
+  "query": "two people having a conversation about business",
+  "results": [
+    {
+      "file_uuid": "d3f9ae8e471a1fc4d47022c66091b920",
+      "parent_id": 1234,
+      "scene_order": 1234,
+      "start_frame": 5000,
+      "end_frame": 5200,
+      "fps": 24.0,
+      "start_time": 208.3,
+      "end_time": 216.7,
+      "summary": "[208s-217s, 9s] Two people discussing project timeline...",
+      "similarity": 0.72
+    }
+  ],
+  "page": 1,
+  "page_size": 5,
+  "strategy": "llm_reranked"
+}
+
+ + + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
strategystringAlways "llm_reranked" for this endpoint
resultsarrayRe-ranked search results (same format as smart search)
+

Fallback

+

If LLM reranking fails (model unavailable, timeout), the endpoint falls back to the RRF order and does not return an error.

+
+

Visual Search

Auth: Required Scope: global / file-level

Search text chunks → find associated identities. Returns chunks where face detections overlap with text content.

@@ -392,12 +634,13 @@ a { color: #0066cc; }
-

Visual Search

+

Visual Search (Planned)

+ @@ -405,26 +648,31 @@ a { color: #0066cc; } + + + + + @@ -457,7 +705,7 @@ a { color: #0066cc; }
Method EndpointStatus Description
POST /api/v1/search/visualNot implemented Search visual chunks
POST /api/v1/search/visual/classNot implemented Search by object class
POST /api/v1/search/visual/densityNot implemented Search by object density
POST /api/v1/search/visual/combinationNot implemented Search by object combination
POST /api/v1/search/visual/statsNot implemented Visual chunk statistics

-

Updated: 2026-05-27 — Added global search support for smart, universal, identity_text APIs

+

Updated: 2026-06-20 — Added llm-smart search, completed frames search documentation, marked visual search as planned

\ No newline at end of file diff --git a/docs_v1.0/doc/07_identity.html b/docs_v1.0/doc/07_identity.html index 2180a55..07e6ab6 100644 --- a/docs_v1.0/doc/07_identity.html +++ b/docs_v1.0/doc/07_identity.html @@ -130,7 +130,14 @@ a { color: #0066cc; }

DELETE /api/v1/identity/:identity_uuid

Auth: Required Scope: identity-level

-

Delete an identity permanently.

+

Delete an identity permanently. All face detections bound to this identity are unbound (identity_id set to NULL). The identity JSON file is deleted from disk.

+

History & Undo/Redo

+

Every DELETE records a full snapshot of the identity and its unbound faces. See 14_identity_history.md for:

+
    +
  • Undo via POST /api/v1/identity/:identity_uuid/undo — recreates identity and re-binds faces
  • +
  • Redo via POST /api/v1/identity/:identity_uuid/redo — re-deletes the identity
  • +
+

Note: Delete undo/redo reuses the same endpoints as PATCH undo/redo. The endpoint automatically detects whether the identity was deleted (undo) or needs to be re-deleted (redo) based on the history record.


PATCH /api/v1/identity/:identity_uuid

Auth: Required @@ -209,458 +216,155 @@ curl -s -X -400 -No fields to update or invalid UUID format - - 404 Identity not found + +500 +Database error +

History & Undo/Redo

-

Every PATCH records a before/after snapshot in the operation history. Up to 256 records per identity are kept (oldest auto-deleted). See 14_identity_history.md for:

+

Every bind records a before/after snapshot. See 14_identity_history.md for:

    -
  • POST /api/v1/identity/:identity_uuid/undo — Revert PATCH changes
  • -
  • POST /api/v1/identity/:identity_uuid/redo — Reapply undone changes
  • -
  • GET /api/v1/identity/:identity_uuid/history — Query operation log
  • +
  • POST /api/v1/identity/:identity_uuid/bind/undo — Revert a bind
  • +
  • POST /api/v1/identity/:identity_uuid/bind/redo — Reapply an undone bind
  • +
  • GET /api/v1/identity/:identity_uuid/bind/history — Query bind operations

-

GET /api/v1/identity/:identity_uuid/files

-

Auth: Required -Scope: identity-level

-

Get all files where this identity appears. Returns per-file summary including face count, confidence, and appearance time range.

-

Example

-
curl -s "$API/api/v1/identity/$IDENTITY_UUID/files" -H "X-API-Key: $KEY"
-
- -

Response (200)

-
{
-  "success": true,
-  "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
-  "name": "Cary Grant",
-  "total": 3,
-  "page": 1,
-  "page_size": 20,
-  "data": [
-    {
-      "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692",
-      "file_name": "charade.mp4",
-      "file_path": "/path/to/charade.mp4",
-      "status": "done",
-      "face_count": 16335,
-      "speaker_count": 0,
-      "first_appearance": 206.76,
-      "last_appearance": 6756.68,
-      "confidence": 0.8088
-    }
-  ]
-}
-
- +

Metadata (Embedded JSON)

+

The identities.metadata column is a JSONB field that stores arbitrary structured data alongside the identity's core fields (name, status, identity_type). No schema is enforced — any valid JSON object is accepted.

+

Merge Behavior

- + + + + + + + + + + + + + + + + + + + + + + +
FieldOperationStrategyExample
PATCHShallow top-level merge: COALESCE(metadata,'{}'::jsonb) || $1::jsonbSending {"tmdb_rating": 8.5} only adds/overwrites tmdb_rating; all other existing keys are preserved.
mergeintoRecursive deep merge — nested sub-keys are merged individually, not replaced wholesaleTarget has {"tmdb": {"biography": "..."}}, source has {"tmdb": {"birthday": "1904-01-18"}} → result is {"tmdb": {"biography": "...", "birthday": "1904-01-18"}}.
Upload (POST)Direct overwrite — the entire metadata field is replaced with the request value.
+

Validation

+ + + + + + + + + + + + + + + + + + + + + +
ScenarioResult
PATCH with non-object metadata (string, array, number, null)400 Bad Request: "metadata must be a JSON object"
mergeinto with non-object metadataAccepted (mergeinto validates at application level)
Upload with non-object metadataAccepted (upload replaces directly)
+

Conventional Keys

+ + + + - + + - + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +
Key TypeDescriptionWriterPurpose
namealiases[{locale, name}]PATCH, mergeintoMultilingual display names (see Alias System)
merged_into{uuid, at}mergeintoMarks an identity as merged (undo mechanism reads this)
tmdb_*variousTMDb probeMovie metadata (biography, birthday, known_for, etc.). Written only when MOMENTRY_TMDB_PROBE_ENABLED=true.
source stringIdentity display name
data[].file_uuidstringFile identifier
data[].file_namestringFile name
data[].face_countintegerNumber of face detections in this file
data[].first_appearancefloatFirst appearance time in seconds
data[].last_appearancefloatLast appearance time in seconds
data[].confidencefloatAverage confidence (0.0–1.0)mergeintoTagged on aliases/metadata when added by merge ("merge" value)
-
-

GET /api/v1/identity/:identity_uuid/faces

-

Auth: Required -Scope: identity-level

-

Get all face detection records associated with this identity.

-

Example

-
curl -s "$API/api/v1/identity/$IDENTITY_UUID/faces" -H "X-API-Key: $KEY"
-
- -

Response (200)

-
{
-  "success": true,
-  "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
-  "name": "Cary Grant",
-  "total": 963,
-  "page": 1,
-  "page_size": 50,
-  "data": [
-    {
-      "id": 3902,
-      "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692",
-      "frame_number": 37974,
-      "timestamp_secs": 1518.96,
-      "face_id": "37974_1",
-      "confidence": 0.8197,
-      "bbox": { "x": 1097, "y": 310, "width": 177, "height": 177 }
-    }
-  ]
-}
-
- +

Custom keys are fully supported — no registration required.

+

Search Coverage

+

The identity search endpoint (GET /api/v1/identity/search) matches across three scopes:

+
    +
  1. i.name — exact and ILIKE against display name
  2. +
  3. jsonb_array_elements(i.metadata->'aliases')->>'name' — locale-tagged alias names
  4. +
  5. i.metadata::text ILIKE $1 — raw string search across the entire JSON blob (all keys, all values)
  6. +
+

This means searching for "1904-01-18" or "biography" will match identities whose metadata contains those strings anywhere.

+

History Snapshots

+

Every identity_history record captures the full metadata in both before_snapshot and after_snapshot (as part of the complete identity JSONB dump). Undo restores the identity row — including metadata — to the before_snapshot state.

+

For merge operations, the MongoDB merge history records metadata_fields_added and metadata_fields_added_paths (dot-separated paths like "tmdb.biography"). Merge undo removes only those specific paths, preserving subsequent manual edits to other metadata keys.

+

Best Practices

- - - + + - - - + + - - - + + - - - + + - - - + + - - - + +
FieldTypeDescriptionGuidelineReason
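namestringIdentity display nameDeep nesting is allowed in metadatamergeinto uses jsonb_deep_merge() — nested sub-keys are merged recursively, not replaced wholesale. PATCH merges top-level keys only, so a nested object sent via PATCH replaces the existing one; Upload overwrites the whole field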
data[].file_uuidstringFile where face was detectedUse aliases for display namesFrontend has built-in locale fallback logic (see Alias System)
data[].frame_numberintegerFrame number of detectionAvoid >1MB per identityMetadata is included in search indexing (metadata::text ILIKE); large blobs degrade query performance
data[].face_idstringFace ID (format: {frame}_{idx})Don't rely on metadata orderingJSONB does not preserve the insertion order of object keys; PostgreSQL stores them in its own internal order (array element order is preserved)
data[].confidencefloatDetection confidenceNo LLM/Gemma4 agent writes to metadataOnly API endpoints (PATCH, mergeinto, upload) and TMDb probe modify identities.metadata
-
-

GET /api/v1/file/:file_uuid/faces

-

Auth: Required -Scope: identity-level

-

List all face detections in a file with binding status. Each face is in one of four binding states:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Statebinding responseMeaning
identity{"identity_id": 9, "identity_uuid": "...", "identity_name": "Audrey Hepburn"}Face matched to a known TMDb or user-defined identity
stranger{"stranger_id": 845, "metadata": {}}Face matched to an unknown person (trace not matched to any known identity)
dangling{"old_identity_id": 18052}Face was previously bound to an auto-generated identity that has been deleted (orphaned reference)
unboundnullFace has no binding at all (identity_id and stranger_id are both NULL)
-

Query Parameters

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ParamTypeDefaultDescription
pageint1Page number
page_sizeint50Items per page
bindingstringFilter by state: identity, stranger, dangling, or unbound
trace_idintFilter by trace ID
min_confidencefloatMinimum detection confidence (0.0–1.0)
start_frameintStarting frame number (inclusive)
end_frameintEnding frame number (inclusive)
-

Example

-
curl -s "$API/api/v1/file/aeed71342a899fe4b4c57b7d41bcb692/faces?page=1&page_size=2&binding=identity" -H "X-API-Key: $KEY"
-
- -

Response (200)

-
{
-  "success": true,
-  "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692",
-  "total": 52244,
-  "page": 1,
-  "page_size": 2,
-  "data": [
-    {
-      "id": 661508,
-      "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692",
-      "frame_number": 21297,
-      "timestamp_secs": 851.88,
-      "face_id": "21297_0",
-      "trace_id": 485,
-      "bbox": { "x": 1072, "y": 390, "width": 56, "height": 56 },
-      "confidence": 0.6114,
-      "binding": {
-        "identity_id": 9,
-        "identity_uuid": "c3545906-c82d-4b66-aa1d-150bc02decce",
-        "identity_name": "Audrey Hepburn"
-      }
-    }
-  ]
-}
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
FieldTypeDescription
totalintNumber of faces matching the filter (not total in file)
data[].trace_idintFace tracking trace ID
data[].timestamp_secsfloatTimestamp in seconds (frame_number / fps)
data[].bboxobjectBounding box {x, y, width, height}
data[].bindingobject/nullOne of four binding states (see table above)
-
-

GET /api/v1/identity/:identity_uuid/chunks

-

Auth: Required -Scope: identity-level

-

Get all text chunks (sentences) spoken while this identity's face was on screen. Useful for finding what a person said.

-

Example

-
curl -s "$API/api/v1/identity/$IDENTITY_UUID/chunks" -H "X-API-Key: $KEY"
-
- -

Response (200)

-
{
-  "success": true,
-  "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
-  "name": "Cary Grant",
-  "total": 20,
-  "page": 1,
-  "page_size": 20,
-  "data": [
-    {
-      "id": 0,
-      "file_uuid": "bd80fec92b0b6963d177a2c55bf713e2",
-      "chunk_id": "bd80fec92b0b6963d177a2c55bf713e2_2",
-      "chunk_type": "sentence",
-      "start_frame": 5103,
-      "end_frame": 5127,
-      "fps": 24.0,
-      "start_time": 212.64,
-      "end_time": 213.64,
-      "text_content": "[213s-214s] Cary Grant: \"Olá!\""
-    }
-  ]
-}
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
FieldTypeDescription
namestringIdentity display name
data[].file_uuidstringFile identifier
data[].chunk_idstringSentence chunk identifier
data[].start_frameintegerFrame-accurate start position
data[].end_frameintegerFrame-accurate end position
data[].fpsfloatFrames per second
data[].start_timefloatStart time in seconds
data[].end_timefloatEnd time in seconds
data[].text_contentstringSpoken text content
-
-

POST /api/v1/identity/:identity_uuid/bind

-

Auth: Required -Scope: identity-level

-

Bind a face detection to an identity. Associates the face trace with the identity for future search and recognition.

-

Request Parameters

- - - - - - - - - - - - - - - - - - - - - - - -
FieldTypeRequiredDescription
file_uuidstringYesFile where face is detected
face_idstringYesFace ID (format: {frame}_{idx})
-

Side Effects

-
    -
  • 清除該 face detection row 的 stranger_id(設為 NULL)
  • -
  • 不影響 identities 表中原有的 stranger auto-identity 記錄
  • -
-

Example

-
curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/bind" \
-  -H "X-API-Key: $KEY" \
-  -H "Content-Type: application/json" \
-  -d '{"file_uuid": "'"$FILE_UUID"'", "face_id": "1_5"}'
-
-

POST /api/v1/identity/:identity_uuid/bind/trace

Auth: Required @@ -730,6 +434,8 @@ curl -s -X +

History & Undo/Redo

+

Trace bind operations share the same history/undo/redo system as single-face binds. See 14_identity_history.md for endpoints.


GET /api/v1/identity/:identity_uuid/traces

Auth: Required @@ -898,6 +604,12 @@ curl -s -X

  • 被 unbind 的 face 不會自動成為 stranger
  • 要重新標記為 stranger 需重新跑 Agent API(identity/analyze)
  • +

    History & Undo/Redo

    +

    Unbind records a before/after snapshot. See 14_identity_history.md for:

    +
      +
    • POST /api/v1/identity/:identity_uuid/bind/undo — Revert an unbind
    • +
    • POST /api/v1/identity/:identity_uuid/bind/redo — Reapply an undone unbind
    • +

    POST /api/v1/identity/:identity_uuid/mergeinto

    Auth: Required @@ -909,19 +621,19 @@ curl -s -X Case Description -Undo Support +Undo/Redo Support stranger → identity Merge an auto-generated stranger identity into a known identity (TMDb or user-defined) -✅ 24hr undo +✅ 24hr undo/redo identity A → identity B Merge two known identities (e.g., duplicate entries) -✅ 24hr undo +✅ 24hr undo/redo @@ -962,7 +674,7 @@ curl -s -X

  • 將 source metadata fields 加入 target metadata (if not already present)
  • keep_history: true(預設):source identity 設為 status='merged',保留記錄
  • keep_history: false刪除 source identity 及其 identity JSON 檔案
  • -
  • 記錄 merge history 到 MongoDB(支援 undo)
  • +
  • 記錄 merge history 到 MongoDB(支援 undo/redo)
  • Example

    curl -s -X POST "$API/api/v1/identity/$SOURCE_UUID/mergeinto" \
    @@ -1116,6 +828,11 @@ curl -s -X 
     
     
     
    +

    POST /api/v1/identity/merge/:merge_id/redo

    +

    Auth: Required +Scope: identity-level

    +

    Redo a previously undone merge operation. See 14_identity_history.md for full details.

    +

    GET /api/v1/identity/merge/history

    Auth: Required Scope: identity-level

    @@ -1546,6 +1263,381 @@ curl -s -X
    +

    Identity Related Data

    +

    GET /api/v1/identity/:identity_uuid/files

    +

    Auth: Required +Scope: identity-level

    +

    List all files containing this identity.

    +

    Example

    +
    curl -s "$API/api/v1/identity/$IDENTITY_UUID/files" \
    +  -H "X-API-Key: $KEY"
    +
    + +

    Response (200)

    +
    {
    +  "success": true,
    +  "identity_uuid": "a9a90105-6d6b-46ff-92da-0c3c1a57dff4",
    +  "total": 3,
    +  "files": [
    +    {
    +      "file_uuid": "d3f9ae8e471a1fc4d47022c66091b920",
    +      "file_name": "video1.mp4",
    +      "face_count": 142,
    +      "first_appearance": 4.17,
    +      "last_appearance": 208.33
    +    }
    +  ]
    +}
    +
    + +
    +

    GET /api/v1/identity/:identity_uuid/chunks

    +

    Auth: Required +Scope: identity-level

    +

    List all chunks associated with this identity (chunks where the identity's face appears).

    +

    Query Parameters

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeRequiredDefaultDescription
    pageintegerNo1Page number
    page_sizeintegerNo20Items per page
    +

    Example

    +
    curl -s "$API/api/v1/identity/$IDENTITY_UUID/chunks?page=1&page_size=50" \
    +  -H "X-API-Key: $KEY"
    +
    + +

    Response (200)

    +
    {
    +  "success": true,
    +  "identity_uuid": "a9a90105-6d6b-46ff-92da-0c3c1a57dff4",
    +  "total": 45,
    +  "page": 1,
    +  "page_size": 20,
    +  "chunks": [
    +    {
    +      "chunk_id": "chunk_1",
    +      "file_uuid": "d3f9ae8e471a1fc4d47022c66091b920",
    +      "start_time": 4.17,
    +      "end_time": 8.33,
    +      "text": "[4s-8s] Hello, how are you?",
    +      "chunk_type": "story_child"
    +    }
    +  ]
    +}
    +
    + +
    +

    GET /api/v1/identity/:identity_uuid/faces

    +

    Auth: Required +Scope: identity-level

    +

    List all face detections for this identity.

    +

    Query Parameters

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeRequiredDefaultDescription
    pageintegerNo1Page number
    page_sizeintegerNo50Items per page
    +

    Example

    +
    curl -s "$API/api/v1/identity/$IDENTITY_UUID/faces?page=1&page_size=100" \
    +  -H "X-API-Key: $KEY"
    +
    + +

    Response (200)

    +
    {
    +  "success": true,
    +  "identity_uuid": "a9a90105-6d6b-46ff-92da-0c3c1a57dff4",
    +  "total": 1420,
    +  "page": 1,
    +  "page_size": 50,
    +  "faces": [
    +    {
    +      "face_id": "face_100",
    +      "file_uuid": "d3f9ae8e471a1fc4d47022c66091b920",
    +      "frame_number": 1200,
    +      "timestamp": 50.0,
    +      "bbox": [100, 50, 300, 400],
    +      "confidence": 0.95,
    +      "trace_id": 2
    +    }
    +  ]
    +}
    +
    + +
    +

    GET /api/v1/identity/:identity_uuid/status

    +

    Auth: Required +Scope: identity-level

    +

    Get processing/status info for an identity.

    +

    Example

    +
    curl -s "$API/api/v1/identity/$IDENTITY_UUID/status" \
    +  -H "X-API-Key: $KEY"
    +
    + +

    Response (200)

    +
    {
    +  "success": true,
    +  "identity_uuid": "a9a90105-6d6b-46ff-92da-0c3c1a57dff4",
    +  "name": "Audrey Hepburn",
    +  "status": "confirmed",
    +  "face_count": 1420,
    +  "file_count": 3,
    +  "has_embedding": true,
    +  "has_profile_image": true
    +}
    +
    + +
    +

    GET /api/v1/identity/:identity_uuid/json

    +

    Auth: Required +Scope: identity-level

    +

    Get the raw identity JSON file (same format as identity.json on disk).

    +

    Example

    +
    curl -s "$API/api/v1/identity/$IDENTITY_UUID/json" \
    +  -H "X-API-Key: $KEY"
    +
    + +

    Response (200)

    +
    {
    +  "version": 1,
    +  "identity_uuid": "a9a90105-6d6b-46ff-92da-0c3c1a57dff4",
    +  "name": "Audrey Hepburn",
    +  "identity_type": "people",
    +  "source": "tmdb",
    +  "status": "confirmed",
    +  "tmdb_id": 1234,
    +  "tmdb_profile": "https://image.tmdb.org/...",
    +  "metadata": {},
    +  "file_bindings": [
    +    {"file_uuid": "d3f9ae8e...", "trace_ids": [0, 1, 2], "face_count": 142}
    +  ]
    +}
    +
    + +
    +
    +

    POST /api/v1/file/:file_uuid/pending-person

    +

    Auth: Required +Scope: file-level

    +

    Create a manually managed "pending person" under a specific file. A pending person is an identity with status='pending' and source='manual', used for unmatched traces that the user wants to manually label before a full identity resolution.

    +

    Optionally binds a list of trace IDs to this new identity.

    +

    Request

    +
    {
    +  "trace_ids": [100, 150, 200],
    +  "name": "Mystery Man #1"
    +}
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeRequiredDefaultDescription
    trace_idsarray[int]No[]Trace IDs to bind to this pending person
    namestringNo"Person N"Human-readable name. Auto-generated if omitted
    +

    Example

    +
    # Create pending person with name and no traces
    +curl -s -X POST "$API/api/v1/file/$FILE_UUID/pending-person" \
    +  -H "X-API-Key: $KEY" \
    +  -H "Content-Type: application/json" \
    +  -d '{"name": "Unknown Woman #2", "trace_ids": []}'
    +
    +# Create pending person with auto-name and bind traces
    +curl -s -X POST "$API/api/v1/file/$FILE_UUID/pending-person" \
    +  -H "X-API-Key: $KEY" \
    +  -H "Content-Type: application/json" \
    +  -d '{"trace_ids": [100, 150, 200]}'
    +
    + +

    Response (200)

    +
    {
    +  "success": true,
    +  "message": "Created pending person: Mystery Man #1 (uuid: 4d96b25b-68f0-4c52-b238-d69f7dfd588b)",
    +  "data": {
    +    "identity_uuid": "4d96b25b-68f0-4c52-b238-d69f7dfd588b",
    +    "identity_id": 55,
    +    "name": "Mystery Man #1",
    +    "bound_traces": 0
    +  }
    +}
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeDescription
    identity_uuidstringUUID of the newly created pending identity
    identity_idintegerInternal ID of the new identity
    namestringDisplay name
    bound_tracesintegerNumber of traces bound
    +

    Side Effects

    +
      +
    • Creates an identities row with status='pending', source='manual', file_uuid=<file_uuid>
    • +
    • If trace_ids provided: UPDATE face_detections SET identity_id = ... for matching traces
    • +
    • If trace_ids provided: TKG face_track nodes get identity_id / identity_name in properties
    • +
    • Identity JSON file synced to disk
    • +
    +
    +

    GET /api/v1/file/:file_uuid/pending-persons

    +

    Auth: Required +Scope: file-level

    +

    List all pending persons for a file.

    +

    Example

    +
    curl -s "$API/api/v1/file/$FILE_UUID/pending-persons" \
    +  -H "X-API-Key: $KEY"
    +
    + +

    Response (200)

    +
    {
    +  "success": true,
    +  "message": "Found 2 pending persons for c36f35685177c981aa139b66bbbccc5b",
    +  "data": [
    +    {
    +      "identity_uuid": "232ecd08-a2bf-4bd0-bd25-0bd8fb7a7dae",
    +      "identity_id": 56,
    +      "name": "Person 2",
    +      "created_at": "2026-06-23 17:13:23",
    +      "trace_count": 3,
    +      "bound_traces": null
    +    }
    +  ]
    +}
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeDescription
    identity_uuidstringIdentity UUID
    identity_idintegerInternal identity ID
    namestringDisplay name
    created_atstringCreation timestamp
    trace_countintegerNumber of face traces bound to this pending person
    bound_tracesarray[int]List of bound trace IDs (currently null, reserved for future expansion)
    +

    Notes

    +
      +
    • Pending persons are normal identities rows with status='pending' — they can be promoted to confirmed via PATCH /api/v1/identity/:identity_uuid ({"status": "confirmed"})
    • +
    • They can be merged into known identities via POST /api/v1/identity/:identity_uuid/mergeinto
    • +
    • Use GET /api/v1/identity/:identity_uuid/traces to get detailed trace info for each pending person
    • +
    +

    Alias System (BCP 47 Locale Tags)

    Identity aliases support multilingual display names. Aliases are stored in metadata.aliases as an array of {locale, name} objects.

    BCP 47 Locale Tags Reference

    @@ -1646,7 +1738,7 @@ curl -s -X

    This replaces the entire aliases array. To add to existing aliases, include all existing entries in the request.


    -

    *Updated: 2026-05-25 — Added GET /api/v1/file/:file_uuid/faces with 4 binding states, filters, strangers table split

    +

    Updated: 2026-06-20 — Added identity files, chunks, faces, status, and JSON endpoints

    \ No newline at end of file diff --git a/docs_v1.0/doc/08_media.html b/docs_v1.0/doc/08_media.html index 99543ef..059a596 100644 --- a/docs_v1.0/doc/08_media.html +++ b/docs_v1.0/doc/08_media.html @@ -790,7 +790,100 @@ curl -s "
    -

    Updated: 2026-05-19 12:49:24

    +

    GET /api/v1/file/:file_uuid/stranger/:stranger_id/representative-face

    +

    Auth: Required +Scope: file-level

    +

    Get the representative face for a stranger (unidentified face trace).

    +

    Example

    +
    curl -s "$API/api/v1/file/$FILE_UUID/stranger/1/representative-face" \
    +  -H "X-API-Key: $KEY"
    +
    + +

    Response (200)

    +
    {
    +  "success": true,
    +  "file_uuid": "d3f9ae8e471a1fc4d47022c66091b920",
    +  "stranger_id": 1,
    +  "face_count": 85,
    +  "representative": {
    +    "frame_number": 5000,
    +    "timestamp_secs": 208.33,
    +    "bbox": {"x": 200, "y": 100, "width": 150, "height": 150},
    +    "confidence": 0.92,
    +    "quality_score": 20700,
    +    "blur_score": 8.5
    +  }
    +}
    +
    + +
    +

    GET /api/v1/file/:file_uuid/stranger/:stranger_id/thumbnail

    +

    Auth: Required +Scope: file-level

    +

    Extract the best face image for a stranger as JPEG (320×320).

    +

    Example

    +
    curl -s "$API/api/v1/file/$FILE_UUID/stranger/1/thumbnail" \
    +  -H "X-API-Key: $KEY" -o stranger_1_face.jpg
    +
    + +

    Response

    +
      +
    • 200: image/jpeg binary data (320×320 cropped face)
    • +
    • 404: File or stranger not found
    • +
    +
    +

    GET /api/v1/file/:file_uuid/chunk/:chunk_id/thumbnail

    +

    Auth: Required +Scope: file-level

    +

    Get thumbnail for a specific chunk. Extracts the representative frame for the chunk's time range.

    +

    Example

    +
    curl -s "$API/api/v1/file/$FILE_UUID/chunk/chunk_1/thumbnail" \
    +  -H "X-API-Key: $KEY" -o chunk_1.jpg
    +
    + +

    Response

    +
      +
    • 200: image/jpeg binary data
    • +
    • 404: File or chunk not found
    • +
    +
    +

    GET /api/v1/media-proxy

    +

    Auth: Required +Scope: system-level

    +

    Proxies a request to fetch media from an external URL. Useful for loading profile images or thumbnails from external services (TMDb, etc.) through the API server, so the client does not have to contact the external host directly.

    +

    Query Parameters

    + + + + + + + + + + + + + + + + + +
    FieldTypeRequiredDescription
    urlstringYesExternal URL to proxy
    +

    Example

    +
    curl -s "$API/api/v1/media-proxy?url=https://image.tmdb.org/t/p/w500/abc123.jpg" \
    +  -H "X-API-Key: $KEY" -o tmdb_profile.jpg
    +
    + +

    Response

    +
      +
    • 200: Proxied media data (Content-Type from external source)
    • +
    • 400: Missing or invalid URL parameter
    • +
    • 500: External request failed
    • +
    +
    +
    +

    Updated: 2026-06-20 — Added stranger endpoints, chunk thumbnail, and media proxy

    \ No newline at end of file diff --git a/docs_v1.0/doc/09_tmdb.html b/docs_v1.0/doc/09_tmdb.html index bbc59be..6e406b2 100644 --- a/docs_v1.0/doc/09_tmdb.html +++ b/docs_v1.0/doc/09_tmdb.html @@ -125,8 +125,124 @@ If local files exist, no external API call is made. Internet is only needed for }
    +

    POST /api/v1/tmdb/fetch

    +

    Auth: Required +Scope: system-level

    +

    Fetch TMDb data by filename, create identities with profile images and embeddings. Similar to prefetch+probe combined, but also downloads profile images and generates embeddings.

    +

    Request Parameters

    + + + + + + + + + + + + + + + + + +
    FieldTypeRequiredDescription
    filenamestringYesMovie filename to search TMDb for
    +

    Example

    +
    curl -s -X POST "$API/api/v1/tmdb/fetch" \
    +  -H "Content-Type: application/json" \
    +  -H "X-API-Key: $KEY" \
    +  -d '{"filename": "charade.mp4"}'
    +
    + +

    Response (200)

    +
    {
    +  "success": true,
    +  "movie_title": "Charade (1963)",
    +  "tmdb_id": 1234,
    +  "identities_created": 15,
    +  "profile_images_downloaded": 12
    +}
    +
    +
    -

    Updated: 2026-05-19 12:49:24

    +

    POST /api/v1/agents/tmdb/match/:file_uuid

    +

    Auth: Required +Scope: file-level

    +

    Match TMDb identities to face traces using Qdrant vector similarity. Compares face embeddings against TMDb identity embeddings to find the best matches.

    +

    Example

    +
    curl -s -X POST "$API/api/v1/agents/tmdb/match/$FILE_UUID" \
    +  -H "X-API-Key: $KEY"
    +
    + +

    Response (200)

    +
    {
    +  "success": true,
    +  "file_uuid": "d3f9ae8e471a1fc4d47022c66091b920",
    +  "matches": [
    +    {
    +      "trace_id": 0,
    +      "identity_uuid": "a9a90105-6d6b-46ff-92da-0c3c1a57dff4",
    +      "identity_name": "Audrey Hepburn",
    +      "confidence": 0.92,
    +      "tmdb_id": 1234
    +    }
    +  ],
    +  "total_matches": 5
    +}
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeDescription
    matches[].trace_idintegerFace trace ID
    matches[].identity_uuidstringMatched TMDb identity UUID
    matches[].identity_namestringIdentity display name
    matches[].confidencefloatCosine similarity score (0.0–1.0)
    matches[].tmdb_idintegerTMDb person ID
    total_matchesintegerTotal successful matches
    +
    +

    TMDb Auto-Match

    +

    When MOMENTRY_TMDB_PROBE_ENABLED=true, the worker automatically runs TMDb matching during the post-process phase:

    +
      +
    1. Register phase: Searches TMDb by filename, creates identities with tmdb_id/tmdb_profile
    2. +
    3. Post-process phase: Matches detected faces against TMDb identities via cosine similarity using Qdrant
    4. +
    +

    No manual API call needed if auto-match is enabled.

    +
    +

    Updated: 2026-06-20 — Added tmdb/fetch and tmdb/match endpoints

    \ No newline at end of file diff --git a/docs_v1.0/doc_developer/14_identity_history.html b/docs_v1.0/doc_developer/14_identity_history.html index 568e9a9..5b631db 100644 --- a/docs_v1.0/doc_developer/14_identity_history.html +++ b/docs_v1.0/doc_developer/14_identity_history.html @@ -32,12 +32,46 @@ a { color: #0066cc; } Logout - +

    Identity Operation History

    -

    Every PATCH /api/v1/identity/:identity_uuid automatically records a before/after snapshot in the identity_history table. Use undo/redo to revert or reapply changes, and history to inspect the operation log.

    -

    History System Overview

    +

    Every mutation on an identity automatically records a before/after snapshot. Use undo/redo to revert or reapply changes, and history to inspect the operation log.

    +

    Four independent undo/redo systems exist:

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    SystemStorageOperations Covered
    PATCHPostgreSQL identity_historyupdate
    BindPostgreSQL identity_historybind, unbind, bind_trace
    MergeMongoDB identity_merge_historymergeinto
    DeletePostgreSQL identity_historydelete
    +
    +

    1. PATCH History & Undo/Redo

    +

    Overview

    @@ -64,11 +98,11 @@ a { color: #0066cc; } - +
    Redo stackCleared on new PATCH (is_undone=true records are deleted)Cleared on new PATCH (is_undone=true + operation='update' records are deleted)
    -

    Stack Model

    +
    Stack Model
    PATCH 1 → PATCH 2 → PATCH 3         (undo stack, is_undone=false)
                                ↓ undo
     PATCH 1 → PATCH 2                   (undo stack)
    @@ -77,13 +111,13 @@ PATCH 1 → PATCH 2                   (undo stack)
     PATCH 1 → PATCH 2 → PATCH 3         (undo stack)
     
    -

    A new PATCH after undo clears the redo stack (PATCH 3 is lost).

    +

    A new PATCH after undo clears only the operation='update' redo stack (PATCH 3 is lost). Bind/merge redo stacks are not affected.


    -

    POST /api/v1/identity/:identity_uuid/undo

    +

    POST /api/v1/identity/:identity_uuid/undo

    Auth: Required Scope: identity-level

    Undo the most recent PATCH operations. Restores the identity's before_snapshot and marks the history records as undone.

    -

    Request (JSON)

    +
    Request (JSON)
    @@ -104,22 +138,22 @@ PATCH 1 → PATCH 2 → PATCH 3 (undo stack)
    -

    Behavior

    +
    Behavior
      -
    • Queries is_undone=false records, ordered by created_at DESC
    • +
    • Queries is_undone=false records with operation='update', ordered by created_at DESC
    • Restores name, identity_type, source, status, metadata, tmdb_id, tmdb_profile from the last record's before_snapshot
    • Marks the undone records as is_undone=true with undone_at=NOW()
    • Syncs identity.json to disk
    • Updates _index.json if name changed
    -

    Example

    +
    Example
    curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/undo" \
       -H "X-API-Key: $KEY" \
       -H "Content-Type: application/json" \
       -d '{"steps": 1}'
     
    -

    Response (200)

    +
    Response (200)
    {
       "success": true,
       "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
    @@ -159,7 +193,7 @@ PATCH 1 → PATCH 2 → PATCH 3         (undo stack)
     
     
     
    -

    Error Responses

    +
    Error Responses
    @@ -183,11 +217,11 @@ PATCH 1 → PATCH 2 → PATCH 3 (undo stack)

    -

    POST /api/v1/identity/:identity_uuid/redo

    +

    POST /api/v1/identity/:identity_uuid/redo

    Auth: Required Scope: identity-level

    Redo previously undone PATCH operations. Restores the identity's after_snapshot and marks the history records as no longer undone.

    -

    Request (JSON)

    +
    Request (JSON)
    @@ -208,22 +242,22 @@ PATCH 1 → PATCH 2 → PATCH 3 (undo stack)
    -

    Behavior

    +
    Behavior
      -
    • Queries is_undone=true records, ordered by created_at DESC
    • +
    • Queries is_undone=true records with operation='update', ordered by created_at DESC
    • Restores all identity fields from the last record's after_snapshot
    • Marks records as is_undone=false with undone_at=NULL
    • Syncs identity.json to disk
    • Updates _index.json if name changed
    -

    Example

    +
    Example
    curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/redo" \
       -H "X-API-Key: $KEY" \
       -H "Content-Type: application/json" \
       -d '{"steps": 1}'
     
    -

    Response (200)

    +
    Response (200)
    {
       "success": true,
       "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
    @@ -263,7 +297,7 @@ PATCH 1 → PATCH 2 → PATCH 3         (undo stack)
     
     
     
    -

    Error Responses

    +
    Error Responses
    @@ -287,11 +321,11 @@ PATCH 1 → PATCH 2 → PATCH 3 (undo stack)

    -

    GET /api/v1/identity/:identity_uuid/history

    +

    GET /api/v1/identity/:identity_uuid/history

    Auth: Required Scope: identity-level

    -

    Query the operation history for an identity. Returns paginated records with undo/redo stack counts.

    -

    Query Parameters

    +

    Query the PATCH operation history for an identity. Returns paginated records with undo/redo stack counts (filtered to operation='update').

    +
    Query Parameters
    @@ -319,7 +353,7 @@ PATCH 1 → PATCH 2 → PATCH 3 (undo stack)
    -

    Response (200)

    +
    Response (200)
    {
       "success": true,
       "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
    @@ -357,7 +391,7 @@ PATCH 1 → PATCH 2 → PATCH 3         (undo stack)
     
     total
     integer
    -Total history records for this identity
    +Total PATCH history records for this identity
     
     
     undo_stack_count
    @@ -396,12 +430,12 @@ PATCH 1 → PATCH 2 → PATCH 3         (undo stack)
     
     
     
    -

    Example

    +
    Example
    curl -s "$API/api/v1/identity/$IDENTITY_UUID/history?page=1&limit=10" \
       -H "X-API-Key: $KEY"
     
    -

    Error Responses

    +
    Error Responses
    @@ -421,45 +455,746 @@ PATCH 1 → PATCH 2 → PATCH 3 (undo stack)

    -

    Comparison: PATCH Undo vs Merge Undo

    +

    2. Bind/Unbind/Trace History & Undo/Redo

    +

    All three operations (bind, unbind, bind_trace) share a single history table and undo/redo stack.

    +

    Bind Operation Overview

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    PropertyValue
    StoragePostgreSQL identity_history table (same table as PATCH)
    Snapshot{"file_uuid", "face_id" (or "trace_id"), "identity_id_before/after"}
    Max records256 per identity (shared limit across all operation types)
    Undo stepsUnlimited (steps param)
    Redo stackCleared on new bind/unbind/bind_trace (operation IN ('bind','unbind','bind_trace') + is_undone=true records deleted)
    Stack isolationBind redo stack is independent from PATCH redo stack — clearing one does not affect the other
    +
    Stack Model
    +
    bind face_1 (to id=9)              → unbind face_1          → bind trace 906 (to id=9)
    +(undo stack, is_undone=false)         (undo stack)              (undo stack)
    +                                                               ↓ undo (first undone: bind_trace)
    +                                     bind trace 906 (is_undone=true)
    +                                     (redo stack)
    +                                                               ↓ redo
    +bind face_1 → unbind face_1 → bind trace 906
    +(undo stack)
    +
    + +

    A new bind/unbind/bind_trace operation after an undo clears only the bind redo stack (undone records whose operation is one of 'bind', 'unbind', or 'bind_trace'). The PATCH redo stack is not affected.

    +
    Snapshot Format
    +

    Before (bind):

    +
    {
    +  "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692",
    +  "face_id": "1_5",
    +  "identity_id_before": null
    +}
    +
    + +

    After (bind):

    +
    {
    +  "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692",
    +  "face_id": "1_5",
    +  "identity_id_after": 9
    +}
    +
    + +

    Before (unbind) — binding existed before:

    +
    {
    +  "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692",
    +  "face_id": "1_5",
    +  "identity_id_before": 9
    +}
    +
    + +

    After (unbind):

    +
    {
    +  "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692",
    +  "face_id": "1_5",
    +  "identity_id_after": null
    +}
    +
    + +

    For bind_trace, the snapshot uses trace_id instead of face_id, with identity_id_before capturing the first face's identity in that trace.

    +
    +

    POST /api/v1/identity/:identity_uuid/bind/undo

    +

    Auth: Required +Scope: identity-level

    +

    Undo the most recent bind/unbind/bind_trace operations. Restores identity_id_before from the snapshot and marks records as undone.

    +
    Request (JSON)
    + + + + + + + + + + + + + + + + + + + +
    FieldTypeRequiredDefaultDescription
    stepsintegerNo1Number of undo steps to apply
    +
    Behavior
    +
      +
    • Queries is_undone=false records with operation IN ('bind','unbind','bind_trace'), ordered by created_at DESC
    • +
    • Restores identity_id_before — for bind this is null (face was unbound), for unbind this is the original identity (face goes back), for bind_trace this is the trace's previous identity
    • +
    • Marks the undone records as is_undone=true with undone_at=NOW()
    • +
    +
    Example
    +
    curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/bind/undo" \
    +  -H "X-API-Key: $KEY" \
    +  -H "Content-Type: application/json" \
    +  -d '{"steps": 1}'
    +
    + +
    Response (200)
    +
    {
    +  "success": true,
    +  "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
    +  "operation": "bind",
    +  "undone_count": 1,
    +  "affected_rows": 53
    +}
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeDescription
    operationstringThe actual operation undone (bind, unbind, or bind_trace)
    undone_countintegerNumber of history records undone
    affected_rowsintegerNumber of face_detections rows updated
    +
    Error Responses
    + + + + + + + + + + + + + + + + + + + + + +
    HTTPWhen
    400No bind undo operations available
    404Identity not found
    500Database error
    +
    +

    POST /api/v1/identity/:identity_uuid/bind/redo

    +

    Auth: Required +Scope: identity-level

    +

    Redo previously undone bind/unbind/bind_trace operations. Restores identity_id_after from the snapshot.

    +
    Request (JSON)
    + + + + + + + + + + + + + + + + + + + +
    FieldTypeRequiredDefaultDescription
    stepsintegerNo1Number of redo steps to apply
    +
    Behavior
    +
      +
    • Queries is_undone=true records with operation IN ('bind','unbind','bind_trace'), ordered by created_at DESC
    • +
    • Restores identity_id_after — for bind this is the identity the face was bound to, for unbind this is null, for bind_trace this is the identity the trace was bound to
    • +
    • Marks records as is_undone=false with undone_at=NULL
    • +
    +
    Example
    +
    curl -s -X POST "$API/api/v1/identity/$IDENTITY_UUID/bind/redo" \
    +  -H "X-API-Key: $KEY" \
    +  -H "Content-Type: application/json" \
    +  -d '{"steps": 1}'
    +
    + +
    Response (200)
    +
    {
    +  "success": true,
    +  "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
    +  "operation": "unbind",
    +  "redone_count": 1,
    +  "affected_rows": 1
    +}
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeDescription
    operationstringThe actual operation redone (bind, unbind, or bind_trace)
    redone_countintegerNumber of history records redone
    affected_rowsintegerNumber of face_detections rows updated
    +
    Error Responses
    + + + + + + + + + + + + + + + + + + + + + +
    HTTPWhen
    400No bind redo operations available
    404Identity not found
    500Database error
    +
    +

    GET /api/v1/identity/:identity_uuid/bind/history

    +

    Auth: Required +Scope: identity-level

    +

    Query the bind/unbind/bind_trace operation history for an identity. Returns paginated records with undo/redo stack counts.

    +
    Query Parameters
    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeRequiredDefaultDescription
    pageintegerNo1Page number (1-indexed)
    limitintegerNo20Items per page (max 100)
    +
    Response (200)
    +
    {
    +  "success": true,
    +  "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4",
    +  "total": 3,
    +  "undo_stack_count": 2,
    +  "redo_stack_count": 1,
    +  "results": [
    +    {
    +      "history_id": 52,
    +      "operation": "bind_trace",
    +      "is_undone": false,
    +      "created_at": "2026-05-27T14:00:00Z",
    +      "undone_at": null
    +    },
    +    {
    +      "history_id": 51,
    +      "operation": "unbind",
    +      "is_undone": true,
    +      "created_at": "2026-05-27T13:00:00Z",
    +      "undone_at": "2026-05-27T14:30:00Z"
    +    },
    +    {
    +      "history_id": 50,
    +      "operation": "bind",
    +      "is_undone": false,
    +      "created_at": "2026-05-27T12:00:00Z",
    +      "undone_at": null
    +    }
    +  ]
    +}
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeDescription
    totalintegerTotal bind history records for this identity
    undo_stack_countintegerRecords available for undo (is_undone=false)
    redo_stack_countintegerRecords available for redo (is_undone=true)
    results[].history_idintegerHistory record ID
    results[].operationstringOperation type (bind, unbind, or bind_trace)
    results[].is_undonebooleanWhether the operation has been undone
    results[].created_atstringWhen the operation was applied
    results[].undone_atstringWhen the undo occurred (null if not undone)
    +
    Example
    +
    curl -s "$API/api/v1/identity/$IDENTITY_UUID/bind/history?page=1&limit=10" \
    +  -H "X-API-Key: $KEY"
    +
    + +
    Error Responses
    + + + + + + + + + + + + + + + + + +
    HTTPWhen
    404Identity not found
    500Database error
    +
    +

    3. Merge History & Undo/Redo

    +

    Merge operations use MongoDB for richer record-keeping, with a 24-hour undo deadline.

    +

    Merge Operation Overview

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    PropertyValue
    StorageMongoDB identity_merge_history collection
    SnapshotFull source identity state + target identity state + aliases/metadata diffs
    TriggerEvery mergeinto with keep_history=true
    Undo deadline24 hours (renewed on redo)
    Redo supportYes — restores undone merges with new 24hr deadline
    Max recordsUnlimited
    +
    +

    POST /api/v1/identity/merge/:merge_id/undo

    +

    Already documented in 07_identity.md. See that document for full details.

    +
    +

    POST /api/v1/identity/merge/:merge_id/redo

    +

    Auth: Required +Scope: identity-level

    +

    Redo a previously undone merge operation within the renewed 24-hour deadline.

    +
    Request
    +

    No body required. The merge ID is taken from the URL path.

    +
    Behavior
    +
      +
    1. Validates the merge record exists and undone=true (not already active)
    2. +
    3. Checks the 24-hour undo deadline (if expired, the redo is rejected)
    4. +
    5. Re-applies face bindings: moves all faces from source_identity to target_identity
    6. +
    7. Re-adds aliases that were removed by the undo (aliases with source: "merge" tag)
    8. +
    9. Re-adds metadata fields that were removed by the undo
    10. +
    11. If keep_history=true: sets source_identity.status = 'merged' again
    12. +
    13. If keep_history=false: recreates source identity from the undone_snapshot stored at undo time
    14. +
    15. Syncs both identity JSON files to disk
    16. +
    17. Sets undone=false, clears undone_snapshot, renews undo_deadline = NOW() + 24h
    18. +
    19. Records redone_by user for audit
    20. +
    +
    Example
    +
    curl -s -X POST "$API/api/v1/identity/merge/550e8400-e29b-41d4-a716-446655440000/redo" \
    +  -H "X-API-Key: $KEY"
    +
    + +
    Response (200)
    +
    {
    +  "success": true,
    +  "message": "Redo merge completed: merged 'stranger_13894' into 'Louis Viret' (52 faces transferred)",
    +  "data": {
    +    "merge_id": "550e8400-e29b-41d4-a716-446655440000",
    +    "faces_transferred": 52,
    +    "aliases_re_added": 1,
    +    "metadata_fields_re_added": 2
    +  }
    +}
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeDescription
    merge_idstringThe merge operation ID
    faces_transferredintegerNumber of faces transferred from source to target
    aliases_re_addedintegerNumber of aliases restored to target
    metadata_fields_re_addedintegerNumber of metadata fields restored to target
    +
    Error Responses
    + + + + + + + + + + + + + + + + + + + + + +
    HTTPWhen
    400Merge not undone, deadline expired, or cannot redo
    404Merge record not found
    500Database error
    +
    +

    4. Delete History & Undo/Redo

    +

    Delete Operation Overview

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    PropertyValue
    StoragePostgreSQL identity_history table
    Snapshot{"identity": {...full row...}, "unbound_faces": [{file_uuid, face_id, trace_id}, ...]}
    Max records1 active delete record per identity (redo stack cleared on new delete)
    Undo supportYes — recreates identity row, re-binds faces
    Redo supportYes — re-deletes the identity
    Identity fileDeleted on delete, recreated on undo
    +

    Snapshot Format

    +
    {
    +  "identity": {
    +    "id": 9,
    +    "uuid": "a9a90105-6d6b-46ff-92da-0c3c1a57dff4",
    +    "name": "Cary Grant",
    +    "identity_type": "people",
    +    "source": "tmdb",
    +    "status": "confirmed",
    +    "metadata": {},
    +    "tmdb_id": 112,
    +    "tmdb_profile": null
    +  },
    +  "unbound_faces": [
    +    {
    +      "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692",
    +      "face_id": "1_5",
    +      "trace_id": null
    +    },
    +    {
    +      "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692",
    +      "face_id": "1_6",
    +      "trace_id": 906
    +    }
    +  ]
    +}
    +
    + +

    Stack Model

    +
    DELETE identity                          (undo stack, is_undone=false)
    +               ↓ undo
    +Identity recreated, faces re-bound
    +               → delete history marked is_undone=true
    +               ↓ redo (re-delete)
    +Identity deleted again, faces unbound
    +               → delete history marked is_undone=false
    +
    + +

    A new delete after an undo clears the delete redo stack (no redo possible for the old delete).

    +

    Undo Behavior (via existing POST /api/v1/identity/:identity_uuid/undo)

    +
      +
    1. Normal identity lookup fails (row was deleted)
    2. +
    3. Checks identity_history for operation='delete' AND is_undone=false matching the UUID in the snapshot
    4. +
    5. Recreates the identity row (new internal id, same UUID)
    6. +
    7. Re-binds all faces listed in unbound_faces to the new identity
    8. +
    9. Marks the identity_history delete record as is_undone=true with undone_at=NOW()
    10. +
    11. Syncs identity.json to disk
    12. +
    13. Updates _index.json
    14. +
    +

    Redo Behavior (via existing POST /api/v1/identity/:identity_uuid/redo)

    +
      +
    1. Identity lookup succeeds (identity was restored by prior undo)
    2. +
    3. Checks identity_history for operation='delete' AND is_undone=true matching the identity_id
    4. +
    5. Deletes identity.json from disk
    6. +
    7. Unbinds all faces (identity_id = NULL)
    8. +
    9. Deletes the identity row
    10. +
    11. Marks the delete history record as is_undone=false
    12. +
    13. Returns success
    14. +
    +

    Error Responses (delete undo/redo)

    + + + + + + + + + + + + + + + + + + + + + +
    HTTPScenario
    400No delete history available (either no delete or already undone/redone)
    404Identity not found (for redo — identity wasn't restored)
    500Database error
    +
    +

    Comparison: PATCH vs Bind vs Merge vs Delete Undo/Redo

    - + + + + + + + + + + + + + + - + + + - + + + - + + + + + + + + + + + +
    Aspect PATCH Undo/RedoMerge UndoBind Undo/RedoMerge Undo/RedoDelete Undo/Redo
    Storage PostgreSQL identity_historyPostgreSQL identity_history MongoDB identity_merge_historyPostgreSQL identity_history
    Operation filteroperation='update'operation IN ('bind','unbind','bind_trace')operation='delete'
    Trigger Every PATCHEvery bind/unbind/bind_trace Every mergeinto with keep_history=trueEvery DELETE
    Undo deadline None (unlimited)24 hoursNone (unlimited)24 hours (renewed on redo)None (unlimited)
    Redo support YesNoYesYesYes
    Step undo Yes (steps param)No (full undo only)Yes (steps param)No (full undo/redo only)No (single record)
    Max records 256 per identity256 per identity (shared) Unlimited256 per identity (shared)
    User trackinguser_id + user_sourceuser_id + user_sourceperformed_by_user + undone_by / redone_byuser_id + user_source
    diff --git a/docs_v1.0/doc_developer/15_tkg.html b/docs_v1.0/doc_developer/15_tkg.html new file mode 100644 index 0000000..51e95f0 --- /dev/null +++ b/docs_v1.0/doc_developer/15_tkg.html @@ -0,0 +1,728 @@ + + + + +15 Tkg - Momentry API Docs + + + +
    + + + + + +

    Temporal Knowledge Graph (TKG)

    +

    TKG is a time-aligned knowledge graph built from multi-processor outputs (face, yolo, ocr, pose, asrx, gaze, lip, appearance). It produces 9 node types and 14 edge types stored in dev.tkg_nodes and dev.tkg_edges.

    +

    Node Types

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Node TypeDescriptionKey Properties
    face_traceA tracked face identity over timetrace_id, face_count, avg_confidence
    gaze_traceGaze direction over timedirection (frontal/left/right/up/down + diagonals)
    lip_traceLip movement synced with speechspeaker_id, lip_area_range
    text_traceSpoken text aligned to timespeaker_id, text, start_time, end_time
    appearance_traceHuman appearance (clothing) over timeclothing_color, upper_cloth, lower_cloth
    skin_tone_traceFitzpatrick skin tone classificationfitzpatrick_type (I–VI)
    accessoryDetected accessoriestype (glasses/hat/etc.), confidence
    objectYOLO-detected objectclass, confidence, frame_count
    speakerASRX speaker segmentspeaker_id, segment_count, total_duration
    +

    Edge Types

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Edge TypeSource → TargetDescription
    co_occursobject ↔ objectTwo objects appear together in same frame
    speaker_facespeaker ↔ face_traceSpeaker matched to face trace via lip sync
    face_faceface_trace ↔ face_traceTwo face traces interact (mutual gaze)
    mutual_gazegaze_trace ↔ gaze_traceTwo people looking at each other
    lip_synclip_trace ↔ text_traceLip movement aligned with spoken text
    has_appearanceface_trace ↔ appearance_traceFace has specific appearance
    wearsface_trace ↔ accessoryFace wears an accessory
    +
    +

    POST /api/v1/file/:file_uuid/tkg/rebuild

    +

    Auth: Required +Scope: file-level

    +

    Rebuild the Temporal Knowledge Graph for a file. Reads processor JSON outputs (face, yolo, ocr, pose, asrx, gaze, lip, appearance) and generates TKG nodes and edges. Clears existing nodes/edges for the file first, then rebuilds from scratch.

    +

    Example

    +
    curl -s -X POST "$API/api/v1/file/$FILE_UUID/tkg/rebuild" \
    +  -H "X-API-Key: $KEY"
    +
    + +

    Response (200)

    +
    {
    +  "success": true,
    +  "file_uuid": "d3f9ae8e471a1fc4d47022c66091b920",
    +  "result": {
    +    "face_trace_nodes": 16,
    +    "gaze_trace_nodes": 16,
    +    "lip_trace_nodes": 12,
    +    "text_trace_nodes": 24,
    +    "appearance_trace_nodes": 8,
    +    "skin_tone_trace_nodes": 5,
    +    "accessory_nodes": 3,
    +    "object_nodes": 26,
    +    "speaker_nodes": 4,
    +    "co_occurrence_edges": 94,
    +    "speaker_face_edges": 12,
    +    "face_face_edges": 8,
    +    "mutual_gaze_edges": 2,
    +    "lip_sync_edges": 10,
    +    "has_appearance_edges": 16,
    +    "wears_edges": 3
    +  },
    +  "error": null
    +}
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeDescription
    successbooleanTrue if rebuild completed
    file_uuidstring32-char hex UUID
    resultobjectNode and edge counts by type
    errorstring/nullError message if failed
    +
    +

    POST /api/v1/file/:file_uuid/tkg/nodes

    +

    Auth: Required +Scope: file-level

    +

    Query TKG nodes with pagination and optional type filter.

    +

    Request Parameters

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeRequiredDefaultDescription
    node_typestringNoallFilter by node type: face_trace, gaze_trace, lip_trace, text_trace, appearance_trace, skin_tone_trace, accessory, object, speaker
    pageintegerNo1Page number
    page_sizeintegerNo100Items per page (max 500)
    +

    Example

    +
    # Get all face_trace nodes
    +curl -s -X POST "$API/api/v1/file/$FILE_UUID/tkg/nodes" \
    +  -H "X-API-Key: $KEY" \
    +  -H "Content-Type: application/json" \
    +  -d '{"node_type": "face_trace", "page": 1, "page_size": 50}'
    +
    +# Get all nodes
    +curl -s -X POST "$API/api/v1/file/$FILE_UUID/tkg/nodes" \
    +  -H "X-API-Key: $KEY" \
    +  -H "Content-Type: application/json" \
    +  -d '{}'
    +
    + +

    Response (200)

    +
    {
    +  "success": true,
    +  "file_uuid": "d3f9ae8e471a1fc4d47022c66091b920",
    +  "total": 16,
    +  "page": 1,
    +  "page_size": 50,
    +  "nodes": [
    +    {
    +      "id": 1,
    +      "node_type": "face_trace",
    +      "external_id": "trace_0",
    +      "label": "Face Trace 0",
    +      "properties": {
    +        "trace_id": 0,
    +        "face_count": 142,
    +        "avg_confidence": 0.87
    +      }
    +    }
    +  ]
    +}
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeDescription
    successbooleanAlways true on 200
    file_uuidstring32-char hex UUID
    totalintegerTotal matching node count
    pageintegerCurrent page
    page_sizeintegerItems per page
    nodesarrayArray of node objects
    nodes[].idintegerDatabase primary key
    nodes[].node_typestringNode type (see table above)
    nodes[].external_idstringExternal identifier (e.g., trace_0, gaze_1)
    nodes[].labelstringHuman-readable label
    nodes[].propertiesobjectType-specific properties as JSON
    +
    +

    POST /api/v1/file/:file_uuid/tkg/edges

    +

    Auth: Required +Scope: file-level

    +

    Query TKG edges with pagination and optional filters.

    +

    Request Parameters

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeRequiredDefaultDescription
    edge_typestringNoallFilter by edge type: co_occurs, speaker_face, face_face, mutual_gaze, lip_sync, has_appearance, wears
    source_typestringNoFilter by source node type
    target_typestringNoFilter by target node type
    pageintegerNo1Page number
    page_sizeintegerNo100Items per page (max 500)
    +

    Example

    +
    # Get all co_occurrence edges
    +curl -s -X POST "$API/api/v1/file/$FILE_UUID/tkg/edges" \
    +  -H "X-API-Key: $KEY" \
    +  -H "Content-Type: application/json" \
    +  -d '{"edge_type": "co_occurs"}'
    +
    +# Get edges between face_trace and speaker nodes
    +curl -s -X POST "$API/api/v1/file/$FILE_UUID/tkg/edges" \
    +  -H "X-API-Key: $KEY" \
    +  -H "Content-Type: application/json" \
    +  -d '{"source_type": "speaker", "target_type": "face_trace"}'
    +
    + +

    Response (200)

    +
    {
    +  "success": true,
    +  "file_uuid": "d3f9ae8e471a1fc4d47022c66091b920",
    +  "total": 94,
    +  "page": 1,
    +  "page_size": 100,
    +  "edges": [
    +    {
    +      "id": 1,
    +      "edge_type": "co_occurs",
    +      "source_node_id": 10,
    +      "target_node_id": 15,
    +      "properties": {
    +        "frame_count": 45,
    +        "confidence": 0.92
    +      }
    +    }
    +  ]
    +}
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeDescription
    successbooleanAlways true on 200
    file_uuidstring32-char hex UUID
    totalintegerTotal matching edge count
    pageintegerCurrent page
    page_sizeintegerItems per page
    edgesarrayArray of edge objects
    edges[].idintegerDatabase primary key
    edges[].edge_typestringEdge type
    edges[].source_node_idintegerSource node ID (FK to tkg_nodes)
    edges[].target_node_idintegerTarget node ID (FK to tkg_nodes)
    edges[].propertiesobjectEdge-specific properties as JSON
    +
    +

    GET /api/v1/file/:file_uuid/tkg/node/:node_id

    +

    Auth: Required +Scope: file-level

    +

    Get detail for a specific TKG node including its connected edges.

    +

    Example

    +
    curl -s "$API/api/v1/file/$FILE_UUID/tkg/node/1" \
    +  -H "X-API-Key: $KEY"
    +
    + +

    Response (200)

    +
    {
    +  "success": true,
    +  "node": {
    +    "id": 1,
    +    "node_type": "face_trace",
    +    "external_id": "trace_0",
    +    "label": "Face Trace 0",
    +    "properties": {
    +      "trace_id": 0,
    +      "face_count": 142,
    +      "avg_confidence": 0.87
    +    }
    +  },
    +  "connected_edges": [
    +    {
    +      "id": 5,
    +      "edge_type": "co_occurs",
    +      "source_node_id": 1,
    +      "target_node_id": 10,
    +      "properties": {"frame_count": 45}
    +    }
    +  ],
    +  "edge_count": 3
    +}
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeDescription
    successbooleanAlways true on 200
    nodeobjectNode detail (same format as nodes query)
    connected_edgesarrayEdges connected to this node
    edge_countintegerTotal connected edge count
    +

    Error Codes

    + + + + + + + + + + + + + +
    HTTPWhen
    404Node not found
    +
    +

    GET /api/v1/file/:file_uuid/processor-counts

    +

    Auth: Required +Scope: file-level

    +

    Get counts of processor JSON output files for a file. Scans the output directory for {file_uuid}.{processor}.json files and extracts frame counts, segment counts, and chunk counts from each file.

    +

    Supports short UUID prefix matching (e.g., d3f9ae8e → resolves to full d3f9ae8e471a1fc4d47022c66091b920).

    +

    Example

    +
    curl -s "$API/api/v1/file/$FILE_UUID/processor-counts" \
    +  -H "X-API-Key: $KEY"
    +
    + +

    Response (200)

    +
    {
    +  "file_uuid": "d3f9ae8e471a1fc4d47022c66091b920",
    +  "output_dir": "/Users/accusys/momentry/output_dev",
    +  "processors": [
    +    {
    +      "processor": "cut",
    +      "has_json": true,
    +      "frame_count": 5391,
    +      "segment_count": null,
    +      "chunk_count": null,
    +      "last_modified": "2026-06-16T18:48:01.987241061+00:00"
    +    },
    +    {
    +      "processor": "face",
    +      "has_json": true,
    +      "frame_count": 1112,
    +      "segment_count": null,
    +      "chunk_count": null,
    +      "last_modified": "2026-06-18T17:21:37.408383765+00:00"
    +    },
    +    {
    +      "processor": "asrx",
    +      "has_json": true,
    +      "frame_count": null,
    +      "segment_count": 6,
    +      "chunk_count": null,
    +      "last_modified": "2026-06-18T17:21:40.872063642+00:00"
    +    },
    +    {
    +      "processor": "story",
    +      "has_json": true,
    +      "frame_count": null,
    +      "segment_count": null,
    +      "chunk_count": 12,
    +      "last_modified": "2026-06-18T17:22:00.000000000+00:00"
    +    },
    +    {
    +      "processor": "mediapipe",
    +      "has_json": false,
    +      "frame_count": null,
    +      "segment_count": null,
    +      "chunk_count": null,
    +      "last_modified": null
    +    }
    +  ]
    +}
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeDescription
    file_uuidstringFull 32-char hex UUID (resolved from prefix)
    output_dirstringOutput directory scanned
    processorsarrayPer-processor output info
    processors[].processorstringProcessor name
    processors[].has_jsonbooleanWhether JSON file exists
    processors[].frame_countinteger/nullTotal frames processed (frame-based processors)
    processors[].segment_countinteger/nullSegment count (ASRX segments, etc.)
    processors[].chunk_countinteger/nullChunk count (Story chunks, etc.)
    processors[].last_modifiedstring/nullISO 8601 timestamp of last modification
    +

    Error Codes

    + + + + + + + + + + + + + +
    HTTPWhen
    404File UUID not found in database
    +
    +

    Updated: 2026-06-20 12:00:00

    +
    + + \ No newline at end of file diff --git a/docs_v1.0/doc_developer/16_workspace.html b/docs_v1.0/doc_developer/16_workspace.html new file mode 100644 index 0000000..a8e9a3b --- /dev/null +++ b/docs_v1.0/doc_developer/16_workspace.html @@ -0,0 +1,240 @@ + + + + +16 Workspace - Momentry API Docs + + + +
    + + + + + +

    Workspace Checkin/Checkout

    +

    Workspace checkin/checkout provides a transactional editing model for file data: +- Checkout: Clears PG tables (face_detections, speaker_detections, pre_chunks) and Qdrant vectors, creating an isolated workspace SQLite for editing. +- Checkin: Restores data from the workspace SQLite back to PG and Qdrant, marking the file as Indexed.

    +

    This allows safe concurrent editing — while a file is checked out, its main database records are cleared, preventing conflicts.

    +
    +

    POST /api/v1/file/:file_uuid/checkout

    +

    Auth: Required +Scope: file-level

    +

    Checkout a file workspace. Clears face detections, speaker detections, pre_chunks from PostgreSQL, deletes Qdrant vectors, and creates a workspace SQLite database for isolated editing.

    +

    Example

    +
    curl -s -X POST "$API/api/v1/file/$FILE_UUID/checkout" \
    +  -H "X-API-Key: $KEY"
    +
    + +

    Response (200)

    +
    {
    +  "file_uuid": "d3f9ae8e471a1fc4d47022c66091b920",
    +  "rows_deleted": 1523,
    +  "status": "checked_out"
    +}
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeDescription
    file_uuidstring32-char hex UUID
    rows_deletedintegerTotal rows cleared from PG tables
    statusstring"checked_out"
    +

    Error Responses

    + + + + + + + + + + + + + +
    HTTPWhen
    500Checkout failed (DB error, workspace creation error)
    +
    +

    POST /api/v1/file/:file_uuid/checkin

    +

    Auth: Required +Scope: file-level

    +

    Checkin a file workspace. Restores face detections, speaker detections, pre_chunks from workspace SQLite back to PostgreSQL, re-indexes vectors to Qdrant, and sets video status to Indexed.

    +

    Example

    +
    curl -s -X POST "$API/api/v1/file/$FILE_UUID/checkin" \
    +  -H "X-API-Key: $KEY"
    +
    + +

    Response (200)

    +
    {
    +  "file_uuid": "d3f9ae8e471a1fc4d47022c66091b920",
    +  "pre_chunks_moved": 45,
    +  "face_detections_moved": 1200,
    +  "speaker_detections_moved": 320,
    +  "vectors_moved": 45,
    +  "status": "indexed"
    +}
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeDescription
    file_uuidstring32-char hex UUID
    pre_chunks_movedintegerPre-chunks restored from workspace
    face_detections_movedintegerFace detections restored from workspace
    speaker_detections_movedintegerSpeaker detections restored from workspace
    vectors_movedintegerVectors re-indexed to Qdrant
    statusstring"indexed"
    +

    Error Responses

    + + + + + + + + + + + + + +
    HTTPWhen
    500Checkin failed (DB error, workspace not found, vector index error)
    +
    +

    GET /api/v1/file/:file_uuid/workspace

    +

    Auth: Required +Scope: file-level

    +

    Check if a workspace SQLite database exists for a file.

    +

    Example

    +
    curl -s "$API/api/v1/file/$FILE_UUID/workspace" \
    +  -H "X-API-Key: $KEY"
    +
    + +

    Response (200)

    +
    {
    +  "file_uuid": "d3f9ae8e471a1fc4d47022c66091b920",
    +  "exists": true
    +}
    +
    + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeDescription
    file_uuidstring32-char hex UUID
    existsbooleanTrue if workspace SQLite exists
    +
    +

    Workflow

    +
      REGISTERED ──→ CHECKED_OUT ──→ INDEXED
    +     │              │              │
    +     │          checkout        checkin
    +     │              │              │
    +     │     clear PG + Qdrant   restore from SQLite
    +     │     create workspace    re-index vectors
    +     │     set status          set status
    +
    + +
      +
    1. Register file → status: REGISTERED
    2. +
    3. Process file → processors run, data stored in PG + Qdrant
    4. +
    5. Checkout file → clear editable data, create workspace SQLite → status: CHECKED_OUT
    6. +
    7. Edit workspace via Agent Search / identity binding
    8. +
    9. Checkin file → restore from workspace SQLite → status: INDEXED
    10. +
    11. Rebuild TKG if needed after checkin
    12. +
    +
    +

    Updated: 2026-06-20 12:00:00

    +
    + + \ No newline at end of file diff --git a/docs_v1.0/doc_developer/99_incomplete.html b/docs_v1.0/doc_developer/99_incomplete.html new file mode 100644 index 0000000..0940dd9 --- /dev/null +++ b/docs_v1.0/doc_developer/99_incomplete.html @@ -0,0 +1,254 @@ + + + + +99 Incomplete - Momentry API Docs + + + +
    + + + + + +

    Incomplete / Undocumented APIs

    +

    This module tracks API endpoints that exist in the codebase but are undocumented, only partially documented, stubbed out, or defined but not mounted in the router.

    +
    +

    Note: Endpoints listed here should be fully documented and moved to their appropriate module once implemented.

    +
    +
    +

    Identity Binding

    +

    POST /api/v1/identity/:identity_uuid/bind

    +

    Auth: Required +Scope: identity-level

    +

    Bind a single face detection to an identity. Unlike bind/trace which binds all faces in a trace, this binds one specific face.

    +

    Request Parameters

    + + + + + + + + + + + + + + + + + + + + + + + +
    FieldTypeRequiredDescription
    file_uuidstringYesFile containing the face
    face_idstringYesFace detection ID to bind
    +

    Status

    +

    ⚠️ Undocumented — exists in code but no full request/response documentation.

    +
    +

    Resource Management

    +

    POST /api/v1/resource/register

    +

    Auth: Required +Scope: system-level

    +

    Register an external resource (e.g., storage backend, API service).

    +

    Status

    +

    ⚠️ Undocumented — endpoint exists but no documentation.

    +
    +

    POST /api/v1/resource/heartbeat

    +

    Auth: Required +Scope: system-level

    +

    Send heartbeat for a registered resource to verify it's still alive.

    +

    Status

    +

    ⚠️ Undocumented — endpoint exists but no documentation.

    +
    +

    GET /api/v1/resources

    +

    Auth: Required +Scope: system-level

    +

    List all registered resources with their status.

    +

    Status

    +

    ⚠️ Undocumented — endpoint exists but no documentation.

    +
    +

    5W1H Agent

    +

    POST /api/v1/agents/5w1h/analyze

    +

    Auth: Required +Scope: file-level

    +

    Run 5W1H analysis on all cut scenes for a file. Uses LLM (Gemma4) to summarize each scene with who/what/where/when/why/how.

    +

    Status

    +

    ⚠️ Partially documented — listed in 12_agent.md but missing full request/response examples.

    +
    +

    POST /api/v1/agents/5w1h/batch

    +

    Auth: Required +Scope: system-level

    +

    Run 5W1H analysis on multiple files at once.

    +

    Request Parameters

    + + + + + + + + + + + + + + + + + +
    FieldTypeRequiredDescription
    file_uuidsstring[]YesArray of file UUIDs to analyze
    +

    Status

    +

    ⚠️ Partially documented — listed in 12_agent.md but missing full request/response examples.

    +
    +

    GET /api/v1/agents/5w1h/status

    +

    Auth: Required +Scope: system-level

    +

    Get 5W1H analysis status across all videos (which files have been analyzed, which are pending).

    +

    Status

    +

    ⚠️ Partially documented — listed in 12_agent.md but missing full response schema.

    +
    +

    Identity Agent

    +

    POST /api/v1/agents/identity/match-from-photo

    +

    Auth: Required +Scope: system-level

    +

    Match an identity using an uploaded photo. Extracts face embedding, finds best trace match.

    +

    Status

    +

    ⚠️ Partially documented — exists in 08_identity_agent.md but missing full response schema and error cases.

    +
    +

    POST /api/v1/agents/identity/match-from-trace

    +

    Auth: Required +Scope: file-level

    +

    Match an identity using a trace. Multi-angle embedding comparison with propagation.

    +

    Status

    +

    ⚠️ Partially documented — exists in 08_identity_agent.md but missing full response schema and error cases.

    +
    +

    Stubs / Not Implemented

    +

    Visual Search Endpoints

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    MethodEndpointStatus
    POST/api/v1/search/visualStub — defined but not functional
    POST/api/v1/search/visual/classStub — defined but not functional
    POST/api/v1/search/visual/densityStub — defined but not functional
    POST/api/v1/search/visual/combinationStub — defined but not functional
    POST/api/v1/search/visual/statsStub — defined but not functional
    +

    Unmounted Routes

    +

    These endpoints are defined in source code but not mounted in the router:

    + + + + + + + + + + + + + + + + + + + + + +
    EndpointNotes
    /api/v1/search/personsDefined but not mounted
    /api/v1/whoDefined but not mounted
    /api/v1/who/candidatesDefined but not mounted
    +
    +

    Tracking

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    StatusCount
    Undocumented4 (identity bind ×1, resource management ×3)
    Partially documented5 (5W1H ×3, identity agent ×2)
    Stub/not functional5 (visual search)
    Defined but unmounted3 (persons, who, who/candidates)
    Total17
    +
    +

    Created: 2026-06-20 — Gap analysis from core API vs doc_wasm sync +Updated: 2026-06-20 — Initial tracking list

    +
    + + \ No newline at end of file diff --git a/docs_v1.0/doc_developer/index.html b/docs_v1.0/doc_developer/index.html index d87cd1a..41c751d 100644 --- a/docs_v1.0/doc_developer/index.html +++ b/docs_v1.0/doc_developer/index.html @@ -29,7 +29,7 @@ a:hover td { background: #f8f8f8; border-radius: 4px; } Logout

    API 參考手冊 — 登入後可瀏覽各模組文件

    -
    錯誤碼Error Codes
    14 Identity History
    +
    錯誤碼Error Codes
    14 Identity History
    15 Tkg
    16 Workspace
    99 Incomplete
    \ No newline at end of file diff --git a/docs_v1.0/doc_wasm/modules/05_process.md b/docs_v1.0/doc_wasm/modules/05_process.md index 72520c3..f08b1a9 100644 --- a/docs_v1.0/doc_wasm/modules/05_process.md +++ b/docs_v1.0/doc_wasm/modules/05_process.md @@ -127,13 +127,15 @@ curl -s "$API/api/v1/file/$FILE_UUID/probe" -H "X-API-Key: $KEY" --- -### `GET /api/v1/progress/:file_uuid` +### `POST /api/v1/progress/:file_uuid` **Auth**: Required **Scope**: file-level Get real-time processing progress for a file via Redis pub/sub. Includes per-processor status, current/total frames, ETA, and system resource stats. +**Note**: This endpoint uses **POST** method, not GET. The progress data is stored in Redis as a hash, and POST is used to retrieve the latest state. + #### Pipeline Order | Order | Processor | Dependencies | Description | @@ -154,7 +156,7 @@ All processors except `story` and `5w1h` run concurrently when their dependencie #### Example ```bash -curl -s "$API/api/v1/progress/$FILE_UUID" -H "X-API-Key: $KEY" | jq '{overall_progress, processors: [.processors[] | {processor_type, status}]}' +curl -s -X POST "$API/api/v1/progress/$FILE_UUID" -H "X-API-Key: $KEY" | jq '{overall_progress, processors: [.processors[] | {name, status}]}' ``` #### Response (200) diff --git a/docs_v1.0/doc_wasm/modules/07_identity.md b/docs_v1.0/doc_wasm/modules/07_identity.md index 0416276..188946e 100644 --- a/docs_v1.0/doc_wasm/modules/07_identity.md +++ b/docs_v1.0/doc_wasm/modules/07_identity.md @@ -923,6 +923,128 @@ curl -s "$API/api/v1/identity/$IDENTITY_UUID/json" \ --- +--- + +### `POST /api/v1/file/:file_uuid/pending-person` + +**Auth**: Required +**Scope**: file-level + +Create a manually managed "pending person" under a specific file. A pending person is an identity with `status='pending'` and `source='manual'`, used for unmatched traces that the user wants to manually label before a full identity resolution. + +Optionally binds a list of trace IDs to this new identity. 
+ +#### Request + +```json +{ + "trace_ids": [100, 150, 200], + "name": "Mystery Man #1" +} +``` + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `trace_ids` | array[int] | No | `[]` | Trace IDs to bind to this pending person | +| `name` | string | No | `"Person N"` | Human-readable name. Auto-generated if omitted | + +#### Example + +```bash +# Create pending person with name and no traces +curl -s -X POST "$API/api/v1/file/$FILE_UUID/pending-person" \ + -H "X-API-Key: $KEY" \ + -H "Content-Type: application/json" \ + -d '{"name": "Unknown Woman #2", "trace_ids": []}' + +# Create pending person with auto-name and bind traces +curl -s -X POST "$API/api/v1/file/$FILE_UUID/pending-person" \ + -H "X-API-Key: $KEY" \ + -H "Content-Type: application/json" \ + -d '{"trace_ids": [100, 150, 200]}' +``` + +#### Response (200) + +```json +{ + "success": true, + "message": "Created pending person: Mystery Man #1 (uuid: 4d96b25b-68f0-4c52-b238-d69f7dfd588b)", + "data": { + "identity_uuid": "4d96b25b-68f0-4c52-b238-d69f7dfd588b", + "identity_id": 55, + "name": "Mystery Man #1", + "bound_traces": 0 + } +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `identity_uuid` | string | UUID of the newly created pending identity | +| `identity_id` | integer | Internal ID of the new identity | +| `name` | string | Display name | +| `bound_traces` | integer | Number of traces bound | + +#### Side Effects + +- Creates an `identities` row with `status='pending'`, `source='manual'`, `file_uuid=<file_uuid>` +- If `trace_ids` provided: `UPDATE face_detections SET identity_id = ...` for matching traces +- If `trace_ids` provided: TKG face_track nodes get `identity_id` / `identity_name` in properties +- Identity JSON file synced to disk + +--- + +### `GET /api/v1/file/:file_uuid/pending-persons` + +**Auth**: Required +**Scope**: file-level + +List all pending persons for a file. 
+ +#### Example + +```bash +curl -s "$API/api/v1/file/$FILE_UUID/pending-persons" \ + -H "X-API-Key: $KEY" +``` + +#### Response (200) + +```json +{ + "success": true, + "message": "Found 2 pending persons for c36f35685177c981aa139b66bbbccc5b", + "data": [ + { + "identity_uuid": "232ecd08-a2bf-4bd0-bd25-0bd8fb7a7dae", + "identity_id": 56, + "name": "Person 2", + "created_at": "2026-06-23 17:13:23", + "trace_count": 3, + "bound_traces": null + } + ] +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `identity_uuid` | string | Identity UUID | +| `identity_id` | integer | Internal identity ID | +| `name` | string | Display name | +| `created_at` | string | Creation timestamp | +| `trace_count` | integer | Number of face traces bound to this pending person | +| `bound_traces` | array[int] | List of bound trace IDs (currently null, reserved for future expansion) | + +#### Notes + +- Pending persons are normal `identities` rows with `status='pending'` — they can be promoted to confirmed via `PATCH /api/v1/identity/:identity_uuid` (`{"status": "confirmed"}`) +- They can be merged into known identities via `POST /api/v1/identity/:identity_uuid/mergeinto` +- Use `GET /api/v1/identity/:identity_uuid/traces` to get detailed trace info for each pending person + +--- + ## Alias System (BCP 47 Locale Tags) Identity aliases support multilingual display names. Aliases are stored in `metadata.aliases` as an array of `{locale, name}` objects. diff --git a/scripts/fix_processor_stats.py b/scripts/fix_processor_stats.py new file mode 100644 index 0000000..68377d5 --- /dev/null +++ b/scripts/fix_processor_stats.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +""" +Fix processor_results statistics (frames_processed, output_size_bytes) +for all jobs that have missing data due to Worker crash. 
+""" + +import os +import sys +import re +import psycopg2 + +def fix_processor_stats(): + conn = psycopg2.connect(os.environ.get('DATABASE_URL', 'postgres://accusys@localhost:5432/momentry')) + cur = conn.cursor() + + # Find jobs with missing frames_processed + cur.execute(""" + SELECT DISTINCT mj.uuid, mj.id + FROM public.monitor_jobs mj + JOIN public.processor_results pr ON pr.job_id = mj.id + WHERE pr.frames_processed = 0 + AND pr.status = 'completed' + ORDER BY mj.created_at DESC + """) + + jobs = cur.fetchall() + print(f"Found {len(jobs)} jobs with missing statistics") + + for uuid, job_id in jobs: + print(f"\nProcessing UUID: {uuid}") + + # Get total_frames from YOLO output + yolo_file = f'/Users/accusys/momentry/output/{uuid}.yolo.json' + total_frames = 0 + + if os.path.exists(yolo_file): + with open(yolo_file, 'r') as f: + content = f.read(5000) + match = re.search(r'"total_frames": (\d+)', content) + if match: + total_frames = int(match.group(1)) + print(f" total_frames: {total_frames}") + + if total_frames > 0: + # Update frames_processed + cur.execute(""" + UPDATE public.processor_results + SET frames_processed = %s + WHERE job_id = %s + """, (total_frames, job_id)) + + # Update output_size_bytes for each processor + processors = ['asr', 'yolo', 'face', 'ocr', 'pose', 'cut', 'appearance', 'asrx'] + for proc in processors: + file_path = f'/Users/accusys/momentry/output/{uuid}.{proc}.json' + if os.path.exists(file_path): + size = os.path.getsize(file_path) + cur.execute(""" + UPDATE public.processor_results + SET output_size_bytes = %s + WHERE job_id = %s AND processor_type = %s + """, (size, job_id, proc)) + print(f" {proc}: {size} bytes") + + conn.commit() + print(f" ✓ Updated") + else: + print(f" ⚠ Skipped (no total_frames)") + + conn.close() + print(f"\nCompleted: {len(jobs)} jobs processed") + +if __name__ == '__main__': + fix_processor_stats() \ No newline at end of file diff --git a/scripts/fix_processors_asrx.py b/scripts/fix_processors_asrx.py new 
file mode 100644 index 0000000..937fb06 --- /dev/null +++ b/scripts/fix_processors_asrx.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +""" +Fix existing monitor_jobs records to include ASRX in processors list. + +This script adds 'asrx' to processors array for all monitor_jobs records +that don't have it yet. +""" + +import os +import psycopg2 + +def fix_processors(): + schema = os.environ.get('DATABASE_SCHEMA', 'public') + + conn = psycopg2.connect(os.environ.get('DATABASE_URL', 'postgres://accusys@localhost:5432/momentry')) + cur = conn.cursor() + + # Check current state + cur.execute(f""" + SELECT COUNT(*) FROM {schema}.monitor_jobs + WHERE processors IS NOT NULL + AND NOT ('asrx' = ANY(processors)) + """) + missing_asrx = cur.fetchone()[0] + + print(f"Found {missing_asrx} jobs missing ASRX in processors") + + if missing_asrx > 0: + # Add ASRX to processors array + cur.execute(f""" + UPDATE {schema}.monitor_jobs + SET processors = array_append(processors, 'asrx') + WHERE processors IS NOT NULL + AND NOT ('asrx' = ANY(processors)) + """) + + updated = cur.rowcount + conn.commit() + + print(f"Updated {updated} jobs to include ASRX") + + # Verify + cur.execute(f""" + SELECT uuid, processors FROM {schema}.monitor_jobs + WHERE processors IS NOT NULL + AND 'asrx' = ANY(processors) + ORDER BY created_at DESC LIMIT 5 + """) + + for row in cur.fetchall(): + print(f"UUID: {row[0]}, Processors: {row[1]}") + else: + print("All jobs already have ASRX in processors") + + conn.close() + +if __name__ == '__main__': + fix_processors() \ No newline at end of file diff --git a/src/api/identities.rs b/src/api/identities.rs index 01b4989..aed186f 100644 --- a/src/api/identities.rs +++ b/src/api/identities.rs @@ -166,18 +166,21 @@ async fn list_identities( let id_table = crate::core::db::schema::table_name("identities"); - let total: i64 = sqlx::query_scalar(&format!("SELECT COUNT(*) FROM {}", id_table)) - .fetch_one(db.pool()) - .await - .map_err(|e| { - ( - 
StatusCode::INTERNAL_SERVER_ERROR, - format!("Count error: {}", e), - ) - })?; + let total: i64 = sqlx::query_scalar(&format!( + "SELECT COUNT(*) FROM {} WHERE status IS NULL OR status != 'merged'", + id_table + )) + .fetch_one(db.pool()) + .await + .map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + format!("Count error: {}", e), + ) + })?; let sql = format!( - "SELECT id::int, uuid, name, metadata FROM {} ORDER BY id DESC LIMIT $1 OFFSET $2", + "SELECT id::int, uuid, name, metadata FROM {} WHERE status IS NULL OR status != 'merged' ORDER BY id DESC LIMIT $1 OFFSET $2", id_table ); diff --git a/src/api/identity_agent_api.rs b/src/api/identity_agent_api.rs index 9ed504e..c0f3d89 100644 --- a/src/api/identity_agent_api.rs +++ b/src/api/identity_agent_api.rs @@ -23,6 +23,14 @@ pub fn identity_agent_routes() -> Router { "/api/v1/agents/identity/match-from-trace", post(match_from_trace), ) + .route( + "/api/v1/agents/identity/generate-seeds", + post(generate_seeds_handler), + ) + .route( + "/api/v1/agents/identity/run", + post(run_identity_handler), + ) } #[derive(Debug, Serialize)] @@ -619,198 +627,373 @@ fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 { } } -/// 迭代多角度 face embedding 比對 + 傳播 (Qdrant version) -/// Round 1: 用 TMDb seed face_embedding 比對 Qdrant embeddings (threshold 0.50) -/// Round 2+: 用已匹配 trace 的所有 face 作為 seed,傳播到未匹配 trace +fn average_embeddings<'a>(embeddings: impl Iterator>) -> Vec { + let mut count = 0usize; + let mut sum: Option> = None; + for emb in embeddings { + if emb.len() != 512 { + continue; + } + match &mut sum { + None => sum = Some(emb.clone()), + Some(s) => { + for (i, v) in emb.iter().enumerate() { + s[i] += v; + } + } + } + count += 1; + } + if let Some(mut s) = sum { + let c = count as f32; + for v in &mut s { + *v /= c; + } + s + } else { + vec![0.0f32; 512] + } +} + +/// Cluster: trace centroid + seeds from Qdrant + stranger clustering. 
+/// Round 1: centroid vs seeds (TH=0.55) +/// Round 2+: propagate from matched (TH=0.50) +/// Unknown: greedy stranger clustering (TH=0.40) +/// Writes identity_ref/stranger_ref to Qdrant payload, TKG nodes, and face_detections. async fn match_faces_iterative(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Result { use crate::core::db::face_embedding_db::FaceEmbeddingDb; use std::collections::HashMap; - // Step 1: 載入 TMDb identities (source='tmdb' 且有 face_embedding) - let identities_table = schema::table_name("identities"); - let tmdb_rows = sqlx::query_as::<_, (i32, String, Vec)>( - &format!("SELECT id, name, face_embedding::real[] FROM {} WHERE source='tmdb' AND face_embedding IS NOT NULL", identities_table) - ) - .fetch_all(pool).await?; + let face_db = FaceEmbeddingDb::new(); - if tmdb_rows.is_empty() { - tracing::warn!("[FaceMatch] No TMDb identities with face embeddings"); - return Ok(0); - } + // Step 1: Load seeds from Qdrant (type=identity_seed) + let seeds = face_db.get_seed_embeddings().await?; tracing::info!( - "[FaceMatch-Qdrant] Loaded {} TMDb seed identities", - tmdb_rows.len() + "[FaceMatch] Loaded {} seeds from Qdrant", + seeds.len() ); - // Step 2: Load embeddings from Qdrant - let face_db = FaceEmbeddingDb::new(); + // Step 2: Preload identity internal IDs (uuid → (id, name)) + let id_table = schema::table_name("identities"); + let seed_identity_map: HashMap = if !seeds.is_empty() { + let uuids: Vec = seeds.iter().map(|(uuid, _, _)| uuid.clone()).collect(); + if uuids.is_empty() { + HashMap::new() + } else { + let rows = sqlx::query_as::<_, (i32, String, String)>(&format!( + "SELECT id, uuid::text, name FROM {} WHERE uuid::text = ANY($1)", + id_table + )) + .bind(&uuids) + .fetch_all(pool) + .await? 
+ .into_iter() + .map(|(id, uuid, name)| (uuid, (id, name))) + .collect(); + rows + } + } else { + HashMap::new() + }; + + // Step 3: Load face embeddings from Qdrant for this file let qdrant_embeddings = face_db.get_all_embeddings_for_file(file_uuid).await?; if qdrant_embeddings.is_empty() { - tracing::warn!( - "[FaceMatch-Qdrant] No face embeddings in Qdrant for {}", - file_uuid - ); - return match_faces_iterative_pg(pool, file_uuid).await; // Fallback to PG + tracing::warn!("[FaceMatch] No face embeddings in Qdrant for {}", file_uuid); + return Ok(0); } - // Group: trace_id → Vec<(frame, embedding)> - let mut face_track_faces_raw: HashMap)>> = HashMap::new(); + // Step 4: Group embeddings by trace_id, keeping confidence + let mut trace_faces: HashMap, f64)>> = HashMap::new(); for (_, emb, payload) in &qdrant_embeddings { - face_track_faces_raw + trace_faces .entry(payload.trace_id) .or_default() - .push((payload.frame, emb.clone())); + .push((payload.frame, emb.clone(), payload.confidence)); } - // Sample 3 embeddings per trace (front, mid, back) - let mut face_track_samples: HashMap>> = HashMap::new(); - for (tid, mut faces) in face_track_faces_raw { - faces.sort_by_key(|(frame, _)| *frame); - let n = faces.len(); - let indices = if n <= 3 { - (0..n).collect::>() - } else { - vec![0, n / 2, n - 1] - }; - let samples: Vec> = indices.iter().map(|&i| faces[i].1.clone()).collect(); - face_track_samples.insert(tid, samples); - } + // Step 5: Progressive multi-round matching with derived seeds + // Each round: choose a face with best seed sim for matching; separately, + // collect the highest-confidence face per trace for building derived seeds. 
+ const TH_MIN: f32 = 0.35; + const DERIVED_CONF: f64 = 0.90; + const MAX_DERIVED_PER_ID: usize = 9; + const MAX_FACES_PER_TRACE: usize = 3; + const ANGLE_SIM_THRESHOLD: f32 = 0.90; + const TH_STRANGER: f32 = 0.40; - let total_traces = face_track_samples.len(); - let sample_count: usize = face_track_samples.values().map(|v| v.len()).sum(); + let total_traces = trace_faces.len(); + let total_embeddings: usize = trace_faces.values().map(|v| v.len()).sum(); tracing::info!( - "[FaceMatch-Qdrant] Loaded {} traces, sampled {} embeddings", + "[FaceMatch] Loaded {} traces ({} face embeddings) from Qdrant for {}", total_traces, - sample_count + total_embeddings, + file_uuid ); - // Step 3: Match against TMDb seeds - const TH: f32 = 0.50; - let tmdb_seeds: Vec<(i32, String, Vec)> = tmdb_rows; - let mut matched: HashMap = HashMap::new(); + let mut matched: HashMap = HashMap::new(); + let mut trace_face_count: HashMap = HashMap::new(); - for (&tid, samples) in &face_track_samples { - let mut best_name = String::new(); - let mut best_sim = 0.0f32; - for (_, ref name, ref tmdb_emb) in &tmdb_seeds { - for face_emb in samples { - let s = cosine_similarity(face_emb, tmdb_emb); - if s > best_sim { - best_sim = s; - best_name = name.clone(); - } - } + // All reference embeddings: start with original TMDb seeds + let mut all_refs: Vec<(String, String, Vec)> = seeds.clone(); + let thresholds = [0.55f32, 0.50, 0.45, 0.40, 0.35]; + let mut prev_total = 0usize; + + for (round_idx, &th) in thresholds.iter().enumerate() { + if th < TH_MIN { + break; } - if best_sim >= TH { - matched.insert(tid, best_name); - } - } - tracing::info!( - "[FaceMatch-Qdrant] Round 1: matched {} traces (threshold={})", - matched.len(), - TH - ); - // Round 2+: Propagate - let mut round = 2; - while matched.len() < face_track_samples.len() { - let prev_count = matched.len(); + let mut new_matches: HashMap = HashMap::new(); + let mut seed_candidates: Vec<(i32, String, i32, Vec, f64)> = Vec::new(); - // Collect new 
matches in separate HashMap - let mut new_matches: HashMap = HashMap::new(); - - for (&tid, samples) in &face_track_samples { + for (&tid, faces) in &trace_faces { if matched.contains_key(&tid) { continue; } + trace_face_count.entry(tid).or_insert(faces.len()); - for (matched_tid, matched_name) in &matched { - if let Some(matched_embs) = face_track_samples.get(matched_tid) { - for face_emb in samples { - for ref_emb in matched_embs { - let s = cosine_similarity(face_emb, ref_emb); - if s >= TH { - new_matches.insert(tid, matched_name.clone()); - break; + let mut best_sim = 0.0f32; + let mut best_name = String::new(); + let mut best_id = 0i32; + // Collect all high-confidence faces in this trace for derived seeds + let mut trace_candidates: Vec<(Vec, f64)> = Vec::new(); + + for (_, emb, conf) in faces { + for (ref_uuid, ref_name, ref_emb) in &all_refs { + let s = cosine_similarity(emb, ref_emb); + if s > best_sim { + best_sim = s; + best_name = ref_name.clone(); + if let Some(id_str) = ref_uuid.strip_prefix("derived:") { + if let Ok(parsed) = id_str.parse::() { + best_id = parsed; } + } else if let Some((id, _)) = seed_identity_map.get(ref_uuid) { + best_id = *id; } } } + if *conf >= DERIVED_CONF { + trace_candidates.push((emb.clone(), *conf)); + } + } + + if best_sim >= th && best_id > 0 { + new_matches.insert(tid, (best_name.clone(), best_id)); + + // Top MAX_FACES_PER_TRACE highest-confidence faces with angular diversity + trace_candidates.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap()); + let mut selected: Vec> = Vec::new(); + for (emb, conf) in trace_candidates { + if selected.len() >= MAX_FACES_PER_TRACE { + break; + } + if selected.iter().any(|e| cosine_similarity(e, &emb) >= ANGLE_SIM_THRESHOLD) { + continue; + } + selected.push(emb.clone()); + seed_candidates.push((best_id, best_name.clone(), tid, emb, conf)); + } } } - // Merge new matches - matched.extend(new_matches); - - if matched.len() == prev_count { + let new_count = new_matches.len(); + if new_count 
== 0 && round_idx > 0 { break; } + + matched.extend(new_matches); + + // Build derived seeds: pick up to MAX_DERIVED_PER_ID per identity + // (max MAX_FACES_PER_TRACE from each trace), sorted by confidence descending + seed_candidates.sort_by(|a, b| b.4.partial_cmp(&a.4).unwrap()); + let mut per_id: HashMap = HashMap::new(); + let mut trace_used_faces: HashMap = HashMap::new(); + let mut added_seeds = 0usize; + for (id, name, tid, emb, _) in &seed_candidates { + let cnt = per_id.entry(*id).or_insert(0); + if *cnt >= MAX_DERIVED_PER_ID { + continue; + } + let trace_cnt = trace_used_faces.entry(*tid).or_insert(0); + if *trace_cnt >= MAX_FACES_PER_TRACE { + continue; + } + *trace_cnt += 1; + *cnt += 1; + all_refs.push((format!("derived:{}", id), name.clone(), emb.clone())); + added_seeds += 1; + } + tracing::info!( - "[FaceMatch-Qdrant] Round {}: matched {} total", - round, - matched.len() + "[FaceMatch] Round {}: matched {}+{}={} total (TH={}, {} new derived seeds)", + round_idx + 1, + prev_total, + new_count, + matched.len(), + th, + added_seeds ); - round += 1; + + prev_total = matched.len(); } - // Update face_detections.identity_id AND tkg_nodes.properties (Phase 3) - let fd_table = schema::table_name("face_detections"); - let nodes_table = schema::table_name("tkg_nodes"); - let id_table = schema::table_name("identities"); - let identities_map: HashMap = tmdb_seeds - .iter() - .map(|(id, name, _)| (name.clone(), *id)) + // Step 7: Stranger clustering for unmatched traces + let unmatched_ids: Vec = trace_faces + .keys() + .filter(|tid| !matched.contains_key(tid)) + .copied() .collect(); - // Batch query identity names - let identity_names: HashMap = sqlx::query_as::<_, (i32, String)>(&format!( - "SELECT id, name FROM {} WHERE id = ANY($1)", - id_table - )) - .bind(identities_map.values().collect::>()) - .fetch_all(pool) - .await? 
- .into_iter() - .collect(); + let mut stranger_map: HashMap = HashMap::new(); + let mut assigned_stranger: std::collections::HashSet = std::collections::HashSet::new(); + let mut stranger_count = 0usize; - let mut updated = 0usize; - for (tid, name) in &matched { - let identity_id = identities_map.get(name); - if let Some(id) = identity_id { - let rows = sqlx::query(&format!( - "UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND face_track_id = $3", - fd_table - )) - .bind(*id) - .bind(file_uuid) - .bind(*tid) - .execute(pool) - .await? - .rows_affected(); - updated += rows as usize; + // Sort by face count descending (most reliable first) + let mut sorted_unmatched: Vec = unmatched_ids.clone(); + sorted_unmatched.sort_by(|a, b| { + trace_face_count + .get(b) + .unwrap_or(&0) + .cmp(trace_face_count.get(a).unwrap_or(&0)) + }); - // Phase 3: Also update TKG node - let external_id = format!("face_track_{}", tid); - let identity_name = identity_names.get(id); - let _ = sqlx::query(&format!( - "UPDATE {} SET properties = jsonb_set(\ - jsonb_set(properties, '{{identity_id}}', $1::jsonb, false),\ - '{{identity_name}}', $2::jsonb, false)\ - WHERE file_uuid = $3 AND node_type = 'face_track' AND external_id = $4", - nodes_table - )) - .bind(*id) - .bind(identity_name.as_deref()) - .bind(file_uuid) - .bind(&external_id) - .execute(pool) - .await; + for &tid in &sorted_unmatched { + if assigned_stranger.contains(&tid) { + continue; + } + let centroid_a = if let Some(faces) = trace_faces.get(&tid) { + average_embeddings(faces.iter().map(|(_, emb, _)| emb)) + } else { + continue; + }; + stranger_count += 1; + let stranger_id = format!("{}:stranger_{}", file_uuid, stranger_count); + assigned_stranger.insert(tid); + stranger_map.insert(tid, stranger_id.clone()); + + for &other_tid in &sorted_unmatched { + if assigned_stranger.contains(&other_tid) || other_tid == tid { + continue; + } + if let Some(faces_b) = trace_faces.get(&other_tid) { + let centroid_b = 
average_embeddings(faces_b.iter().map(|(_, emb, _)| emb)); + let s = cosine_similarity(&centroid_a, &centroid_b); + if s >= TH_STRANGER { + assigned_stranger.insert(other_tid); + stranger_map.insert(other_tid, stranger_id.clone()); + } + } } } - tracing::info!("[FaceMatch-Qdrant] Updated {} face_detections", updated); - Ok(updated) + let stranger_trace_count = stranger_map.len(); + tracing::info!( + "[FaceMatch] Stranger clusters: {} groups, {} traces", + stranger_count, + stranger_trace_count + ); + + // Step 8: Write results to TKG nodes + Qdrant payload + face_detections + let fd_table = schema::table_name("face_detections"); + let nodes_table = schema::table_name("tkg_nodes"); + let mut pg_updated = 0usize; + + // Clear old identity assignments before writing new ones + let _ = sqlx::query(&format!( + "UPDATE {} SET identity_id = NULL WHERE file_uuid = $1", + fd_table + )) + .bind(file_uuid) + .execute(pool) + .await; + + // 8a: Matched traces → identity_ref + for (&tid, (name, identity_id)) in &matched { + // Skip if identity_id is invalid (FK constraint would fail) + if *identity_id <= 0 { + tracing::warn!( + "[FaceMatch] Skipping trace {}: invalid identity_id={}", + tid, identity_id + ); + continue; + } + + let identity_ref = format!("{}:{}", file_uuid, identity_id); + + // TKG node + let external_id = format!("face_track_{}", tid); + if let Err(e) = sqlx::query(&format!( + "UPDATE {} SET properties = jsonb_set(\ + jsonb_set(properties, '{{identity_ref}}', to_jsonb($1), true),\ + '{{identity_name}}', to_jsonb($2), true)\ + WHERE file_uuid = $3 AND node_type = 'face_track' AND external_id = $4", + nodes_table + )) + .bind(&identity_ref) + .bind(name) + .bind(file_uuid) + .bind(&external_id) + .execute(pool) + .await + { + tracing::warn!("[FaceMatch] TKG update failed for trace {}: {:?}", tid, e); + } + + // Qdrant payload + let _ = face_db + .update_identity_ref_by_trace(file_uuid, tid, &identity_ref) + .await; + + // PostgreSQL face_detections (backward compat) + 
let rows = sqlx::query(&format!( + "UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND trace_id = $3", + fd_table + )) + .bind(identity_id) + .bind(file_uuid) + .bind(tid) + .execute(pool) + .await + .map(|r| r.rows_affected()) + .unwrap_or(0); + pg_updated += rows as usize; + } + + // 8b: Stranger traces → stranger_ref + for (&tid, stranger_ref) in &stranger_map { + // TKG node + let external_id = format!("face_track_{}", tid); + if let Err(e) = sqlx::query(&format!( + "UPDATE {} SET properties = jsonb_set(\ + properties, '{{stranger_ref}}', to_jsonb($1), true)\ + WHERE file_uuid = $2 AND node_type = 'face_track' AND external_id = $3", + nodes_table + )) + .bind(stranger_ref) + .bind(file_uuid) + .bind(&external_id) + .execute(pool) + .await + { + tracing::warn!("[FaceMatch] TKG stranger update failed for trace {}: {:?}", tid, e); + } + + // Qdrant payload + let _ = face_db + .update_stranger_ref_by_trace(file_uuid, tid, stranger_ref) + .await; + } + + tracing::info!( + "[FaceMatch] Done: {} matched, {} strangers — {} face_detections updated", + matched.len(), + stranger_trace_count, + pg_updated + ); + Ok(pg_updated) } /// Fallback: PostgreSQL-based matching (original implementation) @@ -1312,3 +1495,220 @@ pub async fn run_identity_agent(db: &PostgresDb, file_uuid: &str) -> anyhow::Res ); Ok(()) } + +/// API handler: POST /api/v1/agents/identity/generate-seeds +async fn generate_seeds_handler( + State(state): State, +) -> Result, (StatusCode, Json)> { + let db = &state.db; + let pool = db.pool(); + + let count = generate_seed_embeddings(db) + .await + .map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"success": false, "message": format!("{}", e)})), + ) + })?; + + // Auto-trigger identity agent for all ready files + if count > 0 { + let ready_files = find_ready_files(pool).await.unwrap_or_default(); + if !ready_files.is_empty() { + tracing::info!( + "[GenerateSeeds] Auto-triggering identity agent for {} files: {:?}", + 
ready_files.len(), + ready_files + ); + for file_uuid in &ready_files { + let db = state.db.clone(); + let fid = file_uuid.clone(); + tokio::spawn(async move { + match run_identity_agent(&db, &fid).await { + Ok(_) => tracing::info!( + "[GenerateSeeds] Identity agent completed for {}", + fid + ), + Err(e) => tracing::warn!( + "[GenerateSeeds] Identity agent failed for {}: {}", + fid, + e + ), + } + }); + } + } + } + + Ok(Json(serde_json::json!({ + "success": true, + "message": format!("Generated {} seed embeddings", count), + "count": count + }))) +} + +/// Find videos that are ready for identity processing (have face embeddings). +async fn find_ready_files(pool: &sqlx::PgPool) -> anyhow::Result> { + let fd_table = crate::core::db::schema::table_name("face_detections"); + let rows: Vec<(String,)> = sqlx::query_as(&format!( + "SELECT DISTINCT file_uuid FROM {} WHERE embedding IS NOT NULL AND identity_id IS NULL", + fd_table + )) + .fetch_all(pool) + .await?; + Ok(rows.into_iter().map(|r| r.0).collect()) +} + +/// API handler: POST /api/v1/agents/identity/run +async fn run_identity_handler( + State(state): State, + axum::Json(body): axum::Json, +) -> Result, (StatusCode, Json)> { + let file_uuid = body + .get("file_uuid") + .and_then(|v| v.as_str()) + .ok_or_else(|| { + ( + StatusCode::BAD_REQUEST, + Json(serde_json::json!({"success": false, "message": "file_uuid required"})), + ) + })?; + + match run_identity_agent(&state.db, file_uuid).await { + Ok(()) => Ok(Json(serde_json::json!({ + "success": true, + "message": format!("Identity agent completed for {}", file_uuid), + }))), + Err(e) => Ok(Json(serde_json::json!({ + "success": false, + "message": format!("Identity agent failed: {}", e), + }))), + } +} + +/// Read all TMDb identities with profile photos, extract face embeddings, store in Qdrant as seeds. 
+pub async fn generate_seed_embeddings(db: &PostgresDb) -> anyhow::Result { + use crate::core::db::face_embedding_db::FaceEmbeddingDb; + use std::path::Path; + + let pool = db.pool(); + let id_table = schema::table_name("identities"); + + let rows = sqlx::query_as::<_, (i32, String, String, i32, String)>(&format!( + "SELECT id, name, uuid::text, tmdb_id, tmdb_profile FROM {} \ + WHERE source='tmdb' AND tmdb_profile IS NOT NULL", + id_table + )) + .fetch_all(pool) + .await?; + + if rows.is_empty() { + tracing::warn!("[GenerateSeeds] No TMDb identities with profile photos"); + return Ok(0); + } + + let scripts_dir = std::env::var("MOMENTRY_SCRIPTS_DIR") + .unwrap_or_else(|_| "/Users/accusys/momentry_core_0.1/scripts".to_string()); + let python_path = std::env::var("MOMENTRY_PYTHON_PATH") + .unwrap_or_else(|_| "/opt/homebrew/bin/python3.11".to_string()); + + let extract_script = Path::new(&scripts_dir).join("extract_face_embedding.py"); + let face_db = FaceEmbeddingDb::new(); + + let mut success = 0usize; + for (id, name, uuid, tmdb_id, profile_url) in &rows { + tracing::info!("[GenerateSeeds] Processing {} ({})", name, uuid); + + // Download profile image + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(30)) + .build() + .unwrap_or_else(|_| reqwest::Client::new()); + let resp = client.get(profile_url).send().await; + let image_bytes = match resp { + Ok(r) if r.status().is_success() => r.bytes().await.unwrap_or_default(), + _ => { + tracing::warn!("[GenerateSeeds] Failed to download: {} from {}", name, profile_url); + continue; + } + }; + + if image_bytes.is_empty() { + tracing::warn!("[GenerateSeeds] Empty image for {}", name); + continue; + } + + // Save to temp file + let temp_dir = std::env::temp_dir().join("momentry_seed_faces"); + std::fs::create_dir_all(&temp_dir)?; + let temp_img = temp_dir.join(format!("{}.jpg", uuid)); + std::fs::write(&temp_img, &image_bytes)?; + + // Extract embedding with timeout + use 
tokio::time::timeout; + let output = timeout( + std::time::Duration::from_secs(180), + tokio::process::Command::new(&python_path) + .arg(&extract_script) + .arg(&temp_img) + .output(), + ) + .await + .map_err(|_| anyhow::anyhow!("Extract embedding timed out for {}", name))??; + + let _ = std::fs::remove_file(&temp_img); + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + tracing::warn!( + "[GenerateSeeds] Extraction failed for {}: {}", + name, + stderr.trim() + ); + continue; + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let extract_result: serde_json::Value = match serde_json::from_str(&stdout) { + Ok(v) => v, + Err(e) => { + tracing::warn!("[GenerateSeeds] Parse error for {}: {}", name, e); + continue; + } + }; + + let embedding: Vec = match serde_json::from_value( + extract_result.get("embedding").ok_or_else(|| anyhow::anyhow!("No embedding"))?.clone(), + ) { + Ok(v) => v, + Err(e) => { + tracing::warn!("[GenerateSeeds] Embedding format error for {}: {}", name, e); + continue; + } + }; + + let embedding_f32: Vec = embedding.into_iter().map(|v| v as f32).collect(); + + // Store in Qdrant + match face_db + .upsert_seed_embedding(uuid, name, *tmdb_id, &embedding_f32) + .await + { + Ok(_) => { + success += 1; + tracing::info!("[GenerateSeeds] Stored seed for {}", name); + } + Err(e) => { + tracing::warn!("[GenerateSeeds] Qdrant error for {}: {}", name, e); + } + } + } + + tracing::info!( + "[GenerateSeeds] Done: {}/{} seeds generated", + success, + rows.len() + ); + Ok(success) +} diff --git a/src/api/identity_api.rs b/src/api/identity_api.rs index bb41fd9..ebb173a 100644 --- a/src/api/identity_api.rs +++ b/src/api/identity_api.rs @@ -7,6 +7,7 @@ use axum::{ }; use serde::{Deserialize, Serialize}; use sqlx::Row; +use std::process::Command; use crate::core::db::ResourceRecord; @@ -45,6 +46,10 @@ pub fn identity_routes() -> Router { "/api/v1/identity/:identity_uuid/profile-image", 
post(upload_profile_image).get(get_profile_image), ) + .route( + "/api/v1/identity/:identity_uuid/profile-image/from-face", + post(set_profile_from_face), + ) .route( "/api/v1/identity/:identity_uuid/status", get(get_identity_status), @@ -1279,6 +1284,163 @@ async fn get_profile_image( Err(StatusCode::NOT_FOUND) } +#[derive(Debug, Deserialize)] +pub struct SetProfileFromFaceRequest { + pub file_uuid: String, + pub face_id: Option, + pub id: Option, +} + +async fn set_profile_from_face( + State(state): State, + Path(identity_uuid): Path, + Json(req): Json, +) -> Result, (StatusCode, Json)> { + use crate::core::db::schema; + let fd_table = schema::table_name("face_detections"); + let videos_table = schema::table_name("videos"); + + let uuid_clean = identity_uuid.replace('-', ""); + + let face_identifier = match (&req.face_id, req.id) { + (Some(fid), _) => fid.clone(), + (None, Some(id)) => id.to_string(), + (None, None) => { + return Err(( + StatusCode::BAD_REQUEST, + Json(serde_json::json!({"success": false, "message": "Either face_id or id is required"})), + )); + } + }; + + let use_id_field = req.id.is_some(); + + let row: Option<(i64, i32, i32, i32, i32, f64)> = if use_id_field { + sqlx::query_as(&format!( + "SELECT frame_number, x, y, width, height, confidence FROM {} WHERE file_uuid = $1 AND id = $2", + fd_table + )) + .bind(&req.file_uuid) + .bind(req.id.unwrap()) + .fetch_optional(state.db.pool()) + .await + } else { + sqlx::query_as(&format!( + "SELECT frame_number, x, y, width, height, confidence FROM {} WHERE file_uuid = $1 AND face_id = $2", + fd_table + )) + .bind(&req.file_uuid) + .bind(&face_identifier) + .fetch_optional(state.db.pool()) + .await + } + .map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"success": false, "message": format!("DB error: {}", e)})), + ) + })?; + + let (frame_number, x, y, width, height, confidence) = row.ok_or_else(|| { + ( + StatusCode::NOT_FOUND, + Json(serde_json::json!({"success": false, 
"message": "Face not found"})), + ) + })?; + + let video_row: Option<(String, Option, Option)> = sqlx::query_as(&format!( + "SELECT file_path, width, height FROM {} WHERE file_uuid = $1", + videos_table + )) + .bind(&req.file_uuid) + .fetch_optional(state.db.pool()) + .await + .map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"success": false, "message": format!("DB error: {}", e)})), + ) + })?; + + let (file_path, video_width, video_height) = video_row.ok_or_else(|| { + ( + StatusCode::NOT_FOUND, + Json(serde_json::json!({"success": false, "message": "Video file not found"})), + ) + })?; + + let vw = video_width.unwrap_or(1920); + let vh = video_height.unwrap_or(1080); + + crate::core::thumbnail::validator::validate_crop(x, y, width, height, vw, vh).map_err(|e| { + ( + StatusCode::BAD_REQUEST, + Json(serde_json::json!({"success": false, "message": format!("Crop validation failed: {}", e)})), + ) + })?; + + let select = format!("select=eq(n\\,{})", frame_number); + let vf = format!("{},crop={}:{}:{}:{}", select, width, height, x, y); + + let output = Command::new("ffmpeg") + .args([ + "-i", + &file_path, + "-vf", + &vf, + "-frames:v", + "1", + "-f", + "image2pipe", + "-vcodec", + "mjpeg", + "-", + ]) + .output() + .map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"success": false, "message": format!("FFmpeg failed: {}", e)})), + ) + })?; + + if !output.status.success() { + return Err(( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"success": false, "message": "FFmpeg extraction failed"})), + )); + } + + crate::core::thumbnail::validator::validate_jpeg(&output.stdout).map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"success": false, "message": format!("JPEG validation failed: {}", e)})), + ) + })?; + + let dir = crate::core::identity::storage::identity_dir(&uuid_clean); + std::fs::create_dir_all(&dir).map_err(|e| { + (StatusCode::INTERNAL_SERVER_ERROR, 
Json(serde_json::json!({"success": false, "message": format!("Failed to create dir: {}", e)}))) + })?; + + let file_name = "profile.jpg"; + let file_path = dir.join(file_name); + std::fs::write(&file_path, &output.stdout).map_err(|e| { + (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({"success": false, "message": format!("Failed to write file: {}", e)}))) + })?; + + let pool = state.db.pool().clone(); + let uuid_clone = uuid_clean.clone(); + let _ = crate::core::identity::storage::save_identity_file_by_pool(&pool, &uuid_clone).await; + + Ok(Json(ProfileImageResponse { + success: true, + identity_uuid: uuid_clean, + path: file_path.to_string_lossy().to_string(), + message: format!("Profile image set from face {} (frame {}, confidence {:.2})", face_identifier, frame_number, confidence), + })) +} + async fn get_identity_json( State(state): State, Path(identity_uuid): Path, diff --git a/src/api/identity_binding.rs b/src/api/identity_binding.rs index ce8f5ee..4789ae8 100644 --- a/src/api/identity_binding.rs +++ b/src/api/identity_binding.rs @@ -93,15 +93,38 @@ pub async fn bind_identity( ) })?; - // Capture old identity_id before bind - let old_identity_id: Option = sqlx::query_scalar(&format!( - "SELECT identity_id FROM {} WHERE file_uuid = $1 AND face_id = $2", - table - )) - .bind(&req.file_uuid) - .bind(&req.face_id) - .fetch_optional(state.db.pool()) - .await + let face_identifier = match (&req.face_id, req.id) { + (Some(fid), _) => fid.clone(), + (None, Some(id)) => id.to_string(), + (None, None) => { + return Err(( + StatusCode::BAD_REQUEST, + Json(serde_json::json!({"error": "Either face_id or id is required"})), + )); + } + }; + + let use_id_field = req.id.is_some(); + + let old_identity_id: Option = if use_id_field { + sqlx::query_scalar(&format!( + "SELECT identity_id FROM {} WHERE file_uuid = $1 AND id = $2", + table + )) + .bind(&req.file_uuid) + .bind(req.id.unwrap()) + .fetch_optional(state.db.pool()) + .await + } else { + 
sqlx::query_scalar(&format!( + "SELECT identity_id FROM {} WHERE file_uuid = $1 AND face_id = $2", + table + )) + .bind(&req.file_uuid) + .bind(&face_identifier) + .fetch_optional(state.db.pool()) + .await + } .map_err(|e| { ( StatusCode::INTERNAL_SERVER_ERROR, @@ -110,16 +133,27 @@ pub async fn bind_identity( })? .flatten(); - // Direct UPDATE face_detections.identity_id - let result = sqlx::query(&format!( - "UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND face_id = $3", - table - )) - .bind(identity_id) - .bind(&req.file_uuid) - .bind(&req.face_id) - .execute(state.db.pool()) - .await + let result = if use_id_field { + sqlx::query(&format!( + "UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND id = $3", + table + )) + .bind(identity_id) + .bind(&req.file_uuid) + .bind(req.id.unwrap()) + .execute(state.db.pool()) + .await + } else { + sqlx::query(&format!( + "UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND face_id = $3", + table + )) + .bind(identity_id) + .bind(&req.file_uuid) + .bind(&face_identifier) + .execute(state.db.pool()) + .await + } .map_err(|e| { ( StatusCode::INTERNAL_SERVER_ERROR, @@ -127,6 +161,67 @@ pub async fn bind_identity( ) })?; + let trace_id: Option = if use_id_field { + sqlx::query_scalar(&format!( + "SELECT trace_id FROM {} WHERE file_uuid = $1 AND id = $2 LIMIT 1", + table + )) + .bind(&req.file_uuid) + .bind(req.id.unwrap()) + .fetch_optional(state.db.pool()) + .await + } else { + sqlx::query_scalar(&format!( + "SELECT trace_id FROM {} WHERE file_uuid = $1 AND face_id = $2 LIMIT 1", + table + )) + .bind(&req.file_uuid) + .bind(&face_identifier) + .fetch_optional(state.db.pool()) + .await + } + .map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"error": e.to_string()})), + ) + })? + .flatten(); + + // Update Qdrant + TKG if trace_id exists + if let Some(tid) = trace_id { + // 1. 
Update Qdrant payload + let face_db = crate::core::db::FaceEmbeddingDb::new(); + if let Err(e) = face_db + .update_identity_by_trace(&req.file_uuid, tid, &uuid_clean) + .await + { + tracing::warn!( + "[bind] Failed to update Qdrant identity_uuid for trace {}: {}", + tid, e + ); + } + + // 2. Update TKG face_track node (dual-field design) + let tkg_table = crate::core::db::schema::table_name("tkg_nodes"); + let ext_id = format!("face_track_{}", tid); + let identity_ref = format!("{}:identity_{}", req.file_uuid, identity_id); + + let _ = sqlx::query(&format!( + "UPDATE {} SET properties = properties || $1::jsonb - 'stranger_ref' \ + WHERE file_uuid = $2 AND node_type = 'face_track' AND external_id = $3", + tkg_table + )) + .bind(serde_json::json!({ + "identity_uuid": uuid_clean, + "identity_ref": identity_ref + })) + .bind(&req.file_uuid) + .bind(&ext_id) + .execute(state.db.pool()) + .await; + } + // Clear bind redo stack let _ = sqlx::query(&format!( "DELETE FROM {} WHERE identity_id = $1 AND is_undone = true AND operation IN ('bind','unbind','bind_trace')", @@ -144,10 +239,10 @@ pub async fn bind_identity( crate::api::middleware::AuthSource::ApiKey => "api_key", }; let before = serde_json::json!({ - "file_uuid": req.file_uuid, "face_id": req.face_id, "identity_id_before": old_identity_id + "file_uuid": req.file_uuid, "face_id": face_identifier, "identity_id_before": old_identity_id }); let after = serde_json::json!({ - "file_uuid": req.file_uuid, "face_id": req.face_id, "identity_id_after": identity_id + "file_uuid": req.file_uuid, "face_id": face_identifier, "identity_id_after": identity_id }); let _ = sqlx::query(&format!( "INSERT INTO {} (identity_id, operation, before_snapshot, after_snapshot, is_undone, user_id, user_source) VALUES ($1, 'bind', $2, $3, false, $4, $5)", @@ -161,7 +256,6 @@ pub async fn bind_identity( .execute(state.db.pool()) .await; - // Sync identity JSON file if let Err(e) = 
crate::core::identity::storage::save_identity_file_by_pool(state.db.pool(), &uuid_clean) .await @@ -177,7 +271,7 @@ pub async fn bind_identity( success: true, message: format!( "Bound face {} of {} to {}", - req.face_id, req.file_uuid, name + face_identifier, req.file_uuid, name ), data: Some(serde_json::json!({"rows_affected": result.rows_affected()})), })) @@ -193,15 +287,38 @@ pub async fn unbind_identity( let id_table = crate::core::db::schema::table_name("identities"); let history_table = crate::core::db::schema::table_name("identity_history"); - // Capture old identity_id before unbind - let old_identity_id: Option = sqlx::query_scalar(&format!( - "SELECT identity_id FROM {} WHERE file_uuid = $1 AND face_id = $2", - table - )) - .bind(&req.file_uuid) - .bind(&req.face_id) - .fetch_optional(state.db.pool()) - .await + let face_identifier = match (&req.face_id, req.id) { + (Some(fid), _) => fid.clone(), + (None, Some(id)) => id.to_string(), + (None, None) => { + return Err(( + StatusCode::BAD_REQUEST, + Json(serde_json::json!({"error": "Either face_id or id is required"})), + )); + } + }; + + let use_id_field = req.id.is_some(); + + let old_identity_id: Option = if use_id_field { + sqlx::query_scalar(&format!( + "SELECT identity_id FROM {} WHERE file_uuid = $1 AND id = $2", + table + )) + .bind(&req.file_uuid) + .bind(req.id.unwrap()) + .fetch_optional(state.db.pool()) + .await + } else { + sqlx::query_scalar(&format!( + "SELECT identity_id FROM {} WHERE file_uuid = $1 AND face_id = $2", + table + )) + .bind(&req.file_uuid) + .bind(&face_identifier) + .fetch_optional(state.db.pool()) + .await + } .map_err(|e| { ( StatusCode::INTERNAL_SERVER_ERROR, @@ -210,14 +327,25 @@ pub async fn unbind_identity( })? 
.flatten(); - let result = sqlx::query(&format!( - "UPDATE {} SET identity_id = NULL WHERE file_uuid = $1 AND face_id = $2", - table - )) - .bind(&req.file_uuid) - .bind(&req.face_id) - .execute(state.db.pool()) - .await + let result = if use_id_field { + sqlx::query(&format!( + "UPDATE {} SET identity_id = NULL WHERE file_uuid = $1 AND id = $2", + table + )) + .bind(&req.file_uuid) + .bind(req.id.unwrap()) + .execute(state.db.pool()) + .await + } else { + sqlx::query(&format!( + "UPDATE {} SET identity_id = NULL WHERE file_uuid = $1 AND face_id = $2", + table + )) + .bind(&req.file_uuid) + .bind(&face_identifier) + .execute(state.db.pool()) + .await + } .map_err(|e| { ( StatusCode::INTERNAL_SERVER_ERROR, @@ -225,15 +353,85 @@ pub async fn unbind_identity( ) })?; - // Phase 2.3: Also update TKG node (find face_track_id first) - let trace_id_opt: Option = sqlx::query_scalar(&format!( - "SELECT trace_id FROM {} WHERE file_uuid = $1 AND face_id = $2", - table - )) - .bind(&req.file_uuid) - .bind(&req.face_id) - .fetch_optional(state.db.pool()) - .await + let trace_id: Option = if use_id_field { + sqlx::query_scalar(&format!( + "SELECT trace_id FROM {} WHERE file_uuid = $1 AND id = $2 LIMIT 1", + table + )) + .bind(&req.file_uuid) + .bind(req.id.unwrap()) + .fetch_optional(state.db.pool()) + .await + } else { + sqlx::query_scalar(&format!( + "SELECT trace_id FROM {} WHERE file_uuid = $1 AND face_id = $2 LIMIT 1", + table + )) + .bind(&req.file_uuid) + .bind(&face_identifier) + .fetch_optional(state.db.pool()) + .await + } + .map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"error": e.to_string()})), + ) + })? + .flatten(); + + // Clear Qdrant + TKG if trace_id exists + if let Some(tid) = trace_id { + // 1. 
Clear Qdrant payload + let face_db = crate::core::db::FaceEmbeddingDb::new(); + if let Err(e) = face_db + .clear_identity_by_trace(&req.file_uuid, tid) + .await + { + tracing::warn!( + "[unbind] Failed to clear Qdrant identity_uuid for trace {}: {}", + tid, e + ); + } + + // 2. Update TKG face_track node (restore stranger_ref) + let tkg_table = crate::core::db::schema::table_name("tkg_nodes"); + let ext_id = format!("face_track_{}", tid); + let stranger_ref = format!("{}:stranger_trace_{}", req.file_uuid, tid); + + let _ = sqlx::query(&format!( + "UPDATE {} SET properties = properties || $1::jsonb - 'identity_uuid' - 'identity_ref' \ + WHERE file_uuid = $2 AND node_type = 'face_track' AND external_id = $3", + tkg_table + )) + .bind(serde_json::json!({ + "stranger_ref": stranger_ref + })) + .bind(&req.file_uuid) + .bind(&ext_id) + .execute(state.db.pool()) + .await; + } + + let trace_id_opt: Option = if use_id_field { + sqlx::query_scalar(&format!( + "SELECT trace_id FROM {} WHERE file_uuid = $1 AND id = $2", + table + )) + .bind(&req.file_uuid) + .bind(req.id.unwrap()) + .fetch_optional(state.db.pool()) + .await + } else { + sqlx::query_scalar(&format!( + "SELECT trace_id FROM {} WHERE file_uuid = $1 AND face_id = $2", + table + )) + .bind(&req.file_uuid) + .bind(&face_identifier) + .fetch_optional(state.db.pool()) + .await + } .ok() .flatten(); @@ -251,9 +449,7 @@ pub async fn unbind_identity( .await; } - // Record history if there was a binding if let Some(identity_id) = old_identity_id { - // Clear bind redo stack let _ = sqlx::query(&format!( "DELETE FROM {} WHERE identity_id = $1 AND is_undone = true AND operation IN ('bind','unbind','bind_trace')", history_table @@ -262,7 +458,6 @@ pub async fn unbind_identity( .execute(state.db.pool()) .await; - // Insert history record let uid = auth.user_id.to_string(); let usrc = match auth.source { crate::api::middleware::AuthSource::Jwt => "jwt", @@ -270,10 +465,10 @@ pub async fn unbind_identity( 
crate::api::middleware::AuthSource::ApiKey => "api_key", }; let before = serde_json::json!({ - "file_uuid": req.file_uuid, "face_id": req.face_id, "identity_id_before": old_identity_id + "file_uuid": req.file_uuid, "face_id": face_identifier, "identity_id_before": old_identity_id }); let after = serde_json::json!({ - "file_uuid": req.file_uuid, "face_id": req.face_id, "identity_id_after": null + "file_uuid": req.file_uuid, "face_id": face_identifier, "identity_id_after": null }); let _ = sqlx::query(&format!( "INSERT INTO {} (identity_id, operation, before_snapshot, after_snapshot, is_undone, user_id, user_source) VALUES ($1, 'unbind', $2, $3, false, $4, $5)", @@ -315,7 +510,7 @@ pub async fn unbind_identity( Ok(Json(ApiResponse { success: true, - message: format!("Unbound face {} from {}", req.face_id, req.file_uuid), + message: format!("Unbound face {} from {}", face_identifier, req.file_uuid), data: Some(serde_json::json!({"rows_affected": result.rows_affected()})), })) } @@ -933,14 +1128,14 @@ pub async fn get_identity_traces( COUNT(*)::bigint AS frame_count, MIN(fd.frame_number)::int AS first_frame, MAX(fd.frame_number)::int AS last_frame, - ROUND(MIN(fd.frame_number)::numeric / NULLIF(v.fps, 0)::numeric, 1)::float8 AS first_sec, - ROUND(MAX(fd.frame_number)::numeric / NULLIF(v.fps, 0)::numeric, 1)::float8 AS last_sec, + COALESCE(ROUND(MIN(fd.frame_number)::numeric / NULLIF(v.fps, 0)::numeric, 1), 0)::float8 AS first_sec, + COALESCE(ROUND(MAX(fd.frame_number)::numeric / NULLIF(v.fps, 0)::numeric, 1), 0)::float8 AS last_sec, ROUND(AVG(fd.confidence)::numeric, 4)::float8 AS avg_confidence FROM {} fd - LEFT JOIN dev.videos v ON fd.file_uuid = v.file_uuid - WHERE fd.identity_id = $1 - GROUP BY trace_id, v.fps - ORDER BY trace_id + LEFT JOIN videos v ON fd.file_uuid = v.file_uuid + WHERE fd.identity_id = $1 AND fd.trace_id IS NOT NULL + GROUP BY fd.file_uuid, fd.trace_id, v.fps + ORDER BY fd.trace_id LIMIT $2 OFFSET $3"#, fd_table )) @@ -953,7 +1148,7 @@ pub async 
fn get_identity_traces( // Get total count for pagination let total: (i64,) = sqlx::query_as(&format!( - "SELECT COUNT(*) FROM (SELECT 1 FROM {} fd WHERE trace_id) sub", + "SELECT COUNT(*) FROM (SELECT 1 FROM {} fd WHERE fd.identity_id = $1 AND fd.trace_id IS NOT NULL GROUP BY fd.trace_id) sub", fd_table )) .bind(identity_id) @@ -1864,6 +2059,188 @@ pub async fn bind_history( })) } +// ============================================================================ +// Pending Person API (file-scoped) +// ============================================================================ + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CreatePendingPersonRequest { + #[serde(default)] + pub trace_ids: Vec, + pub name: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PendingPersonItem { + pub identity_uuid: String, + pub identity_id: i32, + pub name: String, + pub created_at: String, + pub trace_count: i64, + pub bound_traces: Option>, +} + +/// Create a pending person under a file, optionally binding traces. 
+pub async fn create_pending_person( + State(state): State, + Extension(_auth): Extension, + Path(file_uuid): Path, + Json(req): Json, +) -> Result>, (StatusCode, Json)> { + let id_table = crate::core::db::schema::table_name("identities"); + let fd_table = crate::core::db::schema::table_name("face_detections"); + let nodes_table = crate::core::db::schema::table_name("tkg_nodes"); + + // Auto-generate name if not provided + let name = if let Some(n) = &req.name { + n.clone() + } else { + let count: i64 = sqlx::query_scalar(&format!( + "SELECT COUNT(*) FROM {} WHERE file_uuid = $1 AND status = 'pending'", + id_table + )) + .bind(&file_uuid) + .fetch_one(state.db.pool()) + .await + .map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"error": e.to_string()})), + ) + })?; + format!("Person {}", count + 1) + }; + + // Create identity with pending status + let identity_row: (i32, String) = sqlx::query_as(&format!( + "INSERT INTO {} (name, identity_type, source, status, file_uuid) VALUES ($1, 'people', 'manual', 'pending', $2) RETURNING id, uuid::text", + id_table + )) + .bind(&name) + .bind(&file_uuid) + .fetch_one(state.db.pool()) + .await + .map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"error": format!("Failed to create identity: {}", e)})), + ) + })?; + + let (identity_id, identity_uuid): (i32, String) = identity_row; + + // Bind traces if provided + let bound_traces = if !req.trace_ids.is_empty() { + // Update face_detections + let _ = sqlx::query(&format!( + "UPDATE {} SET identity_id = $1 WHERE file_uuid = $2 AND trace_id = ANY($3)", + fd_table + )) + .bind(identity_id) + .bind(&file_uuid) + .bind(&req.trace_ids) + .execute(state.db.pool()) + .await + .map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"error": format!("Failed to bind traces: {}", e)})), + ) + })?; + + // Update TKG nodes + for &tid in &req.trace_ids { + let external_id = format!("face_track_{}", tid); + 
let _ = sqlx::query(&format!( + "UPDATE {} SET properties = jsonb_set(\ + jsonb_set(properties, '{{identity_id}}', $1::jsonb, false),\ + '{{identity_name}}', $2::jsonb, false)\ + WHERE file_uuid = $3 AND node_type = 'face_track' AND external_id = $4", + nodes_table + )) + .bind(identity_id) + .bind(&name) + .bind(&file_uuid) + .bind(&external_id) + .execute(state.db.pool()) + .await; + } + Some(req.trace_ids.clone()) + } else { + None + }; + + // Sync identity file + let _ = crate::core::identity::storage::save_identity_file_by_pool( + state.db.pool(), + &identity_uuid, + ) + .await; + + Ok(Json(ApiResponse { + success: true, + message: format!("Created pending person: {} (uuid: {})", name, identity_uuid), + data: Some(serde_json::json!({ + "identity_uuid": identity_uuid, + "identity_id": identity_id, + "name": name, + "bound_traces": bound_traces.map(|v| v.len()).unwrap_or(0), + })), + })) +} + +/// List pending persons for a file. +pub async fn list_pending_persons( + State(state): State, + Extension(_auth): Extension, + Path(file_uuid): Path, +) -> Result>>, (StatusCode, Json)> { + let id_table = crate::core::db::schema::table_name("identities"); + let fd_table = crate::core::db::schema::table_name("face_detections"); + + let rows: Vec<(i32, String, String, chrono::NaiveDateTime)> = sqlx::query_as(&format!( + "SELECT id, uuid::text, name, created_at FROM {} WHERE file_uuid = $1 AND status = 'pending' ORDER BY created_at DESC", + id_table + )) + .bind(&file_uuid) + .fetch_all(state.db.pool()) + .await + .map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"error": e.to_string()})), + ) + })?; + + let mut items = Vec::new(); + for (id, uuid, name, created_at) in rows { + let trace_count: i64 = sqlx::query_scalar(&format!( + "SELECT COUNT(DISTINCT trace_id) FROM {} WHERE identity_id = $1 AND file_uuid = $2", + fd_table + )) + .bind(id) + .bind(&file_uuid) + .fetch_one(state.db.pool()) + .await + .unwrap_or(0); + + 
items.push(PendingPersonItem { + identity_uuid: uuid, + identity_id: id, + name, + created_at: created_at.format("%Y-%m-%d %H:%M:%S").to_string(), + trace_count, + bound_traces: None, + }); + } + + Ok(Json(ApiResponse { + success: true, + message: format!("Found {} pending persons for {}", items.len(), file_uuid), + data: Some(items), + })) +} + pub fn identity_binding_routes() -> Router { Router::new() .route("/api/v1/identity/:identity_uuid/bind", post(bind_identity)) @@ -1892,4 +2269,12 @@ pub fn identity_binding_routes() -> Router { .route("/api/v1/identity/merge/:merge_id/undo", post(undo_merge)) .route("/api/v1/identity/merge/:merge_id/redo", post(redo_merge)) .route("/api/v1/identity/merge/history", get(get_merge_history)) + .route( + "/api/v1/file/:file_uuid/pending-person", + post(create_pending_person), + ) + .route( + "/api/v1/file/:file_uuid/pending-persons", + get(list_pending_persons), + ) } diff --git a/src/api/processing.rs b/src/api/processing.rs index 928c15b..472b355 100644 --- a/src/api/processing.rs +++ b/src/api/processing.rs @@ -59,6 +59,7 @@ struct JobDetailResponse { created_at: String, started_at: Option, updated_at: Option, + queue_position: Option, } #[derive(Debug, Serialize)] @@ -286,6 +287,31 @@ async fn trigger_processing( tracing::error!("[TRIGGER] Failed to update monitor job for {}: {}", file_uuid, e); StatusCode::INTERNAL_SERVER_ERROR })?; + + // Update videos.processing_status to PROCESSING immediately + let processor_names_upper: Vec = processors_to_run.iter().map(|p| p.to_uppercase()).collect(); + let progress: serde_json::Map = processors_to_run.iter().map(|p| { + (p.to_uppercase(), serde_json::json!({ + "current_frame": 0, "total_frames": 0, "percentage": 0, "status": "pending" + })) + }).collect(); + let status = serde_json::json!({ + "phase": "PROCESSING", + "active_processors": processor_names_upper, + "total_frames": 0, + "progress": progress + }); + sqlx::query(&format!( + "UPDATE {videos_table} SET processing_status = 
$1, updated_at = CURRENT_TIMESTAMP WHERE file_uuid = $2" + )) + .bind(&status) + .bind(&file_uuid) + .execute(state.db.pool()) + .await + .map_err(|e| { + tracing::error!("[TRIGGER] Failed to update processing status for {}: {}", file_uuid, e); + StatusCode::INTERNAL_SERVER_ERROR + })?; let processors_to_run_refs: Vec<&str> = processors_to_run.iter().map(|s| s.as_str()).collect(); @@ -531,6 +557,21 @@ async fn get_job(Path(uuid): Path) -> Result, St started_at, updated_at, ) = job.ok_or(StatusCode::NOT_FOUND)?; + + // Calculate queue position if status is 'pending' + let queue_position = if status == "pending" { + sqlx::query_scalar::<_, i64>(&format!( + "SELECT COUNT(*) + 1 FROM {} WHERE status = 'pending' AND created_at < (SELECT created_at FROM {} WHERE uuid = $1)", + jobs_table, jobs_table + )) + .bind(&uuid) + .fetch_one(pg.pool()) + .await + .ok() + .map(|pos| pos as i32) + } else { + None + }; Ok(Json(JobDetailResponse { id, @@ -543,6 +584,7 @@ async fn get_job(Path(uuid): Path) -> Result, St created_at, started_at, updated_at, + queue_position, })) } @@ -655,28 +697,27 @@ async fn get_processor_counts( } if let Ok(content) = std::fs::read_to_string(&json_path) { -if let Ok(json) = serde_json::from_str::(&content) { -// CUT: prioritize scenes count over frame_count -if proc_name == "cut" { -frame_count = json -.get("scenes") -.and_then(|v| v.as_array()) -.map(|arr| arr.len() as u32); -} else { -// Standard frame_count field -frame_count = json -.get("frame_count") -.and_then(|v| v.as_u64()) -.map(|v| v as u32); - -// YOLO: frames array -if frame_count.is_none() { -frame_count = json -.get("frames") -.and_then(|v| v.as_array()) -.map(|arr| arr.len() as u32); -} -} + if let Ok(json) = serde_json::from_str::(&content) { + // CUT: prioritize scenes count over frame_count + if proc_name == "cut" { + frame_count = json + .get("scenes") + .and_then(|v| v.as_array()) + .map(|arr| arr.len() as u32); + } else if proc_name == "yolo" { + // YOLO: use 
metadata.total_frames (avoids parsing huge frames array) + frame_count = json + .get("metadata") + .and_then(|m| m.get("total_frames")) + .and_then(|v| v.as_u64()) + .map(|v| v as u32); + } else { + // Standard frame_count field + frame_count = json + .get("frame_count") + .and_then(|v| v.as_u64()) + .map(|v| v as u32); + } segment_count = json .get("segments") @@ -738,6 +779,7 @@ pub fn processing_routes() -> Router { ) .route("/api/v1/progress/:file_uuid", post(get_progress)) .route("/api/v1/jobs", post(list_jobs)) + .route("/api/v1/job/:uuid", get(get_job)) .route("/api/v1/config/cache", post(cache_toggle)) .route("/api/v1/config/auto-pipeline", post(auto_pipeline_toggle)) .route( diff --git a/src/core/db/face_embedding_db.rs b/src/core/db/face_embedding_db.rs index f2679df..e3bdc47 100644 --- a/src/core/db/face_embedding_db.rs +++ b/src/core/db/face_embedding_db.rs @@ -23,6 +23,14 @@ pub struct FaceEmbeddingPayload { pub yaw: f64, pub pitch: f64, pub roll: f64, + #[serde(skip_serializing_if = "Option::is_none")] + pub identity_uuid: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub identity_ref: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub stranger_ref: Option, + #[serde(skip_serializing_if = "Option::is_none", rename = "type")] + pub r#type: Option, } #[derive(Debug, Clone, Deserialize)] @@ -166,13 +174,117 @@ impl FaceEmbeddingDb { .context("Failed to batch upsert face embeddings")?; if !response.status().is_success() { + let status = response.status(); let text = response.text().await.unwrap_or_default(); - anyhow::bail!("Qdrant batch upsert failed: {}", text); + anyhow::bail!("Qdrant batch upsert failed (HTTP {}): {}", status, text); } Ok(points.len()) } + pub async fn update_identity_by_trace( + &self, + file_uuid: &str, + trace_id: i32, + identity_uuid: &str, + ) -> Result { + let url = format!( + "{}/collections/{}/points", + self.base_url, self.collection_name + ); + + let body = serde_json::json!({ + "filter": { + 
"must": [ + { + "key": "file_uuid", + "match": { "value": file_uuid } + }, + { + "key": "trace_id", + "match": { "value": trace_id } + } + ] + }, + "payload": { + "identity_uuid": identity_uuid + } + }); + + let response = self + .client + .post(&url) + .header("api-key", &self.api_key) + .header("Content-Type", "application/json") + .json(&body) + .send() + .await + .context("Failed to update identity_uuid in Qdrant")?; + + if !response.status().is_success() { + let text = response.text().await.unwrap_or_default(); + anyhow::bail!("Qdrant identity update failed: {}", text); + } + + tracing::info!( + "[FaceEmbedding] Updated identity_uuid={} for file={}, trace={}", + identity_uuid, file_uuid, trace_id + ); + + Ok(1) + } + + pub async fn clear_identity_by_trace( + &self, + file_uuid: &str, + trace_id: i32, + ) -> Result { + let url = format!( + "{}/collections/{}/points", + self.base_url, self.collection_name + ); + + let body = serde_json::json!({ + "filter": { + "must": [ + { + "key": "file_uuid", + "match": { "value": file_uuid } + }, + { + "key": "trace_id", + "match": { "value": trace_id } + } + ] + }, + "payload": { + "identity_uuid": null + } + }); + + let response = self + .client + .post(&url) + .header("api-key", &self.api_key) + .header("Content-Type", "application/json") + .json(&body) + .send() + .await + .context("Failed to clear identity_uuid in Qdrant")?; + + if !response.status().is_success() { + let text = response.text().await.unwrap_or_default(); + anyhow::bail!("Qdrant identity clear failed: {}", text); + } + + tracing::info!( + "[FaceEmbedding] Cleared identity_uuid for file={}, trace={}", + file_uuid, trace_id + ); + + Ok(1) + } + pub async fn search_similar( &self, query_embedding: &[f32], @@ -294,6 +406,26 @@ impl FaceEmbeddingDb { .get("roll") .and_then(|v| v.as_f64()) .unwrap_or(0.0), + identity_uuid: r + .payload + .get("identity_uuid") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()), + identity_ref: r + .payload + 
.get("identity_ref") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()), + stranger_ref: r + .payload + .get("stranger_ref") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()), + r#type: r + .payload + .get("type") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()), }; FaceEmbeddingPoint { id, @@ -498,6 +630,26 @@ impl FaceEmbeddingDb { .get("roll") .and_then(|v| v.as_f64()) .unwrap_or(0.0), + identity_uuid: r + .payload + .get("identity_uuid") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()), + identity_ref: r + .payload + .get("identity_ref") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()), + stranger_ref: r + .payload + .get("stranger_ref") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()), + r#type: r + .payload + .get("type") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()), }; (id, r.vector, payload) }) @@ -537,6 +689,258 @@ impl FaceEmbeddingDb { Ok(0) } + + pub async fn upsert_seed_embedding( + &self, + identity_uuid: &str, + identity_name: &str, + tmdb_id: i32, + embedding: &[f32], + ) -> Result<()> { + let url = format!( + "{}/collections/{}/points?wait=true", + self.base_url, self.collection_name + ); + + let point_id = identity_uuid.to_string(); + let payload = serde_json::json!({ + "file_uuid": "", + "trace_id": 0, + "frame": 0, + "bbox_x": 0.0, + "bbox_y": 0.0, + "bbox_w": 0.0, + "bbox_h": 0.0, + "confidence": 0.0, + "yaw": 0.0, + "pitch": 0.0, + "roll": 0.0, + "identity_uuid": identity_uuid, + "identity_ref": serde_json::Value::Null, + "stranger_ref": serde_json::Value::Null, + "identity_name": identity_name, + "tmdb_id": tmdb_id, + "type": "identity_seed", + }); + + let body = serde_json::json!({ + "points": [{ + "id": point_id, + "vector": embedding, + "payload": payload + }] + }); + + let response = self + .client + .put(&url) + .header("api-key", &self.api_key) + .header("Content-Type", "application/json") + .json(&body) + .send() + .await + .context("Failed to upsert seed embedding")?; + + if 
!response.status().is_success() { + let text = response.text().await.unwrap_or_default(); + anyhow::bail!("Qdrant seed upsert failed: {}", text); + } + + tracing::info!( + "[SeedEmbedding] Stored seed for identity_uuid={}, name={}", + identity_uuid, identity_name + ); + + Ok(()) + } + + pub async fn get_seed_embeddings( + &self, + ) -> Result)>> { + let url = format!( + "{}/collections/{}/points/scroll", + self.base_url, self.collection_name + ); + + let body = serde_json::json!({ + "limit": 10000, + "with_payload": true, + "with_vector": true, + "filter": { + "must": [ + {"key": "type", "match": { "value": "identity_seed" }} + ] + } + }); + + let response = self + .client + .post(&url) + .header("api-key", &self.api_key) + .header("Content-Type", "application/json") + .json(&body) + .send() + .await + .context("Failed to scroll seed embeddings")?; + + let status = response.status(); + let text = response.text().await.unwrap_or_default(); + + if !status.is_success() { + anyhow::bail!("Qdrant scroll failed: {} - {}", status, text); + } + + #[derive(Deserialize)] + struct ScrollResult { + result: ScrollPoints, + } + + #[derive(Deserialize)] + struct ScrollPoints { + points: Vec, + } + + #[derive(Deserialize)] + struct PointResult { + id: serde_json::Value, + vector: Vec, + payload: HashMap, + } + + let parsed: ScrollResult = + serde_json::from_str(&text).context("Failed to parse Qdrant scroll response")?; + + let results: Vec<(String, String, Vec)> = parsed + .result + .points + .into_iter() + .filter_map(|r| { + let identity_uuid = r + .payload + .get("identity_uuid") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let identity_name = r + .payload + .get("identity_name") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + if identity_uuid.is_empty() { + None + } else { + Some((identity_uuid, identity_name, r.vector)) + } + }) + .collect(); + + Ok(results) + } + + pub async fn update_identity_ref_by_trace( + &self, + file_uuid: &str, + 
trace_id: i32, + identity_ref: &str, + ) -> Result { + let url = format!( + "{}/collections/{}/points/payload", + self.base_url, self.collection_name + ); + + let body = serde_json::json!({ + "filter": { + "must": [ + { + "key": "file_uuid", + "match": { "value": file_uuid } + }, + { + "key": "trace_id", + "match": { "value": trace_id } + } + ] + }, + "payload": { + "identity_ref": identity_ref + } + }); + + let response = self + .client + .post(&url) + .header("api-key", &self.api_key) + .header("Content-Type", "application/json") + .json(&body) + .send() + .await + .context("Failed to update identity_ref in Qdrant")?; + + if !response.status().is_success() { + let text = response.text().await.unwrap_or_default(); + anyhow::bail!("Qdrant identity_ref update failed: {}", text); + } + + tracing::info!( + "[FaceEmbedding] Updated identity_ref={} for file={}, trace={}", + identity_ref, file_uuid, trace_id + ); + + Ok(1) + } + + pub async fn update_stranger_ref_by_trace( + &self, + file_uuid: &str, + trace_id: i32, + stranger_ref: &str, + ) -> Result { + let url = format!( + "{}/collections/{}/points/payload", + self.base_url, self.collection_name + ); + + let body = serde_json::json!({ + "filter": { + "must": [ + { + "key": "file_uuid", + "match": { "value": file_uuid } + }, + { + "key": "trace_id", + "match": { "value": trace_id } + } + ] + }, + "payload": { + "stranger_ref": stranger_ref + } + }); + + let response = self + .client + .post(&url) + .header("api-key", &self.api_key) + .header("Content-Type", "application/json") + .json(&body) + .send() + .await + .context("Failed to update stranger_ref in Qdrant")?; + + if !response.status().is_success() { + let text = response.text().await.unwrap_or_default(); + anyhow::bail!("Qdrant stranger_ref update failed: {}", text); + } + + tracing::info!( + "[FaceEmbedding] Updated stranger_ref={} for file={}, trace={}", + stranger_ref, file_uuid, trace_id + ); + + Ok(1) + } } impl Default for FaceEmbeddingDb { diff --git 
a/src/core/db/postgres_db.rs b/src/core/db/postgres_db.rs index 492892c..cdad2a2 100644 --- a/src/core/db/postgres_db.rs +++ b/src/core/db/postgres_db.rs @@ -5,6 +5,7 @@ use serde_json; use sqlx::{postgres::PgPoolOptions, PgPool, Row}; use std::sync::Arc; use tokio::sync::RwLock; +use tracing::{info, warn, error}; use uuid::Uuid; use super::{schema, Database, QdrantDb}; @@ -448,6 +449,7 @@ pub enum ProcessorType { FiveW1H, Appearance, MediaPipe, + FaceCluster, } impl sqlx::Type for ProcessorType { @@ -487,6 +489,7 @@ impl ProcessorType { ProcessorType::FiveW1H => "5w1h", ProcessorType::Appearance => "appearance", ProcessorType::MediaPipe => "mediapipe", + ProcessorType::FaceCluster => "face_cluster", } } @@ -505,6 +508,7 @@ impl ProcessorType { "5w1h" => Some(ProcessorType::FiveW1H), "appearance" => Some(ProcessorType::Appearance), "mediapipe" => Some(ProcessorType::MediaPipe), + "face_cluster" => Some(ProcessorType::FaceCluster), _ => None, } } @@ -524,13 +528,14 @@ impl ProcessorType { ProcessorType::FiveW1H => 0.1, ProcessorType::Appearance => 0.3, ProcessorType::MediaPipe => 0.3, + ProcessorType::FaceCluster => 0.7, } } pub fn uses_gpu(&self) -> bool { match self { ProcessorType::Yolo | ProcessorType::Face | ProcessorType::Pose | ProcessorType::Hand => true, - ProcessorType::MediaPipe => false, + ProcessorType::MediaPipe | ProcessorType::FaceCluster => false, _ => false, } } @@ -550,6 +555,7 @@ impl ProcessorType { ProcessorType::FiveW1H => 256, ProcessorType::Appearance => 512, ProcessorType::MediaPipe => 1024, + ProcessorType::FaceCluster => 1024, } } @@ -568,6 +574,7 @@ impl ProcessorType { ProcessorType::FiveW1H => Some("gemma4"), ProcessorType::Appearance => None, ProcessorType::MediaPipe => Some("mediapipe/holistic"), + ProcessorType::FaceCluster => Some("sklearn/agglomerative"), } } @@ -583,6 +590,7 @@ impl ProcessorType { ], ProcessorType::FiveW1H => vec![ProcessorType::Story], ProcessorType::Appearance => vec![ProcessorType::Pose], + 
ProcessorType::FaceCluster => vec![ProcessorType::Face], ProcessorType::Hand => vec![], ProcessorType::MediaPipe => vec![], _ => vec![], @@ -597,6 +605,7 @@ impl ProcessorType { ProcessorType::Yolo, ProcessorType::Ocr, ProcessorType::Face, + ProcessorType::FaceCluster, ProcessorType::Pose, ProcessorType::Hand, ProcessorType::Appearance, @@ -611,7 +620,8 @@ impl ProcessorType { | ProcessorType::Pose | ProcessorType::Hand | ProcessorType::Appearance - | ProcessorType::MediaPipe => PipelineType::Frame, + | ProcessorType::MediaPipe + | ProcessorType::FaceCluster => PipelineType::Frame, ProcessorType::Cut | ProcessorType::Asr @@ -1074,9 +1084,9 @@ impl PostgresDb { let mj_cols = [ "video_id BIGINT", "user_id BIGINT", - "processors TEXT[]", - "completed_processors TEXT[]", - "failed_processors TEXT[]", + "processors TEXT[] DEFAULT '{\"asr\",\"cut\",\"yolo\",\"ocr\",\"face\",\"pose\",\"asrx\"}'", + "completed_processors TEXT[] DEFAULT '{}'", + "failed_processors TEXT[] DEFAULT '{}'", ]; for col in &mj_cols { let (col_name, col_def) = col.split_once(' ').unwrap_or((col, "")); @@ -1087,6 +1097,10 @@ impl PostgresDb { .execute(pool) .await?; } + // Update existing rows to have default processors array + sqlx::query("UPDATE monitor_jobs SET processors = '{\"asr\",\"cut\",\"yolo\",\"ocr\",\"face\",\"pose\",\"asrx\"}' WHERE processors IS NULL OR processors = '{}'") + .execute(pool) + .await?; sqlx::query("CREATE INDEX IF NOT EXISTS idx_monitor_jobs_status ON monitor_jobs(status)") .execute(pool) .await?; @@ -1869,16 +1883,16 @@ impl PostgresDb { .await? 
} else { // Insert new job - sqlx::query( - &format!( - r#" - INSERT INTO {} (uuid, video_path, status, video_id) - VALUES ($1, $2, 'pending', $3) +sqlx::query( + &format!( + r#" + INSERT INTO {} (uuid, video_path, status, video_id, processors) + VALUES ($1, $2, 'pending', $3, ARRAY['asr','cut','yolo','ocr','face','face_cluster','pose','asrx']) RETURNING id, uuid, video_path, status, current_processor, progress_total, progress_current, error_count, last_error, started_at::TEXT, updated_at::TEXT, created_at::TEXT, processors, completed_processors, failed_processors, video_id "#, - jobs_table + jobs_table + ) ) - ) .bind(uuid) .bind(video_path) .bind(video_id_i64) @@ -3176,6 +3190,40 @@ impl PostgresDb { Ok(r.rows_affected()) } + pub async fn retry_failed_processor( + &self, + result_id: i32, + max_retries: i32, + ) -> Result { + let table = schema::table_name("processor_results"); + use sqlx::Row; + + let current_retry: i32 = sqlx::query_scalar(&format!( + "SELECT COALESCE(retry_count, 0) FROM {} WHERE id = $1", + table + )) + .bind(result_id) + .fetch_one(&self.pool) + .await?; + + if current_retry < max_retries { + sqlx::query(&format!( + "UPDATE {} SET status = 'pending', error_message = NULL, retry_count = $1, updated_at = CURRENT_TIMESTAMP WHERE id = $2", + table + )) + .bind(current_retry + 1) + .bind(result_id) + .execute(&self.pool) + .await?; + + info!("🔄 Retrying processor (result_id={}, retry_count={}/{})", result_id, current_retry + 1, max_retries); + Ok(true) + } else { + info!("⚠️ Processor exceeded max retries (result_id={}, retry_count={})", result_id, current_retry); + Ok(false) + } + } + pub async fn search_bm25( &self, query: &str, diff --git a/src/core/person_identity.rs b/src/core/person_identity.rs index a802f6d..2023b53 100644 --- a/src/core/person_identity.rs +++ b/src/core/person_identity.rs @@ -69,7 +69,8 @@ pub struct IdentityBinding { #[derive(Debug, Clone, Deserialize, Serialize)] pub struct BindIdentityRequest { pub file_uuid: String, - 
pub face_id: String, + pub face_id: Option, + pub id: Option, } #[derive(Debug, Clone, Deserialize, Serialize)] @@ -81,7 +82,8 @@ pub struct BindIdentityTraceRequest { #[derive(Debug, Clone, Deserialize, Serialize)] pub struct UnbindIdentityRequest { pub file_uuid: String, - pub face_id: String, + pub face_id: Option, + pub id: Option, } #[derive(Debug, Clone, Deserialize, Serialize)] diff --git a/src/core/processor/face_clustering.rs b/src/core/processor/face_clustering.rs new file mode 100644 index 0000000..5ba1caf --- /dev/null +++ b/src/core/processor/face_clustering.rs @@ -0,0 +1,75 @@ +use anyhow::{Context, Result}; +use serde::{Deserialize, Serialize}; +use std::time::Duration; + +use super::executor::PythonExecutor; + +const FACE_CLUSTER_TIMEOUT: Duration = Duration::from_secs(3600); + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct FaceClusterResult { + pub clusters: Vec, + pub frames: Vec, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct FaceClusterInfo { + pub cluster_id: String, + pub face_count: usize, + pub representative_face: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct FaceClusterFrame { + pub frame: u64, + pub timestamp: f64, + pub faces: Vec, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ClusteredFace { + pub face_id: String, + pub cluster_id: String, + pub confidence: f32, +} + +pub async fn process_face_cluster( + video_path: &str, + output_path: &str, + uuid: Option<&str>, + frames: Option<&[i64]>, +) -> Result { + let executor = PythonExecutor::new()?; + let script_path = executor.script_path("fast_face_clustering_processor.py"); + + tracing::info!("[FACE_CLUSTER] Starting face clustering: {}", video_path); + + if !script_path.exists() { + tracing::warn!("[FACE_CLUSTER] Script not found, returning empty result"); + return Ok(FaceClusterResult { + clusters: vec![], + frames: vec![], + }); + } + + executor + .run_with_frames( + "fast_face_clustering_processor.py", + 
&[video_path, output_path], + uuid, + "FACE_CLUSTER", + Some(FACE_CLUSTER_TIMEOUT), + frames, + ) + .await + .with_context(|| format!("Failed to run face clustering script"))?; + + let json_str = std::fs::read_to_string(output_path).context("Failed to read FACE_CLUSTER output")?; + + let result: FaceClusterResult = + serde_json::from_str(&json_str).context("Failed to parse FACE_CLUSTER output")?; + + tracing::info!("[FACE_CLUSTER] Result: {} clusters, {} frames", result.clusters.len(), result.frames.len()); + + Ok(result) +} \ No newline at end of file diff --git a/src/core/processor/mod.rs b/src/core/processor/mod.rs index 0cfdabf..b64b7c7 100644 --- a/src/core/processor/mod.rs +++ b/src/core/processor/mod.rs @@ -7,6 +7,7 @@ pub mod clip; pub mod cut; pub mod executor; pub mod face; +pub mod face_clustering; pub mod face_recognition; pub mod hand; pub mod heuristic_scene; @@ -32,6 +33,9 @@ pub use clip::{ pub use cut::{process_cut, CutResult, CutScene}; pub use executor::{validate_python_env, PythonExecutor, RetryConfig}; pub use face::{process_face, Face, FaceFrame, FaceResult}; +pub use face_clustering::{ + process_face_cluster, ClusteredFace, FaceClusterFrame, FaceClusterInfo, FaceClusterResult, +}; pub use face_recognition::{ process_face_recognition, register_face, FaceAttributes, FaceCluster, FaceIdentity, FacePose, FaceRecognitionFrame, FaceRecognitionResult, FaceRegistrationResult, RecognizedFace, diff --git a/src/core/processor/tkg.rs b/src/core/processor/tkg.rs index 8f92f0a..7ca3d83 100644 --- a/src/core/processor/tkg.rs +++ b/src/core/processor/tkg.rs @@ -129,7 +129,7 @@ async fn populate_face_embeddings_to_qdrant( // Load from face_detections table let fd_table = t("face_detections"); let rows: Vec<(i32, i64, f64, f64, f64, f64, f64, Option>)> = sqlx::query_as(&format!( - "SELECT trace_id::int, frame_number::bigint, x::float8, y::float8, width::float8, height::float8, confidence::float8, embedding \ + "SELECT trace_id::int, frame_number::bigint, 
x::float8, y::float8, width::float8, height::float8, confidence::float8, embedding::float4[] \ FROM {} WHERE file_uuid = $1 AND trace_id IS NOT NULL AND embedding IS NOT NULL", fd_table )) @@ -165,11 +165,20 @@ async fn populate_face_embeddings_to_qdrant( yaw, pitch, roll, + identity_uuid: None, + identity_ref: None, + stranger_ref: None, + r#type: None, }; points.push((point_id, emb.clone(), payload)); } } + info!( + "[TKG-Phase1] Attempting to store {} face embeddings in Qdrant for {}", + points.len(), + file_uuid + ); let count = face_db.batch_upsert(points).await?; info!( "[TKG-Phase1] Stored {} face embeddings in Qdrant for {}", @@ -401,19 +410,7 @@ fn detect_mutual_gaze( #[derive(Debug, Deserialize)] struct YoloJson { #[serde(default)] -frames: Vec, -} - -#[derive(Debug, Deserialize)] -struct YoloFrameData { -#[serde(default)] -frame: u32, -#[serde(default)] -timestamp: f64, -#[serde(default)] -detections: Vec, -#[serde(default)] -objects: Vec, +frames: HashMap, } #[derive(Debug, Deserialize)] @@ -1033,7 +1030,7 @@ async fn build_yolo_object_nodes( .with_context(|| format!("Failed to parse {:?}", yolo_path))?; let mut class_counts: HashMap = HashMap::new(); -for fdata in &yolo.frames { +for fdata in yolo.frames.values() { let dets = if !fdata.detections.is_empty() { &fdata.detections } else { @@ -1277,9 +1274,9 @@ async fn build_co_occurrence_edges_from_qdrant( let mut edge_count = 0; for (frame, faces) in frame_faces.iter() { -let yolo_frame = match yolo.frames.iter().find(|f| f.frame == *frame as u32) { -Some(f) => f, -None => continue, +let yolo_frame = match yolo.frames.get(&frame.to_string()) { + Some(f) => f, + None => continue, }; let dets = if !yolo_frame.detections.is_empty() { @@ -1391,9 +1388,9 @@ async fn build_co_occurrence_edges_from_pg( let mut edge_count = 0; for face in &face_rows { -let yolo_frame = match yolo.frames.iter().find(|f| f.frame == face.frame_number as u32) { -Some(f) => f, -None => continue, +let yolo_frame = match 
yolo.frames.get(&face.frame_number.to_string()) { + Some(f) => f, + None => continue, }; let dets = if !yolo_frame.detections.is_empty() { @@ -2411,7 +2408,9 @@ async fn build_gaze_track_nodes_from_face_json( let nodes_table = t("tkg_nodes"); sqlx::query(&format!( "INSERT INTO {} (file_uuid, external_id, label, node_type, properties, created_at) \ - VALUES ($1, $2, $3, 'gaze_track', $4, NOW())", + VALUES ($1, $2, $3, 'gaze_track', $4, NOW()) \ + ON CONFLICT (file_uuid, node_type, external_id) \ + DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties)", nodes_table )) .bind(file_uuid) @@ -3063,7 +3062,9 @@ async fn build_lip_track_nodes_from_face_json( let nodes_table = t("tkg_nodes"); sqlx::query(&format!( "INSERT INTO {} (file_uuid, external_id, label, node_type, properties, created_at) \ - VALUES ($1, $2, $3, 'lip_track', $4, NOW())", + VALUES ($1, $2, $3, 'lip_track', $4, NOW()) \ + ON CONFLICT (file_uuid, node_type, external_id) \ + DO UPDATE SET properties = COALESCE(EXCLUDED.properties, tkg_nodes.properties)", nodes_table )) .bind(file_uuid) @@ -3814,10 +3815,10 @@ async fn build_hand_object_edges(pool: &PgPool, file_uuid: &str, output_dir: &st let yolo_frames: HashMap> = yolo.frames .iter() - .filter_map(|f| { + .filter_map(|(frame_key, f)| { let objs = if !f.objects.is_empty() { &f.objects } else { &f.detections }; if !objs.is_empty() { - Some((f.frame as u64, objs)) + frame_key.parse::().ok().map(|n| (n, objs)) } else { None } diff --git a/src/worker/job_worker.rs b/src/worker/job_worker.rs index 5504983..8595d62 100644 --- a/src/worker/job_worker.rs +++ b/src/worker/job_worker.rs @@ -646,6 +646,10 @@ impl JobWorker { Ok(()) } } + crate::core::db::ProcessorType::FaceCluster => { + info!("Face clustering processor completed for {}", job.uuid); + Ok(()) + } crate::core::db::ProcessorType::Pose => { if let Ok(result) = serde_json::from_str::< crate::core::processor::PoseResult, @@ -1093,6 +1097,33 @@ vector, .filter(|r| 
job_processors.contains(&r.processor_type.as_str().to_string())) .any(|r| matches!(r.status, crate::core::db::ProcessorJobStatus::Pending)); + const MAX_RETRIES: i32 = 3; + + if any_failed && !any_pending { + let failed_processors_to_retry: Vec = results + .iter() + .filter(|r| { + job_processors.contains(&r.processor_type.as_str().to_string()) + && matches!(r.status, crate::core::db::ProcessorJobStatus::Failed) + && r.retry_count < MAX_RETRIES + }) + .map(|r| r.id) + .collect(); + + if !failed_processors_to_retry.is_empty() { + info!("🔄 Attempting to retry {} failed processors...", failed_processors_to_retry.len()); + + for result_id in failed_processors_to_retry { + if let Ok(true) = self.db.retry_failed_processor(result_id, MAX_RETRIES).await { + if let Ok(mut conn) = self.redis.get_conn().await { + let redis_key = format!("momentry:progress:{}", uuid); + let _: Result = redis::AsyncCommands::del(&mut conn, &redis_key).await; + } + } + } + } + } + let any_skipped = results .iter() .filter(|r| job_processors.contains(&r.processor_type.as_str().to_string())) diff --git a/src/worker/processor.rs b/src/worker/processor.rs index 8b3f4f2..ff891f2 100644 --- a/src/worker/processor.rs +++ b/src/worker/processor.rs @@ -747,6 +747,27 @@ impl ProcessorPool { pid: 0, }) } + ProcessorType::FaceCluster => { + let result = processor::process_face_cluster( + video_path, + output_path.to_str().unwrap(), + uuid, + Some(&sample_frames), + ) + .await?; + tracing::info!( + "FACE_CLUSTER completed, output: {}", + output_path.to_str().unwrap() + ); + Ok(ProcessorOutput { + data: serde_json::to_value(result)?, + chunks_produced: 0, + frames_processed: 0, + total_frames: 0, + retry_count: 0, + pid: 0, + }) + } ProcessorType::Pose => { let result = processor::process_pose( video_path,