From f6a24e8cb56a102857431b0c6cbe5e523b36e6a4 Mon Sep 17 00:00:00 2001 From: Accusys Date: Fri, 22 May 2026 09:56:10 +0800 Subject: [PATCH] docs: thumbnail auto-detect + representative-frame endpoint in 08_media.md; sync wasm --- docs_v1.0/API_WORKSPACE/modules/08_media.md | 121 +++++++++++++++++++- docs_v1.0/doc_wasm/modules/07_identity.md | 118 ++++++++++++++++++- docs_v1.0/doc_wasm/modules/08_media.md | 121 +++++++++++++++++++- 3 files changed, 351 insertions(+), 9 deletions(-) diff --git a/docs_v1.0/API_WORKSPACE/modules/08_media.md b/docs_v1.0/API_WORKSPACE/modules/08_media.md index cf81696..e6927cc 100644 --- a/docs_v1.0/API_WORKSPACE/modules/08_media.md +++ b/docs_v1.0/API_WORKSPACE/modules/08_media.md @@ -194,6 +194,8 @@ Uses a built-in 5×7 bitmap font renderer to draw labels directly on video frame Extract a single frame from a video as JPEG image. Uses FFmpeg `select` filter. +When `frame` is omitted, the system automatically selects the best representative frame using the TKG bridge (see algorithm below). + **Auth**: Required **Scope**: file-level @@ -201,7 +203,7 @@ Extract a single frame from a video as JPEG image. Uses FFmpeg `select` filter. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `frame` | integer | Yes | — | Zero-based frame number to extract | +| `frame` | integer | No | auto-detect | Zero-based frame number to extract. Omit for auto-detect. | | `x` | integer | No | — | Crop start X (left edge). Requires `y`, `w`, `h`. | | `y` | integer | No | — | Crop start Y (top edge). Requires `x`, `w`, `h`. | | `w` | integer | No | — | Crop width in pixels. Requires `x`, `y`, `h`. | @@ -209,9 +211,26 @@ Extract a single frame from a video as JPEG image. Uses FFmpeg `select` filter. All four crop params (`x`, `y`, `w`, `h`) must be provided together or omitted. 
-#### Example +#### Auto-detect Algorithm + +When `frame` is not provided, the endpoint finds the best frame using this fallback chain: + +1. **Main characters**: find the two identities with the most face detections (TMDb source) +2. **Mutual gaze**: if their face traces have a TKG `CO_OCCURS_WITH` edge with `mutual_gaze=true`, take `first_frame` +3. **Co-occurrence**: fallback to the first frame where both identities appear together +4. **Single identity**: if only one main identity exists, take its highest-quality face frame +5. **Any identity**: fallback to the best-quality face frame across all identities +6. **Error**: if no face exists, returns `404` + +The selected frame is constrained to the **first half of the video** (`total_frames / 2`). + +#### Examples ```bash +# Auto-detect best representative frame +curl -s "$API/api/v1/file/$FILE_UUID/thumbnail" \ + -H "X-API-Key: $KEY" -o representative.jpg + # Extract frame 1000 (full frame) curl -s "$API/api/v1/file/bd80fec92b0b6963d177a2c55bf713e2/thumbnail?frame=1000" \ -H "Authorization: Bearer $JWT" -o frame_1000.jpg @@ -224,10 +243,106 @@ curl -s "$API/api/v1/file/bd80fec92b0b6963d177a2c55bf713e2/thumbnail?frame=1000& #### Response - **200**: `image/jpeg` binary data -- **404**: File not found +- **404**: File not found / No faces in file (auto-detect) - **500**: FFmpeg error (e.g., frame number exceeds video duration) -### `GET /api/v1/file/:file_uuid/clip` +#### Technical Details + +| Detail | Value | +|--------|-------| +| **Backend** | FFmpeg (`ffmpeg-full`) | +| **Filter** | `select=eq(n\,FRAME)` to select frame, optional `crop=W:H:X:Y` | +| **Output** | Single JPEG via pipe (`image2pipe`, `mjpeg` codec) | +| **Cache** | `Cache-Control: public, max-age=86400` (24h) | +| **Frame number** | Zero-based (`frame=0` = first frame of video) | + +--- + +### `GET /api/v1/file/:file_uuid/representative-frame` + +Return JSON metadata about the best representative frame for the video. 
Uses the same auto-detect algorithm as `GET /thumbnail` (without crop support). + +**Auth**: Required +**Scope**: file-level + +#### Example + +```bash +curl -s "$API/api/v1/file/$FILE_UUID/representative-frame" \ + -H "X-API-Key: $KEY" | jq '.' +``` + +#### Response (200) + +```json +{ + "success": true, + "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692", + "frame_number": 38165, + "timestamp_secs": 1526.6, + "face_quality": 37292.97, + "main_identities": [ + { + "identity_uuid": "c3545906-c82d-4b66-aa1d-150bc02decce", + "name": "Audrey Hepburn", + "face_count": 16456 + }, + { + "identity_uuid": "2b0ddefe-e2a9-4533-9308-b375594604d5", + "name": "Cary Grant", + "face_count": 10643 + } + ], + "traces": [ + { + "trace_id": 919, + "identity_uuid": "2b0ddefe-e2a9-4533-9308-b375594604d5", + "name": "Cary Grant", + "x": 764, + "y": 237, + "width": 199, + "height": 199, + "confidence": 0.8426 + }, + { + "trace_id": 920, + "identity_uuid": "c3545906-c82d-4b66-aa1d-150bc02decce", + "name": "Audrey Hepburn", + "x": 1143, + "y": 312, + "width": 215, + "height": 215, + "confidence": 0.8068 + } + ] +} +``` + +#### Response Fields + +| Field | Type | Description | +|-------|------|-------------| +| `frame_number` | integer | Selected representative frame number (primary coordinate) | +| `timestamp_secs` | float | Time in seconds (derived from `frame_number / fps`) | +| `face_quality` | float | Quality score `area × confidence` of the best face at this frame | +| `main_identities` | array | Top 2 most frequent TMDb identities in the file | +| `main_identities[].name` | string | Identity display name | +| `main_identities[].face_count` | integer | Total face detections count | +| `traces` | array | All face traces present at the selected frame | +| `traces[].trace_id` | integer | Face trace ID | +| `traces[].identity_uuid` | string or null | Matched identity UUID | +| `traces[].name` | string or null | Identity name | +| `traces[].x, y, width, height` | integer | Bounding box 
coordinates | +| `traces[].confidence` | float | Detection confidence (0.0–1.0) | + +#### Error Responses + +| HTTP | When | +|------|------| +| `404` | File not found / No faces in file | +| `500` | Database error | + +### `GET /api/v1/file/:file_uuid/clip` Extract a video clip (time range) as MPEG-TS stream. Uses FFmpeg `-ss` fast seek. diff --git a/docs_v1.0/doc_wasm/modules/07_identity.md b/docs_v1.0/doc_wasm/modules/07_identity.md index d0ca1b1..66dc65a 100644 --- a/docs_v1.0/doc_wasm/modules/07_identity.md +++ b/docs_v1.0/doc_wasm/modules/07_identity.md @@ -74,6 +74,66 @@ Delete an identity permanently. --- +### `PATCH /api/v1/identity/:identity_uuid` + +**Auth**: Required +**Scope**: identity-level + +Partially update an identity. Only provided fields are modified. The `name` field is a display label and may repeat across identities (removed UNIQUE constraint). Aliases for multilingual display are stored in `metadata.aliases` (see BCP 47 reference below). + +#### Request (JSON, all fields optional) + +| Field | Type | Description | +|-------|------|-------------| +| `name` | string | New display name | +| `metadata` | object | Merged into existing metadata. Use `"aliases"` key for locale-tagged names | +| `status` | string | `"confirmed"`, `"pending"`, or `"skipped"` | +| `identity_type` | string | `"people"`, `"brand"`, `"object"`, `"concept"`, etc. 
| + +#### Example + +```bash +# Update name and add aliases +curl -s -X PATCH "$API/api/v1/identity/$IDENTITY_UUID" \ + -H "X-API-Key: $KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "John Smith", + "metadata": { + "aliases": [ + {"locale": "en", "name": "John Smith"}, + {"locale": "zh-TW", "name": "約翰·史密斯"}, + {"locale": "ja", "name": "ジョン・スミス"} + ] + } + }' + +# Update status only +curl -s -X PATCH "$API/api/v1/identity/$IDENTITY_UUID" \ + -H "X-API-Key: $KEY" \ + -H "Content-Type: application/json" \ + -d '{"status": "confirmed"}' +``` + +#### Response (200) + +```json +{ + "success": true, + "identity_uuid": "a9a901056d6b46ff92da0c3c1a57dff4", + "updated_fields": ["name", "metadata"] +} +``` + +#### Error Responses + +| HTTP | When | +|------|------| +| `400` | No fields to update or invalid UUID format | +| `404` | Identity not found | + +--- + ### `GET /api/v1/identity/:identity_uuid/files` **Auth**: Required @@ -330,7 +390,63 @@ curl -s "$API/api/v1/identity/$IDENTITY_UUID/profile-image" \ |----------------|-------| | `content-type` | `image/jpeg` or `image/png` | +--- +## Alias System (BCP 47 Locale Tags) + +Identity aliases support multilingual display names. Aliases are stored in `metadata.aliases` as an array of `{locale, name}` objects. + +### BCP 47 Locale Tags Reference + +| Locale | Tag | Example | +|--------|-----|---------| +| English | `en` | John Smith | +| Traditional Chinese | `zh-TW` | 約翰·史密斯 | +| Simplified Chinese | `zh-CN` | 约翰·史密斯 | +| Japanese | `ja` | ジョン・スミス | +| Korean | `ko` | 존 스미스 | +| Cantonese | `yue` | 約翰·史密夫 | +| French | `fr` | John Smith (French spelling) | +| Spanish | `es` | Juan Smith | +| Arabic | `ar` | جون سميث | +| Russian | `ru` | Джон Смит | +| Thai | `th` | จอห์น สมิธ | + +BCP 47 is the IETF standard for language tags. Format: `language` (e.g. `en`, `ja`) or `language-Region` (e.g. `zh-TW`, `zh-CN`). Region suffix distinguishes regional variants. 
+ +### Frontend Display Logic + +```javascript +function getDisplayName(identity, preferredLocale) { + // 1. Exact locale match + const match = identity.metadata?.aliases?.find(a => a.locale === preferredLocale); + if (match) return match.name; + + // 2. Language-only match (zh-TW → zh) + const lang = preferredLocale.split('-')[0]; + const langMatch = identity.metadata?.aliases?.find(a => a.locale.startsWith(lang)); + if (langMatch) return langMatch.name; + + // 3. Fallback to identity.name + return identity.name; +} +``` + +### Updating Aliases via PATCH + +```http +PATCH /api/v1/identity/:identity_uuid +{ + "metadata": { + "aliases": [ + {"locale": "en", "name": "John Smith"}, + {"locale": "zh-TW", "name": "約翰·史密斯"} + ] + } +} +``` + +This **replaces** the entire `aliases` array. To add to existing aliases, include all existing entries in the request. --- -*Updated: 2026-05-19 12:49:24* +*Updated: 2026-05-22* diff --git a/docs_v1.0/doc_wasm/modules/08_media.md b/docs_v1.0/doc_wasm/modules/08_media.md index cf81696..e6927cc 100644 --- a/docs_v1.0/doc_wasm/modules/08_media.md +++ b/docs_v1.0/doc_wasm/modules/08_media.md @@ -194,6 +194,8 @@ Uses a built-in 5×7 bitmap font renderer to draw labels directly on video frame Extract a single frame from a video as JPEG image. Uses FFmpeg `select` filter. +When `frame` is omitted, the system automatically selects the best representative frame using the TKG bridge (see algorithm below). + **Auth**: Required **Scope**: file-level @@ -201,7 +203,7 @@ Extract a single frame from a video as JPEG image. Uses FFmpeg `select` filter. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `frame` | integer | Yes | — | Zero-based frame number to extract | +| `frame` | integer | No | auto-detect | Zero-based frame number to extract. Omit for auto-detect. | | `x` | integer | No | — | Crop start X (left edge). Requires `y`, `w`, `h`. | | `y` | integer | No | — | Crop start Y (top edge). 
Requires `x`, `w`, `h`. | | `w` | integer | No | — | Crop width in pixels. Requires `x`, `y`, `h`. | @@ -209,9 +211,26 @@ Extract a single frame from a video as JPEG image. Uses FFmpeg `select` filter. All four crop params (`x`, `y`, `w`, `h`) must be provided together or omitted. -#### Example +#### Auto-detect Algorithm + +When `frame` is not provided, the endpoint finds the best frame using this fallback chain: + +1. **Main characters**: find the two identities with the most face detections (TMDb source) +2. **Mutual gaze**: if their face traces have a TKG `CO_OCCURS_WITH` edge with `mutual_gaze=true`, take `first_frame` +3. **Co-occurrence**: fallback to the first frame where both identities appear together +4. **Single identity**: if only one main identity exists, take its highest-quality face frame +5. **Any identity**: fallback to the best-quality face frame across all identities +6. **Error**: if no face exists, returns `404` + +The selected frame is constrained to the **first half of the video** (`total_frames / 2`). 
+ +#### Examples ```bash +# Auto-detect best representative frame +curl -s "$API/api/v1/file/$FILE_UUID/thumbnail" \ + -H "X-API-Key: $KEY" -o representative.jpg + # Extract frame 1000 (full frame) curl -s "$API/api/v1/file/bd80fec92b0b6963d177a2c55bf713e2/thumbnail?frame=1000" \ -H "Authorization: Bearer $JWT" -o frame_1000.jpg @@ -224,10 +243,106 @@ curl -s "$API/api/v1/file/bd80fec92b0b6963d177a2c55bf713e2/thumbnail?frame=1000& #### Response - **200**: `image/jpeg` binary data -- **404**: File not found +- **404**: File not found / No faces in file (auto-detect) - **500**: FFmpeg error (e.g., frame number exceeds video duration) -### `GET /api/v1/file/:file_uuid/clip` +#### Technical Details + +| Detail | Value | +|--------|-------| +| **Backend** | FFmpeg (`ffmpeg-full`) | +| **Filter** | `select=eq(n\,FRAME)` to select frame, optional `crop=W:H:X:Y` | +| **Output** | Single JPEG via pipe (`image2pipe`, `mjpeg` codec) | +| **Cache** | `Cache-Control: public, max-age=86400` (24h) | +| **Frame number** | Zero-based (`frame=0` = first frame of video) | + +--- + +### `GET /api/v1/file/:file_uuid/representative-frame` + +Return JSON metadata about the best representative frame for the video. Uses the same auto-detect algorithm as `GET /thumbnail` (without crop support). + +**Auth**: Required +**Scope**: file-level + +#### Example + +```bash +curl -s "$API/api/v1/file/$FILE_UUID/representative-frame" \ + -H "X-API-Key: $KEY" | jq '.' 
+``` + +#### Response (200) + +```json +{ + "success": true, + "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692", + "frame_number": 38165, + "timestamp_secs": 1526.6, + "face_quality": 37292.97, + "main_identities": [ + { + "identity_uuid": "c3545906-c82d-4b66-aa1d-150bc02decce", + "name": "Audrey Hepburn", + "face_count": 16456 + }, + { + "identity_uuid": "2b0ddefe-e2a9-4533-9308-b375594604d5", + "name": "Cary Grant", + "face_count": 10643 + } + ], + "traces": [ + { + "trace_id": 919, + "identity_uuid": "2b0ddefe-e2a9-4533-9308-b375594604d5", + "name": "Cary Grant", + "x": 764, + "y": 237, + "width": 199, + "height": 199, + "confidence": 0.8426 + }, + { + "trace_id": 920, + "identity_uuid": "c3545906-c82d-4b66-aa1d-150bc02decce", + "name": "Audrey Hepburn", + "x": 1143, + "y": 312, + "width": 215, + "height": 215, + "confidence": 0.8068 + } + ] +} +``` + +#### Response Fields + +| Field | Type | Description | +|-------|------|-------------| +| `frame_number` | integer | Selected representative frame number (primary coordinate) | +| `timestamp_secs` | float | Time in seconds (derived from `frame_number / fps`) | +| `face_quality` | float | Quality score `area × confidence` of the best face at this frame | +| `main_identities` | array | Top 2 most frequent TMDb identities in the file | +| `main_identities[].name` | string | Identity display name | +| `main_identities[].face_count` | integer | Total face detections count | +| `traces` | array | All face traces present at the selected frame | +| `traces[].trace_id` | integer | Face trace ID | +| `traces[].identity_uuid` | string or null | Matched identity UUID | +| `traces[].name` | string or null | Identity name | +| `traces[].x, y, width, height` | integer | Bounding box coordinates | +| `traces[].confidence` | float | Detection confidence (0.0–1.0) | + +#### Error Responses + +| HTTP | When | +|------|------| +| `404` | File not found / No faces in file | +| `500` | Database error | + +### `GET 
/api/v1/file/:file_uuid/clip` Extract a video clip (time range) 
as MPEG-TS stream. Uses FFmpeg `-ss` fast seek.