From a9e9285032fd41cf80bdc43933a798c2ac0ba44c Mon Sep 17 00:00:00 2001 From: Accusys Date: Fri, 22 May 2026 06:29:25 +0800 Subject: [PATCH] docs: add TKG_QUERY_API_V1.0 design document --- docs_v1.0/DESIGN/TKG_QUERY_API_V1.0.md | 435 +++++++++++++++++++++++++ 1 file changed, 435 insertions(+) create mode 100644 docs_v1.0/DESIGN/TKG_QUERY_API_V1.0.md diff --git a/docs_v1.0/DESIGN/TKG_QUERY_API_V1.0.md b/docs_v1.0/DESIGN/TKG_QUERY_API_V1.0.md new file mode 100644 index 0000000..821c33b --- /dev/null +++ b/docs_v1.0/DESIGN/TKG_QUERY_API_V1.0.md @@ -0,0 +1,435 @@ +# TKG Query API V1.0 + +用於 Gemma4(LLM)透過 function calling 查詢影片人物互動資料的 API 設計。 + +--- + +## 1. Overview + +### 目的 + +讓 LLM(Gemma 4)可以回答關於影片人物互動的問題,例如「誰是主角」、「第一次同框是什麼時候」。透過 TKG(Trace Knowledge Graph)和 PostgreSQL 直接查詢,不需要 LLM 猜測。 + +### 架構 + +``` +User → "誰是這部電影的主角?" + ↓ +Gemma4 → function_call: tkg_query(file_uuid, "top_identities") + ↓ +API → SQL/TKG 查詢 → 結構化 JSON + ↓ +Gemma4 → "男主是 Cary Grant,女主是 Audrey Hepburn..." + ↓ +User ← 自然語言回答 +``` + +### 資料流 + +| 層級 | 元件 | 說明 | +|------|------|------| +| LLM | Gemma 4 26B (port 8082) | 解析自然語言 → 決定呼叫哪個 tool | +| Function | `tkg_query()` | 8 種 query_type,參數由 LLM 填寫 | +| Backend | `POST /api/v1/tkg/query` | 執行 SQL,回傳結構化結果 | +| Data | `face_detections`, `identities`, `chunk`, `tkg_nodes/edges` | 查詢來源 | + +--- + +## 2. 
Function Spec(給 LLM) + +### Function Definition + +```json +{ + "name": "tkg_query", + "description": "查詢影片的人物、場景、互動資料。根據問題類型選擇適合的 query_type。", + "parameters": { + "type": "object", + "properties": { + "file_uuid": { + "type": "string", + "description": "影片的 32 碼 file UUID" + }, + "query_type": { + "type": "string", + "description": "查詢類型", + "enum": [ + "top_identities", + "identity_details", + "first_cooccurrence", + "identity_traces", + "cut_details", + "file_info", + "mutual_gaze", + "interaction_network" + ] + }, + "identity_a": { + "type": "string", + "description": "人物A的 identity_uuid 或名字" + }, + "identity_b": { + "type": "string", + "description": "人物B的 identity_uuid 或名字" + }, + "cut_id": { + "type": "string", + "description": "場景ID(如 cut_264)" + }, + "limit": { + "type": "integer", + "description": "回傳筆數上限", + "default": 10 + } + }, + "required": ["file_uuid", "query_type"] + } +} +``` + +### LLM Prompt 設計 + +System prompt 中須包含此工具定義,並提示: + +``` +你是 Momentry 影片分析系統。當用戶問到影片中的人物、場景、互動問題時, +請先呼叫 tkg_query 查詢資料,再根據資料回答。 + +注意: +- 問題中提到的「男主」、「女主」是指 TMDb cast_order 0 和 1 +- 「配角」是指 cast_order >= 2 的人物 +- 「第一次同框」使用 first_cooccurrence +- 「誰最多鏡頭」使用 top_identities 搭配 face_count 排序 +``` + +--- + +## 3. API Endpoint + +### `POST /api/v1/tkg/query` + +Request: +```json +{ + "file_uuid": "aeed71342a899fe4b4c57b7d41bcb692", + "query_type": "top_identities", + "identity_a": null, + "identity_b": null, + "cut_id": null, + "limit": 10 +} +``` + +Response(通用包裝): +```json +{ + "success": true, + "query_type": "top_identities", + "file_uuid": "aeed71342...", + "data": { ... }, + "took_ms": 12 +} +``` + +Error: +```json +{ + "success": false, + "error": "File not found", + "query_type": "top_identities", + "file_uuid": "aeed71342..." +} +``` + +--- + +## 4. 
Query Types 詳解
+
+### 4.1 `top_identities` — 人物重要性排名
+
+**用途**:找出影片中的所有人物,依 TMDb cast_order 排序
+
+**SQL**:
+```sql
+-- Ranks TMDb-sourced identities by billing order and attaches per-file
+-- detection statistics. $1 = file_uuid, $2 = row limit.
+-- NOTE(review): identities are not scoped to the file; the LEFT JOIN keeps
+-- TMDb identities with no detections in $1 (face_count = 0). Confirm this is
+-- intended when the identities table holds casts of several movies.
+SELECT i.id, i.name,
+       (i.metadata->>'tmdb_cast_order')::int as cast_order,
+       i.metadata->>'tmdb_character' as role,
+       i.source, i.status,
+       COUNT(fd.id) as face_count,
+       COUNT(DISTINCT fd.trace_id) as trace_count,
+       -- The divisor is cast to numeric because PostgreSQL only provides
+       -- round(numeric, int); a real/double precision fps would otherwise
+       -- make ROUND(..., 2) fail. NOTE(review): confirm the type of videos.fps.
+       ROUND(MIN(fd.frame_number)::numeric / GREATEST(v.fps, 1)::numeric, 2) as first_appearance_sec,
+       ROUND(MAX(fd.frame_number)::numeric / GREATEST(v.fps, 1)::numeric, 2) as last_appearance_sec
+FROM identities i
+LEFT JOIN face_detections fd ON fd.identity_id = i.id AND fd.file_uuid = $1
+LEFT JOIN videos v ON v.file_uuid = $1
+WHERE i.source = 'tmdb'
+  AND (i.metadata->>'tmdb_cast_order')::int IS NOT NULL
+GROUP BY i.id, i.name, i.metadata, i.source, i.status, v.fps
+-- i.id breaks ties so that LIMIT always returns the same rows.
+ORDER BY cast_order ASC, i.id ASC
+LIMIT $2
+```
+
+**Response**:
+```json
+{
+  "total": 23,
+  "leads": [
+    {"name": "Cary Grant", "cast_order": 0, "role": "Peter Joshua", "face_count": 10643},
+    {"name": "Audrey Hepburn", "cast_order": 1, "role": "Regina Lampert", "face_count": 16456}
+  ],
+  "supporting": [
+    {"name": "Walter Matthau", "cast_order": 2, "role": "Hamilton Bartholemew", "face_count": 2319},
+    {"name": "James Coburn", "cast_order": 3, "role": "Tex Panthollow", "face_count": 3572},
+    {"name": "George Kennedy", "cast_order": 4, "role": "Herman Scobie", "face_count": 1817}
+  ],
+  "text_summary": "主演:Cary Grant 飾演 Peter Joshua,Audrey Hepburn 飾演 Regina Lampert。主要配角:Walter Matthau(cast_order 2)等 21 人。"
+}
+```
+
+---
+
+### 4.2 `identity_details` — 人物詳細資料
+
+**SQL**:
+```sql
+-- Detail row for one TMDb identity matched by name or UUID ($2), with
+-- detection statistics restricted to file $1.
+SELECT i.id, i.name, i.identity_type, i.source, i.status,
+       -- Cast to int so the type matches top_identities and the JSON response.
+       (i.metadata->>'tmdb_cast_order')::int as cast_order,
+       i.metadata->>'tmdb_character' as role,
+       i.metadata->>'tmdb_movie_title' as movie,
+       i.metadata->>'tmdb_biography' as biography,
+       COUNT(fd.id) as face_count,
+       COUNT(DISTINCT fd.trace_id) as trace_count,
+       MIN(fd.frame_number) as first_frame,
+       MAX(fd.frame_number) as last_frame
+FROM identities i
+LEFT JOIN
face_detections fd ON fd.identity_id = i.id AND fd.file_uuid = $1
+WHERE (i.name ILIKE $2 OR i.uuid::text = $2 OR REPLACE(i.uuid::text, '-', '') = $2)
+  AND i.source = 'tmdb'
+GROUP BY i.id, i.name, i.identity_type, i.source, i.status, i.metadata
+-- Several identities can match the same name; prefer the one that actually
+-- appears in this file and break ties by id so LIMIT 1 is deterministic.
+ORDER BY face_count DESC, i.id ASC
+LIMIT 1
+```
+
+**Response**:
+```json
+{
+  "name": "Audrey Hepburn",
+  "role": "Regina Lampert",
+  "cast_order": 1,
+  "face_count": 16456,
+  "trace_count": 457,
+  "first_appearance_sec": 206.76,
+  "last_appearance_sec": 6756.68,
+  "biography": "Audrey Hepburn (born Audrey Kathleen Ruston; 4 May 1929 – 20 January 1993)..."
+}
+```
+
+---
+
+### 4.3 `first_cooccurrence` — 第一次同框
+
+**邏輯**:找出兩個 identity 第一次同時出現的 frame。
+
+**SQL**:
+```sql
+-- First frame (and number of distinct frames) in file $1 where identity $2
+-- and identity $3 are both detected. Returns a single row; first_frame is
+-- NULL when the two never share a frame.
+SELECT MIN(fd_a.frame_number)::bigint as first_frame,
+       COUNT(DISTINCT fd_a.frame_number)::bigint as total_cooccurrence_frames
+FROM face_detections fd_a
+JOIN face_detections fd_b ON fd_a.file_uuid = fd_b.file_uuid
+  AND fd_a.frame_number = fd_b.frame_number
+WHERE fd_a.file_uuid = $1
+  -- A scalar subquery must return at most one row; without LIMIT 1 the query
+  -- raises an error as soon as two identities share the same name.
+  AND fd_a.identity_id = (SELECT id FROM identities
+                          WHERE name ILIKE $2 OR REPLACE(uuid::text, '-', '') = $2
+                          ORDER BY id LIMIT 1)
+  AND fd_b.identity_id = (SELECT id FROM identities
+                          WHERE name ILIKE $3 OR REPLACE(uuid::text, '-', '') = $3
+                          ORDER BY id LIMIT 1)
+```
+
+**Response**:
+```json
+{
+  "identity_a": {"name": "Audrey Hepburn"},
+  "identity_b": {"name": "Cary Grant"},
+  "first_frame": 38165,
+  "timestamp_secs": 1526.60,
+  "cut_id": "cut_264",
+  "total_cooccurrence_frames": 3136,
+  "representative_thumbnail_a": "/api/v1/file/{uuid}/trace/920/thumbnail",
+  "representative_thumbnail_b": "/api/v1/file/{uuid}/trace/919/thumbnail"
+}
+```
+
+---
+
+### 4.4 `identity_traces` — 人物出場片段
+
+**SQL**:
+```sql
+-- Per-trace detection statistics for identity $2 in file $1, largest traces
+-- first. $3 = row limit.
+SELECT fd.trace_id, COUNT(*) as face_count,
+       MIN(fd.frame_number) as start_frame,
+       MAX(fd.frame_number) as end_frame,
+       COUNT(DISTINCT fd.frame_number) as frame_span
+FROM face_detections fd
+WHERE fd.file_uuid = $1
+  -- LIMIT 1 keeps the scalar subquery valid when several identities match.
+  AND fd.identity_id = (SELECT id FROM identities
+                        WHERE name ILIKE $2 OR REPLACE(uuid::text, '-', '') = $2
+                        ORDER BY id LIMIT 1)
+  -- Untracked detections are not a trace (same rule as file_info.total_traces).
+  AND fd.trace_id IS NOT NULL
+GROUP BY fd.trace_id
+-- trace_id breaks ties so that LIMIT always returns the same traces.
+ORDER BY face_count DESC, fd.trace_id ASC
+LIMIT $3
+```
+
+**Response**:
+```json
+{
+  "name": "Audrey Hepburn",
+  "total_traces": 457,
+  "top_traces": [
+    {"trace_id": 920, "face_count": 53, "start_frame": 38165, "end_frame": 38321,
+     "representative": "/api/v1/file/{uuid}/trace/920/thumbnail"},
+    ...
+  ]
+}
+```
+
+---
+
+### 4.5 `cut_details` — 場景資訊
+
+**SQL**:
+```sql
+-- Frame range, timestamps and text of one cut ($2 = chunk_id) in file $1.
+SELECT chunk_id, start_frame, end_frame,
+       -- Same guard as top_identities: GREATEST avoids division by zero and
+       -- the numeric cast keeps ROUND(..., 2) valid if fps is a float column.
+       -- NOTE(review): confirm the type of chunk.fps.
+       ROUND(start_frame::numeric / GREATEST(fps, 1)::numeric, 2) as start_time,
+       ROUND(end_frame::numeric / GREATEST(fps, 1)::numeric, 2) as end_time,
+       text_content, summary_text
+FROM chunk
+WHERE file_uuid = $1 AND chunk_id = $2 AND chunk_type = 'cut'
+```
+
+**Response**:
+```json
+{
+  "cut_id": "cut_264",
+  "frame_range": [38164, 38324],
+  "duration_sec": 6.4,
+  "summary": "Audrey Hepburn and Cary Grant engage in a brief verbal exchange...",
+  "identities_present": ["Audrey Hepburn", "Cary Grant"]
+}
+```
+
+---
+
+### 4.6 `file_info` — 影片基本資訊
+
+**SQL**:
+```sql
+-- Basic video attributes plus face, trace and cut totals for file $1.
+SELECT file_name, file_path, duration, width, height, fps,
+       (SELECT COUNT(*) FROM face_detections WHERE file_uuid = $1) as total_faces,
+       (SELECT COUNT(DISTINCT trace_id) FROM face_detections WHERE file_uuid = $1 AND trace_id IS NOT NULL) as total_traces,
+       (SELECT COUNT(*) FROM chunk WHERE file_uuid = $1 AND chunk_type = 'cut') as total_cuts
+FROM videos
+WHERE file_uuid = $1
+```
+
+---
+
+### 4.7 `mutual_gaze` — 互看偵測(未來)
+
+**依賴**:pose 資料寫入 `face_detections.metadata->>'pose_yaw'`。
+
+**SQL**:
+```sql
+-- First frame in file $1 where yaw_a > 0.05 and yaw_b < -0.05.
+-- Here $2 and $3 are identity ids, not names.
+-- NOTE(review): this covers only the "face_a is LEFT of face_b" branch of the
+-- logic below and does not check bbox positions at all; the RIGHT branch and
+-- the position test still have to be added once the bbox column is decided.
+-- NOTE(review): 0.05 presumably corresponds to GAZE_THRESHOLD — confirm.
+SELECT fd_a.frame_number,
+       (fd_a.metadata->>'pose_yaw')::float8 as yaw_a,
+       (fd_b.metadata->>'pose_yaw')::float8 as yaw_b
+FROM face_detections fd_a
+JOIN face_detections fd_b ON fd_a.file_uuid = fd_b.file_uuid
+  AND fd_a.frame_number = fd_b.frame_number
+WHERE fd_a.file_uuid = $1
+  AND fd_a.identity_id = $2 AND fd_b.identity_id = $3
+  AND (fd_a.metadata->>'pose_yaw')::float8 > 0.05
+  AND (fd_b.metadata->>'pose_yaw')::float8 < -0.05
+ORDER BY fd_a.frame_number ASC
+LIMIT 1
+```
+
+**Mutual Gaze 判斷邏輯**:
+```
+if face_a is LEFT of face_b (bbox.x_a <
bbox.x_b):
+    mutual_gaze = (yaw_a > GAZE_THRESHOLD) AND (yaw_b < -GAZE_THRESHOLD)
+if face_a is RIGHT of face_b:
+    mutual_gaze = (yaw_a < -GAZE_THRESHOLD) AND (yaw_b > GAZE_THRESHOLD)
+```
+
+---
+
+### 4.8 `interaction_network` — 互動網絡(未來)
+
+**依賴**:TKG `CO_OCCURS_WITH` edges 完整。
+
+**SQL**:
+```sql
+-- Pairs of identities connected by CO_OCCURS_WITH edges in file $1, most
+-- frequent pairs first.
+--
+-- trace_identity maps each trace of THIS file to its identity exactly once.
+-- Joining face_detections directly on trace_id alone would (a) match traces
+-- with the same id from other files and (b) multiply every edge by the number
+-- of detections in both traces.
+WITH trace_identity AS (
+    SELECT DISTINCT trace_id, identity_id
+    FROM face_detections
+    WHERE file_uuid = $1
+      AND trace_id IS NOT NULL
+      AND identity_id IS NOT NULL
+)
+SELECT src_i.name as identity_a, tgt_i.name as identity_b,
+       COUNT(DISTINCT te.id) as cooccurrence_count,
+       MIN((te.properties->>'first_frame')::int) as first_frame
+FROM tkg_edges te
+JOIN tkg_nodes src_n ON src_n.id = te.source_node_id
+JOIN tkg_nodes tgt_n ON tgt_n.id = te.target_node_id
+-- external_id has the form 'trace_<n>'; strip the prefix to get trace_id.
+JOIN trace_identity ti_src ON ti_src.trace_id = REPLACE(src_n.external_id, 'trace_', '')::int
+JOIN trace_identity ti_tgt ON ti_tgt.trace_id = REPLACE(tgt_n.external_id, 'trace_', '')::int
+JOIN identities src_i ON src_i.id = ti_src.identity_id
+JOIN identities tgt_i ON tgt_i.id = ti_tgt.identity_id
+WHERE te.file_uuid = $1
+  AND te.edge_type = 'CO_OCCURS_WITH'
+  AND src_n.node_type = 'face_trace' AND tgt_n.node_type = 'face_trace'
+  AND src_i.name <> tgt_i.name
+GROUP BY src_i.name, tgt_i.name
+-- Names break ties so the ordering is deterministic.
+ORDER BY cooccurrence_count DESC, identity_a ASC, identity_b ASC
+```
+
+---
+
+## 5. Gemma4 整合
+
+### 已驗證功能
+
+| 測試 | 結果 |
+|------|------|
+| Function calling 觸發 | ✅ 正確呼叫 tkg_query |
+| 中文問題理解 | ✅ 「男女主第一次同框」→ first_cooccurrence |
+| 參數填充 | ✅ 正確填入 file_uuid、query_type |
+| 多輪對話(tool result → answer) | ✅ 模型正確消化資料後回答 |
+| 推論型問題(「最重要的配角」) | ✅ 選擇 top_identities + 自行推理 |
+
+### 已知限制
+
+| 問題 | 解決方案 |
+|------|---------|
+| file_uuid 須由 system prompt 提供 | 在 prompt 中指定 |
+| `identity_a` 使用「女主」無法自動匹配 | require identity_a/b 明確名稱 |
+| 模型可能拒絕呼叫 tool(約 5-10%) | system prompt 明確要求「先查詢」 |
+
+### System Prompt 模板
+
+```
+你是 Momentry 影片分析系統。你正在分析電影 {title},file_uuid 為 {file_uuid}。
+
+你有 tkg_query 工具可用,可以查詢影片的人物資料、出場時間、互動關係。
+請先使用工具查詢,再根據查詢結果回答問題。
+不要憑空猜測影片內容。
+```
+
+---
+
+## 6.
Phase 計畫 + +| Phase | Query Types | 預計工時 | 依賴 | +|-------|------------|---------|------| +| **1** | `top_identities`, `identity_details`, `first_cooccurrence`, `file_info` | 2-3h | 已有資料 | +| **2** | `identity_traces`, `cut_details` | 1-2h | 已有資料 | +| **3** | `mutual_gaze` | 2-3h | pose 入 `face_detections.metadata` | +| **4** | `interaction_network` | 2-3h | TKG edges 完善 | + +--- + +## 7. 版本歷史 + +| 日期 | 版本 | 作者 | 變更 | +|------|------|------|------| +| 2026-05-21 | 1.0 | OpenCode | 初始設計文件 | + +*Updated: 2026-05-21*