From 80812128e21006af565b37e7efd2a09574576bd6 Mon Sep 17 00:00:00 2001 From: M5Max128 Date: Thu, 21 May 2026 01:11:44 +0800 Subject: [PATCH] merge: resolve conflicts with M5Max128 local changes --- .gitignore | 1 + .../TRACE/TRACE_API_REFERENCE_V1.0.0.md | 5 ++- docs_v1.0/REFERENCE/DEMO_RUNNER_V1.0.0.md | 31 +++++++++++++++++-- src/core/chunk/mod.rs | 1 + 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 4d6a46c..3c32429 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ __pycache__/ node_modules/ *.log /tmp/ +*.log diff --git a/docs_v1.0/API_V1.0.0/TRACE/TRACE_API_REFERENCE_V1.0.0.md b/docs_v1.0/API_V1.0.0/TRACE/TRACE_API_REFERENCE_V1.0.0.md index 48d4434..b3c1838 100644 --- a/docs_v1.0/API_V1.0.0/TRACE/TRACE_API_REFERENCE_V1.0.0.md +++ b/docs_v1.0/API_V1.0.0/TRACE/TRACE_API_REFERENCE_V1.0.0.md @@ -134,6 +134,7 @@ Aggregated face traces with sorting and filtering. | `limit` | int | 200 | Max faces (capped 1000) | | `offset` | int | 0 | Pagination | | `interpolate` | bool | false | Enable linear interpolation | +| `dimension` | string | — | If `"3d"`, returns `z_rel` depth per detection | #### Response @@ -153,13 +154,15 @@ Aggregated face traces with sorting and filtering. "width": 187, "height": 187, "confidence": 0.834, - "interpolated": false + "interpolated": false, + "z_rel": 0.045 } ] } ``` Interpolated frames: `id=0, confidence=0.0, interpolated=true`. +When `?dimension=3d`, each face includes `z_rel` (0.0 = nearest, 1.0 = farthest), derived from bbox area ratio. Without `dimension=3d`, `z_rel` is omitted. #### Interpolation Algorithm diff --git a/docs_v1.0/REFERENCE/DEMO_RUNNER_V1.0.0.md b/docs_v1.0/REFERENCE/DEMO_RUNNER_V1.0.0.md index 52d9924..9a5800a 100644 --- a/docs_v1.0/REFERENCE/DEMO_RUNNER_V1.0.0.md +++ b/docs_v1.0/REFERENCE/DEMO_RUNNER_V1.0.0.md @@ -39,6 +39,7 @@ python3.11 scripts/demo_runner.py demo.json --voice en_US | `markdown` | 用 md_reader Preview 渲染 .md 文件(含 Mermaid) | `cmd`(檔案路徑) | | `note` | 純文字解說 | `note` | | `separator` | 章節分隔線 | `label` | +| `ask` | 互動問答 — 問問題、等回應、顯示解答 | `question`, `answer` | ## JSON 腳本結構 @@ -66,6 +67,12 @@ python3.11 scripts/demo_runner.py demo.json --voice en_US "note": "說明文字", "cmd": "docs_v1.0/API_V1.0.0/API_USAGE_GUIDE_V1.0.0.md", "focus": "自動聚焦的章節名稱" + }, + { + "type": "ask", + "label": "互動問答", + "question": "問題文字(語音會朗讀)", + "answer": "解答文字(語音會朗讀)" } ] } @@ -92,7 +99,7 @@ python3.11 scripts/demo_runner.py demo.json --voice en_US ## 語音指令(--voice-control) -啟用麥克風語音控制,可用說的操作展示流程: +啟用 Display Audio 麥克風語音控制,可用說的操作展示流程: ```bash python3 scripts/demo_runner.py demo.json --voice zh_TW --voice-control @@ -105,7 +112,25 @@ python3 scripts/demo_runner.py demo.json --voice zh_TW --voice-control | "重複" | "repeat" / "again" | 重複朗讀當前解說 | | "跳到第 5 步" | "go to 5" | 跳到指定步驟 | -語音辨識使用 Google Speech Recognition(需網路),背景執行不影響主流程。 +語音辨識使用 **faster-whisper small**(離線、中英雙語),背景執行不影響主流程。 +模型快取:`~/.cache/huggingface/hub/models--Systran--faster-whisper-small/`。 + +## 互動問答(ask 步驟) + +`ask` 步驟讓展示系統問問題、等待使用者回答、顯示預設解答: + +- 有 `--voice-control` 時:自動錄音 4 秒 → faster-whisper 轉文字 → 顯示辨識結果 +- 無語音控制時:鍵盤輸入(Enter 送出) +- 解答由 TTS 朗讀 + 螢幕顯示 + +```json +{ + "type": "ask", + "label": "互動問答", + "question": "您知道 Momentry Core 可以分析哪些類型的資料嗎?", + "answer": "可以分析影片中的人臉、文字、物件、姿勢、聲音等。" +} +``` ## 展示節奏 @@ -155,5 +180,5 @@ python3 scripts/demo_runner.py demo.json --voice zh_TW --voice-control | 檔案 | 說明 | |------|------| | `scripts/demo_runner.py` | 執行器主程式 | -| `docs_v1.0/API_V1.0.0/DEMO_SCRIPT_v1.0.0.json` | 21 步驟預設展示腳本 | +| `docs_v1.0/API_V1.0.0/DEMO_SCRIPT_v1.0.0.json` | 23 步驟預設展示腳本(含 ask 互動問答) | | `~/_md_reader/target/release/md_reader` | Markdown 渲染工具 | diff --git a/src/core/chunk/mod.rs b/src/core/chunk/mod.rs index f30e5ac..73c4545 100644 --- a/src/core/chunk/mod.rs +++ b/src/core/chunk/mod.rs @@ -6,6 +6,7 @@ pub mod types; pub use rule1_ingest::execute_rule1; pub use rule3_ingest::ingest_rule3; +pub use trace_ingest::ingest_traces; pub use splitter::{AsrSegment, ChunkSplitter}; pub use trace_ingest::ingest_traces; pub use types::{Chunk, ChunkType};