merge: resolve conflicts with M5Max128 local changes
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -15,3 +15,4 @@ __pycache__/
|
||||
node_modules/
|
||||
*.log
|
||||
/tmp/
|
||||
*.log
|
||||
|
||||
@@ -134,6 +134,7 @@ Aggregated face traces with sorting and filtering.
|
||||
| `limit` | int | 200 | Max faces (capped 1000) |
|
||||
| `offset` | int | 0 | Pagination |
|
||||
| `interpolate` | bool | false | Enable linear interpolation |
|
||||
| `dimension` | string | — | If `"3d"`, returns `z_rel` depth per detection |
|
||||
|
||||
#### Response
|
||||
|
||||
@@ -153,13 +154,15 @@ Aggregated face traces with sorting and filtering.
|
||||
"width": 187,
|
||||
"height": 187,
|
||||
"confidence": 0.834,
|
||||
"interpolated": false
|
||||
"interpolated": false,
|
||||
"z_rel": 0.045
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Interpolated frames: `id=0, confidence=0.0, interpolated=true`.
|
||||
When `?dimension=3d` is passed, each face includes `z_rel` (0.0 = nearest, 1.0 = farthest), derived from the bounding-box area ratio. Without `dimension=3d`, `z_rel` is omitted.
|
||||
|
||||
#### Interpolation Algorithm
|
||||
|
||||
|
||||
@@ -39,6 +39,7 @@ python3.11 scripts/demo_runner.py demo.json --voice en_US
|
||||
| `markdown` | 用 md_reader Preview 渲染 .md 文件(含 Mermaid) | `cmd`(檔案路徑) |
|
||||
| `note` | 純文字解說 | `note` |
|
||||
| `separator` | 章節分隔線 | `label` |
|
||||
| `ask` | 互動問答 — 問問題、等回應、顯示解答 | `question`, `answer` |
|
||||
|
||||
## JSON 腳本結構
|
||||
|
||||
@@ -66,6 +67,12 @@ python3.11 scripts/demo_runner.py demo.json --voice en_US
|
||||
"note": "說明文字",
|
||||
"cmd": "docs_v1.0/API_V1.0.0/API_USAGE_GUIDE_V1.0.0.md",
|
||||
"focus": "自動聚焦的章節名稱"
|
||||
},
|
||||
{
|
||||
"type": "ask",
|
||||
"label": "互動問答",
|
||||
"question": "問題文字(語音會朗讀)",
|
||||
"answer": "解答文字(語音會朗讀)"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -92,7 +99,7 @@ python3.11 scripts/demo_runner.py demo.json --voice en_US
|
||||
|
||||
## 語音指令(--voice-control)
|
||||
|
||||
啟用麥克風語音控制,可用說的操作展示流程:
|
||||
啟用 Display Audio 麥克風語音控制,可用說的操作展示流程:
|
||||
|
||||
```bash
|
||||
python3 scripts/demo_runner.py demo.json --voice zh_TW --voice-control
|
||||
@@ -105,7 +112,25 @@ python3 scripts/demo_runner.py demo.json --voice zh_TW --voice-control
|
||||
| "重複" | "repeat" / "again" | 重複朗讀當前解說 |
|
||||
| "跳到第 5 步" | "go to 5" | 跳到指定步驟 |
|
||||
|
||||
語音辨識使用 Google Speech Recognition(需網路),背景執行不影響主流程。
|
||||
語音辨識使用 **faster-whisper small**(離線、中英雙語),背景執行不影響主流程。
|
||||
模型快取:`~/.cache/huggingface/hub/models--Systran--faster-whisper-small/`。
|
||||
|
||||
## 互動問答(ask 步驟)
|
||||
|
||||
`ask` 步驟讓展示系統問問題、等待使用者回答、顯示預設解答:
|
||||
|
||||
- 有 `--voice-control` 時:自動錄音 4 秒 → faster-whisper 轉文字 → 顯示辨識結果
|
||||
- 無語音控制時:鍵盤輸入(Enter 送出)
|
||||
- 解答由 TTS 朗讀 + 螢幕顯示
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "ask",
|
||||
"label": "互動問答",
|
||||
"question": "您知道 Momentry Core 可以分析哪些類型的資料嗎?",
|
||||
"answer": "可以分析影片中的人臉、文字、物件、姿勢、聲音等。"
|
||||
}
|
||||
```
|
||||
|
||||
## 展示節奏
|
||||
|
||||
@@ -155,5 +180,5 @@ python3 scripts/demo_runner.py demo.json --voice zh_TW --voice-control
|
||||
| 檔案 | 說明 |
|
||||
|------|------|
|
||||
| `scripts/demo_runner.py` | 執行器主程式 |
|
||||
| `docs_v1.0/API_V1.0.0/DEMO_SCRIPT_v1.0.0.json` | 21 步驟預設展示腳本 |
|
||||
| `docs_v1.0/API_V1.0.0/DEMO_SCRIPT_v1.0.0.json` | 23 步驟預設展示腳本(含 ask 互動問答) |
|
||||
| `~/_md_reader/target/release/md_reader` | Markdown 渲染工具 |
|
||||
|
||||
@@ -6,6 +6,7 @@ pub mod types;
|
||||
|
||||
pub use rule1_ingest::execute_rule1;
|
||||
pub use rule3_ingest::ingest_rule3;
|
||||
pub use trace_ingest::ingest_traces;
|
||||
pub use splitter::{AsrSegment, ChunkSplitter};
|
||||
pub use trace_ingest::ingest_traces;
|
||||
pub use types::{Chunk, ChunkType};
|
||||
|
||||
Reference in New Issue
Block a user