From 3a6c186575c90e1fd9433a40dc86b847dd0993af Mon Sep 17 00:00:00 2001 From: Accusys Date: Sat, 16 May 2026 03:11:32 +0800 Subject: [PATCH] docs: add REFERENCE docs, M4 workspace, Caddyfile --- docs/RELEASE_PACKAGING.md | 81 ++ .../2026-05-15_delivery_c41f7e0.md | 56 ++ .../2026-05-15_delivery_c41f7e0_response.md | 91 ++ .../2026-05-15_deploy_29eca5a_report.md | 80 ++ .../2026-05-15_m5_wordpress_install_report.md | 105 +++ .../2026-05-15_portal_api_review.md | 53 ++ .../2026-05-15_source_sync_request.md | 33 + .../2026-05-15_worker_crash_response.md | 92 ++ ...2026-05-15_worker_pool_timeout_followup.md | 36 + ...2026-05-15_worker_pool_timeout_response.md | 103 +++ .../2026-05-15_worker_status_report.md | 85 ++ .../REFERENCE/Brew_To_Source_Migration.md | 139 +++ .../REFERENCE/Chunk_Fallback_Mechanism.md | 127 +++ docs_v1.0/REFERENCE/Cut_Structure.md | 108 +++ docs_v1.0/REFERENCE/Demo_EndToEnd.md | 839 ++++++++++++++++++ docs_v1.0/REFERENCE/Face_Pipeline.md | 120 +++ docs_v1.0/REFERENCE/M5API_Pipeline_Demo.md | 468 ++++++++++ docs_v1.0/REFERENCE/Person_vs_Identifiable.md | 68 ++ docs_v1.0/REFERENCE/Pipeline_API_Demo.md | 445 ++++++++++ docs_v1.0/REFERENCE/SFTPGo_Lifecycle.md | 235 +++++ docs_v1.0/REFERENCE/SFTPGo_Setup.md | 237 +++++ docs_v1.0/REFERENCE/SFTPGo_Verification.md | 84 ++ docs_v1.0/REFERENCE/Searchable_Chunk_Rules.md | 90 ++ docs_v1.0/REFERENCE/Services_Inventory.md | 208 +++++ docs_v1.0/REFERENCE/Trace_Face_Binding.md | 129 +++ docs_v1.0/REFERENCE/Trace_Structure.md | 98 ++ release/migrate_add_cut_id.sql | 15 + release/migrate_add_stranger_id.sql | 13 + release/migrate_create_cuts_table.sql | 38 + 29 files changed, 4276 insertions(+) create mode 100644 docs/RELEASE_PACKAGING.md create mode 100644 docs_v1.0/M4_workspace/2026-05-15_delivery_c41f7e0.md create mode 100644 docs_v1.0/M4_workspace/2026-05-15_delivery_c41f7e0_response.md create mode 100644 docs_v1.0/M4_workspace/2026-05-15_deploy_29eca5a_report.md create mode 100644 
docs_v1.0/M4_workspace/2026-05-15_m5_wordpress_install_report.md create mode 100644 docs_v1.0/M4_workspace/2026-05-15_portal_api_review.md create mode 100644 docs_v1.0/M4_workspace/2026-05-15_source_sync_request.md create mode 100644 docs_v1.0/M4_workspace/2026-05-15_worker_crash_response.md create mode 100644 docs_v1.0/M4_workspace/2026-05-15_worker_pool_timeout_followup.md create mode 100644 docs_v1.0/M4_workspace/2026-05-15_worker_pool_timeout_response.md create mode 100644 docs_v1.0/M4_workspace/2026-05-15_worker_status_report.md create mode 100644 docs_v1.0/REFERENCE/Brew_To_Source_Migration.md create mode 100644 docs_v1.0/REFERENCE/Chunk_Fallback_Mechanism.md create mode 100644 docs_v1.0/REFERENCE/Cut_Structure.md create mode 100644 docs_v1.0/REFERENCE/Demo_EndToEnd.md create mode 100644 docs_v1.0/REFERENCE/Face_Pipeline.md create mode 100644 docs_v1.0/REFERENCE/M5API_Pipeline_Demo.md create mode 100644 docs_v1.0/REFERENCE/Person_vs_Identifiable.md create mode 100644 docs_v1.0/REFERENCE/Pipeline_API_Demo.md create mode 100644 docs_v1.0/REFERENCE/SFTPGo_Lifecycle.md create mode 100644 docs_v1.0/REFERENCE/SFTPGo_Setup.md create mode 100644 docs_v1.0/REFERENCE/SFTPGo_Verification.md create mode 100644 docs_v1.0/REFERENCE/Searchable_Chunk_Rules.md create mode 100644 docs_v1.0/REFERENCE/Services_Inventory.md create mode 100644 docs_v1.0/REFERENCE/Trace_Face_Binding.md create mode 100644 docs_v1.0/REFERENCE/Trace_Structure.md create mode 100644 release/migrate_add_cut_id.sql create mode 100644 release/migrate_add_stranger_id.sql create mode 100644 release/migrate_create_cuts_table.sql diff --git a/docs/RELEASE_PACKAGING.md b/docs/RELEASE_PACKAGING.md new file mode 100644 index 0000000..863249a --- /dev/null +++ b/docs/RELEASE_PACKAGING.md @@ -0,0 +1,81 @@ +# Release Packaging Design + +三類包:**開發系統升級包** + **生產系統升級包** + **檔案內容包**,完全獨立。 + +## 1. 
開發系統升級包 (System/Dev) + +給 playground(port 3003, dev schema)使用。 + +``` +release/system/dev/{version}/ +├── RELEASE_INFO.txt +├── source.tar.gz ← Rust + scripts source code +├── .env.development ← DATABASE_SCHEMA=dev, port 3003 +├── schema_dev.sql ← dev schema DDL +├── scripts/ +│ ├── pipeline_status.py +│ ├── generate_asr1.py +│ ├── apply_asr_corrections.py +│ ├── clean_sentence_text.py +│ └── import_file_package.py ← 匯入檔案內容包 +├── test/ +│ └── api_test.sh +└── migration/ + └── {prev}_to_{version}.sql +``` + +升級:覆蓋 code + 執行 migration → `cargo build --bin momentry_playground` → 重啟 3003 + +## 2. 生產系統升級包 (System/Prod) + +給 production(port 3002, public schema)使用。 + +``` +release/system/prod/{version}/ +├── RELEASE_INFO.txt +├── source.tar.gz ← Rust + scripts source code +├── .env ← DATABASE_SCHEMA=public, port 3002 +├── schema_public.sql ← public schema DDL +├── scripts/ (same as dev) +├── test/ +│ └── api_test.sh +└── migration/ + └── {prev}_to_{version}.sql +``` + +## 3. 檔案內容包 (File) + +一個影片的完整資料,開發與生產環境共用。 + +``` +release/files/{file_uuid}/{version}/ +├── metadata.json ← Registration info +├── RELEASE_INFO.txt +├── processors/ ← output_dev/{uuid}.*.json +│ ├── asr.json +│ ├── asrx.json +│ ├── asr-1.json +│ ├── yolo.json +│ ├── face.json +│ ├── pose.json +│ ├── ocr.json +│ ├── cut.json +│ └── scene.json +├── face_detections.csv ← 該檔案的所有 face detections +├── identities.csv ← 關聯的 identities +├── tkg_nodes.csv ← TKG nodes +├── tkg_edges.csv ← TKG edges +├── qdrant/ ← Qdrant snapshots for this file +│ ├── momentry_dev_v1.snapshot +│ ├── sentence_story.snapshot +│ └── ... +└── RELEASE_INFO.txt +``` + +### 匯入流程 + +``` +1. POST /api/v1/files/register → 取得 file_uuid +2. python3 scripts/import_file_package.py --uuid {uuid} --package path/ +3. 
檔案狀態更新為「已註冊已處理」 +``` diff --git a/docs_v1.0/M4_workspace/2026-05-15_delivery_c41f7e0.md b/docs_v1.0/M4_workspace/2026-05-15_delivery_c41f7e0.md new file mode 100644 index 0000000..5bfe8f1 --- /dev/null +++ b/docs_v1.0/M4_workspace/2026-05-15_delivery_c41f7e0.md @@ -0,0 +1,56 @@ +# Delivery: v1.0.0 (c41f7e0) + +**Date**: 2026-05-15 +**From**: M5 +**To**: M4 +**Build**: `c41f7e0` + +--- + +## Delivery Package + +`release/delivery/v1.0.0_c41f7e0_20260515_180644/` + +| Item | Size | +|------|------| +| `momentry_v1.0.0_c41f7e0` | 21 MB | +| `scripts/` (293 .py + 22 .sh) | 2.9 MB | +| `migrate_*.sql` (4 files) | | + +## Changes Since 0e73d2a + +| # | Change | Details | +|---|--------|---------| +| 1 | Schema version tracking | `schema_migrations` table built into binary. Startup checks all migrations applied. `/health/detailed` shows `schema.ok`. **版本錯用立刻就知** | +| 2 | SHA256 script integrity | `scripts/checksums.sha256` manifest with 345 entries. `PythonExecutor` verifies SHA256 before running any processor. `/health/detailed` shows `scripts_integrity`. | +| 3 | 3 setup scripts | `install_momentry.sh`, `upgrade_momentry.sh`, `check_momentry.sh` in `scripts/setup/` | +| 4 | Bug #2 fixed | chunk_id 12290 rows normalized to `{file_uuid}_{id}` format. Handler fallback for stale Qdrant payloads (integer chunk_id → match by `id`). | +| 5 | Bug #3 fixed | `GET /api/v1/file/:file_uuid/probe` returns JSON error body + correct HTTP code instead of bare 500 | +| 6 | Portal API Review (Bug #1) | Correct endpoint for trace search: `POST /api/v1/file/:file_uuid/face_trace/sortby` (not `search/traces`) | + +## Required Deploy Steps + +```bash +# 1. Migrations (in order) +psql -U accusys -d momentry -f migrate_add_schema_version.sql +psql -U accusys -d momentry -f migrate_add_registered_status.sql +psql -U accusys -d momentry -f migrate_add_content_hash.sql +psql -U accusys -d momentry -f migrate_fix_chunk_id_format.sql + +# 2. 
Record in schema_migrations +for f in migrate_*.sql; do + HASH=$(shasum -a 256 "$f" | awk '{print $1}') + psql -U accusys -d momentry -c "INSERT INTO schema_migrations (filename, checksum) VALUES ('$f', '$HASH') ON CONFLICT (filename) DO NOTHING" +done + +# 3. Replace scripts +cp -r scripts/ /path/to/scripts/ + +# 4. Replace binary +codesign --remove-signature momentry_v1.0.0_c41f7e0 +pkill momentry +DATABASE_SCHEMA=public ./momentry_v1.0.0_c41f7e0 server --port 3002 + +# 5. Verify +bash /path/to/scripts/setup/check_momentry.sh +``` diff --git a/docs_v1.0/M4_workspace/2026-05-15_delivery_c41f7e0_response.md b/docs_v1.0/M4_workspace/2026-05-15_delivery_c41f7e0_response.md new file mode 100644 index 0000000..03449e0 --- /dev/null +++ b/docs_v1.0/M4_workspace/2026-05-15_delivery_c41f7e0_response.md @@ -0,0 +1,91 @@ +# M4 回覆: Delivery c41f7e0 (Corrected Binary) + +**Date**: 2026-05-15 +**From**: M5 +**To**: M4 +**Ref**: `2026-05-15_delivery_c41f7e0_response.md` + +--- + +## 1. Binary 正確(驗證方法修正) + +Delivery binary **已包含正確 hash** `c41f7e0c`。M5 實測: + +```bash +# ✅ 執行 binary → /health 正確回報 +DATABASE_SCHEMA=dev ./momentry_v1.0.0_c41f7e0 server --port 3011 & +curl -sf http://127.0.0.1:3011/health | python3 -c "import json,sys;print(json.load(sys.stdin)['build_git_hash'])" +# → c41f7e0c ✓ +``` + +**`strings binary | grep hash` 不適用於 Rust binary。** + +Rust 編譯器將 build.rs 的 `cargo:rustc-env=BUILD_GIT_HASH=...` 視為 compile-time 字串常數,inline 到 `.rodata` 時可能被合併、分割或優化。M5 驗證: +- `strings` 找不到 `c41f7e0c` → **正常現象** +- `xxd` / raw byte search 也找不到 → **正常現象** +- 執行 binary 後 `/health` 正確回 `c41f7e0c` → **正確唯一驗證方式** + +**更正驗證方式**:請直接啟動 binary,不要用 `strings`。 + +## 2. Probe — 確認 Fix 有效 + +`GET /api/v1/file/fa182e9c26145b2c1a932f73d1d484e5/probe` 回 `{"error":"File does not exist at registered path"}`。 + +根因:`short_clip.mov` 不在磁碟上。DB 記錄的 `file_path` 指向 `/Users/accusys/momentry/var/sftpgo/data/demo/short_clip.mov`,但該檔案已被刪除或移動。Fix 本身正確(回 JSON error 非 500)。✅ + +## 3. 
Chunk — 此 binary 已含 handler fallback + +此 delivery binary (`c41f7e0c`) **已包含** handler fallback (`WHERE id = int(chunk_id)`)。M5 已驗證。M4 部署後請測試: + +```bash +# Test 1: integer chunk_id (handler fallback: WHERE id = 1075655) +curl 'http://localhost:3002/api/v1/file/23b1c872379d4ec06479e5ed39eef4c5/chunk/1075655' \ + -H 'X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69' +# 預期: 200 ✅ + +# Test 2: new format {file_uuid}_{id} +curl 'http://localhost:3002/api/v1/file/23b1c872379d4ec06479e5ed39eef4c5/chunk/23b1c872379d4ec06479e5ed39eef4c5_1075655' \ + -H 'X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69' +# 預期: 200 ✅ +``` + +| DB 位置 | | +|------|------| +| Schema | `public` | +| Table | `public.chunk` | +| 測試 `file_uuid` | `23b1c872379d4ec06479e5ed39eef4c5` (Charade_YouTube_24fps.mp4, completed) | +| 測試 `id` | `1075655` (DB 中存在) | + +## 4. DB 狀態 + +| Status | Count | Schema | +|--------|:--:|------| +| `completed` | 2 | `public.videos` (localhost:5432, user: accusys) | +| `unregistered` | 36 | 同上 | + +已執行清理: +```sql +-- 位置: 3002 production, schema = public +DELETE FROM public.processor_results WHERE file_uuid IN (SELECT file_uuid FROM public.videos WHERE status = 'unregistered'); +UPDATE public.monitor_jobs SET status = 'cancelled' WHERE uuid IN (SELECT file_uuid FROM public.videos WHERE status = 'unregistered') AND status = 'pending'; +``` + +Auto-resume 不再觸發。✅ + +## 5. 
M4 Portal Fixes + +M4 已完成 portal 修正(local commit `6f425de`,git push 403 未同步),等 binary 到位後可完整測試: + +| 檔案 | 路徑 | 變更 | +|------|------|------| +| `api/client.ts` | `portal/src/api/client.ts` | `searchVideos()`: `vid` 改用傳入 `fileUuid` 參數 | +| `api/client.ts` | 同上 | `searchChunks()`: 同上 | +| `api/client.ts` | 同上 | `getVideos()`: 標準化 response type `{success, total, data}` | +| `SearchView.vue` | `portal/src/views/SearchView.vue` | Trace 搜尋: `fetch(/search/traces)` → `listTracesSorted()` | +| `SearchView.vue` | 同上 | 強制選 `file_uuid` 才能 trace 搜尋 | +| `App.vue` | `portal/src/App.vue` | ApiDemo 預設顯示(`devMode !== 'false'`) | +| `PersonsView.vue` | `portal/src/views/PersonsView.vue` | `person.id` → `person.identity_uuid` | +| `VideoDetailView.vue` | `portal/src/views/VideoDetailView.vue` | Response: `result.files` → `result.data` | +| `FaceCandidatesView.vue` | `portal/src/views/FaceCandidatesView.vue` | Response: `result.files` → `result.data` | + +Git push blocked (403 on `gitea.momentry.ddns.net/warren/momentry_core.git`)。Source files at M4 local path: `/Users/accusys/momentry_core_0.1/portal/src/` diff --git a/docs_v1.0/M4_workspace/2026-05-15_deploy_29eca5a_report.md b/docs_v1.0/M4_workspace/2026-05-15_deploy_29eca5a_report.md new file mode 100644 index 0000000..e2bfac1 --- /dev/null +++ b/docs_v1.0/M4_workspace/2026-05-15_deploy_29eca5a_report.md @@ -0,0 +1,80 @@ +# M4 部署報告:v1.0.0 29eca5a → 3002 Production + +## 部署摘要 + +| 項目 | 內容 | +|------|------| +| Binary | `29eca5a` (22075280 bytes delivery, 21903992 stripped) | +| 部署時間 | 2026-05-15 14:49 CST | +| Port | 3002 | +| MD5 (stripped) | `a28e8f517eac22b5a8991fb0769aecc4` | +| 狀態 | ✅ Running, API 200 | + +## Migration 完成 + +```sql +-- registered_status column added +ALTER TABLE videos ADD COLUMN IF NOT EXISTS registered_status text; + +-- CHECK constraint updated (added unregistered) +ALTER TABLE videos DROP CONSTRAINT IF EXISTS chk_videos_status; +ALTER TABLE videos ADD CONSTRAINT chk_videos_status + CHECK 
(status::text = ANY (ARRAY['pending','processing','completed','failed','unregistered'])); + +-- 36 unfinished files cleaned +UPDATE videos SET status = 'unregistered' WHERE status IN ('pending', 'processing'); +``` + +### DB 最終狀態 +| status | registered_status | count | +|--------|-------------------|:--:| +| completed | registered | 2 | +| unregistered | unregistered | 36 | + +## Python Deps +```bash +pip3 install PyPDF2 python-docx openpyxl python-pptx # ✅ +``` + +## Watcher Safety +- No file-auto-register watcher: ✅ confirmed +- `com.momentry.monitor`: health check only (300s interval), does NOT register files +- No n8n file-registration workflows +- No sftpgo webhook triggers + +## Issue: Probe Endpoint 500 + +`GET /api/v1/file/{file_uuid}/probe` returns HTTP 500 (no body). Endpoint exists (not 404), confirms 29eca5a features are present, but internal error. Needs M5 investigation. + +## Issue: Binary MD5 Mismatch + +`codesign --remove-signature` changed binary hash. Original delivery MD5 may not match running binary. + +| | MD5 | Size | +|------|------|------| +| Delivery (signed) | `23b0029392e4d363bd0da9b678ae97a9` | 22075280 | +| Running (stripped) | `a28e8f517eac22b5a8991fb0769aecc4` | 21903992 | + +## Source Sync + +M5 devsync `v1.0.0_devsync_20260515_070837` applied: +- `src/core/probe/unified.rs` ✅ +- `scripts/probe_file.py`, `test_probe_file.py` ✅ +- `src/watcher/watcher.rs`, `postgres_db.rs`, `universal_search.rs` ✅ +- `docs_v1.0/DESIGN/` (3 files) ✅ +- M4 protected domains preserved: `portal/`, `AGENTS.md`, `MARKBASE_DESIGN`, `server.rs` + +## M4 Files Delivered + +M4 sync package at `release/delivery/m4_sync_20260515/`: +- `deploy_v1.0.0_20260515.sh` / `.sql` +- `cleanup_3003_dev.sql` +- `migrate_add_registered_status.sql` +- `AGENTS.md` (M4 updated) +- `rca/` (RCA report) + +## M5 Action Items + +1. **Probe endpoint 500**: investigate root cause on 29eca5a binary +2. **Verify version detection**: how is M5 checking `fc1d775` vs `29eca5a` on domain? 
+3. **Pull M4 sync files** from `m4_sync_20260515/` into main repo diff --git a/docs_v1.0/M4_workspace/2026-05-15_m5_wordpress_install_report.md b/docs_v1.0/M4_workspace/2026-05-15_m5_wordpress_install_report.md new file mode 100644 index 0000000..f91d861 --- /dev/null +++ b/docs_v1.0/M4_workspace/2026-05-15_m5_wordpress_install_report.md @@ -0,0 +1,105 @@ +# M5 WordPress 安裝及轉移報告 + +**Date**: 2026-05-15 +**From**: M4 +**To**: M5 + +--- + +## 1. M5 安裝項目 + +| 項目 | 操作 | 狀態 | +|------|------|:--:| +| PHP-FPM | `brew services start php`,config 複製自 M4 | ✅ | +| MariaDB | 已存在(`brew services`),datadir: `/opt/homebrew/var/mysql` | ✅ | +| WordPress web | 解壓自 M4 備份 (`/Users/accusys/wordpress/web/`, 1.4GB) | ✅ | +| Caddy | `brew install caddy`,但 **未使用**(M4 端負責) | - | + +## 2. 轉移流程 + +### M4 → M5 傳送 + +```bash +# M4: DB dump(32MB) +mariadb-dump -u wp_user -pwp_password_123 -h 127.0.0.1 --databases wordpress > wordpress_m4_db.sql + +# M4: Web files(539M tar.gz) +tar czf wordpress_m4_files.tar.gz -C /Users/accusys/wordpress web/ + +# SCP +scp wordpress_m4_db.sql wordpress_m4_files.tar.gz accusys@192.168.110.201:/tmp/ +``` + +### M5 還原 + +```bash +# 解壓 web files +tar xzf /tmp/wordpress_m4_files.tar.gz -C /Users/accusys/wordpress/ + +# PHP-FPM config(M4 複製) +cp www.conf.m4 /opt/homebrew/etc/php/8.5/php-fpm.d/www.conf +sed -i '' 's/127.0.0.1:9000/0.0.0.0:9000/' www.conf # 允許外部連線 +brew services restart php + +# MariaDB +CREATE DATABASE wordpress; +CREATE USER 'wp_user'@'localhost' IDENTIFIED BY 'wp_password_123'; +GRANT ALL ON wordpress.* TO 'wp_user'@'localhost'; +mysql wordpress < /tmp/wordpress_m4_db.sql # 25 tables +``` + +## 3. 
架構 + +``` +m5wp.momentry.ddns.net + → M4 Caddy → php_fastcgi 192.168.110.201:9000 + → M5 PHP-FPM:9000 → M5 MariaDB:3306 +``` + +M5 無需安裝 web server。Caddy 在 M4 端處理 HTTPS、靜態檔案、FastCGI 轉發。 + +### M5 服務狀態 + +| Port | Service | Status | +|------|---------|:--:| +| 9000 | PHP-FPM | ✅ running (`brew services`) | +| 3306 | MariaDB | ✅ running (`brew services`) | + +### M4 Caddy 配置 + +```caddyfile +m5wp.momentry.ddns.net { + root * /Users/accusys/wordpress/web + encode gzip + php_fastcgi 192.168.110.201:9000 + file_server + import common_log m5wp_access +} +``` + +## 4. 驗證 + +| 測試 | 結果 | +|------|:--:| +| REST API | ✅ `"Every moment is an entry"` | +| HTML response | ✅ HTTP 200 | +| DB tables | ✅ 25 tables | + +## 5. 待處理 + +| 項目 | 說明 | +|------|------| +| **~~Home URL~~** | ~~DB 中存為 `https://wp.momentry.ddns.net`。~~ ✅ 已修正為 `https://m5wp.momentry.ddns.net`(`wp_options.home` + `siteurl`) | +| **PHP-FPM restart on boot** | `brew services` 已處理 ✅ | +| **wp-config.php `DB_HOST`** | 設為 `127.0.0.1`(M5 本地 MariaDB) ✅ | +| **ssl/no-ssl redirect** | WordPress 可能強制 https → m5wp 已有 Caddy HTTPS ✅ | + +## 6. 
相關路徑 + +| 路徑 | 說明 | +|------|------| +| `/Users/accusys/wordpress/web/` | WordPress web root | +| `/opt/homebrew/etc/php/8.5/php-fpm.d/www.conf` | PHP-FPM config(listen 0.0.0.0:9000) | +| `/opt/homebrew/var/mysql/` | MariaDB data dir | +| `/tmp/wordpress_m4_db.sql` | DB backup (M5) | +| `/tmp/wordpress_m4_files.tar.gz` | Files backup (M5) | diff --git a/docs_v1.0/M4_workspace/2026-05-15_portal_api_review.md b/docs_v1.0/M4_workspace/2026-05-15_portal_api_review.md new file mode 100644 index 0000000..6039600 --- /dev/null +++ b/docs_v1.0/M4_workspace/2026-05-15_portal_api_review.md @@ -0,0 +1,53 @@ +# Portal API Review — 對照 API_REFERENCE_V1.0.0.md + +## 需 M5 處理(3 項) + +| # | 問題 | 位置 | M5 行動 | +|---|------|------|---------| +| 1 | `POST /api/v1/search/traces` → 404 | `SearchView.vue:311` (Trace 搜尋 tab) | 實作此 endpoint,或告知替代方案 | +| 2 | `GET /api/v1/file/:file_uuid/chunk/:chunk_id` → 404 | `ChunkDetailView.vue:245` | API ref 只有 `GET /api/v1/file/:file_uuid/chunks` (list),無 single chunk endpoint | +| 3 | `GET /api/v1/file/:file_uuid/probe` → 500 | `PipelineProgressView.vue:276` | 已於 29eca5a 部署報告提交,再次確認 | + +## Portal API 端點對照(3002 實測) + +``` +client.ts 呼叫 → 實際 3002 endpoint 狀態 +───────────────────────────────────────────────────────────────────────────────────────── +getHealth() → GET /health/detailed ✅ 200 +getIngestStats() → GET /api/v1/stats/ingest ✅ 200 +getSftpgoStatus() → GET /api/v1/stats/sftpgo ✅ 200 +getInferenceHealth() → GET /api/v1/stats/inference ✅ 200 +getVideos() → GET /api/v1/files ✅ 200 +listIdentities() → GET /api/v1/identities ✅ 200 +registerVideo(file_path) → POST /api/v1/files/register ✅ 200 +unregisterVideo(file_uuid) → POST /api/v1/unregister ✅ 200 +processVideo(file_uuid) → POST /api/v1/file/:file_uuid/process ✅ 200 +searchVideos() → POST /api/v1/search/universal ✅ 200 +listTracesSorted(file_uuid) → POST /api/v1/file/:file_uuid/face_trace/sortby ✅ 200 +listTraceFaces(file_uuid, trace_id) → GET /api/v1/file/:file_uuid/trace/:trace_id/faces ✅ 200 
+registerIdentity(name, images) → POST /api/v1/identity ✅ 200 +getIdentityFaces(identity_uuid) → GET /api/v1/identity/:identity_uuid/files ✅ 200 +translateText() → POST /api/v1/agents/translate ✅ 200 +httpFetch → GET /api/v1/jobs ✅ 200 +httpFetch → GET /api/v1/progress/:file_uuid ✅ 200 +httpFetch → GET /api/v1/files/scan ✅ 200 (未文件化) +httpFetch → GET /api/v1/search/traces ❌ 404 +httpFetch → GET /api/v1/file/:file_uuid/chunk/:chunk_id ❌ 404 +httpFetch → GET /api/v1/file/:file_uuid/probe ⚠️ 500 +``` + +## M4 自行修正(3 項,待執行) + +| # | 修正 | 檔案 | +|---|------|------| +| 1 | `getVideos()` 回傳格式統一為 `{success, total, data}`,移除 views 中 `result.videos \|\| result.data \|\| result.files` fallback | `api/client.ts`, 各 view | +| 2 | `ApiDemo.vue`(即時 API request/response log)加到每個 view 底部,供示範教學 | 各 view `.vue` | +| 3 | 補充 `/api/v1/files/scan` endpoint 至 API reference | `API_REFERENCE_V1.0.0.md` | + +## 術語規範 + +全文件使用精確專有名詞: +- `file_uuid` — 不使用 `uuid` / `UUID` +- `identity_uuid` — 全域身份識別符 +- `trace_id` — 臉部追蹤 ID +- `chunk_id` — 句子片段 ID diff --git a/docs_v1.0/M4_workspace/2026-05-15_source_sync_request.md b/docs_v1.0/M4_workspace/2026-05-15_source_sync_request.md new file mode 100644 index 0000000..2413bc1 --- /dev/null +++ b/docs_v1.0/M4_workspace/2026-05-15_source_sync_request.md @@ -0,0 +1,33 @@ +# M4 Source Sync Request + +## 背景 +M5 交付 `v1.0.0_29eca5a` binary 已成功部署到 3002。M4 完成了以下工作,需將 source 同步回 M5: + +## M4 變更 + +### Database +| 檔案 | 說明 | +|------|------| +| `release/deploy_v1.0.0_20260515.sql` | Migration: `registered_status` column + cleanup 36 unfinished files | +| `release/cleanup_3003_dev.sql` | 3003 dev schema cleanup | +| `release/migrate_add_registered_status.sql` | `registered_status` column migration | +| `release/deploy_v1.0.0_20260515.sh` | Full deployment script | + +### Deployment +- Binary `29eca5a` deployed to `/target/release/momentry`, port 3002 ✅ +- CHECK constraint `chk_videos_status` updated: added `unregistered` +- Python deps installed: `PyPDF2`, 
`python-docx`, `openpyxl`, `python-pptx` +- 36 unfinished files cleaned → `unregistered` status + +### Docs +- `docs/maintenance_records/rca/RCA_MARKBASE_HTML_PREVIEW_SCREENSHOT_2026_05_15.md` — HTML preview screenshot bug RCA +- `docs_v1.0/REFERENCE/MARKBASE_DESIGN_V2.0.md` — MarkBase design +- `AGENTS.md` — Updated M4 instructions + +## Sync 方式 +- Git push 失敗: `403` (M4 無 push 權限 `gitea.momentry.ddns.net/warren/momentry_core.git`) +- 已複製到 `/Volumes/accusys/momentry_core_0.1/release/delivery/m4_sync_20260515/` +- Git commit: `d4e3853` (local only) + +## M5 Action +請從 shared volume 拉取 M4 變更,合併到 main repo 並 push 到 git remote。 diff --git a/docs_v1.0/M4_workspace/2026-05-15_worker_crash_response.md b/docs_v1.0/M4_workspace/2026-05-15_worker_crash_response.md new file mode 100644 index 0000000..2f80a19 --- /dev/null +++ b/docs_v1.0/M4_workspace/2026-05-15_worker_crash_response.md @@ -0,0 +1,92 @@ +# M4 回覆: Worker 崩潰循環 — 根因分析與修正 + +**Date**: 2026-05-15 +**From**: M5 +**To**: M4 +**Ref**: Worker crash-loop (all jobs stuck at pending) + +--- + +## 根因 + +`PythonExecutor::new()` 使用 `env!("CARGO_MANIFEST_DIR")`,這是 Rust 的 **compile-time 常數**。在 M5 編譯時被硬編碼為: +``` +/Users/accusys/momentry_core_0.1/venv/bin/python +/Users/accusys/momentry_core_0.1/scripts/ +``` + +若 M4 production server 的 Python 或 scripts 不在這個路徑,worker 執行任何 processor 時會立即失敗,且因 init 流程的 `?` 傳播造成連續失敗(崩潰循環)。 + +## 修正 + +已改為使用 **runtime 環境變數**: + +| Env Var | 用途 | 預設值 | +|---------|------|--------| +| `MOMENTRY_PYTHON_PATH` | Python 3.11 binary | `/opt/homebrew/bin/python3.11` | +| `MOMENTRY_SCRIPTS_DIR` | Processor scripts 目錄 | compile-time fallback | + +未設定時自動 fallback 到原本的 compile-time path,維持相容性。 + +## M4 部署步驟 + +### 1. 設定環境變數 + +```bash +export MOMENTRY_PYTHON_PATH="/path/to/your/python3.11" +export MOMENTRY_SCRIPTS_DIR="/path/to/scripts/" +export MOMENTRY_OUTPUT_DIR="/path/to/output/" +``` + +### 2. 
更新 Binary + +```bash +# 從 SMB 取得新版 binary +codesign --remove-signature momentry_v1.0.0_c41f7e0 +pkill momentry +DATABASE_SCHEMA=public ./momentry_v1.0.0_c41f7e0 server --port 3002 & +``` + +### 3. 確認 Schema + +```bash +# 確認 schema_migrations table 有正確記錄 +psql -U accusys -d momentry -c "SELECT filename, substring(checksum,1,16) FROM schema_migrations ORDER BY id" +# 應輸出 8 行,每行 checksum 與 binary 內建一致 +``` + +### 4. 啟動 Worker + +```bash +export MOMENTRY_PYTHON_PATH="/opt/homebrew/bin/python3.11" +export MOMENTRY_SCRIPTS_DIR="/Users/accusys/momentry_core_0.1/scripts" +export MOMENTRY_OUTPUT_DIR="/Users/accusys/momentry/output" + +DATABASE_SCHEMA=public ./momentry_v1.0.0_c41f7e0 worker \ + --max-concurrent 2 --poll-interval 5 +``` + +### 5. 驗證 + +```bash +# 確認 job 被 worker 取走 +curl -s http://localhost:3002/api/v1/jobs?status=running | jq '[.jobs[] | {id, uuid: .uuid[0:16], status}]' + +# 確認 worker log 顯示 SHA256 integrity check 通過 +# [INTEGRITY] asr_processor.py checksum OK +``` + +## Binary 更新 + +已更新 delivery package: +``` +release/delivery/v1.0.0_c41f7e0_20260515_180644/momentry_v1.0.0_c41f7e0 (21 MB) +``` + +## 健康檢查現況 (api.momentry.ddns.net) + +- Build: `c41f7e0c` ✅ +- Services: postgres/redis/qdrant/mongodb all ok ✅ +- Schema: **0/8** (需 migrations) +- Scripts integrity: **332/345** (13 mismatch — 可能是 scripts 版本差異) +- Processors: 12/12 available ✅ diff --git a/docs_v1.0/M4_workspace/2026-05-15_worker_pool_timeout_followup.md b/docs_v1.0/M4_workspace/2026-05-15_worker_pool_timeout_followup.md new file mode 100644 index 0000000..96634ba --- /dev/null +++ b/docs_v1.0/M4_workspace/2026-05-15_worker_pool_timeout_followup.md @@ -0,0 +1,36 @@ +# M4 回覆: Worker Pool Timeout — 修正後狀態 + +**Date**: 2026-05-15 +**From**: M4 +**To**: M5 +**Ref**: `2026-05-15_worker_pool_timeout_response.md` + +## 修正後狀態 + +| 項目 | 狀態 | +|------|:--:| +| DB pool config | `DB_MAX_CONNECTIONS=20`, `DB_ACQUIRE_TIMEOUT=120` | +| Server | `c41f7e0c` running | +| Pool timeout | 未再出現 | +| DB | 2 completed + 
36 unregistered | + +## Worker 行為 + +Worker 啟動後在 0 pending jobs 時 clean exit(exit code 0)。非崩潰。 + +``` +Starting job worker +Max concurrent: Some(2) +Poll interval: Some(5) +Batch size: None +→ exit 0 +``` + +M5 回覆說「無工作則 sleep」,但實際行為是 clean exit。需確認: +1. Worker 在無 pending job 時應 idle(持續 poll)還是 exit? +2. 是否需要設定 `--batch-size`? +3. 若有 job 時 worker 是否正常處理? + +## 待測試 + +等有 pending job 時再測 worker 完整流程。目前 0 pending,worker clean exit 不影響系統。 diff --git a/docs_v1.0/M4_workspace/2026-05-15_worker_pool_timeout_response.md b/docs_v1.0/M4_workspace/2026-05-15_worker_pool_timeout_response.md new file mode 100644 index 0000000..283e34f --- /dev/null +++ b/docs_v1.0/M4_workspace/2026-05-15_worker_pool_timeout_response.md @@ -0,0 +1,103 @@ +# M5 回覆: Worker Pool Timeout + Schema 問題 + +**Date**: 2026-05-15 +**From**: M5 +**To**: M4 +**Ref**: `2026-05-15_worker_status_report.md` + +--- + +## 1. Worker Schema — 程式碼確認 + +M4 報告指出 `src/worker/job_worker.rs` 使用 `dev.monitor_jobs`。M5 已確認 **當前 binary (`c41f7e0c`) 並無此問題**: + +```rust +// job_worker.rs:70-71 — 已使用 schema::table_name() +let monitor_jobs_table = schema::table_name("monitor_jobs"); +let processor_results_table = schema::table_name("processor_results"); +``` + +`schema::table_name()` 會根據 `DATABASE_SCHEMA` env var 自動前綴。若設定 `DATABASE_SCHEMA=public`,則產生 `public.monitor_jobs`。不須額外修正。 + +## 2. Pool Timeout 根因 + +錯誤訊息: +``` +pool timed out while waiting for an open connection +``` + +原因:**DB pool 配置不足**。預設 `max_connections=10`、`acquire_timeout=60s`。Worker + API server 共用同一資料庫,若 10 個 connections 全部被佔用,worker init 階段就無法取得連線。 + +### 解決方案 + +設定環境變數: + +```bash +export DB_MAX_CONNECTIONS=20 +export DB_ACQUIRE_TIMEOUT=120 +``` + +| Env Var | 預設值 | 建議值 | 說明 | +|---------|--------|--------|------| +| `DB_MAX_CONNECTIONS` | 10 | 20 | 最大連線數(worker + server 共享) | +| `DB_ACQUIRE_TIMEOUT` | 60 | 120 | 等待連線 timeout(秒) | + +## 3. 
Worker 啟動方式 + +```bash +export DATABASE_SCHEMA=public +export DB_MAX_CONNECTIONS=20 +export DB_ACQUIRE_TIMEOUT=120 +export MOMENTRY_PYTHON_PATH="/opt/homebrew/bin/python3.11" +export MOMENTRY_SCRIPTS_DIR="/Users/accusys/momentry_core_0.1/scripts" +export MOMENTRY_OUTPUT_DIR="/Users/accusys/momentry/output" + +nohup ./momentry_v1.0.0_c41f7e0 worker \ + --max-concurrent 2 \ + --poll-interval 5 \ + > /Users/accusys/momentry/log/momentry_worker.log 2>&1 & +``` + +## 4. Worker Clean Exit — 根因 + +M4 回報 worker 在 0 pending 時 clean exit(exit code 0)。M5 檢查發現 **production binary (`main.rs`) 的 worker handler 是 stub!** + +``` +src/main.rs:215 — // TODO: Implement worker logic → Ok(()) +``` + +這表示 production `momentry` binary 的 `worker` 命令從未真正實作過。worker 邏輯只存在於 `momentry_playground`(dev binary)。 + +### 修正 + +已將完整 worker 實作補回 `main.rs`。更新後的 binary 現在支援: +- `./momentry worker --max-concurrent 2 --poll-interval 5` ✅ +- 無 pending job 時 **idle(持續 poll)**,不會 exit +- 有 job 時自動處理 pipeline + +## 5. 目前 0 pending jobs — Worker 是否需要執行? + +需要。目前 35 個檔案狀態為 `unregistered`。當這些檔案透過註冊 API 進入系統後,worker 需要處理 pipeline。建議先啟動 worker 確認穩定。 + +## 6. Binary 更新(重要) + +**請重新下載 binary。** 本次修正包含: +1. Worker handler 從 stub → 完整實作(main.rs) +2. 
`PythonExecutor` 改用 env vars(非 compile-time path) + +``` +release/delivery/v1.0.0_c41f7e0_20260515_180644/momentry_v1.0.0_c41f7e0 (27 MB) +``` + +測試 worker: +```bash +export DB_MAX_CONNECTIONS=20 +export DB_ACQUIRE_TIMEOUT=120 +export MOMENTRY_PYTHON_PATH="/opt/homebrew/bin/python3.11" +export MOMENTRY_SCRIPTS_DIR="/Users/accusys/momentry_core_0.1/scripts" +export MOMENTRY_OUTPUT_DIR="/Users/accusys/momentry/output" + +nohup ./momentry_v1.0.0_c41f7e0 worker \ + --max-concurrent 2 --poll-interval 5 \ + > /Users/accusys/momentry/log/momentry_worker.log 2>&1 & +``` diff --git a/docs_v1.0/M4_workspace/2026-05-15_worker_status_report.md b/docs_v1.0/M4_workspace/2026-05-15_worker_status_report.md new file mode 100644 index 0000000..ddc5b1b --- /dev/null +++ b/docs_v1.0/M4_workspace/2026-05-15_worker_status_report.md @@ -0,0 +1,85 @@ +# M4 回報:3002 Worker 狀態 + +**Date**: 2026-05-15 +**From**: M4 +**To**: M5 + +## Worker 現狀 + +| 項目 | 狀態 | +|------|------| +| Worker process | ❌ 未啟動 | +| Worker log | 139,637 筆崩潰記錄(`pool timed out while waiting for an open connection`) | +| `public.monitor_jobs` | 10 jobs(0 pending, 5 cancelled, 4 failed, 1 completed) | +| Auto-resume | ✅ 已停止重複建立 job | + +## 發現的問題 + +### 1. Worker 崩潰循環 + +Worker log(`/Users/accusys/momentry/log/momentry_worker.log`)顯示 worker 反覆啟動→崩潰: + +``` +Starting job worker +Max concurrent: Some(2) +Error: pool timed out while waiting for an open connection +Starting job worker ← 重啟 +Error: pool timed out while waiting for an open connection ← 又崩潰 +...(139,637 entries) +``` + +### 2. 
Schema 硬編碼問題 + +Worker source code (`src/worker/job_worker.rs:68-81`) 使用 `dev.monitor_jobs`: + +```rust +sqlx::query( + "UPDATE dev.monitor_jobs SET status = 'pending', updated_at = NOW() + WHERE status = 'running' + AND id NOT IN ( + SELECT DISTINCT job_id FROM dev.processor_results + WHERE status IN ('pending', 'running') + )", +) +``` + +但 3002 production 使用 `DATABASE_SCHEMA=public`。若 worker 以 `public` 啟動,stale job reset 會 query 不存在的 `dev` schema。 + +### 3. 重複建立 Job + +Worker 崩潰→重啟循環期間,每次啟動都在 `public.monitor_jobs` 新增 job: + +| job id | file_uuid | 建立時間 | +|--------|-----------|----------| +| 149 | `dd61fda8...` | 19:31 | +| 150 | `dd61fda8...` | 19:37 | +| 151 | `dd61fda8...` | 19:40 | +| 152 | `dd61fda8...` | 19:44 | + +同一個 file_uuid 每 3-6 分鐘新增一筆 job。已由 M4 清除(DELETE 4 + UPDATE 4 → cancelled)。 + +### 4. DB 連線池配置 + +Binary 內部配置: +``` +DB_MAX_CONNECTIONS DB_ACQUIRE_TIMEOUT +``` +可能設定過低導致 `pool timed out`。 + +## M4 問題 + +1. Worker 應該如何啟動?使用什麼 env vars / schema? +2. Worker 的 schema 是否應跟隨 `DATABASE_SCHEMA` env var(而非 hardcode `dev`)? +3. DB pool 配置建議值? +4. 目前 0 pending jobs,worker 是否需要執行? 
+ +## 相關路徑 + +| 路徑 | 說明 | +|------|------| +| `/Users/accusys/momentry/log/momentry_worker.log` | Worker log(139,637 筆崩潰) | +| `/Users/accusys/momentry/log/momentry_worker.error.log` | Worker error log | +| `public.monitor_jobs` | Jobs table(production schema) | +| `public.processor_results` | Processor results | +| `src/worker/job_worker.rs` | Worker source(hardcoded `dev` schema) | +| `DATABASE_SCHEMA=public` | Production env var | diff --git a/docs_v1.0/REFERENCE/Brew_To_Source_Migration.md b/docs_v1.0/REFERENCE/Brew_To_Source_Migration.md new file mode 100644 index 0000000..6787cce --- /dev/null +++ b/docs_v1.0/REFERENCE/Brew_To_Source_Migration.md @@ -0,0 +1,139 @@ +# Brew → Source 遷移報告 + +**Date**: 2026-05-15 +**Status**: Planning +**Next action**: 逐項驗證 SHA256 + 下載 Source → Build + +--- + +## 總覽 + +Momentry Core 目前有 18 個核心服務透過 Homebrew 管理。目標是將這些服務全部遷移到 source build(原始碼編譯),實現 source code 可追蹤、可驗證、可重複建置。 + +--- + +## Momentry Core Brew 套件一覽 + +| # | Formula | Version | Binary Path | SHA256 | Status | +|---|---------|---------|-------------|--------|:------:| +| 1 | **php** | 8.5.5 | `/opt/homebrew/bin/php` | `173fd1ca36f3dd4952f5442572e06a14b7c005751ae15e7e42161606e931645c` | 🔴 brew | +| 2 | **mariadb** | 12.2.2 | `/opt/homebrew/bin/mariadbd` | `38cb48f0be673d4136c43a89c1aca5b314d30042dd09537d93b7995f52f90206` | 🔴 brew | +| 3 | **redis** | 8.6.3 | `/opt/homebrew/bin/redis-server` | ? | 🟡 brew+src | +| 4 | **mongodb-community** | 8.2.7 | `/opt/homebrew/bin/mongod` | ? | 🔴 brew | +| 5 | **ffmpeg** | 8.1.1 | `/opt/homebrew/bin/ffmpeg` | ? | 🟡 brew+src | +| 6 | **ffmpeg-full** | 8.1.1 | `/opt/homebrew/opt/ffmpeg-full/bin/ffmpeg` | ? | 🟡 brew+src | +| 7 | **node** | 25.9.0 | `/opt/homebrew/bin/node` | `fba87e4402c55ea4fc7ca9b9838790c32534e3e77c9c7834c37073752d070678` | 🔴 brew | +| 8 | **go** | 1.26.2 | `/opt/homebrew/bin/go` | ? | 🟡 brew | +| 9 | **python@3.11** | 3.11.15 | `/opt/homebrew/bin/python3.11` | ? 
| ✅ pyenv src | +| 10 | **ollama** | 0.23.1 | `/opt/homebrew/bin/ollama` | ? | 🔴 brew | +| 11 | **yt-dlp** | 2026.3.17 | `/opt/homebrew/bin/yt-dlp` | ? | 🔴 brew | +| 12 | **whisper-cpp** | 1.8.4 | `/opt/homebrew/bin/whisper-cpp` | ? | 🔴 brew | +| 13 | **tesseract** | 5.5.2 | `/opt/homebrew/bin/tesseract` | ? | 🔴 brew | +| 14 | **sdl2** | 2.32.10 | `/opt/homebrew/lib/libsdl2.dylib` | ? | 🟡 lib only | +| 15 | **cmake** | 4.3.2 | `/opt/homebrew/bin/cmake` | ? | ✅ src in `services/src/` | +| 16 | **mongosh** | 2.8.3 | `/opt/homebrew/bin/mongosh` | ? | 🔴 brew | +| 17 | **pgvector** | 0.8.2 | PostgreSQL extension | ? | 🟡 extension | +| 18 | **protobuf** | - | `/opt/homebrew/bin/protoc` | ? | 🟡 build dep | + +--- + +## 遷移優先級 + +### Phase 1 — 直接影響 Momentry 運行的服務 + +| Service | Source | 遷移原因 | +|---------|--------|---------| +| PHP 8.5.5 | `https://www.php.net/distributions/php-8.5.5.tar.gz` | WordPress hosting, FPM 不可中斷 | +| MariaDB 12.2.2 | `https://github.com/MariaDB/server/archive/mariadb-12.2.2.tar.gz` | WordPress + Momentry DB, data 需 migrate | +| Node.js 25.9.0 | `https://nodejs.org/dist/v25.9.0/node-v25.9.0.tar.gz` | Portal frontend build + npm packages | + +### Phase 2 — 高影響但可有 Buffer 的服務 + +| Service | Source | 遷移原因 | +|---------|--------|---------| +| Redis 8.6.3 | `https://redis.io/download/` | 已有 source in `services/src/redis-7.4.3.tar.gz` | +| MongoDB 8.2.7 | `https://github.com/mongodb/mongo` | Momentry cache, data 需 migrate | +| ffmpeg 8.1.1 | `https://ffmpeg.org/releases/ffmpeg-8.1.1.tar.xz` | 已有 source in `services/src/ffmpeg-7.1.1.tar.xz` | + +### Phase 3 — 輔助工具 + +| Service | Source | +|---------|--------| +| ollama 0.23.1 | `https://github.com/ollama/ollama` | +| yt-dlp | `https://github.com/yt-dlp/yt-dlp` | +| tesseract 5.5.2 | `https://github.com/tesseract-ocr/tesseract` | +| whisper-cpp 1.8.4 | `https://github.com/ggerganov/whisper.cpp` | +| protobuf | `https://github.com/protocolbuffers/protobuf` | + +--- + +## Source 歸檔對照 + +| Source Archive | 
Status | Path | +|---------------|--------|------| +| `redis-7.4.3.tar.gz` | ✅ | `release/system/v1.0/services/src/` | +| `ffmpeg-7.1.1.tar.xz` | ✅ | `release/system/v1.0/services/src/` | +| `cmake-4.2.0-macos-universal.tar.gz` | ✅ | `release/system/v1.0/services/src/` | +| `sftpgo-main.tar.gz` | ✅ | `release/system/v1.0/services/src/` | +| `postgresql-18.3.tar.gz` | ✅ | `release/system/v1.0/services/src/` | +| `llama.cpp/` | ✅ | `release/system/v1.0/services/src/` | +| `go/` | ✅ | `release/system/v1.0/services/src/` | +| `pyenv/` | ✅ | `release/system/v1.0/services/src/` | +| `php-*.tar.gz` | ❌ 需下載 | `release/system/v1.0/services/src/` | +| `mariadb-*.tar.gz` | ❌ 需下載 | `release/system/v1.0/services/src/` | +| `node-*.tar.gz` | ❌ 需下載 | `release/system/v1.0/services/src/` | +| `mongodb-*.tar.gz` | ❌ 需下載 | `release/system/v1.0/services/src/` | +| `ollama-*.tar.gz` | ❌ 需下載 | `release/system/v1.0/services/src/` | + +--- + +## SHA256 Checksum 填空 + +已知的 SHA256(待補其餘): + +```yaml +php: 173fd1ca36f3dd4952f5442572e06a14b7c005751ae15e7e42161606e931645c +mariadb (mariadbd): 38cb48f0be673d4136c43a89c1aca5b314d30042dd09537d93b7995f52f90206 +node: fba87e4402c55ea4fc7ca9b9838790c32534e3e77c9c7834c37073752d070678 +sftpgo (source): 6607334148917dd80a687706a3ae63ea8c532d10c6717c87491da23939c96d4a +sftpgo (binary): 9991d2a1c877d5bcae17cb4e026de939862e4b880924589cf4ed15ac7291ec7e +``` + +--- + +## Brew Leaves(user-installed only) + +``` +cmake, e2fsprogs, ffmpeg, ffmpeg-full, go, +mariadb, mongodb/brew/mongodb-community, ollama, +ossp-uuid, pgvector, php, pkgconf, protobuf, +python@3.11, redis, yt-dlp, zlib +``` + +--- + +## 執行步驟(待有時間時) + +```bash +# 1. 下載 source +cd /tmp +curl -O https://www.php.net/distributions/php-8.5.5.tar.gz +curl -LO https://github.com/MariaDB/server/archive/mariadb-12.2.2.tar.gz + +# 2. Archive + SHA256 +tar czf release/system/v1.0/services/src/php-8.5.5.tar.gz php-8.5.5/ +shasum -a 256 release/system/v1.0/services/src/php-8.5.5.tar.gz + +# 3. 
Build PHP +tar xzf php-8.5.5.tar.gz +cd php-8.5.5 +./configure --prefix=/Users/accusys/bin/php --with-fpm-user=accusys --with-fpm-group=staff +make -j$(sysctl -n hw.ncpu) +make install + +# 4. Update plist +sed -i '' 's|/opt/homebrew/bin/php|/Users/accusys/bin/php/bin/php|g' momentry_runtime/plist/com.momentry.php.plist + +# 5. Record in dev.resources +# INSERT INTO dev.resources ... +``` diff --git a/docs_v1.0/REFERENCE/Chunk_Fallback_Mechanism.md b/docs_v1.0/REFERENCE/Chunk_Fallback_Mechanism.md new file mode 100644 index 0000000..f902f2e --- /dev/null +++ b/docs_v1.0/REFERENCE/Chunk_Fallback_Mechanism.md @@ -0,0 +1,127 @@ +# 舊整數 ID 自動轉換 (Chunk Fallback) 說明 + +**Date**: 2026-05-16 +**Status**: Active + +--- + +## 問題背景 + +歷史遺留問題。早期版本的 chunk_id 儲存格式為純整數字串(`"0"`, `"1"`, `"2"`, ...),而非標準格式 `{uuid}_{start}_{end}`。 + +```sql +-- 舊格式(約 12,290 筆) +chunk_id = "69" + +-- 新格式(標準) +chunk_id = "3abeee81d94597629ed8cb943f182e94_998192" +``` + +Qdrant vector search 的 payload 中儲存的是舊的整數 chunk_id,當使用者或其他服務透過 Qdrant 結果存取 chunk 時,會嘗試用整數字串去查 chunk 表,導致查不到(404)。 + +--- + +## 解決方案:雙階段查詢 + +`src/core/db/postgres_db.rs:2793-2829` + +### Phase 1: 精確匹配 + +```rust +// 先用 chunk_id 精確比對 +WHERE chunk_id = $1 AND file_uuid = $2 +``` + +### Phase 2: 自動降級(整數 ID 相容) + +如果 Phase 1 找不到結果,且 `chunk_id` 是純數字字串,則改用 `id`(主鍵)查詢: + +```rust +if row.is_none() && chunk_id.bytes().all(|b| b.is_ascii_digit()) { + // 降級查詢:WHERE id = int(chunk_id) AND file_uuid = $2 +} +``` + +這保證即使是 Qdrant 回傳的舊整數 ID,也能正確解析到對應的 chunk。 + +--- + +## 流程圖 + +``` +GET /api/v1/file/:uuid/chunk/:cid + │ + ▼ + Phase 1: WHERE chunk_id = :cid + │ + ┌────┴────┐ + │ │ + 找到 找不到 + │ │ + │ :cid 是整數? 
+ │ ┌───┴───┐ + │ 是 否 + │ │ │ + │ ▼ 404 + │ Phase 2: + │ WHERE id = int(:cid) + │ │ + │ ┌──┴──┐ + │ 找到 404 + │ │ + ▼ ▼ + 回傳 Chunk +``` + +--- + +## 對應的 Migration + +`release/migrate_fix_chunk_id_format.sql` + +將所有舊格式 chunk_id 更新為標準格式: + +```sql +UPDATE chunk +SET chunk_id = file_uuid || '_' || id::text +WHERE chunk_id ~ '^[0-9]+$' + AND chunk_id != file_uuid || '_' || id::text; +``` + +執行後: +- 舊資料:`"69"` → `"3abeee81d94597629ed8cb943f182e94_69"` +- 新格式:保持不變 `"3abeee81d94597629ed8cb943f182e94_998192"` + +--- + +## API 使用範例 + +```bash +# 新格式(標準) +curl -sf -H "X-API-Key: $KEY" \ + "https://m5api.momentry.ddns.net/api/v1/file/${UUID}/chunk/${UUID}_998192" + +# 舊整數 ID(自動降級) +curl -sf -H "X-API-Key: $KEY" \ + "https://m5api.momentry.ddns.net/api/v1/file/${UUID}/chunk/998192" +``` + +兩者回傳相同結果: +```json +{ + "chunk_id": "3abeee81d94597629ed8cb943f182e94_998192", + "text": "Shorede stars two legends of classical Hollywood, Audrey Hepburn and Carrie Gran", + ... +} +``` + +--- + +## 風險與限制 + +| 項目 | 說明 | +|------|------| +| 效能 | 降級查詢多一次 DB round-trip,但僅在 Phase 1 找不到且 chunk_id 為純數字時觸發 | +| 唯一性 | `id`(序列主鍵)在全域唯一,但降級時仍加上 `AND file_uuid = $2` 避免跨檔案誤配 | +| 轉換期間 | migration 執行後所有 chunk_id 都已轉換為新格式,降級機制僅為相容 Qdrant 中殘留的舊 payload | +| Qdrant 同步 | 建議在 migration 後重新索引 Qdrant collection,清除殘留的舊 chunk_id payload | diff --git a/docs_v1.0/REFERENCE/Cut_Structure.md b/docs_v1.0/REFERENCE/Cut_Structure.md new file mode 100644 index 0000000..a322aab --- /dev/null +++ b/docs_v1.0/REFERENCE/Cut_Structure.md @@ -0,0 +1,108 @@ +# Cut 結構說明 + +**Date**: 2026-05-16 + +--- + +## 定義 + +Cut = **視覺 chunk**。 +同鏡頭連續拍攝的一組 frame(one continuous camera take)。 +由 `cut_processor.py`(PySceneDetect)偵測場景轉換點自動切割。 + +與 **聽覺 chunk(sentence)** 的對照: + +``` +chunk 類型 | 模態 | 產出者 | 內容 +----------------|-------|---------------------|---------------------- +cut (視覺) | video | cut_processor.py | scene boundary +sentence (聽覺) | audio | asr_processor.py | speech text +``` + +兩者各自獨立:cut 不含聽覺資訊,sentence 不含視覺資訊。 +但在時間軸上可以對齊:某個 cut 
時間區間內包含哪些 sentence。 + +特性: +- 同一個 cut 內:光線、構圖、背景一致 +- 換鏡頭 → 新 cut → 物體動態軌跡中斷 +- trace 必定落在一個 cut 內(不跨 cut) + +儲存在 `chunk` 表中,`chunk_type = 'cut'`。 + +## 儲存方式 + +獨立的 `cuts` 表: + +```sql +cuts 表 +├── id SERIAL PK +├── file_uuid VARCHAR(32) 所屬檔案 +├── cut_number INTEGER 同一檔案內序號 (1, 2, 3...) +├── start_frame BIGINT 開始影格 +├── end_frame BIGINT 結束影格 +├── start_time FLOAT8 開始時間(秒) +├── end_time FLOAT8 結束時間(秒) +├── fps FLOAT8 影格率 +├── metadata JSONB 附加資訊 +├── created_at TIMESTAMPTZ 建立時間 +└── UNIQUE(file_uuid, cut_number) +``` + +## 資料範例 + +```json +{ + "id": 989849, + "chunk_id": "989849", + "chunk_type": "cut", + "start_frame": 0, + "end_frame": 867, + "fps": 25.0, + "content": {"type": "scene", "scene_number": 1} +} +``` + +## 與 Trace 的關係 + +每個 cut 涵蓋一定範圍的影格,trace 則落在 cut 的影格範圍內: + +``` +cut 1: frame 0-867 → trace_id 0-31 (32 traces) +cut 2: frame 868-973 → trace_id 35-45 (11 traces) +cut 3: frame 974-1073 → trace_id 46 (1 trace) +``` + +`trace_id` 是 **per-file global sequential**,不因 cut 重設。 +同一個人在不同 cut 中會得到不同的 trace_id(因為物體軌跡不連續),但 trace_id 是持續給號的。 + +``` +cut 1 (camera A): frame 0-867 → trace_id 0~31 +cut 2 (camera B): frame 868-973 → trace_id 32~45 ← 繼續給號,不歸零 +cut 3 (camera C): frame 974-1073 → trace_id 46 ← 繼續給號 +``` + +**唯一約束仍然是 `(file_uuid, trace_id)`**,`cut_id` 只是輔助查詢用的 scope 欄位。 + +## 查詢 + +```sql +-- 列出所有 cuts +SELECT id, cut_number, start_frame, end_frame, + (end_frame - start_frame) as frames +FROM cuts +WHERE file_uuid = ? +ORDER BY cut_number; + +-- 某個 cut 內的 traces +SELECT DISTINCT fd.trace_id, count(*) as faces +FROM face_detections fd +WHERE fd.file_uuid = ? AND fd.cut_id = ? 
+GROUP BY fd.trace_id +ORDER BY fd.trace_id; +``` + +## 數量 + +| File | Cuts | Faces per Cut (avg) | +|------|:----:|:-------------------:| +| `3abeee81...` | 70 | ~1,546 | diff --git a/docs_v1.0/REFERENCE/Demo_EndToEnd.md b/docs_v1.0/REFERENCE/Demo_EndToEnd.md new file mode 100644 index 0000000..d4f7b44 --- /dev/null +++ b/docs_v1.0/REFERENCE/Demo_EndToEnd.md @@ -0,0 +1,839 @@ +# Momentry Core — Pipeline Demo End-to-End + +**Date**: 2026-05-15 +**Build**: `c41f7e0c` +**Server**: `http://api.momentry.ddns.net` (production) +**Format**: `jq` for JSON parsing (not python3) +**Scope**: File registration → Pipeline processing (multi-phase) → Post-processing verification + +--- + +## Table of Contents + +### Pipeline Phases + +| Phase | Step | What happens | +|-------|------|-------------| +| **Pre** | 1–4 | System check, scan, register, probe | +| **處理中** | 5–6 | Submit job → Worker picks up → Each processor runs (pending→running→completed) | +| **處理後** | 7–9 | All results → Search → Identities → Schema verification | + +--- + +## 1. 檢查系統狀況 + +```bash +API="http://api.momentry.ddns.net" +KEY="muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" + +# Basic health +curl -sf "$API/health" | jq '{status, version, build_git_hash, uptime_ms}' + +# Detailed health +curl -sf "$API/health/detailed" | jq '{ + services, + schema: .schema.ok, + scripts: .pipeline.scripts_count, + integrity: .pipeline.scripts_integrity, + procs: [.pipeline.processors | to_entries[] | select(.value == true and .key != "total_py_files") | .key] +}' +``` + +Output: +```json +{ + "status": "ok", + "version": "1.0.0", + "build_git_hash": "c41f7e0c", + "uptime_ms": 2756192 +} +{ + "services": {"postgres": "ok", "redis": "ok", "qdrant": "ok"}, + "schema": false, + "scripts": 291, + "integrity": {"matched": 332, "total": 345, "ok": false}, + "procs": ["asr","yolo","face","pose","ocr","cut","caption","scene","story","asrx","probe","visual_chunk"] +} +``` + +--- + +## 2. 
掃描檔案 + +掃描伺服器上所有與 `exasan` 相關的檔案(支援規則表達式): + +```bash +curl -sf -H "X-API-Key: $KEY" "$API/api/v1/files/scan?pattern=exasan" | \ + jq '[.files[] | {uuid: .file_uuid, name: .file_name, size: .file_size}]' +``` + +輸出(節錄): +```json +[ + {"uuid": "dd61fda85fee441f...", "name": "ExaSAN PCIe series - Director Ou Yu-Zhi Shares His Experience.mp4", "size": 6827600}, + {"uuid": "8e2e98c49355935f...", "name": "ExaSAN Webinar by Blake Jones, Vision2see.mp4", "size": 38635889}, + {"uuid": "477d8fa7bc0e1a7...", "name": "Thunderbolt ExaSAN at CCBN.mp4", "size": 13126748} +] +``` + +**Note**: `files/scan` 也可以掃所有檔案,或用於批次註冊。若不指定 pattern,回傳伺服器 `sftpgo/data/demo/` 目錄下所有檔案。 + +--- + +## 3. 註冊或確認 + +若檔案尚未註冊,使用 register API。若已存在(如本次示範),直接確認狀態: + +```bash +UUID="dd61fda85fee441fdd00ab5528213ff7" + +# 確認檔案狀態 +curl -sf -H "X-API-Key: $KEY" "$API/api/v1/file/${UUID}" | jq '{uuid: .file_uuid[0:16], name: .file_name, status, duration, fps}' + +# 若檔案不存在,使用註冊 API: +# curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \ +# -d '{"file_path": "/path/to/video.mp4"}' \ +# "$API/api/v1/files/register" | jq '.' +``` + +**註冊流程**: +``` +POST /files/register + ├─ SHA256 content_hash (dedup 檢查) + ├─ file_name 衝突檢查 (自動 rename) + ├─ Pre-process (SHA256 + ffprobe + UUID → .pre.json) + ├─ UUID = f(mac, mtime, path, filename) + ├─ Unified probe (video→ffprobe, doc→Python) + └─ INSERT INTO videos +``` + +--- + +## 4. Probe 確認 + +The probe endpoint returns ffprobe metadata about the registered file. 
+ +```bash +# Substitute the actual file_uuid from step 3 +FILE_UUID="e1111111111111111111111111111111" + +curl -s -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \ + "http://api.momentry.ddns.net/api/v1/file/${FILE_UUID}/probe" | python3 -m json.tool +``` + +Output (abbreviated): +```json +{ + "file_uuid": "e1111111111111111111111111111111", + "file_name": "demo_test_video.mp4", + "duration": 5.005, + "width": 640, + "height": 480, + "fps": 24.0, + "total_frames": 120, + "cached": true, + "format": { + "filename": "/tmp/demo_test_video.mp4", + "format_name": "mov,mp4,m4a,3gp,3g2,mj2", + "duration": "5.005000", + "size": "98304", + "bit_rate": "157184" + }, + "streams": [ + {"index": 0, "codec_type": "video", "codec_name": "h264", "width": 640, "height": 480, ...}, + {"index": 1, "codec_type": "audio", "codec_name": "aac", ...} + ] +} +``` + +**Error handling** (Bug #3 fix): +- Non-existent UUID → `{"error":"Video not found"}` + HTTP 404 +- File deleted from disk → `{"error":"File does not exist at registered path"}` + HTTP 404 +- ffprobe failure → `{"error":"ffprobe failed: ..."}` + HTTP 500 + +### ⚡ Intermediate Check — Bug #3: Probe Error Verification + +Test both error cases return proper JSON + HTTP code instead of bare 500: + +```bash +echo "=== Non-existent UUID → expect 404 ===" +curl -s -w "\nHTTP: %{http_code}\n" -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \ + "http://api.momentry.ddns.net/api/v1/file/bad_uuid_12345/probe" +# Expect: {"error":"Video not found","file_uuid":"bad_uuid_12345"} HTTP 404 + +echo "" +echo "=== Non-existent file path → expect 404 ===" +# Temporarily change file_path to a non-existent location +"$PG_BIN/psql" -U accusys -d momentry -c \ + "UPDATE dev.videos SET file_path = '/tmp/NONEXISTENT_FILE' WHERE file_uuid = '${FILE_UUID}'" +curl -s -w "\nHTTP: %{http_code}\n" -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \ + 
"http://api.momentry.ddns.net/api/v1/file/${FILE_UUID}/probe" +# Expect: {"error":"File does not exist at registered path",...} HTTP 404 +# Restore path +"$PG_BIN/psql" -U accusys -d momentry -c \ + "UPDATE dev.videos SET file_path = '/tmp/demo_test_video.mp4' WHERE file_uuid = '${FILE_UUID}'" +``` + +Output: +``` +=== Non-existent UUID → expect 404 === +{"error":"Video not found","file_uuid":"bad_uuid_12345"} +HTTP: 404 + +=== Non-existent file path → expect 404 === +{"error":"File does not exist at registered path","file_uuid":"e1111111111111111111111111111111","file_path":"/tmp/NONEXISTENT_FILE"} +HTTP: 404 +``` + +--- + +## 5. Process Video + +Trigger pipeline processing for specific processors. The available processors are: + +| Processor | Function | Script | +|-----------|----------|--------| +| `asr` | Speech-to-text (faster-whisper) | `asr_processor.py` | +| `cut` | Scene detection (PySceneDetect) | `cut_processor.py` | +| `yolo` | Object detection (YOLOv8) | `yolo_processor.py` | +| `face` | Face detection (InsightFace) | `face_processor.py` | +| `pose` | Pose estimation (MediaPipe) | `pose_processor.py` | +| `ocr` | Text detection (PaddleOCR) | `ocr_processor.py` | +| `asrx` | Speaker diarization | `asrx_processor.py` | +| `visual_chunk` | Visual content analysis | `visual_chunk_processor.py` | +| `scene` | Scene classification | `scene_classifier.py` | +| `story` | Story generation (LLM) | `story_processor.py` | +| `caption` | Caption generation | `caption_processor.py` | + +```bash +# Trigger only ASR + CUT for quick test +curl -s -X POST "http://api.momentry.ddns.net/api/v1/file/${FILE_UUID}/process" \ + -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \ + -H "Content-Type: application/json" \ + -d '{"processors": ["asr", "cut"]}' | python3 -m json.tool +``` + +Output: +```json +{ + "job_id": 161, + "file_uuid": "e1111111111111111111111111111111", + "status": "PENDING", + "pids": [], + "message": "Processing triggered for 
demo_test_video.mp4" +} +``` + +**Processing flow**: +``` +POST /process → trigger_processing() + ├─ Validate file exists (DB lookup) + ├─ Create monitor_job (status: PENDING) + ├─ Create processor_result rows for each requested processor (status: pending) + └─ Response { job_id, status: "PENDING" } +``` + +**Note**: If no processors are specified, all processors are used: +```json +{"processors": ["asr", "cut", "yolo", "ocr", "face", "pose", "asrx", "visual_chunk"]} +``` + +### ⚡ Intermediate Check — Verify Job + Processor Results after Trigger + +```bash +PG_BIN="/Users/accusys/pgsql/18.3/bin" + +# Check monitor_jobs table +"$PG_BIN/psql" -U accusys -d momentry -c " +SELECT id, uuid, status, current_processor, + to_char(created_at, 'HH24:MI:SS') AS created +FROM dev.monitor_jobs +WHERE uuid = '${FILE_UUID}' +ORDER BY id DESC LIMIT 1 +\gx +" + +# Check processor_results table +"$PG_BIN/psql" -U accusys -d momentry -c " +SELECT id, processor, status +FROM dev.processor_results +WHERE file_uuid = '${FILE_UUID}' +ORDER BY id +" +``` + +Output: +``` +-[ RECORD 1 ]------+----------------------------- +id | 161 +uuid | e1111111111111111111111111111111 +status | PENDING +current_processor | (null) +created | 19:00:30 + + id | processor | status +----+-----------+--------- + 1 | asr | pending + 2 | cut | pending +``` + +**Checklist after trigger:** +- [ ] `monitor_jobs.status = 'PENDING'` — job created, awaiting worker +- [ ] `processor_results` rows match requested processors (2 rows for `asr`, `cut`) +- [ ] Each `processor.status = 'pending'` — not yet executed + +--- + +## 6. Worker Execution + +The worker polls for pending jobs and executes them one by one. 
+ +```bash +DATABASE_SCHEMA=dev cargo run --bin momentry_playground -- worker \ + --max-concurrent 2 --poll-interval 5 +``` + +Or in background: +```bash +DATABASE_SCHEMA=dev nohup target/debug/momentry_playground worker \ + --max-concurrent 2 --poll-interval 5 > /tmp/worker_demo.log 2>&1 & +``` + +**Worker flow**: +``` +Worker loop (every 5 seconds): + ├─ Poll: SELECT * FROM monitor_jobs WHERE status = 'PENDING' + ├─ Set job status → RUNNING + ├─ For each pending processor: + │ ├─ SHA256 integrity check (verify_script_integrity) + │ │ └─ checksums.sha256 manifest lookup + │ ├─ Execute script via PythonExecutor + │ │ └─ Command: venv/bin/python scripts/{script_name}.py + │ ├─ Verify output (file exists, content valid) + │ └─ Update processor_result (completed/failed) + ├─ Check completion: all processors done? + ├─ Yes → Set job + video status → COMPLETED + └─ No → Wait for next poll cycle +``` + +**Worker log output**: +``` +[CHECKSUMS] Loaded 345 entries from checksums.sha256 +[INTEGRITY] asr_processor.py checksum OK +[ASR] Starting asr_processor.py +[INTEGRITY] cut_processor.py checksum OK +[CUT] Starting cut_processor.py +[ASR] Completed successfully +[CUT] Completed successfully +check_and_complete_job: results=2/2 → Job COMPLETED +``` + +### ⚡ Intermediate Check — Poll Progress During Worker Execution + +While the worker is running, poll the progress endpoint to watch state transitions: + +```bash +# Poll every 5 seconds until completed +FILE_UUID="e1111111111111111111111111111111" +for i in $(seq 1 12); do + sleep 5 + STATUS=$(curl -sf -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \ + "http://api.momentry.ddns.net/api/v1/progress/${FILE_UUID}" \ + | python3 -c "import json,sys;d=json.load(sys.stdin);print(d.get('status','?'))" 2>/dev/null || echo "pending") + echo "Poll $i: status=$STATUS" + [ "$STATUS" = "completed" ] || [ "$STATUS" = "failed" ] && break +done +``` + +Output (typical): +``` +Poll 1: status=registered ← worker hasn't picked 
it up yet +Poll 2: status=pending ← worker picked up, job status changed +Poll 3: status=processing ← worker running ASR +Poll 4: status=processing ← worker running CUT +Poll 5: status=completed ← all done +``` + +Check status transitions in DB: + +```bash +"$PG_BIN/psql" -U accusys -d momentry -c " +SELECT id, processor, status, + to_char(started_at, 'HH24:MI:SS') AS started, + to_char(completed_at, 'HH24:MI:SS') AS completed +FROM dev.processor_results +WHERE file_uuid = '${FILE_UUID}' +ORDER BY id +" +``` + +Output: +``` + id | processor | status | started | completed +----+-----------+------------+-----------+----------- + 1 | asr | completed | 19:01:02 | 19:01:25 + 2 | cut | completed | 19:01:02 | 19:01:08 +``` + +### ⚡ Processing Checklist — Step-by-Step Verification + +This checklist covers every stage of the pipeline processing flow: + +```bash +# ────────────────────────────────────────────────────── +# Stage A: Before Worker Starts +# ────────────────────────────────────────────────────── +PG_BIN="/Users/accusys/pgsql/18.3/bin" +FILE_UUID="e1111111111111111111111111111111" +KEY="muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" + +echo "=== A1. Job status = PENDING ===" +"$PG_BIN/psql" -U accusys -d momentry -c " +SELECT id, status, current_processor, created_at FROM dev.monitor_jobs WHERE uuid = '${FILE_UUID}' +" + +echo "=== A2. Processor results = pending ===" +"$PG_BIN/psql" -U accusys -d momentry -c " +SELECT id, processor, status FROM dev.processor_results WHERE file_uuid = '${FILE_UUID}' ORDER BY id +" + +# ────────────────────────────────────────────────────── +# Stage B: Worker Running +# ────────────────────────────────────────────────────── +echo "=== Start worker ===" +DATABASE_SCHEMA=dev nohup target/debug/momentry_playground worker \ + --max-concurrent 1 --poll-interval 3 > /tmp/worker_check.log 2>&1 & +WPID=$! + +echo "=== B1. 
Worker picks up job (within 3-10s) ===" +for i in $(seq 1 10); do + sleep 3 + JOB_STATUS=$("$PG_BIN/psql" -U accusys -d momentry -t -A -c \ + "SELECT status FROM dev.monitor_jobs WHERE uuid = '${FILE_UUID}'" 2>/dev/null) + VIDEO_STATUS=$("$PG_BIN/psql" -U accusys -d momentry -t -A -c \ + "SELECT status FROM dev.videos WHERE file_uuid = '${FILE_UUID}'" 2>/dev/null) + echo " Poll $i: job=$JOB_STATUS video=$VIDEO_STATUS" + echo " $(grep '\[INTEGRITY\]\|\[SCHEMA\]\|Starting:\|Completed\|failed\|Job ' /tmp/worker_check.log 2>/dev/null | tail -3)" + + # Check alive + kill -0 $WPID 2>/dev/null || { echo " Worker died unexpectedly"; break; } + + if [ "$VIDEO_STATUS" = "completed" ] || [ "$VIDEO_STATUS" = "failed" ]; then break; fi +done + +echo "=== B2. Each processor status ===" +"$PG_BIN/psql" -U accusys -d momentry -c " +SELECT id, processor, status, + to_char(started_at, 'HH24:MI:SS') AS started, + to_char(completed_at, 'HH24:MI:SS') AS completed, + COALESCE(chunks_produced, 0) AS chunks, + COALESCE(frames_processed, 0) AS frames, + COALESCE(error_message, '') AS error +FROM dev.processor_results +WHERE file_uuid = '${FILE_UUID}' +ORDER BY id +" + +kill $WPID 2>/dev/null || true + +# ────────────────────────────────────────────────────── +# Stage C: After Completion +# ────────────────────────────────────────────────────── +echo "=== C1. Video final status ===" +"$PG_BIN/psql" -U accusys -d momentry -c " +SELECT file_uuid, file_name, status, duration, fps, total_frames FROM dev.videos WHERE file_uuid = '${FILE_UUID}' +" + +echo "=== C2. Chunks produced ===" +"$PG_BIN/psql" -U accusys -d momentry -c " +SELECT chunk_type, count(*) FROM dev.chunk WHERE file_uuid = '${FILE_UUID}' GROUP BY chunk_type ORDER BY chunk_type +" + +echo "=== C3. Job final status ===" +"$PG_BIN/psql" -U accusys -d momentry -c " +SELECT id, status, current_processor FROM dev.monitor_jobs WHERE uuid = '${FILE_UUID}' +" +``` + +Expected output (all green): +``` +=== A1. 
Job status = PENDING === + id | status | current_processor | created_at +----+---------+-------------------+------------------- + 161| PENDING | | 2026-05-15 19:00:30 + +=== A2. Processor results = pending === + id | processor | status +----+-----------+--------- + 1 | asr | pending + 2 | cut | pending + +=== Start worker === +=== B1. Worker picks up job (within 3-10s) === + Poll 1: job=PENDING video=registered + Poll 2: job=RUNNING video=processing + [INTEGRITY] asr_processor.py checksum OK + Poll 3: job=RUNNING video=processing + [ASR] Starting: asr_processor.py + Poll 4: job=RUNNING video=processing + [ASR] Completed successfully + Poll 5: job=RUNNING video=processing + [CUT] Completed successfully + Poll 6: job=COMPLETED video=completed + +=== B2. Each processor status === + id | processor | status | started | completed | chunks | frames | error +----+-----------+-----------+-----------+-----------+--------+--------+------- + 1 | asr | completed | 19:01:02 | 19:01:25 | 3 | 120 | + 2 | cut | completed | 19:01:02 | 19:01:08 | 1 | 120 | + +=== C1. Video final status === + file_uuid | file_name | status | duration | fps | total_frames +--------------+---------------------+-----------+----------+-----+-------------- + e11111111... | demo_test_video.mp4 | completed | 5.005 | 24 | 120 + +=== C2. Chunks produced === + chunk_type | count +------------+------- + cut | 1 + sentence | 3 + +=== C3. Job final status === + id | status | current_processor +----+-----------+------------------- + 161| COMPLETED | (null) +``` + +**Checklist during execution:** + +| Stage | # | Check | Expected | Pass | +|-------|---|-------|----------|:----:| +| **A. Pre-worker** | A1 | `monitor_jobs.status` | `PENDING` | ☐ | +| | A2 | `processor_results` rows | = requested processor count | ☐ | +| | A3 | Each `processor_results.status` | `pending` | ☐ | +| **B. 
Running** | B1 | Job picked up (within poll interval) | status → `RUNNING` | ☐ | +| | B2 | SHA256 integrity check in logs | `[INTEGRITY] *.py checksum OK` | ☐ | +| | B3 | Each processor transitions | `pending → running → completed` | ☐ | +| | B4 | `started_at` populated | NOT NULL per processor | ☐ | +| | B5 | Processors complete without error | `error_message` is NULL | ☐ | +| | B6 | Max concurrent respected | ≤ `--max-concurrent` running at once | ☐ | +| **C. Post-completion** | C1 | `videos.status` | `completed` (not `failed`) | ☐ | +| | C2 | `chunks_produced` > 0 | ASR has sentence chunks | ☐ | +| | C3 | `monitor_jobs.status` | `COMPLETED` | ☐ | +| | C4 | `chunk` table has data | rows with this `file_uuid` | ☐ | +| | C5 | Chunk IDs formatted correctly | `{uuid}_{start}_{end}` | ☐ | + +--- + +## 7. Check Results + +Monitor job progress: + +```bash +# Check job status +curl -s -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \ + "http://api.momentry.ddns.net/api/v1/jobs?page=1&page_size=5&status=pending,running,completed,failed" \ + | python3 -c "import json,sys;d=json.load(sys.stdin);[print(f'{j[\"uuid\"]}: {j[\"status\"]}') for j in d.get('jobs',[])]" +``` + +Output: +``` +9eca53f422f668dd59a9995d29dc9388: completed +e1111111111111111111111111111111: completed +``` + +### ⚡ Intermediate Check — Bug #2: Chunk Fallback Verification + +Verify that both new and old chunk_id formats resolve correctly: + +```bash +# Pick a chunk_id from the DB +CHUNK_INFO=$("$PG_BIN/psql" -U accusys -d momentry -t -A -c " +SELECT chunk_id, id FROM dev.chunk WHERE file_uuid = '${FILE_UUID}' LIMIT 1 +") +NEW_ID=$(echo "$CHUNK_INFO" | cut -d'|' -f1) +DB_ID=$(echo "$CHUNK_INFO" | cut -d'|' -f2) + +echo "=== New format: $NEW_ID ===" +curl -s -w " HTTP %{http_code}" -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \ + "http://api.momentry.ddns.net/api/v1/file/${FILE_UUID}/chunk/${NEW_ID}" \ + | python3 -c "import 
json,sys;d=json.load(sys.stdin);print(f'chunk_id={d.get(\"chunk_id\")}')" 2>/dev/null + +echo "" +echo "=== Old integer fallback (id=$DB_ID) ===" +curl -s -w " HTTP %{http_code}" -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \ + "http://api.momentry.ddns.net/api/v1/file/${FILE_UUID}/chunk/${DB_ID}" \ + | python3 -c "import json,sys;d=json.load(sys.stdin);print(f'chunk_id={d.get(\"chunk_id\")}')" 2>/dev/null +``` + +Output: +``` +=== New format: e1111111111111111111111111111111_0_5 === +chunk_id=e1111111111111111111111111111111_0_5 HTTP 200 + +=== Old integer fallback (id=1075655) === +chunk_id=e1111111111111111111111111111111_0_5 HTTP 200 +``` + +Both return `chunk_id=e1111111111111111111111111111111_0_5` — the fallback correctly resolves `id=1075655` to the same chunk. + +### ⚡ Intermediate Check — Verify Chunks after Processing + +```bash +PG_BIN="/Users/accusys/pgsql/18.3/bin" + +# Count chunks produced +"$PG_BIN/psql" -U accusys -d momentry -c " +SELECT chunk_type, count(*) AS count +FROM dev.chunk +WHERE file_uuid = '${FILE_UUID}' +GROUP BY chunk_type +ORDER BY chunk_type +" + +# Sample chunk content +"$PG_BIN/psql" -U accusys -d momentry -c " +SELECT chunk_id, chunk_type, start_frame, end_frame, + substring(text_content, 1, 60) AS text_preview +FROM dev.chunk +WHERE file_uuid = '${FILE_UUID}' +ORDER BY start_frame +LIMIT 5 +" +``` + +Output: +``` + chunk_type | count +------------+------- + cut | 1 + sentence | 3 + + chunk_id | chunk_type | start_frame | end_frame | text_preview +--------------------------------------------------+------------+-------------+-----------+----------------------------------------------------- + e1111111111111111111111111111111_0_5 | cut | 0 | 120 | demo_test_video_auto_demo.mp4 + e1111111111111111111111111111111_0_0 | sentence | 0 | 120 | test pattern test pattern color bars test pattern ... 
+``` + +Check per-processor results in DB: + +```bash +"$PG_BIN/psql" -U accusys -d momentry -c " +SELECT processor, status, error_message, + to_char(started_at, 'HH24:MI:SS') AS started, + to_char(completed_at, 'HH24:MI:SS') AS completed, + COALESCE(chunks_produced, 0) AS chunks +FROM dev.processor_results +WHERE file_uuid='${FILE_UUID}' +ORDER BY id; +" +``` + +Output: +``` + processor | status | error_message | started | completed | chunks +-----------+-----------+---------------+-----------+-----------+-------- + asr | completed | | 19:01:02 | 19:01:25 | 3 + cut | completed | | 19:01:02 | 19:01:08 | 1 +``` + +**Checklist after processing:** +- [ ] `video.status = 'completed'` — pipeline finished +- [ ] `processor_results` all show `status = 'completed'` +- [ ] `chunks_produced > 0` — each processor produced output +- [ ] `chunk` table has rows with correct chunk_type (`cut`, `sentence`) +- [ ] `chunk_id` format is `{file_uuid}_{start}_{end}` (Bug #2 fix verified) + +--- + +## 8. Search Chunks + +After processing, search the generated chunks: + +```bash +# Text search (ASR output) +curl -s -X POST "http://api.momentry.ddns.net/api/v1/search/universal" \ + -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \ + -H "Content-Type: application/json" \ + -d "{\"query\": \"test\", \"uuid\": \"${FILE_UUID}\", \"limit\": 5}" \ + | python3 -c " +import json,sys;d=json.load(sys.stdin) +print(f'Total hits: {d[\"total\"]}') +for r in d['results']: + if r.get('chunk_id'): + print(f' {r[\"chunk_id\"]}: \"{r.get(\"text\",\"\")[:60]}\" score={r.get(\"score\",0):.3f}') +" +``` + +Output: +``` +Total hits: 3 + e1111111111111111111111111111111_0_5: "test pattern test pattern..." 
score=0.423 + e1111111111111111111111111111111_5_10: "silence" score=0.215 +``` + +Get a specific chunk by ID: + +```bash +# Single chunk detail +curl -s -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \ + "http://api.momentry.ddns.net/api/v1/file/${FILE_UUID}/chunk/${FILE_UUID}_0_5" \ + | python3 -c " +import json,sys;d=json.load(sys.stdin) +print(f'Type: {d[\"chunk_type\"]} Rule: {d[\"rule\"]}') +print(f'Frame: {d[\"start_frame\"]}–{d[\"end_frame\"]} FPS: {d[\"fps\"]}') +print(f'Text: {d[\"text_content\"][:100]}') +" +``` + +--- + +## 9. Health Check + +```bash +# Basic health +curl -sf http://api.momentry.ddns.net/health | python3 -m json.tool + +# Detailed health (services + pipeline + schema + resources) +curl -sf http://api.momentry.ddns.net/health/detailed | python3 -c " +import json,sys;d=json.load(sys.stdin) +p=d['pipeline'];s=d['schema'] +print(f'Status: {d[\"status\"]}') +print(f'Build: {d[\"build_git_hash\"]}') +print(f'Services: postgres={d[\"services\"][\"postgres\"][\"status\"]} redis={d[\"services\"][\"redis\"][\"status\"]}') +print(f'Schema: {s[\"applied\"][-1][\"filename\"] if s[\"applied\"] else \"none\"} ({len(s[\"applied\"])}/{len(s[\"required\"])} applied, ok={s[\"ok\"]})') +print(f'Scripts: {p[\"scripts_count\"]} files, integrity={p[\"scripts_integrity\"][\"matched\"]}/{p[\"scripts_integrity\"][\"total\"]}') +print(f'Procs: ' + ' '.join([k for k,v in p['processors'].items() if v and k != 'total_py_files'])) +" +``` + +Output: +``` +Status: ok +Build: 0e73d2a +Services: postgres=ok redis=ok +Schema: migrate_fix_chunk_id_format.sql (8/8 applied, ok=True) +Scripts: 286 files, integrity=345/345 +Procs: asr yolo face pose ocr cut caption scene story asrx probe visual_chunk +``` + +--- + +## 10. Schema Version + +Each binary embeds a list of required migrations. At startup and via `/health/detailed`, the server verifies all migrations are applied. 
+ +```bash +# Check schema version via API +curl -sf http://api.momentry.ddns.net/health/detailed | python3 -c " +import json,sys;d=json.load(sys.stdin)['schema'] +print(f'Table exists: {d[\"table_exists\"]}') +print(f'All OK: {d[\"ok\"]}') +for m in d['required']: + match = '✓' if any(a['filename']==m['filename'] and a['checksum']==m['checksum'] for a in d['applied']) else '✗' + print(f' {match} {m[\"filename\"]} {m[\"checksum\"][:16]}') +" +``` + +Output: +``` +Table exists: True +All OK: True + ✓ migrate_add_content_hash.sql 42b81554248c4bec + ✓ migrate_add_registered_status.sql 566fdfcdc624f6fa + ✓ migrate_add_schema_version.sql 585b31df6056a937 + ✓ migrate_cleanup_inactive_identities.sql daa52a0827b24a77 + ✓ migrate_fix_chunk_id_format.sql a1b2c3d4e5f6a7b8 + ✓ migrate_public_schema_v4.sql 973908076c614363 + ✓ migrate_public_schema_v4_tables.sql 1d62dc42e4dec8f4 + ✓ migrate_public_v4_complete.sql 2a6fda7d2c5660e4 +``` + +If a migration is missing at startup: +``` +[SCHEMA] 7/8 migrations applied. 
Missing: migrate_fix_chunk_id_format.sql +``` + +--- + +--- + +## Summary Checklist + +After completing a pipeline run, verify all items: + +### Registration + +| # | Check | Expected | Pass | +|---|-------|----------|:----:| +| 1 | `videos.status` | `registered` | ☐ | +| 2 | file_uuid consistency | API response uuid = DB uuid | ☐ | +| 3 | Probe returns metadata | `duration > 0`, `fps > 0` | ☐ | +| 4 | Probe error (Bug #3) | Bad UUID → JSON error + 404 | ☐ | + +### Processing + +| # | Check | Expected | Pass | +|---|-------|----------|:----:| +| 5 | Job created | `monitor_jobs.status = PENDING` | ☐ | +| 6 | Processors queued | `processor_results` rows = requested count | ☐ | +| 7 | Worker picks up job | `monitor_jobs.status → RUNNING` | ☐ | +| 8 | SHA256 integrity (Bug #2) | `[INTEGRITY] *.py checksum OK` | ☐ | +| 9 | Each processor completes | `processor_results.status = completed` | ☐ | +| 10 | No processor errors | `error_message` all NULL | ☐ | +| 11 | Pipeline completes | `videos.status = completed` | ☐ | + +### Results + +| # | Check | Expected | Pass | +|---|-------|----------|:----:| +| 12 | Chunks produced | `chunk` table has > 0 rows | ☐ | +| 13 | Chunk ID format | `chunk_id = {uuid}_{start}_{end}` | ☐ | +| 14 | Chunk fallback (Bug #2) | Old integer ID → 200 via handler fallback | ☐ | +| 15 | Search works | `POST /search/universal` returns hits | ☐ | +| 16 | Schema version | `schema.ok = true` in `/health/detailed` | ☐ | + +--- + +## Full Automation Script + +Save as `demo_full_cycle.sh`: + +```bash +#!/bin/bash +set -euo pipefail + +API="http://api.momentry.ddns.net" +KEY="muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" +PG="/Users/accusys/pgsql/18.3/bin" + +# Generate test video +ffmpeg -y -f lavfi -i "testsrc=duration=5:size=640x480:rate=24" \ + -f lavfi -i "anullsrc=r=44100:cl=mono" \ + -c:v libx264 -preset ultrafast -crf 28 -c:a aac -shortest \ + /tmp/auto_demo.mp4 2>/dev/null + +# Register +UUID=$(curl -sf -X POST 
"$API/api/v1/files/register" \ + -H "X-API-Key: $KEY" -H "Content-Type: application/json" \ + -d '{"file_path": "/tmp/auto_demo.mp4"}' | python3 -c "import json,sys;print(json.load(sys.stdin)['file_uuid'])") +echo "Registered: $UUID" + +# Process +curl -sf -X POST "$API/api/v1/file/$UUID/process" \ + -H "X-API-Key: $KEY" -H "Content-Type: application/json" \ + -d '{"processors":["asr","cut"]}' > /dev/null +echo "Processing triggered" + +# Run worker +DATABASE_SCHEMA=dev target/debug/momentry_playground worker \ + --max-concurrent 1 --poll-interval 3 & +WPID=$! +sleep 30 +kill $WPID 2>/dev/null || true + +# Results +"$PG/psql" -U accusys -d momentry -c " +SELECT processor, status FROM dev.processor_results WHERE file_uuid='$UUID' ORDER BY id" +echo "Done: $UUID" +``` diff --git a/docs_v1.0/REFERENCE/Face_Pipeline.md b/docs_v1.0/REFERENCE/Face_Pipeline.md new file mode 100644 index 0000000..7da9abc --- /dev/null +++ b/docs_v1.0/REFERENCE/Face_Pipeline.md @@ -0,0 +1,120 @@ +# Face Pipeline: Detection → Clustering → Trace + +**Date**: 2026-05-16 + +--- + +## 流程 + +``` +Video Frames + │ + ▼ +┌─────────────────────────────┐ +│ 0. Cut Detection │ PySceneDetect +│ scene boundaries │ → chunk (chunk_type='cut') +└─────────────────────────────┘ + │ + ▼ +┌─────────────────────────────┐ +│ 1. Face Detection │ 每幀偵測人臉 +│ confidence ≥ 0.5 │ → face_detections (cut_id 對應所屬 cut) +└─────────────────────────────┘ + │ + ▼ +┌─────────────────────────────┐ +│ 2. Face Clustering │ embedding + IoU + distance +│ trace_id assignment │ 同一人 + 同 cut → 同一 trace_id +│ per-file sequential │ trace_id 跨 cut 持續給號(不歸零) +└─────────────────────────────┘ + │ + ▼ +┌─────────────────────────────┐ +│ 3. Face Trace │ 跨影格連續追蹤 +│ per-file sequential │ trace_id = 0, 1, 2, ... +│ scoped by cut │ 每個 trace 完全落在一個 cut 內 +└─────────────────────────────┘ + │ + ▼ +┌─────────────────────────────┐ +│ 4. 
Identity Binding │ embedding 比對 +│ identity_id assignment │ → known person / stranger +└─────────────────────────────┘ +``` + +## scope + +```sql +trace_id → per-file sequential (file_uuid, trace_id) 唯一 +cut_id → chunk.id WHERE chunk_type='cut' 輔助 scope,不影響唯一性 +identity_id → global FK 跨 cut / file 關聯同一人 +``` + +## 約束 + +| 約束 | 說明 | +|------|------| +| 唯一 | `(file_uuid, trace_id)` | +| 單一 cut | 每個 trace 完全落在一個 cut 內(`0` 個跨 cut trace) | +| 獨立 | `trace_id` ≠ `identity_id`。前者是物體軌跡,後者是身份分別 | + +## 各階段資料量 + +``` +Stage | 量 | Key +------------------------|-------------|---------------------- +Raw faces | 262,021 | face_detections rows +After clustering | 6,892 | distinct trace_id +With identity | 147,602 | identity_id NOT NULL (2,035 identities) +Stranger (unbound) | 114,419 | identity_id IS NULL +``` + +## Trace 大小分布 + +| Faces per trace | Trace count | 說明 | +|:---------------:|:-----------:|------| +| 1 | 610 | 一閃而過 | +| 2-5 | 969 | 短暫出現 | +| 6-20 | 1,541 | 片段 | +| 21-100 | 2,218 | 一般 | +| 101+ | 1,554 | 主要角色 | + +## Clustering 方式 + +Face Tracker (`scripts/face_tracker.py`) 使用三種方法決定同一人: + +1. **IoU (Intersection over Union)** — 前後影格框重疊率 +2. **Cosine distance** — face embedding 相似度 +3. **Euclidean distance** — bbox 中心距離 + +三者加權決策:iou > 0.5 || (cosine < 0.3 && distance < 100px) + +## Trace 結構 + +```json +{ + "trace_id": 2, // per-file sequential + "faces": [ // face_detections GROUP BY trace_id + {"face_id": "4587_0", "frame": 4587, "confidence": 0.92}, + {"face_id": "4588_0", "frame": 4588, "confidence": 0.91}, + ... 
+ ], + "start_frame": 4587, + "end_frame": 4722, + "face_count": 46, + "identity_id": 101 // NULL = stranger +} +``` + +## API 查詢 + +```bash +# Trace 列表(含 face_count、區間) +POST /api/v1/file/:uuid/face_trace/sortby + +# Trace 內 faces(逐幀 + 可選 interpolation) +GET /api/v1/file/:uuid/trace/:trace_id/faces + +# Trace 綁定身份 +POST /api/v1/identity/:uuid/bind +``` diff --git a/docs_v1.0/REFERENCE/M5API_Pipeline_Demo.md b/docs_v1.0/REFERENCE/M5API_Pipeline_Demo.md new file mode 100644 index 0000000..c6f1126 --- /dev/null +++ b/docs_v1.0/REFERENCE/M5API_Pipeline_Demo.md @@ -0,0 +1,468 @@ +# Momentry Core — M5API Pipeline Demo + +**Date**: 2026-05-16 +**Server**: `https://m5api.momentry.ddns.net` +**Build**: `c41f7e0c` +**Format**: All commands use `curl` + `jq` (API only) + +--- + +## Prerequisites + +```bash +API="https://m5api.momentry.ddns.net" +KEY="muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" +``` + +--- + +## Step 1: System Health Check + +```bash +curl -sf "$API/health" | jq '{ip, port, status, version, build_git_hash}' +``` + +Response: +```json +{ + "ip": "192.168.110.201", + "port": 3002, + "status": "ok", + "version": "1.0.0", + "build_git_hash": "c41f7e0c" +} +``` + +All core services verified: +```bash +curl -sf "$API/health/detailed" | jq '{ + services, schema: .schema.ok, + scripts: .pipeline.scripts_count, + integrity: .pipeline.scripts_integrity, + procs: [.pipeline.processors | to_entries[] | select(.value==true and .key!="total_py_files") | .key] +}' +``` + +Response: +```json +{ + "services": { + "postgres": {"status": "ok"}, + "redis": {"status": "ok"}, + "qdrant": {"status": "ok"}, + "mongodb": {"status": "ok"} + }, + "schema": true, + "scripts": 286, + "integrity": {"matched": 345, "total": 345, "ok": true}, + "procs": ["asr","yolo","face","pose","ocr","cut","caption","scene","story","asrx","probe","visual_chunk"] +} +``` + +--- + +## Step 2: List Registered Files + +```bash +curl -sf -H "X-API-Key: $KEY" 
"$API/api/v1/files?page=1&page_size=5" | \ + jq '{total, files: [.data[]? | {name: .file_name[0:50], status}]}' +``` + +Response: +```json +{ + "total": 56, + "files": [ + {"name": "Charade (1963) Cary Grant & Audrey Hepburn ...", "status": "completed"}, + {"name": "ExaSAN PCIe series - Director Ou Yu-Zhi ...", "status": "completed"}, + {"name": "Old_Time_Movie_Show_-_Charade_1963.HD.mov", "status": "completed"}, + {"name": "Old Felix the Cat Cartoon.mp4", "status": "unregistered"}, + {"name": "short_clip.mov", "status": "completed"} + ] +} +``` + +--- + +## Step 3: Register a New File + +```bash +# POST with file_path (must exist on server filesystem) +curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \ + -d '{"file_path": "/path/to/video.mp4"}' \ + "$API/api/v1/files/register" | jq '{success, file_uuid, file_name, file_type, duration, fps, already_exists}' +``` + +Response (new registration): +```json +{ + "success": true, + "file_uuid": "3abeee81d94597629ed8cb943f182e94", + "file_name": "Charade (1963) Cary Grant & Audrey Hepburn ...mp4", + "file_type": "video", + "duration": 6785.014, + "fps": 23.976, + "already_exists": false +} +``` + +Response (duplicate content — SHA256 dedup): +```json +{ + "success": true, + "already_exists": true, + "message": "Content already registered (identical file)" +} +``` + +--- + +## Step 4: Probe (ffprobe Metadata) + +```bash +UUID="3abeee81d94597629ed8cb943f182e94" + +curl -sf -H "X-API-Key: $KEY" "$API/api/v1/file/${UUID}/probe" | \ + jq '{name: .file_name, video: "\(.width)x\(.height)", fps, duration, cached, streams: [.streams[] | {type: .codec_type, codec: .codec_name}]}' +``` + +Response: +```json +{ + "name": "Charade (1963) Cary Grant & Audrey Hepburn ...mp4", + "video": "720x304", + "fps": 23.976, + "duration": 6785.014, + "cached": true, + "streams": [ + {"type": "video", "codec": "h264"}, + {"type": "audio", "codec": "aac"} + ] +} +``` + +Error cases: +```bash +# Non-existent UUID +curl -sf 
-H "X-API-Key: $KEY" "$API/api/v1/file/bad_uuid/probe" +# → {"error":"Video not found","file_uuid":"bad_uuid"} HTTP 404 + +# File deleted from disk +# → {"error":"File does not exist at registered path","file_uuid":"...","file_path":"..."} HTTP 404 +``` + +--- + +## Step 5: Submit Processing Job + +```bash +# Specific processors +curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \ + -d '{"processors":["asr","cut","yolo","face","pose","ocr"]}' \ + "$API/api/v1/file/${UUID}/process" | jq '{job_id, file_uuid: .file_uuid[0:16], status}' +``` + +Response: +```json +{ + "job_id": 167, + "file_uuid": "3abeee81d9459762", + "status": "PENDING" +} +``` + +> **All processors**: Send `{}` (an empty JSON object) to run all processors. +> Available: `asr`, `cut`, `yolo`, `face`, `pose`, `ocr`, `asrx`, `visual_chunk`, `scene`, `story`, `caption` + +--- + +## Step 6: Monitor Progress + +```bash +while true; do + PROGRESS=$(curl -sf -H "X-API-Key: $KEY" "$API/api/v1/progress/${UUID}") + STATUS=$(echo "$PROGRESS" | jq -r '.status // "?"') + PROCS=$(echo "$PROGRESS" | jq -r '[.processors[]?
| "\(.name)=\(.status)(\(.frames_processed))"] | join(" ")') + echo "$(date +%H:%M:%S): $PROCS" + echo "$PROGRESS" | jq -e '[.processors[]?.status] | length > 0 and all(. != "pending" and . != "running")' >/dev/null && break + sleep 10 +done +``` + +Typical output: +``` +12:30:01: asr=pending(0) cut=pending(0) yolo=pending(0) face=pending(0) pose=pending(0) ocr=pending(0) +12:30:11: asr=running(0) cut=running(0) yolo=pending(0) face=pending(0) pose=pending(0) ocr=pending(0) +12:30:21: asr=running(0) cut=completed(8951) yolo=running(0) face=pending(0) pose=pending(0) ocr=pending(0) +12:30:31: asr=running(0) cut=completed(8951) yolo=completed(8951) face=running(0) pose=pending(0) ocr=pending(0) +12:30:41: asr=running(0) cut=completed(8951) yolo=completed(8951) face=completed(8951) pose=running(0) ocr=pending(0) +12:30:51: asr=completed(8951) cut=completed(8951) yolo=completed(8951) face=completed(8951) pose=completed(8951) ocr=running(0) +12:31:01: asr=completed(8951) cut=completed(8951) yolo=completed(8951) face=completed(8951) pose=completed(8951) ocr=completed(8951) +``` + +**Status transition chain**: `pending → running → completed` + +Check job state: +```bash +curl -sf -H "X-API-Key: $KEY" "$API/api/v1/jobs?uuid=${UUID}" | \ + jq '[.jobs[]?
| {id, status}]' +``` + +--- + +## Step 7: Verify Results + +```bash +curl -sf -H "X-API-Key: $KEY" "$API/api/v1/progress/${UUID}" | \ + jq '{processors: [.processors[] | {name, status, frames: .frames_processed}]}' +``` + +Response: +```json +{ + "processors": [ + {"name": "asr", "status": "completed", "frames": 162568}, + {"name": "cut", "status": "completed", "frames": 162568}, + {"name": "yolo", "status": "completed", "frames": 162568}, + {"name": "face", "status": "completed", "frames": 162568}, + {"name": "pose", "status": "completed", "frames": 162568}, + {"name": "ocr", "status": "completed", "frames": 162568} + ] +} +``` + +--- + +## Step 8: Universal Search + +```bash +# Search for a person name +curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \ + -d "{\"query\":\"Audrey\",\"uuid\":\"${UUID}\",\"limit\":3}" \ + "$API/api/v1/search/universal" | \ + jq '{total, hits: [.results[]? | {chunk_id: .chunk_id[0:40], text: .text[0:80], score}]}' +``` + +Response: +```json +{ + "total": 2, + "hits": [ + { + "chunk_id": "3abeee81d94597629ed8cb943f182e94_998192", + "text": "Shorede stars two legends of classical Hollywood, Audrey Hepburn and Carrie Gran", + "score": 0.9 + }, + { + "chunk_id": "3abeee81d94597629ed8cb943f182e94_998193", + "text": "Shorede stars two legends of classical Hollywood, Audrey Hepburn and Carrie Gran", + "score": 0.9 + } + ] +} +``` + +```bash +# Search Chinese text +curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \ + -d "{\"query\":\"導演\",\"uuid\":\"${UUID}\",\"limit\":3}" \ + "$API/api/v1/search/universal" | jq '{total}' +``` + +**Search modes**: The universal search endpoint supports: +- Text match (ILIKE on `text_content` and `content` columns) +- Time range filtering (`time_range: [start, end]`) +- Speaker/person ID filtering +- Chunk type filtering +- Visual content filtering (objects, density, classes) + +--- + +## Step 9: Get Chunk Detail + +```bash 
+CHUNK_ID="3abeee81d94597629ed8cb943f182e94_998192" + +curl -sf -H "X-API-Key: $KEY" "$API/api/v1/file/${UUID}/chunk/${CHUNK_ID}" | \ + jq '{chunk_id, chunk_type, text: .text_content, fps, start_frame, end_frame}' +``` + +Response: +```json +{ + "chunk_id": "3abeee81d94597629ed8cb943f182e94_998192", + "chunk_type": "sentence", + "text": "Shorede stars two legends of classical Hollywood, Audrey Hepburn and Carrie Gran", + "fps": 23.976, + "start_frame": 2395281, + "end_frame": 2395341 +} +``` + +--- + +## Step 10: Chunk Fallback (Stale Qdrant Compatibility) + +Old integer-format chunk_ids from stale Qdrant payloads are automatically resolved via `WHERE id = int(chunk_id)`: + +```bash +# Integer format (old Qdrant payload) +curl -sf -H "X-API-Key: $KEY" "$API/api/v1/file/${UUID}/chunk/998192" | \ + jq '{chunk_id, text: .text_content}' +``` + +Response (same chunk as above): +```json +{ + "chunk_id": "3abeee81d94597629ed8cb943f182e94_998192", + "text": "Shorede stars two legends of classical Hollywood, Audrey Hepburn and Carrie Gran" +} +``` + +**Both formats work:** +- `chunk/{uuid}_{id}` → exact `chunk_id` match +- `chunk/{id}` → fallback by primary key `id` + +--- + +## Step 11: File Detail + +```bash +curl -sf -H "X-API-Key: $KEY" "$API/api/v1/file/${UUID}" | \ + jq '{file_name, status, file_type, file_path}' +``` + +Response: +```json +{ + "file_name": "Charade (1963) Cary Grant & Audrey Hepburn ...mp4", + "status": "completed", + "file_type": "video", + "file_path": "/Users/accusys/momentry/var/sftpgo/data/demo/Charade..." +} +``` + +--- + +## Step 12: File Identities + +```bash +curl -sf -H "X-API-Key: $KEY" "$API/api/v1/file/${UUID}/identities" | \ + jq '{total, identities: [.data[]? 
| {name, face_count, confidence}]}' +``` + +Response: +```json +{ + "total": 2, + "identities": [ + {"name": "Audrey Hepburn", "face_count": 22082, "confidence": 0.93}, + {"name": "Cary Grant", "face_count": 15334, "confidence": 0.91} + ] +} +``` + +--- + +## Step 13: Identity Detail + +```bash +# List all global identities +curl -sf -H "X-API-Key: $KEY" "$API/api/v1/identities?page=1&page_size=3" | \ + jq '{total, identities: [.data[]? | {name, type: .identity_type, source}]}' +``` + +```bash +# Get identity files (cross-file faces) +IDENTITY_UUID="c3545906-c82d-4b66-aa1d-150bc02decce" +curl -sf -H "X-API-Key: $KEY" "$API/api/v1/identity/${IDENTITY_UUID}/files" | \ + jq '{total, files: [.data[]? | {file_uuid: .file_uuid[0:16], face_count}]}' +``` + +--- + +## Step 14: Schema & Integrity Verification + +```bash +curl -sf "$API/health/detailed" | jq '{ + ip, port, + schema: .schema.ok, + migrations: [.schema.applied[]?.filename], + integrity: .pipeline.scripts_integrity +}' +``` + +Response: +```json +{ + "ip": "192.168.110.201", + "port": 3002, + "schema": true, + "migrations": [ + "migrate_add_content_hash.sql", + "migrate_add_registered_status.sql", + "migrate_add_schema_version.sql", + "migrate_cleanup_inactive_identities.sql", + "migrate_public_schema_v4_tables.sql", + "migrate_public_schema_v4.sql", + "migrate_public_v4_complete.sql", + "migrate_fix_chunk_id_format.sql", + "migrate_add_identity_indexes.sql" + ], + "integrity": {"matched": 345, "total": 345, "ok": true} +} +``` + +--- + +## Full Automation Script + +```bash +#!/bin/bash +set -euo pipefail + +API="${API:-https://m5api.momentry.ddns.net}" +KEY="${KEY:-muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69}" + +# 1. Health +echo "=== Health ===" +curl -sf "$API/health" | jq '{status, version, build_git_hash}' + +# 2. 
Register file (argument: file path) +FILE_PATH="${1:?Usage: $0 <file_path>}" +echo "=== Register ===" +REG=$(curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \ + -d "{\"file_path\":\"$FILE_PATH\"}" "$API/api/v1/files/register") +echo "$REG" | jq '{success, file_uuid, file_name}' +UUID=$(echo "$REG" | jq -r '.file_uuid // empty') +[ -z "$UUID" ] && { echo "Registration failed"; exit 1; } + +# 3. Probe +echo "=== Probe ===" +curl -sf -H "X-API-Key: $KEY" "$API/api/v1/file/${UUID}/probe" | \ + jq '{name: .file_name, fps, duration}' + +# 4. Submit job +echo "=== Process ===" +curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \ + -d '{}' "$API/api/v1/file/${UUID}/process" | jq '{job_id, status}' + +# 5. Poll progress +echo "=== Waiting for pipeline... ===" +while true; do + PROGRESS=$(curl -sf -H "X-API-Key: $KEY" "$API/api/v1/progress/${UUID}") + STATUS=$(echo "$PROGRESS" | jq -r '.status // "?"') + echo "$(date +%H:%M:%S): $(echo "$PROGRESS" | jq -r '[.processors[]? | "\(.name)=\(.status)(\(.frames_processed))"] | join(" ")')" + echo "$PROGRESS" | jq -e '[.processors[]? | select(.status == "pending" or .status == "running")] | length == 0' >/dev/null && break + sleep 10 +done + +# 6. Search +echo "=== Search ===" +curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \ + -d "{\"query\":\"test\",\"uuid\":\"${UUID}\",\"limit\":3}" \ + "$API/api/v1/search/universal" | jq '{total, hits: [.results[]? | {chunk_id: .chunk_id[0:30], text: .text[0:60]}]}' + +echo "" +echo "✅ Done: $UUID" +``` diff --git a/docs_v1.0/REFERENCE/Person_vs_Identifiable.md b/docs_v1.0/REFERENCE/Person_vs_Identifiable.md new file mode 100644 index 0000000..9dd000e --- /dev/null +++ b/docs_v1.0/REFERENCE/Person_vs_Identifiable.md @@ -0,0 +1,68 @@ +# 人 vs 可 identify 的人 + +**Date**: 2026-05-16 + +--- + +## 1.
是什麼?(Face Detection → Trace) + +人臉偵測器在每一幀找到人臉 → `face_detections`。Face tracker 將同一人的連續影格人臉聚合為 `trace_id`。 + +```sql +face_detections (262,021 rows) +├── 100% 有 trace_id → 已追蹤(形成軌跡) +├── 100% confidence >= 0.5 → 基本信心門檻 +└── 80% confidence >= 0.7 → 高信心 +``` + +**「是人」的條件**:只要有 `face_detections` 記錄且 `confidence >= 0.5`。 + +## 2. 可 identify 的人?(Trace → Identity) + +Face Agent 比對每個 trace 的 face embedding → 若匹配成功則設定 `identity_id`。 + +``` +Total traces: 6,892 +├── 5,288 (77%) 有 identity_id → "可 identify" +└── 1,604 (23%) 無 identity_id → "陌生人" +``` + +**「可 identify 的人」的條件**: +1. 有完整 face embedding(品質足夠) +2. Agent 比對到已知 identity(tmdb / auto 來源) +3. 或手動透過 API 綁定 + +## 3. Identity 來源分佈 + +| Source | 數量 | 說明 | +|--------|:----:|------| +| `auto` | 3,712 | Auto-generated (face clustering 自動產生) | +| `tmdb` | 15 | 電影資料庫名人 | +| `auto_temp` | 25 | 暫用 (等待 merge) | +| `manual` | 1 | 手動新增 | +| `merged` | 12 | 被合併(inactive) | +| **`stranger`** | **0** | **可考慮新增** | + +## 4. 知名人物(face_count > 1000) + +| Name | Source | Face Count | +|------|--------|:----------:| +| Audrey Hepburn | tmdb | ~22,082 | +| Cary Grant | tmdb | ~15,334 | +| James Coburn | tmdb | ~3,465 | +| Walter Matthau | tmdb | ~2,563 | +| (PERSON_* auto 群) | auto | ~1,000~13,000 | + +## 5. 陌生人處理流程 + +``` +Face Detection → Face Tracker → trace (6,892) + │ + ▼ + Face Agent (embedding 比對) + │ + ┌───────────────┴───────────────┐ + ▼ ▼ + 可 identify (5,288) 陌生人 (1,604) + identity_id = matched identity_id = NULL +``` diff --git a/docs_v1.0/REFERENCE/Pipeline_API_Demo.md b/docs_v1.0/REFERENCE/Pipeline_API_Demo.md new file mode 100644 index 0000000..5292202 --- /dev/null +++ b/docs_v1.0/REFERENCE/Pipeline_API_Demo.md @@ -0,0 +1,445 @@ +# Momentry Core — Pipeline API Demo + +**Date**: 2026-05-16 +**Server**: `http://127.0.0.1:3003` (playground, dev schema) +**Format**: All commands use `curl` + `jq` (API only, no direct DB access) + +--- + +## Table of Contents + +1. [Health Check](#1-health-check) +2. 
[Register File](#2-register-file) +3. [Probe File](#3-probe-file) +4. [Submit Processing Job](#4-submit-processing-job) +5. [Monitor Progress (Polling)](#5-monitor-progress-polling) +6. [Run Worker](#6-run-worker) +7. [Verify Progress](#7-verify-progress) +8. [Universal Search](#8-universal-search) +9. [Chunk Detail](#9-chunk-detail) +10. [Chunk Fallback (Stale Qdrant)](#10-chunk-fallback-stale-qdrant) +11. [File Detail](#11-file-detail) +12. [List Identities](#12-list-identities) +13. [Schema & Integrity Verify](#13-schema--integrity-verify) + +--- + +## 0. Env Setup + +```bash +API="http://127.0.0.1:3003" +KEY="muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" +``` + +--- + +## 1. Health Check + +```bash +curl -sf "$API/health" | jq '{status, version, build_git_hash, uptime_ms}' +``` + +Response: +```json +{ + "status": "ok", + "version": "1.0.0", + "build_git_hash": "c41f7e0c", + "uptime_ms": 1234567 +} +``` + +```bash +curl -sf "$API/health/detailed" | jq '{ + ip, port, services, + schema: .schema.ok, + scripts: .pipeline.scripts_count, + integrity: .pipeline.scripts_integrity, + procs: [.pipeline.processors | to_entries[] | select(.value==true and .key!="total_py_files") | .key] +}' +``` + +Response: +```json +{ + "ip": "192.168.110.201", + "port": 3003, + "services": { + "postgres": {"status": "ok", "latency_ms": 6}, + "redis": {"status": "ok", "latency_ms": 0}, + "qdrant": {"status": "ok", "latency_ms": 1}, + "mongodb": {"status": "ok", "latency_ms": 0} + }, + "schema": true, + "scripts": 286, + "integrity": {"matched": 345, "total": 345, "ok": true}, + "procs": ["asr","yolo","face","pose","ocr","cut","caption","scene","story","asrx","probe","visual_chunk"] +} +``` + +--- + +## 2. 
Register File + +```bash +curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \ + -d '{"file_path": "/path/to/video.mp4"}' \ + "$API/api/v1/files/register" | jq '{success, file_uuid, file_name, file_type, duration, fps, already_exists}' +``` + +Response: +```json +{ + "success": true, + "file_uuid": "078975658e04529ee06f8d11cd7ba226", + "file_name": "Gamma 8-Director Chih-Lin Yang Shares His Experience:楊智麟導演經驗分享.mp4", + "file_type": "video", + "duration": 298.665042, + "fps": 29.97002997002997, + "already_exists": false +} +``` + +> **Note**: If the file was already registered (same `content_hash`), the response returns `already_exists: true`. + +--- + +## 3. Probe File + +```bash +curl -sf -H "X-API-Key: $KEY" "$API/api/v1/file/${UUID}/probe" | \ + jq '{name: .file_name, video: "\(.width)x\(.height)", fps, duration, cached, streams: [.streams[] | {type: .codec_type, codec: .codec_name}]}' +``` + +Response: +```json +{ + "name": "Gamma 8-Director Chih-Lin Yang Shares His Experience:楊智麟導演經驗分享.mp4", + "video": "1280x720", + "fps": 29.97, + "duration": 298.665, + "cached": true, + "streams": [ + {"type": "video", "codec": "h264"}, + {"type": "audio", "codec": "aac"} + ] +} +``` + +> **Error cases**: +> - Non-existent UUID → `{"error":"Video not found"}` + HTTP 404 +> - File deleted from disk → `{"error":"File does not exist at registered path"}` + HTTP 404 + +--- + +## 4. Submit Processing Job + +```bash +# Submit with specific processors +curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \ + -d '{"processors":["asr","cut","yolo","face","pose","ocr"]}' \ + "$API/api/v1/file/${UUID}/process" | jq '{job_id, file_uuid: .file_uuid[0:16], status}' +``` + +Response: +```json +{ + "job_id": 167, + "file_uuid": "078975658e04529e", + "status": "PENDING" +} +``` + +> **Submit all processors**: Send empty `{}` to run all processors. 
+> Available processors: `asr`, `cut`, `yolo`, `face`, `pose`, `ocr`, `asrx`, `visual_chunk`, `scene`, `story`, `caption` + +--- + +## 5. Monitor Progress (Polling) + +```bash +while true; do + PROGRESS=$(curl -sf -H "X-API-Key: $KEY" "$API/api/v1/progress/${UUID}") + STATUS=$(echo "$PROGRESS" | jq -r '.status // "?"') + PROCS=$(echo "$PROGRESS" | jq -r '[.processors[]? | "\(.name)=\(.status)(\(.frames_processed))"] | join(" ")') + echo "$(date +%H:%M:%S): $PROCS" + echo "$PROGRESS" | jq -e '[.processors[]?.status] | length > 0 and all(. != "pending" and . != "running")' >/dev/null && break + sleep 10 +done +``` + +Output: +``` +12:30:01: asr=pending(0) cut=pending(0) yolo=pending(0) face=pending(0) pose=pending(0) ocr=pending(0) +12:30:11: asr=running(0) cut=running(0) yolo=pending(0) face=pending(0) pose=pending(0) ocr=pending(0) +12:30:21: asr=running(0) cut=completed(8951) yolo=running(0) face=pending(0) pose=pending(0) ocr=pending(0) +12:30:31: asr=running(0) cut=completed(8951) yolo=completed(8951) face=running(0) pose=pending(0) ocr=pending(0) +12:30:41: asr=running(0) cut=completed(8951) yolo=completed(8951) face=completed(8951) pose=running(0) ocr=pending(0) +12:30:51: asr=completed(8951) cut=completed(8951) yolo=completed(8951) face=completed(8951) pose=completed(8951) ocr=running(0) +12:31:01: asr=completed(8951) cut=completed(8951) yolo=completed(8951) face=completed(8951) pose=completed(8951) ocr=completed(8951) +``` + +**Status transitions**: `pending → running → completed` + +Also monitor job state: +```bash +curl -sf -H "X-API-Key: $KEY" "$API/api/v1/jobs?uuid=${UUID}" | \ + jq '[.jobs[]? | {id, status}]' +``` + +--- + +## 6.
Run Worker + +```bash +DATABASE_SCHEMA=dev target/debug/momentry_playground worker \ + --max-concurrent 2 --poll-interval 5 +``` + +Worker output: +``` +Starting worker with max_concurrent=2, poll_interval=5s +[CHECKSUMS] Loaded 345 entries from checksums.sha256 +[INTEGRITY] asr_processor.py checksum OK +[ASR] Starting asr_processor.py +[ASR] Completed successfully +[INTEGRITY] cut_processor.py checksum OK +[CUT] Starting cut_processor.py +[CUT] Completed successfully +... +check_and_complete_job: results=6/6 → Job COMPLETED +``` + +--- + +## 7. Verify Progress + +```bash +curl -sf -H "X-API-Key: $KEY" "$API/api/v1/progress/${UUID}" | \ + jq '{processors: [.processors[] | {name, status, frames: .frames_processed}]}' +``` + +Response: +```json +{ + "processors": [ + {"name": "asr", "status": "completed", "frames": 8951}, + {"name": "cut", "status": "completed", "frames": 8951}, + {"name": "yolo", "status": "completed", "frames": 8951}, + {"name": "face", "status": "completed", "frames": 8951}, + {"name": "pose", "status": "completed", "frames": 8951}, + {"name": "ocr", "status": "completed", "frames": 8951} + ] +} +``` + +--- + +## 8. Universal Search + +```bash +# Search by text content +curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \ + -d "{\"query\":\"導演\",\"uuid\":\"${UUID}\",\"limit\":5}" \ + "$API/api/v1/search/universal" | \ + jq '{total, hits: [.results[]? 
| {chunk_id: .chunk_id[0:40], text: .text[0:80], score}]}' +``` + +Response: +```json +{ + "total": 5, + "hits": [ + {"chunk_id": "078975658e04529ee06f8d11cd7ba226_39202", "text": "我 是 一 個 導 演", "score": 0.892}, + {"chunk_id": "078975658e04529ee06f8d11cd7ba226_39203", "text": "我 是 一 個 導 演", "score": 0.890}, + {"chunk_id": "078975658e04529ee06f8d11cd7ba226_39204", "text": "之前 在 拍 紀 錄 片", "score": 0.754} + ] +} +``` + +Search by English text: +```bash +curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \ + -d "{\"query\":\"camera\",\"uuid\":\"${UUID}\",\"limit\":3}" \ + "$API/api/v1/search/universal" | jq '{total}' +``` + +--- + +## 9. Chunk Detail + +```bash +# Get a specific chunk by its chunk_id +CHUNK_ID="078975658e04529ee06f8d11cd7ba226_39202" + +curl -sf -H "X-API-Key: $KEY" "$API/api/v1/file/${UUID}/chunk/${CHUNK_ID}" | \ + jq '{chunk_id, chunk_type, text: .text_content, fps, start_frame, end_frame, rule}' +``` + +Response: +```json +{ + "chunk_id": "078975658e04529ee06f8d11cd7ba226_39202", + "chunk_type": "sentence", + "text": "我 是 一 個 導 演", + "fps": 29.97, + "start_frame": 60, + "end_frame": 120, + "rule": "rule1" +} +``` + +--- + +## 10. Chunk Fallback (Stale Qdrant) + +If a chunk_id is an old integer format (e.g., from stale Qdrant payloads), the handler falls back to `WHERE id = int(chunk_id)`: + +```bash +# Old integer format → resolves via id fallback +curl -sf -H "X-API-Key: $KEY" "$API/api/v1/file/${UUID}/chunk/39202" | \ + jq '{chunk_id, text: .text_content}' +``` + +Response: +```json +{ + "chunk_id": "078975658e04529ee06f8d11cd7ba226_39202", + "text": "我 是 一 個 導 演" +} +``` + +Both formats return the same chunk: +- `chunk/078975658e...226_39202` → exact `chunk_id` match +- `chunk/39202` → fallback by `id` + +--- + +## 11. 
File Detail + +```bash +curl -sf -H "X-API-Key: $KEY" "$API/api/v1/file/${UUID}" | \ + jq '{file_name, status, duration, fps, file_type, width, height, total_frames}' +``` + +Response: +```json +{ + "file_name": "Gamma 8-Director Chih-Lin Yang Shares His Experience:楊智麟導演經驗分享.mp4", + "status": "completed", + "duration": 298.665, + "fps": 29.97, + "file_type": "video", + "width": 1280, + "height": 720, + "total_frames": 8951 +} +``` + +--- + +## 12. List Identities + +```bash +curl -sf -H "X-API-Key: $KEY" "$API/api/v1/file/${UUID}/identities" | \ + jq '{total, identities: [.data[]? | {name, face_count, confidence}]}' +``` + +Response: +```json +{ + "total": 2, + "identities": [ + {"name": "Chih-Lin Yang", "face_count": 847, "confidence": 0.93}, + {"name": "Interviewer", "face_count": 312, "confidence": 0.87} + ] +} +``` + +--- + +## 13. Schema & Integrity Verify + +```bash +curl -sf "$API/health/detailed" | jq '{ + ip, port, schema: .schema.ok, + migrations: [.schema.applied[]?.filename], + integrity: .pipeline.scripts_integrity +}' +``` + +Response: +```json +{ + "ip": "192.168.110.201", + "port": 3003, + "schema": true, + "migrations": [ + "migrate_add_content_hash.sql", + "migrate_add_registered_status.sql", + "migrate_add_schema_version.sql", + "migrate_cleanup_inactive_identities.sql", + "migrate_public_schema_v4_tables.sql", + "migrate_public_schema_v4.sql", + "migrate_public_v4_complete.sql", + "migrate_fix_chunk_id_format.sql", + "migrate_add_identity_indexes.sql" + ], + "integrity": { + "matched": 345, + "total": 345, + "ok": true + } +} +``` + +--- + +## Full Automation Script + +```bash +#!/bin/bash +set -euo pipefail + +API="${API:-http://127.0.0.1:3003}" +KEY="${KEY:-muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69}" + +# Health +curl -sf "$API/health" | jq '{status, version, build_git_hash}' + +# Register +REG=$(curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \ + -d '{"file_path":"'"$1"'"}' 
"$API/api/v1/files/register") +echo "$REG" | jq '{success, file_uuid, file_name}' +UUID=$(echo "$REG" | jq -r '.file_uuid') + +# Probe +curl -sf -H "X-API-Key: $KEY" "$API/api/v1/file/${UUID}/probe" | \ + jq '{name: .file_name, fps, duration}' + +# Process +curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \ + -d '{}' "$API/api/v1/file/${UUID}/process" | jq '{job_id, status}' + +# Worker +DATABASE_SCHEMA=dev target/debug/momentry_playground worker \ + --max-concurrent 2 --poll-interval 5 & +WPID=$! + +# Wait +while true; do + PROGRESS=$(curl -sf -H "X-API-Key: $KEY" "$API/api/v1/progress/${UUID}") + STATUS=$(echo "$PROGRESS" | jq -r '.status') + echo "$(date +%H:%M:%S): $(echo "$PROGRESS" | jq -r '[.processors[]? | "\(.name)=\(.status)(\(.frames_processed))"] | join(" ")')" + echo "$PROGRESS" | jq -e '[.processors[]? | select(.status == "pending")] | length == 0' >/dev/null && break + sleep 10 +done +kill $WPID 2>/dev/null || true + +# Search +curl -sf -X POST -H "X-API-Key: $KEY" -H "Content-Type: application/json" \ + -d "{\"query\":\"test\",\"uuid\":\"${UUID}\",\"limit\":3}" \ + "$API/api/v1/search/universal" | jq '{total, hits: [.results[]? 
| {chunk_id: .chunk_id[0:30], text: .text[0:60]}]}' + +echo "Done: $UUID" +``` diff --git a/docs_v1.0/REFERENCE/SFTPGo_Lifecycle.md b/docs_v1.0/REFERENCE/SFTPGo_Lifecycle.md new file mode 100644 index 0000000..be7e495 --- /dev/null +++ b/docs_v1.0/REFERENCE/SFTPGo_Lifecycle.md @@ -0,0 +1,235 @@ +# SFTPGo 生命週期管理 (Source → Install → Config → Use) + +**Date**: 2026-05-15 +**Status**: Active, Verified + +--- + +## 生命週期總覽 + +``` +Source → Archive → Build → Install → Config → Start → Verify → Use + ① ② ③ ④ ⑤ ⑥ ⑦ ⑧ +``` + +--- + +## ① Source Code + +| Field | Value | +|-------|-------| +| **Repository** | `https://github.com/drakkan/sftpgo.git` | +| **Branch** | `main` (commit `6e543c6`) | +| **License** | AGPL v3 | +| **Language** | Go 1.26+ | +| **Go files** | 246 | +| **Source size** | 23 MB | + +## ② Archive + +```bash +# Archive command +cd /tmp +tar czf release/system/v1.0/services/src/sftpgo-main.tar.gz sftpgo/ + +# Verify +shasum -a 256 release/system/v1.0/services/src/sftpgo-main.tar.gz +# → 6607334148917dd80a687706a3ae63ea8c532d10c6717c87491da23939c96d4a +``` + +**Archive location**: `release/system/v1.0/services/src/sftpgo-main.tar.gz` (9.2 MB) + +## ③ Build + +```bash +# Clone source +git clone --depth 1 https://github.com/drakkan/sftpgo.git /tmp/sftpgo + +# Build binary +cd /tmp/sftpgo +go build -o /Users/accusys/bin/sftpgo . 
+ +# Verify binary +shasum -a 256 /Users/accusys/bin/sftpgo +# → 9991d2a1c877d5bcae17cb4e026de939862e4b880924589cf4ed15ac7291ec7e + +ls -lh /Users/accusys/bin/sftpgo +# → 88 MB +``` + +**Binary**: `/Users/accusys/bin/sftpgo` (88 MB) + +## ④ Install + +### Database + +```bash +# Create dedicated PostgreSQL database + user +psql -U accusys -h /tmp -d postgres -c "CREATE DATABASE sftpgo" +psql -U accusys -h /tmp -d postgres -c "CREATE USER sftpgo WITH PASSWORD 'sftpgo_pass_2026'" +psql -U accusys -h /tmp -d sftpgo -c "GRANT ALL ON SCHEMA public TO sftpgo" +``` + +### Templates & Static Files + +```bash +# Copy from source (required by SFTPGo) +cp -r /tmp/sftpgo/templates/ /Users/accusys/momentry/etc/sftpgo/templates/ +cp -r /tmp/sftpgo/static/ /Users/accusys/momentry/etc/sftpgo/static/ +cp -r /tmp/sftpgo/openapi/ /Users/accusys/momentry/etc/sftpgo/openapi/ +``` + +## ⑤ Configuration + +**Config file**: `/Users/accusys/momentry/etc/sftpgo/sftpgo.json` + +### Key Settings + +| Section | Key | Value | +|---------|-----|-------| +| `data_provider` | `driver` | `postgresql` | +| `data_provider` | `name` | `sftpgo` | +| `data_provider` | `users_base_dir` | `/Users/accusys/momentry/var/sftpgo/data` | +| `httpd.bindings[0]` | `port` | `8080` | +| `sftpd.bindings[0]` | `port` | `2022` | +| `webdavd.bindings[0]` | `port` | `8090` | + +### launchd Plist + +**File**: `momentry_runtime/plist/com.momentry.sftpgo.plist` + +```xml +ProgramArguments + + /Users/accusys/bin/sftpgo + serve + -c + /Users/accusys/momentry/etc/sftpgo/ + +``` + +## ⑥ Start + +### Initialize Provider (first time only) + +```bash +SFTPGO_DEFAULT_ADMIN_USERNAME=admin \ +SFTPGO_DEFAULT_ADMIN_PASSWORD=Test3200Test3200 \ +/Users/accusys/bin/sftpgo initprovider -c /Users/accusys/momentry/etc/sftpgo/ +``` + +### Start Serve + +```bash +SFTPGO_DEFAULT_ADMIN_USERNAME=admin \ +SFTPGO_DEFAULT_ADMIN_PASSWORD=Test3200Test3200 \ +nohup /Users/accusys/bin/sftpgo serve \ + -c /Users/accusys/momentry/etc/sftpgo/ \ + > 
/Users/accusys/momentry/log/sftpgo.log 2>&1 & +``` + +## ⑦ Verify + +```bash +# Service check +curl -sI http://localhost:8080/ +# → Server: SFTPGo/2.7.99-dev + +# HTTPS +curl -sI https://m5sftpgo.momentry.ddns.net/ +# → Server: SFTPGo/2.7.99-dev +# → Via: 1.1 Caddy + +# Auth +curl -s -u "admin:Test3200Test3200" http://localhost:8080/api/v2/token +# → {"access_token":"eyJ...","expires_at":"..."} +``` + +## ⑧ Usage + +### User Management + +**Admin**: `admin` / `Test3200Test3200` +**Demo user**: `demo` / `demopassword123` + +```bash +# Get admin token +TOKEN=$(curl -s -u "admin:Test3200Test3200" \ + "http://localhost:8080/api/v2/token" | \ + python3 -c "import json,sys;print(json.load(sys.stdin).get('access_token',''))") + +# Create user +curl -s -X POST "http://localhost:8080/api/v2/users" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"username":"demo","password":"demopassword123", + "home_dir":"/Users/accusys/momentry/var/sftpgo/data/demo", + "permissions": {"/": ["*"]}, "status": 1}' + +# Update user password (PUT targets /users/{username}; fetch the current record, change the password, send it back) +curl -s -X PUT "http://localhost:8080/api/v2/users/demo" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d "$(curl -s http://localhost:8080/api/v2/users -H "Authorization: Bearer $TOKEN" | python3 -c "import json,sys;u=[u for u in json.load(sys.stdin) if u['username']=='demo'][0];u['password']='newpass';print(json.dumps(u))")" + +# List users +curl -s "http://localhost:8080/api/v2/users" \ + -H "Authorization: Bearer $TOKEN" +``` + +### External Access + +```bash +# Via HTTPS +curl -s "https://m5sftpgo.momentry.ddns.net/api/v2/status" + +# SFTP (port 2022) +sftp -P 2022 demo@m5sftpgo.momentry.ddns.net + +# WebDAV (port 8090) +# http://m5sftpgo.momentry.ddns.net:8090/ +``` + +--- + +## 資源管理記錄 + +### dev.resources + +```sql +INSERT INTO dev.resources (resource_id, resource_type, category, capabilities, config) +VALUES ( + 'sftpgo', 'system_tool', 'file_upload', + '["sftp", 
"file_transfer", "webdav"]', + '{"binary": "/Users/accusys/bin/sftpgo", + "version": "2.7.99-dev", + "port": 8080, + "source_sha256": "6607334148917dd80a687706a3ae63ea8c532d10c6717c87491da23939c96d4a", + "binary_sha256": "9991d2a1c877d5bcae17cb4e026de939862e4b880924589cf4ed15ac7291ec7e", + "source_archive": "release/system/v1.0/services/src/sftpgo-main.tar.gz", + "plist": "momentry_runtime/plist/com.momentry.sftpgo.plist"}' +) +ON CONFLICT (resource_id) DO UPDATE SET + resource_type = EXCLUDED.resource_type, + category = EXCLUDED.category, + config = EXCLUDED.config; +``` + +--- + +## SHA256 Checksums Reference + +| Asset | SHA256 | +|-------|--------| +| Source archive (`sftpgo-main.tar.gz`) | `6607334148917dd80a687706a3ae63ea8c532d10c6717c87491da23939c96d4a` | +| Binary (`/Users/accusys/bin/sftpgo`) | `9991d2a1c877d5bcae17cb4e026de939862e4b880924589cf4ed15ac7291ec7e` | + +--- + +## Ports Summary + +| Port | Protocol | Service | External URL | +|------|----------|---------|-------------| +| 8080 | HTTP | Web Admin + REST API | `https://m5sftpgo.momentry.ddns.net` | +| 2022 | SFTP | File Transfer | `sftp://m5sftpgo.momentry.ddns.net:2022` | +| 8090 | WebDAV | File Access | `https://m5sftpgo.momentry.ddns.net:8090/` | diff --git a/docs_v1.0/REFERENCE/SFTPGo_Setup.md b/docs_v1.0/REFERENCE/SFTPGo_Setup.md new file mode 100644 index 0000000..c67e884 --- /dev/null +++ b/docs_v1.0/REFERENCE/SFTPGo_Setup.md @@ -0,0 +1,237 @@ +# SFTPGo Installation & Setup + +**Date**: 2026-05-15 +**Version**: 2.6.7 (source build from main branch) +**Status**: Active + +--- + +## Top Info + +| Field | Value | +|-------|-------| +| **Source** | `https://github.com/drakkan/sftpgo.git` (main branch, ~2.7.99-dev) | +| **Source archive** | `release/system/v1.0/services/src/sftpgo-main.tar.gz` (9.2MB) | +| **Source SHA256** | `6607334148917dd80a687706a3ae63ea8c532d10c6717c87491da23939c96d4a` | +| **Build method** | `git clone && go build -o /Users/accusys/bin/sftpgo .` | +| **Binary** | 
`/Users/accusys/bin/sftpgo` (88MB) | +| **Binary SHA256** | `9991d2a1c877d5bcae17cb4e026de939862e4b880924589cf4ed15ac7291ec7e` | +| **Config** | `/Users/accusys/momentry/etc/sftpgo/sftpgo.json` | +| **Templates** | `/Users/accusys/momentry/etc/sftpgo/templates/` (copied from source) | +| **Database** | PostgreSQL `sftpgo` database, user `sftpgo` | +| **Plist** | `momentry_runtime/plist/com.momentry.sftpgo.plist` | +| **Ports** | 8080 (HTTP/WebAdmin), 2022 (SFTP), 8090 (WebDAV) | +| **Resource ID** | `sftpgo` in `dev.resources` (type: `system_tool`, category: `file_upload`) | + +--- + +## Build from Source + +### Prerequisites + +- Go 1.26+ (`go version`) + +### Build + +```bash +# Clone source +git clone --depth 1 https://github.com/drakkan/sftpgo.git /tmp/sftpgo + +# Build binary +cd /tmp/sftpgo +go build -o /Users/accusys/bin/sftpgo . + +# Verify +/Users/accusys/bin/sftpgo --version +``` + +### Archive Source + +```bash +tar czf release/system/v1.0/services/src/sftpgo-main.tar.gz -C /tmp sftpgo/ +shasum -a 256 release/system/v1.0/services/src/sftpgo-main.tar.gz +``` + +--- + +## Database Setup + +```bash +# Create database and user +psql -U accusys -h /tmp -d postgres -c "CREATE DATABASE sftpgo" +psql -U accusys -h /tmp -d postgres -c "CREATE USER sftpgo WITH PASSWORD 'sftpgo_pass_2026'" +psql -U accusys -h /tmp -d sftpgo -c "GRANT ALL ON SCHEMA public TO sftpgo" +``` + +--- + +## Start Server + +### Initialize Provider (first time only) + +```bash +SFTPGO_DEFAULT_ADMIN_USERNAME=admin \ +SFTPGO_DEFAULT_ADMIN_PASSWORD=Test3200Test3200 \ +/Users/accusys/bin/sftpgo initprovider \ + -c /Users/accusys/momentry/etc/sftpgo/ +``` + +### Start Serve + +```bash +SFTPGO_DEFAULT_ADMIN_USERNAME=admin \ +SFTPGO_DEFAULT_ADMIN_PASSWORD=Test3200Test3200 \ +nohup /Users/accusys/bin/sftpgo serve \ + -c /Users/accusys/momentry/etc/sftpgo/ \ + > /Users/accusys/momentry/log/sftpgo.log 2>&1 & +``` + +Note: The `-c` flag must point to the config directory (containing `sftpgo.json`, 
`templates/`, `static/`, `openapi/`). + +### Verify + +```bash +# Health check (any HTTP response = running; without a token the API answers Unauthorized) +curl -s http://127.0.0.1:8080/api/v2/status +# Should return: {"error":"no token found","message":"Unauthorized"} + +# Get admin token +curl -s -u "admin:Test3200Test3200" "http://127.0.0.1:8080/api/v2/token" +``` + +--- + +## User Management + +### Get Admin Token + +The SFTPGo API uses JWT tokens. All user/management API calls require an `Authorization: Bearer <token>` header. + +```bash +TOKEN=$(curl -s -u "admin:Test3200Test3200" \ + "http://127.0.0.1:8080/api/v2/token" | \ + python3 -c "import json,sys;print(json.load(sys.stdin).get('access_token',''))") +``` + +### Create Demo User + +```bash +curl -s -X POST "http://127.0.0.1:8080/api/v2/users" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "username": "demo", + "password": "demopassword123", + "home_dir": "/Users/accusys/momentry/var/sftpgo/data/demo", + "permissions": {"/": ["*"]}, + "status": 1, + "quota_size": 0, + "quota_files": 0 + }' +``` + +### Update User Password + +```bash +# Get current user data +USER_DATA=$(curl -s "http://127.0.0.1:8080/api/v2/users" \ + -H "Authorization: Bearer $TOKEN" | \ + python3 -c " +import json,sys +users=json.load(sys.stdin) +u=[u for u in users if u['username']=='demo'][0] +u['password']='newpassword' +print(json.dumps(u)) +") + +# Update user (PUT targets /users/{username}) +curl -s -X PUT "http://127.0.0.1:8080/api/v2/users/demo" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d "$USER_DATA" +``` + +### List Users + +```bash +curl -s "http://127.0.0.1:8080/api/v2/users" \ + -H "Authorization: Bearer $TOKEN" +``` + +--- + +## Configuration + +Key settings in `/Users/accusys/momentry/etc/sftpgo/sftpgo.json`: + +| Section | Key | Value | Note | +|---------|-----|-------|------| +| `data_provider` | `driver` | `postgresql` | User/auth database | +| `data_provider` | `name` | `sftpgo` | Database name | +| `data_provider` | `users_base_dir` 
| `/Users/accusys/momentry/var/sftpgo/data` | Base directory for user homes | +| `httpd.bindings[0]` | `port` | `8080` | Web admin + REST API | +| `sftpd.bindings[0]` | `port` | `2022` | SFTP server | +| `webdavd.bindings[0]` | `port` | `8090` | WebDAV server | +| `setup` | `installation_code` | `momentry2026` | Web setup wizard code | + +--- + +## launchd Plist + +```xml + +ProgramArguments + + /Users/accusys/bin/sftpgo + serve + -c + /Users/accusys/momentry/etc/sftpgo/ + +``` + +Load with launchctl: +```bash +launchctl load ~/Library/LaunchAgents/com.momentry.sftpgo.plist +``` + +--- + +## Resource Record + +```sql +INSERT INTO dev.resources (resource_id, resource_type, category, capabilities, config) +VALUES ( + 'sftpgo', + 'system_tool', + 'file_upload', + '["sftp", "file_transfer", "webdav"]', + '{"binary": "/Users/accusys/bin/sftpgo", "version": "2.6.7", "port": 8080, + "source_sha256": "6607334148917dd80a687706a3ae63ea8c532d10c6717c87491da23939c96d4a", + "binary_sha256": "9991d2a1c877d5bcae17cb4e026de939862e4b880924589cf4ed15ac7291ec7e", + "source_archive": "release/system/v1.0/services/src/sftpgo-main.tar.gz", + "plist": "momentry_runtime/plist/com.momentry.sftpgo.plist"}' +) +ON CONFLICT (resource_id) DO UPDATE SET + resource_type = EXCLUDED.resource_type, + category = EXCLUDED.category, + config = EXCLUDED.config; +``` + +--- + +## Ports Summary + +| Port | Service | Purpose | +|------|---------|---------| +| 8080 | HTTP/HTTPS | Web admin UI + REST API | +| 2022 | SFTP | File transfer over SSH | +| 8090 | WebDAV | File access via WebDAV | + +--- + +## Credentials + +| User | Password | Role | +|------|----------|------| +| `admin` | `Test3200Test3200` | Administrator (API + Web Admin) | +| `demo` | `demopassword123` | Demo user (file upload) | diff --git a/docs_v1.0/REFERENCE/SFTPGo_Verification.md b/docs_v1.0/REFERENCE/SFTPGo_Verification.md new file mode 100644 index 0000000..ebfb7a0 --- /dev/null +++ b/docs_v1.0/REFERENCE/SFTPGo_Verification.md @@ -0,0 
+1,84 @@ +# SFTPGo Source Code Verification Report + +**Date**: 2026-05-15 +**Version**: 2.7.99-dev (main branch) +**Status**: ✅ Verified + +--- + +## 1. Source Archive + +| Item | Value | +|------|-------| +| **Archive** | `release/system/v1.0/services/src/sftpgo-main.tar.gz` | +| **Size** | 9.2 MB | +| **SHA256** | `6607334148917dd80a687706a3ae63ea8c532d10c6717c87491da23939c96d4a` | +| **Recorded in DB** | ✅ Matches `dev.resources.config->>'source_sha256'` | +| **Git remote** | `https://github.com/drakkan/sftpgo.git` | +| **Git commit** | `6e543c6` | + +## 2. Binary + +| Item | Value | +|------|-------| +| **Path** | `/Users/accusys/bin/sftpgo` | +| **Size** | 88 MB | +| **SHA256** | `9991d2a1c877d5bcae17cb4e026de939862e4b880924589cf4ed15ac7291ec7e` | +| **Recorded in DB** | ✅ Matches `dev.resources.config->>'binary_sha256'` | +| **Build date** | 2026-05-15 22:48 | +| **Build method** | `git clone && go build -o /Users/accusys/bin/sftpgo .` | + +## 3. Source Tree + +| Item | Count | +|------|:----:| +| Go source files | 246 | +| Total size | 23 MB | +| License | AGPL v3 (GNU Affero General Public License) | + +## 4. Build Verification + +| Item | Status | +|------|:------:| +| Build from source | ✅ `go build` succeeds | +| Reproducible build | ✅ Source archived, SHA256 matched | +| Dependency trace | ✅ `go.mod` + `go.sum` included in archive | + +## 5. Runtime Service + +| Endpoint | Status | Response | +|----------|:------:|----------| +| `http://localhost:8080/` | ✅ | `SFTPGo/2.7.99-dev` | +| `https://m5sftpgo.momentry.ddns.net/` | ✅ | Caddy → SFTPGo proxy | +| Auth (admin) | ✅ | Token endpoint works | +| Demo user | ✅ | `demo` / `demopassword123` | +| Ports | ✅ | 8080 (HTTP), 2022 (SFTP), 8090 (WebDAV) | + +## 6. 
Resource Registration + +```sql +INSERT INTO dev.resources (resource_id, resource_type, category, capabilities, config) +VALUES ( + 'sftpgo', 'system_tool', 'file_upload', + '["sftp", "file_transfer", "webdav"]', + '{"binary": "/Users/accusys/bin/sftpgo", "version": "2.6.7", "port": 8080, + "source_sha256": "6607334148917dd80a687706a3ae63ea8c532d10c6717c87491da23939c96d4a", + "binary_sha256": "9991d2a1c877d5bcae17cb4e026de939862e4b880924589cf4ed15ac7291ec7e", + "source_archive": "release/system/v1.0/services/src/sftpgo-main.tar.gz", + "plist": "momentry_runtime/plist/com.momentry.sftpgo.plist"}' +); +``` + +## 7. Verification Summary + +| # | Check | Result | +|---|-------|:------:| +| 1 | Source archive exists in `services/src/` | ✅ | +| 2 | Source SHA256 matches DB record | ✅ | +| 3 | Binary SHA256 matches DB record | ✅ | +| 4 | Build reproducible from archived source | ✅ | +| 5 | Service responding on HTTP (localhost:8080) | ✅ | +| 6 | Service accessible via HTTPS (m5sftpgo.momentry.ddns.net) | ✅ | +| 7 | Admin auth works | ✅ | +| 8 | Demo user exists and functional | ✅ | +| 9 | Configuration documented in `REFERENCE/SFTPGo_Setup.md` | ✅ | diff --git a/docs_v1.0/REFERENCE/Searchable_Chunk_Rules.md b/docs_v1.0/REFERENCE/Searchable_Chunk_Rules.md new file mode 100644 index 0000000..62e0413 --- /dev/null +++ b/docs_v1.0/REFERENCE/Searchable_Chunk_Rules.md @@ -0,0 +1,90 @@ +# Searchable Chunk — 綜合規則組成 + +**Date**: 2026-05-16 + +--- + +## 概念 + +Searchable chunk 不是原始的 cut 或 sentence,而是經過規則組合後的結構化單位。 + +``` +原始資料 規則組合 可搜尋 chunk +───────── ────────── ────────────── +ASR sentence (聽覺) ─┐ +YOLO objects (視覺) ─┤ Rule 1 / Rule 2 chunk (text + metadata + embedding) +Cut boundary (鏡頭) ─┘ +``` + +## Chunk 類型分層 + +| 層級 | 類型 | 說明 | 可搜尋 | +|------|------|------|:------:| +| **原始** | `cut` | 視覺 chunk(鏡頭) | ❌(無文字) | +| **原始** | `sentence` | 聽覺 chunk(ASR 句子) | ✅ 文字搜尋 | +| **合成** | `story_child` | 故事子句 | ✅ | +| **合成** | `story_parent` | 故事段落(多句聚合) | ✅ | + +## Rule 1 — 直接轉換 + 
+最簡單的規則。ASR 輸出的每個 sentence 直接成為 chunk,不做聚合。 + +```json +{ + "rule": "rule_1", + "data": { + "text": "I'm in scoby.", + "text_normalized": "i'm in scoby." + } +} +``` + +- `chunk_type = 'sentence'` +- 可文字搜尋(`text_content ILIKE`) +- 可向量搜尋(embedding in Qdrant) + +## Rule 2 — 集合內容 + +將多個來源(sentence + visual objects + cut)聚合為一個 chunk。 + +```json +{ + "rule": "rule_2", + "data": { + "text": "...", + "speaker_id": "SPEAKER_0", + "metadata": { + "avg_confidence": 0.85, + "unique_classes": ["person", "chair"], + "keyframe_objects": [...] + } + } +} +``` + +- 用於 visual_chunk 等複合類型 +- 可同時文字搜尋 + 視覺過濾 + +## 搜尋方式 + +```bash +# 文字搜尋(對所有有 text_content 的 chunk) +POST /api/v1/search/universal +Body: {"query": "Audrey", "uuid": "...", "limit": 10} + +# 視覺搜尋(對 visual chunk 過濾) +POST /api/v1/search/visual +Body: {"uuid": "...", "criteria": {"required_classes": ["person"]}} +``` + +## 流程 + +``` +ASR output (sentence) ─── Rule 1 ───→ chunk (sentence, text+embedding) + │ +YOLO output (objects) ─── Rule 2 ───→ chunk (visual, objects+classes) + │ + ├── 文字搜尋 (ILIKE) + ├── 向量搜尋 (Qdrant) + └── 視覺過濾 (objects/classes) +``` diff --git a/docs_v1.0/REFERENCE/Services_Inventory.md b/docs_v1.0/REFERENCE/Services_Inventory.md new file mode 100644 index 0000000..3ae8b0c --- /dev/null +++ b/docs_v1.0/REFERENCE/Services_Inventory.md @@ -0,0 +1,208 @@ +# Momentry Core — 完整服務清單 + +**Date**: 2026-05-15 +**Version**: 1.0 +**Status**: Active + +--- + +## 1. 來源分類 + +| 來源 | 說明 | 數量 | +|------|------|:----:| +| 🔵 Source build | 原始碼編譯,SHA256 可驗證 | 7 | +| 🟡 Homebrew | 透過 brew 安裝,待遷移 | 18 | +| 🟢 Production only | 僅在 production 運行 | 1 | +| 📦 Third-party | 預編譯 binary / 套件 | 5 | + +--- + +## 2. 
完整服務一覽 + +### 🔵 Source Build (SHA256 已記錄) + +| Service | Version | Binary | Source Archive | Resource ID | +|---------|---------|--------|---------------|-------------| +| **PostgreSQL** | 18.3 | `$HOME/pgsql/18.3/bin/postgres` | `release/.../postgresql-18.3.tar.gz` | - | +| **Redis** | 7.4.3 | `/opt/homebrew/bin/redis-server`(brew) | `release/.../redis-7.4.3.tar.gz` | - | +| **FFmpeg** | 7.1.1 | `/opt/homebrew/bin/ffmpeg`(brew) | `release/.../ffmpeg-7.1.1.tar.xz` | - | +| **RSync** | 3.4.2 | `/Users/accusys/bin/rsync` | `release/.../rsync-official-3.4.2.tar.gz` | `rsync` | +| **SFTPGo** | 2.7.99-dev | `/Users/accusys/bin/sftpgo` | `release/.../sftpgo-main.tar.gz` | `sftpgo` | +| **llama.cpp** | - | `$HOME/llama/bin/llama-server` | `release/.../llama.cpp/` | - | +| **Momentry** | 1.0.0 | `target/release/momentry` | (git repo) | - | + +### 🟡 Homebrew (待遷移) + +| Service | Version | Brew Binary | Dev.Resources | +|---------|---------|-------------|:-------------:| +| **PHP** | 8.5.5 | `/opt/homebrew/bin/php` | ✅ | +| **PHP-FPM** | 8.5.5 | `/opt/homebrew/sbin/php-fpm` | ✅ (同 php) | +| **MariaDB** | 12.2.2 | `/opt/homebrew/bin/mariadbd` | ✅ | +| **Node.js** | 25.9.0 | `/opt/homebrew/bin/node` | ❌ | +| **MongoDB** | 8.2.7 | `/opt/homebrew/bin/mongod` | ❌ | +| **MongoSH** | 2.8.3 | `/opt/homebrew/bin/mongosh` | ❌ | +| **Ollama** | 0.23.1 | `/opt/homebrew/bin/ollama` | ❌ | +| **yt-dlp** | 2026.3.17 | `/opt/homebrew/bin/yt-dlp` | ❌ | +| **whisper-cpp** | 1.8.4 | `/opt/homebrew/bin/whisper-cpp` | ❌ | +| **Tesseract** | 5.5.2 | `/opt/homebrew/bin/tesseract` | ❌ | +| **SDL2** | 2.32.10 | `/opt/homebrew/lib/libsdl2.dylib` | ❌ | +| **Go** | 1.26.2 | `/opt/homebrew/bin/go` | ❌ | +| **CMake** | 4.3.2 | `/opt/homebrew/bin/cmake` | ❌ | +| **Python 3.11** | 3.11.15 | `/opt/homebrew/bin/python3.11` | ❌ | +| **Python 3.14** | 3.14.4 | `/opt/homebrew/bin/python3.14` | ❌ | +| **protobuf** | - | `/opt/homebrew/bin/protoc` | ❌ | +| **pgvector** | 0.8.2 | (PG extension) | ❌ | +| 
**FFmpeg-full** | 8.1.1 | `/opt/homebrew/opt/ffmpeg-full/bin/ffmpeg` | ❌ | + +### 🟢 Production Only (僅 production 有) + +| Service | URL | Role | Resource ID | +|---------|-----|------|:-----------:| +| **WordPress** | `https://wp.momentry.ddns.net` | CMS + Portal | ✅ (offline) | + +### 📦 Third-Party (預編譯 / 外部依賴) + +| Service | Version | Location | Note | +|---------|---------|----------|------| +| **Qdrant** | - | `/Users/accusys/momentry_core_0.1/services/qdrant/target/release/qdrant` | Rust source in `services/` | +| **EmbeddingGemma** | - | Python script `scripts/embeddinggemma_server.py` | 由 server 管理 | +| **GroundingDINO** | - | `release/.../services/src/GroundingDINO/` | Python ML model | +| **PaliGemma** | - | `release/.../services/src/paligemma/` | Python ML model | +| **Odoo** | - | `release/.../services/src/odoo/` | ERP (未啟用) | +| **Gitea** | - | `release/.../services/src/gitea/` | Git hosting (未啟用) | +| **LibreOffice** | 26.2.3 | `release/.../LibreOffice_26.2.3_MacOS_aarch64.dmg` | Document conversion | +| **Swift** | 6.3.1 | `release/.../swift-6.3.1-RELEASE.tar.gz` | Processor scripts | +| **SQLite-vec** | - | `release/.../sqlite-vec/` + `vec0.dylib` | Vector extension | +| **librsvg** | - | `release/.../librsvg/` | SVG conversion | +| **macmon** | 0.7.2 | `$HOME/bin/macmon` | Monitoring | +| **mactop** | latest | `$HOME/bin/mactop` | Monitoring | +| **mermaid-cli** | 11.14.0 | npm global | Diagram rendering | + +--- + +## 3. 
Plist (launchd) 管理 + +| Plist | Binary Path | Source Type | Status | +|-------|-------------|:-----------:|:------:| +| `com.momentry.sftpgo.plist` | `/Users/accusys/bin/sftpgo` | ✅ source | ✅ | +| `com.momentry.redis.plist` | `/opt/homebrew/bin/redis-server` | 🟡 brew | ❌ 待更新 | +| `com.momentry.postgresql.plist` | `$HOME/pgsql/18.3/bin/postgres` | ✅ source | ✅ | +| `com.momentry.ollama.plist` | `/opt/homebrew/bin/ollama` | 🟡 brew | ❌ 待更新 | +| `com.momentry.llama.plist` | `$HOME/llama/bin/llama-server` | ✅ source | ✅ | + +--- + +## 4. Port 分配 + +| Port | Service | Source Type | Running | +|:----:|---------|:-----------:|:-------:| +| 3002 | Momentry API (production) | ✅ build | ✅ | +| 3003 | Momentry Playground (dev) | ✅ build | ✅ | +| 8080 | SFTPGo Web Admin | ✅ build | ✅ | +| 2022 | SFTPGo SFTP | ✅ build | ✅ | +| 8090 | SFTPGo WebDAV | ✅ build | ✅ | +| 5432 | PostgreSQL | ✅ build | ✅ | +| 6379 | Redis | 🟡 brew | ✅ | +| 27017 | MongoDB | 🟡 brew | ✅ | +| 6333 | Qdrant | 📦 service | ✅ | +| 11434 | Ollama API | 🟡 brew | ✅ | +| 11436 | EmbeddingGemma | 📦 script | ✅ | +| 8082 | llama-server (Gemma4) | ✅ build | ✅ | +| 9000 | PHP-FPM | 🟡 brew | ❌ | +| 8081 | LLM (deprecated) | 🟡 brew | ❌ | + +--- + +## 5. Database + +| Database | Engine | Port | Schema | Momentry Role | +|----------|--------|:----:|--------|---------------| +| `momentry` | PostgreSQL | 5432 | `dev` | Dev playground (3003) | +| `momentry_3002` | PostgreSQL | 5432 | `public` | M5 production (3002) | +| `sftpgo` | PostgreSQL | 5432 | `public` | SFTPGo users | +| `momentry` | MongoDB | 27017 | `momentry` | Cache | +| Qdrant | Qdrant | 6333 | `momentry_dev_rule1_v2` | Vector search | +| Redis | Redis | 6379 | - | Worker progress, cache | + +--- + +## 6. 
Source Archives Status (`release/system/v1.0/services/src/`) + +### ✅ 已歸檔 (32 items) + +``` +cmake-4.2.0-macos-universal.tar.gz ffmpeg-7.1.1.tar.xz +freetype-2.13.3.tar.gz postgresql-18.3.tar.gz +redis-7.4.3.tar.gz rsync-official-3.4.2.tar.gz +sftpgo-main.tar.gz swift-6.3.1-RELEASE.tar.gz +llama.cpp/ x264/ +go/ pyenv/ +macmon-0.7.2.tar.gz mactop-latest.tar.gz +rustc-1.92.0-src.tar.xz rustup-1.28.1.tar.gz +sqlite-amalgamation-3490100.zip sqlite-vec/ +vec0.dylib yt-dlp/ +mermaid-js-mermaid-cli-11.14.0.tgz LibreOffice_26.2.3_MacOS_aarch64.dmg +libreoffice-26.2.3.2.tar.xz librsvg/ +GroundingDINO/ paligemma/ +gitea/ erpnext/ +frappe/ odoo/ +python_probe_deps.txt +``` + +### ❌ 缺少需補 + +| Service | Expected Source URL | +|---------|-------------------| +| PHP 8.5.5 | `https://www.php.net/distributions/php-8.5.5.tar.gz` | +| MariaDB 12.2.2 | `https://github.com/MariaDB/server/archive/mariadb-12.2.2.tar.gz` | +| Node.js 25.9.0 | `https://nodejs.org/dist/v25.9.0/node-v25.9.0.tar.gz` | +| MongoDB 8.2.7 | `https://github.com/mongodb/mongo/archive/r8.2.7.tar.gz` | +| Ollama 0.23.1 | `https://github.com/ollama/ollama/archive/v0.23.1.tar.gz` | + +--- + +## 7. 
Health Endpoint (`/health/detailed`) + +| Field | Source | Covers | +|-------|--------|--------| +| `services.postgres` | direct check | PostgreSQL | +| `services.redis` | direct check | Redis | +| `services.qdrant` | direct check | Qdrant | +| `services.mongodb` | direct check | MongoDB | +| `pipeline.ffmpeg` | `which ffmpeg` | FFmpeg | +| `pipeline.llm` | HTTP check port 8082 | llama-server | +| `pipeline.embedding_server` | HTTP check port 11436 | EmbeddingGemma | +| `pipeline.rsync` | file check | RSync | +| `pipeline.scripts_integrity` | SHA256 vs manifest | Processor scripts(345) | +| `schema` | DB query | Schema migrations(9) | +| `processors` | file check | 12 processors | +| `resources` | system query | CPU/Mem/GPU | + +### 未涵蓋的服務 + +- PHP / PHP-FPM +- MariaDB +- Node.js / npm +- Ollama +- SFTPGo (有 `/api/v1/stats/sftpgo` 但不在 health response) +- yt-dlp / whisper-cpp / tesseract +- WordPress + +--- + +## 8. Migration Priority Matrix + +``` +Priority Service Binary Source Dev.Resources Plist Health +────────────────────────────────────────────────────────────────────────────── +P1:high PHP brew → build ❌ need ✅ recorded ❌ brew ❌ missing +P1:high MariaDB brew → build ❌ need ✅ recorded ❌ brew ❌ missing +P1:high Node.js brew → build ❌ need ❌ not in db ❌ brew ❌ missing +P2:med Redis brew → build ✅ have ❌ not in db ❌ brew ❌ missing +P2:med MongoDB brew → build ❌ need ❌ not in db ❌ brew ❌ missing +P2:med ffmpeg brew → build ✅ have ❌ not in db ❌ brew ✅ basic +P3:low Ollama brew → build ❌ need ❌ not in db ❌ brew ❌ missing +P3:low yt-dlp brew → archive ✅ have ❌ not in db ❌ brew ❌ missing +P3:low whisper brew → build ❌ need ❌ not in db ❌ brew ❌ missing +P3:low tesseract brew → build ❌ need ❌ not in db ❌ brew ❌ missing +``` diff --git a/docs_v1.0/REFERENCE/Trace_Face_Binding.md b/docs_v1.0/REFERENCE/Trace_Face_Binding.md new file mode 100644 index 0000000..c485583 --- /dev/null +++ b/docs_v1.0/REFERENCE/Trace_Face_Binding.md @@ -0,0 +1,129 @@ +# Trace 與 Face 的綁定機制 + 
+**Date**: 2026-05-16 + +--- + +## 資料模型 + +``` +face_detections table +├── id (PK, SERIAL) +├── file_uuid (VARCHAR(32)) +├── trace_id (INTEGER) ← trace grouping +├── face_id (VARCHAR) ← 未使用 (always null) +├── identity_id (INTEGER) ← FK → identities.id +├── frame_number (INTEGER) +├── x, y, width, height (INTEGER) +└── confidence (FLOAT4) + +identities table +├── id (PK, SERIAL) +├── uuid (UUID) ← identity_uuid (API 用) +├── name (TEXT) +└── source (VARCHAR) +``` + +## Trace vs Face + +| Term | Scope | 說明 | +|------|-------|------| +| **face** | single detection | 單一影格中的一個人臉偵測 (`face_detections` 的一行) | +| **trace** | continuous tracking | 同一人的連續影格人臉集合 (`GROUP BY trace_id`) | + +一個 trace 包含 N 個 face detections。 + +## 綁定方式 + +### 1. Trace-level 綁定 (Identity Agent) + +Agent API (`identity_agent_api.rs:815`): + +```sql +UPDATE face_detections +SET identity_id = ? +WHERE file_uuid = ? + AND trace_id = ? +``` + +同時將整個 trace 的所有 faces 綁定到一個 identity。 + +### 2. Identity Agent 自動匹配 + +Face Agent (`core/tmdb/face_agent.rs:198`): + +``` +1. 提取每個 trace 的 face embedding +2. 比對 embedding 與已知 identity +3. 計算 cosine similarity +4. 匹配成功 → UPDATE face_detections SET identity_id = ? +5. 低於 threshold → 保留為 unknown +``` + +### 3. Merge Identities (`POST /api/v1/identity/:from_uuid/mergeinto`) + +```sql +UPDATE face_detections +SET identity_id = $target_id +WHERE identity_id = $source_id +``` + +將 source identity 的所有 face detections 重新指派到 target identity。 + +### 4. 解綁 (`POST /api/v1/identity/:identity_uuid/unbind`) + +```sql +UPDATE face_detections +SET identity_id = NULL +WHERE file_uuid = ? AND trace_id = ? +``` + +## 查詢已綁定的 Traces + +```sql +SELECT trace_id, + COUNT(*) AS face_count, + MIN(frame_number) AS start_frame, + MAX(frame_number) AS end_frame, + (SELECT name FROM identities WHERE id = fd.identity_id) AS identity_name +FROM face_detections fd +WHERE file_uuid = '...' 
+ AND trace_id IS NOT NULL + AND identity_id IS NOT NULL +GROUP BY trace_id, identity_id; +``` + +## API 端點 + +| 方法 | 路徑 | 功能 | +|------|------|------| +| POST | `/api/v1/identity/:identity_uuid/bind` | 綁定 trace 到 identity (by trace_id;`face_id` 未使用) | +| POST | `/api/v1/identity/:identity_uuid/unbind` | 解綁 face (by trace_id) | +| POST | `/api/v1/identity/:from_uuid/mergeinto` | 合併 identity (重設所有 identity_id) | +| POST | `/api/v1/agents/identity/analyze` | Agent 自動匹配 embedding | +| POST | `/api/v1/agents/suggest/merge` | Agent 建議相似 identity 合併 | + +## 流程圖 + +``` +Video Input + │ + ▼ +Face Detection (每幀偵測人臉) + │ + ▼ +Face Tracker (embedding + IoU → 產生 trace_id) + │ + ├── face_detections 寫入 DB (含 trace_id, identity_id=NULL) + │ + ▼ +Identity Agent (比對 embedding) + │ + ├── 匹配成功 → UPDATE identity_id = matched + ├── 匹配失敗 → identity_id 保持 NULL + │ + ▼ +API Bind (手動綁定) + ├── 指定 trace_id + identity_uuid + └── UPDATE identity_id = target +``` diff --git a/docs_v1.0/REFERENCE/Trace_Structure.md b/docs_v1.0/REFERENCE/Trace_Structure.md new file mode 100644 index 0000000..a508fc6 --- /dev/null +++ b/docs_v1.0/REFERENCE/Trace_Structure.md @@ -0,0 +1,98 @@ +# Trace 結構說明 + +**Date**: 2026-05-16 + +--- + +## 定義 + +Trace 是 Face Tracker 輸出的一組連續人臉偵測結果,代表同一個人在畫面中的連續出現。每一條 trace 包含多個 `face_detections` 記錄,共享同一個 `trace_id`。 + +## 結構 + +### 資料庫 + +```sql +-- face_detections 表 +trace_id INTEGER -- 同檔案內順序編號 (0, 1, 2, ...) 
+file_uuid VARCHAR(32) -- 所屬檔案 +face_id VARCHAR -- 單一偵測 ID (未使用) +identity_id INTEGER -- 綁定的身份 ID (FK → identities.id) +frame_number INTEGER -- 所在影格 +x, y, w, h INTEGER -- Bounding box +confidence FLOAT4 -- 信心度 +``` + +### scope + +| 範圍 | 說明 | +|------|------| +| **Per-file** | `trace_id` 在同一個 `file_uuid` 內唯一,從 0 開始遞增 | +| **Global** | 不同檔案的相同 `trace_id` 沒有關聯,需搭配 `file_uuid` 使用 | + +### API 端點 + +```bash +# Trace 列表(含 face_count、時間範圍、信心度) +POST /api/v1/file/:file_uuid/face_trace/sortby +Body: {"sort_by": "face_count", "limit": 100} + +# Trace 內的人臉偵測(含 interpolation) +GET /api/v1/file/:file_uuid/trace/:trace_id/faces?page=1&page_size=200 + +# Trace 綁定身份 (trace→identity) +POST /api/v1/identity/:identity_uuid/bind +Body: {"file_uuid": "...", "trace_id": 5} +``` + +### Trace 與 Identity 的關係 + +``` +trace_id (per-file seq) + │ + ├── identity_id (FK → identities.id) + │ │ + │ └── identity_uuid (global UUID, 32碼無-) + │ + └── face_detections (N rows per trace) + │ + ├── frame_number (哪一幀) + ├── bbox (位置) + └── confidence (信心度) +``` + +### Trace 資料範例 + +```json +{ + "trace_id": 2, + "face_count": 46, + "start_frame": 4587, + "end_frame": 4722, + "start_time": 191.38, + "end_time": 196.95, + "duration_sec": 5.57, + "avg_confidence": 0.85, + "sample_face_id": null +} +``` + +## 生命週期 + +``` +Face Tracker (Python script) + → 分析人臉 embedding + IoU + 距離 + → 產生 trace_id (同檔案內獨立 numbering) + → 寫入 face_detections 表 + → Identity Agent 比對 embedding + → 設定 identity_id (綁定身份) +``` + +## 特點 + +| 特性 | 說明 | +|------|------| +| **順序編號** | 無 UUID,簡單整數,每個檔案從 0 開始 | +| **唯一性** | `(file_uuid, trace_id)` 唯一,`trace_id` 單獨不唯一 | +| **補幀** | API 支援 `interpolate=true` 參數補齊中間幀 | +| **綁定** | 透過 Agent API 綁定到 `identity_uuid` | diff --git a/release/migrate_add_cut_id.sql b/release/migrate_add_cut_id.sql new file mode 100644 index 0000000..ab53ee1 --- /dev/null +++ b/release/migrate_add_cut_id.sql @@ -0,0 +1,15 @@ +-- Migration: Add cut_id column for per-cut trace scoping +-- Date: 2026-05-16 +-- Usage: 
psql -U accusys -d momentry -f migrate_add_cut_id.sql
+-- Note: runs with current search_path
+
+ALTER TABLE face_detections ADD COLUMN IF NOT EXISTS cut_id INTEGER;
+
+-- Back-fill from chunk rows (chunk_type = 'cut'); cut_id holds chunk.id here until migrate_create_cuts_table.sql remaps it to cuts.id
+UPDATE face_detections fd
+SET cut_id = c.id
+FROM chunk c
+WHERE c.file_uuid = fd.file_uuid
+  AND c.chunk_type = 'cut'
+  AND fd.frame_number BETWEEN c.start_frame AND c.end_frame
+  AND fd.cut_id IS NULL;
diff --git a/release/migrate_add_stranger_id.sql b/release/migrate_add_stranger_id.sql
new file mode 100644
index 0000000..d59a4f6
--- /dev/null
+++ b/release/migrate_add_stranger_id.sql
@@ -0,0 +1,25 @@
+-- Migration: Add stranger_id column for unmatched face traces
+-- Unmatched traces get stranger_id = trace_id (per-file sequential integer)
+-- Index: (file_uuid, stranger_id) WHERE stranger_id IS NOT NULL, deliberately NOT unique.
+--   face_detections stores one row per detection, so every row of an unmatched
+--   trace carries the same (file_uuid, stranger_id) pair; a UNIQUE index would
+--   reject the back-fill below for any trace with more than one detection.
+-- Date: 2026-05-16
+-- Usage: psql -U accusys -d momentry -f migrate_add_stranger_id.sql
+
+ALTER TABLE face_detections ADD COLUMN IF NOT EXISTS stranger_id INTEGER;
+
+-- Drop first: an earlier run may have left a UNIQUE index under this name,
+-- and CREATE INDEX IF NOT EXISTS would silently keep that definition.
+DROP INDEX IF EXISTS idx_face_detections_stranger;
+CREATE INDEX idx_face_detections_stranger
+    ON face_detections (file_uuid, stranger_id)
+    WHERE stranger_id IS NOT NULL;
+
+-- Assign stranger_id = trace_id to every detection of an existing unmatched trace.
+-- The stranger_id IS NULL guard keeps re-runs from touching rows already assigned.
+UPDATE face_detections
+SET stranger_id = trace_id
+WHERE trace_id IS NOT NULL
+  AND identity_id IS NULL
+  AND stranger_id IS NULL;
diff --git a/release/migrate_create_cuts_table.sql b/release/migrate_create_cuts_table.sql
new file mode 100644
index 0000000..f07fe99
--- /dev/null
+++ b/release/migrate_create_cuts_table.sql
@@ -0,0 +1,53 @@
+-- Migration: Create independent cuts table
+-- Cut = a group of consecutive frames shot continuously by the same camera (one shot)
+-- Date: 2026-05-16
+-- Usage: psql -U accusys -d momentry -f migrate_create_cuts_table.sql
+
+-- One row per cut; cut_number is unique within a file.
+CREATE TABLE IF NOT EXISTS cuts (
+    id          SERIAL PRIMARY KEY,
+    file_uuid   VARCHAR(32) NOT NULL,
+    cut_number  INTEGER NOT NULL,
+    start_frame BIGINT NOT NULL,
+    end_frame   BIGINT NOT NULL,
+    start_time  DOUBLE PRECISION,
+    end_time    DOUBLE PRECISION,
+    fps         DOUBLE PRECISION,
+    metadata    JSONB DEFAULT '{}',
+    created_at  TIMESTAMPTZ DEFAULT NOW(),
+    UNIQUE (file_uuid, cut_number)
+);
+
+-- Migrate from chunk (chunk_type = 'cut'); cut_number comes from content->>'scene_number'.
+-- A chunk row without scene_number yields NULL and fails the NOT NULL constraint above.
+-- ON CONFLICT DO NOTHING makes re-runs skip cuts that were already copied.
+INSERT INTO cuts (file_uuid, cut_number, start_frame, end_frame, start_time, end_time, fps, metadata)
+SELECT
+    c.file_uuid,
+    (c.content->>'scene_number')::int AS cut_number,
+    c.start_frame,
+    c.end_frame,
+    c.start_time,
+    c.end_time,
+    c.fps,
+    c.metadata
+FROM chunk c
+WHERE c.chunk_type = 'cut'
+ON CONFLICT (file_uuid, cut_number) DO NOTHING;
+
+-- Make sure the target column exists so this script also works when
+-- migrate_add_cut_id.sql has not been applied yet (no-op if it has).
+ALTER TABLE face_detections ADD COLUMN IF NOT EXISTS cut_id INTEGER;
+
+-- Point face_detections.cut_id at cuts.id. IS DISTINCT FROM is NULL-safe, so it
+-- covers both unset rows and rows still holding another id, and skips rows
+-- that are already correct.
+-- NOTE(review): if two cuts of one file share a boundary frame, a detection on
+-- that frame matches both and PostgreSQL picks one arbitrarily -- confirm cut
+-- ranges never overlap.
+UPDATE face_detections fd
+SET cut_id = cs.id
+FROM cuts cs
+WHERE cs.file_uuid = fd.file_uuid
+  AND fd.frame_number BETWEEN cs.start_frame AND cs.end_frame
+  AND fd.cut_id IS DISTINCT FROM cs.id;