chore: backup before migration to new repo

This commit is contained in:
Warren
2026-04-23 16:46:02 +08:00
parent 13dd3b30f3
commit 59809dae1f
40 changed files with 5566 additions and 1783 deletions

7
.env
View File

@@ -1,5 +1,10 @@
DB_MAX_CONNECTIONS=50 DB_MAX_CONNECTIONS=50
DB_ACQUIRE_TIMEOUT=30 DB_ACQUIRE_TIMEOUT=30
DATABASE_SCHEMA=dev
QDRANT_URL=http://127.0.0.1:6333 QDRANT_URL=http://127.0.0.1:6333
QDRANT_API_KEY=Test3200Test3200Test3200 QDRANT_API_KEY=Test3200Test3200Test3200
QDRANT_COLLECTION=momentry_rule1 QDRANT_COLLECTION=momentry_rule1
MONGODB_URL=mongodb://localhost:27017
MONGODB_CACHE_ENABLED=false
MOMENTRY_REDIS_PREFIX=momentry:
REDIS_URL=redis://:accusys@localhost:6379

View File

@@ -14,25 +14,27 @@ MOMENTRY_MAX_CONCURRENT=1
MOMENTRY_POLL_INTERVAL=10 MOMENTRY_POLL_INTERVAL=10
MOMENTRY_WORKER_BATCH_SIZE=5 MOMENTRY_WORKER_BATCH_SIZE=5
# Database (same as production, but could use separate dev database) # Database (PostgreSQL) - Schema isolation
DATABASE_URL=postgres://accusys@localhost:5432/momentry DATABASE_URL=postgres://accusys@localhost:5432/momentry
DATABASE_SCHEMA=dev
# MongoDB # MongoDB - Database isolation
MONGODB_URL=mongodb://localhost:27017 MONGODB_URL=mongodb://localhost:27017
MONGODB_DATABASE=momentry MONGODB_DATABASE=momentry_dev
# Redis # Redis (already isolated via prefix)
REDIS_URL=redis://:accusys@localhost:6379 REDIS_URL=redis://:accusys@localhost:6379
REDIS_PASSWORD=accusys REDIS_PASSWORD=accusys
# Qdrant Vector Database (same as production) # Qdrant Vector Database - Collection isolation
QDRANT_URL=http://localhost:6333 QDRANT_URL=http://localhost:6333
QDRANT_API_KEY=Test3200Test3200Test3200 QDRANT_API_KEY=Test3200Test3200Test3200
QDRANT_COLLECTION=momentry_rule1 QDRANT_COLLECTION=momentry_dev_rule1
# Paths # Paths
MOMENTRY_OUTPUT_DIR=/Users/accusys/momentry/output_dev MOMENTRY_OUTPUT_DIR=/Users/accusys/momentry/output_dev
MOMENTRY_BACKUP_DIR=/Users/accusys/momentry/backup/momentry_dev MOMENTRY_BACKUP_DIR=/Users/accusys/momentry/backup/momentry_dev
MOMENTRY_SFTP_ROOT=/Users/accusys/momentry/var/sftpgo/data/demo/
# Python (for processing scripts) # Python (for processing scripts)
MOMENTRY_PYTHON_PATH=/opt/homebrew/bin/python3.11 MOMENTRY_PYTHON_PATH=/opt/homebrew/bin/python3.11
@@ -57,4 +59,12 @@ MONGODB_CACHE_TTL_SEARCH=300
MONGODB_CACHE_TTL_HYBRID_SEARCH=600 MONGODB_CACHE_TTL_HYBRID_SEARCH=600
MONGODB_CACHE_TTL_VIDEO_META=3600 MONGODB_CACHE_TTL_VIDEO_META=3600
REDIS_CACHE_TTL_HEALTH=30 REDIS_CACHE_TTL_HEALTH=30
REDIS_CACHE_TTL_VIDEO_META=3600 REDIS_CACHE_TTL_VIDEO_META=3600
# 同義詞配置文件(可選)
# 取消註釋並設置為您的同義詞JSON檔案路徑以啟用同義詞擴展
# MOMENTRY_SYNONYM_FILE=/Users/accusys/momentry_core_0.1/docs/examples/custom_synonyms.json
#
# 多個同義詞檔案(逗號分隔),會覆蓋 MOMENTRY_SYNONYM_FILE
# MOMENTRY_SYNONYM_FILES=/path/to/first.json,/path/to/second.json
#
# 示例檔案：docs/examples/custom_synonyms.json

View File

@@ -182,6 +182,15 @@ src/
### Server ### Server
- `MOMENTRY_SERVER_PORT` - API server port (default: `3002` for production, `3003` for playground) - `MOMENTRY_SERVER_PORT` - API server port (default: `3002` for production, `3003` for playground)
- `MOMENTRY_REDIS_PREFIX` - Redis key prefix (default: `momentry:` for production, `momentry_dev:` for playground) - `MOMENTRY_REDIS_PREFIX` - Redis key prefix (default: `momentry:` for production, `momentry_dev:` for playground)
- `MOMENTRY_API_KEY` - API key for Player online mode testing
### Testing API Key
```bash
export MOMENTRY_API_KEY="muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69"
# Test Player online mode
cargo run --features player --bin momentry_player -- -o
```
### Database ### Database
- `DATABASE_URL` - PostgreSQL (default: `postgres://accusys@localhost:5432/momentry`) - `DATABASE_URL` - PostgreSQL (default: `postgres://accusys@localhost:5432/momentry`)
@@ -201,6 +210,10 @@ src/
- `MOMENTRY_CUT_TIMEOUT` - CUT timeout in seconds (default: 3600) - `MOMENTRY_CUT_TIMEOUT` - CUT timeout in seconds (default: 3600)
- `MOMENTRY_DEFAULT_TIMEOUT` - Default timeout (default: 7200) - `MOMENTRY_DEFAULT_TIMEOUT` - Default timeout (default: 7200)
### Synonym Expansion
- `MOMENTRY_SYNONYM_FILES` - Comma-separated paths to synonym JSON files (e.g., `data/english_synonyms.json,data/llm_synonyms.json`)
- `MOMENTRY_SYNONYM_FILE` - Single synonym JSON file path (deprecated; use `MOMENTRY_SYNONYM_FILES` instead)
### Logging ### Logging
- `RUST_LOG` or `MOMENTRY_LOG_LEVEL` - Log level (default: `info`) - `RUST_LOG` or `MOMENTRY_LOG_LEVEL` - Log level (default: `info`)
@@ -213,6 +226,23 @@ src/
- PythonExecutor provides unified script execution with timeout support - PythonExecutor provides unified script execution with timeout support
- Redis 1.0.x for improved performance - Redis 1.0.x for improved performance
### LLM Synonym Generation
Generate the synonym database using llama.cpp (Gemma4):
```bash
# Generate full database (162 entries, ~5 minutes)
python3 scripts/generate_synonyms_llamacpp.py
# Quick test
python3 scripts/generate_synonyms_llamacpp.py --test
# Resume from existing file
python3 scripts/generate_synonyms_llamacpp.py --resume
# Output: data/llm_synonyms.json (27 Chinese + 135 English words)
```
## Task Management ## Task Management
### 使用 todowrite 追蹤任務 ### 使用 todowrite 追蹤任務

413
Cargo.lock generated
View File

@@ -86,21 +86,6 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "anstream"
version = "0.6.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
dependencies = [
"anstyle",
"anstyle-parse 0.2.7",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]] [[package]]
name = "anstream" name = "anstream"
version = "1.0.0" version = "1.0.0"
@@ -108,7 +93,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d"
dependencies = [ dependencies = [
"anstyle", "anstyle",
"anstyle-parse 1.0.0", "anstyle-parse",
"anstyle-query", "anstyle-query",
"anstyle-wincon", "anstyle-wincon",
"colorchoice", "colorchoice",
@@ -122,15 +107,6 @@ version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000"
[[package]]
name = "anstyle-parse"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
dependencies = [
"utf8parse",
]
[[package]] [[package]]
name = "anstyle-parse" name = "anstyle-parse"
version = "1.0.0" version = "1.0.0"
@@ -177,9 +153,9 @@ dependencies = [
[[package]] [[package]]
name = "arc-swap" name = "arc-swap"
version = "1.8.2" version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9f3647c145568cec02c42054e07bdf9a5a698e15b466fb2341bfc393cd24aa5" checksum = "a07d1f37ff60921c83bdfc7407723bdefe89b44b98a9b772f225c8f9d67141a6"
dependencies = [ dependencies = [
"rustversion", "rustversion",
] ]
@@ -196,7 +172,7 @@ version = "3.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311" checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311"
dependencies = [ dependencies = [
"event-listener", "event-listener 5.4.1",
"event-listener-strategy", "event-listener-strategy",
"pin-project-lite", "pin-project-lite",
] ]
@@ -560,7 +536,7 @@ version = "4.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
dependencies = [ dependencies = [
"anstream 1.0.0", "anstream",
"anstyle", "anstyle",
"clap_lex", "clap_lex",
"strsim 0.11.1", "strsim 0.11.1",
@@ -1054,9 +1030,9 @@ dependencies = [
[[package]] [[package]]
name = "env_filter" name = "env_filter"
version = "1.0.0" version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a1c3cc8e57274ec99de65301228b537f1e4eedc1b8e0f9411c6caac8ae7308f" checksum = "32e90c2accc4b07a8456ea0debdc2e7587bdd890680d71173a15d4ae604f6eef"
dependencies = [ dependencies = [
"log", "log",
"regex", "regex",
@@ -1064,11 +1040,11 @@ dependencies = [
[[package]] [[package]]
name = "env_logger" name = "env_logger"
version = "0.11.9" version = "0.11.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2daee4ea451f429a58296525ddf28b45a3b64f1acf6587e2067437bb11e218d" checksum = "0621c04f2196ac3f488dd583365b9c09be011a4ab8b9f37248ffcc8f6198b56a"
dependencies = [ dependencies = [
"anstream 0.6.21", "anstream",
"anstyle", "anstyle",
"env_filter", "env_filter",
"jiff", "jiff",
@@ -1102,6 +1078,12 @@ dependencies = [
"windows-sys 0.48.0", "windows-sys 0.48.0",
] ]
[[package]]
name = "event-listener"
version = "2.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0"
[[package]] [[package]]
name = "event-listener" name = "event-listener"
version = "5.4.1" version = "5.4.1"
@@ -1119,7 +1101,7 @@ version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93"
dependencies = [ dependencies = [
"event-listener", "event-listener 5.4.1",
"pin-project-lite", "pin-project-lite",
] ]
@@ -1445,6 +1427,16 @@ version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]]
name = "hashbrown"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
"ahash",
"allocator-api2",
]
[[package]] [[package]]
name = "hashbrown" name = "hashbrown"
version = "0.15.5" version = "0.15.5"
@@ -1467,6 +1459,15 @@ dependencies = [
"foldhash 0.2.0", "foldhash 0.2.0",
] ]
[[package]]
name = "hashlink"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7"
dependencies = [
"hashbrown 0.14.5",
]
[[package]] [[package]]
name = "hashlink" name = "hashlink"
version = "0.10.0" version = "0.10.0"
@@ -1481,6 +1482,9 @@ name = "heck"
version = "0.4.1" version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
dependencies = [
"unicode-segmentation",
]
[[package]] [[package]]
name = "heck" name = "heck"
@@ -1926,14 +1930,15 @@ dependencies = [
[[package]] [[package]]
name = "ipconfig" name = "ipconfig"
version = "0.3.2" version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b58db92f96b720de98181bbbe63c831e87005ab460c1bf306eb2622b4707997f" checksum = "4d40460c0ce33d6ce4b0630ad68ff63d6661961c48b6dba35e5a4d81cfb48222"
dependencies = [ dependencies = [
"socket2 0.5.10", "socket2 0.6.3",
"widestring", "widestring",
"windows-sys 0.48.0", "windows-registry",
"winreg", "windows-result",
"windows-sys 0.61.2",
] ]
[[package]] [[package]]
@@ -1944,9 +1949,9 @@ checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2"
[[package]] [[package]]
name = "iri-string" name = "iri-string"
version = "0.7.10" version = "0.7.12"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" checksum = "25e659a4bb38e810ebc252e53b5814ff908a8c58c2a9ce2fae1bbec24cbf4e20"
dependencies = [ dependencies = [
"memchr", "memchr",
"serde", "serde",
@@ -2122,9 +2127,9 @@ checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981"
[[package]] [[package]]
name = "libredox" name = "libredox"
version = "0.1.14" version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" checksum = "7ddbf48fd451246b1f8c2610bd3b4ac0cc6e149d89832867093ab69a17194f08"
dependencies = [ dependencies = [
"bitflags 2.11.0", "bitflags 2.11.0",
"libc", "libc",
@@ -2256,6 +2261,12 @@ dependencies = [
"unicase", "unicase",
] ]
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]] [[package]]
name = "miniz_oxide" name = "miniz_oxide"
version = "0.8.9" version = "0.8.9"
@@ -2292,16 +2303,16 @@ dependencies = [
[[package]] [[package]]
name = "moka" name = "moka"
version = "0.12.14" version = "0.12.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85f8024e1c8e71c778968af91d43700ce1d11b219d127d79fb2934153b82b42b" checksum = "957228ad12042ee839f93c8f257b62b4c0ab5eaae1d4fa60de53b27c9d7c5046"
dependencies = [ dependencies = [
"async-lock", "async-lock",
"crossbeam-channel", "crossbeam-channel",
"crossbeam-epoch", "crossbeam-epoch",
"crossbeam-utils", "crossbeam-utils",
"equivalent", "equivalent",
"event-listener", "event-listener 5.4.1",
"futures-util", "futures-util",
"parking_lot", "parking_lot",
"portable-atomic", "portable-atomic",
@@ -2336,18 +2347,21 @@ dependencies = [
"mongodb", "mongodb",
"notify", "notify",
"once_cell", "once_cell",
"pgvector",
"qdrant-client", "qdrant-client",
"ratatui", "ratatui",
"redis", "redis",
"reqwest", "reqwest",
"sdl2",
"serde", "serde",
"serde_json", "serde_json",
"sha2", "sha2",
"sqlx", "sqlx 0.8.6",
"subtle", "subtle",
"thiserror 1.0.69", "thiserror 1.0.69",
"tokio", "tokio",
"tower 0.4.13", "tower 0.4.13",
"tower-http 0.5.2",
"tracing", "tracing",
"tracing-subscriber", "tracing-subscriber",
"utoipa", "utoipa",
@@ -2436,6 +2450,16 @@ dependencies = [
"tempfile", "tempfile",
] ]
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]] [[package]]
name = "notify" name = "notify"
version = "6.1.1" version = "6.1.1"
@@ -2492,9 +2516,9 @@ dependencies = [
[[package]] [[package]]
name = "num-conv" name = "num-conv"
version = "0.2.0" version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967"
[[package]] [[package]]
name = "num-integer" name = "num-integer"
@@ -2669,6 +2693,15 @@ version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]]
name = "pgvector"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ed92bf218dbe236609222dca0345767408ee7d5c93876c7fe09fa9b03f7249f"
dependencies = [
"sqlx 0.7.4",
]
[[package]] [[package]]
name = "phf" name = "phf"
version = "0.13.1" version = "0.13.1"
@@ -3130,12 +3163,13 @@ dependencies = [
[[package]] [[package]]
name = "redis" name = "redis"
version = "1.0.5" version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b36964393906eb775b89b25b05b7b95685b8dd14062f1663a31ff93e75c452e5" checksum = "d76e41a79ae5cbb41257d84cf4cf0db0bb5a95b11bf05c62c351de4fe748620d"
dependencies = [ dependencies = [
"arc-swap", "arc-swap",
"arcstr", "arcstr",
"async-lock",
"backon", "backon",
"bytes", "bytes",
"cfg-if", "cfg-if",
@@ -3240,7 +3274,7 @@ dependencies = [
"tokio-rustls 0.26.4", "tokio-rustls 0.26.4",
"tokio-util", "tokio-util",
"tower 0.5.3", "tower 0.5.3",
"tower-http", "tower-http 0.6.8",
"tower-service", "tower-service",
"url", "url",
"wasm-bindgen", "wasm-bindgen",
@@ -3332,9 +3366,9 @@ dependencies = [
[[package]] [[package]]
name = "rustc-hash" name = "rustc-hash"
version = "2.1.1" version = "2.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
[[package]] [[package]]
name = "rustc_version" name = "rustc_version"
@@ -3412,7 +3446,7 @@ dependencies = [
"once_cell", "once_cell",
"ring", "ring",
"rustls-pki-types", "rustls-pki-types",
"rustls-webpki 0.103.9", "rustls-webpki 0.103.10",
"subtle", "subtle",
"zeroize", "zeroize",
] ]
@@ -3469,9 +3503,9 @@ dependencies = [
[[package]] [[package]]
name = "rustls-webpki" name = "rustls-webpki"
version = "0.103.9" version = "0.103.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef"
dependencies = [ dependencies = [
"ring", "ring",
"rustls-pki-types", "rustls-pki-types",
@@ -3524,6 +3558,29 @@ dependencies = [
"untrusted", "untrusted",
] ]
[[package]]
name = "sdl2"
version = "0.35.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7959277b623f1fb9e04aea73686c3ca52f01b2145f8ea16f4ff30d8b7623b1a"
dependencies = [
"bitflags 1.3.2",
"lazy_static",
"libc",
"sdl2-sys",
]
[[package]]
name = "sdl2-sys"
version = "0.35.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3586be2cf6c0a8099a79a12b4084357aa9b3e0b0d7980e3b67aaf7a9d55f9f0"
dependencies = [
"cfg-if",
"libc",
"version-compare",
]
[[package]] [[package]]
name = "security-framework" name = "security-framework"
version = "3.7.0" version = "3.7.0"
@@ -3773,9 +3830,9 @@ dependencies = [
[[package]] [[package]]
name = "simd-adler32" name = "simd-adler32"
version = "0.3.8" version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
[[package]] [[package]]
name = "siphasher" name = "siphasher"
@@ -3847,19 +3904,77 @@ dependencies = [
"der", "der",
] ]
[[package]]
name = "sqlformat"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7bba3a93db0cc4f7bdece8bb09e77e2e785c20bfebf79eb8340ed80708048790"
dependencies = [
"nom",
"unicode_categories",
]
[[package]]
name = "sqlx"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c9a2ccff1a000a5a59cd33da541d9f2fdcd9e6e8229cc200565942bff36d0aaa"
dependencies = [
"sqlx-core 0.7.4",
"sqlx-macros 0.7.4",
"sqlx-postgres 0.7.4",
]
[[package]] [[package]]
name = "sqlx" name = "sqlx"
version = "0.8.6" version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fefb893899429669dcdd979aff487bd78f4064e5e7907e4269081e0ef7d97dc" checksum = "1fefb893899429669dcdd979aff487bd78f4064e5e7907e4269081e0ef7d97dc"
dependencies = [ dependencies = [
"sqlx-core", "sqlx-core 0.8.6",
"sqlx-macros", "sqlx-macros 0.8.6",
"sqlx-mysql", "sqlx-mysql",
"sqlx-postgres", "sqlx-postgres 0.8.6",
"sqlx-sqlite", "sqlx-sqlite",
] ]
[[package]]
name = "sqlx-core"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24ba59a9342a3d9bab6c56c118be528b27c9b60e490080e9711a04dccac83ef6"
dependencies = [
"ahash",
"atoi",
"byteorder",
"bytes",
"crc",
"crossbeam-queue",
"either",
"event-listener 2.5.3",
"futures-channel",
"futures-core",
"futures-intrusive",
"futures-io",
"futures-util",
"hashlink 0.8.4",
"hex",
"indexmap 2.13.0",
"log",
"memchr",
"once_cell",
"paste",
"percent-encoding",
"serde",
"serde_json",
"sha2",
"smallvec",
"sqlformat",
"thiserror 1.0.69",
"tracing",
"url",
]
[[package]] [[package]]
name = "sqlx-core" name = "sqlx-core"
version = "0.8.6" version = "0.8.6"
@@ -3872,13 +3987,13 @@ dependencies = [
"crc", "crc",
"crossbeam-queue", "crossbeam-queue",
"either", "either",
"event-listener", "event-listener 5.4.1",
"futures-core", "futures-core",
"futures-intrusive", "futures-intrusive",
"futures-io", "futures-io",
"futures-util", "futures-util",
"hashbrown 0.15.5", "hashbrown 0.15.5",
"hashlink", "hashlink 0.10.0",
"indexmap 2.13.0", "indexmap 2.13.0",
"log", "log",
"memchr", "memchr",
@@ -3893,6 +4008,20 @@ dependencies = [
"tokio-stream", "tokio-stream",
"tracing", "tracing",
"url", "url",
"uuid",
]
[[package]]
name = "sqlx-macros"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ea40e2345eb2faa9e1e5e326db8c34711317d2b5e08d0d5741619048a803127"
dependencies = [
"proc-macro2",
"quote",
"sqlx-core 0.7.4",
"sqlx-macros-core 0.7.4",
"syn 1.0.109",
] ]
[[package]] [[package]]
@@ -3903,11 +4032,34 @@ checksum = "a2d452988ccaacfbf5e0bdbc348fb91d7c8af5bee192173ac3636b5fb6e6715d"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"sqlx-core", "sqlx-core 0.8.6",
"sqlx-macros-core", "sqlx-macros-core 0.8.6",
"syn 2.0.117", "syn 2.0.117",
] ]
[[package]]
name = "sqlx-macros-core"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5833ef53aaa16d860e92123292f1f6a3d53c34ba8b1969f152ef1a7bb803f3c8"
dependencies = [
"dotenvy",
"either",
"heck 0.4.1",
"hex",
"once_cell",
"proc-macro2",
"quote",
"serde",
"serde_json",
"sha2",
"sqlx-core 0.7.4",
"sqlx-postgres 0.7.4",
"syn 1.0.109",
"tempfile",
"url",
]
[[package]] [[package]]
name = "sqlx-macros-core" name = "sqlx-macros-core"
version = "0.8.6" version = "0.8.6"
@@ -3924,9 +4076,9 @@ dependencies = [
"serde", "serde",
"serde_json", "serde_json",
"sha2", "sha2",
"sqlx-core", "sqlx-core 0.8.6",
"sqlx-mysql", "sqlx-mysql",
"sqlx-postgres", "sqlx-postgres 0.8.6",
"sqlx-sqlite", "sqlx-sqlite",
"syn 2.0.117", "syn 2.0.117",
"tokio", "tokio",
@@ -3969,10 +4121,49 @@ dependencies = [
"sha1", "sha1",
"sha2", "sha2",
"smallvec", "smallvec",
"sqlx-core", "sqlx-core 0.8.6",
"stringprep", "stringprep",
"thiserror 2.0.18", "thiserror 2.0.18",
"tracing", "tracing",
"uuid",
"whoami",
]
[[package]]
name = "sqlx-postgres"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c824eb80b894f926f89a0b9da0c7f435d27cdd35b8c655b114e58223918577e"
dependencies = [
"atoi",
"base64 0.21.7",
"bitflags 2.11.0",
"byteorder",
"crc",
"dotenvy",
"etcetera",
"futures-channel",
"futures-core",
"futures-io",
"futures-util",
"hex",
"hkdf",
"hmac",
"home",
"itoa",
"log",
"md-5",
"memchr",
"once_cell",
"rand 0.8.5",
"serde",
"serde_json",
"sha2",
"smallvec",
"sqlx-core 0.7.4",
"stringprep",
"thiserror 1.0.69",
"tracing",
"whoami", "whoami",
] ]
@@ -4007,10 +4198,11 @@ dependencies = [
"serde_json", "serde_json",
"sha2", "sha2",
"smallvec", "smallvec",
"sqlx-core", "sqlx-core 0.8.6",
"stringprep", "stringprep",
"thiserror 2.0.18", "thiserror 2.0.18",
"tracing", "tracing",
"uuid",
"whoami", "whoami",
] ]
@@ -4033,10 +4225,11 @@ dependencies = [
"percent-encoding", "percent-encoding",
"serde", "serde",
"serde_urlencoded", "serde_urlencoded",
"sqlx-core", "sqlx-core 0.8.6",
"thiserror 2.0.18", "thiserror 2.0.18",
"tracing", "tracing",
"url", "url",
"uuid",
] ]
[[package]] [[package]]
@@ -4410,32 +4603,32 @@ dependencies = [
[[package]] [[package]]
name = "toml_datetime" name = "toml_datetime"
version = "1.0.1+spec-1.1.0" version = "1.1.0+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b320e741db58cac564e26c607d3cc1fdc4a88fd36c879568c07856ed83ff3e9" checksum = "97251a7c317e03ad83774a8752a7e81fb6067740609f75ea2b585b569a59198f"
dependencies = [ dependencies = [
"serde_core", "serde_core",
] ]
[[package]] [[package]]
name = "toml_edit" name = "toml_edit"
version = "0.25.5+spec-1.1.0" version = "0.25.8+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ca1a40644a28bce036923f6a431df0b34236949d111cc07cb6dca830c9ef2e1" checksum = "16bff38f1d86c47f9ff0647e6838d7bb362522bdf44006c7068c2b1e606f1f3c"
dependencies = [ dependencies = [
"indexmap 2.13.0", "indexmap 2.13.0",
"toml_datetime 1.0.1+spec-1.1.0", "toml_datetime 1.1.0+spec-1.1.0",
"toml_parser", "toml_parser",
"winnow 1.0.0", "winnow 1.0.1",
] ]
[[package]] [[package]]
name = "toml_parser" name = "toml_parser"
version = "1.0.10+spec-1.1.0" version = "1.1.0+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7df25b4befd31c4816df190124375d5a20c6b6921e2cad937316de3fccd63420" checksum = "2334f11ee363607eb04df9b8fc8a13ca1715a72ba8662a26ac285c98aabb4011"
dependencies = [ dependencies = [
"winnow 1.0.0", "winnow 1.0.1",
] ]
[[package]] [[package]]
@@ -4514,6 +4707,22 @@ dependencies = [
"tracing", "tracing",
] ]
[[package]]
name = "tower-http"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5"
dependencies = [
"bitflags 2.11.0",
"bytes",
"http",
"http-body",
"http-body-util",
"pin-project-lite",
"tower-layer",
"tower-service",
]
[[package]] [[package]]
name = "tower-http" name = "tower-http"
version = "0.6.8" version = "0.6.8"
@@ -4705,9 +4914,9 @@ checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d"
[[package]] [[package]]
name = "unicode-segmentation" name = "unicode-segmentation"
version = "1.12.0" version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c"
[[package]] [[package]]
name = "unicode-truncate" name = "unicode-truncate"
@@ -4732,6 +4941,12 @@ version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
[[package]]
name = "unicode_categories"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
[[package]] [[package]]
name = "universal-hash" name = "universal-hash"
version = "0.5.1" version = "0.5.1"
@@ -4824,9 +5039,9 @@ dependencies = [
[[package]] [[package]]
name = "uuid" name = "uuid"
version = "1.22.0" version = "1.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9"
dependencies = [ dependencies = [
"getrandom 0.4.2", "getrandom 0.4.2",
"js-sys", "js-sys",
@@ -4846,6 +5061,12 @@ version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "version-compare"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "579a42fc0b8e0c63b76519a339be31bed574929511fa53c1a3acae26eb258f29"
[[package]] [[package]]
name = "version_check" name = "version_check"
version = "0.9.5" version = "0.9.5"
@@ -5404,23 +5625,13 @@ checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945"
[[package]] [[package]]
name = "winnow" name = "winnow"
version = "1.0.0" version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a90e88e4667264a994d34e6d1ab2d26d398dcdca8b7f52bec8668957517fc7d8" checksum = "09dac053f1cd375980747450bfc7250c264eaae0583872e845c0c7cd578872b5"
dependencies = [ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "winreg"
version = "0.50.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1"
dependencies = [
"cfg-if",
"windows-sys 0.48.0",
]
[[package]] [[package]]
name = "wit-bindgen" name = "wit-bindgen"
version = "0.51.0" version = "0.51.0"
@@ -5555,18 +5766,18 @@ dependencies = [
[[package]] [[package]]
name = "zerocopy" name = "zerocopy"
version = "0.8.42" version = "0.8.48"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9"
dependencies = [ dependencies = [
"zerocopy-derive", "zerocopy-derive",
] ]
[[package]] [[package]]
name = "zerocopy-derive" name = "zerocopy-derive"
version = "0.8.42" version = "0.8.48"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",

View File

@@ -47,15 +47,17 @@ moka = { version = "0.12", features = ["future"] }
# Database # Database
redis = { version = "1.0", features = ["tokio-comp", "connection-manager"] } redis = { version = "1.0", features = ["tokio-comp", "connection-manager"] }
sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "sqlite", "json", "chrono"] } sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "sqlite", "json", "chrono", "uuid"] }
mongodb = { version = "2", features = ["tokio-runtime"] } mongodb = { version = "2", features = ["tokio-runtime"] }
bson = { version = "2", features = ["chrono-0_4"] } bson = { version = "2", features = ["chrono-0_4"] }
qdrant-client = "1.7" qdrant-client = "1.7"
reqwest = { version = "0.12", features = ["json"] } reqwest = { version = "0.12", features = ["json"] }
pgvector = { version = "0.3", features = ["sqlx"] }
# HTTP Server # HTTP Server
axum = { version = "0.7", features = ["multipart"] } axum = { version = "0.7", features = ["multipart"] }
tower = "0.4" tower = "0.4"
tower-http = { version = "0.5", features = ["cors"] }
# API Documentation # API Documentation
utoipa = { version = "4", features = ["axum_extras", "chrono", "uuid"] } utoipa = { version = "4", features = ["axum_extras", "chrono", "uuid"] }
@@ -85,7 +87,11 @@ path = "src/lib.rs"
[features] [features]
default = [] default = []
player = [] player = ["sdl2"]
[dependencies.sdl2]
version = "0.35"
optional = true
[[bin]] [[bin]]
name = "momentry" name = "momentry"
@@ -111,5 +117,9 @@ path = "src/bin/migrate_chinese_text.rs"
name = "test_bm25_simple" name = "test_bm25_simple"
path = "src/bin/test_bm25_simple.rs" path = "src/bin/test_bm25_simple.rs"
[[bin]]
name = "integrated_player"
path = "src/bin/integrated_player.rs"
[build-dependencies] [build-dependencies]
chrono = "0.4" chrono = "0.4"

View File

@@ -300,8 +300,8 @@ curl -X POST http://localhost:3002/api/v1/n8n/search \
{ {
"id": "sentence_0001", "id": "sentence_0001",
"vid": "a1b10138a6bbb0cd", "vid": "a1b10138a6bbb0cd",
"start": 10.5, "start_time": 10.5,
"end": 15.2, "end_time": 15.2,
"title": "Chunk sentence_0001", "title": "Chunk sentence_0001",
"text": "Found text matching query", "text": "Found text matching query",
"score": 0.85, "score": 0.85,

View File

@@ -20,7 +20,7 @@
#### API Key(用於 API 認證) #### API Key(用於 API 認證)
``` ```
X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69 X-API-Key: muser_68600856036340bcafc01930eb4bd839
``` ```
#### SFTPGo用於影片上傳 #### SFTPGo用於影片上傳
@@ -160,12 +160,14 @@ n8n 專用搜尋(包含完整影片檔案路徑 file_path)
{ {
"id": "sentence_1471", "id": "sentence_1471",
"vid": "39567a0eb16f39fd", "vid": "39567a0eb16f39fd",
"start": 5309.08, "chunk_type": "sentence",
"end": 5311.08, "start_frame": 318545,
"title": "Chunk sentence_1471", "end_frame": 318665,
"fps": 59.94,
"start_time": 5314.31,
"end_time": 5316.32,
"text": "influenced by a vital way,", "text": "influenced by a vital way,",
"score": 0.68, "score": 0.68
"file_path": "/Users/accusys/momentry/var/sftpgo/data/demo/video.mp4"
} }
] ]
} }
@@ -176,8 +178,8 @@ n8n 專用搜尋(包含完整影片檔案路徑 file_path)
|------|-----------|----------------| |------|-----------|----------------|
| 影片 UUID | `uuid` | `vid` | | 影片 UUID | `uuid` | `vid` |
| Chunk ID | `chunk_id` | `id` | | Chunk ID | `chunk_id` | `id` |
| 開始時間 | `start_time` | `start` | | 開始時間 | `start_time` | `start_time` |
| 結束時間 | `end_time` | `end` | | 結束時間 | `end_time` | `end_time` |
| 相似度分數 | `score` | `score` | | 相似度分數 | `score` | `score` |
| **檔案路徑** | ❌ | ✅ `file_path` | | **檔案路徑** | ❌ | ✅ `file_path` |
@@ -386,3 +388,4 @@ GET /api/v1/jobs/{uuid}
| V1.2 | 2026-03-25 | 新增 Chunk 欄位說明、類型、播放方式 | OpenCode | | V1.2 | 2026-03-25 | 新增 Chunk 欄位說明、類型、播放方式 | OpenCode |
| V1.3 | 2026-03-25 | 新增 Demo 測試帳號(SFTPGo)| OpenCode | | V1.3 | 2026-03-25 | 新增 Demo 測試帳號(SFTPGo)| OpenCode |
| V1.4 | 2026-03-25 | 更新 n8n 搜尋回傳欄位說明 (media_url→file_path) | OpenCode | | V1.4 | 2026-03-25 | 更新 n8n 搜尋回傳欄位說明 (media_url→file_path) | OpenCode |
| V1.5 | 2026-04-17 | 修正 API Key 格式、統一 n8n/search 欄位名稱 (start/end → start_time/end_time) | OpenCode |

View File

@@ -1,141 +0,0 @@
# 場景識別 API 整合指南
## 概述
本文檔說明如何在 Playground (port 3003) 中使用場景識別功能。
## API Endpoint
### 場景識別
**Endpoint**: `GET /api/v1/scene/:uuid`
**描述**: 對指定影片執行場景識別
**參數**:
- `uuid` (path): 影片 UUID
**回應格式**:
```json
{
"video_uuid": "384b0ff44aaaa1f1",
"scenes": [
{
"start_time": 0.0,
"end_time": 156.0,
"scene_type": "office",
"scene_type_zh": "辦公室",
"confidence": 0.87,
"duration": 156.0
}
],
"processing_time": 1.3
}
```
## 使用方式
### 1. 啟動 Playground 伺服器
```bash
# 使用 port 3003
cargo run --bin momentry_playground -- server --host 0.0.0.0 --port 3003
```
### 2. 測試場景識別
```bash
# 使用測試腳本
python3 scripts/test_scene_api.py <video_uuid>
# 範例
python3 scripts/test_scene_api.py 384b0ff44aaaa1f1
```
### 3. 直接使用 curl
```bash
curl -H "X-API-Key: muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69" \
"http://localhost:3003/api/v1/scene/384b0ff44aaaa1f1"
```
## Python 整合範例
```python
import requests
API_KEY = "muser_68600856036340bcafc01930eb4bd839_1774418104_97221b69"
BASE_URL = "http://localhost:3003"
def classify_scene(video_uuid):
"""執行場景識別"""
response = requests.get(
f"{BASE_URL}/api/v1/scene/{video_uuid}",
headers={"X-API-Key": API_KEY}
)
if response.status_code == 200:
return response.json()
else:
raise Exception(f"API error: {response.status_code}")
# 使用範例
result = classify_scene("384b0ff44aaaa1f1")
print(f"場景數量:{len(result['scenes'])}")
for scene in result['scenes']:
print(f" - {scene['scene_type']} ({scene['confidence']*100:.1f}%)")
```
## 目前狀態
### 已完成 ✅
- ✅ 場景識別 Python 腳本 (`scripts/scene_classifier.py`)
- ✅ Places365 380 個場景類別
- ✅ API 測試腳本 (`scripts/test_scene_api.py`)
- ✅ Rust API handler 設計
### 進行中 ⏳
- ⏳ Rust API endpoint 完整實作
- ⏳ 與資料庫整合
- ⏳ 錯誤處理優化
### 已知限制
- Rust API endpoint 需要完整實作以支援資料庫查詢
- 目前建議使用 Python 腳本直接測試
## 故障排除
### 問題:API 回應 404
**可能原因**:
- 影片 UUID 不存在
- Playground 伺服器未啟動
**解決方案**:
```bash
# 檢查伺服器狀態
curl http://localhost:3003/health
# 檢查影片是否存在
curl -H "X-API-Key: ..." "http://localhost:3003/api/v1/videos"
```
### 問題:處理時間過長
**建議**:
- 減少取樣頻率 (`--sample-interval`)
- 增加最小場景持續時間 (`--min-scene-duration`)
- 使用 Places365 Core ML 模型(而非 PyTorch)
## 相關文檔
- `docs_v1.0/IMPLEMENTATION/SCENE_CLASSIFICATION_MODULE.md` - 模組使用手冊
- `docs_v1.0/IMPLEMENTATION/PLACES365_INSTALLATION.md` - 模型安裝指南
- `docs_v1.0/TESTING/SCENE_CLASSIFICATION_TEST_REPORT_2026_04_01.md` - 測試報告
## 下一步
1. 完成 Rust API endpoint 實作
2. 整合資料庫查詢
3. 添加異步處理支援
4. 優化效能和記憶體使用

View File

@@ -13,8 +13,7 @@
<key>ProgramArguments</key> <key>ProgramArguments</key>
<array> <array>
<string>/opt/homebrew/opt/node@22/bin/node</string> <string>/Users/accusys/momentry/scripts/start_n8n.sh</string>
<string>/opt/homebrew/lib/node_modules/n8n/bin/n8n</string>
<string>start</string> <string>start</string>
</array> </array>

View File

@@ -16,8 +16,7 @@
<key>ProgramArguments</key> <key>ProgramArguments</key>
<array> <array>
<string>/opt/homebrew/opt/node@22/bin/node</string> <string>/Users/accusys/momentry/scripts/start_n8n.sh</string>
<string>/opt/homebrew/lib/node_modules/n8n/bin/n8n</string>
<string>worker</string> <string>worker</string>
</array> </array>

View File

@@ -65,12 +65,20 @@ def run_asr(video_path, output_path, uuid: str = ""):
if publisher: if publisher:
publisher.info("asr", "Loading Whisper model...") publisher.info("asr", "Loading Whisper model...")
model = WhisperModel("tiny", device="cpu", compute_type="int8") # Use small model with CPU (MPS not supported by faster_whisper)
# small 模型在準確率和速度間取得最佳平衡
model = WhisperModel("small", device="cpu", compute_type="int8")
if publisher: if publisher:
publisher.info("asr", f"Transcribing: {video_path}") publisher.info("asr", f"Transcribing: {video_path}")
segments, info = model.transcribe(video_path, beam_size=5) # Transcribe with VAD filter for better accuracy
segments, info = model.transcribe(
video_path,
beam_size=5,
vad_filter=True,
vad_parameters=dict(min_silence_duration_ms=500, speech_pad_ms=200),
)
if publisher: if publisher:
publisher.info("asr", f"ASR_LANGUAGE:{info.language}") publisher.info("asr", f"ASR_LANGUAGE:{info.language}")

View File

@@ -22,6 +22,7 @@ def process_asrx(video_path: str, output_path: str, uuid: str = ""):
try: try:
import whisperx import whisperx
import torch
except ImportError: except ImportError:
if publisher: if publisher:
publisher.error("asrx", "whisperx not installed") publisher.error("asrx", "whisperx not installed")
@@ -36,6 +37,14 @@ def process_asrx(video_path: str, output_path: str, uuid: str = ""):
publisher.info("asrx", "ASRX_LOADING_MODEL") publisher.info("asrx", "ASRX_LOADING_MODEL")
try: try:
# Fix for PyTorch 2.6+ compatibility
# Allow omegaconf types in torch.load
import omegaconf
torch.serialization.add_safe_globals(
[omegaconf.listconfig.ListConfig, omegaconf.dictconfig.DictConfig]
)
# Load model - using faster-whisper for better performance # Load model - using faster-whisper for better performance
# You can also use: "large-v3", "medium", "small", "base", "tiny" # You can also use: "large-v3", "medium", "small", "base", "tiny"
model = whisperx.load_model("base", device="cpu", compute_type="int8") model = whisperx.load_model("base", device="cpu", compute_type="int8")
@@ -54,9 +63,14 @@ def process_asrx(video_path: str, output_path: str, uuid: str = ""):
# Diarization (speaker segmentation) # Diarization (speaker segmentation)
try: try:
import whisperx from whisperx.diarize import DiarizationPipeline
diarize_model = whisperx.DiarizationPipeline(use_auth_token=None) # DiarizationPipeline parameters: model_name, token, device, cache_dir
diarize_model = DiarizationPipeline(
model_name="pyannote/speaker-diarization",
token=None, # HuggingFace token (None for public models)
device="cpu",
)
diarize_segments = diarize_model(video_path) diarize_segments = diarize_model(video_path)
# Assign speaker labels # Assign speaker labels

View File

@@ -1,7 +1,8 @@
#!/opt/homebrew/bin/python3.11 #!/opt/homebrew/bin/python3.11
""" """
Caption Processor - Generate image captions Caption Processor - Generate image captions (LOCAL ONLY)
Uses AI vision models to analyze video frames and generate descriptions Uses Moondream2 (local VLM) for image captioning
No cloud API calls - fully offline processing
""" """
import sys import sys
@@ -18,7 +19,6 @@ from redis_publisher import RedisPublisher
def extract_frames(video_path: str, max_frames: int = 30) -> List[Dict]: def extract_frames(video_path: str, max_frames: int = 30) -> List[Dict]:
"""Extract frames from video at regular intervals""" """Extract frames from video at regular intervals"""
# Get video duration
cmd = [ cmd = [
"ffprobe", "ffprobe",
"-v", "-v",
@@ -34,14 +34,13 @@ def extract_frames(video_path: str, max_frames: int = 30) -> List[Dict]:
data = json.loads(result.stdout) data = json.loads(result.stdout)
duration = float(data.get("format", {}).get("duration", 0)) duration = float(data.get("format", {}).get("duration", 0))
else: else:
duration = 60 # Default fallback duration = 60
except Exception: except Exception:
duration = 60 duration = 60
if duration <= 0: if duration <= 0:
duration = 60 duration = 60
# Calculate frame interval
interval = max(duration / max_frames, 1.0) interval = max(duration / max_frames, 1.0)
frames = [] frames = []
@@ -76,94 +75,73 @@ def extract_frames(video_path: str, max_frames: int = 30) -> List[Dict]:
return frames return frames
def generate_caption_with_llava( def generate_caption_with_moondream(
image_path: str, prompt: str = "Describe this image in detail." image_path: str, prompt: str = "Describe this image in detail."
) -> Optional[str]: ) -> Optional[str]:
"""Generate caption using LLaVA model""" """Generate caption using Moondream2 (local VLM)"""
try: try:
# Try to use transformers with LLaVA from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import AutoProcessor, AutoModelForVision2Seq # noqa: F401 from PIL import Image
import torch # noqa: F401 import torch
from PIL import Image # noqa: F401
# Note: This requires llava-hf/llava-1.5-7b-hf or similar model_id = "vikhyatk/moondream2"
# For now, return a placeholder revision = "2025-01-09"
return f"[LLaVA caption for {os.path.basename(image_path)}]"
tokenizer = AutoTokenizer.from_pretrained(
model_id, revision=revision, trust_remote_code=True
)
moondream = AutoModelForCausalLM.from_pretrained(
model_id,
revision=revision,
trust_remote_code=True,
torch_dtype=torch.float16,
).to("mps" if torch.backends.mps.is_available() else "cpu")
moondream.eval()
image = Image.open(image_path)
enc_image = moondream.encode_image(image)
caption = moondream.answer_question(enc_image, prompt, tokenizer)
return caption if caption else None
except ImportError: except ImportError:
return None return None
except Exception as e:
print(f"[CAPTION] Moondream error: {e}")
def generate_caption_with_gpt4v(image_path: str, api_key: str = None) -> Optional[str]:
"""Generate caption using GPT-4V via OpenAI API"""
import base64
if not api_key:
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
return None
try:
from openai import OpenAI
client = OpenAI(api_key=api_key)
# Encode image
with open(image_path, "rb") as f:
img_data = base64.b64encode(f.read()).decode()
response = client.chat.completions.create(
model="gpt-4o", # or gpt-4-turbo for vision
messages=[
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {"url": f"data:image/jpeg;base64,{img_data}"},
},
{
"type": "text",
"text": "Describe what you see in this image in one sentence.",
},
],
}
],
max_tokens=100,
)
return response.choices[0].message.content
except Exception:
return None return None
def generate_caption_fallback(image_path: str, existing_data: Dict = None) -> str: def generate_caption_from_metadata(image_path: str, existing_data: Dict = None) -> str:
"""Generate a basic caption using available metadata""" """Generate caption using YOLO/OCR metadata (fallback)"""
caption_parts = [] caption_parts = []
# Check YOLO data for objects
if existing_data and existing_data.get("objects"): if existing_data and existing_data.get("objects"):
objects = list(set([o["class"] for o in existing_data["objects"]]))[:5] objects = list(set([o["class"] for o in existing_data["objects"]]))[:5]
if objects: if objects:
caption_parts.append(f"Contains: {', '.join(objects)}") caption_parts.append(f"Objects: {', '.join(objects)}")
# Check OCR data for text
if existing_data and existing_data.get("texts"): if existing_data and existing_data.get("texts"):
texts = [t["text"] for t in existing_data["texts"] if t.get("text")] texts = [t["text"] for t in existing_data["texts"] if t.get("text")]
if texts: if texts:
caption_parts.append(f"On-screen text: {' '.join(texts[:3])}") caption_parts.append(f"Text: {' '.join(texts[:3])}")
if existing_data and existing_data.get("scene_type"):
caption_parts.append(f"Scene: {existing_data['scene_type']}")
if caption_parts: if caption_parts:
return " | ".join(caption_parts) return " | ".join(caption_parts)
return "Video frame at timestamp" return "Video frame"
def process_frame( def process_frame(
frame_info: Dict, yolo_data: List = None, ocr_data: List = None frame_info: Dict,
yolo_data: List = None,
ocr_data: List = None,
scene_data: Dict = None,
) -> Dict: ) -> Dict:
"""Process a single frame and generate caption""" """Process a single frame and generate caption (LOCAL ONLY)"""
frame_path = frame_info["path"] frame_path = frame_info["path"]
timestamp = frame_info["timestamp"] timestamp = frame_info["timestamp"]
@@ -171,28 +149,34 @@ def process_frame(
caption = None caption = None
source = "unknown" source = "unknown"
# Try GPT-4V first # Try Moondream2 (local VLM)
caption = generate_caption_with_gpt4v(frame_path) caption = generate_caption_with_moondream(frame_path)
if caption: if caption:
source = "gpt-4v" source = "moondream2"
else: else:
# Try LLaVA # Fallback: Use metadata from YOLO/OCR/Scene
caption = generate_caption_with_llava(frame_path) combined_data = {"objects": [], "texts": [], "scene_type": ""}
if caption:
source = "llava" if yolo_data:
else: combined_data["objects"] = [
# Use fallback with YOLO/OCR data o for o in yolo_data if o.get("timestamp") == timestamp
combined_data = {"objects": [], "texts": []} ]
if yolo_data:
combined_data["objects"] = [ if ocr_data:
o for o in yolo_data if o.get("timestamp") == timestamp combined_data["texts"] = [
] t for t in ocr_data if t.get("timestamp") == timestamp
if ocr_data: ]
combined_data["texts"] = [
t for t in ocr_data if t.get("timestamp") == timestamp if scene_data:
] for scene in scene_data.get("scenes", []):
caption = generate_caption_fallback(frame_path, combined_data) if scene.get("start_time", 0) <= timestamp <= scene.get("end_time", 0):
source = "metadata" combined_data["scene_type"] = scene.get(
"scene_type_zh"
) or scene.get("scene_type", "")
break
caption = generate_caption_from_metadata(frame_path, combined_data)
source = "metadata"
return { return {
"index": frame_info["index"], "index": frame_info["index"],
@@ -212,24 +196,22 @@ def run_caption(
if publisher: if publisher:
publisher.info("caption", "Extracting frames from video...") publisher.info("caption", "Extracting frames from video...")
# Extract frames
frames = extract_frames(video_path, max_frames) frames = extract_frames(video_path, max_frames)
if publisher: if publisher:
publisher.info("caption", f"Extracted {len(frames)} frames") publisher.info("caption", f"Extracted {len(frames)} frames")
# Load YOLO and OCR data for context
base_path = os.path.dirname(output_path) base_path = os.path.dirname(output_path)
uuid_name = os.path.basename(output_path).split(".")[0] uuid_name = os.path.basename(output_path).split(".")[0]
yolo_objects = [] yolo_objects = []
ocr_texts = [] ocr_texts = []
scene_info = {}
yolo_path = os.path.join(base_path, f"{uuid_name}.yolo.json") yolo_path = os.path.join(base_path, f"{uuid_name}.yolo.json")
if os.path.exists(yolo_path): if os.path.exists(yolo_path):
with open(yolo_path) as f: with open(yolo_path) as f:
yolo_data = json.load(f) yolo_data = json.load(f)
# Flatten objects from all frames
for frame in yolo_data.get("frames", []): for frame in yolo_data.get("frames", []):
for obj in frame.get("objects", []): for obj in frame.get("objects", []):
obj["timestamp"] = frame.get("timestamp", 0) obj["timestamp"] = frame.get("timestamp", 0)
@@ -244,7 +226,11 @@ def run_caption(
text["timestamp"] = frame.get("timestamp", 0) text["timestamp"] = frame.get("timestamp", 0)
ocr_texts.append(text) ocr_texts.append(text)
# Process each frame scene_path = os.path.join(base_path, f"{uuid_name}.scene.json")
if os.path.exists(scene_path):
with open(scene_path) as f:
scene_info = json.load(f)
captions = [] captions = []
for i, frame in enumerate(frames): for i, frame in enumerate(frames):
if publisher and i % 5 == 0: if publisher and i % 5 == 0:
@@ -252,16 +238,14 @@ def run_caption(
"caption", i, len(frames), f"Frame {i + 1}/{len(frames)}" "caption", i, len(frames), f"Frame {i + 1}/{len(frames)}"
) )
caption_data = process_frame(frame, yolo_objects, ocr_texts) caption_data = process_frame(frame, yolo_objects, ocr_texts, scene_info)
captions.append(caption_data) captions.append(caption_data)
# Cleanup temp frame
try: try:
os.remove(frame["path"]) os.remove(frame["path"])
except Exception: except Exception:
pass pass
# Cleanup temp directory
temp_dir = os.path.join(os.path.dirname(video_path), ".caption_frames") temp_dir = os.path.join(os.path.dirname(video_path), ".caption_frames")
try: try:
os.rmdir(temp_dir) os.rmdir(temp_dir)
@@ -275,9 +259,11 @@ def run_caption(
"summary": { "summary": {
"avg_caption_length": sum(len(c.get("caption", "")) for c in captions) "avg_caption_length": sum(len(c.get("caption", "")) for c in captions)
/ max(len(captions), 1), / max(len(captions), 1),
"gpt4v_count": sum(1 for c in captions if c.get("source") == "gpt-4v"), "moondream_count": sum(
"llava_count": sum(1 for c in captions if c.get("source") == "llava"), 1 for c in captions if c.get("source") == "moondream2"
),
"metadata_count": sum(1 for c in captions if c.get("source") == "metadata"), "metadata_count": sum(1 for c in captions if c.get("source") == "metadata"),
"cloud_api_count": 0,
}, },
} }
@@ -285,13 +271,13 @@ def run_caption(
json.dump(result, f, indent=2, ensure_ascii=False) json.dump(result, f, indent=2, ensure_ascii=False)
if publisher: if publisher:
publisher.complete("caption", f"{len(captions)} frames captioned") publisher.complete("caption", f"{len(captions)} frames captioned (LOCAL)")
return result return result
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Video Caption Generator") parser = argparse.ArgumentParser(description="Video Caption Generator (LOCAL ONLY)")
parser.add_argument("video_path", help="Path to video file") parser.add_argument("video_path", help="Path to video file")
parser.add_argument("output_path", help="Output JSON path") parser.add_argument("output_path", help="Output JSON path")
parser.add_argument("--uuid", help="UUID for progress tracking", default="") parser.add_argument("--uuid", help="UUID for progress tracking", default="")
@@ -302,4 +288,4 @@ if __name__ == "__main__":
args = parser.parse_args() args = parser.parse_args()
result = run_caption(args.video_path, args.output_path, args.uuid, args.max_frames) result = run_caption(args.video_path, args.output_path, args.uuid, args.max_frames)
print(f"Caption generated: {result['total_frames']} frames") print(f"Caption generated: {result['total_frames']} frames (LOCAL)")

View File

@@ -1,8 +1,8 @@
#!/opt/homebrew/bin/python3.11 #!/opt/homebrew/bin/python3.11
""" """
Face Processor - Face Detection Face Processor - Face Detection & Demographics
Uses OpenCV Haar Cascade (local, no extra download needed) Uses InsightFace for detection, age, and gender analysis.
Alternative: MediaPipe (requires model download) Falls back to OpenCV Haar Cascade if InsightFace fails.
""" """
import sys import sys
@@ -15,7 +15,7 @@ from redis_publisher import RedisPublisher
def process_face(video_path: str, output_path: str, uuid: str = ""): def process_face(video_path: str, output_path: str, uuid: str = ""):
"""Process video for face detection""" """Process video for face detection and demographics analysis"""
publisher = RedisPublisher(uuid) if uuid else None publisher = RedisPublisher(uuid) if uuid else None
if publisher: if publisher:
@@ -23,56 +23,82 @@ def process_face(video_path: str, output_path: str, uuid: str = ""):
try: try:
import cv2 import cv2
except ImportError: import numpy as np
import insightface
except ImportError as e:
error_msg = f"Missing dependency: {e.name}"
if publisher: if publisher:
publisher.error("face", "opencv-python not installed") publisher.error("face", error_msg)
result = {"frame_count": 0, "fps": 0.0, "frames": []} result = {"frame_count": 0, "fps": 0.0, "frames": []}
if publisher:
publisher.complete("face", "0 frames")
with open(output_path, "w") as f: with open(output_path, "w") as f:
json.dump(result, f, indent=2) json.dump(result, f, indent=2)
return result return result
if publisher: # 1. Initialize InsightFace
publisher.info("face", "FACE_LOADING_CASCADE") use_insightface = False
app = None
# Try to use OpenCV's built-in Haar Cascade try:
# This is included with OpenCV
face_cascade = cv2.CascadeClassifier(
cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
)
if face_cascade.empty():
if publisher: if publisher:
publisher.error("face", "Could not load Haar Cascade") publisher.info("face", "LOADING_INSIGHTFACE")
result = {"frame_count": 0, "fps": 0.0, "frames": []} # 'buffalo_l' is a robust model. det_size can be adjusted.
app = insightface.app.FaceAnalysis(
name="buffalo_l", providers=["CPUExecutionProvider"]
)
app.prepare(ctx_id=0, det_size=(320, 320))
use_insightface = True
if publisher: if publisher:
publisher.complete("face", "0 frames") publisher.info("face", "INSIGHTFACE_LOADED")
with open(output_path, "w") as f: except Exception as e:
json.dump(result, f, indent=2) print(f"[WARNING] InsightFace failed to load: {e}")
return result use_insightface = False
# 2. Fallback to Haar Cascade
face_cascade = None
if not use_insightface:
if publisher:
publisher.info("face", "LOADING_HAAR_CASCADE")
face_cascade = cv2.CascadeClassifier(
cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
)
if face_cascade.empty():
if publisher:
publisher.error("face", "Could not load Haar Cascade")
result = {"frame_count": 0, "fps": 0.0, "frames": []}
with open(output_path, "w") as f:
json.dump(result, f, indent=2)
return result
if publisher:
publisher.info("face", "HAAR_CASCADE_LOADED")
if publisher: if publisher:
publisher.info("face", "FACE_CASCADE_LOADED") publisher.info("face", "PROCESSING_VIDEO")
# Get video info
cap = cv2.VideoCapture(video_path) cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
if publisher:
publisher.error("face", "Could not open video")
result = {"frame_count": 0, "fps": 0.0, "frames": []}
with open(output_path, "w") as f:
json.dump(result, f, indent=2)
return result
fps = cap.get(cv2.CAP_PROP_FPS) fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
cap.release()
# Optimization: Process every N frames to speed up analysis
# Since we just need attributes for the person identity, we don't need every single frame.
sample_interval = 30
if total_frames > 0:
estimated_samples = total_frames // sample_interval
else:
estimated_samples = 0
frame_count = 0
processed_count = 0
frames_data = []
if publisher: if publisher:
publisher.info("face", f"fps={fps}, frames={total_frames}") publisher.progress("face", 0, estimated_samples, "Starting")
publisher.progress("face", 0, total_frames, "Starting")
# Process every N frames to speed up
sample_interval = 30 # Process every 30 frames
frames = []
frame_count = 0
processed = 0
cap = cv2.VideoCapture(video_path)
while True: while True:
ret, frame = cap.read() ret, frame = cap.read()
@@ -81,62 +107,92 @@ def process_face(video_path: str, output_path: str, uuid: str = ""):
frame_count += 1 frame_count += 1
# Sample frames # Sampling
if frame_count % sample_interval != 0: if frame_count % sample_interval != 0:
continue continue
processed += 1 processed_count += 1
timestamp = (frame_count - 1) / fps if fps > 0 else 0 timestamp = (frame_count - 1) / fps if fps > 0 else 0
# Convert to grayscale
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
# Detect faces
try:
faces = face_cascade.detectMultiScale(
gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
)
except Exception as e:
if publisher:
publisher.error("face", f"Frame {frame_count}: {e}")
faces = []
face_list = [] face_list = []
for x, y, w, h in faces:
face_list.append(
{
"face_id": None,
"x": int(x),
"y": int(y),
"width": int(w),
"height": int(h),
"confidence": 0.8, # Haar cascade doesn't provide confidence
}
)
# Only add frames with faces try:
if use_insightface and app:
# InsightFace Detection & Analysis
faces = app.get(frame)
for face in faces:
bbox = face.bbox.astype(int)
bx, by, bw, bh = (
bbox[0],
bbox[1],
bbox[2] - bbox[0],
bbox[3] - bbox[1],
)
# Extract Attributes
age = int(face.age) if hasattr(face, "age") else None
gender_val = face.gender if hasattr(face, "gender") else None
gender = (
"female"
if gender_val == 0
else ("male" if gender_val == 1 else None)
)
face_list.append(
{
"x": int(bx),
"y": int(by),
"width": int(bw),
"height": int(bh),
"confidence": float(face.det_score)
if hasattr(face, "det_score")
else 0.9,
"attributes": {"age": age, "gender": gender},
}
)
else:
# Haar Cascade Fallback (No Age/Gender)
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(
gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
)
for x, y, w, h in faces:
face_list.append(
{
"x": int(x),
"y": int(y),
"width": int(w),
"height": int(h),
"confidence": 0.8,
"attributes": {"age": None, "gender": None},
}
)
except Exception as e:
print(f"[ERROR] Frame processing error: {e}")
if face_list: if face_list:
frames.append( frames_data.append(
{ {
"frame": frame_count - 1, "frame": frame_count - 1,
"timestamp": round(timestamp, 3), "timestamp": round(timestamp, 3),
"faces": face_list, "faces": face_list,
} }
) )
if publisher: if publisher:
publisher.progress( publisher.progress(
"face", "face",
processed, processed_count,
total_frames // sample_interval, estimated_samples,
f"Frame {frame_count}", f"Frame {frame_count}",
) )
cap.release() cap.release()
result = {"frame_count": total_frames, "fps": fps, "frames": frames} result = {"frame_count": total_frames, "fps": fps, "frames": frames_data}
if publisher: if publisher:
publisher.complete("face", f"{len(frames)} frames with faces") publisher.complete("face", f"{len(frames_data)} frames processed")
with open(output_path, "w") as f: with open(output_path, "w") as f:
json.dump(result, f, indent=2) json.dump(result, f, indent=2)
@@ -145,7 +201,7 @@ def process_face(video_path: str, output_path: str, uuid: str = ""):
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Face Detection") parser = argparse.ArgumentParser(description="Face Detection & Demographics")
parser.add_argument("video_path", help="Path to video file") parser.add_argument("video_path", help="Path to video file")
parser.add_argument("output_path", help="Output JSON path") parser.add_argument("output_path", help="Output JSON path")
parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="") parser.add_argument("--uuid", "-u", help="UUID for Redis progress", default="")

View File

@@ -1,382 +1,367 @@
{ {
"0": "airplane_cabin", "0": "airfield",
"1": "airport_terminal", "1": "airplane_cabin",
"2": "alley", "2": "airport_terminal",
"3": "amphitheater", "3": "alcove",
"4": "amusement_park", "4": "alley",
"5": "apartment_building_outdoor", "5": "amphitheater",
"6": "aquarium", "6": "amusement_arcade",
"7": "arcade", "7": "amusement_park",
"8": "arena_hockey", "8": "outdoor",
"9": "arena_performance", "9": "aquarium",
"10": "army_base", "10": "aqueduct",
"11": "art_gallery", "11": "arcade",
"12": "art_studio", "12": "arch",
"13": "assembly_line", "13": "archaelogical_excavation",
"14": "athletic_field_outdoor", "14": "archive",
"15": "atrium_public", "15": "hockey",
"16": "attic", "16": "performance",
"17": "auditorium", "17": "rodeo",
"18": "auto_factory", "18": "army_base",
"19": "backyard", "19": "art_gallery",
"20": "badminton_court_indoor", "20": "art_school",
"21": "baggage_claim", "21": "art_studio",
"22": "bakery_shop", "22": "artists_loft",
"23": "balcony_exterior", "23": "assembly_line",
"24": "balcony_interior", "24": "outdoor",
"25": "ball_pit", "25": "public",
"26": "ballroom", "26": "attic",
"27": "bamboo_forest", "27": "auditorium",
"28": "banquet_hall", "28": "auto_factory",
"29": "bar", "29": "auto_showroom",
"30": "barn", "30": "badlands",
"31": "barndoor", "31": "shop",
"32": "baseball_field", "32": "exterior",
"33": "basement", "33": "interior",
"34": "basilica", "34": "ball_pit",
"35": "basketball_court_indoor", "35": "ballroom",
"36": "basketball_court_outdoor", "36": "bamboo_forest",
"37": "bathroom", "37": "bank_vault",
"38": "bazaar_indoor", "38": "banquet_hall",
"39": "bazaar_outdoor", "39": "bar",
"40": "beach", "40": "barn",
"41": "beauty_salon", "41": "barndoor",
"42": "bedroom", "42": "baseball_field",
"43": "berth", "43": "basement",
"44": "biology_laboratory", "44": "indoor",
"45": "boardwalk", "45": "bathroom",
"46": "boat_deck", "46": "indoor",
"47": "boathouse", "47": "outdoor",
"48": "bookstore", "48": "beach",
"49": "booth_indoor", "49": "beach_house",
"50": "botanical_garden", "50": "beauty_salon",
"51": "bow_window_indoor", "51": "bedchamber",
"52": "bow_window_outdoor", "52": "bedroom",
"53": "bowling_alley", "53": "beer_garden",
"54": "boxing_ring", "54": "beer_hall",
"55": "brewery_indoor", "55": "berth",
"56": "bridge", "56": "biology_laboratory",
"57": "building_facade", "57": "boardwalk",
"58": "bullring", "58": "boat_deck",
"59": "burial_chamber", "59": "boathouse",
"60": "bus_interior", "60": "bookstore",
"61": "bus_station_indoor", "61": "indoor",
"62": "butchers_shop", "62": "botanical_garden",
"63": "butte", "63": "indoor",
"64": "cabin_outdoor", "64": "bowling_alley",
"65": "cafeteria", "65": "boxing_ring",
"66": "campsite", "66": "bridge",
"67": "campus", "67": "building_facade",
"68": "canal_natural", "68": "bullring",
"69": "canal_urban", "69": "burial_chamber",
"70": "candy_store", "70": "bus_interior",
"71": "canyon", "71": "indoor",
"72": "car_interior", "72": "butchers_shop",
"73": "carrousel", "73": "butte",
"74": "castle", "74": "outdoor",
"75": "catacomb", "75": "cafeteria",
"76": "cathedral_indoor", "76": "campsite",
"77": "cathedral_outdoor", "77": "campus",
"78": "cavern_indoor", "78": "natural",
"79": "cemetery", "79": "urban",
"80": "chalet", "80": "candy_store",
"81": "cheese_factory", "81": "canyon",
"82": "chemistry_lab", "82": "car_interior",
"83": "chicken_coop_indoor", "83": "carrousel",
"84": "chicken_coop_outdoor", "84": "castle",
"85": "childs_room", "85": "catacomb",
"86": "church_indoor", "86": "cemetery",
"87": "church_outdoor", "87": "chalet",
"88": "classroom", "88": "chemistry_lab",
"89": "clean_room", "89": "childs_room",
"90": "cliff", "90": "indoor",
"91": "cloister_indoor", "91": "outdoor",
"92": "closet", "92": "classroom",
"93": "clothing_store", "93": "clean_room",
"94": "coast", "94": "cliff",
"95": "cockpit", "95": "closet",
"96": "coffee_shop", "96": "clothing_store",
"97": "computer_room", "97": "coast",
"98": "conference_center", "98": "cockpit",
"99": "conference_room", "99": "coffee_shop",
"100": "construction_site", "100": "computer_room",
"101": "control_room", "101": "conference_center",
"102": "control_tower_outdoor", "102": "conference_room",
"103": "corn_field", "103": "construction_site",
"104": "corral", "104": "corn_field",
"105": "corridor", "105": "corral",
"106": "cottage_garden", "106": "corridor",
"107": "courthouse", "107": "cottage",
"108": "courtroom", "108": "courthouse",
"109": "courtyard", "109": "courtyard",
"110": "covered_bridge_exterior", "110": "creek",
"111": "creek", "111": "crevasse",
"112": "crevasse", "112": "crosswalk",
"113": "crosswalk", "113": "dam",
"114": "cubicle_office", "114": "delicatessen",
"115": "dam", "115": "department_store",
"116": "daycare_center", "116": "sand",
"117": "delicatessen", "117": "vegetation",
"118": "dentists_office", "118": "desert_road",
"119": "desert_sand", "119": "outdoor",
"120": "desert_vegetation", "120": "dining_hall",
"121": "diner_indoor", "121": "dining_room",
"122": "diner_outdoor", "122": "discotheque",
"123": "dinette_home", "123": "outdoor",
"124": "dinette_vehicle", "124": "dorm_room",
"125": "dining_car", "125": "downtown",
"126": "dining_room", "126": "dressing_room",
"127": "discotheque", "127": "driveway",
"128": "dock", "128": "drugstore",
"129": "doorway_indoor", "129": "door",
"130": "doorway_outdoor", "130": "elevator_lobby",
"131": "dorm_room", "131": "elevator_shaft",
"132": "driveway", "132": "embassy",
"133": "driving_range_outdoor", "133": "engine_room",
"134": "drugstore", "134": "entrance_hall",
"135": "electrical_substation", "135": "indoor",
"136": "elevator_door", "136": "excavation",
"137": "elevator_escalator", "137": "fabric_store",
"138": "elevator_interior", "138": "farm",
"139": "engine_room", "139": "fastfood_restaurant",
"140": "escalator_indoor", "140": "cultivated",
"141": "excavation", "141": "wild",
"142": "factory_indoor", "142": "field_road",
"143": "fairway", "143": "fire_escape",
"144": "fastfood_restaurant", "144": "fire_station",
"145": "field_cultivated", "145": "fishpond",
"146": "field_wild", "146": "indoor",
"147": "fire_escape", "147": "indoor",
"148": "fire_station", "148": "food_court",
"149": "firing_range_indoor", "149": "football_field",
"150": "fishpond", "150": "broadleaf",
"151": "florist_shop_indoor", "151": "forest_path",
"152": "food_court", "152": "forest_road",
"153": "forest_broadleaf", "153": "formal_garden",
"154": "forest_needleleaf", "154": "fountain",
"155": "forest_path", "155": "galley",
"156": "forest_road", "156": "indoor",
"157": "formal_garden", "157": "outdoor",
"158": "fountain", "158": "gas_station",
"159": "galley", "159": "exterior",
"160": "game_room", "160": "indoor",
"161": "garage_indoor", "161": "outdoor",
"162": "garage_outdoor", "162": "gift_shop",
"163": "garbage_dump", "163": "glacier",
"164": "gas_station", "164": "golf_course",
"165": "gazebo_exterior", "165": "indoor",
"166": "general_store_indoor", "166": "outdoor",
"167": "general_store_outdoor", "167": "grotto",
"168": "gift_shop", "168": "indoor",
"169": "golf_course", "169": "indoor",
"170": "greenhouse_indoor", "170": "outdoor",
"171": "greenhouse_outdoor", "171": "harbor",
"172": "gymnasium_indoor", "172": "hardware_store",
"173": "hangar_indoor", "173": "hayfield",
"174": "hangar_outdoor", "174": "heliport",
"175": "harbor", "175": "highway",
"176": "hardware_store", "176": "home_office",
"177": "hayfield", "177": "home_theater",
"178": "heliport", "178": "hospital",
"179": "herb_garden", "179": "hospital_room",
"180": "highway", "180": "hot_spring",
"181": "hill", "181": "outdoor",
"182": "home_office", "182": "hotel_room",
"183": "hospital", "183": "house",
"184": "hospital_room", "184": "outdoor",
"185": "hot_spring", "185": "ice_cream_parlor",
"186": "hot_tub_outdoor", "186": "ice_floe",
"187": "hotel", "187": "ice_shelf",
"188": "hotel_outdoor", "188": "indoor",
"189": "hotel_room", "189": "outdoor",
"190": "house", "190": "iceberg",
"191": "hunting_lodge_outdoor", "191": "igloo",
"192": "ice_cream_parlor", "192": "industrial_area",
"193": "ice_floe", "193": "outdoor",
"194": "ice_shelf", "194": "islet",
"195": "ice_skating_rink_indoor", "195": "indoor",
"196": "ice_skating_rink_outdoor", "196": "jail_cell",
"197": "iceberg", "197": "japanese_garden",
"198": "igloo", "198": "jewelry_shop",
"199": "industrial_area", "199": "junkyard",
"200": "inn_outdoor", "200": "kasbah",
"201": "islet", "201": "outdoor",
"202": "jacuzzi_indoor", "202": "kindergarden_classroom",
"203": "jail_cell", "203": "kitchen",
"204": "jail_indoor", "204": "lagoon",
"205": "jewelry_shop", "205": "natural",
"206": "kasbah", "206": "landfill",
"207": "kennel_indoor", "207": "landing_deck",
"208": "kennel_outdoor", "208": "laundromat",
"209": "kindergarden_classroom", "209": "lawn",
"210": "kitchen", "210": "lecture_room",
"211": "kitchenette", "211": "legislative_chamber",
"212": "labyrinth_outdoor", "212": "indoor",
"213": "lake_natural", "213": "outdoor",
"214": "landfill", "214": "lighthouse",
"215": "landing_deck", "215": "living_room",
"216": "laundromat", "216": "loading_dock",
"217": "lecture_room", "217": "lobby",
"218": "library_indoor", "218": "lock_chamber",
"219": "library_outdoor", "219": "locker_room",
"220": "lido_deck_outdoor", "220": "mansion",
"221": "lift_bridge", "221": "manufactured_home",
"222": "lighthouse", "222": "indoor",
"223": "limousine_interior", "223": "outdoor",
"224": "living_room", "224": "marsh",
"225": "loading_dock", "225": "martial_arts_gym",
"226": "lobby", "226": "mausoleum",
"227": "lock_chamber", "227": "medina",
"228": "locker_room", "228": "mezzanine",
"229": "mansion", "229": "water",
"230": "manufactured_home", "230": "outdoor",
"231": "market_indoor", "231": "motel",
"232": "market_outdoor", "232": "mountain",
"233": "marsh", "233": "mountain_path",
"234": "martial_arts_gym", "234": "mountain_snowy",
"235": "mausoleum", "235": "indoor",
"236": "medina", "236": "indoor",
"237": "moat_water", "237": "outdoor",
"238": "monastery_outdoor", "238": "music_studio",
"239": "mosque_indoor", "239": "natural_history_museum",
"240": "mosque_outdoor", "240": "nursery",
"241": "motel", "241": "nursing_home",
"242": "mountain", "242": "oast_house",
"243": "mountain_path", "243": "ocean",
"244": "mountain_snowy", "244": "office",
"245": "movie_theater_indoor", "245": "office_building",
"246": "museum_indoor", "246": "office_cubicles",
"247": "museum_outdoor", "247": "oilrig",
"248": "music_store", "248": "operating_room",
"249": "music_studio", "249": "orchard",
"250": "nuclear_power_plant_outdoor", "250": "orchestra_pit",
"251": "nursery", "251": "pagoda",
"252": "oast_house", "252": "palace",
"253": "observatory_indoor", "253": "pantry",
"254": "observatory_outdoor", "254": "park",
"255": "ocean", "255": "indoor",
"256": "office", "256": "outdoor",
"257": "office_building", "257": "parking_lot",
"258": "office_cubicles", "258": "pasture",
"259": "oil_refinery_outdoor", "259": "patio",
"260": "oilrig", "260": "pavilion",
"261": "operating_room", "261": "pet_shop",
"262": "orchard", "262": "pharmacy",
"263": "outhouse_outdoor", "263": "phone_booth",
"264": "pagoda", "264": "physics_laboratory",
"265": "palace", "265": "picnic_area",
"266": "pantry", "266": "pier",
"267": "park", "267": "pizzeria",
"268": "parking_garage_indoor", "268": "playground",
"269": "parking_garage_outdoor", "269": "playroom",
"270": "parking_lot", "270": "plaza",
"271": "parlor", "271": "pond",
"272": "pasture", "272": "porch",
"273": "patio", "273": "promenade",
"274": "pavilion", "274": "indoor",
"275": "pharmacy", "275": "racecourse",
"276": "phone_booth", "276": "raceway",
"277": "physics_laboratory", "277": "raft",
"278": "picnic_area", "278": "railroad_track",
"279": "pilothouse_indoor", "279": "rainforest",
"280": "planetarium_indoor", "280": "reception",
"281": "playground", "281": "recreation_room",
"282": "playroom", "282": "repair_shop",
"283": "plaza", "283": "residential_neighborhood",
"284": "podium_indoor", "284": "restaurant",
"285": "podium_outdoor", "285": "restaurant_kitchen",
"286": "pond", "286": "restaurant_patio",
"287": "poolroom_home", "287": "rice_paddy",
"288": "poolroom_establishment", "288": "river",
"289": "power_plant_outdoor", "289": "rock_arch",
"290": "promenade_deck", "290": "roof_garden",
"291": "pub_indoor", "291": "rope_bridge",
"292": "pulpit", "292": "ruin",
"293": "putting_green", "293": "runway",
"294": "racecourse", "294": "sandbox",
"295": "raceway", "295": "sauna",
"296": "raft", "296": "schoolhouse",
"297": "railroad_track", "297": "science_museum",
"298": "rainforest", "298": "server_room",
"299": "reception", "299": "shed",
"300": "recreation_room", "300": "shoe_shop",
"301": "residential_neighborhood", "301": "shopfront",
"302": "restaurant", "302": "indoor",
"303": "restaurant_kitchen", "303": "shower",
"304": "restaurant_patio", "304": "ski_resort",
"305": "rice_paddy", "305": "ski_slope",
"306": "riding_arena", "306": "sky",
"307": "river", "307": "skyscraper",
"308": "rock_arch", "308": "slum",
"309": "rope_bridge", "309": "snowfield",
"310": "ruin", "310": "soccer_field",
"311": "runway", "311": "stable",
"312": "sandbar", "312": "baseball",
"313": "sandbox", "313": "football",
"314": "sauna", "314": "soccer",
"315": "schoolhouse", "315": "indoor",
"316": "sea_cliff", "316": "outdoor",
"317": "server_room", "317": "staircase",
"318": "shed", "318": "storage_room",
"319": "shoe_shop", "319": "street",
"320": "shop_front", "320": "platform",
"321": "shopping_mall_indoor", "321": "supermarket",
"322": "shower", "322": "sushi_bar",
"323": "skatepark", "323": "swamp",
"324": "ski_resort", "324": "swimming_hole",
"325": "ski_slope", "325": "indoor",
"326": "sky", "326": "outdoor",
"327": "skyscraper", "327": "outdoor",
"328": "slum", "328": "television_room",
"329": "snowfield", "329": "television_studio",
"330": "squash_court", "330": "asia",
"331": "stable", "331": "throne_room",
"332": "stadium_baseball", "332": "ticket_booth",
"333": "stadium_football", "333": "topiary_garden",
"334": "staircase", "334": "tower",
"335": "street", "335": "toyshop",
"336": "subway_interior", "336": "train_interior",
"337": "subway_station_platform", "337": "platform",
"338": "supermarket", "338": "tree_farm",
"339": "sushi_bar", "339": "tree_house",
"340": "swamp", "340": "trench",
"341": "swimming_hole", "341": "tundra",
"342": "swimming_pool_indoor", "342": "ocean_deep",
"343": "swimming_pool_outdoor", "343": "utility_room",
"344": "synagogue_indoor", "344": "valley",
"345": "synagogue_outdoor", "345": "vegetable_garden",
"346": "television_room", "346": "veterinarians_office",
"347": "television_studio", "347": "viaduct",
"348": "temple_asia", "348": "village",
"349": "temple_europe", "349": "vineyard",
"350": "trench", "350": "volcano",
"351": "underwater_coral_reef", "351": "outdoor",
"352": "utility_room", "352": "waiting_room",
"353": "valley", "353": "water_park",
"354": "van_interior", "354": "water_tower",
"355": "vegetable_garden", "355": "waterfall",
"356": "veranda", "356": "watering_hole",
"357": "veterinarians_office", "357": "wave",
"358": "viaduct", "358": "wet_bar",
"359": "videostore", "359": "wheat_field",
"360": "village", "360": "wind_farm",
"361": "vineyard", "361": "windmill",
"362": "volcano", "362": "yard",
"363": "volleyball_court_indoor", "363": "youth_hostel",
"364": "volleyball_court_outdoor", "364": "zen_garden"
"365": "waiting_room",
"366": "warehouse_indoor",
"367": "water_tower",
"368": "waterfall_block",
"369": "waterfall_fan",
"370": "waterfall_plunge",
"371": "wetland",
"372": "wheat_field",
"373": "wind_farm",
"374": "windmill",
"375": "wine_cellar_barrel_storage",
"376": "wine_cellar_bottle_storage",
"377": "wrestling_ring_indoor",
"378": "yard",
"379": "youth_hostel"
} }

View File

@@ -162,9 +162,13 @@ class SceneClassifier:
model_path: Core ML 模型路徑 (可選) model_path: Core ML 模型路徑 (可選)
""" """
self.model_path = model_path self.model_path = model_path
self.places365_model_path = (
"/Users/accusys/momentry/models/resnet18_places365.pth.tar"
)
self.model = None self.model = None
self.coreml_model = None self.coreml_model = None
self.transform = None self.transform = None
self.model_type = "unknown"
# 圖像預處理 # 圖像預處理
self.transform = transforms.Compose( self.transform = transforms.Compose(
@@ -189,23 +193,57 @@ class SceneClassifier:
try: try:
print(f"[SCENE] Loading Core ML model: {self.model_path}") print(f"[SCENE] Loading Core ML model: {self.model_path}")
self.coreml_model = ct.models.MLModel(self.model_path) self.coreml_model = ct.models.MLModel(self.model_path)
self.model_type = "coreml"
print("[SCENE] Core ML model loaded successfully") print("[SCENE] Core ML model loaded successfully")
return True return True
except Exception as e: except Exception as e:
print(f"[SCENE] Warning: Failed to load Core ML model: {e}") print(f"[SCENE] Warning: Failed to load Core ML model: {e}")
# 備案:使用 PyTorch + ResNet # 備案:使用 PyTorch + Places365
if HAS_TORCH: if HAS_TORCH:
try: try:
print(f"[SCENE] Loading PyTorch model on {DEVICE}") print(f"[SCENE] Loading PyTorch model on {DEVICE}")
# 使用預訓練的 ResNet18
self.model = models.resnet18(pretrained=True) # 檢查 Places365 模型是否存在
if Path(self.places365_model_path).exists():
print(
f"[SCENE] Loading Places365 model: {self.places365_model_path}"
)
checkpoint = torch.load(
self.places365_model_path, map_location=DEVICE
)
# 建立 ResNet18 模型 (Places365 有 365 個類別)
self.model = models.resnet18(num_classes=365)
# 移除 'module.' prefix (DataParallel training)
state_dict = checkpoint["state_dict"]
new_state_dict = {}
for k, v in state_dict.items():
if k.startswith("module."):
new_state_dict[k[7:]] = v
else:
new_state_dict[k] = v
self.model.load_state_dict(new_state_dict)
self.model_type = "places365"
print("[SCENE] Places365 model loaded successfully (365 classes)")
else:
print(
f"[SCENE] Places365 model not found, using ImageNet pretrained"
)
self.model = models.resnet18(pretrained=True)
self.model_type = "imagenet"
self.model.to(DEVICE) self.model.to(DEVICE)
self.model.eval() self.model.eval()
print("[SCENE] PyTorch model loaded successfully") print("[SCENE] PyTorch model loaded successfully")
return True return True
except Exception as e: except Exception as e:
print(f"[SCENE] Warning: Failed to load PyTorch model: {e}") print(f"[SCENE] Warning: Failed to load PyTorch model: {e}")
import traceback
traceback.print_exc()
print("[SCENE] Error: No model available") print("[SCENE] Error: No model available")
return False return False

View File

@@ -1,12 +1,8 @@
#!/opt/homebrew/bin/python3.11 #!/opt/homebrew/bin/python3.11
""" """
Story Processor - Generate parent-child chunk hierarchy for RAG Story Processor - Generate parent-child chunk hierarchy for RAG
Uses video analysis (ASR, YOLO, OCR) to create parent chunks that summarize child chunks. Uses LOCAL video analysis (ASR, YOLO, OCR, Scene) to create parent chunks.
NO cloud API calls - fully offline processing
Parent-Child Chunk Strategy:
- Parent chunks: Summarize multiple scenes/segments with narrative description
- Child chunks: Individual ASR segments, OCR texts, detected objects
- When embedding: Parent description + Child content for better retrieval
""" """
import sys import sys
@@ -47,57 +43,59 @@ def generate_parent_child_chunks(
cut_data: Dict, cut_data: Dict,
yolo_data: Dict, yolo_data: Dict,
ocr_data: Dict, ocr_data: Dict,
scene_data: Dict,
parent_chunk_size: int = 5, parent_chunk_size: int = 5,
) -> Dict[str, Any]: ) -> Dict:
""" """
Generate parent-child chunk hierarchy. Generate parent-child chunk hierarchy using LOCAL data only.
No LLM/API calls - uses template-based narrative generation.
Parent chunks summarize multiple child chunks for better RAG retrieval.
Child chunks are individual segments from ASR, scenes from CUT, etc.
""" """
child_chunks = [] child_chunks = []
parent_chunks = [] parent_chunks = []
# Get source data # Create child chunks from ASR
asr_segments = asr_data.get("segments", []) for seg in asr_data.get("segments", []):
cut_scenes = cut_data.get("scenes", []) child_chunks.append(
yolo_frames = yolo_data.get("frames", []) {
_ocr_frames = ocr_data.get("frames", []) "chunk_id": f"asr_{seg.get('start', 0):.1f}_{seg.get('end', 0):.1f}",
"chunk_type": "asr",
# Create child chunks from ASR segments "source": "asr",
asr_child_ids = [] "start_time": seg.get("start", 0),
for i, seg in enumerate(asr_segments): "end_time": seg.get("end", 0),
child_chunk = { "text_content": seg.get("text", ""),
"chunk_id": f"asr_{i:04d}", "content": {
"chunk_type": "sentence", "text": seg.get("text", ""),
"source": "asr", "confidence": seg.get("confidence", 0),
"start_time": seg.get("start", 0), },
"end_time": seg.get("end", 0), "child_chunk_ids": [],
"text_content": seg.get("text", ""), "parent_chunk_id": None,
"content": seg, }
"child_chunk_ids": [], )
"parent_chunk_id": None,
}
child_chunks.append(child_chunk)
asr_child_ids.append(child_chunk["chunk_id"])
# Create child chunks from CUT scenes # Create child chunks from CUT scenes
cut_child_ids = [] for scene in cut_data.get("scenes", []):
for i, scene in enumerate(cut_scenes): child_chunks.append(
child_chunk = { {
"chunk_id": f"cut_{i:04d}", "chunk_id": f"cut_{scene.get('scene_number', 0)}",
"chunk_type": "cut", "chunk_type": "cut",
"source": "cut", "source": "cut",
"start_time": scene.get("start_time", scene.get("start", 0)), "start_time": scene.get("start_time", 0),
"end_time": scene.get("end_time", scene.get("end", 0)), "end_time": scene.get("end_time", 0),
"text_content": None, "text_content": f"Scene {scene.get('scene_number', 0)}",
"content": scene, "content": {
"child_chunk_ids": [], "scene_number": scene.get("scene_number", 0),
"parent_chunk_id": None, "duration": scene.get("duration", 0),
} },
child_chunks.append(child_chunk) "child_chunk_ids": [],
cut_child_ids.append(child_chunk["chunk_id"]) "parent_chunk_id": None,
}
)
asr_child_ids = [c["chunk_id"] for c in child_chunks if c["source"] == "asr"]
cut_child_ids = [c["chunk_id"] for c in child_chunks if c["source"] == "cut"]
yolo_frames = yolo_data.get("frames", [])
ocr_frames = ocr_data.get("frames", [])
# Group ASR segments into parent chunks # Group ASR segments into parent chunks
for i in range(0, len(asr_child_ids), parent_chunk_size): for i in range(0, len(asr_child_ids), parent_chunk_size):
@@ -105,7 +103,6 @@ def generate_parent_child_chunks(
if not batch: if not batch:
continue continue
# Collect text from child chunks
batch_texts = [] batch_texts = []
batch_objects = [] batch_objects = []
batch_times = [] batch_times = []
@@ -118,11 +115,16 @@ def generate_parent_child_chunks(
batch_times.append((child["start_time"], child["end_time"])) batch_times.append((child["start_time"], child["end_time"]))
break break
# Create parent chunk with narrative description
start_time = batch_times[0][0] if batch_times else 0 start_time = batch_times[0][0] if batch_times else 0
end_time = batch_times[-1][1] if batch_times else 0 end_time = batch_times[-1][1] if batch_times else 0
# Generate narrative description # Find objects in this time range
for frame in yolo_frames[:50]:
ts = frame.get("timestamp", 0)
if start_time <= ts <= end_time:
for obj in frame.get("objects", []):
batch_objects.append(obj.get("class_name", "unknown"))
narrative = generate_narrative(batch_texts, batch_objects, start_time, end_time) narrative = generate_narrative(batch_texts, batch_objects, start_time, end_time)
parent_chunk = { parent_chunk = {
@@ -136,13 +138,13 @@ def generate_parent_child_chunks(
"description": narrative, "description": narrative,
"child_count": len(batch), "child_count": len(batch),
"speech_preview": " ".join(batch_texts[:3]) if batch_texts else None, "speech_preview": " ".join(batch_texts[:3]) if batch_texts else None,
"detected_objects": list(set(batch_objects))[:5],
}, },
"child_chunk_ids": batch, "child_chunk_ids": batch,
"parent_chunk_id": None, "parent_chunk_id": None,
} }
parent_chunks.append(parent_chunk) parent_chunks.append(parent_chunk)
# Update child chunks with parent reference
for child_id in batch: for child_id in batch:
for child in child_chunks: for child in child_chunks:
if child["chunk_id"] == child_id: if child["chunk_id"] == child_id:
@@ -167,14 +169,12 @@ def generate_parent_child_chunks(
start_time = batch_times[0][0] if batch_times else 0 start_time = batch_times[0][0] if batch_times else 0
end_time = batch_times[-1][1] if batch_times else 0 end_time = batch_times[-1][1] if batch_times else 0
# Find objects in this time range from YOLO for frame in yolo_frames[:50]:
for frame in yolo_frames[:100]: # Sample frames
ts = frame.get("timestamp", 0) ts = frame.get("timestamp", 0)
if start_time <= ts <= end_time: if start_time <= ts <= end_time:
for obj in frame.get("objects", []): for obj in frame.get("objects", []):
batch_objects.append(obj.get("class_name", "unknown")) batch_objects.append(obj.get("class_name", "unknown"))
# Generate scene narrative
narrative = generate_scene_narrative( narrative = generate_scene_narrative(
batch_objects, start_time, end_time, len(batch) batch_objects, start_time, end_time, len(batch)
) )
@@ -190,14 +190,13 @@ def generate_parent_child_chunks(
"description": narrative, "description": narrative,
"child_count": len(batch), "child_count": len(batch),
"scenes": batch, "scenes": batch,
"detected_objects": list(set(batch_objects))[:10], "detected_objects": list(set(batch_objects))[:5],
}, },
"child_chunk_ids": batch, "child_chunk_ids": batch,
"parent_chunk_id": None, "parent_chunk_id": None,
} }
parent_chunks.append(parent_chunk) parent_chunks.append(parent_chunk)
# Update child chunks with parent reference
for child_id in batch: for child_id in batch:
for child in child_chunks: for child in child_chunks:
if child["chunk_id"] == child_id: if child["chunk_id"] == child_id:
@@ -219,27 +218,33 @@ def generate_parent_child_chunks(
def generate_narrative( def generate_narrative(
texts: List[str], objects: List[str], start: float, end: float texts: List[str], objects: List[str], start: float, end: float
) -> str: ) -> str:
"""Generate narrative description from text snippets""" """Generate narrative description from LOCAL text snippets and objects"""
if not texts: if not texts and not objects:
return f"Video segment from {start:.1f}s to {end:.1f}s" return f"Video segment from {start:.1f}s to {end:.1f}s"
# Combine and summarize parts = []
combined = " ".join(texts) if texts:
if len(combined) > 200: combined = " ".join(texts[:5])
combined = combined[:200] + "..." if len(combined) > 150:
combined = combined[:150] + "..."
parts.append(f"Speech: {combined}")
return f"[{start:.0f}s-{end:.0f}s] {combined}" if objects:
unique_objs = list(set(objects))[:5]
parts.append(f"Visuals: {', '.join(unique_objs)}")
return f"[{start:.0f}s-{end:.0f}s] {' | '.join(parts)}"
def generate_scene_narrative( def generate_scene_narrative(
objects: List[str], start: float, end: float, scene_count: int objects: List[str], start: float, end: float, scene_count: int
) -> str: ) -> str:
"""Generate scene narrative from detected objects""" """Generate scene narrative from LOCAL detected objects"""
unique_objects = list(set(objects))[:5] unique_objects = list(set(objects))[:5]
if unique_objects: if unique_objects:
obj_str = ", ".join(unique_objects) obj_str = ", ".join(unique_objects)
return f"[{start:.0f}s-{end:.0f}s] Scenes {scene_count} segments. Visual: {obj_str}." return f"[{start:.0f}s-{end:.0f}s] {scene_count} scenes. Visuals: {obj_str}."
else: else:
return f"[{start:.0f}s-{end:.0f}s] {scene_count} video scenes." return f"[{start:.0f}s-{end:.0f}s] {scene_count} video scenes."
@@ -251,70 +256,45 @@ def run_story(
if publisher: if publisher:
publisher.info("story", "STORY_START") publisher.info("story", "STORY_START")
# Load existing JSON files
base_path = os.path.dirname(output_path) base_path = os.path.dirname(output_path)
uuid_name = os.path.basename(output_path).split(".")[0] uuid_name = os.path.basename(output_path).split(".")[0]
# Load analysis data
asr_data = {"segments": []} asr_data = {"segments": []}
cut_data = {"scenes": []} cut_data = {"scenes": []}
yolo_data = {"frames": []} yolo_data = {"frames": []}
ocr_data = {"frames": []} ocr_data = {"frames": []}
scene_data = {"scenes": []}
# Load ASR for name, data_var in [
asr_path = os.path.join(base_path, f"{uuid_name}.asr.json") ("asr", asr_data),
if os.path.exists(asr_path): ("cut", cut_data),
with open(asr_path) as f: ("yolo", yolo_data),
asr_data = json.load(f) ("ocr", ocr_data),
if publisher: ("scene", scene_data),
publisher.info( ]:
"story", f"Loaded ASR: {len(asr_data.get('segments', []))} segments" path = os.path.join(base_path, f"{uuid_name}.{name}.json")
) if os.path.exists(path):
with open(path) as f:
data_var.update(json.load(f))
# Load CUT
cut_path = os.path.join(base_path, f"{uuid_name}.cut.json")
if os.path.exists(cut_path):
with open(cut_path) as f:
cut_data = json.load(f)
if publisher:
publisher.info(
"story", f"Loaded CUT: {len(cut_data.get('scenes', []))} scenes"
)
# Load YOLO
yolo_path = os.path.join(base_path, f"{uuid_name}.yolo.json")
if os.path.exists(yolo_path):
with open(yolo_path) as f:
yolo_data = json.load(f)
# Load OCR
ocr_path = os.path.join(base_path, f"{uuid_name}.ocr.json")
if os.path.exists(ocr_path):
with open(ocr_path) as f:
ocr_data = json.load(f)
# Load metadata
metadata = extract_video_metadata(video_path)
if publisher:
publisher.info("story", "Generating parent-child chunks...")
# Generate parent-child hierarchy
result = generate_parent_child_chunks( result = generate_parent_child_chunks(
asr_data, cut_data, yolo_data, ocr_data, parent_chunk_size asr_data, cut_data, yolo_data, ocr_data, scene_data, parent_chunk_size
) )
result["metadata"] = metadata result["video_metadata"] = extract_video_metadata(video_path)
result["parent_chunk_size"] = parent_chunk_size result["processing"] = {
"method": "local_aggregation",
"cloud_api_used": False,
"parent_chunk_size": parent_chunk_size,
}
with open(output_path, "w") as f: with open(output_path, "w") as f:
json.dump(result, f, indent=2, ensure_ascii=False) json.dump(result, f, indent=2, ensure_ascii=False)
if publisher: if publisher:
stats = result["stats"]
publisher.complete( publisher.complete(
"story", "story",
f"{stats['total_parent_chunks']} parents, {stats['total_child_chunks']} children", f"{result['stats']['total_parent_chunks']} parent, {result['stats']['total_child_chunks']} child chunks (LOCAL)",
) )
return result return result
@@ -322,7 +302,7 @@ def run_story(
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Video Story Generator - Parent-Child Chunks" description="Story Processor - Parent-Child Chunk Hierarchy (LOCAL ONLY)"
) )
parser.add_argument("video_path", help="Path to video file") parser.add_argument("video_path", help="Path to video file")
parser.add_argument("output_path", help="Output JSON path") parser.add_argument("output_path", help="Output JSON path")
@@ -331,7 +311,7 @@ if __name__ == "__main__":
"--parent-chunk-size", "--parent-chunk-size",
type=int, type=int,
default=5, default=5,
help="Number of child chunks per parent chunk", help="Number of child chunks per parent",
) )
args = parser.parse_args() args = parser.parse_args()
@@ -340,6 +320,6 @@ if __name__ == "__main__":
args.video_path, args.output_path, args.uuid, args.parent_chunk_size args.video_path, args.output_path, args.uuid, args.parent_chunk_size
) )
print( print(
f"Story generated: {result['stats']['total_parent_chunks']} parent chunks, " f"Story generated: {result['stats']['total_parent_chunks']} parent, "
f"{result['stats']['total_child_chunks']} child chunks" f"{result['stats']['total_child_chunks']} child chunks (LOCAL)"
) )

View File

@@ -30,14 +30,20 @@ pub async fn api_key_validation(
tracing::info!("[MIDDLEWARE] Path: {:?}", request.uri().path()); tracing::info!("[MIDDLEWARE] Path: {:?}", request.uri().path());
let headers = request.headers(); let headers = request.headers();
tracing::info!( tracing::info!("[MIDDLEWARE] All headers: {:?}", headers);
"[MIDDLEWARE] Headers: {:?}",
headers.keys().collect::<Vec<_>>()
);
let api_key = match extract_api_key(headers) { let api_key = match extract_api_key(headers) {
Ok(key) => { Ok(key) => {
tracing::info!("[MIDDLEWARE] API key extracted, length: {}", key.len()); tracing::info!("[MIDDLEWARE] API key extracted, length: {}", key.len());
if key.len() > 8 {
tracing::info!(
"[MIDDLEWARE] Key value: {}...{}",
&key[..4],
&key[key.len() - 4..]
);
} else {
tracing::info!("[MIDDLEWARE] Key value: ****");
}
key key
} }
Err(status) => { Err(status) => {
@@ -59,7 +65,10 @@ pub async fn api_key_validation(
r r
} }
Ok(None) => { Ok(None) => {
tracing::warn!("[MIDDLEWARE] API key not found in database"); tracing::warn!(
"[MIDDLEWARE] API key NOT FOUND in database for hash: {}",
&key_hash[..16]
);
return Response::builder() return Response::builder()
.status(StatusCode::UNAUTHORIZED) .status(StatusCode::UNAUTHORIZED)
.body(axum::body::Body::empty()) .body(axum::body::Body::empty())

View File

@@ -1,4 +1,13 @@
pub mod face_recognition;
pub mod identities;
pub mod identity_binding;
pub mod middleware; pub mod middleware;
pub mod n8n_search;
pub mod person_identity;
pub mod search;
pub mod server; pub mod server;
pub mod universal_search;
pub mod visual_chunk_search;
pub mod who;
pub use server::start_server; pub use server::start_server;

File diff suppressed because it is too large Load Diff

View File

@@ -10,6 +10,8 @@ pub const KEY_PREFIX_VIDEO: &str = "video:";
pub const KEY_PREFIX_SEARCH: &str = "search:"; pub const KEY_PREFIX_SEARCH: &str = "search:";
pub const KEY_PREFIX_SEARCH_HYBRID: &str = "search:hybrid:"; pub const KEY_PREFIX_SEARCH_HYBRID: &str = "search:hybrid:";
pub const KEY_PREFIX_SEARCH_N8N: &str = "search:n8n:"; pub const KEY_PREFIX_SEARCH_N8N: &str = "search:n8n:";
pub const KEY_PREFIX_SEARCH_BM25: &str = "search:bm25:";
pub const KEY_PREFIX_SEARCH_N8N_BM25: &str = "search:n8n:bm25:";
pub const KEY_HEALTH: &str = "health:basic"; pub const KEY_HEALTH: &str = "health:basic";
pub fn videos_list(page: usize, limit: usize) -> String { pub fn videos_list(page: usize, limit: usize) -> String {
@@ -32,6 +34,14 @@ pub fn n8n_search(query_hash: &str) -> String {
format!("{}{}", KEY_PREFIX_SEARCH_N8N, query_hash) format!("{}{}", KEY_PREFIX_SEARCH_N8N, query_hash)
} }
/// Builds the cache key for a BM25 search result.
///
/// The key is `KEY_PREFIX_SEARCH_BM25` immediately followed by `query_hash`.
pub fn bm25_search(query_hash: &str) -> String {
    [KEY_PREFIX_SEARCH_BM25, query_hash].concat()
}
/// Builds the cache key for an n8n BM25 search result.
///
/// The key is `KEY_PREFIX_SEARCH_N8N_BM25` immediately followed by `query_hash`.
pub fn n8n_bm25_search(query_hash: &str) -> String {
    [KEY_PREFIX_SEARCH_N8N_BM25, query_hash].concat()
}
pub fn health() -> String { pub fn health() -> String {
KEY_HEALTH.to_string() KEY_HEALTH.to_string()
} }
@@ -48,6 +58,17 @@ pub fn search_prefix() -> String {
format!("^{}", KEY_PREFIX_SEARCH) format!("^{}", KEY_PREFIX_SEARCH)
} }
/// Key prefix shared by every visual-search cache entry.
pub const KEY_PREFIX_VISUAL_SEARCH: &str = "search:visual:";
/// Category label for visual-search cache entries.
pub const CATEGORY_VISUAL_SEARCH: &str = "visual_search";

/// Builds the cache key for a visual search: `<prefix><uuid>:<criteria_hash>`.
pub fn visual_search(uuid: &str, criteria_hash: &str) -> String {
    [KEY_PREFIX_VISUAL_SEARCH, uuid, ":", criteria_hash].concat()
}

/// Returns the visual-search key prefix preceded by `^`.
///
/// NOTE(review): the leading `^` looks like a regex start anchor for
/// prefix-based lookups/invalidation — confirm against the caller.
pub fn visual_search_prefix() -> String {
    let mut pattern = String::with_capacity(1 + KEY_PREFIX_VISUAL_SEARCH.len());
    pattern.push('^');
    pattern.push_str(KEY_PREFIX_VISUAL_SEARCH);
    pattern
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@@ -78,8 +99,28 @@ mod tests {
assert_eq!(n8n_search("hash123"), "search:n8n:hash123"); assert_eq!(n8n_search("hash123"), "search:n8n:hash123");
} }
#[test]
fn test_bm25_search() {
assert_eq!(bm25_search("hash123"), "search:bm25:hash123");
}
#[test]
fn test_n8n_bm25_search() {
assert_eq!(n8n_bm25_search("hash123"), "search:n8n:bm25:hash123");
}
#[test] #[test]
fn test_health() { fn test_health() {
assert_eq!(health(), "health:basic"); assert_eq!(health(), "health:basic");
} }
#[test]
fn test_visual_search() {
assert_eq!(visual_search("abc123", "hash"), "search:visual:abc123:hash");
}
#[test]
fn test_visual_search_prefix() {
assert_eq!(visual_search_prefix(), "^search:visual:");
}
} }

View File

@@ -136,6 +136,10 @@ impl MongoCache {
self.settings.ttl_video_meta self.settings.ttl_video_meta
} }
/// Returns the TTL applied to visual-search cache entries.
///
/// There is no dedicated visual-search setting; this returns
/// `settings.ttl_search`, the same value used for regular search results.
pub fn ttl_visual_search(&self) -> u64 {
self.settings.ttl_search // Reuse search TTL
}
pub async fn get<T: DeserializeOwned>(&self, key: &str) -> Result<Option<T>> { pub async fn get<T: DeserializeOwned>(&self, key: &str) -> Result<Option<T>> {
if !self.is_enabled() { if !self.is_enabled() {
return Ok(None); return Ok(None);

View File

@@ -1,5 +1,9 @@
pub mod rule1_ingest;
pub mod rule3_ingest;
pub mod splitter; pub mod splitter;
pub mod types; pub mod types;
pub use rule1_ingest::ingest_rule1;
pub use rule3_ingest::ingest_rule3;
pub use splitter::{AsrSegment, ChunkSplitter}; pub use splitter::{AsrSegment, ChunkSplitter};
pub use types::{Chunk, ChunkType}; pub use types::{Chunk, ChunkType};

View File

@@ -1,6 +1,7 @@
use crate::core::time::FrameTime; use crate::core::time::FrameTime;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
// ==================== ChunkType ====================
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)] #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
pub enum ChunkType { pub enum ChunkType {
@@ -8,7 +9,8 @@ pub enum ChunkType {
Sentence, Sentence,
Cut, Cut,
Trace, Trace,
Story, // Parent chunk from story analysis Story,
Visual, // 視覺分片 (Phase 2.1)
} }
impl ChunkType { impl ChunkType {
@@ -19,10 +21,12 @@ impl ChunkType {
ChunkType::Cut => "cut", ChunkType::Cut => "cut",
ChunkType::Trace => "trace", ChunkType::Trace => "trace",
ChunkType::Story => "story", ChunkType::Story => "story",
ChunkType::Visual => "visual",
} }
} }
} }
// ==================== ChunkRule ====================
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)] #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
pub enum ChunkRule { pub enum ChunkRule {
@@ -39,6 +43,73 @@ impl ChunkRule {
} }
} }
// ==================== Visual chunk structures (Phase 2.1) ====================
/// Bounding box of a detected object.
///
/// NOTE(review): presumably pixel coordinates with `x`/`y` as the box origin,
/// as reported by the detector — confirm against the YOLO processor output.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BoundingBox {
pub x: i32,
pub y: i32,
pub width: i32,
pub height: i32,
}
/// A single detected object.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DetectedObject {
/// Object class name
pub class_name: String,
/// Object class ID
pub class_id: u32,
/// Confidence score (0.0-1.0)
pub confidence: f32,
/// Bounding box, when available
pub bbox: Option<BoundingBox>,
/// Number of occurrences (within the chunk)
pub occurrence: u32,
}
/// The list of objects detected in one keyframe.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KeyframeObjects {
/// Keyframe time (seconds) - for reference only; `frame_number` is the primary marker
pub timestamp: f64,
/// Keyframe frame number - the primary time marker
pub frame_number: u64,
/// Objects detected in this keyframe
pub objects: Vec<DetectedObject>,
}
/// Aggregate visual metadata for a chunk.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VisualMetadata {
/// Total number of objects
pub object_count: u32,
/// List of unique object classes
pub unique_classes: Vec<String>,
/// Highest confidence score
pub max_confidence: f32,
/// Average confidence score
pub avg_confidence: f32,
/// Spatial density (average number of objects per frame)
pub spatial_density: f32,
}
/// Content of a visual chunk.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VisualChunkContent {
/// Per-keyframe object lists; every keyframe carries its `frame_number`
pub keyframe_objects: Vec<KeyframeObjects>,
/// Dominant object labels (objects that appear in most frames)
pub dominant_objects: Vec<String>,
/// Object relationships as (object1, relationship, object2) - optional
pub object_relationships: Vec<(String, String, String)>,
/// Scene description - optional
pub scene_description: Option<String>,
/// Visual metadata
pub metadata: VisualMetadata,
}
// ==================== Chunk main struct ====================
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Chunk { pub struct Chunk {
pub file_id: i32, pub file_id: i32,
@@ -49,9 +120,9 @@ pub struct Chunk {
pub rule: ChunkRule, pub rule: ChunkRule,
/// Frames per second (can be fractional, e.g., 29.97, 23.976) /// Frames per second (can be fractional, e.g., 29.97, 23.976)
pub fps: f64, pub fps: f64,
/// Start frame (0-based) /// Start frame (0-based) - 主要時間標示
pub start_frame: i64, pub start_frame: i64,
/// End frame (exclusive) /// End frame (exclusive) - 主要時間標示
pub end_frame: i64, pub end_frame: i64,
pub text_content: Option<String>, pub text_content: Option<String>,
pub content: serde_json::Value, pub content: serde_json::Value,
@@ -61,17 +132,11 @@ pub struct Chunk {
pub pre_chunk_ids: Vec<i32>, pub pre_chunk_ids: Vec<i32>,
pub parent_chunk_id: Option<String>, // For parent-child chunk hierarchy pub parent_chunk_id: Option<String>, // For parent-child chunk hierarchy
pub child_chunk_ids: Vec<String>, // Child chunk IDs (for parent chunks) pub child_chunk_ids: Vec<String>, // Child chunk IDs (for parent chunks)
pub visual_stats: Option<serde_json::Value>,
} }
impl Chunk { impl Chunk {
/// Creates a new chunk from frame counts. /// 創建新分片
///
/// # Arguments
///
/// * `start_frame` - Start frame (0-based)
/// * `end_frame` - End frame (exclusive)
/// * `fps` - Frames per second (can be fractional)
#[allow(clippy::too_many_arguments)]
pub fn new( pub fn new(
file_id: i32, file_id: i32,
uuid: String, uuid: String,
@@ -83,11 +148,13 @@ impl Chunk {
fps: f64, fps: f64,
content: serde_json::Value, content: serde_json::Value,
) -> Self { ) -> Self {
let chunk_id = format!("{}_{:04}", chunk_type.as_str(), chunk_index); let frame_count = (end_frame - start_frame) as i32;
let chunk_id = format!("{}_{}", uuid, chunk_index);
Self { Self {
file_id, file_id,
uuid, uuid,
chunk_id: chunk_id.clone(), chunk_id,
chunk_index, chunk_index,
chunk_type, chunk_type,
rule, rule,
@@ -98,17 +165,171 @@ impl Chunk {
content, content,
metadata: None, metadata: None,
vector_id: None, vector_id: None,
frame_count: 0, frame_count,
pre_chunk_ids: vec![], pre_chunk_ids: vec![],
parent_chunk_id: None, parent_chunk_id: None,
child_chunk_ids: vec![], child_chunk_ids: vec![],
visual_stats: None,
} }
} }
/// Creates a new chunk from seconds (legacy conversion). /// 創建視覺分片 (Phase 2.1)
pub fn new_visual(
file_id: i32,
uuid: String,
chunk_index: u32,
start_frame: i64,
end_frame: i64,
fps: f64,
visual_content: VisualChunkContent,
) -> Self {
let content = serde_json::to_value(&visual_content)
.unwrap_or_else(|_| serde_json::json!({"error": "Failed to serialize visual content"}));
Self::new(
file_id,
uuid,
chunk_index,
ChunkType::Visual,
ChunkRule::Rule2,
start_frame,
end_frame,
fps,
content,
)
}
/// Builds a visual chunk (Phase 2.1) from the YOLO detections of a frame range.
///
/// Every entry of `yolo_frames` becomes one [`KeyframeObjects`] record, and the
/// statistics stored in [`VisualMetadata`] are aggregated over all detections:
///
/// * `unique_classes` is emitted in sorted order so the serialized chunk
///   content is identical from run to run (hash-set iteration order is not);
/// * `dominant_objects` lists, sorted, the classes detected in strictly more
///   than half of the supplied frames;
/// * all statistics are zero / empty when `yolo_frames` is empty.
///
/// The chunk itself is created through `Chunk::new_visual`.
pub fn from_yolo_frames(
    file_id: i32,
    uuid: String,
    chunk_index: u32,
    start_frame: i64,
    end_frame: i64,
    fps: f64,
    yolo_frames: Vec<crate::core::processor::yolo::YoloFrame>,
) -> Self {
    let frame_total = yolo_frames.len();

    // Convert each YOLO frame into a keyframe record.
    let keyframe_objects: Vec<KeyframeObjects> = yolo_frames
        .iter()
        .map(|frame| KeyframeObjects {
            timestamp: frame.timestamp,
            frame_number: frame.frame,
            objects: frame
                .objects
                .iter()
                .map(|obj| DetectedObject {
                    class_name: obj.class_name.clone(),
                    class_id: obj.class_id,
                    confidence: obj.confidence,
                    bbox: Some(BoundingBox {
                        x: obj.x,
                        y: obj.y,
                        width: obj.width,
                        height: obj.height,
                    }),
                    // Each detection is recorded individually, so it counts once.
                    occurrence: 1,
                })
                .collect(),
        })
        .collect();

    // Total number of detections across all frames.
    let total_objects: u32 = yolo_frames.iter().map(|f| f.objects.len() as u32).sum();

    // Distinct class names; a BTreeSet yields them sorted and deterministic.
    let unique_classes: Vec<String> = yolo_frames
        .iter()
        .flat_map(|f| f.objects.iter().map(|o| o.class_name.as_str()))
        .collect::<std::collections::BTreeSet<_>>()
        .into_iter()
        .map(str::to_owned)
        .collect();

    // Confidence statistics over every detection.
    let confidences: Vec<f32> = yolo_frames
        .iter()
        .flat_map(|f| f.objects.iter().map(|o| o.confidence))
        .collect();
    let max_confidence = confidences.iter().copied().fold(0.0f32, f32::max);
    let avg_confidence = if confidences.is_empty() {
        0.0
    } else {
        confidences.iter().sum::<f32>() / confidences.len() as f32
    };

    // Count, per class, the number of frames in which it appears at least once.
    let mut frames_per_class: std::collections::HashMap<&str, usize> =
        std::collections::HashMap::new();
    for frame in &yolo_frames {
        let classes_in_frame: std::collections::HashSet<&str> =
            frame.objects.iter().map(|o| o.class_name.as_str()).collect();
        for class in classes_in_frame {
            *frames_per_class.entry(class).or_insert(0) += 1;
        }
    }

    // Dominant = present in strictly more than half of the frames
    // (`count / frame_total > 0.5`, written without floating point).
    let mut dominant_objects: Vec<String> = frames_per_class
        .into_iter()
        .filter(|&(_, count)| count * 2 > frame_total)
        .map(|(class, _)| class.to_owned())
        .collect();
    dominant_objects.sort();

    let spatial_density = if frame_total == 0 {
        0.0
    } else {
        total_objects as f32 / frame_total as f32
    };

    let visual_content = VisualChunkContent {
        keyframe_objects,
        dominant_objects,
        object_relationships: vec![], // optional: relationship detection may be added later
        scene_description: None,      // optional: an LLM-generated description may be added later
        metadata: VisualMetadata {
            object_count: total_objects,
            unique_classes,
            max_confidence,
            avg_confidence,
            spatial_density,
        },
    };

    Self::new_visual(
        file_id,
        uuid,
        chunk_index,
        start_frame,
        end_frame,
        fps,
        visual_content,
    )
}
/// 將分片轉換為幀時間
pub fn to_frame_time(&self) -> FrameTime {
// 使用第一個幀作為參考點
FrameTime::from_frames(self.start_frame, self.fps)
}
/// Returns `true` when this chunk is a parent chunk, i.e. it owns at least
/// one child chunk.
///
/// A chunk that merely has `parent_chunk_id` set is a *child*, not a parent,
/// so parenthood is decided by `child_chunk_ids` being non-empty.
pub fn is_parent(&self) -> bool {
    !self.child_chunk_ids.is_empty()
}
/// 從秒數創建新分片(舊版轉換)
/// ///
/// This is useful for migrating from older systems that store time as seconds. /// 這對於從存儲時間為秒的舊系統遷移很有用。
/// The frame counts are calculated by rounding `seconds * fps`. /// 幀數通過舍入 `seconds * fps` 計算。
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
pub fn from_seconds( pub fn from_seconds(
file_id: i32, file_id: i32,
@@ -136,104 +357,197 @@ impl Chunk {
) )
} }
/// Returns the start time as a `FrameTime`. /// 返回開始時間為 `FrameTime`
pub fn start_time(&self) -> FrameTime { pub fn start_time(&self) -> FrameTime {
FrameTime::from_frames(self.start_frame, self.fps) FrameTime::from_frames(self.start_frame, self.fps)
} }
/// Returns the end time as a `FrameTime`. /// 返回結束時間為 `FrameTime`
pub fn end_time(&self) -> FrameTime { pub fn end_time(&self) -> FrameTime {
FrameTime::from_frames(self.end_frame, self.fps) FrameTime::from_frames(self.end_frame, self.fps)
} }
/// Returns the duration in frames. /// 返回持續時間的幀數
pub fn duration_frames(&self) -> i64 { pub fn duration_frames(&self) -> i64 {
self.end_frame - self.start_frame self.end_frame - self.start_frame
} }
/// Returns the duration in seconds. /// 返回持續時間的秒數
pub fn duration_seconds(&self) -> f64 { pub fn duration_seconds(&self) -> f64 {
self.duration_frames() as f64 / self.fps self.duration_frames() as f64 / self.fps
} }
/// Formats the start time as "seconds.frame" (e.g., "123.04"). /// 將開始時間格式化為 "seconds.frame" (例如:"123.04")
pub fn format_start_sec_frame(&self) -> String { pub fn format_start_sec_frame(&self) -> String {
self.start_time().format_sec_frame() self.start_time().format_sec_frame()
} }
/// Formats the end time as "seconds.frame" (e.g., "456.15"). /// 將結束時間格式化為 "seconds.frame" (例如:"456.15")
pub fn format_end_sec_frame(&self) -> String { pub fn format_end_sec_frame(&self) -> String {
self.end_time().format_sec_frame() self.end_time().format_sec_frame()
} }
/// Formats the start time as "HH:MM:SS". /// 將開始時間格式化為 "HH:MM:SS"
pub fn format_start_hms(&self) -> String { pub fn format_start_hms(&self) -> String {
self.start_time().format_hms() self.start_time().format_hms()
} }
/// Formats the end time as "HH:MM:SS". /// 將結束時間格式化為 "HH:MM:SS"
pub fn format_end_hms(&self) -> String { pub fn format_end_hms(&self) -> String {
self.end_time().format_hms() self.end_time().format_hms()
} }
/// Formats the start time as "HH:MM:SS.FF". /// 將開始時間格式化為 "HH:MM:SS.FF"
pub fn format_start_hms_frame(&self) -> String { pub fn format_start_hms_frame(&self) -> String {
self.start_time().format_hms_frame() self.start_time().format_hms_frame()
} }
/// Formats the end time as "HH:MM:SS.FF". /// 將結束時間格式化為 "HH:MM:SS.FF"
pub fn format_end_hms_frame(&self) -> String { pub fn format_end_hms_frame(&self) -> String {
self.end_time().format_hms_frame() self.end_time().format_hms_frame()
} }
/// Returns a tuple of (start_seconds, end_seconds) for compatibility. /// 返回 (start_seconds, end_seconds) 元組用於兼容性
/// ///
/// This is provided for backward compatibility during migration. /// 這在遷移期間提供向後兼容性。
/// Prefer using `start_time()` and `end_time()` methods. /// 建議使用 `start_time()` `end_time()` 方法。
pub fn time_range_seconds(&self) -> (f64, f64) { pub fn time_range_seconds(&self) -> (f64, f64) {
(self.start_time().seconds(), self.end_time().seconds()) (self.start_time().seconds(), self.end_time().seconds())
} }
/// 添加元數據
pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self { pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self {
self.metadata = Some(metadata); self.metadata = Some(metadata);
self self
} }
/// 添加向量 ID
pub fn with_vector_id(mut self, vector_id: String) -> Self { pub fn with_vector_id(mut self, vector_id: String) -> Self {
self.vector_id = Some(vector_id); self.vector_id = Some(vector_id);
self self
} }
/// 添加文本內容
pub fn with_text_content(mut self, text: String) -> Self { pub fn with_text_content(mut self, text: String) -> Self {
self.text_content = Some(text); self.text_content = Some(text);
self self
} }
/// 設置幀數
pub fn with_frame_count(mut self, count: i32) -> Self { pub fn with_frame_count(mut self, count: i32) -> Self {
self.frame_count = count; self.frame_count = count;
self self
} }
/// 設置前一個分片 ID
pub fn with_pre_chunk_ids(mut self, ids: Vec<i32>) -> Self { pub fn with_pre_chunk_ids(mut self, ids: Vec<i32>) -> Self {
self.pre_chunk_ids = ids; self.pre_chunk_ids = ids;
self self
} }
/// 設置父分片 ID
pub fn with_parent_chunk_id(mut self, parent_id: String) -> Self { pub fn with_parent_chunk_id(mut self, parent_id: String) -> Self {
self.parent_chunk_id = Some(parent_id); self.parent_chunk_id = Some(parent_id);
self self
} }
/// 設置子分片 ID
pub fn with_child_chunk_ids(mut self, child_ids: Vec<String>) -> Self { pub fn with_child_chunk_ids(mut self, child_ids: Vec<String>) -> Self {
self.child_chunk_ids = child_ids; self.child_chunk_ids = child_ids;
self self
} }
}
pub fn is_parent_chunk(&self) -> bool { // ==================== VisualChunkContent 輔助方法 ====================
!self.child_chunk_ids.is_empty() impl VisualChunkContent {
/// 計算兩個 YOLO 幀之間的相似度(基於物件組成)
pub fn frame_similarity(
frame1: &crate::core::processor::yolo::YoloFrame,
frame2: &crate::core::processor::yolo::YoloFrame,
) -> f32 {
if frame1.objects.is_empty() && frame2.objects.is_empty() {
return 1.0; // 兩個空幀完全相似
}
if frame1.objects.is_empty() || frame2.objects.is_empty() {
return 0.0; // 一個空一個非空,不相似
}
// 創建物件類別名稱集合
let set1: std::collections::HashSet<String> = frame1
.objects
.iter()
.map(|o| o.class_name.clone())
.collect();
let set2: std::collections::HashSet<String> = frame2
.objects
.iter()
.map(|o| o.class_name.clone())
.collect();
// 計算 Jaccard 相似度
let intersection: Vec<_> = set1.intersection(&set2).collect();
let union: Vec<_> = set1.union(&set2).collect();
if union.is_empty() {
0.0
} else {
intersection.len() as f32 / union.len() as f32
}
} }
pub fn is_child_chunk(&self) -> bool { /// 獲取視覺分片的摘要(使用關鍵幀的 frame_number
self.parent_chunk_id.is_some() pub fn summary(&self, fps: f64) -> String {
if self.keyframe_objects.is_empty() {
return "Empty visual chunk".to_string();
}
let first_frame = self.keyframe_objects.first().unwrap().frame_number;
let last_frame = self.keyframe_objects.last().unwrap().frame_number;
// 計算時間(僅供參考)
let start_time = if fps > 0.0 {
first_frame as f64 / fps
} else {
0.0
};
let end_time = if fps > 0.0 {
last_frame as f64 / fps
} else {
0.0
};
let duration = end_time - start_time;
let frame_count = self.keyframe_objects.len();
format!(
"Visual chunk: frames {} to {} (duration: {:.1}s, {} frames). Objects: {} total, {} unique. Dominant: {}",
first_frame,
last_frame,
duration,
frame_count,
self.metadata.object_count,
self.metadata.unique_classes.len(),
if self.dominant_objects.is_empty() {
"none".to_string()
} else {
self.dominant_objects.join(", ")
}
)
}
/// Reports whether any keyframe contains an object of the given class.
///
/// The comparison against `class_name` is an exact string match.
pub fn contains_object(&self, class_name: &str) -> bool {
    for keyframe in &self.keyframe_objects {
        for detected in &keyframe.objects {
            if detected.class_name == class_name {
                return true;
            }
        }
    }
    false
}
/// 獲取信心值高於閾值的所有物件
pub fn high_confidence_objects(&self, threshold: f32) -> Vec<&DetectedObject> {
self.keyframe_objects
.iter()
.flat_map(|ko| ko.objects.iter())
.filter(|obj| obj.confidence >= threshold)
.collect()
} }
} }

View File

@@ -164,3 +164,29 @@ pub mod cache {
.unwrap_or(3600) .unwrap_or(3600)
}); });
} }
/// Settings for the LLM summary integration, each read lazily from the
/// process environment on first access.
pub mod llm {
    use super::*;

    /// Summary endpoint URL from `MOMENTRY_LLM_SUMMARY_URL`; falls back to a
    /// local chat-completions endpoint when the variable cannot be read.
    pub static SUMMARY_URL: Lazy<String> = Lazy::new(|| {
        match env::var("MOMENTRY_LLM_SUMMARY_URL") {
            Ok(url) => url,
            Err(_) => String::from("http://127.0.0.1:8081/v1/chat/completions"),
        }
    });

    /// Summary model name from `MOMENTRY_LLM_SUMMARY_MODEL`; defaults to `gemma4`.
    pub static SUMMARY_MODEL: Lazy<String> = Lazy::new(|| {
        match env::var("MOMENTRY_LLM_SUMMARY_MODEL") {
            Ok(model) => model,
            Err(_) => String::from("gemma4"),
        }
    });

    /// Summary timeout from `MOMENTRY_LLM_SUMMARY_TIMEOUT`; 120 when the
    /// variable is unset or is not a valid unsigned integer.
    pub static SUMMARY_TIMEOUT_SECS: Lazy<u64> = Lazy::new(|| {
        env::var("MOMENTRY_LLM_SUMMARY_TIMEOUT")
            .ok()
            .and_then(|raw| raw.parse().ok())
            .unwrap_or(120)
    });

    /// Summary on/off flag from `MOMENTRY_LLM_SUMMARY_ENABLED`.
    ///
    /// Only the exact values `true` and `1` enable it; any other value disables
    /// it. When the variable cannot be read the flag defaults to enabled.
    pub static SUMMARY_ENABLED: Lazy<bool> = Lazy::new(|| {
        match env::var("MOMENTRY_LLM_SUMMARY_ENABLED") {
            Ok(flag) => matches!(flag.as_str(), "true" | "1"),
            Err(_) => true,
        }
    });
}

View File

@@ -6,6 +6,7 @@ use crate::core::chunk::types::{Chunk, ChunkRule, ChunkType};
pub struct MongoDb { pub struct MongoDb {
base_url: String, base_url: String,
database: String,
} }
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -53,7 +54,8 @@ impl MongoDb {
pub fn new() -> Self { pub fn new() -> Self {
let base_url = let base_url =
std::env::var("MONGODB_URL").unwrap_or_else(|_| "http://localhost:27017".to_string()); std::env::var("MONGODB_URL").unwrap_or_else(|_| "http://localhost:27017".to_string());
Self { base_url } let database = crate::core::config::MONGODB_DATABASE.clone();
Self { base_url, database }
} }
} }
@@ -68,7 +70,7 @@ impl MongoDb {
let doc: ChunkDocument = chunk.clone().into(); let doc: ChunkDocument = chunk.clone().into();
let client = reqwest::Client::new(); let client = reqwest::Client::new();
let url = format!("{}/momentry/chunks", self.base_url); let url = format!("{}/{}/chunks", self.base_url, self.database);
client client
.post(&url) .post(&url)
@@ -83,8 +85,8 @@ impl MongoDb {
pub async fn get_chunks_by_uuid(&self, uuid: &str) -> Result<Vec<Chunk>> { pub async fn get_chunks_by_uuid(&self, uuid: &str) -> Result<Vec<Chunk>> {
let client = reqwest::Client::new(); let client = reqwest::Client::new();
let url = format!( let url = format!(
"{}/momentry/chunks?filter={{\"uuid\":\"{}\"}}", "{}/{}/chunks?filter={{\"uuid\":\"{}\"}}",
self.base_url, uuid self.base_url, self.database, uuid
); );
let response = client let response = client
@@ -131,6 +133,7 @@ impl MongoDb {
pre_chunk_ids: vec![], pre_chunk_ids: vec![],
parent_chunk_id: doc.parent_chunk_id, parent_chunk_id: doc.parent_chunk_id,
child_chunk_ids: doc.child_chunk_ids, child_chunk_ids: doc.child_chunk_ids,
visual_stats: None,
} }
}) })
.collect(); .collect();
@@ -141,8 +144,8 @@ impl MongoDb {
pub async fn search_text(&self, query: &str) -> Result<Vec<Chunk>> { pub async fn search_text(&self, query: &str) -> Result<Vec<Chunk>> {
let client = reqwest::Client::new(); let client = reqwest::Client::new();
let url = format!( let url = format!(
"{}/momentry/chunks?filter={{\"$text\":{{\"$search\":\"{}\"}}}}", "{}/{}/chunks?filter={{\"$text\":{{\"$search\":\"{}\"}}}}",
self.base_url, query self.base_url, self.database, query
); );
let response = client let response = client
@@ -189,6 +192,7 @@ impl MongoDb {
pre_chunk_ids: vec![], pre_chunk_ids: vec![],
parent_chunk_id: doc.parent_chunk_id, parent_chunk_id: doc.parent_chunk_id,
child_chunk_ids: doc.child_chunk_ids, child_chunk_ids: doc.child_chunk_ids,
visual_stats: None,
} }
}) })
.collect(); .collect();
@@ -198,7 +202,7 @@ impl MongoDb {
pub async fn get_all_chunks(&self) -> Result<Vec<Chunk>> { pub async fn get_all_chunks(&self) -> Result<Vec<Chunk>> {
let client = reqwest::Client::new(); let client = reqwest::Client::new();
let url = format!("{}/momentry/chunks", self.base_url); let url = format!("{}/{}/chunks", self.base_url, self.database);
let response = client let response = client
.get(&url) .get(&url)
@@ -244,6 +248,7 @@ impl MongoDb {
pre_chunk_ids: vec![], pre_chunk_ids: vec![],
parent_chunk_id: doc.parent_chunk_id, parent_chunk_id: doc.parent_chunk_id,
child_chunk_ids: doc.child_chunk_ids, child_chunk_ids: doc.child_chunk_ids,
visual_stats: None,
} }
}) })
.collect(); .collect();

File diff suppressed because it is too large Load Diff

View File

@@ -128,7 +128,7 @@ impl QdrantDb {
use std::hash::{Hash, Hasher}; use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new(); let mut hasher = DefaultHasher::new();
point_id_str.hash(&mut hasher); point_id_str.hash(&mut hasher);
let point_id = hasher.finish() as u64; let point_id = hasher.finish();
let body = serde_json::json!({ let body = serde_json::json!({
"points": [{ "points": [{
@@ -171,7 +171,7 @@ impl QdrantDb {
)); ));
} }
tracing::debug!("Qdrant response: {}", response_text); tracing::debug!("Qdrant upsert response status: {}", status);
tracing::info!("Successfully upserted vector for chunk: {}", chunk_id); tracing::info!("Successfully upserted vector for chunk: {}", chunk_id);
Ok(()) Ok(())
} }
@@ -257,6 +257,101 @@ impl QdrantDb {
Ok(search_results) Ok(search_results)
} }
pub async fn search_collections(
&self,
query_vector: &[f32],
collections: &[&str],
limit: usize,
) -> Result<Vec<SearchResult>> {
let mut handles = Vec::new();
for &collection in collections {
let url = format!("{}/collections/{}/points/search", self.base_url, collection);
let client = self.client.clone();
let api_key = self.api_key.clone();
let query_vec = query_vector.to_vec();
let body = serde_json::json!({
"vector": query_vec,
"limit": limit * 2, // Fetch more from each to account for overlaps
"with_payload": true
});
handles.push(async move {
let response = client
.post(&url)
.header("api-key", &api_key)
.header("Content-Type", "application/json")
.json(&body)
.send()
.await;
match response {
Ok(resp) if resp.status().is_success() => {
let resp_text = resp
.text()
.await
.unwrap_or_else(|_| "Failed to read response".to_string());
#[derive(Deserialize)]
struct QdrantSearchResult {
result: Vec<QdrantPoint>,
}
#[derive(Deserialize)]
struct QdrantPoint {
#[allow(dead_code)]
id: serde_json::Value,
score: f64,
payload: HashMap<String, serde_json::Value>,
}
if let Ok(result) = serde_json::from_str::<QdrantSearchResult>(&resp_text) {
let results: Vec<SearchResult> = result
.result
.into_iter()
.map(|r| {
let uuid = r
.payload
.get("uuid")
.and_then(|v| v.as_str())
.unwrap_or("unknown")
.to_string();
let chunk_id = r
.payload
.get("chunk_id")
.and_then(|v| v.as_str())
.unwrap_or("unknown")
.to_string();
SearchResult {
uuid,
chunk_id,
score: r.score as f32,
}
})
.collect();
Ok::<Vec<SearchResult>, anyhow::Error>(results)
} else {
Ok::<Vec<SearchResult>, anyhow::Error>(Vec::new())
}
}
_ => Ok::<Vec<SearchResult>, anyhow::Error>(Vec::new()),
}
});
}
let results = futures_util::future::join_all(handles).await;
let mut merged: Vec<SearchResult> = results
.into_iter()
.filter_map(Result::ok)
.flatten()
.collect();
// Sort by score descending
merged.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
// Deduplicate by chunk_id + uuid
merged.dedup_by_key(|r| (r.chunk_id.clone(), r.uuid.clone()));
// Truncate to limit
merged.truncate(limit);
Ok(merged)
}
pub async fn search_in_uuid( pub async fn search_in_uuid(
&self, &self,
query_vector: &[f32], query_vector: &[f32],

View File

@@ -4,9 +4,15 @@ pub mod chunk;
pub mod config; pub mod config;
pub mod db; pub mod db;
pub mod embedding; pub mod embedding;
pub mod ingestion;
pub mod llm;
pub mod overlay; pub mod overlay;
pub mod person_identity;
pub mod probe; pub mod probe;
pub mod processor; pub mod processor;
pub mod storage; pub mod storage;
pub mod text;
pub mod thumbnail; pub mod thumbnail;
pub mod time; pub mod time;
pub mod tmdb;
pub mod worker;

View File

@@ -28,16 +28,23 @@ pub async fn process_asrx(
uuid: Option<&str>, uuid: Option<&str>,
) -> Result<AsrxResult> { ) -> Result<AsrxResult> {
let executor = PythonExecutor::new()?; let executor = PythonExecutor::new()?;
let script_path = executor.script_path("asrx_processor.py"); let script_path = executor.script_path("asrx_processor_custom.py");
tracing::info!("[ASRX] Starting speaker diarization: {}", video_path); tracing::info!(
"[ASRX] Starting speaker diarization (custom): {}",
video_path
);
if !script_path.exists() { if !script_path.exists() {
tracing::warn!("[ASRX] Script not found, returning empty result"); tracing::warn!("[ASRX] Custom script not found, falling back to original");
return Ok(AsrxResult { let fallback_path = executor.script_path("asrx_processor.py");
language: None, if !fallback_path.exists() {
segments: vec![], tracing::warn!("[ASRX] No script found, returning empty result");
}); return Ok(AsrxResult {
language: None,
segments: vec![],
});
}
} }
let mut cmd = Command::new(executor.python_path()); let mut cmd = Command::new(executor.python_path());

View File

@@ -9,6 +9,7 @@ pub mod ocr;
pub mod pose; pub mod pose;
pub mod scene_classification; pub mod scene_classification;
pub mod story; pub mod story;
pub mod visual_chunk;
pub mod yolo; pub mod yolo;
pub use asr::{process_asr, AsrResult, AsrSegment}; pub use asr::{process_asr, AsrResult, AsrSegment};
@@ -28,4 +29,5 @@ pub use scene_classification::{
process_scene_classification, SceneClassificationResult, ScenePrediction, SceneSegment, process_scene_classification, SceneClassificationResult, ScenePrediction, SceneSegment,
}; };
pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats}; pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats};
pub use visual_chunk::{process_visual_chunk, process_visual_chunk_advanced, VisualChunkResult};
pub use yolo::{process_yolo, YoloFrame, YoloObject, YoloResult}; pub use yolo::{process_yolo, YoloFrame, YoloObject, YoloResult};

View File

@@ -4,6 +4,8 @@ pub mod api;
pub mod ui; pub mod ui;
pub mod watcher;
pub mod worker; pub mod worker;
pub use core::cache::{keys, MongoCache, RedisCache}; pub use core::cache::{keys, MongoCache, RedisCache};
@@ -13,6 +15,10 @@ pub use core::db::{
VideoStatus, VideoStatus,
}; };
pub use core::embedding::Embedder; pub use core::embedding::Embedder;
pub use core::person_identity::{
ChunkPersonInfo, PersonAppearance, PersonIdentity, PersonIdentityResponse, PersonMatch,
PersonStatistics, PersonTimelineEntry, PersonTimelineResponse,
};
pub use core::probe::ProbeResult; pub use core::probe::ProbeResult;
pub use core::storage::file_manager::FileManager; pub use core::storage::file_manager::FileManager;
pub use core::storage::output_dir::OutputDir; pub use core::storage::output_dir::OutputDir;

View File

@@ -1805,6 +1805,64 @@ async fn main() -> Result<()> {
} }
}; };
// Read Pose JSON (optional)
let pose_path = format!("{}.pose.json", uuid);
let pose_result = match std::fs::read_to_string(&pose_path) {
Ok(pose_json) => match serde_json::from_str::<
momentry_core::core::processor::pose::PoseResult,
>(&pose_json)
{
Ok(result) => {
println!("Loaded Pose: {} frames", result.frames.len());
result
}
Err(e) => {
println!("Warning: Failed to parse Pose JSON: {}. Skipping Pose.", e);
momentry_core::core::processor::pose::PoseResult {
frame_count: 0,
fps: 0.0,
frames: vec![],
}
}
},
Err(_) => {
println!("Warning: Pose file not found. Skipping Pose.");
momentry_core::core::processor::pose::PoseResult {
frame_count: 0,
fps: 0.0,
frames: vec![],
}
}
};
// Read ASRX JSON (optional)
let asrx_path = format!("{}.asrx.json", uuid);
let asrx_result = match std::fs::read_to_string(&asrx_path) {
Ok(asrx_json) => match serde_json::from_str::<
momentry_core::core::processor::asrx::AsrxResult,
>(&asrx_json)
{
Ok(result) => {
println!("Loaded ASRX: {} segments", result.segments.len());
result
}
Err(e) => {
println!("Warning: Failed to parse ASRX JSON: {}. Skipping ASRX.", e);
momentry_core::core::processor::asrx::AsrxResult {
language: None,
segments: vec![],
}
}
},
Err(_) => {
println!("Warning: ASRX file not found. Skipping ASRX.");
momentry_core::core::processor::asrx::AsrxResult {
language: None,
segments: vec![],
}
}
};
// ========== Store pre_chunks (from ASR, CUT) ========== // ========== Store pre_chunks (from ASR, CUT) ==========
println!("\nStoring pre_chunks..."); println!("\nStoring pre_chunks...");
@@ -1922,12 +1980,21 @@ async fn main() -> Result<()> {
face_by_frame.insert(frame.frame, frame.clone()); face_by_frame.insert(frame.frame, frame.clone());
} }
// Store frames (merge data from YOLO, OCR, Face) let mut pose_by_frame: std::collections::HashMap<
u64,
momentry_core::core::processor::pose::PoseFrame,
> = std::collections::HashMap::new();
for frame in &pose_result.frames {
pose_by_frame.insert(frame.frame, frame.clone());
}
// Store frames (merge data from YOLO, OCR, Face, Pose)
let mut all_frames: Vec<u64> = frame_data let mut all_frames: Vec<u64> = frame_data
.keys() .keys()
.cloned() .cloned()
.chain(ocr_by_frame.keys().cloned()) .chain(ocr_by_frame.keys().cloned())
.chain(face_by_frame.keys().cloned()) .chain(face_by_frame.keys().cloned())
.chain(pose_by_frame.keys().cloned())
.collect(); .collect();
all_frames.sort(); all_frames.sort();
all_frames.dedup(); all_frames.dedup();
@@ -1937,6 +2004,7 @@ async fn main() -> Result<()> {
let yolo_frame = frame_data.get(frame_num); let yolo_frame = frame_data.get(frame_num);
let ocr_frame = ocr_by_frame.get(frame_num); let ocr_frame = ocr_by_frame.get(frame_num);
let face_frame = face_by_frame.get(frame_num); let face_frame = face_by_frame.get(frame_num);
let pose_frame = pose_by_frame.get(frame_num);
let frame = momentry_core::core::db::postgres_db::Frame { let frame = momentry_core::core::db::postgres_db::Frame {
id: 0, id: 0,
@@ -1947,6 +2015,7 @@ async fn main() -> Result<()> {
yolo_objects: yolo_frame.map(|f| serde_json::json!(&f.objects)), yolo_objects: yolo_frame.map(|f| serde_json::json!(&f.objects)),
ocr_results: ocr_frame.map(|f| serde_json::json!(&f.texts)), ocr_results: ocr_frame.map(|f| serde_json::json!(&f.texts)),
face_results: face_frame.map(|f| serde_json::json!(&f.faces)), face_results: face_frame.map(|f| serde_json::json!(&f.faces)),
pose_results: pose_frame.map(|f| serde_json::json!(&f.persons)),
frame_path: None, frame_path: None,
created_at: String::new(), created_at: String::new(),
}; };
@@ -1960,10 +2029,33 @@ async fn main() -> Result<()> {
println!("\nCreating chunks..."); println!("\nCreating chunks...");
// Rule 1: Direct conversion (sentence pre_chunk -> sentence chunk) // Rule 1: Direct conversion (sentence pre_chunk -> sentence chunk)
// Merge ASRX speaker_id by time overlap
let mut sentence_chunks = Vec::new(); let mut sentence_chunks = Vec::new();
for (i, seg) in asr_result.segments.iter().enumerate() { for (i, seg) in asr_result.segments.iter().enumerate() {
let pre_chunk_id = asr_pre_chunk_ids.get(i).copied().unwrap_or(0); let pre_chunk_id = asr_pre_chunk_ids.get(i).copied().unwrap_or(0);
let chunk = Chunk::from_seconds(
// Find matching ASRX segment by time overlap
let speaker_id = asrx_result
.segments
.iter()
.find(|ax| {
// Overlap: ASRX segment overlaps with ASR segment
ax.start <= seg.end && ax.end >= seg.start
})
.and_then(|ax| ax.speaker_id.clone());
let content = if let Some(ref sid) = speaker_id {
serde_json::json!({
"text": seg.text,
"speaker_id": sid,
})
} else {
serde_json::json!({
"text": seg.text,
})
};
let mut chunk = Chunk::from_seconds(
file_id as i32, file_id as i32,
uuid.clone(), uuid.clone(),
i as u32, i as u32,
@@ -1972,15 +2064,40 @@ async fn main() -> Result<()> {
seg.start, seg.start,
seg.end, seg.end,
fps, fps,
serde_json::json!({ content,
"text": seg.text,
}),
) )
.with_text_content(seg.text.clone()) .with_text_content(seg.text.clone())
.with_pre_chunk_ids(vec![pre_chunk_id as i32]); .with_pre_chunk_ids(vec![pre_chunk_id as i32]);
// Add ASRX metadata if available
if speaker_id.is_some() {
chunk = chunk.with_metadata(serde_json::json!({
"language": asr_result.language,
"language_probability": asr_result.language_probability,
"speaker_matched": true,
}));
}
sentence_chunks.push(chunk); sentence_chunks.push(chunk);
} }
if !asrx_result.segments.is_empty() {
let matched = sentence_chunks
.iter()
.filter(|c| {
c.content
.get("speaker_id")
.and_then(|v| v.as_str())
.is_some()
})
.count();
println!(
" ASRX merge: {}/{} sentence chunks matched to speakers",
matched,
sentence_chunks.len()
);
}
// Rule 1: CUT chunks // Rule 1: CUT chunks
let mut cut_chunks = Vec::new(); let mut cut_chunks = Vec::new();
for (i, scene) in cut_result.scenes.iter().enumerate() { for (i, scene) in cut_result.scenes.iter().enumerate() {
@@ -2235,7 +2352,7 @@ async fn main() -> Result<()> {
// Get list of videos to process // Get list of videos to process
let videos_to_process = if uuid == "all" { let videos_to_process = if uuid == "all" {
// Get all videos // Get all videos
let videos = pg.list_videos().await?; let videos = pg.list_videos(10000, 0).await?.0;
videos.into_iter().map(|v| v.uuid).collect::<Vec<_>>() videos.into_iter().map(|v| v.uuid).collect::<Vec<_>>()
} else { } else {
// Process single video // Process single video
@@ -2486,7 +2603,7 @@ async fn main() -> Result<()> {
.await? .await?
.ok_or_else(|| anyhow::anyhow!("Video not found: {}", uuid))?] .ok_or_else(|| anyhow::anyhow!("Video not found: {}", uuid))?]
} else { } else {
db.list_videos().await? db.list_videos(10000, 0).await?.0
}; };
let output_dir = std::path::PathBuf::from("thumbnails"); let output_dir = std::path::PathBuf::from("thumbnails");
@@ -2520,7 +2637,7 @@ async fn main() -> Result<()> {
.await? .await?
.ok_or_else(|| anyhow::anyhow!("Video not found: {}", u))?] .ok_or_else(|| anyhow::anyhow!("Video not found: {}", u))?]
} else { } else {
db.list_videos().await? db.list_videos(10000, 0).await?.0
}; };
println!("\n╔══════════════════════════════════════════════════════════════════════════════════╗"); println!("\n╔══════════════════════════════════════════════════════════════════════════════════╗");

View File

@@ -5,6 +5,21 @@ use std::path::PathBuf;
const DEFAULT_API_URL: &str = "http://localhost:3002"; const DEFAULT_API_URL: &str = "http://localhost:3002";
const DEV_API_URL: &str = "http://localhost:3003";
/// Resolves the base URL of the Momentry API.
///
/// Precedence:
/// 1. `MOMENTRY_API_URL`, used verbatim when set.
/// 2. `MOMENTRY_SERVER_PORT`, expanded to `http://localhost:<port>`.
/// 3. `DEFAULT_API_URL`.
fn get_api_url() -> String {
    // An explicit URL always wins over the port shortcut.
    if let Ok(explicit_url) = std::env::var("MOMENTRY_API_URL") {
        return explicit_url;
    }

    match std::env::var("MOMENTRY_SERVER_PORT") {
        Ok(port) => format!("http://localhost:{}", port),
        Err(_) => DEFAULT_API_URL.to_string(),
    }
}
/// Returns the API key from `MOMENTRY_API_KEY`, or `None` when the variable
/// is unset or not valid Unicode.
fn get_api_key() -> Option<String> {
    match std::env::var("MOMENTRY_API_KEY") {
        Ok(api_key) => Some(api_key),
        Err(_) => None,
    }
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct ApiClient { pub struct ApiClient {
client: Client, client: Client,
@@ -83,7 +98,7 @@ pub struct VideosResponse {
impl ApiClient { impl ApiClient {
pub fn new() -> Self { pub fn new() -> Self {
let url = std::env::var("MOMENTRY_API_URL").unwrap_or_else(|_| DEFAULT_API_URL.to_string()); let url = get_api_url();
Self { Self {
client: Client::new(), client: Client::new(),
base_url: url, base_url: url,
@@ -103,7 +118,11 @@ impl ApiClient {
let request = RegisterRequest { let request = RegisterRequest {
path: path.to_string(), path: path.to_string(),
}; };
let response = self.client.post(&url).json(&request).send().await?; let mut request_builder = self.client.post(&url).json(&request);
if let Some(key) = get_api_key() {
request_builder = request_builder.header("X-API-Key", key);
}
let response = request_builder.send().await?;
let status = response.status(); let status = response.status();
let result = response.json::<RegisterResponse>().await?; let result = response.json::<RegisterResponse>().await?;
if !status.is_success() { if !status.is_success() {
@@ -124,7 +143,11 @@ impl ApiClient {
limit, limit,
uuid: uuid.map(|s| s.to_string()), uuid: uuid.map(|s| s.to_string()),
}; };
let response = self.client.post(&url).json(&request).send().await?; let mut request_builder = self.client.post(&url).json(&request);
if let Some(key) = get_api_key() {
request_builder = request_builder.header("X-API-Key", key);
}
let response = request_builder.send().await?;
let status = response.status(); let status = response.status();
let result = response.json::<SearchResponse>().await?; let result = response.json::<SearchResponse>().await?;
if !status.is_success() { if !status.is_success() {
@@ -135,18 +158,30 @@ impl ApiClient {
pub async fn lookup_video(&self, uuid: &str) -> Result<LookupResponse> { pub async fn lookup_video(&self, uuid: &str) -> Result<LookupResponse> {
let url = format!("{}/api/v1/lookup?uuid={}", self.base_url, uuid); let url = format!("{}/api/v1/lookup?uuid={}", self.base_url, uuid);
let response = self.client.get(&url).send().await?; let mut request = self.client.get(&url);
if let Some(key) = get_api_key() {
request = request.header("X-API-Key", key);
}
let response = request.send().await?;
let status = response.status(); let status = response.status();
let result = response.json::<LookupResponse>().await?; if status == 200 {
if !status.is_success() { let result = response.json::<LookupResponse>().await?;
if result.uuid.is_empty() {
anyhow::bail!("影片不存在: {}", uuid);
}
Ok(result)
} else {
anyhow::bail!("API request failed with status: {}", status); anyhow::bail!("API request failed with status: {}", status);
} }
Ok(result)
} }
pub async fn list_videos(&self) -> Result<Vec<VideoInfo>> { pub async fn list_videos(&self) -> Result<Vec<VideoInfo>> {
let url = format!("{}/api/v1/videos", self.base_url); let url = format!("{}/api/v1/videos", self.base_url);
let response = self.client.get(&url).send().await?; let mut request = self.client.get(&url);
if let Some(key) = get_api_key() {
request = request.header("X-API-Key", key);
}
let response = request.send().await?;
let status = response.status(); let status = response.status();
let result = response.json::<VideosResponse>().await?; let result = response.json::<VideosResponse>().await?;
if !status.is_success() { if !status.is_success() {

View File

@@ -397,6 +397,29 @@ fn format_time(seconds: f64) -> String {
format!("{:02}:{:02}:{:02}.{:02}", hours, minutes, secs, millis) format!("{:02}:{:02}:{:02}.{:02}", hours, minutes, secs, millis)
} }
/// Asks `ffprobe` for the container duration of `video_path`, in seconds.
///
/// Returns `0.0` when `ffprobe` cannot be started, exits with a failure
/// status, or prints something that does not parse as a float.
#[allow(dead_code)] // the only caller is the feature-gated SDL2 player
fn get_video_duration(video_path: &str) -> f64 {
    let probe_args = [
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        video_path,
    ];

    std::process::Command::new("ffprobe")
        .args(probe_args)
        .output()
        .ok()
        .filter(|probe| probe.status.success())
        .and_then(|probe| {
            // ffprobe prints the bare number followed by a newline.
            String::from_utf8_lossy(&probe.stdout)
                .trim()
                .parse::<f64>()
                .ok()
        })
        .unwrap_or(0.0)
}
fn lookup_video_uuid(video_path: &str) -> Option<String> { fn lookup_video_uuid(video_path: &str) -> Option<String> {
use std::process::Command as StdCommand; use std::process::Command as StdCommand;
@@ -510,9 +533,714 @@ fn run_player(_video_path: &str, _video_uuid: Option<String>) -> Result<()> {
} }
#[cfg(feature = "player")] #[cfg(feature = "player")]
fn run_player(_video_path: &str, _video_uuid: Option<String>) -> Result<()> { fn run_player(video_path: &str, video_uuid: Option<String>) -> Result<()> {
println!("Player not available - SDL2 not configured"); run_player_with_sdl2(video_path, video_uuid)
println!("Playing: {} (UUID: {:?})", _video_path, _video_uuid); }
#[cfg(feature = "player")]
/// Plays `video_path` in an SDL2 window by decoding it with an external
/// `ffmpeg` process that streams raw RGB24 frames over a pipe.
///
/// Controls: SPACE pauses, Q/ESC quits, LEFT/RIGHT seek by 10 s, UP/DOWN seek
/// by 60 s, A toggles the ASR overlay (currently logged to the console only),
/// F is a placeholder for fullscreen.
///
/// Seeking is implemented by stopping the current decoder and starting a new
/// one with `-ss`. The decoder is killed *and reaped* on every exit path
/// (normal end, quit, or an error returned with `?`), so no zombie `ffmpeg`
/// processes are left behind.
///
/// `video_uuid` is only printed for diagnostics.
///
/// # Errors
/// Returns an error when SDL2 initialisation, window/canvas/texture creation,
/// starting `ffmpeg`, or rendering a frame fails.
fn run_player_with_sdl2(video_path: &str, video_uuid: Option<String>) -> Result<()> {
    use sdl2::event::Event;
    use sdl2::keyboard::Keycode;
    use sdl2::pixels::{Color, PixelFormatEnum};
    use sdl2::rect::Rect;
    use std::io::{BufReader, Read};
    use std::thread;
    use std::time::{Duration, Instant};

    /// Owns the ffmpeg child so that it is always killed and waited for.
    struct Decoder(std::process::Child);

    impl Decoder {
        /// Kills the child and reaps it; harmless if it already exited.
        fn stop(&mut self) {
            let _ = self.0.kill();
            let _ = self.0.wait();
        }
    }

    impl Drop for Decoder {
        fn drop(&mut self) {
            self.stop();
        }
    }

    println!("\n=== 🎬 SDL2 Video Player ===");
    println!("File: {}", video_path);
    println!("UUID: {:?}", video_uuid);

    let sdl_context = sdl2::init().map_err(|e| anyhow::anyhow!("SDL init failed: {}", e))?;
    let video_subsystem = sdl_context
        .video()
        .map_err(|e| anyhow::anyhow!("Video init failed: {}", e))?;

    let width = 1280u32;
    let height = 720u32;

    let window = video_subsystem
        .window("Momentry Player", width, height)
        .position_centered()
        .resizable()
        .build()
        .map_err(|e| anyhow::anyhow!("Window creation failed: {}", e))?;
    let mut canvas = window
        .into_canvas()
        .build()
        .map_err(|e| anyhow::anyhow!("Canvas creation failed: {}", e))?;
    let texture_creator = canvas.texture_creator();
    let mut texture = texture_creator
        .create_texture_streaming(PixelFormatEnum::RGB24, width, height)
        .map_err(|e| anyhow::anyhow!("Texture creation failed: {}", e))?;

    // Prefer the Apple-silicon Homebrew binary when it exists; otherwise rely
    // on PATH (covers Intel macOS installs and every other platform).
    const HOMEBREW_FFMPEG: &str = "/opt/homebrew/bin/ffmpeg";
    let ffmpeg_path =
        if cfg!(target_os = "macos") && std::path::Path::new(HOMEBREW_FFMPEG).exists() {
            HOMEBREW_FFMPEG
        } else {
            "ffmpeg"
        };

    let mut decoder = Decoder(
        spawn_sdl2_decoder(ffmpeg_path, video_path, width, height, None)
            .map_err(|e| anyhow::anyhow!("Failed to start ffmpeg: {}", e))?,
    );
    let stdout = decoder
        .0
        .stdout
        .take()
        .ok_or_else(|| anyhow::anyhow!("Failed to capture stdout"))?;
    let mut reader = BufReader::new(stdout);

    // One RGB24 frame: 3 bytes per pixel.
    let frame_size = (width * height * 3) as usize;
    let mut frame_buffer = vec![0u8; frame_size];

    let mut event_pump = sdl_context
        .event_pump()
        .map_err(|e| anyhow::anyhow!("Event pump failed: {}", e))?;

    let mut overlay = asr_overlay::AsrOverlay::new();
    let _ = overlay.load_from_file(video_path);
    println!("ASR Overlay initialized: {}", !overlay.is_empty());

    let video_duration = get_video_duration(video_path);
    println!("Video duration: {:.1}s", video_duration);

    // Forward seeks are clamped to the duration when ffprobe reported one.
    let clamp_forward = |target: f64| {
        if video_duration > 0.0 {
            target.min(video_duration)
        } else {
            target
        }
    };

    let mut frame_count = 0u64;
    let frame_duration = Duration::from_millis(33);
    let mut paused = false;
    let mut quit = false;
    let mut current_time = 0.0f64;
    let mut seek_request: Option<f64> = None;
    let fps = 30.0;
    let mut asr_overlay_visible = false;

    println!("Playing... (Press SPACE to pause, Q/ESC to quit, ←/→ to seek, A to toggle ASR, F for fullscreen)");

    loop {
        let frame_start = Instant::now();

        // Handle seek by restarting ffmpeg at the requested position.
        if let Some(seek_pos) = seek_request.take() {
            println!("\n⏩ Seeking to {:.1}s...", seek_pos);

            // Stop (kill + reap) the old decoder before starting the new one.
            decoder.stop();
            decoder = Decoder(
                spawn_sdl2_decoder(ffmpeg_path, video_path, width, height, Some(seek_pos))
                    .map_err(|e| anyhow::anyhow!("Failed to restart ffmpeg: {}", e))?,
            );
            let stdout = decoder
                .0
                .stdout
                .take()
                .ok_or_else(|| anyhow::anyhow!("Failed to capture stdout"))?;
            reader = BufReader::new(stdout);
            current_time = seek_pos;
            println!("▶ Resumed at {:.1}s", current_time);
        }

        for event in event_pump.poll_iter() {
            match event {
                Event::Quit { .. } => {
                    println!("\n👋 Quitting player");
                    quit = true;
                    break;
                }
                Event::KeyDown { keycode, .. } => match keycode {
                    Some(Keycode::Q) | Some(Keycode::Escape) => {
                        println!("\n👋 Quitting player");
                        quit = true;
                        break;
                    }
                    Some(Keycode::Space) => {
                        paused = !paused;
                        println!("{}", if paused { "⏸ Paused" } else { "▶ Playing" });
                    }
                    Some(Keycode::Left) => {
                        let new_time = (current_time - 10.0).max(0.0);
                        seek_request = Some(new_time);
                        println!("⏪ Seek to {:.1}s", new_time);
                    }
                    Some(Keycode::Right) => {
                        let new_time = clamp_forward(current_time + 10.0);
                        seek_request = Some(new_time);
                        println!("⏩ Seek to {:.1}s", new_time);
                    }
                    Some(Keycode::Up) => {
                        let new_time = (current_time - 60.0).max(0.0);
                        seek_request = Some(new_time);
                        println!("⏪ Seek to {:.1}s (1min)", new_time);
                    }
                    Some(Keycode::Down) => {
                        let new_time = clamp_forward(current_time + 60.0);
                        seek_request = Some(new_time);
                        println!("⏩ Seek to {:.1}s (+1min)", new_time);
                    }
                    Some(Keycode::A) => {
                        // Toggle ASR visibility.
                        asr_overlay_visible = !asr_overlay_visible;
                        println!(
                            "{}",
                            if asr_overlay_visible {
                                "🔊 ASR ON"
                            } else {
                                "🔇 ASR OFF"
                            }
                        );
                    }
                    Some(Keycode::F) => {
                        println!("📺 Toggle fullscreen (not implemented in basic SDL2)");
                    }
                    _ => {}
                },
                _ => {}
            }
        }

        if quit {
            break;
        }

        if paused {
            // Keep polling events while paused without burning CPU.
            thread::sleep(Duration::from_millis(100));
            continue;
        }

        // Update ASR text based on current time.
        if !overlay.is_empty() {
            overlay.update(current_time);
        }

        match reader.read_exact(&mut frame_buffer) {
            Ok(_) => {
                texture
                    .update(None, &frame_buffer, (width * 3) as usize)
                    .map_err(|e| anyhow::anyhow!("Texture update failed: {}", e))?;

                canvas.clear();
                canvas
                    .copy(&texture, None, None)
                    .map_err(|e| anyhow::anyhow!("Render failed: {}", e))?;

                // Placeholder: no TTF font context is available here, so the
                // subtitle is logged to the console to verify timing.
                if asr_overlay_visible && !overlay.get_text().is_empty() {
                    println!("[ASR] {:.1}s: {}", current_time, overlay.get_text());
                }

                // Progress bar at the bottom: gray background, green progress.
                let progress = if video_duration > 0.0 {
                    (current_time / video_duration).min(1.0)
                } else {
                    0.0
                };
                let bar_width = ((width as f64) * progress) as u32;

                canvas.set_draw_color(Color::RGB(50, 50, 50));
                let _ = canvas.fill_rect(Rect::new(0, height as i32 - 15, width, 5));

                if bar_width > 0 {
                    canvas.set_draw_color(Color::RGB(0, 200, 0));
                    let _ = canvas.fill_rect(Rect::new(0, height as i32 - 15, bar_width, 5));
                }

                // Reset draw color to black so the next clear() is black.
                canvas.set_draw_color(Color::RGB(0, 0, 0));
                canvas.present();

                frame_count += 1;
                current_time += 1.0 / fps;

                // Pace output to roughly 30 fps.
                let elapsed = frame_start.elapsed();
                if elapsed < frame_duration {
                    thread::sleep(frame_duration - elapsed);
                }
            }
            Err(_) => {
                // A short read means ffmpeg closed the pipe: end of stream.
                println!(
                    "\n📽️ End of video ({} frames, {:.1}s)",
                    frame_count, current_time
                );
                break;
            }
        }
    }

    decoder.stop();
    println!("✅ Playback finished (total: {:.1}s)", current_time);
    Ok(())
}

/// Starts an `ffmpeg` process that writes letterboxed `width`x`height` RGB24
/// frames at 30 fps to its stdout; `start_at` adds an input-side `-ss` seek.
#[cfg(feature = "player")]
fn spawn_sdl2_decoder(
    ffmpeg_path: &str,
    video_path: &str,
    width: u32,
    height: u32,
    start_at: Option<f64>,
) -> std::io::Result<std::process::Child> {
    let scale_filter = format!(
        "scale={}:{}:force_original_aspect_ratio=decrease,pad={}:{}:(ow-iw)/2:(oh-ih)/2",
        width, height, width, height
    );

    let mut command = std::process::Command::new(ffmpeg_path);
    if let Some(position) = start_at {
        // `-ss` must precede `-i` to seek on the input side.
        command.arg("-ss").arg(format!("{:.2}", position));
    }
    command
        .args([
            "-i",
            video_path,
            "-vf",
            scale_filter.as_str(),
            "-pix_fmt",
            "rgb24",
            "-r",
            "30",
            "-f",
            "rawvideo",
            "-",
        ])
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::null())
        .spawn()
}
/// Plays a local video file with an external player (or the built-in SDL2
/// player when `external_player` is `"sdl2"`).
///
/// The video path is the first positional command-line argument. Flags are
/// skipped, and so is the value that follows `-p`/`--player`, so that
/// `-l -p mpv clip.mp4` resolves to `clip.mp4` rather than `mpv`. When no
/// path is given on the command line the user is prompted for one.
///
/// # Errors
/// Returns an error when no path is provided, the file does not exist,
/// reading stdin fails, or the player process cannot be spawned.
fn run_local_mode(external_player: &str) -> Result<()> {
    use std::io::Write;

    let args: Vec<String> = env::args().collect();

    // First positional argument, ignoring flags and the -p/--player value.
    let mut cli_path: Option<String> = None;
    let mut remaining = args.iter().skip(1); // skip binary name
    while let Some(arg) = remaining.next() {
        if arg == "-p" || arg == "--player" {
            remaining.next(); // the next token is the player name, not a path
        } else if !arg.starts_with('-') {
            cli_path = Some(arg.clone());
            break;
        }
    }

    let video_path = match cli_path {
        Some(p) if !p.is_empty() => p,
        _ => {
            println!("Local Mode - Play local video files");
            println!("=====================================\n");
            print!("Enter video file path: ");
            // stdout is line-buffered; flush so the prompt shows before we block.
            std::io::stdout().flush()?;

            let mut input = String::new();
            std::io::stdin().read_line(&mut input)?;
            let path = input.trim().to_string();

            if path.is_empty() {
                anyhow::bail!("No video path provided");
            }
            path
        }
    };

    if !Path::new(&video_path).exists() {
        anyhow::bail!("File not found: {}", video_path);
    }

    println!("\nUsing external player: {}", external_player);
    println!("Playing: {}", video_path);

    match external_player {
        "vlc" => {
            if cfg!(target_os = "macos") {
                // VLC is an app bundle on macOS; launch it through `open`.
                std::process::Command::new("open")
                    .arg("-a")
                    .arg("VLC")
                    .arg(&video_path)
                    .spawn()?;
            } else {
                std::process::Command::new("vlc").arg(&video_path).spawn()?;
            }
            println!("✅ Opened with VLC");
        }
        "mpv" => {
            std::process::Command::new("mpv").arg(&video_path).spawn()?;
            println!("✅ Opened with mpv");
        }
        "ffplay" => {
            std::process::Command::new("ffplay")
                .arg("-autoexit")
                .arg(&video_path)
                .spawn()?;
            println!("✅ Opened with ffplay");
        }
        "sdl2" => {
            #[cfg(feature = "player")]
            return run_player_with_sdl2(&video_path, None);
            #[cfg(not(feature = "player"))]
            {
                println!("SDL2 player not enabled. Rebuild with --features player");
            }
        }
        _ => {
            // Any other value is treated as the name of an executable.
            std::process::Command::new(external_player)
                .arg(&video_path)
                .spawn()?;
            println!("✅ Opened with {}", external_player);
        }
    }

    Ok(())
}
fn run_online_mode() -> Result<()> {
println!("\n===========================================");
println!(" 🎬 Online Mode - Momentry");
println!("===========================================\n");
let client = ApiClient::new();
println!("Connected to API: {}", client.base_url());
let rt = tokio::runtime::Runtime::new()?;
loop {
println!("\n┌─────────────────────────────────────────┐");
println!("│ Online Mode Menu │");
println!("├─────────────────────────────────────────┤");
println!("│ [1] List Videos - 列出所有影片 │");
println!("│ [2] Search - RAG 搜尋影片內容 │");
println!("│ [3] Play - 播放影片 │");
println!("│ [4] Lookup - 查詢影片資訊 │");
println!("│ [q] Quit - 離開 │");
println!("└─────────────────────────────────────────┘");
print!("\n請選擇: ");
let mut input = String::new();
std::io::stdin().read_line(&mut input)?;
let choice = input.trim();
match choice {
"1" => {
println!("\n=== 📋 影片列表 ===");
match rt.block_on(client.list_videos()) {
Ok(videos) => {
if videos.is_empty() {
println!("沒有找到任何影片");
} else {
println!("\n{} 部影片:\n", videos.len());
for (i, v) in videos.iter().enumerate() {
let duration = format!(
"{}:{:02}",
(v.duration / 60.0) as u32,
(v.duration % 60.0) as u32
);
println!(
" [{}] {} | {} | {}x{} | {}",
i + 1,
v.file_name,
v.uuid.chars().take(8).collect::<String>(),
v.width,
v.height,
duration
);
}
}
}
Err(e) => println!("取得影片列表失敗: {}", e),
}
}
"2" => {
println!("\n=== 🔍 RAG 搜尋 ===");
print!("輸入搜尋關鍵字: ");
input.clear();
std::io::stdin().read_line(&mut input)?;
let query = input.trim().to_string();
if query.is_empty() {
println!("搜尋關鍵字不能為空");
continue;
}
print!("限定特定影片?(y/N): ");
input.clear();
std::io::stdin().read_line(&mut input)?;
let limit_uuid = if input.trim().to_lowercase() == "y" {
print!("輸入影片 UUID: ");
input.clear();
std::io::stdin().read_line(&mut input)?;
Some(input.trim().to_string())
} else {
None
};
println!("\n搜尋中...");
match rt.block_on(client.search_chunks(&query, limit_uuid.as_deref(), Some(10))) {
Ok(response) => {
if response.results.is_empty() {
println!("沒有找到結果");
continue;
}
println!("\n找到 {} 個結果:\n", response.results.len());
for (i, r) in response.results.iter().enumerate() {
let time_range = format!(
"{:02}:{:02} - {:02}:{:02}",
(r.start_time / 60.0) as u32,
(r.start_time % 60.0) as u32,
(r.end_time / 60.0) as u32,
(r.end_time % 60.0) as u32
);
let text_preview = if r.text.len() > 50 {
format!("{}...", &r.text[..50])
} else {
r.text.clone()
};
println!(
" [{}] {} | {} | {:.2} | {}",
i + 1,
time_range,
r.uuid.chars().take(8).collect::<String>(),
r.score,
text_preview
);
}
let mut current_player: Option<std::process::Child> = None;
loop {
if let Some(ref mut child) = current_player {
match child.try_wait() {
Ok(Some(_)) => {
println!("播放器已結束");
current_player = None;
}
Ok(None) => {
// 還在執行中
}
Err(e) => {
println!("檢查播放器狀態失敗:{}", e);
current_player = None;
}
}
}
print!(
"\n選擇播放 (1-{}) 或 q 離開 (kill player), L 重新顯示列表:",
response.results.len()
);
input.clear();
std::io::stdin().read_line(&mut input)?;
let selection = input.trim();
let selection_lower = selection.to_lowercase();
if selection_lower == "q" {
if let Some(ref mut child) = current_player {
let _ = child.kill();
let _ = child.wait();
println!("已終止播放器");
current_player = None;
}
break;
}
if selection_lower == "l" {
println!("\n搜尋結果:");
for (i, r) in response.results.iter().enumerate() {
let time_range = format!(
"{:02}:{:02} - {:02}:{:02}",
(r.start_time / 60.0) as u32,
(r.start_time % 60.0) as u32,
(r.end_time / 60.0) as u32,
(r.end_time % 60.0) as u32
);
let text_preview = if r.text.len() > 50 {
format!("{}...", &r.text[..50])
} else {
r.text.clone()
};
println!(
" [{}] {} | {} | {:.2} | {}",
i + 1,
time_range,
r.uuid.chars().take(8).collect::<String>(),
r.score,
text_preview
);
}
continue;
}
if let Ok(idx) = selection.parse::<usize>() {
if idx > 0 && idx <= response.results.len() {
let selected = &response.results[idx - 1];
println!("\n播放:{} - {}", selected.uuid, selected.text);
if let Some(ref mut child) = current_player {
let _ = child.kill();
let _ = child.wait();
println!("已終止前一個播放器");
}
match rt.block_on(client.lookup_video(&selected.uuid)) {
Ok(info) => {
if let Some(path) = &info.file_path {
if std::path::Path::new(path).exists() {
let start_sec =
(selected.start_time as f64) - 2.0;
let end_sec = (selected.end_time as f64) + 2.0;
println!(
"開啟:{} (從 {:.0}{:.0}A-B 循環)",
path, start_sec, end_sec
);
println!("提示mpv 視窗中按 c/C 切換循環q 離開Space 暫停");
current_player = Some(
std::process::Command::new("mpv")
.arg(format!(
"--start={:.2}",
start_sec.max(0.0)
))
.arg(format!(
"--ab-loop-a={:.2}",
start_sec.max(0.0)
))
.arg(format!("--ab-loop-b={:.2}", end_sec))
.arg("--input-commands=bind c ab-loop; bind C ab-loop")
.arg(path)
.spawn()?
);
} else {
println!("錯誤:檔案不存在:{}", path);
}
}
}
Err(e) => println!("查詢失敗:{}", e),
}
}
}
}
}
Err(e) => println!("搜尋失敗:{}", e),
}
}
"4" => {
println!("\n=== 🔎 查詢影片 ===");
print!("輸入影片 UUID (直接 Enter 從列表選擇): ");
input.clear();
std::io::stdin().read_line(&mut input)?;
let uuid = input.trim();
if uuid.is_empty() {
println!("載入影片列表...");
match rt.block_on(client.list_videos()) {
Ok(videos) => {
if videos.is_empty() {
println!("沒有影片");
continue;
}
println!("\n選擇影片:");
for (i, v) in videos.iter().enumerate() {
println!(" [{}] {} ({})", i + 1, v.file_name, v.uuid);
}
print!("\n選擇編號:");
input.clear();
std::io::stdin().read_line(&mut input)?;
if let Ok(idx) = input.trim().parse::<usize>() {
if idx > 0 && idx <= videos.len() {
let selected = &videos[idx - 1];
println!("\n查詢中...");
match rt.block_on(client.lookup_video(&selected.uuid)) {
Ok(info) => {
println!("\n✓ 找到影片:");
println!(" UUID: {}", info.uuid);
if let Some(path) = &info.file_path {
println!(" 路徑:{}", path);
}
if let Some(name) = &info.file_name {
println!(" 名稱:{}", name);
}
if let Some(dur) = info.duration {
println!(" 時長:{:.2}s", dur);
}
}
Err(e) => println!("查詢失敗:{}", e),
}
}
}
}
Err(e) => println!("取得影片列表失敗:{}", e),
}
} else {
println!("\n查詢中...");
match rt.block_on(client.lookup_video(uuid)) {
Ok(info) => {
println!("\n✓ 找到影片:");
println!(" UUID: {}", info.uuid);
if let Some(path) = &info.file_path {
println!(" 路徑:{}", path);
}
if let Some(name) = &info.file_name {
println!(" 名稱:{}", name);
}
if let Some(dur) = info.duration {
println!(" 時長:{:.2}s", dur);
}
}
Err(e) => println!("查詢失敗:{}", e),
}
}
}
"3" => {
println!("\n=== ▶ 播放影片 ===");
print!("輸入影片 UUID (直接 Enter 從列表選擇): ");
input.clear();
std::io::stdin().read_line(&mut input)?;
let uuid = input.trim();
if uuid.is_empty() {
println!("載入影片列表...");
match rt.block_on(client.list_videos()) {
Ok(videos) => {
if videos.is_empty() {
println!("沒有影片");
continue;
}
println!("\n選擇影片:");
for (i, v) in videos.iter().enumerate() {
println!(" [{}] {} ({})", i + 1, v.file_name, v.uuid);
}
print!("\n選擇編號:");
input.clear();
std::io::stdin().read_line(&mut input)?;
if let Ok(idx) = input.trim().parse::<usize>() {
if idx > 0 && idx <= videos.len() {
let selected = &videos[idx - 1];
println!("\n播放: {}", selected.file_path);
if std::path::Path::new(&selected.file_path).exists() {
std::process::Command::new("mpv")
.arg(&selected.file_path)
.spawn()?;
} else {
println!("錯誤:檔案不存在:{}", selected.file_path);
}
}
}
}
Err(e) => println!("取得影片列表失敗:{}", e),
}
} else {
match rt.block_on(client.lookup_video(uuid)) {
Ok(info) => {
if let Some(path) = &info.file_path {
println!("開啟: {}", path);
if std::path::Path::new(path).exists() {
std::process::Command::new("mpv").arg(path).spawn()?;
} else {
println!("錯誤:檔案不存在:{}", path);
}
}
}
Err(e) => println!("查詢失敗:{}", e),
}
}
}
"q" | "Q" => {
println!("\n👋 再見!");
break;
}
_ => {
println!("無效選項");
}
}
}
Ok(()) Ok(())
} }
@@ -523,17 +1251,37 @@ fn main() -> Result<()> {
let should_download = args.iter().any(|a| a == "-d" || a == "--download"); let should_download = args.iter().any(|a| a == "-d" || a == "--download");
let show_selector = args.iter().any(|a| a == "-s" || a == "--selector"); let show_selector = args.iter().any(|a| a == "-s" || a == "--selector");
let test_api_mode = args.iter().any(|a| a == "-t" || a == "--test-api"); let test_api_mode = args.iter().any(|a| a == "-t" || a == "--test-api");
let local_mode = args.iter().any(|a| a == "-l" || a == "--local");
let online_mode = args.iter().any(|a| a == "-o" || a == "--online");
// Get external player choice
let external_player = args
.iter()
.position(|a| a == "-p" || a == "--player")
.and_then(|i| args.get(i + 1))
.cloned()
.unwrap_or_else(|| "vlc".to_string());
// API Testing Mode // API Testing Mode
if test_api_mode { if test_api_mode {
return run_api_test_mode(); return run_api_test_mode();
} }
// If --selector flag is provided, show video selector // If --selector flag is provided, show video selector (online mode)
if show_selector { if show_selector {
return run_selector(); return run_selector();
} }
// If --online or -o is provided, run online mode
if online_mode {
return run_online_mode();
}
// If --local or -l is provided, run local mode with external player
if local_mode {
return run_local_mode(&external_player);
}
let video_path = if args.len() < 2 || (should_download && args.len() < 3) { let video_path = if args.len() < 2 || (should_download && args.len() < 3) {
println!("Video Player\n============\nEnter video path or YouTube URL:"); println!("Video Player\n============\nEnter video path or YouTube URL:");
let mut input = String::new(); let mut input = String::new();

View File

@@ -4,6 +4,7 @@ use futures_util::StreamExt;
use std::path::Path; use std::path::Path;
use std::str; use std::str;
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use tracing::{info, warn};
use momentry_core::core::api_key::{ApiKeyService, ApiKeyType}; use momentry_core::core::api_key::{ApiKeyService, ApiKeyType};
use momentry_core::core::chunk::types::{Chunk, ChunkRule, ChunkType}; use momentry_core::core::chunk::types::{Chunk, ChunkRule, ChunkType};
@@ -1813,6 +1814,64 @@ async fn main() -> Result<()> {
} }
}; };
// Read Pose JSON (optional)
let pose_path = format!("{}.pose.json", uuid);
let pose_result = match std::fs::read_to_string(&pose_path) {
Ok(pose_json) => match serde_json::from_str::<
momentry_core::core::processor::pose::PoseResult,
>(&pose_json)
{
Ok(result) => {
println!("Loaded Pose: {} frames", result.frames.len());
result
}
Err(e) => {
println!("Warning: Failed to parse Pose JSON: {}. Skipping Pose.", e);
momentry_core::core::processor::pose::PoseResult {
frame_count: 0,
fps: 0.0,
frames: vec![],
}
}
},
Err(_) => {
println!("Warning: Pose file not found. Skipping Pose.");
momentry_core::core::processor::pose::PoseResult {
frame_count: 0,
fps: 0.0,
frames: vec![],
}
}
};
// Read ASRX JSON (optional)
let asrx_path = format!("{}.asrx.json", uuid);
let asrx_result = match std::fs::read_to_string(&asrx_path) {
Ok(asrx_json) => match serde_json::from_str::<
momentry_core::core::processor::asrx::AsrxResult,
>(&asrx_json)
{
Ok(result) => {
println!("Loaded ASRX: {} segments", result.segments.len());
result
}
Err(e) => {
println!("Warning: Failed to parse ASRX JSON: {}. Skipping ASRX.", e);
momentry_core::core::processor::asrx::AsrxResult {
language: None,
segments: vec![],
}
}
},
Err(_) => {
println!("Warning: ASRX file not found. Skipping ASRX.");
momentry_core::core::processor::asrx::AsrxResult {
language: None,
segments: vec![],
}
}
};
// ========== Store pre_chunks (from ASR, CUT) ========== // ========== Store pre_chunks (from ASR, CUT) ==========
println!("\nStoring pre_chunks..."); println!("\nStoring pre_chunks...");
@@ -1930,12 +1989,21 @@ async fn main() -> Result<()> {
face_by_frame.insert(frame.frame, frame.clone()); face_by_frame.insert(frame.frame, frame.clone());
} }
// Store frames (merge data from YOLO, OCR, Face) let mut pose_by_frame: std::collections::HashMap<
u64,
momentry_core::core::processor::pose::PoseFrame,
> = std::collections::HashMap::new();
for frame in &pose_result.frames {
pose_by_frame.insert(frame.frame, frame.clone());
}
// Store frames (merge data from YOLO, OCR, Face, Pose)
let mut all_frames: Vec<u64> = frame_data let mut all_frames: Vec<u64> = frame_data
.keys() .keys()
.cloned() .cloned()
.chain(ocr_by_frame.keys().cloned()) .chain(ocr_by_frame.keys().cloned())
.chain(face_by_frame.keys().cloned()) .chain(face_by_frame.keys().cloned())
.chain(pose_by_frame.keys().cloned())
.collect(); .collect();
all_frames.sort(); all_frames.sort();
all_frames.dedup(); all_frames.dedup();
@@ -1945,6 +2013,7 @@ async fn main() -> Result<()> {
let yolo_frame = frame_data.get(frame_num); let yolo_frame = frame_data.get(frame_num);
let ocr_frame = ocr_by_frame.get(frame_num); let ocr_frame = ocr_by_frame.get(frame_num);
let face_frame = face_by_frame.get(frame_num); let face_frame = face_by_frame.get(frame_num);
let pose_frame = pose_by_frame.get(frame_num);
let frame = momentry_core::core::db::postgres_db::Frame { let frame = momentry_core::core::db::postgres_db::Frame {
id: 0, id: 0,
@@ -1955,6 +2024,7 @@ async fn main() -> Result<()> {
yolo_objects: yolo_frame.map(|f| serde_json::json!(&f.objects)), yolo_objects: yolo_frame.map(|f| serde_json::json!(&f.objects)),
ocr_results: ocr_frame.map(|f| serde_json::json!(&f.texts)), ocr_results: ocr_frame.map(|f| serde_json::json!(&f.texts)),
face_results: face_frame.map(|f| serde_json::json!(&f.faces)), face_results: face_frame.map(|f| serde_json::json!(&f.faces)),
pose_results: pose_frame.map(|f| serde_json::json!(&f.persons)),
frame_path: None, frame_path: None,
created_at: String::new(), created_at: String::new(),
}; };
@@ -1968,10 +2038,30 @@ async fn main() -> Result<()> {
println!("\nCreating chunks..."); println!("\nCreating chunks...");
// Rule 1: Direct conversion (sentence pre_chunk -> sentence chunk) // Rule 1: Direct conversion (sentence pre_chunk -> sentence chunk)
// Merge ASRX speaker_id by time overlap
let mut sentence_chunks = Vec::new(); let mut sentence_chunks = Vec::new();
for (i, seg) in asr_result.segments.iter().enumerate() { for (i, seg) in asr_result.segments.iter().enumerate() {
let pre_chunk_id = asr_pre_chunk_ids.get(i).copied().unwrap_or(0); let pre_chunk_id = asr_pre_chunk_ids.get(i).copied().unwrap_or(0);
let chunk = Chunk::from_seconds(
// Find matching ASRX segment by time overlap
let speaker_id = asrx_result
.segments
.iter()
.find(|ax| ax.start <= seg.end && ax.end >= seg.start)
.and_then(|ax| ax.speaker_id.clone());
let content = if let Some(ref sid) = speaker_id {
serde_json::json!({
"text": seg.text,
"speaker_id": sid,
})
} else {
serde_json::json!({
"text": seg.text,
})
};
let mut chunk = Chunk::from_seconds(
file_id as i32, file_id as i32,
uuid.clone(), uuid.clone(),
i as u32, i as u32,
@@ -1980,15 +2070,39 @@ async fn main() -> Result<()> {
seg.start, seg.start,
seg.end, seg.end,
fps, fps,
serde_json::json!({ content,
"text": seg.text,
}),
) )
.with_text_content(seg.text.clone()) .with_text_content(seg.text.clone())
.with_pre_chunk_ids(vec![pre_chunk_id as i32]); .with_pre_chunk_ids(vec![pre_chunk_id as i32]);
if speaker_id.is_some() {
chunk = chunk.with_metadata(serde_json::json!({
"language": asr_result.language,
"language_probability": asr_result.language_probability,
"speaker_matched": true,
}));
}
sentence_chunks.push(chunk); sentence_chunks.push(chunk);
} }
if !asrx_result.segments.is_empty() {
let matched = sentence_chunks
.iter()
.filter(|c| {
c.content
.get("speaker_id")
.and_then(|v| v.as_str())
.is_some()
})
.count();
println!(
" ASRX merge: {}/{} sentence chunks matched to speakers",
matched,
sentence_chunks.len()
);
}
// Rule 1: CUT chunks // Rule 1: CUT chunks
let mut cut_chunks = Vec::new(); let mut cut_chunks = Vec::new();
for (i, scene) in cut_result.scenes.iter().enumerate() { for (i, scene) in cut_result.scenes.iter().enumerate() {
@@ -2405,6 +2519,20 @@ async fn main() -> Result<()> {
Ok(()) Ok(())
} }
Commands::Server { host, port } => { Commands::Server { host, port } => {
// Start Auto-Ingest Watcher
info!("Starting Auto-Ingest Watcher...");
let _watcher = match momentry_core::watcher::run_watcher().await {
Ok(w) => {
info!("Auto-Ingest Watcher started successfully.");
Some(w)
}
Err(e) => {
warn!("Failed to start Auto-Ingest Watcher: {}", e);
None
}
};
// The watcher is kept alive by '_watcher' variable until the server stops.
let port = port.unwrap_or_else(|| *momentry_core::core::config::SERVER_PORT); let port = port.unwrap_or_else(|| *momentry_core::core::config::SERVER_PORT);
momentry_core::api::start_server(&host, port).await?; momentry_core::api::start_server(&host, port).await?;
Ok(()) Ok(())
@@ -2461,13 +2589,13 @@ async fn main() -> Result<()> {
Commands::Thumbnails { uuid, count } => { Commands::Thumbnails { uuid, count } => {
let db = PostgresDb::init().await?; let db = PostgresDb::init().await?;
let videos = if let Some(ref uuid) = uuid { let videos = if let Some(ref u) = uuid {
vec![db vec![db
.get_video_by_uuid(uuid) .get_video_by_uuid(u)
.await? .await?
.ok_or_else(|| anyhow::anyhow!("Video not found: {}", uuid))?] .ok_or_else(|| anyhow::anyhow!("Video not found: {}", u))?]
} else { } else {
db.list_videos().await? db.list_videos(10000, 0).await?.0
}; };
let output_dir = std::path::PathBuf::from("thumbnails"); let output_dir = std::path::PathBuf::from("thumbnails");
@@ -2484,12 +2612,10 @@ async fn main() -> Result<()> {
println!(" Generated {} thumbnails", result.count); println!(" Generated {} thumbnails", result.count);
} }
Err(e) => { Err(e) => {
println!(" Error: {}", e); eprintln!(" Failed to generate thumbnails: {}", e);
} }
} }
} }
println!("\nThumbnails generated successfully!");
Ok(()) Ok(())
} }
Commands::Status { uuid } => { Commands::Status { uuid } => {
@@ -2501,7 +2627,7 @@ async fn main() -> Result<()> {
.await? .await?
.ok_or_else(|| anyhow::anyhow!("Video not found: {}", u))?] .ok_or_else(|| anyhow::anyhow!("Video not found: {}", u))?]
} else { } else {
db.list_videos().await? db.list_videos(10000, 0).await?.0
}; };
println!("\n╔══════════════════════════════════════════════════════════════════════════════════╗"); println!("\n╔══════════════════════════════════════════════════════════════════════════════════╗");
@@ -2513,6 +2639,22 @@ async fn main() -> Result<()> {
"{:32}{:8}{:8}{:8}{:8}{:8}{:8}{:8}", "{:32}{:8}{:8}{:8}{:8}{:8}{:8}{:8}",
"Video", "FS", "FS", "PSQL", "PObj", "MObj", "PVec", "QVec" "Video", "FS", "FS", "PSQL", "PObj", "MObj", "PVec", "QVec"
); );
println!(
"{:33}{:9}{:9}{:9}{:9}{:9}{:9}{:9}",
str::repeat("", 32),
str::repeat("", 8),
str::repeat("", 8),
str::repeat("", 8),
str::repeat("", 8),
str::repeat("", 8),
str::repeat("", 8),
str::repeat("", 8)
);
println!("╠══════════════════════════════════════════════════════════════════════════════════╣");
println!(
"{:32}{:8}{:8}{:8}{:8}{:8}{:8}{:8}",
"Video", "FS", "FS", "PSQL", "PObj", "MObj", "PVec", "QVec"
);
println!( println!(
"{:32}{:8}{:8}{:8}{:8}{:8}{:8}{:8}", "{:32}{:8}{:8}{:8}{:8}{:8}{:8}{:8}",
"", "Video", "JSON", "Chunk", "Chunk", "Chunk", "Chunk", "Chunk" "", "Video", "JSON", "Chunk", "Chunk", "Chunk", "Chunk", "Chunk"

View File

@@ -1,3 +1,3 @@
pub mod watcher; pub mod watcher;
pub use watcher::{watch_directories, WatcherConfig}; pub use watcher::{run_watcher, WatcherConfig};

View File

@@ -1,8 +1,11 @@
use anyhow::Result; use anyhow::Result;
use notify::{Config, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher};
use std::path::Path; use std::path::Path;
use std::sync::Arc; use std::sync::Arc;
use tokio::sync::mpsc; use tokio::time;
use tracing::{error, info, warn};
use crate::core::db::{Database, PostgresDb};
use crate::core::ingestion::IngestionService;
pub struct WatcherConfig { pub struct WatcherConfig {
pub directories: Vec<String>, pub directories: Vec<String>,
@@ -11,31 +14,94 @@ pub struct WatcherConfig {
impl Default for WatcherConfig { impl Default for WatcherConfig {
fn default() -> Self { fn default() -> Self {
// Default to SFTP demo directory if not specified
let default_dir = std::env::var("MOMENTRY_SFTP_ROOT")
.unwrap_or_else(|_| "/Users/accusys/momentry/var/sftpgo/data/demo/".to_string());
Self { Self {
directories: vec![], directories: vec![default_dir],
poll_interval_ms: 5000, poll_interval_ms: 60000, // 60 seconds polling interval
} }
} }
} }
pub async fn watch_directories(config: WatcherConfig, tx: mpsc::Sender<String>) -> Result<()> { /// Starts the file watcher in the background.
// TODO: Implement directory watcher /// Scans directories for video files and registers them if not already present.
// pub async fn run_watcher() -> Result<()> {
// Options: let config = WatcherConfig::default();
// 1. Use notify crate for file system events let dirs = config.directories.clone();
// 2. Use polling as fallback
//
// When new video file is detected:
// - Send job to Redis queue
// - Trigger registration process
println!("Watching directories: {:?}", config.directories); if dirs.is_empty() {
warn!("No directories configured for watching.");
for dir in &config.directories { return Err(anyhow::anyhow!("No watch directories"));
if Path::new(dir).exists() {
println!("Directory exists: {}", dir);
}
} }
info!("Initializing Database for Watcher...");
// Use Database::init() which handles config and pool creation
let db = PostgresDb::init().await?;
let service = Arc::new(IngestionService::new(db));
info!("Starting Ingestion Poller for: {:?}", dirs);
// Spawn background task
tokio::spawn(async move {
let mut interval = time::interval(time::Duration::from_millis(config.poll_interval_ms));
// Run once immediately on startup to catch existing files
scan_and_ingest(&dirs, &service).await;
loop {
interval.tick().await;
scan_and_ingest(&dirs, &service).await;
}
});
Ok(()) Ok(())
} }
/// Scans every watched directory once and hands each video file to the ingestion service.
///
/// Only the direct children of each directory are inspected (no recursion). A file is
/// treated as a video when its extension, compared case-insensitively, is `mp4`, `mov`
/// or `mkv`.
///
/// This function never fails: missing or unreadable directories, unreadable entries,
/// non-UTF-8 paths and ingestion errors are logged and skipped so that a single bad
/// item cannot stop the scan.
///
/// * `directories` - paths of the directories to scan.
/// * `service` - ingestion service that each video path is passed to via `ingest`.
async fn scan_and_ingest(directories: &[String], service: &Arc<IngestionService>) {
    // Lowercase extensions (without the dot) accepted as video files.
    const ALLOWED_EXTENSIONS: [&str; 3] = ["mp4", "mov", "mkv"];

    info!("Scanning directories for new videos...");

    for dir in directories {
        let path = Path::new(dir);
        if !path.exists() {
            warn!("Directory does not exist, skipping: {}", dir);
            continue;
        }

        // NOTE(review): std::fs::read_dir and is_file are synchronous calls made from an
        // async fn; consider tokio::fs or spawn_blocking if the watched directories are
        // large or on slow storage.
        let entries = match std::fs::read_dir(path) {
            Ok(entries) => entries,
            Err(e) => {
                // Previously swallowed: e.g. a permission problem made the watcher look
                // idle with no diagnostics at all.
                warn!("Failed to read directory {}, skipping: {}", dir, e);
                continue;
            }
        };

        for entry in entries {
            let entry = match entry {
                Ok(entry) => entry,
                Err(e) => {
                    warn!("Failed to read an entry in {}, skipping: {}", dir, e);
                    continue;
                }
            };

            let file_path = entry.path();
            if !file_path.is_file() {
                continue;
            }

            // Files without an extension, or with a non-UTF-8 one, are not videos.
            let is_video = file_path
                .extension()
                .and_then(|ext| ext.to_str())
                .map_or(false, |ext| {
                    ALLOWED_EXTENSIONS.contains(&ext.to_lowercase().as_str())
                });
            if !is_video {
                continue;
            }

            // `ingest` takes the path as &str, so a non-UTF-8 path cannot be submitted.
            let p_str = match file_path.to_str() {
                Some(p_str) => p_str,
                None => {
                    warn!(
                        "Skipping video with non-UTF-8 path: {}",
                        file_path.display()
                    );
                    continue;
                }
            };

            // Try to ingest. The service checks if it already exists.
            match service.ingest(p_str).await {
                Ok(Some(uuid)) => {
                    info!("Auto-registered: {} -> {}", file_path.display(), uuid);
                }
                Ok(None) => {
                    // Already registered: nothing to do and nothing worth logging.
                }
                Err(e) => {
                    error!("Failed to ingest {}: {}", file_path.display(), e);
                }
            }
        }
    }
}

View File

@@ -5,6 +5,7 @@ use std::time::Duration;
use tokio::time::sleep; use tokio::time::sleep;
use tracing::{error, info, warn}; use tracing::{error, info, warn};
use crate::core::chunk::{rule1_ingest, rule3_ingest};
use crate::core::db::{ use crate::core::db::{
MonitorJobStatus, PostgresDb, ProcessorJobStatus, ProcessorType, RedisClient, VideoStatus, MonitorJobStatus, PostgresDb, ProcessorJobStatus, ProcessorType, RedisClient, VideoStatus,
}; };
@@ -210,12 +211,58 @@ impl JobWorker {
.map(|r| r.processor_type.as_str().to_string()) .map(|r| r.processor_type.as_str().to_string())
.collect(); .collect();
// Check prerequisites for Rule 1 Chunking BEFORE moving arrays
let has_asr = completed_processors.iter().any(|p| p == "asr");
let has_asrx = completed_processors.iter().any(|p| p == "asrx");
let has_cut = completed_processors.iter().any(|p| p == "cut");
// Update processor arrays in job record // Update processor arrays in job record
self.db self.db
.update_job_processors_arrays(job_id, completed_processors, failed_processors) .update_job_processors_arrays(job_id, completed_processors, failed_processors)
.await?; .await?;
if all_completed && !any_failed { if all_completed && !any_failed {
// 🚀 P1 Trigger: Rule 1 Chunking
if has_asr && has_asrx {
info!("📝 Prerequisites met for Rule 1 Chunking. Starting ingestion...");
let db_clone = self.db.clone();
let uuid_clone = uuid.to_string();
tokio::spawn(async move {
match db_clone.get_video_by_uuid(&uuid_clone).await {
Ok(Some(video)) => {
let fps = video.fps;
match rule1_ingest::ingest_rule1(db_clone.pool(), &uuid_clone, fps)
.await
{
Ok(count) => info!(
"✅ Rule 1 Ingestion completed: {} chunks inserted.",
count
),
Err(e) => error!("❌ Rule 1 Ingestion failed: {}", e),
}
}
Ok(None) => error!("Video not found for chunking: {}", uuid_clone),
Err(e) => error!("Failed to get video info for chunking: {}", e),
}
});
}
// 🚀 P1 Trigger: Rule 3 Scene Chunking
if has_cut && has_asr {
info!("📝 Prerequisites met for Rule 3 Scene Chunking. Starting ingestion...");
let db_clone = self.db.clone();
let uuid_clone = uuid.to_string();
tokio::spawn(async move {
match rule3_ingest::ingest_rule3(db_clone.pool(), &uuid_clone).await {
Ok(count) => info!(
"✅ Rule 3 Scene Ingestion completed: {} scenes processed.",
count
),
Err(e) => error!("❌ Rule 3 Scene Ingestion failed: {}", e),
}
});
}
self.db self.db
.update_job_status(job_id, MonitorJobStatus::Completed) .update_job_status(job_id, MonitorJobStatus::Completed)
.await?; .await?;

View File

@@ -16,6 +16,7 @@ use crate::core::processor::cut::CutResult;
use crate::core::processor::face::FaceResult; use crate::core::processor::face::FaceResult;
use crate::core::processor::ocr::OcrResult; use crate::core::processor::ocr::OcrResult;
use crate::core::processor::pose::PoseResult; use crate::core::processor::pose::PoseResult;
use crate::core::processor::visual_chunk::VisualChunkResult;
use crate::core::processor::yolo::YoloResult; use crate::core::processor::yolo::YoloResult;
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@@ -302,6 +303,24 @@ impl ProcessorPool {
} }
Ok(serde_json::to_value(result)?) Ok(serde_json::to_value(result)?)
} }
ProcessorType::VisualChunk => {
let result = processor::process_visual_chunk_advanced(
video_path,
output_path.to_str().unwrap(),
uuid,
)
.await?;
// Store VisualChunk chunks in database
tracing::info!(
"VisualChunk completed, storing {} chunks for {}",
result.chunk_count,
job.uuid
);
if let Err(e) = Self::store_visual_chunk_chunks(db, &job.uuid, &result).await {
tracing::error!("Failed to store VisualChunk chunks for {}: {}", job.uuid, e);
}
Ok(serde_json::to_value(result)?)
}
} }
} }
@@ -605,6 +624,13 @@ impl ProcessorPool {
// Override chunk_id to include processor prefix for uniqueness // Override chunk_id to include processor prefix for uniqueness
chunk.chunk_id = format!("trace_yolo_{:04}", i); chunk.chunk_id = format!("trace_yolo_{:04}", i);
// Populate text_content for BM25 search
let object_names: Vec<String> =
frame.objects.iter().map(|o| o.class_name.clone()).collect();
if !object_names.is_empty() {
chunk = chunk.with_text_content(object_names.join(" "));
}
match db.store_chunk(&chunk).await { match db.store_chunk(&chunk).await {
Ok(_) => { Ok(_) => {
tracing::info!( tracing::info!(
@@ -660,6 +686,12 @@ impl ProcessorPool {
// Override chunk_id to include processor prefix for uniqueness // Override chunk_id to include processor prefix for uniqueness
chunk.chunk_id = format!("trace_ocr_{:04}", i); chunk.chunk_id = format!("trace_ocr_{:04}", i);
// Populate text_content for BM25 search
let texts: Vec<String> = frame.texts.iter().map(|t| t.text.clone()).collect();
if !texts.is_empty() {
chunk = chunk.with_text_content(texts.join(" "));
}
match db.store_chunk(&chunk).await { match db.store_chunk(&chunk).await {
Ok(_) => { Ok(_) => {
tracing::info!( tracing::info!(
@@ -715,6 +747,16 @@ impl ProcessorPool {
// Override chunk_id to include processor prefix for uniqueness // Override chunk_id to include processor prefix for uniqueness
chunk.chunk_id = format!("trace_face_{:04}", i); chunk.chunk_id = format!("trace_face_{:04}", i);
// Populate text_content for BM25 search (face IDs)
let face_ids: Vec<String> = frame
.faces
.iter()
.filter_map(|f| f.face_id.clone())
.collect();
if !face_ids.is_empty() {
chunk = chunk.with_text_content(face_ids.join(" "));
}
match db.store_chunk(&chunk).await { match db.store_chunk(&chunk).await {
Ok(_) => { Ok(_) => {
tracing::info!( tracing::info!(
@@ -770,6 +812,16 @@ impl ProcessorPool {
// Override chunk_id to include processor prefix for uniqueness // Override chunk_id to include processor prefix for uniqueness
chunk.chunk_id = format!("trace_pose_{:04}", i); chunk.chunk_id = format!("trace_pose_{:04}", i);
// Populate text_content for BM25 search (person count indicator)
let person_count = frame.persons.len();
if person_count > 0 {
let text = format!("person person person")
.repeat(person_count.min(10))
.trim()
.to_string();
chunk = chunk.with_text_content(text);
}
match db.store_chunk(&chunk).await { match db.store_chunk(&chunk).await {
Ok(_) => { Ok(_) => {
tracing::info!( tracing::info!(
@@ -825,6 +877,16 @@ impl ProcessorPool {
// Override chunk_id to include processor prefix for uniqueness // Override chunk_id to include processor prefix for uniqueness
chunk.chunk_id = format!("trace_asrx_{:04}", i); chunk.chunk_id = format!("trace_asrx_{:04}", i);
// Populate text_content for BM25 search (already has text)
chunk = chunk.with_text_content(segment.text.clone());
// Also store speaker_id in content
chunk.content = serde_json::json!({
"text": segment.text,
"speaker_id": segment.speaker_id,
"timestamp": segment.start,
});
match db.store_chunk(&chunk).await { match db.store_chunk(&chunk).await {
Ok(_) => { Ok(_) => {
tracing::info!("Stored ASRX chunk {} for video {}", i, uuid); tracing::info!("Stored ASRX chunk {} for video {}", i, uuid);
@@ -837,6 +899,24 @@ impl ProcessorPool {
Ok(()) Ok(())
} }
pub async fn store_visual_chunk_chunks(
db: &PostgresDb,
uuid: &str,
visual_chunk_result: &VisualChunkResult,
) -> Result<()> {
for (i, chunk) in visual_chunk_result.chunks.iter().enumerate() {
match db.store_chunk(chunk).await {
Ok(_) => {
tracing::info!("Stored VisualChunk chunk {} for video {}", i, uuid);
}
Err(e) => {
tracing::error!("Failed to store VisualChunk chunk {}: {}", i, e);
}
}
}
Ok(())
}
pub async fn get_running_count(&self) -> usize { pub async fn get_running_count(&self) -> usize {
*self.running_count.read().await *self.running_count.read().await
} }