feat: Initial v0.9 release with API Key authentication

## v0.9.20260325_144654 ### Features - API Key Authentication System - Job Worker System - V2 Backup Versioning ### Bug Fixes - get_processor_results_by_job column mapping Co-authored-by: OpenCode
2026-03-25 14:52:51 +08:00
parent 47e86b696f
commit 383201cacd
193 changed files with 40268 additions and 422 deletions
--- a/docs/CHUNK_SPEC.md
+++ b/docs/CHUNK_SPEC.md
@@ -1,5 +1,21 @@
 # Video Chunk 切分規範

+| 項目 | 內容 |
+|------|------|
+| 建立者 | Warren |
+| 建立時間 | 2026-03-16 |
+| 文件版本 | V1.0 |
+
+---
+
+## 版本歷史
+
+| 版本 | 日期 | 目的 | 操作人 | 工具/模型 |
+|------|------|------|--------|-----------|
+| V1.0 | 2026-03-16 | 創建文件 | Warren | OpenCode / MiniMax M2.5 |
+
+---
+
 本文檔定義 Momentry Core 系統中影片 chunks 的切分原則與資料結構。

 ---
@@ -579,7 +595,518 @@ TimeBased Chunks (4 個, 重疊 2秒):

 ---

-## 10. 相關文件
+## 10. 資料庫儲存
+
+### 10.1 PostgreSQL 儲存
+
+#### Table Schema
+
+```sql
+-- One row per chunk; (uuid, chunk_id) is the unique key of a chunk.
+CREATE TABLE chunks (
+    id BIGSERIAL PRIMARY KEY,
+    uuid VARCHAR(16) NOT NULL,
+    chunk_id VARCHAR(64) NOT NULL,
+    chunk_index INTEGER NOT NULL,
+    chunk_type VARCHAR(32) NOT NULL,
+    start_time DOUBLE PRECISION NOT NULL,
+    start_frame BIGINT NOT NULL,
+    end_time DOUBLE PRECISION NOT NULL,
+    end_frame BIGINT NOT NULL,
+    -- NOTE(review): presumably fps is the frame rate as text (e.g. "24/1") and
+    -- fps_value the same rate as a number - confirm against the chunk data structure.
+    fps VARCHAR(16) NOT NULL,
+    fps_value DOUBLE PRECISION NOT NULL,
+    content JSONB NOT NULL,
+    metadata JSONB,
+    vector_id VARCHAR(64),
+    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
+    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
+    UNIQUE(uuid, chunk_id)
+);
+
+-- Indexes
+-- NOTE(review): idx_chunks_uuid shares its leading column with idx_chunks_uuid_type and
+-- with the index backing UNIQUE(uuid, chunk_id) - confirm the single-column index is needed.
+CREATE INDEX idx_chunks_uuid ON chunks(uuid);
+CREATE INDEX idx_chunks_type ON chunks(chunk_type);
+CREATE INDEX idx_chunks_time ON chunks(start_time, end_time);
+CREATE INDEX idx_chunks_uuid_type ON chunks(uuid, chunk_type);
+CREATE INDEX idx_chunks_vector_id ON chunks(vector_id);
+
+#### 儲存範例
+
+```rust
+/// Upserts one chunk into the PostgreSQL `chunks` table.
+///
+/// Rows are keyed by `(uuid, chunk_id)`. When the key already exists, only `content`,
+/// `metadata`, `vector_id` and `updated_at` are refreshed; every other column keeps the
+/// value written by the first insert.
+///
+/// # Errors
+/// Returns an error when `content`/`metadata` cannot be serialized to JSON or when the
+/// statement fails to execute.
+pub async fn store_chunk_to_postgres(db: &PostgresDb, chunk: &Chunk) -> Result<()> {
+    // Serialize first so a serialization failure never reaches the database.
+    let content = serde_json::to_value(&chunk.content)?;
+    let metadata = serde_json::to_value(&chunk.metadata)?;
+
+    // PostgreSQL only understands numbered placeholders ($1, $2, ...); `?` is the
+    // MySQL/SQLite form and is rejected as a syntax error.
+    sqlx::query!(
+        r#"
+        INSERT INTO chunks (
+            uuid, chunk_id, chunk_index, chunk_type,
+            start_time, start_frame, end_time, end_frame,
+            fps, fps_value, content, metadata, vector_id
+        ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
+        ON CONFLICT (uuid, chunk_id) DO UPDATE SET
+            content = EXCLUDED.content,
+            metadata = EXCLUDED.metadata,
+            vector_id = EXCLUDED.vector_id,
+            updated_at = NOW()
+        "#,
+        chunk.uuid,
+        chunk.chunk_id,
+        chunk.chunk_index as i32,
+        chunk.chunk_type.as_str(),
+        chunk.start_time,
+        chunk.start_frame,
+        chunk.end_time,
+        chunk.end_frame,
+        chunk.fps,
+        chunk.fps_value,
+        content,
+        metadata,
+        chunk.vector_id,
+    )
+    .execute(&db.pool)
+    .await?;
+    Ok(())
+}
+```
+
+---
+
+### 10.2 MongoDB 儲存
+
+#### Collection Schema
+
+```javascript
+// chunks collection
+{
+  _id: ObjectId,
+  uuid: "1636719dc31f78ac",
+  chunk_id: "sentence_0001",
+  chunk_index: 1,
+  chunk_type: "sentence",
+  start_time: 10.5,
+  start_frame: 252,
+  end_time: 15.75,
+  end_frame: 378,
+  fps: "24/1",
+  fps_value: 24.0,
+  content: {
+    text: "Hello world, this is a test",
+    text_normalized: "hello world this is a test",
+    word_count: 7,
+    char_count: 34
+  },
+  metadata: {
+    source: "asr",
+    confidence: 0.95,
+    language: "en"
+  },
+  vector_id: "vec_sentence_0001",
+  created_at: ISODate("2026-03-16T10:00:00Z"),
+  updated_at: ISODate("2026-03-16T10:00:00Z")
+}
+
+// 索引
+db.chunks.createIndex({ uuid: 1 })
+db.chunks.createIndex({ chunk_type: 1 })
+db.chunks.createIndex({ start_time: 1, end_time: 1 })
+db.chunks.createIndex({ vector_id: 1 })
+db.chunks.createIndex({ uuid: 1, chunk_type: 1 })
+```
+
+#### 儲存範例
+
+```rust
+/// Upserts one chunk into the `chunks` collection of the `momentry` database.
+///
+/// The document is matched on `(uuid, chunk_id)`. `created_at` is written only when the
+/// document is first inserted; every other field, including `updated_at`, is overwritten
+/// on each call.
+///
+/// # Errors
+/// Returns an error when `content`/`metadata` cannot be converted to BSON or when the
+/// update fails.
+pub async fn store_chunk_to_mongodb(db: &MongoDb, chunk: &Chunk) -> Result<()> {
+    // One timestamp for both fields so a fresh insert has created_at == updated_at.
+    let now = bson::DateTime::now();
+
+    // `chunk` is a shared reference, so owned fields are borrowed or cloned, never moved.
+    let fields = bson::doc! {
+        "uuid": &chunk.uuid,
+        "chunk_id": &chunk.chunk_id,
+        "chunk_index": chunk.chunk_index,
+        "chunk_type": chunk.chunk_type.as_str(),
+        "start_time": chunk.start_time,
+        "start_frame": chunk.start_frame,
+        "end_time": chunk.end_time,
+        "end_frame": chunk.end_frame,
+        "fps": &chunk.fps,
+        "fps_value": chunk.fps_value,
+        // Serialize straight to BSON; a `serde_json::Value` is not a BSON value.
+        "content": bson::to_bson(&chunk.content)?,
+        "metadata": bson::to_bson(&chunk.metadata)?,
+        "vector_id": chunk.vector_id.clone(),
+        "updated_at": now
+    };
+
+    // The document type is spelled out because nothing else pins the collection's generic.
+    let collection = db
+        .database("momentry")
+        .collection::<bson::Document>("chunks");
+    collection
+        .update_one(
+            bson::doc! { "uuid": &chunk.uuid, "chunk_id": &chunk.chunk_id },
+            // `$setOnInsert` keeps the original creation time when the chunk already exists.
+            bson::doc! { "$set": fields, "$setOnInsert": { "created_at": now } },
+            UpdateOptions::builder().upsert(true).build(),
+        )
+        .await?;
+    Ok(())
+}
+```
+
+---
+
+## 11. 向量儲存設計
+
+### 11.1 設計原則
+
+**統一向量 ID 格式**，確保 Qdrant 與 PostgreSQL 相容：
+
+```
+{chunk_type}_{chunk_index:04}
+
+範例:
+sentence_0001
+cut_0002
+time_based_0015
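+```
+
+> **注意**：此格式僅在單一影片（同一個 `uuid`）內唯一；跨影片儲存時必須與 `uuid` 組合，才能作為全域唯一鍵。另外，Qdrant 的 point ID 只接受無號 64 位元整數或 UUID，上述字串需先轉換（例如以 `uuid` 加上向量 ID 產生 UUIDv5）才能作為 point ID，原始字串則保留在 payload 的 `chunk_id` 欄位中。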
+
+### 11.2 Qdrant Collection
+
+#### 建立 Collection
+
+```bash
+# 使用 Qdrant client 建立 collection
+curl -X PUT http://localhost:6333/collections/chunks \
+  -H "Content-Type: application/json" \
+  -H "api-key: Test3200Test3200Test3200" \
+  -d '{
+    "vectors": {
+      "size": 768,
+      "distance": "Cosine"
+    }
+  }'
+```
+
+#### Point 結構
+
+```json
+{
+  "id": "sentence_0001",
+  "vector": [0.123, -0.456, ...],
+  "payload": {
+    "uuid": "1636719dc31f78ac",
+    "chunk_id": "sentence_0001",
+    "chunk_type": "sentence",
+    "chunk_index": 1,
+    "start_time": 10.5,
+    "end_time": 15.75,
+    "text": "Hello world, this is a test",
+    "metadata": {
+      "confidence": 0.95,
+      "language": "en"
+    }
+  }
+}
+```
+
+#### Rust 結構
+
+```rust
+/// A single vector entry: its ID, the embedding, and the descriptive payload.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VectorPoint {
+    /// Vector ID of the point.
+    pub id: String,
+    /// Embedding values.
+    pub vector: Vec<f32>,
+    /// Chunk information stored alongside the vector.
+    pub payload: VectorPayload,
+}
+
+/// Chunk information attached to a [`VectorPoint`].
+///
+/// Every optional field is left out of the serialized form when it is `None`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct VectorPayload {
+    pub uuid: String,
+    pub chunk_id: String,
+    pub chunk_type: String,
+    pub chunk_index: u32,
+    pub start_time: f64,
+    pub end_time: f64,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub text: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub scene_id: Option<i32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub segment_number: Option<i32>,
+    // Same treatment as the other optional fields, so `None` is omitted instead of
+    // being emitted as an explicit `null`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<serde_json::Value>,
+}
+```
+
+### 11.3 PostgreSQL Vector 儲存
+
+#### Table Schema
+
+```sql
+-- Requires the pgvector extension.
+CREATE EXTENSION IF NOT EXISTS vector;
+
+-- One embedding per row; each row references its chunk through (uuid, chunk_id).
+-- NOTE(review): vector_id is declared UNIQUE on its own, but the documented ID format
+-- ({chunk_type}_{chunk_index:04}) contains no uuid, so two videos would produce the same
+-- vector_id - confirm whether uniqueness should be scoped as (uuid, vector_id).
+CREATE TABLE chunk_vectors (
+    id BIGSERIAL PRIMARY KEY,
+    vector_id VARCHAR(64) NOT NULL UNIQUE,
+    uuid VARCHAR(16) NOT NULL,
+    chunk_id VARCHAR(64) NOT NULL,
+    chunk_type VARCHAR(32) NOT NULL,
+    chunk_index INTEGER NOT NULL,
+    start_time DOUBLE PRECISION NOT NULL,
+    end_time DOUBLE PRECISION NOT NULL,
+    embedding vector(768) NOT NULL,
+    metadata JSONB,
+    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
+    
+    FOREIGN KEY (uuid, chunk_id) REFERENCES chunks(uuid, chunk_id)
+);
+
+-- Vector search index (IVFFlat, cosine distance operator class)
+-- NOTE(review): pgvector advises building IVFFlat indexes after the table holds data -
+-- confirm when this statement is meant to run.
+CREATE INDEX idx_chunk_vectors_embedding 
+ON chunk_vectors 
+USING ivfflat (embedding vector_cosine_ops)
+WITH (lists = 100);
+
+-- Lookup indexes
+CREATE INDEX idx_chunk_vectors_uuid ON chunk_vectors(uuid);
+CREATE INDEX idx_chunk_vectors_type ON chunk_vectors(chunk_type);
+
+#### 儲存範例
+
+```rust
+/// Upserts one embedding into the PostgreSQL `chunk_vectors` table.
+///
+/// Rows are keyed by `vector_id` (`point.id`). When the ID already exists, only
+/// `embedding` and `metadata` are replaced.
+///
+/// NOTE(review): the conflict target is `vector_id` alone; if IDs repeat across videos,
+/// one video's upsert overwrites another's row - confirm the intended key.
+///
+/// # Errors
+/// Returns an error when `metadata` cannot be serialized to JSON or when the statement
+/// fails to execute.
+pub async fn store_vector_to_postgres(db: &PostgresDb, point: &VectorPoint) -> Result<()> {
+    let metadata = serde_json::to_value(&point.payload.metadata)?;
+
+    // PostgreSQL requires numbered placeholders ($1..$9); `?` is not valid there.
+    // The embedding is sent as a `real[]` and cast to pgvector's `vector` on the server,
+    // so a plain `f32` slice can be bound.
+    sqlx::query!(
+        r#"
+        INSERT INTO chunk_vectors (
+            vector_id, uuid, chunk_id, chunk_type, chunk_index,
+            start_time, end_time, embedding, metadata
+        ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8::real[]::vector, $9)
+        ON CONFLICT (vector_id) DO UPDATE SET
+            embedding = EXCLUDED.embedding,
+            metadata = EXCLUDED.metadata
+        "#,
+        point.id,
+        point.payload.uuid,
+        point.payload.chunk_id,
+        point.payload.chunk_type,
+        point.payload.chunk_index as i32,
+        point.payload.start_time,
+        point.payload.end_time,
+        &point.vector[..],
+        metadata,
+    )
+    .execute(&db.pool)
+    .await?;
+    Ok(())
+}
+```
+
+---
+
+## 12. 查詢範例
+
+### 12.1 語義搜尋 (Semantic Search)
+
+#### 查詢類型 1: 相似文字搜尋
+
+```rust
+/// Searches for the `sentence` chunks most similar to `query`.
+///
+/// The query text is embedded with `embed_text`, then the `chunks` collection is searched
+/// for at most `limit` hits, restricted to points whose `chunk_type` is `sentence`.
+pub async fn semantic_search(
+    qdrant: &QdrantDb,
+    query: &str,
+    limit: usize,
+) -> Result<Vec<SearchResult>> {
+    // Step 1: turn the question into a vector.
+    let embedding = embed_text(query).await?;
+
+    // Step 2: run the vector search, restricted to sentence chunks.
+    let sentence_only = Filter::must([Condition::Match("chunk_type", "sentence")]);
+    let hits = qdrant
+        .search("chunks", &embedding, limit, Some(&sentence_only))
+        .await?;
+
+    Ok(hits)
+}
+
+// Example usage
+let results = semantic_search(&qdrant, "找出有人在說話的片段", 10).await?;
+for hit in results {
+    let payload = &hit.payload;
+    println!("{}: {:.3}", payload.chunk_id, hit.score);
+    println!("  Time: {}s - {}s", payload.start_time, payload.end_time);
+    println!("  Text: {:?}", payload.text);
+}
+```
+
+#### 查詢類型 2: 語音/文字混合搜尋
+
+```sql
+-- PostgreSQL: keyword match on sentence chunks, ranked by vector distance.
+-- `<=>` is pgvector's cosine distance (smaller = closer), so similarity is 1 - distance.
+-- query_embedding() is a placeholder for whatever produces the query vector.
+SELECT
+    c.chunk_id,
+    c.chunk_type,
+    c.start_time,
+    c.end_time,
+    c.content->>'text' as text,
+    1 - (v.embedding <=> query_embedding('找出開車的場景')) as similarity
+FROM chunks c
+-- chunk_id is only unique within one video, so the join must match on uuid as well.
+LEFT JOIN chunk_vectors v ON c.uuid = v.uuid AND c.chunk_id = v.chunk_id
+WHERE c.chunk_type = 'sentence'
+AND c.content->>'text' ILIKE '%car%'
+ORDER BY v.embedding <=> query_embedding('找出開車的場景')
+LIMIT 10;
+```
+
+### 12.2 時間範圍搜尋
+
+#### 查詢類型 3: 特定時間範圍
+
+```rust
+/// Returns every chunk of video `uuid` that overlaps the time window from `start` to `end`.
+///
+/// A chunk overlaps the window when it starts before `end` and ends after `start`.
+/// Results are ordered by `chunk_type`, then by `chunk_index`.
+pub async fn search_by_time_range(
+    db: &PostgresDb,
+    uuid: &str,
+    start: f64,
+    end: f64,
+) -> Result<Vec<Chunk>> {
+    let overlapping = sqlx::query_as!(
+        Chunk,
+        r#"
+        SELECT * FROM chunks
+        WHERE uuid = $1
+        AND start_time < $3
+        AND end_time > $2
+        ORDER BY chunk_type, chunk_index
+        "#,
+        uuid,
+        start,
+        end,
+    )
+    .fetch_all(&db.pool)
+    .await?;
+
+    Ok(overlapping)
+}
+
+// Example: all chunks between 30 and 60 seconds
+let chunks = search_by_time_range(&db, "1636719dc31f78ac", 30.0, 60.0).await?;
+```
+
+```javascript
+// MongoDB: time-range query. Matches chunks of the given video that start before 60
+// and end after 30, i.e. chunks overlapping the 30-60 window, sorted by chunk_type
+// and then chunk_index.
+db.chunks.find({
+  uuid: "1636719dc31f78ac",
+  start_time: { $lt: 60 },
+  end_time: { $gt: 30 }
+}).sort({ chunk_type: 1, chunk_index: 1 })
+```
+
+### 12.3 混合搜尋 (Hybrid Search)
+
+#### 查詢類型 4: 文字關鍵詞 + 向量相似度
+
+```rust
+/// Vector search narrowed down by keyword matching.
+///
+/// Embeds `query`, fetches up to `2 * limit` candidates from the `chunks` collection,
+/// keeps the candidates whose payload text contains at least one of `keywords`
+/// (case-insensitive substring match), and returns at most `limit` of them in the order
+/// the vector search produced.
+///
+/// When `keywords` is empty no keyword filtering is applied. When keywords are given,
+/// candidates without text are dropped.
+///
+/// `db` is not used by this implementation; it stays in the signature so existing callers
+/// keep compiling.
+pub async fn hybrid_search(
+    db: &PostgresDb,
+    qdrant: &QdrantDb,
+    query: &str,
+    keywords: &[&str],
+    limit: usize,
+) -> Result<Vec<HybridResult>> {
+    // 1. Vector search; over-fetch so that filtering can still fill `limit` results.
+    let query_vector = embed_text(query).await?;
+    let candidates = qdrant
+        .search("chunks", &query_vector, limit.saturating_mul(2), None)
+        .await?;
+
+    // 2. Keyword filter. Keywords are compared as plain substrings: SQL LIKE wildcards
+    //    such as `%` mean nothing to `str::contains` and would make every match fail.
+    let needles: Vec<String> = keywords.iter().map(|k| k.to_lowercase()).collect();
+
+    let filtered: Vec<_> = candidates
+        .into_iter()
+        .filter(|r| {
+            if needles.is_empty() {
+                return true;
+            }
+            match &r.payload.text {
+                Some(text) => {
+                    let haystack = text.to_lowercase();
+                    needles.iter().any(|k| haystack.contains(k.as_str()))
+                }
+                None => false,
+            }
+        })
+        .take(limit)
+        .collect();
+
+    Ok(filtered)
+}
+```
+
+### 12.4 場景搜尋
+
+#### 查詢類型 5: 找出特定場景
+
+```sql
+-- PostgreSQL: 'cut' chunks of one video whose content.scene_id equals 5.
+-- content->>'scene_id' yields text, hence the cast to int before comparing.
+SELECT * FROM chunks 
+WHERE uuid = '1636719dc31f78ac' 
+AND chunk_type = 'cut'
+AND (content->>'scene_id')::int = 5;
+
+-- 'cut' chunks of one video whose content.transition_type is 'dissolve'.
+SELECT * FROM chunks 
+WHERE uuid = '1636719dc31f78ac' 
+AND chunk_type = 'cut'
+AND content->>'transition_type' = 'dissolve';
+
+### 12.5 影片摘要
+
+#### 查詢類型 6: 產生影片摘要
+
+```sql
+-- Concatenate the text of all sentence chunks of one video, in start_time order,
+-- into a single transcript string.
+SELECT 
+    string_agg(content->>'text', ' ' ORDER BY start_time) as full_transcript
+FROM chunks 
+WHERE uuid = '1636719dc31f78ac' 
+AND chunk_type = 'sentence'
+AND content->>'text' IS NOT NULL;
+
+-- Aggregate text per scene: one row per content.scene_id of the 'cut' chunks,
+-- with scenes ordered by their earliest start_time.
+SELECT 
+    content->>'scene_id' as scene,
+    string_agg(content->>'text', ' ' ORDER BY start_time) as scene_text
+FROM chunks 
+WHERE uuid = '1636719dc31f78ac' 
+AND chunk_type = 'cut'
+GROUP BY content->>'scene_id'
+ORDER BY MIN(start_time);
+```
+
+### 12.6 常見查詢模式
+
+| 查詢類型 | 描述 | 資料庫 | SQL/程式碼 |
+|----------|------|--------|-------------|
+| 語義搜尋 | 找相似內容 | Qdrant | `search(vector, limit)` |
+| 關鍵詞搜尋 | 精確文字匹配 | PostgreSQL | `ILIKE '%keyword%'` |
+| 時間範圍 | 特定時段 | Both | `start_time < end AND end_time > start` |
+| 場景搜尋 | 特定鏡頭 | PostgreSQL | `scene_id = N` |
+| 混合搜尋 | 向量+關鍵詞 | Both | 結合以上兩種 |
+| 摘要產生 | 合併文字 | PostgreSQL | `string_agg()` |
+
+---
+
+## 13. 資料庫選擇建議
+
+### 13.1 儲存策略
+
+| 資料類型 | 主要儲存 | 備份/查詢 | 說明 |
+|----------|----------|-----------|------|
+| **Chunk 元數據** | PostgreSQL | MongoDB | 結構化查詢為主 |
+| **向量資料** | Qdrant | PostgreSQL | 向量搜尋為主 |
+| **全文檢索** | PostgreSQL | - | 關鍵詞搜尋 |
+| **日誌/歷史** | MongoDB | - | 靈活性為主 |
+
+### 13.2 讀寫模式
+
+| 場景 | 寫入 | 讀取 |
+|------|------|------|
+| **影片處理** | PostgreSQL + Qdrant | - |
+| **語義搜尋** | - | Qdrant |
+| **時間軸瀏覽** | - | PostgreSQL |
+| **系統分析** | MongoDB | MongoDB |
+
+---
+
+## 14. 相關文件

 - [JSON_OUTPUT_SPEC.md](./JSON_OUTPUT_SPEC.md) - JSON 輸出規範
 - [RUST_DEVELOPMENT.md](./RUST_DEVELOPMENT.md) - Rust 開發規範