diff --git a/docs_v1.0/API_WORKSPACE/modules/06_search.md b/docs_v1.0/API_WORKSPACE/modules/06_search.md new file mode 100644 index 0000000..ad6ae82 --- /dev/null +++ b/docs_v1.0/API_WORKSPACE/modules/06_search.md @@ -0,0 +1,135 @@ + + + + +## Search APIs + +### `POST /api/v1/search/smart` + +**Auth**: Required +**Scope**: file-level + +Semantic vector search using EmbeddingGemma-300m. Generates a query embedding via EmbeddingGemma (port 11436), then searches pgvector `story_parent` and `llm_parent` chunks by cosine similarity. + +#### Request Parameters + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `file_uuid` | string | Yes | — | File UUID to search within | +| `query` | string | Yes | — | Search text | +| `page` | integer | No | 1 | Page number | +| `page_size` | integer | No | 5 | Items per page | + +#### Example + +```bash +curl -s -X POST "$API/api/v1/search/smart" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $JWT" \ + -d '{"file_uuid": "'"$FILE_UUID"'", "query": "Audrey Hepburn"}' +``` + +#### Response (200) + +```json +{ + "query": "Audrey Hepburn", + "results": [ + { + "parent_id": 12345, + "start_time": 299.0, + "end_time": 300.0, + "summary": "[299s-300s, 1s] Cast: Audrey Hepburn. Total: 1 lines, 5 words...", + "similarity": 0.72 + } + ], + "strategy": "semantic_vector_search" +} +``` + +--- + +### `POST /api/v1/search/universal` + +**Auth**: Required +**Scope**: file-level + +Multi-type BM25 full-text search across chunks, frames, and persons. Uses PostgreSQL `tsvector`. 
+ +#### Request Parameters + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `query` | string | Yes | — | Search text | +| `file_uuid` | string | No | — | Restrict to specific file (chunk search requires it) | +| `types` | string[] | No | `["chunk","frame","person"]` | Search types | +| `page` | integer | No | 1 | Page number | +| `page_size` | integer | No | 20 | Items per page | + +#### Example + +```bash +curl -s -X POST "$API/api/v1/search/universal" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $JWT" \ + -d '{"file_uuid": "'"$FILE_UUID"'", "query": "Cary Grant"}' +``` + +#### Response (200) + +```json +{ + "results": [ + { + "type": "chunk", + "chunk_id": "uuid_1429", + "chunk_type": "story_child", + "start_time": 429.16, + "end_time": 430.5, + "text": "You could have the stamps.", + "score": 0.9 + } + ], + "total": 20, + "took_ms": 18 +} +``` + +--- + +### `POST /api/v1/search/frames` + +**Auth**: Required +**Scope**: file-level + +Search face detection frames by identity name or trace ID. + +--- + +### `POST /api/v1/search/identity_text` + +**Auth**: Required +**Scope**: file-level + +Search text chunks spoken by a specific identity. 
+ +--- + +### Visual Search + +| Method | Endpoint | Description | +|--------|----------|-------------| +| POST | `/api/v1/search/visual` | Search visual chunks | +| POST | `/api/v1/search/visual/class` | Search by object class | +| POST | `/api/v1/search/visual/density` | Search by object density | +| POST | `/api/v1/search/visual/combination` | Search by object combination | +| POST | `/api/v1/search/visual/stats` | Visual chunk statistics | + +#### Embedding Model + +| Detail | Value | +|--------|-------| +| **Model** | EmbeddingGemma-300m | +| **Endpoint** | `POST /api/v1/embeddings` on port 11436 | +| **Dimension** | 768 | +| **Storage** | pgvector (`chunk.embedding` column) | diff --git a/src/api/search.rs b/src/api/search.rs index dedf12f..80344f0 100644 --- a/src/api/search.rs +++ b/src/api/search.rs @@ -11,7 +11,7 @@ use crate::core::embedding::Embedder; #[derive(Debug, Deserialize)] pub struct SmartSearchRequest { - pub uuid: String, + pub file_uuid: String, pub query: String, pub page: Option, pub page_size: Option, @@ -82,7 +82,7 @@ pub async fn smart_search( // 2. 
Search Database (Drill-Down: Find Parents First) let db_parents: Vec = db - .search_parent_chunks_semantic(&req.uuid, &embedding, limit) + .search_parent_chunks_semantic(&req.file_uuid, &embedding, limit) .await .map_err( |e: anyhow::Error| -> (StatusCode, Json) { diff --git a/src/api/server.rs b/src/api/server.rs index a424e23..e414487 100644 --- a/src/api/server.rs +++ b/src/api/server.rs @@ -3916,7 +3916,7 @@ struct ParentChunkResponse { /// Search visual chunks based on criteria #[derive(Debug, Deserialize)] struct VisualChunkSearchRequest { - uuid: String, + file_uuid: String, criteria: visual_chunk_search::VisualChunkSearchCriteria, } @@ -3930,8 +3930,8 @@ async fn search_visual_chunks( State(state): State, Json(req): Json, ) -> Result, StatusCode> { - let criteria_hash = generate_visual_search_hash(&req.uuid, &req.criteria); - let cache_key = keys::visual_search(&req.uuid, &criteria_hash); + let criteria_hash = generate_visual_search_hash(&req.file_uuid, &req.criteria); + let cache_key = keys::visual_search(&req.file_uuid, &criteria_hash); let ttl = state.mongo_cache.ttl_visual_search(); let chunks = state @@ -3941,7 +3941,7 @@ async fn search_visual_chunks( .await .map_err(|e| anyhow::anyhow!("PG init failed: {}", e))?; - visual_chunk_search::search_visual_chunks(&db, &req.uuid, &req.criteria) + visual_chunk_search::search_visual_chunks(&db, &req.file_uuid, &req.criteria) .await .map_err(|e| anyhow::anyhow!("Visual search failed: {}", e)) }) diff --git a/src/api/universal_search.rs b/src/api/universal_search.rs index ef199ee..5cacf48 100644 --- a/src/api/universal_search.rs +++ b/src/api/universal_search.rs @@ -15,7 +15,7 @@ use crate::core::db::{schema, Database, PostgresDb}; #[derive(Debug, Deserialize)] pub struct UniversalSearchRequest { pub query: String, - pub uuid: Option, + pub file_uuid: Option, #[serde(default)] pub types: Vec, // chunk, frame, person pub time_range: Option<[f64; 2]>, @@ -274,7 +274,7 @@ pub async fn search_persons( 
#[derive(Debug, Deserialize)] pub struct FrameSearchRequest { - pub uuid: Option, + pub file_uuid: Option, pub object_class: Option, pub ocr_text: Option, pub face_id: Option, @@ -292,7 +292,7 @@ pub struct FrameSearchResponse { pub struct FrameResult { pub frame_number: i64, pub timestamp: f64, - pub uuid: String, + pub file_uuid: String, pub objects: Option>, pub ocr_texts: Option>, pub faces: Option>, @@ -333,9 +333,9 @@ async fn search_chunks( req: &UniversalSearchRequest, ) -> Result, anyhow::Error> { // uuid is required for chunk search - chunk_id is only unique within a video - let uuid = match &req.uuid { + let uuid = match &req.file_uuid { Some(u) => u.replace('\'', "''"), - None => return Err(anyhow::anyhow!("uuid is required for chunk search")), + None => return Err(anyhow::anyhow!("file_uuid is required for chunk search")), }; let chunk_table = schema::table_name("chunk"); @@ -501,7 +501,7 @@ async fn search_frames_internal( table, video_table ); - if let Some(uuid) = &req.uuid { + if let Some(uuid) = &req.file_uuid { sql.push_str(&format!(" AND v.file_uuid = '{}'", uuid)); } if let Some(tr) = &req.time_range { @@ -665,8 +665,8 @@ async fn search_frames_internal_v2( table, video_table ); - if let Some(uuid) = &req.uuid { - sql.push_str(&format!(" AND v.file_uuid = '{}'", uuid)); + if let Some(uuid) = &req.file_uuid { + sql.push_str(&format!(" AND fd.file_uuid = '{}'", uuid)); } if let Some(tr) = &req.time_range { sql.push_str(&format!( @@ -721,7 +721,7 @@ async fn search_frames_internal_v2( FrameResult { frame_number, timestamp, - uuid, + file_uuid: uuid, objects: objects.map(|arr| arr.iter().map(|v| v.clone()).collect()), ocr_texts, faces,