refactor: rename search uuid -> file_uuid
This commit is contained in:
135
docs_v1.0/API_WORKSPACE/modules/06_search.md
Normal file
135
docs_v1.0/API_WORKSPACE/modules/06_search.md
Normal file
@@ -0,0 +1,135 @@
|
||||
<!-- module: search -->
|
||||
<!-- description: Vector search, BM25, smart search, universal search, visual search -->
|
||||
<!-- depends: 01_auth -->
|
||||
|
||||
## Search APIs
|
||||
|
||||
### `POST /api/v1/search/smart`
|
||||
|
||||
**Auth**: Required
|
||||
**Scope**: file-level
|
||||
|
||||
Semantic vector search using EmbeddingGemma-300m. Generates a query embedding via EmbeddingGemma (port 11436), then searches pgvector `story_parent` and `llm_parent` chunks by cosine similarity.
|
||||
|
||||
#### Request Parameters
|
||||
|
||||
| Field | Type | Required | Default | Description |
|
||||
|-------|------|----------|---------|-------------|
|
||||
| `file_uuid` | string | Yes | — | File UUID to search within |
|
||||
| `query` | string | Yes | — | Search text |
|
||||
| `page` | integer | No | 1 | Page number |
|
||||
| `page_size` | integer | No | 5 | Items per page |
|
||||
|
||||
#### Example
|
||||
|
||||
```bash
|
||||
curl -s -X POST "$API/api/v1/search/smart" \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer $JWT" \
|
||||
-d '{"file_uuid": "'"$FILE_UUID"'", "query": "Audrey Hepburn"}'
|
||||
```
|
||||
|
||||
#### Response (200)
|
||||
|
||||
```json
|
||||
{
|
||||
"query": "Audrey Hepburn",
|
||||
"results": [
|
||||
{
|
||||
"parent_id": 12345,
|
||||
"start_time": 299.0,
|
||||
"end_time": 300.0,
|
||||
"summary": "[299s-300s, 1s] Cast: Audrey Hepburn. Total: 1 lines, 5 words...",
|
||||
"similarity": 0.72
|
||||
}
|
||||
],
|
||||
"strategy": "semantic_vector_search"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `POST /api/v1/search/universal`
|
||||
|
||||
**Auth**: Required
|
||||
**Scope**: file-level
|
||||
|
||||
Multi-type BM25 full-text search across chunks, frames, and persons. Uses PostgreSQL `tsvector`.
|
||||
|
||||
#### Request Parameters
|
||||
|
||||
| Field | Type | Required | Default | Description |
|
||||
|-------|------|----------|---------|-------------|
|
||||
| `query` | string | Yes | — | Search text |
|
||||
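| `file_uuid` | string | No | — | Restrict to a specific file. Chunk search requires it (the `chunk` type fails with "file_uuid is required for chunk search" when it is omitted), because chunk IDs are only unique within a single video |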
|
||||
| `types` | string[] | No | `["chunk","frame","person"]` | Search types |
|
||||
| `page` | integer | No | 1 | Page number |
|
||||
| `page_size` | integer | No | 20 | Items per page |
|
||||
|
||||
#### Example
|
||||
|
||||
```bash
|
||||
curl -s -X POST "$API/api/v1/search/universal" \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer $JWT" \
|
||||
-d '{"file_uuid": "'"$FILE_UUID"'", "query": "Cary Grant"}'
|
||||
```
|
||||
|
||||
#### Response (200)
|
||||
|
||||
```json
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"type": "chunk",
|
||||
"chunk_id": "uuid_1429",
|
||||
"chunk_type": "story_child",
|
||||
"start_time": 429.16,
|
||||
"end_time": 430.5,
|
||||
"text": "You could have the stamps.",
|
||||
"score": 0.9
|
||||
}
|
||||
],
|
||||
"total": 20,
|
||||
"took_ms": 18
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `POST /api/v1/search/frames`
|
||||
|
||||
**Auth**: Required
|
||||
**Scope**: file-level
|
||||
|
||||
Search face detection frames by identity name or trace ID.
|
||||
|
||||
---
|
||||
|
||||
### `POST /api/v1/search/identity_text`
|
||||
|
||||
**Auth**: Required
|
||||
**Scope**: file-level
|
||||
|
||||
Search text chunks spoken by a specific identity.
|
||||
|
||||
---
|
||||
|
||||
### Visual Search
|
||||
|
||||
| Method | Endpoint | Description |
|
||||
|--------|----------|-------------|
|
||||
| POST | `/api/v1/search/visual` | Search visual chunks |
|
||||
| POST | `/api/v1/search/visual/class` | Search by object class |
|
||||
| POST | `/api/v1/search/visual/density` | Search by object density |
|
||||
| POST | `/api/v1/search/visual/combination` | Search by object combination |
|
||||
| POST | `/api/v1/search/visual/stats` | Visual chunk statistics |
|
||||
|
||||
#### Embedding Model
|
||||
|
||||
| Detail | Value |
|
||||
|--------|-------|
|
||||
| **Model** | EmbeddingGemma-300m |
|
||||
| **Endpoint** | `POST /api/v1/embeddings` on port 11436 |
|
||||
| **Dimension** | 768 |
|
||||
| **Storage** | pgvector (`chunk.embedding` column) |
|
||||
@@ -11,7 +11,7 @@ use crate::core::embedding::Embedder;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct SmartSearchRequest {
|
||||
pub uuid: String,
|
||||
pub file_uuid: String,
|
||||
pub query: String,
|
||||
pub page: Option<usize>,
|
||||
pub page_size: Option<usize>,
|
||||
@@ -82,7 +82,7 @@ pub async fn smart_search(
|
||||
|
||||
// 2. Search Database (Drill-Down: Find Parents First)
|
||||
let db_parents: Vec<crate::core::db::postgres_db::SemanticSearchResult> = db
|
||||
.search_parent_chunks_semantic(&req.uuid, &embedding, limit)
|
||||
.search_parent_chunks_semantic(&req.file_uuid, &embedding, limit)
|
||||
.await
|
||||
.map_err(
|
||||
|e: anyhow::Error| -> (StatusCode, Json<serde_json::Value>) {
|
||||
|
||||
@@ -3916,7 +3916,7 @@ struct ParentChunkResponse {
|
||||
/// Request body for a visual chunk search: the target file's UUID plus the search criteria
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct VisualChunkSearchRequest {
|
||||
uuid: String,
|
||||
file_uuid: String,
|
||||
criteria: visual_chunk_search::VisualChunkSearchCriteria,
|
||||
}
|
||||
|
||||
@@ -3930,8 +3930,8 @@ async fn search_visual_chunks(
|
||||
State(state): State<AppState>,
|
||||
Json(req): Json<VisualChunkSearchRequest>,
|
||||
) -> Result<Json<VisualChunkSearchResponse>, StatusCode> {
|
||||
let criteria_hash = generate_visual_search_hash(&req.uuid, &req.criteria);
|
||||
let cache_key = keys::visual_search(&req.uuid, &criteria_hash);
|
||||
let criteria_hash = generate_visual_search_hash(&req.file_uuid, &req.criteria);
|
||||
let cache_key = keys::visual_search(&req.file_uuid, &criteria_hash);
|
||||
let ttl = state.mongo_cache.ttl_visual_search();
|
||||
|
||||
let chunks = state
|
||||
@@ -3941,7 +3941,7 @@ async fn search_visual_chunks(
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("PG init failed: {}", e))?;
|
||||
|
||||
visual_chunk_search::search_visual_chunks(&db, &req.uuid, &req.criteria)
|
||||
visual_chunk_search::search_visual_chunks(&db, &req.file_uuid, &req.criteria)
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("Visual search failed: {}", e))
|
||||
})
|
||||
|
||||
@@ -15,7 +15,7 @@ use crate::core::db::{schema, Database, PostgresDb};
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct UniversalSearchRequest {
|
||||
pub query: String,
|
||||
pub uuid: Option<String>,
|
||||
pub file_uuid: Option<String>,
|
||||
#[serde(default)]
|
||||
pub types: Vec<String>, // chunk, frame, person
|
||||
pub time_range: Option<[f64; 2]>,
|
||||
@@ -274,7 +274,7 @@ pub async fn search_persons(
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct FrameSearchRequest {
|
||||
pub uuid: Option<String>,
|
||||
pub file_uuid: Option<String>,
|
||||
pub object_class: Option<String>,
|
||||
pub ocr_text: Option<String>,
|
||||
pub face_id: Option<String>,
|
||||
@@ -292,7 +292,7 @@ pub struct FrameSearchResponse {
|
||||
pub struct FrameResult {
|
||||
pub frame_number: i64,
|
||||
pub timestamp: f64,
|
||||
pub uuid: String,
|
||||
pub file_uuid: String,
|
||||
pub objects: Option<Vec<serde_json::Value>>,
|
||||
pub ocr_texts: Option<Vec<String>>,
|
||||
pub faces: Option<Vec<serde_json::Value>>,
|
||||
@@ -333,9 +333,9 @@ async fn search_chunks(
|
||||
req: &UniversalSearchRequest,
|
||||
) -> Result<Vec<SearchResult>, anyhow::Error> {
|
||||
// file_uuid is required for chunk search - chunk_id is only unique within a video
|
||||
let uuid = match &req.uuid {
|
||||
let uuid = match &req.file_uuid {
|
||||
Some(u) => u.replace('\'', "''"),
|
||||
None => return Err(anyhow::anyhow!("uuid is required for chunk search")),
|
||||
None => return Err(anyhow::anyhow!("file_uuid is required for chunk search")),
|
||||
};
|
||||
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
@@ -501,7 +501,7 @@ async fn search_frames_internal(
|
||||
table, video_table
|
||||
);
|
||||
|
||||
if let Some(uuid) = &req.uuid {
|
||||
if let Some(uuid) = &req.file_uuid {
|
||||
sql.push_str(&format!(" AND v.file_uuid = '{}'", uuid));
|
||||
}
|
||||
if let Some(tr) = &req.time_range {
|
||||
@@ -665,8 +665,8 @@ async fn search_frames_internal_v2(
|
||||
table, video_table
|
||||
);
|
||||
|
||||
if let Some(uuid) = &req.uuid {
|
||||
sql.push_str(&format!(" AND v.file_uuid = '{}'", uuid));
|
||||
if let Some(uuid) = &req.file_uuid {
|
||||
sql.push_str(&format!(" AND fd.file_uuid = '{}'", uuid));
|
||||
}
|
||||
if let Some(tr) = &req.time_range {
|
||||
sql.push_str(&format!(
|
||||
@@ -721,7 +721,7 @@ async fn search_frames_internal_v2(
|
||||
FrameResult {
|
||||
frame_number,
|
||||
timestamp,
|
||||
uuid,
|
||||
file_uuid: uuid,
|
||||
objects: objects.map(|arr| arr.iter().map(|v| v.clone()).collect()),
|
||||
ocr_texts,
|
||||
faces,
|
||||
|
||||
Reference in New Issue
Block a user