refactor: rename search uuid -> file_uuid

This commit is contained in:
Accusys
2026-05-18 01:17:48 +08:00
parent 245ef39f03
commit 4125163f7b
4 changed files with 150 additions and 15 deletions

View File

@@ -0,0 +1,135 @@
<!-- module: search -->
<!-- description: Vector search, BM25, smart search, universal search, visual search -->
<!-- depends: 01_auth -->
## Search APIs
### `POST /api/v1/search/smart`
**Auth**: Required
**Scope**: file-level
Semantic vector search using EmbeddingGemma-300m. Generates a query embedding via EmbeddingGemma (port 11436), then searches pgvector `story_parent` and `llm_parent` chunks by cosine similarity.
#### Request Parameters
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `file_uuid` | string | Yes | — | File UUID to search within |
| `query` | string | Yes | — | Search text |
| `page` | integer | No | 1 | Page number |
| `page_size` | integer | No | 5 | Items per page |
#### Example
```bash
curl -s -X POST "$API/api/v1/search/smart" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $JWT" \
-d '{"file_uuid": "'"$FILE_UUID"'", "query": "Audrey Hepburn"}'
```
#### Response (200)
```json
{
"query": "Audrey Hepburn",
"results": [
{
"parent_id": 12345,
"start_time": 299.0,
"end_time": 300.0,
"summary": "[299s-300s, 1s] Cast: Audrey Hepburn. Total: 1 lines, 5 words...",
"similarity": 0.72
}
],
"strategy": "semantic_vector_search"
}
```
---
### `POST /api/v1/search/universal`
**Auth**: Required
**Scope**: file-level
Multi-type BM25 full-text search across chunks, frames, and persons. Uses PostgreSQL `tsvector`.
#### Request Parameters
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `query` | string | Yes | — | Search text |
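| `file_uuid` | string | Conditional | — | Restrict to a specific file. Required when `types` includes `chunk` (chunk IDs are only unique within a single file, so chunk search returns an error without it); optional for other types |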
| `types` | string[] | No | `["chunk","frame","person"]` | Search types |
| `page` | integer | No | 1 | Page number |
| `page_size` | integer | No | 20 | Items per page |
#### Example
```bash
curl -s -X POST "$API/api/v1/search/universal" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $JWT" \
-d '{"file_uuid": "'"$FILE_UUID"'", "query": "Cary Grant"}'
```
#### Response (200)
```json
{
"results": [
{
"type": "chunk",
"chunk_id": "uuid_1429",
"chunk_type": "story_child",
"start_time": 429.16,
"end_time": 430.5,
"text": "You could have the stamps.",
"score": 0.9
}
],
"total": 20,
"took_ms": 18
}
```
---
### `POST /api/v1/search/frames`
**Auth**: Required
**Scope**: file-level
Search face detection frames by identity name or trace ID.
---
### `POST /api/v1/search/identity_text`
**Auth**: Required
**Scope**: file-level
Search text chunks spoken by a specific identity.
---
### Visual Search
| Method | Endpoint | Description |
|--------|----------|-------------|
| POST | `/api/v1/search/visual` | Search visual chunks |
| POST | `/api/v1/search/visual/class` | Search by object class |
| POST | `/api/v1/search/visual/density` | Search by object density |
| POST | `/api/v1/search/visual/combination` | Search by object combination |
| POST | `/api/v1/search/visual/stats` | Visual chunk statistics |
### Embedding Model
| Detail | Value |
|--------|-------|
| **Model** | EmbeddingGemma-300m |
| **Endpoint** | `POST /api/v1/embeddings` on port 11436 |
| **Dimension** | 768 |
| **Storage** | pgvector (`chunk.embedding` column) |

View File

@@ -11,7 +11,7 @@ use crate::core::embedding::Embedder;
#[derive(Debug, Deserialize)]
pub struct SmartSearchRequest {
pub uuid: String,
pub file_uuid: String,
pub query: String,
pub page: Option<usize>,
pub page_size: Option<usize>,
@@ -82,7 +82,7 @@ pub async fn smart_search(
// 2. Search Database (Drill-Down: Find Parents First)
let db_parents: Vec<crate::core::db::postgres_db::SemanticSearchResult> = db
.search_parent_chunks_semantic(&req.uuid, &embedding, limit)
.search_parent_chunks_semantic(&req.file_uuid, &embedding, limit)
.await
.map_err(
|e: anyhow::Error| -> (StatusCode, Json<serde_json::Value>) {

View File

@@ -3916,7 +3916,7 @@ struct ParentChunkResponse {
/// Search visual chunks based on criteria
#[derive(Debug, Deserialize)]
struct VisualChunkSearchRequest {
uuid: String,
file_uuid: String,
criteria: visual_chunk_search::VisualChunkSearchCriteria,
}
@@ -3930,8 +3930,8 @@ async fn search_visual_chunks(
State(state): State<AppState>,
Json(req): Json<VisualChunkSearchRequest>,
) -> Result<Json<VisualChunkSearchResponse>, StatusCode> {
let criteria_hash = generate_visual_search_hash(&req.uuid, &req.criteria);
let cache_key = keys::visual_search(&req.uuid, &criteria_hash);
let criteria_hash = generate_visual_search_hash(&req.file_uuid, &req.criteria);
let cache_key = keys::visual_search(&req.file_uuid, &criteria_hash);
let ttl = state.mongo_cache.ttl_visual_search();
let chunks = state
@@ -3941,7 +3941,7 @@ async fn search_visual_chunks(
.await
.map_err(|e| anyhow::anyhow!("PG init failed: {}", e))?;
visual_chunk_search::search_visual_chunks(&db, &req.uuid, &req.criteria)
visual_chunk_search::search_visual_chunks(&db, &req.file_uuid, &req.criteria)
.await
.map_err(|e| anyhow::anyhow!("Visual search failed: {}", e))
})

View File

@@ -15,7 +15,7 @@ use crate::core::db::{schema, Database, PostgresDb};
#[derive(Debug, Deserialize)]
pub struct UniversalSearchRequest {
pub query: String,
pub uuid: Option<String>,
pub file_uuid: Option<String>,
#[serde(default)]
pub types: Vec<String>, // chunk, frame, person
pub time_range: Option<[f64; 2]>,
@@ -274,7 +274,7 @@ pub async fn search_persons(
#[derive(Debug, Deserialize)]
pub struct FrameSearchRequest {
pub uuid: Option<String>,
pub file_uuid: Option<String>,
pub object_class: Option<String>,
pub ocr_text: Option<String>,
pub face_id: Option<String>,
@@ -292,7 +292,7 @@ pub struct FrameSearchResponse {
pub struct FrameResult {
pub frame_number: i64,
pub timestamp: f64,
pub uuid: String,
pub file_uuid: String,
pub objects: Option<Vec<serde_json::Value>>,
pub ocr_texts: Option<Vec<String>>,
pub faces: Option<Vec<serde_json::Value>>,
@@ -333,9 +333,9 @@ async fn search_chunks(
req: &UniversalSearchRequest,
) -> Result<Vec<SearchResult>, anyhow::Error> {
// file_uuid is required for chunk search - chunk_id is only unique within a video
let uuid = match &req.uuid {
let uuid = match &req.file_uuid {
Some(u) => u.replace('\'', "''"),
None => return Err(anyhow::anyhow!("uuid is required for chunk search")),
None => return Err(anyhow::anyhow!("file_uuid is required for chunk search")),
};
let chunk_table = schema::table_name("chunk");
@@ -501,7 +501,7 @@ async fn search_frames_internal(
table, video_table
);
if let Some(uuid) = &req.uuid {
if let Some(uuid) = &req.file_uuid {
sql.push_str(&format!(" AND v.file_uuid = '{}'", uuid));
}
if let Some(tr) = &req.time_range {
@@ -665,8 +665,8 @@ async fn search_frames_internal_v2(
table, video_table
);
if let Some(uuid) = &req.uuid {
sql.push_str(&format!(" AND v.file_uuid = '{}'", uuid));
if let Some(uuid) = &req.file_uuid {
sql.push_str(&format!(" AND fd.file_uuid = '{}'", uuid));
}
if let Some(tr) = &req.time_range {
sql.push_str(&format!(
@@ -721,7 +721,7 @@ async fn search_frames_internal_v2(
FrameResult {
frame_number,
timestamp,
uuid,
file_uuid: uuid,
objects: objects.map(|arr| arr.iter().map(|v| v.clone()).collect()),
ocr_texts,
faces,