feat: add Vision LLM integration (CLIP + Qwen3-VL cascade)

- Add Qwen3-VL dynamic management (start/stop/status CLI)
- Add CLIP + Qwen3-VL cascade detection strategy
- Add Vision CLI commands (vision start/stop/status, detect)
- Add cascade_vision processor module
- Add clip processor module
- Add qwen_vl_manager module

Changes:
- scripts/start_qwen3vl.sh, stop_qwen3vl.sh: Qwen3-VL management scripts
- src/core/vision/: Qwen3-VL manager module
- src/core/processor/cascade_vision.rs: CLIP + Qwen3-VL cascade logic
- src/core/processor/clip.rs: CLIP classification and detection
- src/api/clip_api.rs: CLIP API endpoints
- src/cli/vision.rs: Vision CLI implementation
- src/cli/args.rs: Add Vision and Detect commands
- src/main.rs: Integrate Vision CLI
- src/core/mod.rs: Add vision module
- src/core/processor/mod.rs: Add cascade_vision module
This commit is contained in:
Accusys
2026-06-13 16:25:52 +08:00
parent 834b0d4865
commit 17e4e15860
37 changed files with 2185 additions and 294 deletions

View File

@@ -1,8 +1,8 @@
use axum::{extract::State, http::StatusCode, response::Json, routing::post, Router};
use reqwest::Client;
use serde::{Deserialize, Serialize};
use crate::api::types::AppState;
use crate::core::llm::function_calling::LLM_CLIENT;
pub fn agent_routes() -> Router<AppState> {
Router::new().route("/api/v1/agents/translate", post(translate_text))
@@ -42,7 +42,6 @@ async fn translate_text(
);
// Call LLM via configurable endpoint
let client = Client::new();
let llm_url = crate::core::config::llm::CHAT_URL.as_str();
let model = crate::core::config::llm::CHAT_MODEL.as_str();
@@ -57,7 +56,7 @@ async fn translate_text(
"temperature": 0.1
});
let response = client.post(llm_url).json(&body).send().await.map_err(|e| {
let response = LLM_CLIENT.post(llm_url).json(&body).send().await.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to call LLM: {}", e),

View File

@@ -91,19 +91,63 @@ const SYSTEM_PROMPT: &str = r#"你是 Momentry 影片分析助手。回答用戶
6. 用文字反查人物使用 identity_text輸入關鍵字→找出誰說/提到這段話)
7. 語意/內容問題使用 smart_search 或 universal_search
8. 畫面分析使用 analyze_frame — 可以分析影片中的任何畫面內容(場景、人物表情、動作、物件等)
9. 可以同時呼叫多個工具
9. **可以同時呼叫多個工具,但需符合以下條件:**
- ✅ 查詢多部影片的相同資訊3部影片的人物列表
- ✅ 需要組合多個來源的資訊才能回答file_info + tkg_query
- ❌ 不要為了「嘗試所有可能」而盲目並行呼叫
- ❌ 如果單一工具已返回足夠答案,不需要額外呼叫
## 引導規則
- 如果用戶沒說片名 → 用 find_file 搜尋,如果名稱不明確就反問
- 反問時提供 suggestions例如演員名、年代
- **如果影片的 has_data 為 false代表尚未完成處理不要推薦用戶使用。引導用戶選擇 has_data=true 的影片**
- 不要輸出 JSON用自然語言回答
- 引用資料時附上具體數字frame 編號、時間秒數)
## 引導規則(優化版)
- **搜尋優先原則**
1. **所有問題都先嘗試搜尋,不要過早判斷用戶是否說了片名**
2. 根據搜尋結果和答案性質決定是否反問:
- **列举型問題**(找出所有、列出)→ ✅ 不反問,列出所有結果
- **指定型問題**(这部、那个)→ ⚠️ 反問選擇具體哪個
- **統計型問題**(多少、幾個)→ ✅ 不反問,統計所有結果
- **分析型問題**(分析、描述)→ ⚠️ 視問題表述決定
## 回答規則
- 回答要簡潔但完整
- 如果找到影片,附上 file_uuid用戶之後可能需要
- 對於人物問題,說出角色名和演員名"#;
- **反問條件(精確)**
1. **答案需要分辨才反問**,不是「找到多部影片就反問」
2. 判断标准:
- ✅ 如果問題要求「所有」「列出」→ 答案不需要分辨 → 不反問
- ⚠️ 如果問題要求「这部」「那个」→ 答案需要分辨 → 反問
- ⚠️ 如果問題不明確 → 根據常理判断是否需要分辨
- **反問優化**
1. 反問時提供智能 suggestions依問題類型調整
2. 人物問題 → suggestions: ["演員名", "角色名", "年代"]
3. 內容問題 → suggestions: ["片名", "年代", "主題關鍵字"]
4. 畫面問題 → suggestions: ["片名", "時間範圍", "場景描述"]
- **特殊情況**
- 如果影片的 has_data 為 false → 不要推薦,引導選擇 has_data=true
- 如果搜尋結果直接包含答案 → 直接回答,不額外呼叫工具
- 如果找不到影片 → 反問提供更多資訊(片名、演員、年份)
- **回答格式**
- 不要輸出 JSON用自然語言回答
- 引用資料時附上具體數字frame 編號、時間秒數)
## 回答規則(優化版)
- 回答長度依問題類型調整:
- 簡單查詢(如「列出影片」)→ 簡潔列表回答1-2句
- 分析問題(如「描述情節」)→ 詳細回答3-5句
- 計數問題(如「有幾個場景」)→ 直接回答數字 + 簡短說明
- 回答格式:
- ✅ 如果找到影片,附上 file_uuid用戶之後可能需要
- ✅ 對於人物問題,說出角色名和演員名(如果有)
- ✅ 引用資料時附上具體數字frame 編號、時間秒數)
- ❌ 不要輸出 JSON 格式,用自然語言回答
- ❌ 不要編造資料,如果找不到就明確說「找不到」
## 停止規則(重要)
- **如果已經找到足夠資訊回答用戶問題,立即停止呼叫工具,直接回答**
- **如果連續 2 轪呼叫工具都返回空結果或相同資訊,停止並告知用戶「找不到更多相關資訊」**
- **如果用戶問題不明確或範圍過大,停止並反問用戶(提供 suggestions**
- **如果單一工具呼叫返回完整答案,不需要額外呼叫其他工具補充**
- **優化效率:避免重複呼叫相同工具或查詢相同內容**
- **成本控制:主動判斷是否需要繼續,不要盲目嘗試所有工具**"#;
fn make_tools(pool: &sqlx::PgPool) -> Vec<ToolDef> {
vec![
@@ -825,8 +869,12 @@ async fn exec_analyze_frame(
async fn execute_tool(pool: &sqlx::PgPool, tool_call: &ToolCall) -> (String, String, String) {
let name = tool_call.function.name.clone();
let tool_call_id = tool_call.id.clone().unwrap_or_default();
let args: serde_json::Value =
serde_json::from_str(&tool_call.function.arguments).unwrap_or_default();
match serde_json::from_str(&tool_call.function.arguments) {
Ok(v) => v,
Err(e) => return (tool_call_id, name, serde_json::json!({"error": format!("Invalid arguments: {}", e)}).to_string()),
};
let result = match name.as_str() {
"find_file" => exec_find_file(pool, &args).await,
"list_files" => exec_list_files(pool, &args).await,
@@ -844,31 +892,42 @@ async fn execute_tool(pool: &sqlx::PgPool, tool_call: &ToolCall) -> (String, Str
Ok(s) => s,
Err(e) => serde_json::json!({"error": e}).to_string(),
};
let tool_call_id = tool_call.id.clone().unwrap_or_default();
(tool_call_id, name, content)
}
// ── Tool Loop ─────────────────────────────────────────────────────
const MAX_ROUNDS: u32 = 5;
const MAX_ROUNDS: u32 = 15;
async fn run_tool_loop(
pool: &sqlx::PgPool,
system_prompt: &str,
user_query: &str,
history: Vec<ChatMessage>,
) -> (String, Vec<serde_json::Value>) {
) -> (String, Vec<ChatMessage>, Vec<serde_json::Value>) {
let mut messages = function_calling::build_conversation(system_prompt, user_query, history);
let mut sources = Vec::new();
for round in 0..MAX_ROUNDS {
let tools = Some(make_tools(pool));
match function_calling::call_llm(messages.clone(), tools, 2048, 120).await {
let tools = make_tools(pool);
tracing::info!(
"[AGENT] Round {} started, message_count: {}, tools_available: {}",
round + 1,
messages.len(),
tools.len()
);
match function_calling::call_llm(messages.clone(), Some(tools.clone()), 2048, 120).await {
Ok(LlmResponse::Text(text)) => {
return (text, sources);
tracing::info!(
"[AGENT] Loop finished: rounds_used={}, total_tools_called={}, answer_length={} chars",
round + 1,
sources.len(),
text.len()
);
return (text, messages, sources);
}
Ok(LlmResponse::ToolCalls(calls)) => {
// Push assistant message with tool_calls so Gemma4 remembers
messages.push(ChatMessage {
role: "assistant".to_string(),
content: None,
@@ -878,21 +937,32 @@ async fn run_tool_loop(
});
for call in &calls {
let (tool_call_id, name, content) = execute_tool(pool, call).await;
tracing::info!(
"[AGENT] Tool called: {}, result_size: {} chars, round: {}",
name,
content.len(),
round + 1
);
sources.push(serde_json::json!({"tool": name, "result": content}));
messages.push(function_calling::make_tool_result(
&tool_call_id,
&name,
&content,
&tool_call_id, &name, &content,
));
}
}
Err(e) => {
return (format!("系統錯誤:{}", e), sources);
tracing::error!("[AGENT] LLM call failed: {}", e);
return (format!("系統錯誤:{}", e), messages, sources);
}
}
}
tracing::warn!(
"[AGENT] Max rounds reached: rounds_used={}, total_tools_called={}",
MAX_ROUNDS,
sources.len()
);
(
"已達到最大查詢次數,請縮小問題範圍後重新詢問。".to_string(),
messages,
sources,
)
}
@@ -905,12 +975,12 @@ async fn agent_search(
) -> Result<Json<AgentSearchResponse>, (StatusCode, Json<serde_json::Value>)> {
let (conv_id, history) = get_or_create_conv(req.conversation_id.as_deref());
let (answer, sources) =
let (answer, messages, sources) =
run_tool_loop(state.db.pool(), SYSTEM_PROMPT, &req.query, history).await;
// Save updated messages for conversation continuation
let new_msgs = function_calling::build_conversation(SYSTEM_PROMPT, &req.query, vec![]);
save_messages(&conv_id, &new_msgs);
// Save messages (skip system prompt — build_conversation re-adds it)
let history: Vec<ChatMessage> = messages.into_iter().skip(1).collect();
save_messages(&conv_id, &history);
let needs_input = answer.contains('') || answer.contains('?');
let suggestions = if needs_input {

194
src/api/clip_api.rs Normal file
View File

@@ -0,0 +1,194 @@
use axum::{
extract::{Query, State},
http::StatusCode,
response::{IntoResponse, Response},
routing::{get, post},
Json, Router,
};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use crate::core::processor::{classify_image, classify_images, detect_objects, ClipPrediction};
use crate::api::types::AppState;
#[derive(Debug, Deserialize)]
pub struct ClassifyRequest {
image_path: String,
labels: String,
#[serde(default = "default_top_k")]
top_k: usize,
#[serde(default)]
model: Option<String>,
}
fn default_top_k() -> usize {
5
}
#[derive(Debug, Deserialize)]
pub struct DetectRequest {
image_path: String,
objects: String,
#[serde(default = "default_threshold")]
threshold: f32,
#[serde(default)]
model: Option<String>,
}
fn default_threshold() -> f32 {
0.15
}
#[derive(Debug, Deserialize)]
pub struct BatchClassifyRequest {
image_paths: String,
labels: String,
#[serde(default = "default_top_k")]
top_k: usize,
#[serde(default)]
model: Option<String>,
}
#[derive(Debug, Serialize)]
pub struct ClassifyResponse {
success: bool,
predictions: Vec<ClipPrediction>,
}
#[derive(Debug, Serialize)]
pub struct DetectResponse {
success: bool,
detected: Vec<ClipPrediction>,
}
#[derive(Debug, Serialize)]
pub struct BatchClassifyResponse {
success: bool,
results: HashMap<String, Vec<ClipPrediction>>,
}
#[derive(Debug, Serialize)]
pub struct ErrorResponse {
success: bool,
error: String,
}
pub fn clip_routes() -> Router<AppState> {
Router::new()
.route("/api/v1/clip/classify", post(classify_image_endpoint))
.route("/api/v1/clip/detect", post(detect_objects_endpoint))
.route("/api/v1/clip/batch", post(batch_classify_endpoint))
}
async fn classify_image_endpoint(
State(_state): State<AppState>,
Json(req): Json<ClassifyRequest>,
) -> Response {
let labels: Vec<&str> = req.labels.split(',').map(|s| s.trim()).collect();
let result = classify_image(
&req.image_path,
&labels,
Some(req.top_k),
req.model.as_deref(),
).await;
match result {
Ok(predictions) => {
tracing::info!(
"[CLIP_API] Classified {} -> top: {} ({:.3})",
req.image_path,
predictions.first().map(|p| p.label.as_str()).unwrap_or("none"),
predictions.first().map(|p| p.confidence).unwrap_or(0.0)
);
Json(ClassifyResponse {
success: true,
predictions,
}).into_response()
}
Err(e) => {
tracing::error!("[CLIP_API] Classification failed: {}", e);
Json(ErrorResponse {
success: false,
error: e.to_string(),
}).into_response()
}
}
}
async fn detect_objects_endpoint(
State(_state): State<AppState>,
Json(req): Json<DetectRequest>,
) -> Response {
let objects: Vec<&str> = req.objects.split(',').map(|s| s.trim()).collect();
let result = detect_objects(
&req.image_path,
&objects,
Some(req.threshold),
req.model.as_deref(),
).await;
match result {
Ok(detected) => {
if !detected.is_empty() {
tracing::info!(
"[CLIP_API] Detected {} objects in {}: {}",
detected.len(),
req.image_path,
detected.iter().map(|p| p.label.as_str()).collect::<Vec<_>>().join(", ")
);
} else {
tracing::info!("[CLIP_API] No objects detected in {} (threshold: {:.2})", req.image_path, req.threshold);
}
Json(DetectResponse {
success: true,
detected,
}).into_response()
}
Err(e) => {
tracing::error!("[CLIP_API] Detection failed: {}", e);
Json(ErrorResponse {
success: false,
error: e.to_string(),
}).into_response()
}
}
}
async fn batch_classify_endpoint(
State(_state): State<AppState>,
Json(req): Json<BatchClassifyRequest>,
) -> Response {
let image_paths: Vec<&str> = req.image_paths.split(',').map(|s| s.trim()).collect();
let labels: Vec<&str> = req.labels.split(',').map(|s| s.trim()).collect();
let result = classify_images(
&image_paths,
&labels,
Some(req.top_k),
req.model.as_deref(),
).await;
match result {
Ok(results_vec) => {
let results: HashMap<String, Vec<ClipPrediction>> = results_vec
.into_iter()
.map(|r| (r.image_path, r.predictions))
.collect();
tracing::info!("[CLIP_API] Batch classified {} images", results.len());
Json(BatchClassifyResponse {
success: true,
results,
}).into_response()
}
Err(e) => {
tracing::error!("[CLIP_API] Batch classification failed: {}", e);
Json(ErrorResponse {
success: false,
error: e.to_string(),
}).into_response()
}
}
}

View File

@@ -5,8 +5,9 @@ use axum::{
routing::{get, post},
Router,
};
use reqwest::Client;
use serde::{Deserialize, Serialize};
use crate::core::llm::function_calling::LLM_CLIENT;
use sqlx::Row;
use crate::api::types::AppState;
@@ -381,8 +382,7 @@ Rules:
"stream": false
});
let client = Client::new();
let resp = client
let resp = LLM_CLIENT
.post(llm_base_url())
.json(&body)
.timeout(std::time::Duration::from_secs(180))

View File

@@ -1002,15 +1002,17 @@ pub async fn bind_speakers(pool: &sqlx::PgPool, file_uuid: &str) -> anyhow::Resu
// Also update speaker_detections with the identity_id
let sd_table = schema::table_name("speaker_detections");
let _ = sqlx::query(
&format!("UPDATE {} SET identity_id = $1, confidence = $2 \
WHERE file_uuid = $3 AND speaker_id = $4 AND identity_id IS NULL", sd_table)
)
let _ = sqlx::query(&format!(
"UPDATE {} SET identity_id = $1, confidence = $2 \
WHERE file_uuid = $3 AND speaker_id = $4 AND identity_id IS NULL",
sd_table
))
.bind(identity_id)
.bind(overlap_ratio)
.bind(file_uuid)
.bind(&best_speaker)
.execute(pool).await;
.execute(pool)
.await;
bindings += 1;
}

View File

@@ -1510,7 +1510,8 @@ async fn search_identities_by_text(
let chunk_table = schema::table_name("chunk");
let like_q = format!("%{}%", params.q.replace('%', "%%"));
let page = params.page.unwrap_or(1).max(1);
let page_size = params.page_size
let page_size = params
.page_size
.or(params.limit)
.unwrap_or(20)
.min(100)

View File

@@ -734,6 +734,8 @@ pub async fn bind_identity_trace(
Json(req): Json<BindIdentityTraceRequest>,
) -> Result<Json<ApiResponse<serde_json::Value>>, (StatusCode, Json<serde_json::Value>)> {
let fd_table = crate::core::db::schema::table_name("face_detections");
let video_table = crate::core::db::schema::table_name("videos");
let video_table = crate::core::db::schema::table_name("videos");
let id_table = crate::core::db::schema::table_name("identities");
let history_table = crate::core::db::schema::table_name("identity_history");
@@ -854,6 +856,7 @@ pub async fn get_identity_traces(
) -> Result<Json<IdentityTracesResponse>, (StatusCode, String)> {
let id_table = crate::core::db::schema::table_name("identities");
let fd_table = crate::core::db::schema::table_name("face_detections");
let video_table = crate::core::db::schema::table_name("videos");
let page = params.page.unwrap_or(1);
let page_size = params.page_size.unwrap_or(20);
@@ -879,12 +882,13 @@ pub async fn get_identity_traces(
COUNT(*)::bigint AS frame_count,
MIN(fd.frame_number)::int AS first_frame,
MAX(fd.frame_number)::int AS last_frame,
ROUND(MIN(fd.frame_number)::numeric / 25.0, 1)::float8 AS first_sec,
ROUND(MAX(fd.frame_number)::numeric / 25.0, 1)::float8 AS last_sec,
ROUND(MIN(fd.frame_number)::numeric / NULLIF(v.fps, 0)::numeric, 1)::float8 AS first_sec,
ROUND(MAX(fd.frame_number)::numeric / NULLIF(v.fps, 0)::numeric, 1)::float8 AS last_sec,
ROUND(AVG(fd.confidence)::numeric, 4)::float8 AS avg_confidence
FROM {} fd
LEFT JOIN dev.videos v ON fd.file_uuid = v.file_uuid
WHERE fd.identity_id = $1
GROUP BY fd.file_uuid, fd.trace_id
GROUP BY fd.file_uuid, fd.trace_id, v.fps
ORDER BY fd.file_uuid, fd.trace_id
LIMIT $2 OFFSET $3"#,
fd_table

View File

@@ -1,10 +1,4 @@
use axum::{
extract::State,
http::StatusCode,
response::Json,
routing::post,
Router,
};
use axum::{extract::State, http::StatusCode, response::Json, routing::post, Router};
use serde::Deserialize;
use tracing::warn;

View File

@@ -63,6 +63,7 @@ pub fn bbox_routes() -> Router<crate::api::types::AppState> {
)
.route("/api/v1/file/:file_uuid/video", get(stream_video))
.route("/api/v1/file/:file_uuid/thumbnail", get(face_thumbnail))
.route("/api/v1/file/:file_uuid/chunk/:chunk_id/thumbnail", get(chunk_thumbnail))
.route("/api/v1/file/:file_uuid/clip", get(video_clip))
}
@@ -745,13 +746,14 @@ async fn face_thumbnail(
.join(format!("{}.jpg", frame));
if cached_path.exists() {
tracing::debug!("[thumbnail] Using cached face crop: {}", cached_path.display());
let bytes = tokio::fs::read(&cached_path)
.await
.map_err(|e| {
tracing::warn!("[thumbnail] Failed to read cached file: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
tracing::debug!(
"[thumbnail] Using cached face crop: {}",
cached_path.display()
);
let bytes = tokio::fs::read(&cached_path).await.map_err(|e| {
tracing::warn!("[thumbnail] Failed to read cached file: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
// Validate cached JPEG
crate::core::thumbnail::validator::validate_jpeg(&bytes).map_err(|e| {
@@ -766,7 +768,7 @@ async fn face_thumbnail(
.body(Body::from(bytes))
.unwrap());
}
// Cached file not found, fallback to ffmpeg
tracing::debug!("[thumbnail] Cached file not found, falling back to ffmpeg");
}
@@ -841,6 +843,99 @@ async fn face_thumbnail(
.unwrap())
}
async fn chunk_thumbnail(
State(state): State<crate::api::types::AppState>,
Path((file_uuid, chunk_id)): Path<(String, String)>,
) -> Result<impl IntoResponse, StatusCode> {
let videos_table = schema::table_name("videos");
let chunk_table = schema::table_name("chunk");
let output_dir = crate::core::config::OUTPUT_DIR.as_str();
let cached_path = std::path::PathBuf::from(output_dir)
.join(".chunk_thumbs")
.join(&file_uuid)
.join(format!("{}.jpg", chunk_id));
if cached_path.exists() {
let bytes = tokio::fs::read(&cached_path).await.map_err(|e| {
tracing::warn!("[chunk_thumbnail] Failed to read cache: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
return Ok(Response::builder()
.status(StatusCode::OK)
.header(header::CONTENT_TYPE, "image/jpeg")
.header(header::CACHE_CONTROL, "public, max-age=86400")
.body(Body::from(bytes))
.unwrap());
}
let row: (f64, f64, f64) = sqlx::query_as(&format!(
"SELECT start_time, end_time, fps FROM {} WHERE file_uuid = $1 AND chunk_id = $2 LIMIT 1",
chunk_table
))
.bind(&file_uuid)
.bind(&chunk_id)
.fetch_optional(state.db.pool())
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
.ok_or(StatusCode::NOT_FOUND)?;
let (start_time, end_time, fps) = row;
let start_frame = (start_time * fps).round() as i64;
let end_frame = (end_time * fps).round() as i64;
let mid_frame = (start_frame + end_frame) / 2;
let video: Option<(String, Option<i64>)> = sqlx::query_as(&format!(
"SELECT file_path, total_frames FROM {} WHERE file_uuid = $1",
videos_table
))
.bind(&file_uuid)
.fetch_optional(state.db.pool())
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let (file_path, total_frames) = video.ok_or(StatusCode::NOT_FOUND)?;
let frame = match total_frames {
Some(t) if t > 0 => mid_frame.min(t - 1).max(0),
_ => mid_frame.max(0),
};
let select = format!("select=eq(n\\,{})", frame);
let output = ffmpeg_cmd()
.args([
"-i", &file_path,
"-vf", &select,
"-frames:v", "1",
"-f", "image2pipe",
"-vcodec", "mjpeg",
"-",
])
.output()
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
if !output.status.success() {
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
crate::core::thumbnail::validator::validate_jpeg(&output.stdout).map_err(|e| {
tracing::warn!("[chunk_thumbnail] JPEG validation failed: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
if let Some(parent) = cached_path.parent() {
let _ = tokio::fs::create_dir_all(parent).await;
}
let _ = tokio::fs::write(&cached_path, &output.stdout).await;
Ok(Response::builder()
.status(StatusCode::OK)
.header(header::CONTENT_TYPE, "image/jpeg")
.header(header::CACHE_CONTROL, "public, max-age=86400")
.body(Body::from(output.stdout))
.unwrap())
}
#[derive(Debug, serde::Deserialize)]
struct ClipQuery {
start_frame: Option<i64>,
@@ -945,13 +1040,17 @@ async fn stranger_video_inner(
use axum::http::header;
use uuid::Uuid;
tracing::info!("[stranger_video] Starting for file={}, stranger={}", file_uuid, stranger_id);
tracing::info!(
"[stranger_video] Starting for file={}, stranger={}",
file_uuid,
stranger_id
);
let (mode, audio) = parse_video_params(&params);
let videos_table = schema::table_name("videos");
tracing::debug!("[stranger_video] videos_table: {}", videos_table);
let row: Option<(String, f64, i32, i32)> = sqlx::query_as(&format!(
"SELECT file_path, COALESCE(fps, 24.0), COALESCE(width, 0), COALESCE(height, 0) FROM {} WHERE file_uuid = $1",
videos_table
@@ -963,18 +1062,22 @@ async fn stranger_video_inner(
tracing::error!("[stranger_video] Video query error: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
let (video_path, fps, _width, _height) = row.ok_or_else(|| {
tracing::error!("[stranger_video] Video not found for uuid={}", file_uuid);
StatusCode::NOT_FOUND
})?;
tracing::info!("[stranger_video] Found video: path={}, fps={}", video_path, fps);
tracing::info!(
"[stranger_video] Found video: path={}, fps={}",
video_path,
fps
);
// Query face detections by stranger_id directly
let face_table = schema::table_name("face_detections");
tracing::debug!("[stranger_video] face_table: {}", face_table);
// frame_number is BIGINT (i64) in database
let rows: Vec<(i64, i32, i32, i32, i32)> = sqlx::query_as(&format!(
"SELECT frame_number, x, y, width, height FROM {} WHERE file_uuid = $1 AND stranger_id = $2 ORDER BY frame_number",
@@ -982,15 +1085,18 @@ async fn stranger_video_inner(
))
.bind(&file_uuid).bind(stranger_id)
.fetch_all(state.db.pool()).await
.unwrap_or_else(|e| {
tracing::error!("[stranger_video] Face query error: {}", e);
vec![]
.unwrap_or_else(|e| {
tracing::error!("[stranger_video] Face query error: {}", e);
vec![]
});
tracing::info!("[stranger_video] Found {} faces", rows.len());
if rows.is_empty() {
tracing::error!("[stranger_video] No faces found for stranger_id={}", stranger_id);
tracing::error!(
"[stranger_video] No faces found for stranger_id={}",
stranger_id
);
return Err(StatusCode::NOT_FOUND);
}
@@ -1004,8 +1110,13 @@ async fn stranger_video_inner(
let duration = (last_frame - first_frame) as f64 / fps + padding * 2.0;
let seek = (start_sec - padding).max(0.0);
tracing::info!("[stranger_video] Frame range: {} - {}, time: {:.2}s - {:.2}s",
first_frame, last_frame, seek, seek + duration);
tracing::info!(
"[stranger_video] Frame range: {} - {}, time: {:.2}s - {:.2}s",
first_frame,
last_frame,
seek,
seek + duration
);
// Only support normal mode for stranger video
let tmp = std::env::temp_dir().join(format!("stranger_{}.mp4", Uuid::new_v4()));
@@ -1017,37 +1128,98 @@ async fn stranger_video_inner(
cmd_args.push("-an");
}
cmd_args.extend_from_slice(&["-y", &tmp_str]);
tracing::debug!("[stranger_video] ffmpeg args: {:?}", cmd_args);
let result = ffmpeg_cmd()
.args(&cmd_args)
.output()
.map_err(|e| {
tracing::error!("[stranger_video] ffmpeg spawn error: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
let result = ffmpeg_cmd().args(&cmd_args).output().map_err(|e| {
tracing::error!("[stranger_video] ffmpeg spawn error: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
if !result.status.success() {
tracing::error!("[stranger_video] ffmpeg failed: {}", String::from_utf8_lossy(&result.stderr));
tracing::error!(
"[stranger_video] ffmpeg failed: {}",
String::from_utf8_lossy(&result.stderr)
);
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
tracing::info!("[stranger_video] ffmpeg success, output size: {} bytes", result.stdout.len());
let data = tokio::fs::read(&tmp)
.await
.map_err(|e| {
tracing::error!("[stranger_video] Read output error: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
tracing::info!(
"[stranger_video] ffmpeg success, output size: {} bytes",
result.stdout.len()
);
let data = tokio::fs::read(&tmp).await.map_err(|e| {
tracing::error!("[stranger_video] Read output error: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
let _ = std::fs::remove_file(&tmp);
tracing::info!("[stranger_video] Returning video, size: {} bytes", data.len());
tracing::info!(
"[stranger_video] Returning video, size: {} bytes",
data.len()
);
Ok(Response::builder()
.header(header::CONTENT_TYPE, "video/mp4")
.header(header::CONTENT_LENGTH, data.len())
.body(Body::from(data))
.unwrap())
}
// ── Media Proxy: Unified endpoint for WordPress frontend ──
// Accepts the same query param format as the (inactive) WordPress snippet 61.
// Dispatches to the appropriate existing handler based on `type`.
// Caddy rewrites /wp-json/momentry/v1/media → /api/v1/media-proxy{?}
/// Dispatch query params to the appropriate handler
async fn media_proxy_handler(
State(state): State<crate::api::types::AppState>,
Query(params): Query<std::collections::HashMap<String, String>>,
request: axum::http::Request<Body>,
) -> Result<Response, StatusCode> {
let uuid = params
.get("uuid")
.or_else(|| params.get("file_uuid"))
.ok_or(StatusCode::BAD_REQUEST)?;
let type_ = params
.get("type")
.map(String::as_str)
.ok_or(StatusCode::BAD_REQUEST)?;
match type_ {
"thumbnail" => {
let thumb_query = ThumbQuery {
frame: params.get("frame").and_then(|v| v.parse().ok()),
x: params.get("x").and_then(|v| v.parse().ok()),
y: params.get("y").and_then(|v| v.parse().ok()),
w: params.get("w").and_then(|v| v.parse().ok()),
h: params.get("h").and_then(|v| v.parse().ok()),
trace_id: params.get("trace_id").and_then(|v| v.parse().ok()),
};
face_thumbnail(State(state), Path(uuid.clone()), Query(thumb_query))
.await
.map(IntoResponse::into_response)
}
"video" => stream_video(State(state), Path(uuid.clone()), Query(params), request)
.await
.map(IntoResponse::into_response),
"chunk_thumbnail" => {
let chunk_id = params
.get("chunk_id")
.ok_or(StatusCode::BAD_REQUEST)?;
chunk_thumbnail(
State(state),
Path((uuid.clone(), chunk_id.clone())),
)
.await
.map(IntoResponse::into_response)
}
_ => Err(StatusCode::BAD_REQUEST),
}
}
pub fn media_proxy_routes() -> Router<crate::api::types::AppState> {
Router::new().route("/api/v1/media-proxy", get(media_proxy_handler))
}

View File

@@ -3,81 +3,126 @@ use axum::routing::post;
use axum::{Json, Router};
use serde_json::{json, Value};
use crate::core::config;
use crate::core::db::postgres_db::PostgresDb;
use crate::core::pipeline as pipeline_core;
use crate::core::config;
async fn handle_store_asrx(Path(uuid): Path<String>) -> Result<Json<Value>, (axum::http::StatusCode, Json<Value>)> {
let db = PostgresDb::new(&config::DATABASE_URL).await
.map_err(|e| {
tracing::error!("DB error: {}", e);
(axum::http::StatusCode::INTERNAL_SERVER_ERROR, Json(json!({"error": "DB connection failed"})))
})?;
async fn handle_store_asrx(
Path(uuid): Path<String>,
) -> Result<Json<Value>, (axum::http::StatusCode, Json<Value>)> {
let db = PostgresDb::new(&config::DATABASE_URL).await.map_err(|e| {
tracing::error!("DB error: {}", e);
(
axum::http::StatusCode::INTERNAL_SERVER_ERROR,
Json(json!({"error": "DB connection failed"})),
)
})?;
pipeline_core::store_asrx_chunks(&db, &uuid).await
pipeline_core::store_asrx_chunks(&db, &uuid)
.await
.map_err(|e| {
tracing::error!("store_asrx error: {}", e);
(axum::http::StatusCode::INTERNAL_SERVER_ERROR, Json(json!({"error": e.to_string()})))
(
axum::http::StatusCode::INTERNAL_SERVER_ERROR,
Json(json!({"error": e.to_string()})),
)
})?;
Ok(Json(json!({"success": true, "message": "ASRX chunks stored", "file_uuid": uuid})))
Ok(Json(
json!({"success": true, "message": "ASRX chunks stored", "file_uuid": uuid}),
))
}
async fn handle_rule1(Path(uuid): Path<String>) -> Result<Json<Value>, (axum::http::StatusCode, Json<Value>)> {
let db = PostgresDb::new(&config::DATABASE_URL).await
.map_err(|e| {
tracing::error!("DB error: {}", e);
(axum::http::StatusCode::INTERNAL_SERVER_ERROR, Json(json!({"error": "DB connection failed"})))
})?;
async fn handle_rule1(
Path(uuid): Path<String>,
) -> Result<Json<Value>, (axum::http::StatusCode, Json<Value>)> {
let db = PostgresDb::new(&config::DATABASE_URL).await.map_err(|e| {
tracing::error!("DB error: {}", e);
(
axum::http::StatusCode::INTERNAL_SERVER_ERROR,
Json(json!({"error": "DB connection failed"})),
)
})?;
let count = pipeline_core::execute_rule1(&db, &uuid).await
let count = pipeline_core::execute_rule1(&db, &uuid)
.await
.map_err(|e| {
tracing::error!("rule1 error: {}", e);
(axum::http::StatusCode::INTERNAL_SERVER_ERROR, Json(json!({"error": e.to_string()})))
(
axum::http::StatusCode::INTERNAL_SERVER_ERROR,
Json(json!({"error": e.to_string()})),
)
})?;
Ok(Json(json!({"success": true, "message": format!("Rule 1 complete: {} chunks", count), "file_uuid": uuid, "chunks": count})))
Ok(Json(
json!({"success": true, "message": format!("Rule 1 complete: {} chunks", count), "file_uuid": uuid, "chunks": count}),
))
}
async fn handle_vectorize(Path(uuid): Path<String>) -> Result<Json<Value>, (axum::http::StatusCode, Json<Value>)> {
pipeline_core::vectorize_chunks(&uuid).await
.map_err(|e| {
tracing::error!("vectorize error: {}", e);
(axum::http::StatusCode::INTERNAL_SERVER_ERROR, Json(json!({"error": e.to_string()})))
})?;
async fn handle_vectorize(
Path(uuid): Path<String>,
) -> Result<Json<Value>, (axum::http::StatusCode, Json<Value>)> {
pipeline_core::vectorize_chunks(&uuid).await.map_err(|e| {
tracing::error!("vectorize error: {}", e);
(
axum::http::StatusCode::INTERNAL_SERVER_ERROR,
Json(json!({"error": e.to_string()})),
)
})?;
Ok(Json(json!({"success": true, "message": "Vectorization complete", "file_uuid": uuid})))
Ok(Json(
json!({"success": true, "message": "Vectorization complete", "file_uuid": uuid}),
))
}
async fn handle_phase1(Path(uuid): Path<String>) -> Result<Json<Value>, (axum::http::StatusCode, Json<Value>)> {
pipeline_core::run_phase1(&uuid).await
.map_err(|e| {
tracing::error!("phase1 error: {}", e);
(axum::http::StatusCode::INTERNAL_SERVER_ERROR, Json(json!({"error": e.to_string()})))
})?;
async fn handle_phase1(
Path(uuid): Path<String>,
) -> Result<Json<Value>, (axum::http::StatusCode, Json<Value>)> {
pipeline_core::run_phase1(&uuid).await.map_err(|e| {
tracing::error!("phase1 error: {}", e);
(
axum::http::StatusCode::INTERNAL_SERVER_ERROR,
Json(json!({"error": e.to_string()})),
)
})?;
Ok(Json(json!({"success": true, "message": "Phase 1 complete", "file_uuid": uuid})))
Ok(Json(
json!({"success": true, "message": "Phase 1 complete", "file_uuid": uuid}),
))
}
async fn handle_complete(Path(uuid): Path<String>) -> Result<Json<Value>, (axum::http::StatusCode, Json<Value>)> {
let db = PostgresDb::new(&config::DATABASE_URL).await
.map_err(|e| {
tracing::error!("DB error: {}", e);
(axum::http::StatusCode::INTERNAL_SERVER_ERROR, Json(json!({"error": "DB connection failed"})))
})?;
async fn handle_complete(
Path(uuid): Path<String>,
) -> Result<Json<Value>, (axum::http::StatusCode, Json<Value>)> {
let db = PostgresDb::new(&config::DATABASE_URL).await.map_err(|e| {
tracing::error!("DB error: {}", e);
(
axum::http::StatusCode::INTERNAL_SERVER_ERROR,
Json(json!({"error": "DB connection failed"})),
)
})?;
pipeline_core::mark_complete(&db, &uuid).await
pipeline_core::mark_complete(&db, &uuid)
.await
.map_err(|e| {
tracing::error!("complete error: {}", e);
(axum::http::StatusCode::INTERNAL_SERVER_ERROR, Json(json!({"error": e.to_string()})))
(
axum::http::StatusCode::INTERNAL_SERVER_ERROR,
Json(json!({"error": e.to_string()})),
)
})?;
Ok(Json(json!({"success": true, "message": "Video marked as completed", "file_uuid": uuid})))
Ok(Json(
json!({"success": true, "message": "Video marked as completed", "file_uuid": uuid}),
))
}
pub fn pipeline_routes() -> Router<crate::api::types::AppState> {
Router::new()
.route("/api/v1/file/:file_uuid/store-asrx", post(handle_store_asrx))
.route(
"/api/v1/file/:file_uuid/store-asrx",
post(handle_store_asrx),
)
.route("/api/v1/file/:file_uuid/rule1", post(handle_rule1))
.route("/api/v1/file/:file_uuid/vectorize", post(handle_vectorize))
.route("/api/v1/file/:file_uuid/phase1", post(handle_phase1))

View File

@@ -36,6 +36,9 @@ pub struct SearchResult {
pub summary: Option<String>,
pub metadata: Option<serde_json::Value>,
pub similarity: Option<f64>,
pub file_name: Option<String>,
pub serve_url: Option<String>,
pub thumbnail_url: Option<String>,
}
#[derive(Debug, Serialize)]
@@ -81,6 +84,9 @@ async fn enrich_from_pg(
summary: Some(p.summary),
metadata: p.metadata.clone(),
similarity: Some(qdrant_score as f64),
file_name: None,
serve_url: None,
thumbnail_url: None,
}),
Ok(None) => None,
Err(e) => {
@@ -105,6 +111,9 @@ fn pg_result_to_search(p: &SemanticSearchResult) -> SearchResult {
summary: Some(p.summary.clone()),
metadata: p.metadata.clone(),
similarity: p.similarity,
file_name: None,
serve_url: None,
thumbnail_url: None,
}
}
@@ -156,7 +165,10 @@ pub async fn smart_search(
.map(|h| (h.uuid, h.chunk_id, h.score as f64))
.collect()
} else {
let qdrant_hits = qdrant.search(&embedding, fetch_limit).await.unwrap_or_default();
let qdrant_hits = qdrant
.search(&embedding, fetch_limit)
.await
.unwrap_or_default();
qdrant_hits
.into_iter()
.map(|h| (h.uuid, h.chunk_id, h.score as f64))
@@ -264,7 +276,11 @@ pub async fn smart_search(
.and_modify(|e| {
e.score = e.score.max(*score);
e.semantic_score = Some(*score);
e.source = format!("{}_{}", e.source.strip_prefix("semantic+").unwrap_or(&e.source), "semantic");
e.source = format!(
"{}_{}",
e.source.strip_prefix("semantic+").unwrap_or(&e.source),
"semantic"
);
})
.or_insert(MergedResult {
file_uuid: file_uuid.clone(),
@@ -346,17 +362,36 @@ pub async fn smart_search(
// Sort by score descending (score-based merge)
let mut ranked: Vec<&MergedResult> = merged.values().collect();
ranked.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
ranked.sort_by(|a, b| {
b.score
.partial_cmp(&a.score)
.unwrap_or(std::cmp::Ordering::Equal)
});
// 6. Enrich top results from PG and build final response
let query_lower = req.query.to_lowercase();
let mut final_results = Vec::new();
for mr in ranked.iter().take(limit) {
for mr in ranked.iter().take(limit * 3) { // 取更多結果以便過濾
if let Some(pg) = db
.get_chunk_by_file_and_chunk_id(&mr.file_uuid, &mr.chunk_id)
.await
.ok()
.flatten()
{
// 關鍵字過濾
let summary_lower = pg.summary.to_lowercase();
let query_words: Vec<String> = query_lower.split_whitespace().map(|s| s.to_string()).collect();
// 檢查是否包含所有查詢詞(完整單詞)
let text_match = !pg.summary.is_empty() && {
let bordered = format!(" {} ", summary_lower);
query_words.iter().all(|w| bordered.contains(&format!(" {} ", w)))
};
if !text_match {
continue;
}
final_results.push(SearchResult {
id: 0,
file_uuid: pg.file_uuid.clone(),
@@ -371,10 +406,52 @@ pub async fn smart_search(
summary: Some(pg.summary),
metadata: pg.metadata.clone(),
similarity: Some(mr.score),
file_name: None,
serve_url: None,
thumbnail_url: pg.file_uuid.as_ref().map(|fu| format!(
"/wp-json/momentry/v1/media?type=chunk_thumbnail&file_uuid={}&chunk_id={}",
fu, mr.chunk_id
)),
});
}
}
// Trim to requested limit
final_results.truncate(limit);
// 7. Enrich results with file_name and serve_url from videos table
if !final_results.is_empty() {
let v_table = crate::core::db::schema::table_name("videos");
let file_uuids: Vec<String> = final_results
.iter()
.filter_map(|r| r.file_uuid.clone())
.collect();
let file_rows: Vec<(String, String, String)> = sqlx::query_as(&format!(
"SELECT file_uuid::text, file_name, file_path FROM {} WHERE file_uuid = ANY($1)",
v_table
))
.bind(&file_uuids)
.fetch_all(db.pool())
.await
.unwrap_or_default();
let file_map: std::collections::HashMap<String, (String, String)> = file_rows
.into_iter()
.map(|(uuid, name, path)| (uuid, (name, path)))
.collect();
let storage_root = crate::core::config::STORAGE_ROOT.as_str();
let serve_base = crate::core::config::SERVE_BASE_URL.as_str();
for r in &mut final_results {
if let Some(ref uuid) = r.file_uuid {
if let Some((name, path)) = file_map.get(uuid) {
r.file_name = Some(name.clone());
if let Some(relative) = path.strip_prefix(storage_root) {
r.serve_url = Some(format!("{}{}", serve_base, relative));
}
}
}
}
}
// Determine strategy string
let mut strategies = vec!["semantic"];
if !keyword_results.is_empty() {
@@ -400,4 +477,4 @@ pub async fn smart_search(
pub fn search_routes() -> Router<crate::api::types::AppState> {
Router::new().route("/api/v1/search/smart", post(smart_search))
}
}

View File

@@ -20,9 +20,9 @@ use super::identity_agent_api;
use super::identity_api;
use super::identity_binding;
use super::llm_search;
use super::pipeline;
use super::media_api;
use super::middleware::unified_auth;
use super::pipeline;
use super::processing;
use super::scan;
use super::search::search_routes;
@@ -117,6 +117,7 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> {
.merge(identity_agent_api::identity_agent_routes())
.merge(five_w1h_agent_api::five_w1h_agent_routes())
.merge(media_api::bbox_routes())
.merge(media_api::media_proxy_routes())
.merge(trace_agent_api::trace_agent_routes())
.merge(search_routes())
.merge(llm_search::llm_smart_routes())

View File

@@ -593,7 +593,11 @@ async fn get_trace_thumbnail_inner(
// For trace_id=0 (untracked/stranger), check unbound directory instead
let output_dir = crate::core::config::OUTPUT_DIR.as_str();
let trace_id_str = trace_id.to_string();
let trace_dir_name = if trace_id == 0 { "unbound" } else { &trace_id_str };
let trace_dir_name = if trace_id == 0 {
"unbound"
} else {
&trace_id_str
};
let trace_dir = std::path::PathBuf::from(output_dir)
.join(".faces")
.join(&file_uuid)
@@ -605,15 +609,16 @@ async fn get_trace_thumbnail_inner(
while let Some(Ok(entry)) = entries.next() {
let path = entry.path();
if path.extension().map_or(false, |e| e == "jpg") {
tracing::info!("[trace_thumbnail] Using cached face crop: {}", path.display());
let bytes = tokio::fs::read(&path)
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"error": e.to_string()})),
)
})?;
tracing::info!(
"[trace_thumbnail] Using cached face crop: {}",
path.display()
);
let bytes = tokio::fs::read(&path).await.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"error": e.to_string()})),
)
})?;
// Validate cached JPEG
crate::core::thumbnail::validator::validate_jpeg(&bytes).map_err(|e| {
@@ -647,7 +652,11 @@ async fn get_trace_thumbnail_inner(
let seek = sel.frame as f64 / sel.fps;
let tmp = std::env::temp_dir().join(format!("trace_{}_{}.jpg", file_uuid, trace_id));
tracing::debug!("[trace_thumbnail] Fallback to ffmpeg for trace {} frame {}", trace_id, sel.frame);
tracing::debug!(
"[trace_thumbnail] Fallback to ffmpeg for trace {} frame {}",
trace_id,
sel.frame
);
let status = tokio::process::Command::new("ffmpeg")
.args([