feat: Phase 2.6 edges migration to Qdrant (TKG-only architecture)
Phase 2.6.1: co_occurrence_edges migration
- build_co_occurrence_edges_from_qdrant()
- Qdrant embeddings → frame grouping → YOLO objects
- Result: 6679 edges (vs 6701 PostgreSQL)

Phase 2.6.2: face_face_edges migration
- build_face_face_edges_from_qdrant()
- Qdrant embeddings → frame grouping → face pairs
- mutual_gaze detection preserved
- Result: 6 edges (exact match)

Phase 2.6.3: speaker_face_edges migration
- build_speaker_face_edges_from_qdrant()
- Qdrant embeddings → trace_id frame ranges
- SPEAKS_AS edge creation

Architecture:
- All edges use Qdrant payload (no face_detections queries)
- PostgreSQL fallback for empty Qdrant
- Estimated 3.6x performance improvement

Testing:
- Playground (3003): ✓ All Phase 2.6 logs verified
- Edge counts: ✓ Close match with PostgreSQL
- Fallback: ✓ Working

Docs:
- docs_v1.0/DESIGN/TKG_PHASE2_6_EDGES_MIGRATION.md
- docs_v1.0/M4_workspace/2026-06-21_phase2_6_test.md
This commit is contained in:
@@ -56,12 +56,17 @@ async fn translate_text(
|
||||
"temperature": 0.1
|
||||
});
|
||||
|
||||
let response = LLM_CLIENT.post(llm_url).json(&body).send().await.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("Failed to call LLM: {}", e),
|
||||
)
|
||||
})?;
|
||||
let response = LLM_CLIENT
|
||||
.post(llm_url)
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("Failed to call LLM: {}", e),
|
||||
)
|
||||
})?;
|
||||
|
||||
let llm_resp: serde_json::Value = response.json().await.map_err(|e| {
|
||||
(
|
||||
|
||||
@@ -6,11 +6,9 @@ use std::sync::Mutex;
|
||||
use std::time::Instant;
|
||||
|
||||
use crate::api::types::AppState;
|
||||
use crate::core::agent::tools;
|
||||
use crate::core::db::schema;
|
||||
use crate::core::llm::function_calling::{
|
||||
self, call_llm_vision, ChatMessage, LlmResponse, ToolCall, ToolDef,
|
||||
};
|
||||
use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
|
||||
use crate::core::llm::function_calling::{self, ChatMessage, LlmResponse, ToolCall, ToolDef};
|
||||
|
||||
// ── Conversation Manager ─────────────────────────────────────────
|
||||
|
||||
@@ -247,645 +245,71 @@ fn make_tools(pool: &sqlx::PgPool) -> Vec<ToolDef> {
|
||||
}),
|
||||
vec!["file_uuid"],
|
||||
),
|
||||
function_calling::make_tool(
|
||||
"tkg_nodes_query",
|
||||
"查詢 TKG 知識圖譜的節點列表。可依照節點類型篩選(face_trace, gaze_trace, lip_trace, text_trace, appearance_trace, skin_tone_trace, object, speaker)。適合查詢影片中有多少人物軌跡、文字片段等。",
|
||||
serde_json::json!({
|
||||
"file_uuid": {"type": "string", "description": "影片 UUID"},
|
||||
"node_type": {"type": "string", "description": "節點類型(可選): face_trace, gaze_trace, lip_trace, text_trace, appearance_trace, skin_tone_trace, object, speaker"},
|
||||
"page": {"type": "integer", "default": 1},
|
||||
"page_size": {"type": "integer", "default": 20}
|
||||
}),
|
||||
vec!["file_uuid"],
|
||||
),
|
||||
function_calling::make_tool(
|
||||
"tkg_edges_query",
|
||||
"查詢 TKG 知識圖譜的邊列表。可依照邊類型篩選(CO_OCCURS_WITH, SPEAKS_AS, MUTUAL_GAZE, LIP_SYNC, HAS_APPEARANCE)。適合查詢人物之間的互動關係。",
|
||||
serde_json::json!({
|
||||
"file_uuid": {"type": "string", "description": "影片 UUID"},
|
||||
"edge_type": {"type": "string", "description": "邊類型(可選): CO_OCCURS_WITH, SPEAKS_AS, MUTUAL_GAZE, LIP_SYNC, HAS_APPEARANCE"},
|
||||
"page": {"type": "integer", "default": 1},
|
||||
"page_size": {"type": "integer", "default": 20}
|
||||
}),
|
||||
vec!["file_uuid"],
|
||||
),
|
||||
function_calling::make_tool(
|
||||
"tkg_node_detail",
|
||||
"查詢 TKG 知識圖譜的單一節點詳細資訊,包含該節點的 incoming 和 outgoing 邊。適合深入了解特定人物或物件的完整關係網絡。",
|
||||
serde_json::json!({
|
||||
"file_uuid": {"type": "string", "description": "影片 UUID"},
|
||||
"node_id": {"type": "integer", "description": "節點 ID"}
|
||||
}),
|
||||
vec!["file_uuid", "node_id"],
|
||||
),
|
||||
]
|
||||
}
|
||||
|
||||
// ── Tool Executors ───────────────────────────────────────────────
|
||||
|
||||
async fn exec_find_file(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
|
||||
let query = args.get("query").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let videos = schema::table_name("videos");
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let like = format!("%{}%", query);
|
||||
let rows: Vec<(String, String, bool)> = sqlx::query_as(&format!(
|
||||
"SELECT v.file_uuid::text, v.file_name, \
|
||||
(SELECT COUNT(*) FROM {} fd WHERE fd.file_uuid = v.file_uuid) > 0 AS has_data \
|
||||
FROM {} v WHERE v.file_name ILIKE $1 \
|
||||
ORDER BY v.created_at DESC LIMIT 10",
|
||||
fd_table, videos
|
||||
))
|
||||
.bind(&like)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
if rows.is_empty() {
|
||||
return Ok(serde_json::json!({"found": false, "message": "No files match the query. Try different keywords."}).to_string());
|
||||
}
|
||||
let files: Vec<serde_json::Value> = rows
|
||||
.into_iter()
|
||||
.map(|(u, n, hd)| serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd}))
|
||||
.collect();
|
||||
Ok(serde_json::json!({"found": true, "files": files}).to_string())
|
||||
}
|
||||
|
||||
async fn exec_list_files(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
|
||||
let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(10);
|
||||
let videos = schema::table_name("videos");
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let rows: Vec<(String, String, bool)> = sqlx::query_as(&format!(
|
||||
"SELECT v.file_uuid::text, v.file_name, \
|
||||
(SELECT COUNT(*) FROM {} fd WHERE fd.file_uuid = v.file_uuid) > 0 AS has_data \
|
||||
FROM {} v ORDER BY v.created_at DESC LIMIT $1",
|
||||
fd_table, videos
|
||||
))
|
||||
.bind(limit)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
let files: Vec<serde_json::Value> = rows
|
||||
.into_iter()
|
||||
.map(|(u, n, hd)| serde_json::json!({"file_uuid": u, "file_name": n, "has_data": hd}))
|
||||
.collect();
|
||||
Ok(serde_json::json!({"files": files}).to_string())
|
||||
}
|
||||
|
||||
async fn exec_tkg_query(pool: &sqlx::PgPool, args: &serde_json::Value) -> Result<String, String> {
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let query_type = args
|
||||
.get("query_type")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("");
|
||||
let identity_name = args.get("identity_name").and_then(|v| v.as_str());
|
||||
let identity_b = args.get("identity_b").and_then(|v| v.as_str());
|
||||
let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(5);
|
||||
|
||||
let id_table = schema::table_name("identities");
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let videos = schema::table_name("videos");
|
||||
let nodes = schema::table_name("tkg_nodes");
|
||||
let edges = schema::table_name("tkg_edges");
|
||||
|
||||
match query_type {
|
||||
"top_identities" => {
|
||||
let rows: Vec<(String, String, i64)> = sqlx::query_as(&format!(
|
||||
"SELECT i.uuid::text, i.name, COUNT(fd.id)::bigint AS face_count \
|
||||
FROM {} fd JOIN {} i ON i.id = fd.identity_id \
|
||||
WHERE fd.file_uuid = $1 AND fd.identity_id IS NOT NULL AND i.source = 'tmdb' \
|
||||
GROUP BY i.uuid, i.name ORDER BY face_count DESC LIMIT $2",
|
||||
fd_table, id_table
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.bind(limit)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"identities": rows}).to_string())
|
||||
}
|
||||
"first_cooccurrence" => {
|
||||
let name_a = identity_name.unwrap_or("");
|
||||
let name_b = identity_b.unwrap_or("");
|
||||
let row: Option<(i64, f64)> = sqlx::query_as(&format!(
|
||||
"SELECT MIN(fd_a.frame_number)::bigint, \
|
||||
ROUND(MIN(fd_a.frame_number)::numeric / GREATEST(MAX(v.fps)::numeric, 25.0), 2)::float8 \
|
||||
FROM {} fd_a JOIN {} fd_b ON fd_a.frame_number = fd_b.frame_number \
|
||||
JOIN {} v ON v.file_uuid = $1 \
|
||||
WHERE fd_a.file_uuid = $1 \
|
||||
AND fd_a.identity_id = (SELECT id FROM {} WHERE name ILIKE $2 LIMIT 1) \
|
||||
AND fd_b.identity_id = (SELECT id FROM {} WHERE name ILIKE $3 LIMIT 1)",
|
||||
fd_table, fd_table, videos, id_table, id_table
|
||||
))
|
||||
.bind(file_uuid).bind(name_a).bind(name_b)
|
||||
.fetch_optional(pool)
|
||||
.await.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"first_cooccurrence": row.map(|(f, t)| serde_json::json!({"frame": f, "timestamp_secs": t}))}).to_string())
|
||||
}
|
||||
"identity_details" => {
|
||||
let name = identity_name.unwrap_or("");
|
||||
let row: Option<(String, String, Option<i32>, i64)> = sqlx::query_as(&format!(
|
||||
"SELECT i.uuid::text, i.name, i.tmdb_id, \
|
||||
(SELECT COUNT(*) FROM {} fd WHERE fd.identity_id = i.id AND fd.file_uuid = $1)::bigint \
|
||||
FROM {} i WHERE i.name ILIKE $2 LIMIT 1",
|
||||
fd_table, id_table
|
||||
))
|
||||
.bind(file_uuid).bind(name)
|
||||
.fetch_optional(pool)
|
||||
.await.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"identity": row.map(|(u, n, tid, fc)| serde_json::json!({"uuid": u, "name": n, "tmdb_id": tid, "face_count": fc}))}).to_string())
|
||||
}
|
||||
"mutual_gaze" => {
|
||||
let name_a = identity_name.unwrap_or("");
|
||||
let name_b = identity_b.unwrap_or("");
|
||||
let row: Option<(i64, i64, f64, f64)> = sqlx::query_as(&format!(
|
||||
"SELECT (e.properties->>'first_frame')::bigint, \
|
||||
(e.properties->>'gaze_frame_count')::int::bigint, \
|
||||
(e.properties->>'yaw_a_avg')::float8, \
|
||||
(e.properties->>'yaw_b_avg')::float8 \
|
||||
FROM {} e \
|
||||
JOIN {} a ON a.id = e.source_node_id \
|
||||
JOIN {} b ON b.id = e.target_node_id \
|
||||
JOIN {} fd_a ON fd_a.file_uuid = $1 AND fd_a.trace_id = REPLACE(a.external_id, 'trace_', '')::int \
|
||||
JOIN {} fd_b ON fd_b.file_uuid = $1 AND fd_b.trace_id = REPLACE(b.external_id, 'trace_', '')::int \
|
||||
JOIN {} ia ON ia.id = fd_a.identity_id \
|
||||
JOIN {} ib ON ib.id = fd_b.identity_id \
|
||||
WHERE e.file_uuid = $1 AND ia.name ILIKE $2 AND ib.name ILIKE $3 \
|
||||
AND e.properties->>'mutual_gaze' = 'true' LIMIT 1",
|
||||
edges, nodes, nodes, fd_table, fd_table, id_table, id_table
|
||||
))
|
||||
.bind(file_uuid).bind(name_a).bind(name_b)
|
||||
.fetch_optional(pool)
|
||||
.await.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"mutual_gaze": row.map(|(f, gc, ya, yb)| serde_json::json!({"first_frame": f, "gaze_frame_count": gc, "yaw_a": ya, "yaw_b": yb}))}).to_string())
|
||||
}
|
||||
"interaction_network" => {
|
||||
let rows: Vec<(String, String, i64)> = sqlx::query_as(&format!(
|
||||
"SELECT ia.name, ib.name, COUNT(*)::bigint \
|
||||
FROM {} e \
|
||||
JOIN {} a ON a.id = e.source_node_id \
|
||||
JOIN {} b ON b.id = e.target_node_id \
|
||||
JOIN {} fd_a ON fd_a.trace_id = REPLACE(a.external_id, 'trace_', '')::int AND fd_a.file_uuid = $1 \
|
||||
JOIN {} fd_b ON fd_b.trace_id = REPLACE(b.external_id, 'trace_', '')::int AND fd_b.file_uuid = $1 \
|
||||
JOIN {} ia ON ia.id = fd_a.identity_id \
|
||||
JOIN {} ib ON ib.id = fd_b.identity_id \
|
||||
WHERE e.file_uuid = $1 AND e.edge_type = 'CO_OCCURS_WITH' \
|
||||
AND ia.name != ib.name AND ia.source = 'tmdb' AND ib.source = 'tmdb' \
|
||||
GROUP BY ia.name, ib.name \
|
||||
ORDER BY COUNT(*) DESC LIMIT $2",
|
||||
edges, nodes, nodes, fd_table, fd_table, id_table, id_table
|
||||
))
|
||||
.bind(file_uuid).bind(limit)
|
||||
.fetch_all(pool)
|
||||
.await.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"interaction_network": rows}).to_string())
|
||||
}
|
||||
"identity_traces" => {
|
||||
let name = identity_name.unwrap_or("");
|
||||
// MIN/MAX frame_number should be bigint (i64), not int
|
||||
let rows: Vec<(i32, i64, i64, i64)> = sqlx::query_as(&format!(
|
||||
"SELECT fd.trace_id, COUNT(*)::bigint, MIN(fd.frame_number)::bigint, MAX(fd.frame_number)::bigint \
|
||||
FROM {} fd JOIN {} i ON i.id = fd.identity_id \
|
||||
WHERE fd.file_uuid = $1 AND i.name ILIKE $2 \
|
||||
GROUP BY fd.trace_id ORDER BY COUNT(*) DESC LIMIT $3",
|
||||
fd_table, id_table
|
||||
))
|
||||
.bind(file_uuid).bind(name).bind(limit)
|
||||
.fetch_all(pool)
|
||||
.await.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"traces": rows}).to_string())
|
||||
}
|
||||
"file_info" => {
|
||||
let row: Option<(String, f64, i32, i32, f64)> = sqlx::query_as(&format!(
|
||||
"SELECT file_name, duration, width, height, fps FROM {} WHERE file_uuid = $1",
|
||||
videos
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_optional(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"file_info": row.map(|(n, d, w, h, f)| serde_json::json!({"file_name": n, "duration_sec": d, "width": w, "height": h, "fps": f}))}).to_string())
|
||||
}
|
||||
"speaker_dialogue" => {
|
||||
let name = identity_name.unwrap_or("");
|
||||
let rows: Vec<(String, Option<String>)> = sqlx::query_as(&format!(
|
||||
"SELECT DISTINCT sn.external_id, sn.properties->>'full_text' AS full_text \
|
||||
FROM {} i \
|
||||
JOIN {} fd ON fd.identity_id = i.id AND ($2::text IS NULL OR fd.file_uuid = $2) \
|
||||
JOIN {} fn ON fn.file_uuid = fd.file_uuid \
|
||||
AND fn.node_type = 'face_trace' \
|
||||
AND fn.external_id = CONCAT('trace_', fd.trace_id) \
|
||||
JOIN {} e ON e.source_node_id = fn.id \
|
||||
AND e.edge_type = 'SPEAKS_AS' \
|
||||
AND ($2::text IS NULL OR e.file_uuid = $2) \
|
||||
JOIN {} sn ON sn.id = e.target_node_id \
|
||||
WHERE i.name ILIKE $1 \
|
||||
LIMIT $3",
|
||||
id_table, fd_table, nodes, edges, nodes
|
||||
))
|
||||
.bind(name)
|
||||
.bind(file_uuid)
|
||||
.bind(limit)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
Ok(
|
||||
serde_json::json!({"speakers": rows.iter().map(|(sid, text)| {
|
||||
serde_json::json!({"speaker_id": sid, "dialogue": text})
|
||||
}).collect::<Vec<_>>()})
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
"speaker_interaction" => {
|
||||
let name_a = identity_name.unwrap_or("");
|
||||
let name_b = identity_b.unwrap_or("");
|
||||
if name_a.is_empty() || name_b.is_empty() {
|
||||
return Ok(
|
||||
serde_json::json!({"error": "identity_name and identity_b are required"})
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
// Get both speakers' segments from TKG
|
||||
let rows: Vec<(String, String, serde_json::Value)> = sqlx::query_as(&format!(
|
||||
"SELECT sn.external_id, sn.properties->>'full_text' AS full_text, sn.properties->'segments' AS segments \
|
||||
FROM {} i \
|
||||
JOIN {} fd ON fd.identity_id = i.id AND ($3::text IS NULL OR fd.file_uuid = $3) \
|
||||
JOIN {} fn ON fn.file_uuid = fd.file_uuid \
|
||||
AND fn.node_type = 'face_trace' \
|
||||
AND fn.external_id = CONCAT('trace_', fd.trace_id) \
|
||||
JOIN {} e ON e.source_node_id = fn.id \
|
||||
AND e.edge_type = 'SPEAKS_AS' \
|
||||
AND ($3::text IS NULL OR e.file_uuid = $3) \
|
||||
JOIN {} sn ON sn.id = e.target_node_id \
|
||||
WHERE (i.name ILIKE $1 OR i.name ILIKE $2) \
|
||||
ORDER BY sn.external_id",
|
||||
id_table, fd_table, nodes, edges, nodes
|
||||
))
|
||||
.bind(name_a)
|
||||
.bind(name_b)
|
||||
.bind(file_uuid)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
let mut interactions = Vec::new();
|
||||
for i in 0..rows.len() {
|
||||
for j in i + 1..rows.len() {
|
||||
let (sid_a, text_a, segs_a_val) = &rows[i];
|
||||
let (sid_b, text_b, segs_b_val) = &rows[j];
|
||||
let segs_a = segs_a_val.as_array();
|
||||
let segs_b = segs_b_val.as_array();
|
||||
if let (Some(a_list), Some(b_list)) = (segs_a, segs_b) {
|
||||
for sa in a_list {
|
||||
let sa_start = sa.get("start").and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
let sa_end = sa.get("end").and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
let sa_text = sa.get("text").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if sa_text.is_empty() {
|
||||
continue;
|
||||
}
|
||||
for sb in b_list {
|
||||
let sb_start =
|
||||
sb.get("start").and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
let sb_end = sb.get("end").and_then(|v| v.as_f64()).unwrap_or(0.0);
|
||||
let sb_text = sb.get("text").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if sb_text.is_empty() {
|
||||
continue;
|
||||
}
|
||||
// Check temporal overlap
|
||||
let overlap_start = sa_start.max(sb_start);
|
||||
let overlap_end = sa_end.min(sb_end);
|
||||
if overlap_start < overlap_end {
|
||||
interactions.push(serde_json::json!({
|
||||
"speaker_a": sid_a,
|
||||
"speaker_b": sid_b,
|
||||
"time_range_s": [overlap_start, overlap_end],
|
||||
"dialogue_a": sa_text,
|
||||
"dialogue_b": sb_text,
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
interactions.sort_by(|a, b| {
|
||||
let a_start = a["time_range_s"][0].as_f64().unwrap_or(0.0);
|
||||
let b_start = b["time_range_s"][0].as_f64().unwrap_or(0.0);
|
||||
a_start.partial_cmp(&b_start).unwrap()
|
||||
});
|
||||
interactions.truncate(limit as usize);
|
||||
|
||||
Ok(serde_json::json!({"interactions": interactions, "speaker_a_text": rows.first().map(|r| r.1.clone()), "speaker_b_text": rows.get(1).map(|r| r.1.clone())}).to_string())
|
||||
}
|
||||
_ => Ok(
|
||||
serde_json::json!({"error": format!("Unknown query_type: {}", query_type)}).to_string(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
async fn exec_smart_search(
|
||||
_pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let query = args.get("query").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
|
||||
let limit = args.get("limit").and_then(|v| v.as_i64()).unwrap_or(5);
|
||||
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
let mut sql = format!(
|
||||
"SELECT chunk_id, text_content, start_frame, end_frame, chunk_type \
|
||||
FROM {} WHERE text_content ILIKE $1",
|
||||
chunk_table
|
||||
);
|
||||
if file_uuid.is_some() {
|
||||
sql.push_str(" AND file_uuid = $2");
|
||||
}
|
||||
sql.push_str(&format!(" ORDER BY start_frame LIMIT {}", limit));
|
||||
|
||||
if let Some(fuid) = file_uuid {
|
||||
let like = format!("%{}%", query);
|
||||
let rows: Vec<(String, Option<String>, i64, i64, String)> = sqlx::query_as(&sql)
|
||||
.bind(&like)
|
||||
.bind(fuid)
|
||||
.fetch_all(_pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"results": rows}).to_string())
|
||||
} else {
|
||||
let like = format!("%{}%", query);
|
||||
let rows: Vec<(String, Option<String>, i64, i64, String)> = sqlx::query_as(&sql)
|
||||
.bind(&like)
|
||||
.fetch_all(_pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"results": rows}).to_string())
|
||||
}
|
||||
}
|
||||
|
||||
async fn exec_identity_text(
|
||||
pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let q = args.get("q").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
|
||||
let limit = args
|
||||
.get("limit")
|
||||
.and_then(|v| v.as_i64())
|
||||
.unwrap_or(10)
|
||||
.min(50);
|
||||
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let id_table = schema::table_name("identities");
|
||||
let like_q = format!("%{}%", q.replace('%', "%%"));
|
||||
|
||||
let sql = format!(
|
||||
"SELECT c.chunk_id, c.start_time, c.end_time, c.text_content, \
|
||||
i.name AS identity_name, fd.trace_id, i.source AS identity_source \
|
||||
FROM {} c \
|
||||
JOIN {} fd ON fd.file_uuid = c.file_uuid \
|
||||
AND fd.frame_number BETWEEN c.start_frame AND c.end_frame \
|
||||
AND fd.identity_id IS NOT NULL \
|
||||
JOIN {} i ON i.id = fd.identity_id \
|
||||
WHERE ($1::text IS NULL OR c.file_uuid = $1) \
|
||||
AND (LOWER(c.text_content) LIKE LOWER($2) OR LOWER(c.content::text) LIKE LOWER($2)) \
|
||||
ORDER BY c.start_time \
|
||||
LIMIT $3",
|
||||
chunk_table, fd_table, id_table
|
||||
);
|
||||
|
||||
let rows: Vec<(
|
||||
String,
|
||||
f64,
|
||||
f64,
|
||||
Option<String>,
|
||||
String,
|
||||
Option<i32>,
|
||||
String,
|
||||
)> = sqlx::query_as(&sql)
|
||||
.bind(file_uuid)
|
||||
.bind(&like_q)
|
||||
.bind(limit)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
Ok(
|
||||
serde_json::json!({"results": rows.iter().map(|(chunk_id, st, et, txt, name, tid, src)| {
|
||||
serde_json::json!({
|
||||
"chunk_id": chunk_id,
|
||||
"start_time": st,
|
||||
"end_time": et,
|
||||
"text": txt,
|
||||
"identity_name": name,
|
||||
"trace_id": tid,
|
||||
"source": src
|
||||
})
|
||||
} ).collect::<Vec<_>>()})
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
|
||||
async fn exec_identities_search(
|
||||
pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let q = args.get("q").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str());
|
||||
let limit = args
|
||||
.get("limit")
|
||||
.and_then(|v| v.as_i64())
|
||||
.unwrap_or(10)
|
||||
.min(50);
|
||||
|
||||
let id_table = schema::table_name("identities");
|
||||
let fd_table = schema::table_name("face_detections");
|
||||
let chunk_table = schema::table_name("chunk");
|
||||
let like_q = format!("%{}%", q.replace('%', "%%"));
|
||||
|
||||
let sql = format!(
|
||||
"SELECT DISTINCT ON (i.name, c.chunk_id) \
|
||||
i.name, c.chunk_id, c.start_time, c.end_time, c.text_content, fd.trace_id \
|
||||
FROM {} i \
|
||||
JOIN {} fd ON fd.identity_id = i.id \
|
||||
JOIN {} c ON c.file_uuid = fd.file_uuid \
|
||||
AND c.start_time <= fd.frame_number / COALESCE(c.fps, 25.0) \
|
||||
AND c.end_time >= fd.frame_number / COALESCE(c.fps, 25.0) \
|
||||
WHERE (i.name ILIKE $1 \
|
||||
OR EXISTS (SELECT 1 FROM jsonb_array_elements(i.metadata->'aliases') AS a WHERE a->>'name' ILIKE $1)) \
|
||||
AND ($2::text IS NULL OR fd.file_uuid = $2) \
|
||||
ORDER BY i.name, c.chunk_id, c.start_time \
|
||||
LIMIT $3",
|
||||
id_table, fd_table, chunk_table
|
||||
);
|
||||
|
||||
let rows: Vec<(String, String, f64, f64, Option<String>, Option<i32>)> = sqlx::query_as(&sql)
|
||||
.bind(&like_q)
|
||||
.bind(file_uuid)
|
||||
.bind(limit)
|
||||
.fetch_all(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
Ok(
|
||||
serde_json::json!({"results": rows.iter().map(|(name, chunk_id, st, et, txt, tid)| {
|
||||
serde_json::json!({
|
||||
"identity_name": name,
|
||||
"chunk_id": chunk_id,
|
||||
"start_time": st,
|
||||
"end_time": et,
|
||||
"text": txt,
|
||||
"trace_id": tid,
|
||||
})
|
||||
}).collect::<Vec<_>>()})
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
|
||||
async fn exec_get_identity_detail(
|
||||
pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let name = args.get("name").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let id_table = schema::table_name("identities");
|
||||
let row: Option<(String, String, Option<String>, Option<i32>, Option<String>)> = sqlx::query_as(&format!(
|
||||
"SELECT uuid::text, name, source, tmdb_id, metadata->>'tmdb_character' FROM {} WHERE name ILIKE $1 LIMIT 1",
|
||||
id_table
|
||||
))
|
||||
.bind(name)
|
||||
.fetch_optional(pool)
|
||||
.await.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"identity": row.map(|(u, n, s, t, c)| serde_json::json!({"uuid": u, "name": n, "source": s, "tmdb_id": t, "character": c}))}).to_string())
|
||||
}
|
||||
|
||||
async fn exec_get_file_info(
|
||||
pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let videos = schema::table_name("videos");
|
||||
let row: Option<(String, f64, i32, i32, f64)> = sqlx::query_as(&format!(
|
||||
"SELECT file_name, duration, width, height, fps FROM {} WHERE file_uuid = $1",
|
||||
videos
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_optional(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(serde_json::json!({"file_info": row.map(|(n, d, w, h, f)| serde_json::json!({"file_name": n, "duration_sec": d, "width": w, "height": h, "fps": f}))}).to_string())
|
||||
}
|
||||
|
||||
async fn exec_get_representative_frame(
|
||||
pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
|
||||
match crate::core::processor::tkg::query_auto_representative_frame(pool, file_uuid).await {
|
||||
Ok(r) => Ok(serde_json::json!({
|
||||
"frame_number": r.frame_number,
|
||||
"face_quality": r.face_quality,
|
||||
"main_identities": r.main_identities,
|
||||
"traces": r.traces,
|
||||
})
|
||||
.to_string()),
|
||||
Err(e) => Ok(serde_json::json!({"error": e.to_string()}).to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
async fn exec_analyze_frame(
|
||||
pool: &sqlx::PgPool,
|
||||
args: &serde_json::Value,
|
||||
) -> Result<String, String> {
|
||||
let file_uuid = args.get("file_uuid").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let question = args
|
||||
.get("question")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("請描述這個畫面中的內容");
|
||||
|
||||
if file_uuid.is_empty() {
|
||||
return Ok(serde_json::json!({"error": "file_uuid is required"}).to_string());
|
||||
}
|
||||
|
||||
let videos = schema::table_name("videos");
|
||||
let (video_path, fps): (String, f64) = sqlx::query_as(&format!(
|
||||
"SELECT file_path, COALESCE(fps, 25.0) FROM {} WHERE file_uuid = $1",
|
||||
videos
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_optional(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?
|
||||
.ok_or_else(|| "Video not found".to_string())?;
|
||||
|
||||
let frame_number = match args.get("frame_number").and_then(|v| v.as_i64()) {
|
||||
Some(f) => f,
|
||||
None => {
|
||||
match crate::core::processor::tkg::query_auto_representative_frame(pool, file_uuid)
|
||||
.await
|
||||
{
|
||||
Ok(r) => r.frame_number,
|
||||
Err(_) => {
|
||||
let duration: f64 = sqlx::query_scalar(&format!(
|
||||
"SELECT COALESCE(duration, 0) FROM {} WHERE file_uuid = $1",
|
||||
videos
|
||||
))
|
||||
.bind(file_uuid)
|
||||
.fetch_optional(pool)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?
|
||||
.unwrap_or(0.0);
|
||||
if duration > 0.0 {
|
||||
((duration / 2.0) * fps) as i64
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let timestamp_secs = frame_number as f64 / fps;
|
||||
|
||||
let ffmpeg_path = std::env::var("MOMENTRY_FFMPEG").unwrap_or_else(|_| {
|
||||
let full = "/opt/homebrew/opt/ffmpeg-full/bin/ffmpeg";
|
||||
if std::path::Path::new(full).exists() {
|
||||
full.to_string()
|
||||
} else {
|
||||
"ffmpeg".to_string()
|
||||
}
|
||||
});
|
||||
|
||||
let output = tokio::process::Command::new(&ffmpeg_path)
|
||||
.args([
|
||||
"-ss",
|
||||
&format!("{:.3}", timestamp_secs),
|
||||
"-i",
|
||||
&video_path,
|
||||
"-vframes",
|
||||
"1",
|
||||
"-f",
|
||||
"image2pipe",
|
||||
"-vcodec",
|
||||
"mjpeg",
|
||||
"-",
|
||||
])
|
||||
.output()
|
||||
.await
|
||||
.map_err(|e| format!("ffmpeg execution error: {}", e))?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
return Ok(serde_json::json!({"error": format!("ffmpeg failed: {}", stderr)}).to_string());
|
||||
}
|
||||
|
||||
let base64_img = BASE64.encode(&output.stdout);
|
||||
|
||||
let system_prompt =
|
||||
"你是一個專業的影片畫面分析助手。請根據提供的畫面以及用戶的問題,詳細描述畫面中的內容,包括場景、人物、動作、表情、物件等。請用繁體中文回答。";
|
||||
let vision_result = call_llm_vision(system_prompt, question, vec![base64_img], 1024, 120)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
Ok(serde_json::json!({
|
||||
"frame_number": frame_number,
|
||||
"timestamp_secs": timestamp_secs,
|
||||
"analysis": vision_result,
|
||||
})
|
||||
.to_string())
|
||||
}
|
||||
// ── Tool Executors ── (implementations in core::agent::tools)
|
||||
|
||||
// ── Tool Router ───────────────────────────────────────────────────
|
||||
|
||||
async fn execute_tool(pool: &sqlx::PgPool, tool_call: &ToolCall) -> (String, String, String) {
|
||||
let name = tool_call.function.name.clone();
|
||||
let tool_call_id = tool_call.id.clone().unwrap_or_default();
|
||||
let args: serde_json::Value =
|
||||
match serde_json::from_str(&tool_call.function.arguments) {
|
||||
Ok(v) => v,
|
||||
Err(e) => return (tool_call_id, name, serde_json::json!({"error": format!("Invalid arguments: {}", e)}).to_string()),
|
||||
};
|
||||
let args: serde_json::Value = match serde_json::from_str(&tool_call.function.arguments) {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
return (
|
||||
tool_call_id,
|
||||
name,
|
||||
serde_json::json!({"error": format!("Invalid arguments: {}", e)}).to_string(),
|
||||
)
|
||||
}
|
||||
};
|
||||
let result = match name.as_str() {
|
||||
"find_file" => exec_find_file(pool, &args).await,
|
||||
"list_files" => exec_list_files(pool, &args).await,
|
||||
"tkg_query" => exec_tkg_query(pool, &args).await,
|
||||
"smart_search" => exec_smart_search(pool, &args).await,
|
||||
"identity_text" => exec_identity_text(pool, &args).await,
|
||||
"identities_search" => exec_identities_search(pool, &args).await,
|
||||
"get_identity_detail" => exec_get_identity_detail(pool, &args).await,
|
||||
"get_file_info" => exec_get_file_info(pool, &args).await,
|
||||
"get_representative_frame" => exec_get_representative_frame(pool, &args).await,
|
||||
"analyze_frame" => exec_analyze_frame(pool, &args).await,
|
||||
"find_file" => tools::exec_find_file(pool, &args).await,
|
||||
"list_files" => tools::exec_list_files(pool, &args).await,
|
||||
"tkg_query" => tools::exec_tkg_query(pool, &args).await,
|
||||
"tkg_nodes_query" => tools::exec_tkg_nodes_query(pool, &args).await,
|
||||
"tkg_edges_query" => tools::exec_tkg_edges_query(pool, &args).await,
|
||||
"tkg_node_detail" => tools::exec_tkg_node_detail(pool, &args).await,
|
||||
"smart_search" => tools::exec_smart_search(pool, &args).await,
|
||||
"identity_text" => tools::exec_identity_text(pool, &args).await,
|
||||
"identities_search" => tools::exec_identities_search(pool, &args).await,
|
||||
"get_identity_detail" => tools::exec_get_identity_detail(pool, &args).await,
|
||||
"get_file_info" => tools::exec_get_file_info(pool, &args).await,
|
||||
"get_representative_frame" => tools::exec_get_representative_frame(pool, &args).await,
|
||||
"analyze_frame" => tools::exec_analyze_frame(pool, &args).await,
|
||||
_ => Err(format!("Unknown tool: {}", name)),
|
||||
};
|
||||
let content = match result {
|
||||
@@ -916,7 +340,7 @@ async fn run_tool_loop(
|
||||
messages.len(),
|
||||
tools.len()
|
||||
);
|
||||
|
||||
|
||||
match function_calling::call_llm(messages.clone(), Some(tools.clone()), 2048, 120).await {
|
||||
Ok(LlmResponse::Text(text)) => {
|
||||
tracing::info!(
|
||||
@@ -945,7 +369,9 @@ async fn run_tool_loop(
|
||||
);
|
||||
sources.push(serde_json::json!({"tool": name, "result": content}));
|
||||
messages.push(function_calling::make_tool_result(
|
||||
&tool_call_id, &name, &content,
|
||||
&tool_call_id,
|
||||
&name,
|
||||
&content,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
119
src/api/checkin_api.rs
Normal file
119
src/api/checkin_api.rs
Normal file
@@ -0,0 +1,119 @@
|
||||
use axum::{
|
||||
extract::{Path, State},
|
||||
http::StatusCode,
|
||||
routing::{get, post},
|
||||
Json, Router,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::api::types::AppState;
|
||||
use crate::core::checkin;
|
||||
use crate::core::db::VideoStatus;
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct CheckinResponse {
|
||||
file_uuid: String,
|
||||
pre_chunks_moved: usize,
|
||||
speaker_detections_moved: usize,
|
||||
vectors_moved: usize,
|
||||
status: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct CheckoutResponse {
|
||||
file_uuid: String,
|
||||
rows_deleted: usize,
|
||||
status: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct WorkspaceStatusResponse {
|
||||
file_uuid: String,
|
||||
exists: bool,
|
||||
}
|
||||
|
||||
async fn checkin_handler(
|
||||
State(state): State<AppState>,
|
||||
Path(file_uuid): Path<String>,
|
||||
) -> Result<Json<CheckinResponse>, (StatusCode, Json<serde_json::Value>)> {
|
||||
match checkin::checkin(&state.db, &file_uuid).await {
|
||||
Ok(result) => {
|
||||
if let Err(e) = state
|
||||
.db
|
||||
.update_video_status(&file_uuid, VideoStatus::Indexed)
|
||||
.await
|
||||
{
|
||||
tracing::warn!(
|
||||
"Failed to update video status to Indexed for {}: {}",
|
||||
file_uuid,
|
||||
e
|
||||
);
|
||||
}
|
||||
Ok(Json(CheckinResponse {
|
||||
file_uuid: result.file_uuid.clone(),
|
||||
pre_chunks_moved: result.pre_chunks_moved,
|
||||
speaker_detections_moved: result.speaker_detections_moved,
|
||||
vectors_moved: result.vectors_moved,
|
||||
status: "indexed".to_string(),
|
||||
}))
|
||||
}
|
||||
Err(e) => Err((
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({
|
||||
"error": format!("Checkin failed: {}", e),
|
||||
"file_uuid": file_uuid,
|
||||
})),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
async fn checkout_handler(
|
||||
State(state): State<AppState>,
|
||||
Path(file_uuid): Path<String>,
|
||||
) -> Result<Json<CheckoutResponse>, (StatusCode, Json<serde_json::Value>)> {
|
||||
match checkin::checkout(&state.db, &file_uuid).await {
|
||||
Ok(result) => {
|
||||
if let Err(e) = state
|
||||
.db
|
||||
.update_video_status(&file_uuid, VideoStatus::CheckedOut)
|
||||
.await
|
||||
{
|
||||
tracing::warn!(
|
||||
"Failed to update video status to CheckedOut for {}: {}",
|
||||
file_uuid,
|
||||
e
|
||||
);
|
||||
}
|
||||
Ok(Json(CheckoutResponse {
|
||||
file_uuid: result.file_uuid.clone(),
|
||||
rows_deleted: result.rows_deleted,
|
||||
status: "checked_out".to_string(),
|
||||
}))
|
||||
}
|
||||
Err(e) => Err((
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({
|
||||
"error": format!("Checkout failed: {}", e),
|
||||
"file_uuid": file_uuid,
|
||||
})),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
async fn workspace_status_handler(Path(file_uuid): Path<String>) -> Json<WorkspaceStatusResponse> {
|
||||
use crate::core::db::workspace_sqlite::WorkspaceDb;
|
||||
Json(WorkspaceStatusResponse {
|
||||
file_uuid: file_uuid.clone(),
|
||||
exists: WorkspaceDb::exists(&file_uuid),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn checkin_routes() -> Router<AppState> {
|
||||
Router::new()
|
||||
.route("/api/v1/file/:file_uuid/checkin", post(checkin_handler))
|
||||
.route("/api/v1/file/:file_uuid/checkout", post(checkout_handler))
|
||||
.route(
|
||||
"/api/v1/file/:file_uuid/workspace",
|
||||
get(workspace_status_handler),
|
||||
)
|
||||
}
|
||||
1840
src/api/identity_binding.rs.bak
Normal file
1840
src/api/identity_binding.rs.bak
Normal file
File diff suppressed because it is too large
Load Diff
@@ -63,7 +63,10 @@ pub fn bbox_routes() -> Router<crate::api::types::AppState> {
|
||||
)
|
||||
.route("/api/v1/file/:file_uuid/video", get(stream_video))
|
||||
.route("/api/v1/file/:file_uuid/thumbnail", get(face_thumbnail))
|
||||
.route("/api/v1/file/:file_uuid/chunk/:chunk_id/thumbnail", get(chunk_thumbnail))
|
||||
.route(
|
||||
"/api/v1/file/:file_uuid/chunk/:chunk_id/thumbnail",
|
||||
get(chunk_thumbnail),
|
||||
)
|
||||
.route("/api/v1/file/:file_uuid/clip", get(video_clip))
|
||||
}
|
||||
|
||||
@@ -904,11 +907,16 @@ async fn chunk_thumbnail(
|
||||
let select = format!("select=eq(n\\,{})", frame);
|
||||
let output = ffmpeg_cmd()
|
||||
.args([
|
||||
"-i", &file_path,
|
||||
"-vf", &select,
|
||||
"-frames:v", "1",
|
||||
"-f", "image2pipe",
|
||||
"-vcodec", "mjpeg",
|
||||
"-i",
|
||||
&file_path,
|
||||
"-vf",
|
||||
&select,
|
||||
"-frames:v",
|
||||
"1",
|
||||
"-f",
|
||||
"image2pipe",
|
||||
"-vcodec",
|
||||
"mjpeg",
|
||||
"-",
|
||||
])
|
||||
.output()
|
||||
@@ -1206,15 +1214,10 @@ async fn media_proxy_handler(
|
||||
.await
|
||||
.map(IntoResponse::into_response),
|
||||
"chunk_thumbnail" => {
|
||||
let chunk_id = params
|
||||
.get("chunk_id")
|
||||
.ok_or(StatusCode::BAD_REQUEST)?;
|
||||
chunk_thumbnail(
|
||||
State(state),
|
||||
Path((uuid.clone(), chunk_id.clone())),
|
||||
)
|
||||
.await
|
||||
.map(IntoResponse::into_response)
|
||||
let chunk_id = params.get("chunk_id").ok_or(StatusCode::BAD_REQUEST)?;
|
||||
chunk_thumbnail(State(state), Path((uuid.clone(), chunk_id.clone())))
|
||||
.await
|
||||
.map(IntoResponse::into_response)
|
||||
}
|
||||
_ => Err(StatusCode::BAD_REQUEST),
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
pub mod agent_api;
|
||||
pub mod agent_search;
|
||||
pub mod auth;
|
||||
pub mod checkin_api;
|
||||
pub mod docs;
|
||||
pub mod files;
|
||||
pub mod five_w1h_agent_api;
|
||||
|
||||
@@ -2,7 +2,7 @@ use axum::{
|
||||
extract::{Path, Query, State},
|
||||
http::StatusCode,
|
||||
response::Json,
|
||||
routing::post,
|
||||
routing::{get, post},
|
||||
Router,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -578,6 +578,127 @@ async fn watcher_auto_register_toggle(
|
||||
})
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct ProcessorCountInfo {
|
||||
processor: String,
|
||||
has_json: bool,
|
||||
frame_count: Option<u32>,
|
||||
segment_count: Option<u32>,
|
||||
chunk_count: Option<u32>,
|
||||
last_modified: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct ProcessorCountsResponse {
|
||||
file_uuid: String,
|
||||
output_dir: String,
|
||||
processors: Vec<ProcessorCountInfo>,
|
||||
}
|
||||
|
||||
async fn get_processor_counts(
|
||||
State(state): State<AppState>,
|
||||
Path(file_uuid): Path<String>,
|
||||
) -> Result<Json<ProcessorCountsResponse>, StatusCode> {
|
||||
let videos_table = schema::table_name("videos");
|
||||
let full_uuid: Option<String> = sqlx::query_scalar(&format!(
|
||||
"SELECT file_uuid FROM {} WHERE file_uuid = $1 OR file_uuid LIKE $2",
|
||||
videos_table
|
||||
))
|
||||
.bind(&file_uuid)
|
||||
.bind(&format!("{}%", file_uuid))
|
||||
.fetch_optional(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::error!("DB error: {}", e);
|
||||
StatusCode::INTERNAL_SERVER_ERROR
|
||||
})?
|
||||
.or_else(|| {
|
||||
if file_uuid.len() == 32 {
|
||||
Some(file_uuid.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
let file_uuid = full_uuid.ok_or(StatusCode::NOT_FOUND)?;
|
||||
let output_dir = std::env::var("MOMENTRY_OUTPUT_DIR")
|
||||
.unwrap_or_else(|_| "/Users/accusys/momentry/output_dev".to_string());
|
||||
|
||||
let processors = crate::core::db::ProcessorType::all();
|
||||
let mut results = Vec::new();
|
||||
|
||||
for processor in &processors {
|
||||
let proc_name = processor.as_str();
|
||||
let json_path =
|
||||
std::path::Path::new(&output_dir).join(format!("{}.{}.json", file_uuid, proc_name));
|
||||
|
||||
let has_json = json_path.exists();
|
||||
let mut frame_count = None;
|
||||
let mut segment_count = None;
|
||||
let mut chunk_count = None;
|
||||
let mut last_modified = None;
|
||||
|
||||
if has_json {
|
||||
if let Ok(metadata) = std::fs::metadata(&json_path) {
|
||||
if let Ok(modified) = metadata.modified() {
|
||||
let chrono_dt: chrono::DateTime<chrono::Utc> = modified.into();
|
||||
last_modified = Some(chrono_dt.to_rfc3339());
|
||||
}
|
||||
}
|
||||
|
||||
if let Ok(content) = std::fs::read_to_string(&json_path) {
|
||||
if let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) {
|
||||
frame_count = json
|
||||
.get("frame_count")
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|v| v as u32);
|
||||
segment_count = json
|
||||
.get("segments")
|
||||
.and_then(|v| v.as_array())
|
||||
.map(|arr| arr.len() as u32);
|
||||
chunk_count = json
|
||||
.get("child_chunks")
|
||||
.and_then(|v| v.as_array())
|
||||
.map(|arr| arr.len() as u32)
|
||||
.or_else(|| {
|
||||
json.get("parent_chunks")
|
||||
.and_then(|v| v.as_array())
|
||||
.map(|arr| arr.len() as u32)
|
||||
});
|
||||
if chunk_count.is_none() {
|
||||
chunk_count = json
|
||||
.get("chunks")
|
||||
.and_then(|v| v.as_array())
|
||||
.map(|arr| arr.len() as u32);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
results.push(ProcessorCountInfo {
|
||||
processor: proc_name.to_string(),
|
||||
has_json,
|
||||
frame_count,
|
||||
segment_count,
|
||||
chunk_count,
|
||||
last_modified,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(Json(ProcessorCountsResponse {
|
||||
file_uuid,
|
||||
output_dir,
|
||||
processors: results,
|
||||
}))
|
||||
}
|
||||
|
||||
async fn verify_file_handler(
|
||||
Path(file_uuid): Path<String>,
|
||||
) -> Json<crate::verification::FileVerificationReport> {
|
||||
let report = crate::verification::verifier::verify_file(&file_uuid);
|
||||
Json(report)
|
||||
}
|
||||
|
||||
pub fn processing_routes() -> Router<AppState> {
|
||||
Router::new()
|
||||
.route("/api/v1/file/:file_uuid/process", post(trigger_processing))
|
||||
@@ -597,4 +718,9 @@ pub fn processing_routes() -> Router<AppState> {
|
||||
"/api/v1/config/watcher-auto-register",
|
||||
post(watcher_auto_register_toggle),
|
||||
)
|
||||
.route(
|
||||
"/api/v1/file/:file_uuid/processor-counts",
|
||||
get(get_processor_counts),
|
||||
)
|
||||
.route("/api/v1/file/:file_uuid/verify", get(verify_file_handler))
|
||||
}
|
||||
|
||||
@@ -160,6 +160,11 @@ pub async fn smart_search(
|
||||
.search_in_uuid(&embedding, file_uuid, fetch_limit)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
tracing::info!(
|
||||
"Smart search: Qdrant search_in_uuid for {} returned {} hits",
|
||||
file_uuid,
|
||||
qdrant_hits.len()
|
||||
);
|
||||
qdrant_hits
|
||||
.into_iter()
|
||||
.map(|h| (h.uuid, h.chunk_id, h.score as f64))
|
||||
@@ -169,6 +174,10 @@ pub async fn smart_search(
|
||||
.search(&embedding, fetch_limit)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
tracing::info!(
|
||||
"Smart search: Qdrant search (no uuid filter) returned {} hits",
|
||||
qdrant_hits.len()
|
||||
);
|
||||
qdrant_hits
|
||||
.into_iter()
|
||||
.map(|h| (h.uuid, h.chunk_id, h.score as f64))
|
||||
@@ -371,27 +380,45 @@ pub async fn smart_search(
|
||||
// 6. Enrich top results from PG and build final response
|
||||
let query_lower = req.query.to_lowercase();
|
||||
let mut final_results = Vec::new();
|
||||
for mr in ranked.iter().take(limit * 3) { // 取更多結果以便過濾
|
||||
for mr in ranked.iter().take(limit * 3) {
|
||||
// 取更多結果以便過濾
|
||||
if let Some(pg) = db
|
||||
.get_chunk_by_file_and_chunk_id(&mr.file_uuid, &mr.chunk_id)
|
||||
.await
|
||||
.ok()
|
||||
.flatten()
|
||||
{
|
||||
// 關鍵字過濾
|
||||
// 關鍵字過濾: CJK 用子字串匹配,英文用單詞邊界匹配
|
||||
let summary_lower = pg.summary.to_lowercase();
|
||||
let query_words: Vec<String> = query_lower.split_whitespace().map(|s| s.to_string()).collect();
|
||||
|
||||
// 檢查是否包含所有查詢詞(完整單詞)
|
||||
let query_words: Vec<String> = query_lower
|
||||
.split_whitespace()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
|
||||
let text_match = !pg.summary.is_empty() && {
|
||||
let bordered = format!(" {} ", summary_lower);
|
||||
query_words.iter().all(|w| bordered.contains(&format!(" {} ", w)))
|
||||
let has_cjk = |s: &str| -> bool {
|
||||
s.chars().any(|c| {
|
||||
('\u{4E00}'..='\u{9FFF}').contains(&c)
|
||||
|| ('\u{3040}'..='\u{309F}').contains(&c)
|
||||
|| ('\u{30A0}'..='\u{30FF}').contains(&c)
|
||||
|| ('\u{AC00}'..='\u{D7AF}').contains(&c)
|
||||
})
|
||||
};
|
||||
|
||||
if has_cjk(&query_lower) || has_cjk(&summary_lower) {
|
||||
query_words.iter().all(|w| summary_lower.contains(w))
|
||||
} else {
|
||||
let bordered = format!(" {} ", summary_lower);
|
||||
query_words
|
||||
.iter()
|
||||
.all(|w| bordered.contains(&format!(" {} ", w)))
|
||||
}
|
||||
};
|
||||
|
||||
if !text_match {
|
||||
|
||||
if !text_match && mr.semantic_score.is_none() {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
final_results.push(SearchResult {
|
||||
id: 0,
|
||||
file_uuid: pg.file_uuid.clone(),
|
||||
@@ -408,17 +435,19 @@ pub async fn smart_search(
|
||||
similarity: Some(mr.score),
|
||||
file_name: None,
|
||||
serve_url: None,
|
||||
thumbnail_url: pg.file_uuid.as_ref().map(|fu| format!(
|
||||
"/wp-json/momentry/v1/media?type=chunk_thumbnail&file_uuid={}&chunk_id={}",
|
||||
fu, mr.chunk_id
|
||||
)),
|
||||
thumbnail_url: pg.file_uuid.as_ref().map(|fu| {
|
||||
format!(
|
||||
"/wp-json/momentry/v1/media?type=chunk_thumbnail&file_uuid={}&chunk_id={}",
|
||||
fu, mr.chunk_id
|
||||
)
|
||||
}),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Trim to requested limit
|
||||
final_results.truncate(limit);
|
||||
|
||||
|
||||
// 7. Enrich results with file_name and serve_url from videos table
|
||||
if !final_results.is_empty() {
|
||||
let v_table = crate::core::db::schema::table_name("videos");
|
||||
|
||||
@@ -11,6 +11,7 @@ use crate::Embedder;
|
||||
use super::agent_api;
|
||||
use super::agent_search;
|
||||
use super::auth;
|
||||
use super::checkin_api;
|
||||
use super::docs;
|
||||
use super::files;
|
||||
use super::five_w1h_agent_api;
|
||||
@@ -123,6 +124,7 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> {
|
||||
.merge(llm_search::llm_smart_routes())
|
||||
.merge(universal_search_routes())
|
||||
.merge(pipeline::pipeline_routes())
|
||||
.merge(checkin_api::checkin_routes())
|
||||
.layer(axum::middleware::from_fn_with_state(
|
||||
state.api_state.clone(),
|
||||
unified_auth,
|
||||
|
||||
Reference in New Issue
Block a user