cleanup: remove dead code and duplicate docs
- Remove session-ses_2f27.md (161KB raw session log)
- Remove 49 ROOT_* duplicate files across REFERENCE/
- Remove 14 duplicate files between REFERENCE/ root and history/
- Remove asr_legacy.rs (dead code, replaced by asr.rs)
- Remove src/core/worker/ (duplicate JobWorker)
- Remove src/core/layers/ (empty directory)
- Remove 4 .bak files in src/
- Remove 7 dead private methods in worker/processor.rs
- Remove backup directory from git tracking
This commit is contained in:
@@ -103,8 +103,14 @@ pub fn face_recognition_routes() -> Router<crate::api::server::AppState> {
|
||||
.route("/api/v1/face/register", post(register_face_api))
|
||||
.route("/api/v1/face/search", post(search_faces))
|
||||
.route("/api/v1/face/list", get(list_faces))
|
||||
.route("/api/v1/face/:face_id", get(get_face_details))
|
||||
.route("/api/v1/face/:face_id", axum::routing::delete(delete_face))
|
||||
.route(
|
||||
"/api/v1/files/:file_uuid/faces/:face_id",
|
||||
get(get_face_details),
|
||||
)
|
||||
.route(
|
||||
"/api/v1/files/:file_uuid/faces/:face_id",
|
||||
axum::routing::delete(delete_face),
|
||||
)
|
||||
.route(
|
||||
"/api/v1/face/results/:file_uuid",
|
||||
get(get_recognition_results),
|
||||
@@ -550,7 +556,7 @@ async fn list_faces(
|
||||
|
||||
async fn get_face_details(
|
||||
State(_state): State<crate::api::server::AppState>,
|
||||
Path(face_id): Path<String>,
|
||||
Path((file_uuid, face_id)): Path<(String, String)>,
|
||||
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
|
||||
let db = match PostgresDb::init().await {
|
||||
Ok(db) => db,
|
||||
@@ -575,7 +581,7 @@ async fn get_face_details(
|
||||
updated_at,
|
||||
is_active
|
||||
FROM {}
|
||||
WHERE face_id = $1
|
||||
WHERE face_id = $1 AND file_uuid = $2
|
||||
"#,
|
||||
face_identities_table
|
||||
);
|
||||
@@ -591,6 +597,7 @@ async fn get_face_details(
|
||||
bool,
|
||||
)> = match sqlx::query_as(&query)
|
||||
.bind(&face_id)
|
||||
.bind(&file_uuid)
|
||||
.fetch_optional(db.pool())
|
||||
.await
|
||||
{
|
||||
@@ -637,7 +644,7 @@ async fn get_face_details(
|
||||
|
||||
async fn delete_face(
|
||||
State(_state): State<crate::api::server::AppState>,
|
||||
Path(face_id): Path<String>,
|
||||
Path((file_uuid, face_id)): Path<(String, String)>,
|
||||
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
|
||||
let db = match PostgresDb::init().await {
|
||||
Ok(db) => db,
|
||||
@@ -655,7 +662,7 @@ async fn delete_face(
|
||||
r#"
|
||||
UPDATE {}
|
||||
SET is_active = FALSE, updated_at = CURRENT_TIMESTAMP
|
||||
WHERE face_id = $1 AND is_active = TRUE
|
||||
WHERE face_id = $1 AND file_uuid = $2 AND is_active = TRUE
|
||||
RETURNING face_id, name
|
||||
"#,
|
||||
face_identities_table
|
||||
@@ -663,6 +670,7 @@ async fn delete_face(
|
||||
|
||||
let deleted: Option<(String, Option<String>)> = match sqlx::query_as(&query)
|
||||
.bind(&face_id)
|
||||
.bind(&file_uuid)
|
||||
.fetch_optional(db.pool())
|
||||
.await
|
||||
{
|
||||
|
||||
@@ -56,7 +56,10 @@ pub fn identity_routes() -> Router<crate::api::server::AppState> {
|
||||
"/api/v1/identities/:identity_id/faces",
|
||||
get(get_identity_faces),
|
||||
)
|
||||
.route("/api/v1/faces/:face_id/thumbnail", get(get_face_thumbnail))
|
||||
.route(
|
||||
"/api/v1/files/:file_uuid/faces/:face_id/thumbnail",
|
||||
get(get_face_thumbnail),
|
||||
)
|
||||
}
|
||||
|
||||
/// Register a Global Identity from face.json with multi-angle reference vectors.
|
||||
@@ -719,7 +722,7 @@ async fn get_identity_faces(
|
||||
}
|
||||
|
||||
async fn get_face_thumbnail(
|
||||
Path(face_id): Path<i32>,
|
||||
Path((file_uuid, face_id)): Path<(String, i32)>,
|
||||
) -> Result<impl IntoResponse, (StatusCode, String)> {
|
||||
let db = match PostgresDb::init().await {
|
||||
Ok(db) => db,
|
||||
@@ -738,12 +741,13 @@ async fn get_face_thumbnail(
|
||||
"SELECT fd.frame_number, fd.bbox, v.file_path, v.fps
|
||||
FROM {} fd
|
||||
JOIN {} v ON fd.file_uuid = v.uuid
|
||||
WHERE fd.id = $1",
|
||||
WHERE fd.id = $1 AND fd.file_uuid = $2",
|
||||
table_fd, table_v
|
||||
);
|
||||
|
||||
let row: Option<(i64, Option<serde_json::Value>, String, f64)> = match sqlx::query_as(&sql)
|
||||
.bind(face_id)
|
||||
.bind(&file_uuid)
|
||||
.fetch_optional(db.pool())
|
||||
.await
|
||||
{
|
||||
|
||||
@@ -29,6 +29,70 @@ pub fn identity_routes() -> Router<crate::api::server::AppState> {
|
||||
pub struct FilesQuery {
|
||||
page: Option<usize>,
|
||||
page_size: Option<usize>,
|
||||
uuid: Option<String>, // Add uuid filter
|
||||
}
|
||||
|
||||
async fn list_files(
|
||||
State(state): State<crate::api::server::AppState>,
|
||||
Query(params): Query<FilesQuery>,
|
||||
) -> Result<Json<FilesResponse>, (StatusCode, String)> {
|
||||
let page = params.page.unwrap_or(1);
|
||||
let page_size = params.page_size.unwrap_or(20);
|
||||
|
||||
// If UUID is provided, fetch that specific file and return it as a list item
|
||||
if let Some(ref uuid) = params.uuid {
|
||||
let video = state
|
||||
.db
|
||||
.get_video_by_uuid(uuid)
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
|
||||
let data = if let Some(v) = video {
|
||||
vec![FileItem {
|
||||
file_uuid: v.file_uuid,
|
||||
file_name: v.file_name,
|
||||
file_path: v.file_path,
|
||||
status: v.status.as_str().to_string(),
|
||||
}]
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
return Ok(Json(FilesResponse {
|
||||
success: true,
|
||||
total: data.len() as i64,
|
||||
page,
|
||||
page_size,
|
||||
data,
|
||||
}));
|
||||
}
|
||||
|
||||
// Default: List files with pagination
|
||||
let offset = ((page - 1) as i64) * (page_size as i64);
|
||||
|
||||
let records = state
|
||||
.db
|
||||
.list_files(page_size as i32, offset)
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
|
||||
let data = records
|
||||
.into_iter()
|
||||
.map(|r| FileItem {
|
||||
file_uuid: r.file_uuid,
|
||||
file_name: r.file_name,
|
||||
file_path: r.file_path,
|
||||
status: "ready".to_string(), // Hardcoded for now
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(Json(FilesResponse {
|
||||
success: true,
|
||||
total: 0, // TODO: Implement count query
|
||||
page,
|
||||
page_size,
|
||||
data,
|
||||
}))
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
@@ -45,40 +109,7 @@ pub struct FileItem {
|
||||
pub file_uuid: String,
|
||||
pub file_name: String,
|
||||
pub file_path: String,
|
||||
pub status: String, // From probe or processing status
|
||||
}
|
||||
|
||||
async fn list_files(
|
||||
State(state): State<crate::api::server::AppState>,
|
||||
Query(params): Query<FilesQuery>,
|
||||
) -> Result<Json<FilesResponse>, (StatusCode, String)> {
|
||||
let page = params.page.unwrap_or(1);
|
||||
let page_size = params.page_size.unwrap_or(20);
|
||||
let offset = ((page - 1) as i64) * (page_size as i64);
|
||||
|
||||
let records = state
|
||||
.db
|
||||
.list_files(page_size as i32, offset)
|
||||
.await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||
|
||||
let data = records
|
||||
.into_iter()
|
||||
.map(|r| FileItem {
|
||||
file_uuid: r.file_uuid,
|
||||
file_name: r.file_name,
|
||||
file_path: r.file_path,
|
||||
status: "ready".to_string(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(Json(FilesResponse {
|
||||
success: true,
|
||||
total: 0, // TODO
|
||||
page,
|
||||
page_size,
|
||||
data,
|
||||
}))
|
||||
pub status: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
|
||||
@@ -1,195 +0,0 @@
|
||||
//! Smart Search API
|
||||
//! Implements the 5W1H search capability using semantic vectors.
|
||||
|
||||
use axum::{extract::State, http::StatusCode, response::Json, routing::post, Router};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json;
|
||||
use tracing;
|
||||
|
||||
use crate::core::db::PostgresDb;
|
||||
|
||||
// --- Request / Response Structures ---
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct SmartSearchRequest {
|
||||
pub uuid: String,
|
||||
pub query: String,
|
||||
pub limit: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct SearchResult {
|
||||
pub id: i32,
|
||||
pub parent_id: i32,
|
||||
pub scene_order: Option<i32>,
|
||||
|
||||
// Primary: frame-accurate position (authoritative unit)
|
||||
pub start_frame: i64,
|
||||
pub end_frame: i64,
|
||||
pub fps: f64,
|
||||
|
||||
// Reference: time derived from frames (subject to FPS variation, not precise)
|
||||
pub start_time: f64,
|
||||
pub end_time: f64,
|
||||
|
||||
pub raw_text: Option<String>, // Text content of the child chunk
|
||||
pub summary: Option<String>, // Summary from parent context
|
||||
pub metadata: Option<serde_json::Value>,
|
||||
pub similarity: Option<f64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct SmartSearchResponse {
|
||||
pub query: String,
|
||||
pub results: Vec<SearchResult>,
|
||||
pub strategy: String,
|
||||
}
|
||||
|
||||
// --- API Handler ---
|
||||
|
||||
pub async fn smart_search(
|
||||
State(state): State<crate::api::server::AppState>,
|
||||
Json(req): Json<SmartSearchRequest>,
|
||||
) -> Result<Json<SmartSearchResponse>, (StatusCode, Json<serde_json::Value>)> {
|
||||
let db = &state.db;
|
||||
let limit = req.limit.unwrap_or(5);
|
||||
|
||||
// 1. Generate Embedding using Ollama
|
||||
let embedding = get_ollama_embedding(&req.query).await.map_err(
|
||||
|e| -> (StatusCode, Json<serde_json::Value>) {
|
||||
tracing::error!("Embedding failed: {}", e);
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({ "error": e.to_string() })),
|
||||
)
|
||||
},
|
||||
)?;
|
||||
|
||||
// 2. Search Database (Drill-Down: Find Parents First)
|
||||
let db_parents: Vec<crate::core::db::postgres_db::SemanticSearchResult> = db
|
||||
.search_parent_chunks_semantic(&req.uuid, &embedding, limit)
|
||||
.await
|
||||
.map_err(
|
||||
|e: anyhow::Error| -> (StatusCode, Json<serde_json::Value>) {
|
||||
tracing::error!("DB search failed: {}", e);
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({ "error": e.to_string() })),
|
||||
)
|
||||
},
|
||||
)?;
|
||||
|
||||
if db_parents.is_empty() {
|
||||
return Ok(Json(SmartSearchResponse {
|
||||
query: req.query,
|
||||
results: vec![],
|
||||
strategy: "semantic_vector_search".to_string(),
|
||||
}));
|
||||
}
|
||||
|
||||
// Collect Parent IDs
|
||||
let parent_ids: Vec<i32> = db_parents.iter().map(|p| p.id).collect();
|
||||
|
||||
// 3. Fetch Children for these Parents (Drill Down)
|
||||
// We fetch all children for these parents (limit can be adjusted)
|
||||
let children: Vec<crate::core::db::postgres_db::ChildChunkResult> = db
|
||||
.get_children_for_parents(&parent_ids, 10) // Fetch top 10 children per parent
|
||||
.await
|
||||
.map_err(
|
||||
|e: anyhow::Error| -> (StatusCode, Json<serde_json::Value>) {
|
||||
tracing::error!("Fetching children failed: {}", e);
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({ "error": e.to_string() })),
|
||||
)
|
||||
},
|
||||
)?;
|
||||
|
||||
// 4. Map Parents to a lookup table
|
||||
let parent_map: std::collections::HashMap<
|
||||
i32,
|
||||
&crate::core::db::postgres_db::SemanticSearchResult,
|
||||
> = db_parents.iter().map(|p| (p.id, p)).collect();
|
||||
|
||||
// Map Children to API response struct
|
||||
let results: Vec<SearchResult> = children
|
||||
.into_iter()
|
||||
.map(|c| {
|
||||
let parent = parent_map.get(&c.parent_id);
|
||||
SearchResult {
|
||||
id: c.id,
|
||||
parent_id: c.parent_id,
|
||||
scene_order: parent.map(|p| p.scene_order),
|
||||
|
||||
start_frame: c.start_frame,
|
||||
end_frame: c.end_frame,
|
||||
fps: c.fps,
|
||||
|
||||
start_time: c.start_time,
|
||||
end_time: c.end_time,
|
||||
raw_text: Some(c.raw_text),
|
||||
summary: parent.map(|p| p.summary.clone()),
|
||||
metadata: parent.map(|p| p.metadata.clone()),
|
||||
similarity: parent.and_then(|p| p.similarity),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// 6. Sort results by similarity (descending)
|
||||
// Since all children of a parent have the same parent similarity, this groups relevant chunks together
|
||||
let mut results = results;
|
||||
results.sort_by(|a, b| {
|
||||
b.similarity
|
||||
.partial_cmp(&a.similarity)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
|
||||
// 7. Limit the final results (optional, but good for API consistency)
|
||||
let limit = req.limit.unwrap_or(5) * 5; // Allow more children per parent context
|
||||
results.truncate(limit);
|
||||
|
||||
// 8. Format Response
|
||||
let response = SmartSearchResponse {
|
||||
query: req.query,
|
||||
results,
|
||||
strategy: "drill_down_semantic_search".to_string(),
|
||||
};
|
||||
|
||||
Ok(Json(response))
|
||||
}
|
||||
|
||||
// --- Helper: Ollama Embedding ---
|
||||
|
||||
async fn get_ollama_embedding(
|
||||
text: &str,
|
||||
) -> Result<Vec<f32>, Box<dyn std::error::Error + Send + Sync>> {
|
||||
let client = reqwest::Client::new();
|
||||
let payload = serde_json::json!({
|
||||
"model": "nomic-embed-text",
|
||||
"prompt": text
|
||||
});
|
||||
|
||||
let res = client
|
||||
.post("http://localhost:11434/api/embeddings")
|
||||
.json(&payload)
|
||||
.send()
|
||||
.await?
|
||||
.json::<serde_json::Value>()
|
||||
.await?;
|
||||
|
||||
// Parse embedding array from response
|
||||
let embedding = res["embedding"]
|
||||
.as_array()
|
||||
.ok_or("No embedding found in Ollama response")?
|
||||
.iter()
|
||||
.map(|v| v.as_f64().unwrap_or(0.0) as f32)
|
||||
.collect();
|
||||
|
||||
Ok(embedding)
|
||||
}
|
||||
|
||||
// --- Router Setup ---
|
||||
|
||||
pub fn search_routes() -> Router<crate::api::server::AppState> {
|
||||
Router::new().route("/smart", post(smart_search))
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
268
src/bin/cli.rs
Normal file
268
src/bin/cli.rs
Normal file
@@ -0,0 +1,268 @@
|
||||
use chrono::Local;
|
||||
use clap::{Parser, Subcommand};
|
||||
use reqwest;
|
||||
use serde_json;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
|
||||
fn get_timestamp() -> String {
|
||||
Local::now().format("%H:%M:%S").to_string()
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "momentry-cli")]
|
||||
#[command(about = "CLI Agent Interface for Momentry Portal")]
|
||||
struct Cli {
|
||||
#[command(subcommand)]
|
||||
command: Commands,
|
||||
|
||||
/// API Base URL (default: http://localhost:3002)
|
||||
#[arg(long, default_value = "http://localhost:3002")]
|
||||
api_url: String,
|
||||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
enum Commands {
|
||||
/// 註冊檔案 (Register File)
|
||||
Register { path: String },
|
||||
/// 取消註冊 (Unregister File)
|
||||
Unregister { file_uuid: String },
|
||||
/// 探測檔案 (Probe File) - 需要 UUID
|
||||
Probe { file_uuid: String },
|
||||
/// 開始處理 (Start Processing)
|
||||
Process { file_uuid: String },
|
||||
/// 監控處理進度 (Watch Progress until completed)
|
||||
Watch { file_uuid: String },
|
||||
/// 列出未綁定臉部 (List Unbound Faces)
|
||||
ListFaces {
|
||||
file_uuid: String,
|
||||
#[arg(long, default_value = "0.5")]
|
||||
min_conf: f64,
|
||||
#[arg(long, default_value = "20")]
|
||||
limit: usize,
|
||||
},
|
||||
/// 綁定身份 (Bind Identity)
|
||||
Bind {
|
||||
file_uuid: String,
|
||||
face_id: String,
|
||||
identity_id: String,
|
||||
},
|
||||
}
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let cli = Cli::parse();
|
||||
let start_time = Instant::now();
|
||||
|
||||
println!("⏱️ [TIME] Start Command: {}", get_timestamp());
|
||||
|
||||
let result = run_command(&cli).await;
|
||||
|
||||
let duration = start_time.elapsed();
|
||||
println!(
|
||||
"⏱️ [TIME] End Command: {} | Duration: {:.2}s",
|
||||
get_timestamp(),
|
||||
duration.as_secs_f64()
|
||||
);
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
async fn run_command(cli: &Cli) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let api_key = "muser_test_001";
|
||||
let client = reqwest::Client::new();
|
||||
let auth_header = reqwest::header::HeaderMap::from_iter([
|
||||
("X-API-Key".parse().unwrap(), api_key.parse().unwrap()),
|
||||
(
|
||||
"Content-Type".parse().unwrap(),
|
||||
"application/json".parse().unwrap(),
|
||||
),
|
||||
]);
|
||||
|
||||
match &cli.command {
|
||||
Commands::Register { path } => {
|
||||
println!("📥 [CLI] 正在註冊: {}", path);
|
||||
let res = client
|
||||
.post(&format!("{}/api/v1/files/register", cli.api_url))
|
||||
.headers(auth_header.clone())
|
||||
.json(&serde_json::json!({ "file_path": path }))
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if res.status().is_success() {
|
||||
let json: serde_json::Value = res.json().await?;
|
||||
println!("✅ [CLI] 註冊成功:");
|
||||
println!(
|
||||
" UUID: {}",
|
||||
json.get("file_uuid")
|
||||
.unwrap_or(&serde_json::Value::String("-".into()))
|
||||
);
|
||||
} else {
|
||||
println!("❌ [CLI] 註冊失敗: {}", res.status());
|
||||
}
|
||||
}
|
||||
Commands::Unregister { file_uuid } => {
|
||||
println!("🗑️ [CLI] 正在取消註冊: {}", file_uuid);
|
||||
let res = client
|
||||
.post(&format!("{}/api/v1/unregister", cli.api_url))
|
||||
.headers(auth_header.clone())
|
||||
.json(&serde_json::json!({ "uuid": file_uuid }))
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if res.status().is_success() {
|
||||
println!("✅ [CLI] 取消註冊成功");
|
||||
} else {
|
||||
println!("❌ [CLI] 取消註冊失敗: {}", res.status());
|
||||
}
|
||||
}
|
||||
Commands::Probe { file_uuid } => {
|
||||
println!("🔍 [CLI] 正在探測檔案 (UUID): {}", file_uuid);
|
||||
let res = client
|
||||
.get(&format!(
|
||||
"{}/api/v1/assets/{}/probe",
|
||||
cli.api_url, file_uuid
|
||||
))
|
||||
.headers(auth_header.clone())
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if res.status().is_success() {
|
||||
let json: serde_json::Value = res.json().await?;
|
||||
println!("✅ [CLI] 探測成功:");
|
||||
println!(
|
||||
" 時長: {}s",
|
||||
json.get("duration")
|
||||
.map(|v| v.to_string())
|
||||
.unwrap_or("-".into())
|
||||
);
|
||||
} else {
|
||||
println!("❌ [CLI] 探測失敗: {}", res.status());
|
||||
}
|
||||
}
|
||||
Commands::Process { file_uuid } => {
|
||||
println!("⚙️ [CLI] 正在觸發處理: {}", file_uuid);
|
||||
let res = client
|
||||
.post(&format!(
|
||||
"{}/api/v1/assets/{}/process",
|
||||
cli.api_url, file_uuid
|
||||
))
|
||||
.headers(auth_header.clone())
|
||||
.json(&serde_json::json!({})) // Send empty JSON object
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if res.status().is_success() {
|
||||
println!(
|
||||
"✅ [CLI] 處理已啟動。請使用 'momentry-cli watch {}' 監控進度。",
|
||||
file_uuid
|
||||
);
|
||||
} else {
|
||||
println!("❌ [CLI] 啟動處理失敗: {}", res.status());
|
||||
}
|
||||
}
|
||||
Commands::Watch { file_uuid } => {
|
||||
println!("👀 [CLI] 開始監控進度: {}", file_uuid);
|
||||
loop {
|
||||
let res = client
|
||||
.get(&format!("{}/api/v1/progress/{}", cli.api_url, file_uuid))
|
||||
.headers(auth_header.clone())
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if !res.status().is_success() {
|
||||
println!("❌ [CLI] 無法取得進度: {}", res.status());
|
||||
break;
|
||||
}
|
||||
|
||||
let json: serde_json::Value = res.json().await?;
|
||||
let progress = json["overall_progress"].as_u64().unwrap_or(0);
|
||||
let current = json["current_processor"].as_str().unwrap_or("Unknown");
|
||||
|
||||
println!(" 📊 進度: {}% | 當前階段: {}", progress, current);
|
||||
|
||||
if progress >= 100 || json["status"].as_str().unwrap_or("") == "completed" {
|
||||
println!("✅ [CLI] 處理完成!");
|
||||
break;
|
||||
}
|
||||
tokio::time::sleep(Duration::from_secs(3)).await;
|
||||
}
|
||||
}
|
||||
Commands::ListFaces {
|
||||
file_uuid,
|
||||
min_conf,
|
||||
limit,
|
||||
} => {
|
||||
println!("🕵️ [CLI] 正在查詢臉部: {}", file_uuid);
|
||||
let url = format!(
|
||||
"{}/api/v1/faces/candidates?file_uuid={}&min_confidence={}&page_size={}",
|
||||
cli.api_url, file_uuid, min_conf, limit
|
||||
);
|
||||
|
||||
let res = client.get(&url).headers(auth_header.clone()).send().await?;
|
||||
|
||||
if res.status().is_success() {
|
||||
let json: serde_json::Value = res.json().await?;
|
||||
println!("✅ [CLI] 查詢成功:");
|
||||
|
||||
if let Some(candidates) = json.get("candidates") {
|
||||
if candidates.as_array().map_or(0, |v| v.len()) > 0 {
|
||||
println!(
|
||||
" 找到 {} 個未綁定臉部:",
|
||||
candidates.as_array().unwrap().len()
|
||||
);
|
||||
for (i, c) in candidates.as_array().unwrap().iter().enumerate() {
|
||||
let id = c.get("id").map(|v| v.to_string()).unwrap_or("N/A".into());
|
||||
let conf = c
|
||||
.get("confidence")
|
||||
.map(|v| v.to_string())
|
||||
.unwrap_or("0".into());
|
||||
println!(" {}. ID: {} | Confidence: {}", i + 1, id, conf);
|
||||
}
|
||||
} else {
|
||||
println!(" 未找到符合條件的臉部。");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
println!("❌ [CLI] 查詢失敗: {}", res.status());
|
||||
}
|
||||
}
|
||||
Commands::Bind {
|
||||
file_uuid,
|
||||
face_id,
|
||||
identity_id,
|
||||
} => {
|
||||
println!(
|
||||
"🔗 [CLI] 正在綁定 Identity {} -> Face {} (File: {})",
|
||||
identity_id, face_id, file_uuid
|
||||
);
|
||||
|
||||
let payload = serde_json::json!({
|
||||
"identity_id": identity_id.parse::<i64>().unwrap_or(0),
|
||||
"binding_type": "face",
|
||||
"binding_value": face_id
|
||||
});
|
||||
|
||||
let res = client
|
||||
.post(&format!("{}/api/v1/identities/bind", cli.api_url))
|
||||
.headers(auth_header.clone())
|
||||
.json(&payload)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
if res.status().is_success() {
|
||||
let json: serde_json::Value = res.json().await?;
|
||||
println!(
|
||||
"✅ [CLI] 綁定成功: {}",
|
||||
json.get("message")
|
||||
.map(|v| v.to_string())
|
||||
.unwrap_or("".into())
|
||||
);
|
||||
} else {
|
||||
let text = res.text().await?;
|
||||
println!("❌ [CLI] 綁定失敗: {}", text);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,711 +0,0 @@
|
||||
use anyhow::{Context, Result};
|
||||
use clap::Parser;
|
||||
use crossterm::event::{self, Event, KeyCode, KeyModifiers};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::io::{self, Write};
|
||||
use std::path::PathBuf;
|
||||
use std::process::{Child, Command, Stdio};
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(name = "integrated_player")]
|
||||
#[command(about = "Integrated player for ASR, Face, ASRX, and Pose")]
|
||||
struct Args {
|
||||
#[arg(short, long)]
|
||||
video: PathBuf,
|
||||
|
||||
#[arg(short = 'r', long)]
|
||||
asr: Option<PathBuf>,
|
||||
|
||||
#[arg(short = 'f', long)]
|
||||
face: Option<PathBuf>,
|
||||
|
||||
#[arg(short = 'x', long)]
|
||||
asrx: Option<PathBuf>,
|
||||
|
||||
#[arg(short = 'p', long)]
|
||||
pose: Option<PathBuf>,
|
||||
|
||||
#[arg(short = 's', long, default_value = "0.0")]
|
||||
start: f64,
|
||||
|
||||
#[arg(long)]
|
||||
speaker_name: Option<String>,
|
||||
|
||||
#[arg(long)]
|
||||
auto_play_speaker: bool,
|
||||
|
||||
#[arg(long)]
|
||||
demo: bool,
|
||||
|
||||
#[arg(long, default_value = "3")]
|
||||
demo_segments_per_speaker: usize,
|
||||
|
||||
#[arg(long, default_value = "2.0")]
|
||||
demo_speed: f64,
|
||||
|
||||
#[arg(long)]
|
||||
show_video: bool,
|
||||
|
||||
#[arg(long, default_value = "800")]
|
||||
video_width: u32,
|
||||
|
||||
#[arg(long, default_value = "600")]
|
||||
video_height: u32,
|
||||
|
||||
#[arg(long)]
|
||||
continuous_demo: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct AsrSegment {
|
||||
start: f64,
|
||||
end: f64,
|
||||
text: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct AsrData {
|
||||
language: Option<String>,
|
||||
segments: Vec<AsrSegment>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct FaceDetection {
|
||||
frame: u64,
|
||||
timestamp: f64,
|
||||
x: i32,
|
||||
y: i32,
|
||||
width: i32,
|
||||
height: i32,
|
||||
confidence: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct FaceResult {
|
||||
results: FaceResults,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct FaceResults {
|
||||
detections: Vec<FaceDetection>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct AsrxSegment {
|
||||
index: usize,
|
||||
start: f64,
|
||||
end: f64,
|
||||
duration: f64,
|
||||
speaker: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct AsrxData {
|
||||
segments: Vec<AsrxSegment>,
|
||||
speaker_stats: HashMap<String, SpeakerStats>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct SpeakerStats {
|
||||
count: usize,
|
||||
duration: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct Keypoint {
|
||||
name: String,
|
||||
x: f32,
|
||||
y: f32,
|
||||
confidence: f32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct PersonPose {
|
||||
keypoints: Vec<Keypoint>,
|
||||
bbox: Bbox,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct Bbox {
|
||||
x: i32,
|
||||
y: i32,
|
||||
width: i32,
|
||||
height: i32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct PoseFrame {
|
||||
frame: u64,
|
||||
timestamp: f64,
|
||||
persons: Vec<PersonPose>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct PoseData {
|
||||
frames: Vec<PoseFrame>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct IntegratedSegment {
|
||||
start: f64,
|
||||
end: f64,
|
||||
text: Option<String>,
|
||||
speaker: Option<String>,
|
||||
face: Option<FaceDetection>,
|
||||
mouth_landmarks: Option<Vec<Keypoint>>,
|
||||
}
|
||||
|
||||
struct IntegratedPlayer {
|
||||
asr_data: Option<AsrData>,
|
||||
face_data: Option<FaceResult>,
|
||||
asrx_data: Option<AsrxData>,
|
||||
pose_data: Option<PoseData>,
|
||||
current_time: f64,
|
||||
is_playing: bool,
|
||||
speaker_names: HashMap<String, (String, String)>,
|
||||
}
|
||||
|
||||
impl IntegratedPlayer {
|
||||
fn new() -> Self {
|
||||
let mut speaker_names = HashMap::new();
|
||||
speaker_names.insert(
|
||||
"SPEAKER_0".to_string(),
|
||||
("Cary Grant".to_string(), "Peter Joshua".to_string()),
|
||||
);
|
||||
speaker_names.insert(
|
||||
"SPEAKER_1".to_string(),
|
||||
("Audrey Hepburn".to_string(), "Regina Lampert".to_string()),
|
||||
);
|
||||
speaker_names.insert(
|
||||
"SPEAKER_2".to_string(),
|
||||
(
|
||||
"Walter Matthau".to_string(),
|
||||
"Hamilton Bartholomew".to_string(),
|
||||
),
|
||||
);
|
||||
speaker_names.insert(
|
||||
"SPEAKER_4".to_string(),
|
||||
("James Coburn".to_string(), "Tex Panthollow".to_string()),
|
||||
);
|
||||
|
||||
Self {
|
||||
asr_data: None,
|
||||
face_data: None,
|
||||
asrx_data: None,
|
||||
pose_data: None,
|
||||
current_time: 0.0,
|
||||
is_playing: false,
|
||||
speaker_names,
|
||||
}
|
||||
}
|
||||
|
||||
fn load_asr(&mut self, path: &PathBuf) -> Result<()> {
|
||||
let content = std::fs::read_to_string(path)
|
||||
.with_context(|| format!("Failed to read ASR file: {:?}", path))?;
|
||||
self.asr_data = Some(serde_json::from_str(&content)?);
|
||||
println!(
|
||||
"✓ Loaded {} ASR segments",
|
||||
self.asr_data.as_ref().unwrap().segments.len()
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn load_face(&mut self, path: &PathBuf) -> Result<()> {
|
||||
let content = std::fs::read_to_string(path)
|
||||
.with_context(|| format!("Failed to read Face file: {:?}", path))?;
|
||||
self.face_data = Some(serde_json::from_str(&content)?);
|
||||
println!(
|
||||
"✓ Loaded {} face detections",
|
||||
self.face_data.as_ref().unwrap().results.detections.len()
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn load_asrx(&mut self, path: &PathBuf) -> Result<()> {
|
||||
let content = std::fs::read_to_string(path)
|
||||
.with_context(|| format!("Failed to read ASRX file: {:?}", path))?;
|
||||
self.asrx_data = Some(serde_json::from_str(&content)?);
|
||||
println!(
|
||||
"✓ Loaded {} ASRX segments, {} speakers",
|
||||
self.asrx_data.as_ref().unwrap().segments.len(),
|
||||
self.asrx_data.as_ref().unwrap().speaker_stats.len()
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn load_pose(&mut self, path: &PathBuf) -> Result<()> {
|
||||
let content = std::fs::read_to_string(path)
|
||||
.with_context(|| format!("Failed to read Pose file: {:?}", path))?;
|
||||
self.pose_data = Some(serde_json::from_str(&content)?);
|
||||
println!(
|
||||
"✓ Loaded {} pose frames",
|
||||
self.pose_data.as_ref().unwrap().frames.len()
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_current_segment(&self, time: f64) -> Option<IntegratedSegment> {
|
||||
let mut segment = IntegratedSegment {
|
||||
start: 0.0,
|
||||
end: 0.0,
|
||||
text: None,
|
||||
speaker: None,
|
||||
face: None,
|
||||
mouth_landmarks: None,
|
||||
};
|
||||
|
||||
if let Some(asr) = &self.asr_data {
|
||||
for seg in &asr.segments {
|
||||
if time >= seg.start && time <= seg.end {
|
||||
segment.start = seg.start;
|
||||
segment.end = seg.end;
|
||||
segment.text = Some(seg.text.clone());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(asrx) = &self.asrx_data {
|
||||
for seg in &asrx.segments {
|
||||
if time >= seg.start && time <= seg.end {
|
||||
segment.start = seg.start;
|
||||
segment.end = seg.end;
|
||||
segment.speaker = Some(seg.speaker.clone());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(face) = &self.face_data {
|
||||
for det in &face.results.detections {
|
||||
if (det.timestamp - time).abs() < 1.0 {
|
||||
segment.face = Some(det.clone());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(pose) = &self.pose_data {
|
||||
for frame in &pose.frames {
|
||||
if (frame.timestamp - time).abs() < 0.5 {
|
||||
if let Some(person) = frame.persons.first() {
|
||||
let mouth_points: Vec<Keypoint> = person
|
||||
.keypoints
|
||||
.iter()
|
||||
.filter(|kp| {
|
||||
kp.name.contains("mouth")
|
||||
|| kp.name.contains("lip")
|
||||
|| kp.name == "nose"
|
||||
})
|
||||
.cloned()
|
||||
.collect();
|
||||
if !mouth_points.is_empty() {
|
||||
segment.mouth_landmarks = Some(mouth_points);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if segment.text.is_some()
|
||||
|| segment.speaker.is_some()
|
||||
|| segment.face.is_some()
|
||||
|| segment.mouth_landmarks.is_some()
|
||||
{
|
||||
Some(segment)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn get_speaker_info(&self, speaker_id: &str) -> (String, String) {
|
||||
self.speaker_names
|
||||
.get(speaker_id)
|
||||
.cloned()
|
||||
.unwrap_or_else(|| ("Unknown".to_string(), "Unknown".to_string()))
|
||||
}
|
||||
|
||||
fn print_segment(&self, segment: &IntegratedSegment) {
|
||||
println!("\n{:=<80}", "");
|
||||
println!("⏱ Time: {:.2}s - {:.2}s", segment.start, segment.end);
|
||||
|
||||
if let Some(text) = &segment.text {
|
||||
println!("📝 Text: {}", text);
|
||||
}
|
||||
|
||||
if let Some(speaker) = &segment.speaker {
|
||||
let (actor, character) = self.get_speaker_info(speaker);
|
||||
println!("🎤 Speaker: {} → {} ({})", speaker, actor, character);
|
||||
}
|
||||
|
||||
if let Some(face) = &segment.face {
|
||||
println!(
|
||||
"👤 Face: bbox=({},{}) {}x{}, confidence={:.2}",
|
||||
face.x, face.y, face.width, face.height, face.confidence
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(landmarks) = &segment.mouth_landmarks {
|
||||
println!("👄 Mouth landmarks: {} points", landmarks.len());
|
||||
for kp in landmarks.iter().take(3) {
|
||||
println!(
|
||||
" • {}: ({:.1}, {:.1}) conf={:.2}",
|
||||
kp.name, kp.x, kp.y, kp.confidence
|
||||
);
|
||||
}
|
||||
}
|
||||
println!("{:=<80}", "");
|
||||
}
|
||||
|
||||
fn list_speakers(&self) {
|
||||
if let Some(asrx) = &self.asrx_data {
|
||||
println!("\n📊 Speaker Statistics:");
|
||||
println!("{:-<80}", "");
|
||||
println!(
|
||||
"{:15} {:20} {:20} {:>10} {:>10}",
|
||||
"Speaker ID", "Actor", "Character", "Segments", "Duration"
|
||||
);
|
||||
println!("{:-<80}", "");
|
||||
|
||||
for (speaker_id, stats) in &asrx.speaker_stats {
|
||||
let (actor, character) = self.get_speaker_info(speaker_id);
|
||||
println!(
|
||||
"{:15} {:20} {:20} {:>10} {:>9.1}s",
|
||||
speaker_id, actor, character, stats.count, stats.duration
|
||||
);
|
||||
}
|
||||
println!("{:-<80}", "");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn play_segment(video_path: &PathBuf, start: f64, duration: f64, show_video: bool) -> Result<()> {
|
||||
println!("▶️ Playing {:.2}s - {:.2}s", start, start + duration);
|
||||
|
||||
let mut cmd = Command::new("ffplay");
|
||||
|
||||
if show_video {
|
||||
cmd.args([
|
||||
"-ss",
|
||||
&format!("{:.2}", start),
|
||||
"-t",
|
||||
&format!("{:.2}", duration),
|
||||
"-autoexit",
|
||||
video_path.to_str().unwrap(),
|
||||
]);
|
||||
} else {
|
||||
cmd.args([
|
||||
"-ss",
|
||||
&format!("{:.2}", start),
|
||||
"-t",
|
||||
&format!("{:.2}", duration),
|
||||
"-autoexit",
|
||||
"-nodisp",
|
||||
video_path.to_str().unwrap(),
|
||||
]);
|
||||
}
|
||||
|
||||
let _child = cmd
|
||||
.stdout(Stdio::null())
|
||||
.stderr(Stdio::null())
|
||||
.spawn()
|
||||
.context("Failed to start ffplay")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn play_speaker_segments(
|
||||
player: &IntegratedPlayer,
|
||||
video_path: &PathBuf,
|
||||
speaker_id: &str,
|
||||
limit: Option<usize>,
|
||||
) -> Result<()> {
|
||||
if let Some(asrx) = &player.asrx_data {
|
||||
let segments: Vec<&AsrxSegment> = asrx
|
||||
.segments
|
||||
.iter()
|
||||
.filter(|s| s.speaker == speaker_id)
|
||||
.collect();
|
||||
|
||||
let total = segments.len();
|
||||
let count = limit.unwrap_or(total).min(total);
|
||||
|
||||
println!("\n🎬 Playing {} segments for {}", count, speaker_id);
|
||||
|
||||
for (i, seg) in segments.iter().take(count).enumerate() {
|
||||
println!("\n[{}/{}] Segment {}", i + 1, count, seg.index);
|
||||
|
||||
if let Some(segment) = player.get_current_segment(seg.start + 0.1) {
|
||||
player.print_segment(&segment);
|
||||
}
|
||||
|
||||
play_segment(video_path, seg.start, seg.duration, false)?;
|
||||
|
||||
thread::sleep(Duration::from_millis(500));
|
||||
}
|
||||
|
||||
println!("\n✅ Finished playing {} segments", count);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn run_demo(player: &IntegratedPlayer, args: &Args) -> Result<()> {
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
if let Some(asr) = &player.asr_data {
|
||||
let total_segments = asr.segments.len();
|
||||
|
||||
for (i, seg) in asr.segments.iter().enumerate() {
|
||||
// 檢查是否退出
|
||||
if quit.load(Ordering::SeqCst) {
|
||||
println!("\n⏹️ Stopped by user");
|
||||
break;
|
||||
}
|
||||
|
||||
// 檢查是否暫停
|
||||
while paused.load(Ordering::SeqCst) {
|
||||
println!("\r⏸️ Paused - Press SPACE to resume",);
|
||||
std::io::stdout().flush()?;
|
||||
thread::sleep(Duration::from_millis(100));
|
||||
|
||||
if quit.load(Ordering::SeqCst) {
|
||||
println!("\n⏹️ Stopped by user");
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
println!("\n[{}/{}] Segment", i + 1, total_segments);
|
||||
println!("{:=<80}", "");
|
||||
|
||||
// 顯示所有信息
|
||||
if let Some(segment) = player.get_current_segment(seg.start + 0.01) {
|
||||
player.print_segment(&segment);
|
||||
}
|
||||
|
||||
// 播放音頻/視頻
|
||||
let duration = seg.end - seg.start;
|
||||
println!(
|
||||
"▶️ Playing: {:.2}s - {:.2}s ({:.2}s)",
|
||||
seg.start, seg.end, duration
|
||||
);
|
||||
|
||||
let mut cmd = Command::new("ffplay");
|
||||
if args.show_video {
|
||||
cmd.args([
|
||||
"-ss",
|
||||
&format!("{:.2}", seg.start),
|
||||
"-t",
|
||||
&format!("{:.2}", duration),
|
||||
"-autoexit",
|
||||
"-x",
|
||||
&format!("{}", args.video_width),
|
||||
"-y",
|
||||
&format!("{}", args.video_height),
|
||||
args.video.to_str().unwrap(),
|
||||
]);
|
||||
} else {
|
||||
cmd.args([
|
||||
"-ss",
|
||||
&format!("{:.2}", seg.start),
|
||||
"-t",
|
||||
&format!("{:.2}", duration),
|
||||
"-autoexit",
|
||||
"-nodisp",
|
||||
args.video.to_str().unwrap(),
|
||||
]);
|
||||
}
|
||||
|
||||
let _child = cmd
|
||||
.stdout(Stdio::null())
|
||||
.stderr(Stdio::null())
|
||||
.spawn()
|
||||
.context("Failed to start ffplay")?;
|
||||
|
||||
// 等待播放完成
|
||||
thread::sleep(Duration::from_millis((duration * 1000.0) as u64 + 100));
|
||||
}
|
||||
|
||||
println!("\n{:=<80}", "");
|
||||
println!("✅ Demo completed! Played {} segments", total_segments);
|
||||
println!("{:=<80}", "");
|
||||
} else {
|
||||
println!("⚠️ No ASR data loaded");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn run_demo(player: &IntegratedPlayer, args: &Args) -> Result<()> {
|
||||
println!("\n🎬 Auto Demo Mode");
|
||||
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
|
||||
println!("Segments per speaker: {}", args.demo_segments_per_speaker);
|
||||
println!("Demo speed: {:.1}x", args.demo_speed);
|
||||
println!();
|
||||
|
||||
if let Some(asrx) = &player.asrx_data {
|
||||
let mut speaker_ids: Vec<String> = asrx.speaker_stats.keys().cloned().collect();
|
||||
speaker_ids.sort();
|
||||
|
||||
for speaker_id in &speaker_ids {
|
||||
let (actor, character) = player.get_speaker_info(speaker_id);
|
||||
|
||||
println!("\n{:=<80}", "");
|
||||
println!("🎭 Demo: {} → {} ({})", speaker_id, actor, character);
|
||||
println!("{:=<80}", "");
|
||||
|
||||
let segments: Vec<&AsrxSegment> = asrx
|
||||
.segments
|
||||
.iter()
|
||||
.filter(|s| s.speaker == *speaker_id)
|
||||
.collect();
|
||||
|
||||
let count = args.demo_segments_per_speaker.min(segments.len());
|
||||
|
||||
for (i, seg) in segments.iter().take(count).enumerate() {
|
||||
println!("\n[Segment {}/{}]", i + 1, count);
|
||||
|
||||
if let Some(segment) = player.get_current_segment(seg.start + 0.1) {
|
||||
player.print_segment(&segment);
|
||||
}
|
||||
|
||||
println!(
|
||||
"⏳ Playing audio ({:.1}s)...",
|
||||
seg.duration / args.demo_speed
|
||||
);
|
||||
|
||||
let _child = Command::new("ffplay")
|
||||
.args([
|
||||
"-ss",
|
||||
&format!("{:.2}", seg.start),
|
||||
"-t",
|
||||
&format!("{:.2}", seg.duration / args.demo_speed),
|
||||
"-autoexit",
|
||||
"-nodisp",
|
||||
args.video.to_str().unwrap(),
|
||||
])
|
||||
.stdout(Stdio::null())
|
||||
.stderr(Stdio::null())
|
||||
.spawn()
|
||||
.context("Failed to start ffplay")?;
|
||||
|
||||
thread::sleep(Duration::from_millis(
|
||||
((seg.duration / args.demo_speed) * 1000.0) as u64 + 500,
|
||||
));
|
||||
}
|
||||
|
||||
println!("\n⏸️ Pausing 2 seconds before next speaker...");
|
||||
thread::sleep(Duration::from_secs(2));
|
||||
}
|
||||
|
||||
println!("\n{:=<80}", "");
|
||||
println!("✅ Demo completed!");
|
||||
println!("{:=<80}", "");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let args = Args::parse();
|
||||
|
||||
if !args.video.exists() {
|
||||
anyhow::bail!("Video file not found: {:?}", args.video);
|
||||
}
|
||||
|
||||
println!("🎬 Integrated Player for ASR/Face/ASRX/Pose");
|
||||
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
|
||||
println!("Video: {:?}", args.video);
|
||||
|
||||
let mut player = IntegratedPlayer::new();
|
||||
|
||||
if let Some(asr_path) = &args.asr {
|
||||
if asr_path.exists() {
|
||||
player.load_asr(asr_path)?;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(face_path) = &args.face {
|
||||
if face_path.exists() {
|
||||
player.load_face(face_path)?;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(asrx_path) = &args.asrx {
|
||||
if asrx_path.exists() {
|
||||
player.load_asrx(asrx_path)?;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(pose_path) = &args.pose {
|
||||
if pose_path.exists() {
|
||||
player.load_pose(pose_path)?;
|
||||
}
|
||||
}
|
||||
|
||||
player.list_speakers();
|
||||
|
||||
if args.demo {
|
||||
run_demo(&player, &args)?;
|
||||
} else if args.continuous_demo {
|
||||
run_continuous_demo(&player, &args)?;
|
||||
} else if args.auto_play_speaker {
|
||||
if let Some(speaker_id) = &args.speaker_name {
|
||||
play_speaker_segments(&player, &args.video, speaker_id, Some(5))?;
|
||||
} else {
|
||||
println!("\n⚠️ --speaker-name required for --auto-play-speaker");
|
||||
}
|
||||
} else {
|
||||
println!("\n🎮 Interactive Mode");
|
||||
println!(" Commands:");
|
||||
println!(" • Enter time in seconds to seek");
|
||||
println!(" • 's' to show current segment");
|
||||
println!(" • 'l' to list speakers");
|
||||
println!(" • 'p <speaker>' to play speaker segments");
|
||||
println!(" • 'q' to quit");
|
||||
println!();
|
||||
|
||||
loop {
|
||||
print!("> ");
|
||||
std::io::Write::flush(&mut std::io::stdout())?;
|
||||
|
||||
let mut input = String::new();
|
||||
std::io::stdin().read_line(&mut input)?;
|
||||
let input = input.trim();
|
||||
|
||||
if input == "q" || input == "quit" || input == "exit" {
|
||||
break;
|
||||
} else if input == "s" || input == "show" {
|
||||
if let Some(segment) = player.get_current_segment(player.current_time) {
|
||||
player.print_segment(&segment);
|
||||
} else {
|
||||
println!("No segment at time {:.2}s", player.current_time);
|
||||
}
|
||||
} else if input == "l" || input == "list" {
|
||||
player.list_speakers();
|
||||
} else if input.starts_with("p ") {
|
||||
let speaker_id = input.strip_prefix("p ").unwrap();
|
||||
play_speaker_segments(&player, &args.video, speaker_id, Some(3))?;
|
||||
} else if let Ok(time) = input.parse::<f64>() {
|
||||
player.current_time = time;
|
||||
println!("Seeked to {:.2}s", time);
|
||||
|
||||
if let Some(segment) = player.get_current_segment(time) {
|
||||
player.print_segment(&segment);
|
||||
} else {
|
||||
println!("No segment at this time");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,56 +0,0 @@
|
||||
use anyhow::{Context, Result};
|
||||
use momentry_core::core::db::{Database, PostgresDb};
|
||||
use momentry_core::core::text::tokenizer::{contains_chinese, tokenize_chinese_text};
|
||||
use momentry_core::core::text::{global_synonym_expander, normalize_chinese_query};
|
||||
use std::env;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
env::set_var("RUST_LOG", "info");
|
||||
|
||||
println!("=== 同義詞擴展測試 ===\n");
|
||||
|
||||
// 初始化 PostgreSQL
|
||||
let pg = PostgresDb::init()
|
||||
.await
|
||||
.context("Failed to initialize PostgreSQL database")?;
|
||||
|
||||
let expander = global_synonym_expander();
|
||||
|
||||
// 測試查詢
|
||||
let test_queries = vec![
|
||||
"電腦",
|
||||
"視頻",
|
||||
"分析",
|
||||
"工作",
|
||||
"檔案",
|
||||
"電腦工作",
|
||||
"工作檔案",
|
||||
];
|
||||
|
||||
for query_str in test_queries {
|
||||
println!("\n🔍 測試查詢: '{}'", query_str);
|
||||
|
||||
// 顯示同義詞擴展
|
||||
if contains_chinese(query_str) {
|
||||
let normalized = normalize_chinese_query(query_str);
|
||||
let expanded = expander.expand_chinese_query(&normalized);
|
||||
println!(" 同義詞擴展: {}", expanded);
|
||||
}
|
||||
|
||||
// 顯示轉換後的 tsquery
|
||||
match pg.prepare_tsquery(query_str) {
|
||||
Ok(tsquery) => println!(" TSQUERY: {}", tsquery),
|
||||
Err(e) => println!(" TSQUERY 錯誤: {}", e),
|
||||
}
|
||||
|
||||
// 執行搜索(即使沒有結果)
|
||||
let results = pg.search_bm25(query_str, None, 2).await?;
|
||||
println!(" 找到 {} 筆結果", results.len());
|
||||
for (i, r) in results.iter().enumerate() {
|
||||
println!(" {}. [{}] {}", i + 1, r.uuid, r.text);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -58,7 +58,8 @@ pub async fn execute_rule1(db: &PostgresDb, file_uuid: &str, fps: f64) -> Result
|
||||
fps,
|
||||
content,
|
||||
)
|
||||
.with_metadata(metadata);
|
||||
.with_metadata(metadata)
|
||||
.with_text_content(seg.text.clone());
|
||||
|
||||
db.store_chunk_in_tx(&chunk, &mut tx).await?;
|
||||
|
||||
|
||||
@@ -32,9 +32,9 @@ struct AsrSegment {
|
||||
/// 2. Aggregates Rule 1 (Sentence) chunks falling within each scene.
|
||||
/// 3. Calls LLM to generate 5W1H+ summary.
|
||||
/// 4. Inserts parent chunks into `dev.chunks`.
|
||||
pub async fn ingest_rule3(pool: &PgPool, asset_uuid: &str) -> Result<usize> {
|
||||
let cut_path = format!("{}/{}.cut.json", *OUTPUT_DIR, asset_uuid);
|
||||
let asr_path = format!("{}/{}.asr.json", *OUTPUT_DIR, asset_uuid);
|
||||
pub async fn ingest_rule3(pool: &PgPool, file_uuid: &str) -> Result<usize> {
|
||||
let cut_path = format!("{}/{}.cut.json", *OUTPUT_DIR, file_uuid);
|
||||
let asr_path = format!("{}/{}.asr.json", *OUTPUT_DIR, file_uuid);
|
||||
|
||||
// 1. Load CUT and ASR data
|
||||
let cut_content = fs::read_to_string(&cut_path)
|
||||
@@ -70,16 +70,16 @@ pub async fn ingest_rule3(pool: &PgPool, asset_uuid: &str) -> Result<usize> {
|
||||
}
|
||||
}
|
||||
|
||||
// Query Rule 1 table for better linking
|
||||
// Query chunks table for Rule 1 sentence chunks
|
||||
let rule1_rows: Vec<(String,)> = sqlx::query_as(
|
||||
r#"
|
||||
SELECT id::text FROM chunks_rule1
|
||||
WHERE asset_uuid = $1
|
||||
SELECT chunk_id FROM chunks
|
||||
WHERE uuid = $1 AND chunk_type = 'sentence' AND rule = 'rule_1'
|
||||
AND start_frame >= $2
|
||||
AND end_frame <= $3
|
||||
"#,
|
||||
)
|
||||
.bind(asset_uuid)
|
||||
.bind(file_uuid)
|
||||
.bind(scene.start_frame as i64)
|
||||
.bind(scene.end_frame as i64)
|
||||
.fetch_all(&mut *tx)
|
||||
@@ -98,14 +98,14 @@ pub async fn ingest_rule3(pool: &PgPool, asset_uuid: &str) -> Result<usize> {
|
||||
|
||||
let texts: Vec<String> = sqlx::query_scalar(
|
||||
r#"
|
||||
SELECT content FROM chunks_rule1
|
||||
WHERE asset_uuid = $1
|
||||
SELECT text_content FROM chunks
|
||||
WHERE uuid = $1 AND chunk_type = 'sentence' AND rule = 'rule_1'
|
||||
AND start_frame >= $2
|
||||
AND end_frame <= $3
|
||||
ORDER BY start_frame ASC
|
||||
"#,
|
||||
)
|
||||
.bind(asset_uuid)
|
||||
.bind(file_uuid)
|
||||
.bind(scene.start_frame as i64)
|
||||
.bind(scene.end_frame as i64)
|
||||
.fetch_all(&mut *tx)
|
||||
@@ -136,7 +136,7 @@ pub async fn ingest_rule3(pool: &PgPool, asset_uuid: &str) -> Result<usize> {
|
||||
|
||||
// 4. Insert into dev.chunks
|
||||
let fps_query: Option<f64> = sqlx::query_scalar("SELECT fps FROM videos WHERE uuid = $1")
|
||||
.bind(asset_uuid)
|
||||
.bind(file_uuid)
|
||||
.fetch_optional(&mut *tx)
|
||||
.await?;
|
||||
let fps = fps_query.unwrap_or(29.97);
|
||||
@@ -157,7 +157,7 @@ pub async fn ingest_rule3(pool: &PgPool, asset_uuid: &str) -> Result<usize> {
|
||||
ON CONFLICT (uuid, chunk_id) DO NOTHING
|
||||
"#,
|
||||
)
|
||||
.bind(asset_uuid)
|
||||
.bind(file_uuid)
|
||||
.bind(&chunk_id)
|
||||
.bind(scene.scene_number as i32)
|
||||
.bind("cut") // Chunk type
|
||||
|
||||
@@ -1,755 +0,0 @@
|
||||
use crate::core::time::FrameTime;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ChunkType {
|
||||
TimeBased,
|
||||
Sentence,
|
||||
Cut,
|
||||
Trace,
|
||||
Story, // Parent chunk from story analysis
|
||||
Visual, // Visual object-based chunk from YOLO detection
|
||||
}
|
||||
|
||||
impl ChunkType {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
ChunkType::TimeBased => "time",
|
||||
ChunkType::Sentence => "sentence",
|
||||
ChunkType::Cut => "cut",
|
||||
ChunkType::Trace => "trace",
|
||||
ChunkType::Story => "story",
|
||||
ChunkType::Visual => "visual",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ChunkRule {
|
||||
Rule1, // 直接轉換
|
||||
Rule2, // 集合內容
|
||||
}
|
||||
|
||||
/// 關鍵幀的物件列表
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct KeyframeObjects {
|
||||
/// 關鍵幀時間 (秒)
|
||||
pub timestamp: f64,
|
||||
/// 關鍵幀幀號
|
||||
pub frame_number: u64,
|
||||
/// 檢測到的物件
|
||||
pub objects: Vec<DetectedObject>,
|
||||
}
|
||||
|
||||
/// 檢測到的物件
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DetectedObject {
|
||||
/// 物件類別名稱
|
||||
pub class_name: String,
|
||||
/// 物件類別 ID
|
||||
pub class_id: u32,
|
||||
/// 信心值 (0.0-1.0)
|
||||
pub confidence: f32,
|
||||
/// 邊界框 (x, y, width, height)
|
||||
pub bbox: Option<BoundingBox>,
|
||||
/// 出現次數 (在分片內)
|
||||
pub occurrence: u32,
|
||||
}
|
||||
|
||||
/// 邊界框
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct VisualChunkContent {
|
||||
pub start_time: f64,
|
||||
pub end_time: f64,
|
||||
pub keyframe_objects: Vec<KeyframeObjects>,
|
||||
pub dominant_objects: Vec<String>,
|
||||
pub object_relationships: Vec<(String, String, String)>, // (object1, relationship, object2)
|
||||
pub scene_description: Option<String>,
|
||||
pub metadata: VisualMetadata,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct VisualMetadata {
|
||||
pub object_count: u32,
|
||||
pub unique_classes: Vec<String>,
|
||||
pub max_confidence: f32,
|
||||
pub avg_confidence: f32,
|
||||
pub spatial_density: f32, // objects per frame
|
||||
}
|
||||
|
||||
impl ChunkRule {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
ChunkRule::Rule1 => "rule_1",
|
||||
ChunkRule::Rule2 => "rule_2",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Chunk {
|
||||
pub file_id: i32,
|
||||
pub uuid: String,
|
||||
pub chunk_id: String,
|
||||
pub chunk_index: u32,
|
||||
pub chunk_type: ChunkType,
|
||||
pub rule: ChunkRule,
|
||||
/// Frames per second (can be fractional, e.g., 29.97, 23.976)
|
||||
pub fps: f64,
|
||||
/// Start frame (0-based)
|
||||
pub start_frame: i64,
|
||||
/// End frame (exclusive)
|
||||
pub end_frame: i64,
|
||||
pub text_content: Option<String>,
|
||||
pub content: serde_json::Value,
|
||||
pub metadata: Option<serde_json::Value>,
|
||||
pub vector_id: Option<String>,
|
||||
pub frame_count: i32,
|
||||
pub pre_chunk_ids: Vec<i32>,
|
||||
pub parent_chunk_id: Option<String>, // For parent-child chunk hierarchy
|
||||
pub child_chunk_ids: Vec<String>, // Child chunk IDs (for parent chunks)
|
||||
pub visual_stats: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
id: i64,
|
||||
video_id: i64,
|
||||
yolo_result: &crate::core::processor::yolo::YoloResult,
|
||||
min_frames_per_chunk: usize,
|
||||
similarity_threshold: f32,
|
||||
) -> Vec<Self> {
|
||||
if yolo_result.frames.is_empty() {
|
||||
return vec![];
|
||||
}
|
||||
|
||||
let mut chunks = Vec::new();
|
||||
let mut current_chunk_frames = Vec::new();
|
||||
let mut current_id = id;
|
||||
|
||||
for (i, frame) in yolo_result.frames.iter().enumerate() {
|
||||
if current_chunk_frames.is_empty() {
|
||||
current_chunk_frames.push(frame);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check similarity with last frame in current chunk
|
||||
let last_frame = current_chunk_frames.last().unwrap();
|
||||
let similarity = VisualChunkContent::frame_similarity(last_frame, frame);
|
||||
|
||||
if similarity >= similarity_threshold && current_chunk_frames.len() < 100 {
|
||||
// Similar enough, add to current chunk
|
||||
current_chunk_frames.push(frame);
|
||||
} else {
|
||||
// Not similar enough or chunk too large, create new chunk
|
||||
if current_chunk_frames.len() >= min_frames_per_chunk {
|
||||
if let Some(chunk) =
|
||||
Self::create_chunk_from_frames(current_id, video_id, ¤t_chunk_frames)
|
||||
{
|
||||
chunks.push(chunk);
|
||||
current_id += 1;
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::core::processor::yolo::{YoloFrame, YoloObject, YoloResult};
|
||||
|
||||
#[test]
|
||||
fn test_chunk_type_visual_serialization() {
|
||||
let chunk_type = ChunkType::Visual;
|
||||
let json = serde_json::to_string(&chunk_type).unwrap();
|
||||
assert_eq!(json, "\"visual\"");
|
||||
|
||||
let deserialized: ChunkType = serde_json::from_str(&json).unwrap();
|
||||
assert_eq!(deserialized, ChunkType::Visual);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_visual_chunk_creation() {
|
||||
// Create a mock YOLO result
|
||||
let yolo_result = YoloResult {
|
||||
frame_count: 2,
|
||||
fps: 30.0,
|
||||
frames: vec![
|
||||
YoloFrame {
|
||||
frame: 0,
|
||||
timestamp: 0.0,
|
||||
objects: vec![
|
||||
YoloObject {
|
||||
class_name: "person".to_string(),
|
||||
class_id: 0,
|
||||
x: 100,
|
||||
y: 200,
|
||||
width: 50,
|
||||
height: 100,
|
||||
confidence: 0.95,
|
||||
},
|
||||
YoloObject {
|
||||
class_name: "car".to_string(),
|
||||
class_id: 2,
|
||||
x: 300,
|
||||
y: 150,
|
||||
width: 80,
|
||||
height: 60,
|
||||
confidence: 0.87,
|
||||
},
|
||||
],
|
||||
},
|
||||
YoloFrame {
|
||||
frame: 1,
|
||||
timestamp: 0.033, // 1/30 second
|
||||
objects: vec![YoloObject {
|
||||
class_name: "person".to_string(),
|
||||
class_id: 0,
|
||||
x: 110,
|
||||
y: 210,
|
||||
width: 52,
|
||||
height: 102,
|
||||
confidence: 0.92,
|
||||
}],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
// Create visual chunk from YOLO result
|
||||
let chunk = Chunk::from_yolo_result(1, 100, &yolo_result, 0, 1).unwrap();
|
||||
|
||||
// Verify chunk properties
|
||||
assert_eq!(chunk.id, 1);
|
||||
assert_eq!(chunk.video_id, 100);
|
||||
assert_eq!(chunk.chunk_type, ChunkType::Visual);
|
||||
assert_eq!(chunk.start_time, 0.0);
|
||||
assert_eq!(chunk.end_time, 0.033);
|
||||
|
||||
// Verify visual content
|
||||
if let ChunkContent::Visual(content) = chunk.content {
|
||||
assert_eq!(content.metadata.object_count, 3);
|
||||
assert_eq!(content.metadata.unique_classes.len(), 2);
|
||||
assert!(content
|
||||
.metadata
|
||||
.unique_classes
|
||||
.contains(&"person".to_string()));
|
||||
assert!(content.metadata.unique_classes.contains(&"car".to_string()));
|
||||
assert_eq!(content.dominant_objects, vec!["person"]);
|
||||
assert_eq!(content.keyframe_objects.len(), 2);
|
||||
} else {
|
||||
panic!("Expected Visual content type");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_visual_chunk_content_methods() {
|
||||
let content = VisualChunkContent {
|
||||
start_time: 0.0,
|
||||
end_time: 5.0,
|
||||
keyframe_objects: vec![KeyframeObjects {
|
||||
frame: 0,
|
||||
timestamp: 0.0,
|
||||
objects: vec![
|
||||
DetectedObject {
|
||||
class_name: "person".to_string(),
|
||||
class_id: 0,
|
||||
bounding_box: BoundingBox {
|
||||
x: 100,
|
||||
y: 200,
|
||||
width: 50,
|
||||
height: 100,
|
||||
},
|
||||
confidence: 0.95,
|
||||
},
|
||||
DetectedObject {
|
||||
class_name: "car".to_string(),
|
||||
class_id: 2,
|
||||
bounding_box: BoundingBox {
|
||||
x: 300,
|
||||
y: 150,
|
||||
width: 80,
|
||||
height: 60,
|
||||
},
|
||||
confidence: 0.87,
|
||||
},
|
||||
],
|
||||
}],
|
||||
dominant_objects: vec!["person".to_string()],
|
||||
object_relationships: vec![],
|
||||
scene_description: Some("A person near a car".to_string()),
|
||||
metadata: VisualMetadata {
|
||||
object_count: 2,
|
||||
unique_classes: vec!["person".to_string(), "car".to_string()],
|
||||
max_confidence: 0.95,
|
||||
avg_confidence: 0.91,
|
||||
spatial_density: 2.0,
|
||||
},
|
||||
};
|
||||
|
||||
// Test summary method
|
||||
let summary = content.summary();
|
||||
assert!(summary.contains("Visual chunk from 0.0s to 5.0s"));
|
||||
assert!(summary.contains("person"));
|
||||
|
||||
// Test contains_object method
|
||||
assert!(content.contains_object("person"));
|
||||
assert!(content.contains_object("car"));
|
||||
assert!(!content.contains_object("dog"));
|
||||
|
||||
// Test high_confidence_objects method
|
||||
let high_conf_objects = content.high_confidence_objects(0.9);
|
||||
assert_eq!(high_conf_objects.len(), 1);
|
||||
assert_eq!(high_conf_objects[0].class_name, "person");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_frame_similarity() {
|
||||
let frame1 = YoloFrame {
|
||||
frame: 0,
|
||||
timestamp: 0.0,
|
||||
objects: vec![
|
||||
YoloObject {
|
||||
class_name: "person".to_string(),
|
||||
class_id: 0,
|
||||
x: 100,
|
||||
y: 200,
|
||||
width: 50,
|
||||
height: 100,
|
||||
confidence: 0.95,
|
||||
},
|
||||
YoloObject {
|
||||
class_name: "car".to_string(),
|
||||
class_id: 2,
|
||||
x: 300,
|
||||
y: 150,
|
||||
width: 80,
|
||||
height: 60,
|
||||
confidence: 0.87,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
let frame2 = YoloFrame {
|
||||
frame: 1,
|
||||
timestamp: 0.033,
|
||||
objects: vec![
|
||||
YoloObject {
|
||||
class_name: "person".to_string(),
|
||||
class_id: 0,
|
||||
x: 110,
|
||||
y: 210,
|
||||
width: 52,
|
||||
height: 102,
|
||||
confidence: 0.92,
|
||||
},
|
||||
YoloObject {
|
||||
class_name: "car".to_string(),
|
||||
class_id: 2,
|
||||
x: 310,
|
||||
y: 155,
|
||||
width: 82,
|
||||
height: 62,
|
||||
confidence: 0.85,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
let frame3 = YoloFrame {
|
||||
frame: 2,
|
||||
timestamp: 0.066,
|
||||
objects: vec![YoloObject {
|
||||
class_name: "dog".to_string(),
|
||||
class_id: 16,
|
||||
x: 150,
|
||||
y: 250,
|
||||
width: 40,
|
||||
height: 60,
|
||||
confidence: 0.78,
|
||||
}],
|
||||
};
|
||||
|
||||
// Test similar frames (same objects)
|
||||
let similarity_same =
|
||||
VisualChunkContent::frame_similarity(&frame1, &frame2);
|
||||
assert!((similarity_same - 1.0).abs() < 0.001);
|
||||
|
||||
// Test dissimilar frames (different objects)
|
||||
let similarity_diff =
|
||||
VisualChunkContent::frame_similarity(&frame1, &frame3);
|
||||
assert!((similarity_diff - 0.0).abs() < 0.001);
|
||||
|
||||
// Test empty frames
|
||||
let empty_frame = YoloFrame {
|
||||
frame: 3,
|
||||
timestamp: 0.1,
|
||||
objects: vec![],
|
||||
};
|
||||
let similarity_empty =
|
||||
VisualChunkContent::frame_similarity(&empty_frame, &empty_frame);
|
||||
assert!((similarity_empty - 1.0).abs() < 0.001);
|
||||
|
||||
let similarity_mixed =
|
||||
VisualChunkContent::frame_similarity(&empty_frame, &frame1);
|
||||
assert!((similarity_mixed - 0.0).abs() < 0.001);
|
||||
}
|
||||
}
|
||||
current_chunk_frames = vec![frame];
|
||||
}
|
||||
}
|
||||
|
||||
// Handle last chunk
|
||||
if current_chunk_frames.len() >= min_frames_per_chunk {
|
||||
if let Some(chunk) =
|
||||
Self::create_chunk_from_frames(current_id, video_id, ¤t_chunk_frames)
|
||||
{
|
||||
chunks.push(chunk);
|
||||
}
|
||||
}
|
||||
|
||||
chunks
|
||||
}
|
||||
|
||||
fn create_chunk_from_frames(
|
||||
id: i64,
|
||||
video_id: i64,
|
||||
frames: &[&crate::core::processor::yolo::YoloFrame],
|
||||
) -> Option<Self> {
|
||||
if frames.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Simple conversion - could use the from_yolo_result method
|
||||
let start_frame = frames.first().unwrap().frame;
|
||||
let end_frame = frames.last().unwrap().frame;
|
||||
let dummy_yolo_result = crate::core::processor::yolo::YoloResult {
|
||||
frame_count: frames.len() as u64,
|
||||
fps: 0.0, // Not used in this context
|
||||
frames: frames.iter().map(|f| (*f).clone()).collect(),
|
||||
};
|
||||
|
||||
Self::from_yolo_result(id, video_id, &dummy_yolo_result, start_frame, end_frame)
|
||||
}
|
||||
|
||||
/// Creates a new chunk from seconds (legacy conversion).
|
||||
///
|
||||
/// This is useful for migrating from older systems that store time as seconds.
|
||||
/// The frame counts are calculated by rounding `seconds * fps`.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn from_seconds(
|
||||
file_id: i32,
|
||||
uuid: String,
|
||||
chunk_index: u32,
|
||||
chunk_type: ChunkType,
|
||||
rule: ChunkRule,
|
||||
start_time: f64,
|
||||
end_time: f64,
|
||||
fps: f64,
|
||||
content: serde_json::Value,
|
||||
) -> Self {
|
||||
let start_frame = (start_time * fps).round() as i64;
|
||||
let end_frame = (end_time * fps).round() as i64;
|
||||
Self::new(
|
||||
file_id,
|
||||
uuid,
|
||||
chunk_index,
|
||||
chunk_type,
|
||||
rule,
|
||||
start_frame,
|
||||
end_frame,
|
||||
fps,
|
||||
content,
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns the start time as a `FrameTime`.
|
||||
pub fn start_time(&self) -> FrameTime {
|
||||
FrameTime::from_frames(self.start_frame, self.fps)
|
||||
}
|
||||
|
||||
/// Returns the end time as a `FrameTime`.
|
||||
pub fn end_time(&self) -> FrameTime {
|
||||
FrameTime::from_frames(self.end_frame, self.fps)
|
||||
}
|
||||
|
||||
/// Returns the duration in frames.
|
||||
pub fn duration_frames(&self) -> i64 {
|
||||
self.end_frame - self.start_frame
|
||||
}
|
||||
|
||||
/// Returns the duration in seconds.
|
||||
pub fn duration_seconds(&self) -> f64 {
|
||||
self.duration_frames() as f64 / self.fps
|
||||
}
|
||||
|
||||
/// Formats the start time as "seconds.frame" (e.g., "123.04").
|
||||
pub fn format_start_sec_frame(&self) -> String {
|
||||
self.start_time().format_sec_frame()
|
||||
}
|
||||
|
||||
/// Formats the end time as "seconds.frame" (e.g., "456.15").
|
||||
pub fn format_end_sec_frame(&self) -> String {
|
||||
self.end_time().format_sec_frame()
|
||||
}
|
||||
|
||||
/// Formats the start time as "HH:MM:SS".
|
||||
pub fn format_start_hms(&self) -> String {
|
||||
self.start_time().format_hms()
|
||||
}
|
||||
|
||||
/// Formats the end time as "HH:MM:SS".
|
||||
pub fn format_end_hms(&self) -> String {
|
||||
self.end_time().format_hms()
|
||||
}
|
||||
|
||||
/// Formats the start time as "HH:MM:SS.FF".
|
||||
pub fn format_start_hms_frame(&self) -> String {
|
||||
self.start_time().format_hms_frame()
|
||||
}
|
||||
|
||||
/// Formats the end time as "HH:MM:SS.FF".
|
||||
pub fn format_end_hms_frame(&self) -> String {
|
||||
self.end_time().format_hms_frame()
|
||||
}
|
||||
|
||||
/// Returns a tuple of (start_seconds, end_seconds) for compatibility.
|
||||
///
|
||||
/// This is provided for backward compatibility during migration.
|
||||
/// Prefer using `start_time()` and `end_time()` methods.
|
||||
pub fn time_range_seconds(&self) -> (f64, f64) {
|
||||
(self.start_time().seconds(), self.end_time().seconds())
|
||||
}
|
||||
|
||||
pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self {
|
||||
self.metadata = Some(metadata);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_vector_id(mut self, vector_id: String) -> Self {
|
||||
self.vector_id = Some(vector_id);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_text_content(mut self, text: String) -> Self {
|
||||
self.text_content = Some(text);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_frame_count(mut self, count: i32) -> Self {
|
||||
self.frame_count = count;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_pre_chunk_ids(mut self, ids: Vec<i32>) -> Self {
|
||||
self.pre_chunk_ids = ids;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_parent_chunk_id(mut self, parent_id: String) -> Self {
|
||||
self.parent_chunk_id = Some(parent_id);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_child_chunk_ids(mut self, child_ids: Vec<String>) -> Self {
|
||||
self.child_chunk_ids = child_ids;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn is_parent_chunk(&self) -> bool {
|
||||
!self.child_chunk_ids.is_empty()
|
||||
}
|
||||
|
||||
pub fn is_child_chunk(&self) -> bool {
|
||||
self.parent_chunk_id.is_some()
|
||||
}
|
||||
|
||||
/// 創建視覺分片
|
||||
pub fn new_visual(
|
||||
file_id: i32,
|
||||
uuid: String,
|
||||
chunk_index: u32,
|
||||
start_frame: i64,
|
||||
end_frame: i64,
|
||||
fps: f64,
|
||||
visual_content: VisualChunkContent,
|
||||
) -> Self {
|
||||
let content = serde_json::to_value(&visual_content)
|
||||
.unwrap_or_else(|_| serde_json::json!({"error": "Failed to serialize visual content"}));
|
||||
|
||||
Self::new(
|
||||
file_id,
|
||||
uuid,
|
||||
chunk_index,
|
||||
ChunkType::Visual,
|
||||
ChunkRule::Rule2,
|
||||
start_frame,
|
||||
end_frame,
|
||||
fps,
|
||||
content,
|
||||
)
|
||||
}
|
||||
|
||||
/// 從 YOLO 結果創建視覺分片
|
||||
pub fn from_yolo_result(
|
||||
file_id: i32,
|
||||
uuid: String,
|
||||
chunk_index: u32,
|
||||
start_frame: i64,
|
||||
end_frame: i64,
|
||||
fps: f64,
|
||||
yolo_frames: Vec<crate::core::processor::yolo::YoloFrame>,
|
||||
) -> Self {
|
||||
use crate::core::processor::yolo::YoloFrame;
|
||||
use std::collections::HashMap;
|
||||
|
||||
// 分析物件統計
|
||||
let mut object_counts = HashMap::new();
|
||||
let mut keyframe_objects = Vec::new();
|
||||
let mut all_objects = Vec::new();
|
||||
|
||||
for frame in &yolo_frames {
|
||||
let mut frame_objects = Vec::new();
|
||||
|
||||
for obj in &frame.objects {
|
||||
// 更新物件統計
|
||||
*object_counts.entry(obj.class_name.clone()).or_insert(0) += 1;
|
||||
|
||||
// 創建檢測到的物件
|
||||
let detected_obj = DetectedObject {
|
||||
class_name: obj.class_name.clone(),
|
||||
class_id: obj.class_id,
|
||||
confidence: obj.confidence,
|
||||
bbox: Some(BoundingBox {
|
||||
x: obj.x,
|
||||
y: obj.y,
|
||||
width: obj.width,
|
||||
height: obj.height,
|
||||
}),
|
||||
occurrence: 1,
|
||||
};
|
||||
|
||||
frame_objects.push(detected_obj.clone());
|
||||
all_objects.push(detected_obj);
|
||||
}
|
||||
|
||||
if !frame_objects.is_empty() {
|
||||
keyframe_objects.push(KeyframeObjects {
|
||||
timestamp: frame.timestamp,
|
||||
frame_number: frame.frame,
|
||||
objects: frame_objects,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// 創建主要物件標籤
|
||||
let primary_objects = object_counts
|
||||
.iter()
|
||||
.filter(|(_, &count)| count >= 3) // 出現至少3次的物件
|
||||
.map(|(name, _)| name.clone())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
|
||||
// 創建物件統計 JSON
|
||||
let object_stats =
|
||||
serde_json::to_value(&object_counts).unwrap_or_else(|_| serde_json::json!({}));
|
||||
|
||||
// 創建視覺內容
|
||||
let visual_content = VisualChunkContent {
|
||||
primary_objects: if primary_objects.is_empty() {
|
||||
"no objects detected".to_string()
|
||||
} else {
|
||||
primary_objects
|
||||
},
|
||||
object_stats,
|
||||
keyframe_objects,
|
||||
object_frequency: serde_json::to_value(&object_counts)
|
||||
.unwrap_or_else(|_| serde_json::json!({})),
|
||||
visual_summary: None, // 可選,後續可添加 LLM 生成的摘要
|
||||
};
|
||||
|
||||
Self::new_visual(
|
||||
file_id,
|
||||
uuid,
|
||||
chunk_index,
|
||||
start_frame,
|
||||
end_frame,
|
||||
fps,
|
||||
visual_content,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl VisualChunkContent {
|
||||
/// Calculate similarity between two YOLO frames based on object composition
|
||||
pub fn frame_similarity(
|
||||
frame1: &crate::core::processor::yolo::YoloFrame,
|
||||
frame2: &crate::core::processor::yolo::YoloFrame,
|
||||
) -> f32 {
|
||||
if frame1.objects.is_empty() && frame2.objects.is_empty() {
|
||||
return 1.0; // Both empty frames are perfectly similar
|
||||
}
|
||||
|
||||
if frame1.objects.is_empty() || frame2.objects.is_empty() {
|
||||
return 0.0; // One empty, one non-empty are dissimilar
|
||||
}
|
||||
|
||||
// Create sets of object class names
|
||||
let set1: std::collections::HashSet<String> = frame1
|
||||
.objects
|
||||
.iter()
|
||||
.map(|o| o.class_name.clone())
|
||||
.collect();
|
||||
let set2: std::collections::HashSet<String> = frame2
|
||||
.objects
|
||||
.iter()
|
||||
.map(|o| o.class_name.clone())
|
||||
.collect();
|
||||
|
||||
// Calculate Jaccard similarity
|
||||
let intersection: Vec<_> = set1.intersection(&set2).collect();
|
||||
let union: Vec<_> = set1.union(&set2).collect();
|
||||
|
||||
if union.is_empty() {
|
||||
0.0
|
||||
} else {
|
||||
intersection.len() as f32 / union.len() as f32
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a summary of the visual chunk
|
||||
pub fn summary(&self) -> String {
|
||||
let duration = self.end_time - self.start_time;
|
||||
let frame_count = self.keyframe_objects.len();
|
||||
|
||||
format!(
|
||||
"Visual chunk from {:.1}s to {:.1}s (duration: {:.1}s, {} frames). Objects: {} total, {} unique. Dominant objects: {}",
|
||||
self.start_time,
|
||||
self.end_time,
|
||||
duration,
|
||||
frame_count,
|
||||
self.metadata.object_count,
|
||||
self.metadata.unique_classes.len(),
|
||||
if self.dominant_objects.is_empty() {
|
||||
"none".to_string()
|
||||
} else {
|
||||
self.dominant_objects.join(", ")
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
/// Check if this chunk contains a specific object class
|
||||
pub fn contains_object(&self, class_name: &str) -> bool {
|
||||
self.keyframe_objects
|
||||
.iter()
|
||||
.any(|ko| ko.objects.iter().any(|obj| obj.class_name == class_name))
|
||||
}
|
||||
|
||||
/// Get all objects with confidence above threshold
|
||||
pub fn high_confidence_objects(&self, threshold: f32) -> Vec<&DetectedObject> {
|
||||
self.keyframe_objects
|
||||
.iter()
|
||||
.flat_map(|ko| ko.objects.iter())
|
||||
.filter(|obj| obj.confidence >= threshold)
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
@@ -228,6 +228,11 @@ impl From<VideoRow> for VideoRecord {
|
||||
registration_time: row.registration_time,
|
||||
total_frames: row.total_frames.unwrap_or(0) as u64,
|
||||
parent_uuid: row.parent_uuid,
|
||||
cut_done: false,
|
||||
cut_count: 0,
|
||||
cut_max_duration: 0.0,
|
||||
scene_done: false,
|
||||
audio_tracks: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -254,6 +259,11 @@ pub struct VideoRecord {
|
||||
pub registration_time: Option<String>,
|
||||
pub total_frames: u64,
|
||||
pub parent_uuid: Option<String>,
|
||||
pub cut_done: bool,
|
||||
pub cut_count: i32,
|
||||
pub cut_max_duration: f64,
|
||||
pub scene_done: bool,
|
||||
pub audio_tracks: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@@ -332,9 +342,9 @@ pub struct MonitorJob {
|
||||
pub progress_current: i32,
|
||||
pub error_count: i32,
|
||||
pub last_error: Option<String>,
|
||||
pub started_at: Option<chrono::NaiveDateTime>,
|
||||
pub updated_at: Option<chrono::NaiveDateTime>,
|
||||
pub created_at: chrono::NaiveDateTime,
|
||||
pub started_at: Option<chrono::DateTime<chrono::Utc>>,
|
||||
pub updated_at: Option<chrono::DateTime<chrono::Utc>>,
|
||||
pub created_at: chrono::DateTime<chrono::Utc>,
|
||||
pub processors: Vec<String>,
|
||||
pub completed_processors: Vec<String>,
|
||||
pub failed_processors: Vec<String>,
|
||||
@@ -393,17 +403,80 @@ impl ProcessorType {
|
||||
}
|
||||
}
|
||||
|
||||
/// 預估 CPU 使用率(0.0 ~ 1.0, 1.0 = 一個完整核心)
|
||||
pub fn estimated_cpu(&self) -> f64 {
|
||||
match self {
|
||||
ProcessorType::Asr => 1.0,
|
||||
ProcessorType::Cut => 0.5,
|
||||
ProcessorType::Yolo => 0.3,
|
||||
ProcessorType::Ocr => 0.8,
|
||||
ProcessorType::Face => 0.6,
|
||||
ProcessorType::Pose => 0.4,
|
||||
ProcessorType::Asrx => 0.8,
|
||||
ProcessorType::VisualChunk => 0.3,
|
||||
ProcessorType::Scene => 0.3,
|
||||
}
|
||||
}
|
||||
|
||||
/// 是否使用 GPU
|
||||
pub fn uses_gpu(&self) -> bool {
|
||||
match self {
|
||||
ProcessorType::Yolo | ProcessorType::Face | ProcessorType::Pose => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// 預估記憶體使用量 (MB)
|
||||
pub fn estimated_memory_mb(&self) -> u64 {
|
||||
match self {
|
||||
ProcessorType::Asr => 2048,
|
||||
ProcessorType::Cut => 512,
|
||||
ProcessorType::Yolo => 1024,
|
||||
ProcessorType::Ocr => 1024,
|
||||
ProcessorType::Face => 1536,
|
||||
ProcessorType::Pose => 1024,
|
||||
ProcessorType::Asrx => 2048,
|
||||
ProcessorType::VisualChunk => 512,
|
||||
ProcessorType::Scene => 512,
|
||||
}
|
||||
}
|
||||
|
||||
/// 使用的模型名稱(如有)
|
||||
pub fn model_name(&self) -> Option<&'static str> {
|
||||
match self {
|
||||
ProcessorType::Asr => Some("faster-whisper"),
|
||||
ProcessorType::Cut => None,
|
||||
ProcessorType::Yolo => Some("yolov8n"),
|
||||
ProcessorType::Ocr => Some("paddleocr"),
|
||||
ProcessorType::Face => Some("insightface/buffalo_l"),
|
||||
ProcessorType::Pose => Some("mediapipe/pose"),
|
||||
ProcessorType::Asrx => Some("speechbrain/ecapa-tdnn"),
|
||||
ProcessorType::VisualChunk => None,
|
||||
ProcessorType::Scene => Some("places365"),
|
||||
}
|
||||
}
|
||||
|
||||
/// 依賴的其他 Processor(需先完成才能執行)
|
||||
pub fn dependencies(&self) -> Vec<ProcessorType> {
|
||||
match self {
|
||||
ProcessorType::Asrx => vec![ProcessorType::Asr],
|
||||
ProcessorType::VisualChunk => vec![ProcessorType::Yolo],
|
||||
ProcessorType::Scene => vec![ProcessorType::Cut],
|
||||
_ => vec![],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn all() -> Vec<ProcessorType> {
|
||||
vec![
|
||||
ProcessorType::Asr,
|
||||
ProcessorType::Cut,
|
||||
ProcessorType::Scene,
|
||||
ProcessorType::Asr,
|
||||
ProcessorType::Asrx,
|
||||
ProcessorType::Yolo,
|
||||
ProcessorType::Ocr,
|
||||
ProcessorType::Face,
|
||||
ProcessorType::Pose,
|
||||
ProcessorType::Asrx,
|
||||
ProcessorType::VisualChunk,
|
||||
ProcessorType::Scene,
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -701,8 +774,8 @@ impl PostgresDb {
|
||||
.await?;
|
||||
|
||||
// Chunks
|
||||
sqlx::query("CREATE TABLE IF NOT EXISTS chunks (id SERIAL PRIMARY KEY, uuid VARCHAR(32) NOT NULL, chunk_id VARCHAR(64) NOT NULL, chunk_index INTEGER NOT NULL, chunk_type VARCHAR(32) NOT NULL, start_time DOUBLE PRECISION NOT NULL, end_time DOUBLE PRECISION NOT NULL, fps DOUBLE PRECISION DEFAULT 24.0, start_frame BIGINT DEFAULT 0, end_frame BIGINT DEFAULT 0, content JSONB NOT NULL, metadata JSONB, vector_id VARCHAR(64), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, UNIQUE(uuid, chunk_id))").execute(pool).await?;
|
||||
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_uuid ON chunks(uuid)")
|
||||
sqlx::query("CREATE TABLE IF NOT EXISTS chunks (id SERIAL PRIMARY KEY, file_uuid VARCHAR(32) NOT NULL, chunk_id VARCHAR(64) NOT NULL, chunk_index INTEGER NOT NULL, chunk_type VARCHAR(32) NOT NULL, start_time DOUBLE PRECISION NOT NULL, end_time DOUBLE PRECISION NOT NULL, fps DOUBLE PRECISION DEFAULT 24.0, start_frame BIGINT DEFAULT 0, end_frame BIGINT DEFAULT 0, content JSONB NOT NULL, metadata JSONB, vector_id VARCHAR(64), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, UNIQUE(file_uuid, chunk_id))").execute(pool).await?;
|
||||
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_uuid)")
|
||||
.execute(pool)
|
||||
.await?;
|
||||
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_type ON chunks(chunk_type)")
|
||||
@@ -765,15 +838,13 @@ impl PostgresDb {
|
||||
.await?;
|
||||
|
||||
// Chunks Rule 1
|
||||
sqlx::query("CREATE TABLE IF NOT EXISTS chunks_rule1 (id UUID PRIMARY KEY DEFAULT gen_random_uuid(), asset_uuid VARCHAR(32) NOT NULL REFERENCES videos(uuid) ON DELETE CASCADE, start_frame BIGINT NOT NULL, end_frame BIGINT NOT NULL, content TEXT NOT NULL, speaker_id VARCHAR(50), created_at TIMESTAMPTZ DEFAULT NOW())").execute(pool).await?;
|
||||
sqlx::query(
|
||||
"CREATE INDEX IF NOT EXISTS idx_chunks_rule1_asset ON chunks_rule1(asset_uuid)",
|
||||
)
|
||||
.execute(pool)
|
||||
.await?;
|
||||
sqlx::query("CREATE TABLE IF NOT EXISTS chunks_rule1 (id UUID PRIMARY KEY DEFAULT gen_random_uuid(), file_uuid VARCHAR(32) NOT NULL REFERENCES videos(uuid) ON DELETE CASCADE, start_frame BIGINT NOT NULL, end_frame BIGINT NOT NULL, content TEXT NOT NULL, speaker_id VARCHAR(50), created_at TIMESTAMPTZ DEFAULT NOW())").execute(pool).await?;
|
||||
sqlx::query("CREATE INDEX IF NOT EXISTS idx_chunks_rule1_asset ON chunks_rule1(file_uuid)")
|
||||
.execute(pool)
|
||||
.await?;
|
||||
|
||||
// Jobs (Legacy/P0)
|
||||
sqlx::query("CREATE TABLE IF NOT EXISTS jobs (id UUID PRIMARY KEY, asset_uuid VARCHAR(32) NOT NULL REFERENCES videos(uuid) ON DELETE CASCADE, processor_list TEXT[], assigned_processor_id UUID, rule VARCHAR(20), status VARCHAR(20) DEFAULT 'QUEUED', total_frames BIGINT DEFAULT 0, processed_frames BIGINT DEFAULT 0, error_message TEXT, created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW())").execute(pool).await?;
|
||||
sqlx::query("CREATE TABLE IF NOT EXISTS jobs (id UUID PRIMARY KEY, file_uuid VARCHAR(32) NOT NULL REFERENCES videos(uuid) ON DELETE CASCADE, processor_list TEXT[], assigned_processor_id UUID, rule VARCHAR(20), status VARCHAR(20) DEFAULT 'QUEUED', total_frames BIGINT DEFAULT 0, processed_frames BIGINT DEFAULT 0, error_message TEXT, created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW())").execute(pool).await?;
|
||||
sqlx::query("CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status)")
|
||||
.execute(pool)
|
||||
.await?;
|
||||
@@ -1162,8 +1233,8 @@ impl PostgresDb {
|
||||
.await?;
|
||||
|
||||
sqlx::query(&format!(
|
||||
"DELETE FROM {} WHERE video_id IN (SELECT id FROM {} WHERE uuid = $1)",
|
||||
processor_results, videos
|
||||
"DELETE FROM {} WHERE file_uuid = $1",
|
||||
processor_results
|
||||
))
|
||||
.bind(uuid)
|
||||
.execute(&self.pool)
|
||||
@@ -2026,21 +2097,19 @@ impl PostgresDb {
|
||||
r#"
|
||||
INSERT INTO {} (
|
||||
file_uuid, processor_type, coordinate_type, coordinate_index,
|
||||
timestamp, data, identity_id, confidence
|
||||
) VALUES ($1, $2, 'frame', $3, $4, $5, $6, $7)
|
||||
start_frame, end_frame, start_time, data
|
||||
) VALUES ($1, $2, 'frame', $3, $3, $3, $4, $5)
|
||||
"#,
|
||||
table
|
||||
);
|
||||
|
||||
for (coord_idx, ts, data, id, conf) in chunks {
|
||||
for (coord_idx, ts, data, _id, _conf) in chunks {
|
||||
sqlx::query(&query)
|
||||
.bind(file_uuid)
|
||||
.bind(processor_type)
|
||||
.bind(*coord_idx)
|
||||
.bind(*ts)
|
||||
.bind(data)
|
||||
.bind(*id)
|
||||
.bind(*conf)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
}
|
||||
@@ -2060,7 +2129,7 @@ impl PostgresDb {
|
||||
let query = format!(
|
||||
r#"
|
||||
INSERT INTO {} (
|
||||
file_uuid, processor_type, coordinate_type, coordinate_index,
|
||||
file_uuid, processor_type, coordinate_type, coordinate_index,
|
||||
start_frame, end_frame, start_time, end_time, data
|
||||
) VALUES ($1, 'asr', 'time', $2, $3, $4, $5, $6, $7)
|
||||
"#,
|
||||
@@ -2402,10 +2471,10 @@ impl PostgresDb {
|
||||
offset: i64,
|
||||
) -> Result<Vec<IdentityChunkRecord>> {
|
||||
let query = r#"
|
||||
SELECT c.id, c.uuid as file_uuid, c.chunk_id, c.chunk_type,
|
||||
SELECT c.id, c.file_uuid, c.chunk_id, c.chunk_type,
|
||||
c.start_time, c.end_time, c.text_content, c.content
|
||||
FROM chunks c
|
||||
WHERE c.uuid IN (
|
||||
WHERE c.file_uuid IN (
|
||||
SELECT DISTINCT fi.file_uuid
|
||||
FROM file_identities fi
|
||||
JOIN identities i ON fi.identity_id = i.id
|
||||
@@ -2504,9 +2573,9 @@ impl PostgresDb {
|
||||
|
||||
sqlx::query(&format!(
|
||||
r#"
|
||||
INSERT INTO {} (file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
|
||||
INSERT INTO {} (file_id, file_uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12::jsonb, $13::jsonb, $14, $15, $16, $17, $18)
|
||||
ON CONFLICT (uuid, chunk_id) DO UPDATE SET
|
||||
ON CONFLICT (file_uuid, chunk_id) DO UPDATE SET
|
||||
start_time = EXCLUDED.start_time,
|
||||
end_time = EXCLUDED.end_time,
|
||||
fps = EXCLUDED.fps,
|
||||
@@ -2579,9 +2648,9 @@ impl PostgresDb {
|
||||
|
||||
sqlx::query(&format!(
|
||||
r#"
|
||||
INSERT INTO {} (file_id, uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
|
||||
INSERT INTO {} (file_id, file_uuid, chunk_id, chunk_index, chunk_type, start_time, end_time, fps, start_frame, end_frame, text_content, content, metadata, vector_id, frame_count, pre_chunk_ids, parent_chunk_id, child_chunk_ids)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12::jsonb, $13::jsonb, $14, $15, $16, $17, $18)
|
||||
ON CONFLICT (uuid, chunk_id) DO UPDATE SET
|
||||
ON CONFLICT (file_uuid, chunk_id) DO UPDATE SET
|
||||
start_time = EXCLUDED.start_time,
|
||||
end_time = EXCLUDED.end_time,
|
||||
fps = EXCLUDED.fps,
|
||||
@@ -2626,7 +2695,7 @@ impl PostgresDb {
|
||||
pub async fn get_chunks_by_uuid(&self, uuid: &str) -> Result<Vec<Chunk>> {
|
||||
let table = schema::table_name("chunks");
|
||||
let rows = sqlx::query(&format!(
|
||||
"SELECT COALESCE(file_id, 0) as file_id, uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE uuid = $1 ORDER BY chunk_index",
|
||||
"SELECT COALESCE(file_id, 0) as file_id, file_uuid as uuid, chunk_id, chunk_index, chunk_type, COALESCE(fps, 24.0) as fps, COALESCE(start_frame, 0) as start_frame, COALESCE(end_frame, 0) as end_frame, text_content, content, metadata, vector_id, COALESCE(frame_count, 0) as frame_count, pre_chunk_ids, parent_chunk_id::text as parent_chunk_id, child_chunk_ids, visual_stats FROM {} WHERE file_uuid = $1 ORDER BY chunk_index",
|
||||
table
|
||||
))
|
||||
.bind(uuid)
|
||||
@@ -3264,36 +3333,40 @@ impl PostgresDb {
|
||||
let sql = match uuid {
|
||||
Some(_) => &format!(
|
||||
r#"
|
||||
SELECT c.chunk_id, c.uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
|
||||
SELECT c.chunk_id, c.file_uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
|
||||
c.text_content, GREATEST(ts_rank_cd(c.search_vector, to_tsquery('english', $1)), ts_rank_cd(pc.summary_tsvector, to_tsquery('english', $1))) as bm25_score,
|
||||
c.visual_stats,
|
||||
pc.metadata->'structured_summary' as scene_summary,
|
||||
c.parent_chunk_id::integer
|
||||
FROM {} c
|
||||
LEFT JOIN parent_chunks pc ON c.parent_chunk_id = pc.id::varchar
|
||||
WHERE (c.search_vector @@ to_tsquery('english', $1) OR pc.summary_tsvector @@ to_tsquery('english', $1)) AND c.uuid = $2
|
||||
WHERE (c.search_vector @@ to_tsquery('english', $1) OR pc.summary_tsvector @@ to_tsquery('english', $1) OR c.text_content ILIKE $3) AND c.file_uuid = $2
|
||||
ORDER BY bm25_score DESC
|
||||
LIMIT $3
|
||||
LIMIT $4
|
||||
"#,
|
||||
table
|
||||
),
|
||||
None => &format!(
|
||||
r#"
|
||||
SELECT c.chunk_id, c.uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
|
||||
SELECT c.chunk_id, c.file_uuid, c.chunk_index, c.chunk_type, c.start_frame, c.end_frame, c.fps, c.start_time, c.end_time,
|
||||
c.text_content, GREATEST(ts_rank_cd(c.search_vector, to_tsquery('english', $1)), ts_rank_cd(pc.summary_tsvector, to_tsquery('english', $1))) as bm25_score,
|
||||
c.visual_stats,
|
||||
pc.metadata->'structured_summary' as scene_summary,
|
||||
c.parent_chunk_id::integer
|
||||
FROM {} c
|
||||
LEFT JOIN parent_chunks pc ON c.parent_chunk_id = pc.id::varchar
|
||||
WHERE (c.search_vector @@ to_tsquery('english', $1) OR pc.summary_tsvector @@ to_tsquery('english', $1))
|
||||
WHERE (c.search_vector @@ to_tsquery('english', $1) OR pc.summary_tsvector @@ to_tsquery('english', $1) OR c.text_content ILIKE $2)
|
||||
ORDER BY bm25_score DESC
|
||||
LIMIT $2
|
||||
LIMIT $3
|
||||
"#,
|
||||
table
|
||||
),
|
||||
};
|
||||
|
||||
// 使用 pg_trgm 支援中英文模糊搜尋
|
||||
// ILIKE 支援中文 LIKE 匹配,pg_trgm 的 similarity() 可做更精確的排名
|
||||
let ilike_pattern = format!("%{}%", query);
|
||||
|
||||
let rows: Vec<(
|
||||
String,
|
||||
String,
|
||||
@@ -3310,10 +3383,11 @@ impl PostgresDb {
|
||||
Option<serde_json::Value>,
|
||||
Option<i32>,
|
||||
)> = match uuid {
|
||||
Some(_) => {
|
||||
Some(u) => {
|
||||
sqlx::query_as(sql)
|
||||
.bind(&tsquery)
|
||||
.bind(uuid)
|
||||
.bind(u)
|
||||
.bind(&ilike_pattern)
|
||||
.bind(limit as i64)
|
||||
.fetch_all(&self.pool)
|
||||
.await?
|
||||
@@ -3321,6 +3395,7 @@ impl PostgresDb {
|
||||
None => {
|
||||
sqlx::query_as(sql)
|
||||
.bind(&tsquery)
|
||||
.bind(&ilike_pattern)
|
||||
.bind(limit as i64)
|
||||
.fetch_all(&self.pool)
|
||||
.await?
|
||||
@@ -3809,6 +3884,54 @@ impl PostgresDb {
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
pub async fn get_all_running_jobs(&self, limit: i32) -> Result<Vec<MonitorJob>> {
|
||||
let monitor_jobs = schema::table_name("monitor_jobs");
|
||||
let rows = sqlx::query(&format!(
|
||||
r#"
|
||||
SELECT id, uuid, video_path, status, current_processor, progress_total, progress_current,
|
||||
error_count, last_error, started_at, updated_at, created_at,
|
||||
processors, completed_processors, failed_processors, video_id
|
||||
FROM {}
|
||||
WHERE status = 'running'
|
||||
ORDER BY created_at ASC
|
||||
LIMIT $1
|
||||
"#,
|
||||
monitor_jobs
|
||||
))
|
||||
.bind(limit)
|
||||
.fetch_all(&self.pool)
|
||||
.await?;
|
||||
|
||||
let jobs: Vec<MonitorJob> = rows
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let status_str: String = r.get(3);
|
||||
let status =
|
||||
MonitorJobStatus::from_db_str(&status_str).unwrap_or(MonitorJobStatus::Running);
|
||||
MonitorJob {
|
||||
id: r.get(0),
|
||||
uuid: r.get(1),
|
||||
video_path: r.get(2),
|
||||
status,
|
||||
current_processor: r.get(4),
|
||||
progress_total: r.get(5),
|
||||
progress_current: r.get(6),
|
||||
error_count: r.get(7),
|
||||
last_error: r.get(8),
|
||||
started_at: r.get(9),
|
||||
updated_at: r.get(10),
|
||||
created_at: r.get(11),
|
||||
processors: r.get::<Option<Vec<String>>, _>(12).unwrap_or_default(),
|
||||
completed_processors: r.get::<Option<Vec<String>>, _>(13).unwrap_or_default(),
|
||||
failed_processors: r.get::<Option<Vec<String>>, _>(14).unwrap_or_default(),
|
||||
video_id: r.get(15),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(jobs)
|
||||
}
|
||||
|
||||
pub async fn get_pending_jobs(&self, limit: i32) -> Result<Vec<MonitorJob>> {
|
||||
let monitor_jobs = schema::table_name("monitor_jobs");
|
||||
let rows = sqlx::query(&format!(
|
||||
@@ -3817,7 +3940,7 @@ impl PostgresDb {
|
||||
error_count, last_error, started_at, updated_at, created_at,
|
||||
processors, completed_processors, failed_processors, video_id
|
||||
FROM {}
|
||||
WHERE status IN ('pending', 'running')
|
||||
WHERE status = 'pending'
|
||||
ORDER BY created_at ASC
|
||||
LIMIT $1
|
||||
"#,
|
||||
@@ -4322,7 +4445,7 @@ impl PostgresDb {
|
||||
name: &str,
|
||||
) -> Result<crate::core::person_identity::Identity> {
|
||||
let identity = sqlx::query_as::<_, crate::core::person_identity::Identity>(
|
||||
r#"INSERT INTO identities (name) VALUES ($1) ON CONFLICT (name) DO UPDATE SET name = EXCLUDED.name RETURNING id, name, embedding::text, metadata, created_at"#,
|
||||
r#"INSERT INTO identities (name) VALUES ($1) ON CONFLICT (name) DO UPDATE SET name = EXCLUDED.name RETURNING id, name, identity_embedding::text as embedding, metadata, created_at"#,
|
||||
)
|
||||
.bind(name)
|
||||
.fetch_one(&self.pool)
|
||||
@@ -4371,7 +4494,7 @@ impl PostgresDb {
|
||||
binding_value: &str,
|
||||
) -> Result<Option<crate::core::person_identity::Identity>> {
|
||||
let identity = sqlx::query_as::<_, crate::core::person_identity::Identity>(
|
||||
"SELECT i.id, i.name, i.embedding::text, i.metadata, i.created_at FROM identities i JOIN identity_bindings b ON i.id = b.identity_id WHERE b.identity_type = $1 AND b.identity_value = $2",
|
||||
"SELECT i.id, i.name, i.identity_embedding::text as embedding, i.metadata, i.created_at FROM identities i JOIN identity_bindings b ON i.id = b.identity_id WHERE b.identity_type = $1 AND b.identity_value = $2",
|
||||
)
|
||||
.bind(binding_type)
|
||||
.bind(binding_value)
|
||||
@@ -4389,12 +4512,12 @@ impl PostgresDb {
|
||||
) -> Result<Vec<crate::core::person_identity::Identity>> {
|
||||
let query = if !search.is_empty() {
|
||||
sqlx::query_as::<_, crate::core::person_identity::Identity>(
|
||||
"SELECT id, name, embedding::text, metadata, created_at FROM identities WHERE name ILIKE $1 ORDER BY id LIMIT $2 OFFSET $3",
|
||||
"SELECT id, name, identity_embedding::text as embedding, metadata, created_at FROM identities WHERE name ILIKE $1 ORDER BY id LIMIT $2 OFFSET $3",
|
||||
)
|
||||
.bind(format!("%{}%", search))
|
||||
} else {
|
||||
sqlx::query_as::<_, crate::core::person_identity::Identity>(
|
||||
"SELECT id, name, embedding::text, metadata, created_at FROM identities ORDER BY id LIMIT $1 OFFSET $2",
|
||||
"SELECT id, name, identity_embedding::text as embedding, metadata, created_at FROM identities ORDER BY id LIMIT $1 OFFSET $2",
|
||||
)
|
||||
};
|
||||
let identities = query.bind(limit).bind(offset).fetch_all(&self.pool).await?;
|
||||
@@ -4407,7 +4530,7 @@ impl PostgresDb {
|
||||
id: i64,
|
||||
) -> Result<Option<crate::core::person_identity::Identity>> {
|
||||
let identity = sqlx::query_as::<_, crate::core::person_identity::Identity>(
|
||||
"SELECT id, name, embedding::text, metadata, created_at FROM identities WHERE id = $1",
|
||||
"SELECT id, name, identity_embedding::text as embedding, metadata, created_at FROM identities WHERE id = $1",
|
||||
)
|
||||
.bind(id)
|
||||
.fetch_optional(&self.pool)
|
||||
@@ -4716,7 +4839,7 @@ impl PostgresDb {
|
||||
"speaker_ids"
|
||||
};
|
||||
let query = format!(
|
||||
"SELECT id, start_frame, end_frame, content FROM chunks WHERE uuid = $1 AND $2::text = ANY({}::text[]) ORDER BY start_frame",
|
||||
"SELECT id, start_frame, end_frame, content FROM chunks WHERE file_uuid = $1 AND $2::text = ANY({}::text[]) ORDER BY start_frame",
|
||||
column
|
||||
);
|
||||
|
||||
@@ -4836,7 +4959,7 @@ mod tests {
|
||||
width: 1920,
|
||||
height: 1080,
|
||||
fps: 30.0,
|
||||
probe_json: Some("{}".to_string()),
|
||||
probe_json: Some(serde_json::from_str("{}").unwrap()),
|
||||
storage: StorageStatus::default(),
|
||||
status: VideoStatus::Pending,
|
||||
processing_status: None,
|
||||
@@ -4847,6 +4970,11 @@ mod tests {
|
||||
registration_time: None,
|
||||
total_frames: 0,
|
||||
parent_uuid: None,
|
||||
cut_done: false,
|
||||
cut_count: 0,
|
||||
cut_max_duration: 0.0,
|
||||
scene_done: false,
|
||||
audio_tracks: None,
|
||||
};
|
||||
|
||||
let json = serde_json::to_string(&record).unwrap();
|
||||
@@ -4935,13 +5063,18 @@ mod tests {
|
||||
error_count: 0,
|
||||
last_error: None,
|
||||
started_at: Some(
|
||||
NaiveDateTime::parse_from_str("2024-01-01 10:00:00", "%Y-%m-%d %H:%M:%S").unwrap(),
|
||||
chrono::DateTime::parse_from_rfc3339("2024-01-01T10:00:00Z")
|
||||
.unwrap()
|
||||
.with_timezone(&chrono::Utc),
|
||||
),
|
||||
updated_at: Some(
|
||||
NaiveDateTime::parse_from_str("2024-01-01 10:05:00", "%Y-%m-%d %H:%M:%S").unwrap(),
|
||||
chrono::DateTime::parse_from_rfc3339("2024-01-01T10:05:00Z")
|
||||
.unwrap()
|
||||
.with_timezone(&chrono::Utc),
|
||||
),
|
||||
created_at: NaiveDateTime::parse_from_str("2024-01-01 09:55:00", "%Y-%m-%d %H:%M:%S")
|
||||
.unwrap(),
|
||||
created_at: chrono::DateTime::parse_from_rfc3339("2024-01-01T09:55:00Z")
|
||||
.unwrap()
|
||||
.into(),
|
||||
processors: vec!["asr".to_string(), "cut".to_string()],
|
||||
completed_processors: vec!["asr".to_string()],
|
||||
failed_processors: vec![],
|
||||
@@ -4968,7 +5101,7 @@ mod tests {
|
||||
"last_error": null,
|
||||
"started_at": null,
|
||||
"updated_at": null,
|
||||
"created_at": "2024-01-01T00:00:00",
|
||||
"created_at": "2024-01-01T00:00:00Z",
|
||||
"processors": ["asr", "cut"],
|
||||
"completed_processors": [],
|
||||
"failed_processors": [],
|
||||
|
||||
@@ -88,6 +88,62 @@ impl QdrantDb {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 將向量寫入指定 collection(支援多 collection)
|
||||
pub async fn upsert_vector_to_collection(
|
||||
&self,
|
||||
collection: &str,
|
||||
point_id: u64,
|
||||
vector: &[f32],
|
||||
payload: Option<serde_json::Value>,
|
||||
) -> Result<()> {
|
||||
let url = format!(
|
||||
"{}/collections/{}/points?wait=true",
|
||||
self.base_url, collection
|
||||
);
|
||||
|
||||
tracing::debug!("Qdrant upsert URL: {}, collection: {}", url, collection);
|
||||
|
||||
let points = if let Some(p) = payload {
|
||||
serde_json::json!({
|
||||
"points": [{
|
||||
"id": point_id,
|
||||
"vector": vector,
|
||||
"payload": p,
|
||||
}]
|
||||
})
|
||||
} else {
|
||||
serde_json::json!({
|
||||
"points": [{
|
||||
"id": point_id,
|
||||
"vector": vector,
|
||||
}]
|
||||
})
|
||||
};
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.put(&url)
|
||||
.header("api-key", &self.api_key)
|
||||
.json(&points)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to send upsert request to Qdrant")?;
|
||||
|
||||
let status = response.status();
|
||||
if !status.is_success() {
|
||||
let response_text = response.text().await.unwrap_or_default();
|
||||
tracing::error!("Qdrant upsert failed: {} - {}", status, response_text);
|
||||
anyhow::bail!(
|
||||
"Qdrant upsert failed with status {}: {}",
|
||||
status,
|
||||
response_text
|
||||
);
|
||||
}
|
||||
|
||||
tracing::debug!("Successfully upserted vector for point: {}", point_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn upsert_vector(
|
||||
&self,
|
||||
chunk_id: &str,
|
||||
|
||||
@@ -371,6 +371,11 @@ impl RedisClient {
|
||||
processor: &str,
|
||||
status: &str,
|
||||
error: Option<&str>,
|
||||
frames_processed: i32,
|
||||
chunks_produced: i32,
|
||||
total_frames: i32,
|
||||
retry_count: i32,
|
||||
pid: i32,
|
||||
) -> Result<()> {
|
||||
let mut conn = self.get_conn_internal().await?;
|
||||
let prefix = REDIS_KEY_PREFIX.as_str();
|
||||
@@ -378,13 +383,24 @@ impl RedisClient {
|
||||
|
||||
let now = chrono::Utc::now().to_rfc3339();
|
||||
|
||||
let mut fields: Vec<(&str, &str)> = vec![("status", status), ("updated_at", &now)];
|
||||
let mut fields: Vec<(&str, String)> = vec![
|
||||
("status", status.to_string()),
|
||||
("updated_at", now),
|
||||
("current", frames_processed.to_string()),
|
||||
("total", total_frames.to_string()),
|
||||
("frames_processed", frames_processed.to_string()),
|
||||
("chunks_produced", chunks_produced.to_string()),
|
||||
("retry_count", retry_count.to_string()),
|
||||
("pid", pid.to_string()),
|
||||
];
|
||||
|
||||
if let Some(err) = error {
|
||||
fields.push(("error", err));
|
||||
fields.push(("error", err.to_string()));
|
||||
}
|
||||
|
||||
let _: Option<String> = conn.hset_multiple(&key, &fields).await?;
|
||||
let field_refs: Vec<(&str, &str)> = fields.iter().map(|(k, v)| (*k, v.as_str())).collect();
|
||||
|
||||
let _: Option<String> = conn.hset_multiple(&key, &field_refs).await?;
|
||||
let _: bool = conn.expire(&key, 86400).await?;
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -182,6 +182,11 @@ impl IngestionService {
|
||||
registration_time: None,
|
||||
total_frames: total_frames.unwrap_or(0),
|
||||
parent_uuid: None,
|
||||
cut_done: false,
|
||||
cut_count: 0,
|
||||
cut_max_duration: 0.0,
|
||||
scene_done: false,
|
||||
audio_tracks: None,
|
||||
};
|
||||
|
||||
self.db
|
||||
|
||||
@@ -15,4 +15,3 @@ pub mod text;
|
||||
pub mod thumbnail;
|
||||
pub mod time;
|
||||
pub mod tmdb;
|
||||
pub mod worker;
|
||||
|
||||
@@ -20,6 +20,8 @@ pub struct StreamInfo {
|
||||
pub duration: Option<String>,
|
||||
pub sample_rate: Option<String>,
|
||||
pub channels: Option<u32>,
|
||||
#[serde(default)]
|
||||
pub tags: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
@@ -69,6 +71,7 @@ pub fn probe_video(video_path: &str) -> Result<ProbeResult> {
|
||||
duration: s["duration"].as_str().map(String::from),
|
||||
sample_rate: s["sample_rate"].as_str().map(String::from),
|
||||
channels: s["channels"].as_u64().map(|v| v as u32),
|
||||
tags: s.get("tags").cloned(),
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
|
||||
@@ -1,124 +0,0 @@
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::time::Duration;
|
||||
|
||||
use super::executor::PythonExecutor;
|
||||
use crate::core::config::processor;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct AsrResult {
|
||||
pub language: Option<String>,
|
||||
pub language_probability: Option<f64>,
|
||||
pub segments: Vec<AsrSegment>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct AsrSegment {
|
||||
pub start: f64,
|
||||
pub end: f64,
|
||||
pub text: String,
|
||||
}
|
||||
|
||||
pub async fn process_asr(
|
||||
video_path: &str,
|
||||
output_path: &str,
|
||||
uuid: Option<&str>,
|
||||
) -> Result<AsrResult> {
|
||||
let executor = PythonExecutor::new()?;
|
||||
let script_path = executor.script_path("asr_processor.py");
|
||||
|
||||
tracing::info!("[ASR] Starting ASR processing: {}", video_path);
|
||||
|
||||
executor
|
||||
.run(
|
||||
"asr_processor.py",
|
||||
&[video_path, output_path],
|
||||
uuid,
|
||||
"ASR",
|
||||
Some(Duration::from_secs(*processor::ASR_TIMEOUT_SECS)),
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to run {:?}", script_path))?;
|
||||
|
||||
let json_str = std::fs::read_to_string(output_path).context("Failed to read ASR output")?;
|
||||
|
||||
let result: AsrResult =
|
||||
serde_json::from_str(&json_str).context("Failed to parse ASR output")?;
|
||||
|
||||
tracing::info!(
|
||||
"[ASR] Result: {} segments, language: {:?}",
|
||||
result.segments.len(),
|
||||
result.language
|
||||
);
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a segment in the tests below.
    fn seg(start: f64, end: f64, text: &str) -> AsrSegment {
        AsrSegment {
            start,
            end,
            text: text.to_string(),
        }
    }

    /// Serialized output carries both the segment text and the language code.
    #[test]
    fn test_asr_result_serialization() {
        let sample = AsrResult {
            language: Some(String::from("en")),
            language_probability: Some(0.95),
            segments: vec![seg(0.0, 2.5, "Hello world"), seg(2.5, 5.0, "Test speech")],
        };

        let encoded = serde_json::to_string(&sample).unwrap();
        for expected in ["Hello world", "en"] {
            assert!(encoded.contains(expected));
        }
    }

    /// A JSON document with one non-ASCII segment round-trips into the struct.
    #[test]
    fn test_asr_result_deserialization() {
        let payload = r#"{
            "language": "zh",
            "language_probability": 0.98,
            "segments": [
                {"start": 0.0, "end": 1.5, "text": "測試"}
            ]
        }"#;

        let decoded: AsrResult = serde_json::from_str(payload).unwrap();
        assert_eq!(decoded.language.as_deref(), Some("zh"));
        assert_eq!(decoded.language_probability, Some(0.98));
        assert_eq!(decoded.segments.len(), 1);
        assert_eq!(decoded.segments[0].text, "測試");
    }

    /// Fields of a hand-built segment read back exactly as written.
    #[test]
    fn test_asr_segment_default() {
        let blank = seg(0.0, 1.0, "");
        assert_eq!(blank.start, 0.0);
        assert_eq!(blank.end, 1.0);
        assert!(blank.text.is_empty());
    }

    /// A result with no language and no segments is representable.
    #[test]
    fn test_asr_result_empty_segments() {
        let empty = AsrResult {
            language: None,
            language_probability: None,
            segments: Vec::new(),
        };
        assert!(empty.language.is_none());
        assert!(empty.segments.is_empty());
    }
}
|
||||
@@ -12,12 +12,16 @@ const ASRX_TIMEOUT: Duration = Duration::from_secs(7200);
|
||||
pub struct AsrxResult {
|
||||
pub language: Option<String>,
|
||||
pub segments: Vec<AsrxSegment>,
|
||||
#[serde(skip_serializing)]
|
||||
pub embeddings: Option<Vec<Vec<f32>>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct AsrxSegment {
|
||||
pub start: f64,
|
||||
pub end: f64,
|
||||
pub start_time: f64,
|
||||
pub end_time: f64,
|
||||
pub start_frame: u64,
|
||||
pub end_frame: u64,
|
||||
pub text: String,
|
||||
pub speaker_id: Option<String>,
|
||||
}
|
||||
@@ -43,10 +47,19 @@ pub async fn process_asrx(
|
||||
return Ok(AsrxResult {
|
||||
language: None,
|
||||
segments: vec![],
|
||||
embeddings: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"[ASRX] Running: {} {} {} {}",
|
||||
executor.python_path().display(),
|
||||
script_path.display(),
|
||||
video_path,
|
||||
output_path,
|
||||
);
|
||||
|
||||
let mut cmd = Command::new(executor.python_path());
|
||||
cmd.arg(&script_path).arg(video_path).arg(output_path);
|
||||
|
||||
@@ -68,16 +81,21 @@ pub async fn process_asrx(
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
|
||||
for line in stderr.lines() {
|
||||
if line.starts_with("ASRX_START") {
|
||||
let trimmed = line.trim();
|
||||
if trimmed.starts_with("ASRX_START") {
|
||||
tracing::info!("[ASRX] Loading model...");
|
||||
} else if line.starts_with("ASRX_PROGRESS:") {
|
||||
let count = line.trim_start_matches("ASRX_PROGRESS:");
|
||||
} else if trimmed.starts_with("ASRX_PROGRESS:") {
|
||||
let count = trimmed.trim_start_matches("ASRX_PROGRESS:");
|
||||
tracing::info!("[ASRX] Processed {} segments...", count);
|
||||
} else if line.starts_with("ASRX_COMPLETE:") {
|
||||
let count = line.trim_start_matches("ASRX_COMPLETE:");
|
||||
} else if trimmed.starts_with("ASRX_COMPLETE:") {
|
||||
let count = trimmed.trim_start_matches("ASRX_COMPLETE:");
|
||||
tracing::info!("[ASRX] Completed! Total: {} segments", count);
|
||||
} else if !trimmed.is_empty() && !trimmed.starts_with("[SelfASRX") {
|
||||
tracing::debug!("[ASRX/stderr] {}", trimmed);
|
||||
}
|
||||
}
|
||||
// Log full stderr for debugging
|
||||
tracing::info!("[ASRX] stderr output:\n{}", stderr);
|
||||
|
||||
if !output.status.success() {
|
||||
anyhow::bail!("ASRX failed: {}", stderr);
|
||||
@@ -102,11 +120,14 @@ mod tests {
|
||||
let result = AsrxResult {
|
||||
language: Some("en".to_string()),
|
||||
segments: vec![AsrxSegment {
|
||||
start: 0.0,
|
||||
end: 2.5,
|
||||
start_time: 0.0,
|
||||
end_time: 2.5,
|
||||
start_frame: 0,
|
||||
end_frame: 75,
|
||||
text: "Hello".to_string(),
|
||||
speaker_id: Some("SPEAKER_00".to_string()),
|
||||
}],
|
||||
embeddings: None,
|
||||
};
|
||||
|
||||
let json = serde_json::to_string(&result).unwrap();
|
||||
@@ -119,7 +140,7 @@ mod tests {
|
||||
let json = r#"{
|
||||
"language": "zh",
|
||||
"segments": [
|
||||
{"start": 0.0, "end": 1.5, "text": "測試", "speaker_id": "SPEAKER_01"}
|
||||
{"start_time": 0.0, "end_time": 1.5, "start_frame": 0, "end_frame": 45, "text": "測試", "speaker_id": "SPEAKER_01"}
|
||||
]
|
||||
}"#;
|
||||
|
||||
@@ -137,6 +158,7 @@ mod tests {
|
||||
let result = AsrxResult {
|
||||
language: None,
|
||||
segments: vec![],
|
||||
embeddings: None,
|
||||
};
|
||||
assert!(result.segments.is_empty());
|
||||
assert!(result.language.is_none());
|
||||
@@ -145,11 +167,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_asrx_segment_times() {
|
||||
let segment = AsrxSegment {
|
||||
start: 0.0,
|
||||
end: 5.0,
|
||||
start_time: 0.0,
|
||||
end_time: 5.0,
|
||||
start_frame: 0,
|
||||
end_frame: 150,
|
||||
text: "Test".to_string(),
|
||||
speaker_id: None,
|
||||
};
|
||||
assert!(segment.end > segment.start);
|
||||
assert!(segment.end_time > segment.start_time);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -147,6 +147,19 @@ impl PythonExecutor {
|
||||
anyhow::bail!("Script not found: {:?}", script_path);
|
||||
}
|
||||
|
||||
// 標記輸出檔為處理中(add .tmp suffix)
|
||||
let output_path = args.get(1).map(|p| std::path::PathBuf::from(p));
|
||||
let tmp_path = output_path.as_ref().map(|p| {
|
||||
let mut tmp = p.to_path_buf();
|
||||
tmp.set_extension("json.tmp");
|
||||
tmp
|
||||
});
|
||||
if let (Some(src), Some(dst)) = (&output_path, &tmp_path) {
|
||||
if src.exists() {
|
||||
let _ = std::fs::rename(src, dst);
|
||||
}
|
||||
}
|
||||
|
||||
let mut cmd = Command::new(&self.venv_python);
|
||||
cmd.arg(&script_path);
|
||||
|
||||
@@ -220,12 +233,28 @@ impl PythonExecutor {
|
||||
Ok(())
|
||||
};
|
||||
|
||||
// 錯誤時 rename .json.tmp → .json.err
|
||||
let mark_failed = || {
|
||||
if let Some(tmp) = &tmp_path {
|
||||
if tmp.exists() {
|
||||
if let Some(out) = &output_path {
|
||||
let mut err_path = out.to_path_buf();
|
||||
err_path.set_extension("json.err");
|
||||
let _ = std::fs::rename(tmp, &err_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(duration) = timeout_duration {
|
||||
match timeout(duration, run_future).await {
|
||||
Ok(Ok(())) => {}
|
||||
Ok(Err(e)) => return Err(e),
|
||||
Ok(Err(e)) => {
|
||||
mark_failed();
|
||||
return Err(e);
|
||||
}
|
||||
Err(_) => {
|
||||
// Try to kill the entire process group
|
||||
mark_failed();
|
||||
if let Some(pid) = child_pid {
|
||||
let pgid = pid as i32;
|
||||
unsafe {
|
||||
@@ -237,7 +266,19 @@ impl PythonExecutor {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
run_future.await?;
|
||||
if let Err(e) = run_future.await {
|
||||
mark_failed();
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
|
||||
// 成功:.json.tmp → .json(已完成)
|
||||
if let Some(tmp) = &tmp_path {
|
||||
if tmp.exists() {
|
||||
if let Some(out) = &output_path {
|
||||
let _ = std::fs::rename(tmp, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -28,6 +28,7 @@ pub struct Face {
|
||||
pub width: i32,
|
||||
pub height: i32,
|
||||
pub confidence: f32,
|
||||
#[serde(skip_serializing)]
|
||||
pub embedding: Option<Vec<f32>>,
|
||||
pub landmarks: Option<Vec<Vec<f32>>>,
|
||||
pub attributes: Option<FaceAttributes>,
|
||||
@@ -111,7 +112,6 @@ mod tests {
|
||||
let json = serde_json::to_string(&result).unwrap();
|
||||
assert!(json.contains("face_1"));
|
||||
assert!(json.contains("\"width\":50"));
|
||||
assert!(json.contains("embedding"));
|
||||
assert!(json.contains("landmarks"));
|
||||
assert!(json.contains("attributes"));
|
||||
}
|
||||
|
||||
@@ -27,7 +27,8 @@ pub use face_recognition::{
|
||||
pub use ocr::{process_ocr, OcrFrame, OcrResult, OcrText};
|
||||
pub use pose::{process_pose, Bbox, Keypoint, PersonPose, PoseFrame, PoseResult};
|
||||
pub use scene_classification::{
|
||||
process_scene_classification, SceneClassificationResult, ScenePrediction, SceneSegment,
|
||||
load_scene_from_file, process_scene_classification, SceneClassificationResult, ScenePrediction,
|
||||
SceneSegment,
|
||||
};
|
||||
pub use snapshot_agent::{SnapshotAgent, SnapshotAgentConfig};
|
||||
pub use story::{process_story, StoryChildChunk, StoryParentChunk, StoryResult, StoryStats};
|
||||
|
||||
@@ -7,7 +7,7 @@ use super::executor::PythonExecutor;
|
||||
const SCENE_TIMEOUT: Duration = Duration::from_secs(7200);
|
||||
|
||||
/// 場景識別結果
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[derive(Debug, Default, Serialize, Deserialize, Clone)]
|
||||
pub struct SceneClassificationResult {
|
||||
pub frame_count: u64,
|
||||
pub fps: f64,
|
||||
@@ -32,6 +32,19 @@ pub struct ScenePrediction {
|
||||
pub confidence: f32,
|
||||
}
|
||||
|
||||
/// 從已存在的 JSON 檔案載入場景結果(不重新執行 Python)
|
||||
pub fn load_scene_from_file(path: &str) -> Result<SceneClassificationResult> {
|
||||
let json_str = std::fs::read_to_string(path).context("Failed to read scene JSON file")?;
|
||||
let result: SceneClassificationResult =
|
||||
serde_json::from_str(&json_str).context("Failed to parse scene JSON")?;
|
||||
tracing::info!(
|
||||
"[SCENE] Loaded {} scenes from {}",
|
||||
result.scenes.len(),
|
||||
path
|
||||
);
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// 執行場景識別
|
||||
pub async fn process_scene_classification(
|
||||
video_path: &str,
|
||||
|
||||
@@ -12,7 +12,7 @@ use super::yolo::{YoloFrame, YoloResult};
|
||||
const VISUAL_CHUNK_TIMEOUT: Duration = Duration::from_secs(3600);
|
||||
|
||||
/// 視覺分片處理結果
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
|
||||
pub struct VisualChunkResult {
|
||||
/// 生成的視覺分片數量
|
||||
pub chunk_count: u32,
|
||||
@@ -284,7 +284,7 @@ pub async fn process_visual_chunk_advanced(
|
||||
});
|
||||
}
|
||||
|
||||
executor
|
||||
let result = match executor
|
||||
.run(
|
||||
"visual_chunk_processor.py",
|
||||
&[video_path, output_path],
|
||||
@@ -293,13 +293,34 @@ pub async fn process_visual_chunk_advanced(
|
||||
Some(VISUAL_CHUNK_TIMEOUT),
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to run {:?}", script_path))?;
|
||||
|
||||
let json_str =
|
||||
std::fs::read_to_string(output_path).context("Failed to read visual chunk output")?;
|
||||
|
||||
let result: VisualChunkResult =
|
||||
serde_json::from_str(&json_str).context("Failed to parse visual chunk output")?;
|
||||
{
|
||||
Ok(_) => match std::fs::read_to_string(output_path) {
|
||||
Ok(json_str) => match serde_json::from_str::<VisualChunkResult>(&json_str) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"[VisualChunk] Failed to parse output ({}), returning empty",
|
||||
e
|
||||
);
|
||||
VisualChunkResult::default()
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"[VisualChunk] Failed to read output ({}), returning empty",
|
||||
e
|
||||
);
|
||||
VisualChunkResult::default()
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"[VisualChunk] Failed to run script ({}), returning empty",
|
||||
e
|
||||
);
|
||||
VisualChunkResult::default()
|
||||
}
|
||||
};
|
||||
|
||||
tracing::info!(
|
||||
"[VisualChunk] Advanced generation result: {} chunks, {} frames",
|
||||
|
||||
@@ -54,22 +54,81 @@ pub fn compute_uuid_from_relative_path(relative_path: &str) -> String {
|
||||
compute_uuid(&username, &filepath)
|
||||
}
|
||||
|
||||
/// Returns the hardware (MAC) address of the preferred local network interface.
///
/// The address is taken from the output of `ifconfig -a`. Interfaces are
/// ranked by name only: `en0`, then `en1`, then `en2`, then any other `en*`
/// interface, then everything else. Among equally ranked interfaces the one
/// listed first wins. The name ranking is a heuristic; nothing here checks
/// whether an interface is built in or external.
///
/// Returns the address as lowercase `a1:b2:c3:d4:e5:f6`. If `ifconfig` cannot
/// be run, exits unsuccessfully, or lists no usable address, the all-zero
/// address `00:00:00:00:00:00` is returned instead.
pub fn get_mac_address() -> String {
    let listing = std::process::Command::new("ifconfig")
        .arg("-a")
        .output()
        .ok()
        .filter(|out| out.status.success())
        .map(|out| String::from_utf8_lossy(&out.stdout).into_owned())
        .unwrap_or_default();

    select_mac_from_ifconfig(&listing).unwrap_or_else(|| "00:00:00:00:00:00".to_string())
}

/// Picks the best-ranked usable MAC address out of `ifconfig -a` output.
fn select_mac_from_ifconfig(listing: &str) -> Option<String> {
    let mut current_iface = "";
    let mut best: Option<(u32, String)> = None;

    for line in listing.lines() {
        let trimmed = line.trim();

        // Interface header, e.g. "en0: flags=...". Headers start in column 0,
        // so the check must look at the untrimmed line.
        if !line.starts_with(char::is_whitespace) && trimmed.contains(": flags=") {
            current_iface = trimmed.split(':').next().unwrap_or("");
            continue;
        }

        // "ether a1:b2:..." or "lladdr a1:b2:...". Some ifconfig builds append
        // more fields after the address, so only the first token is taken.
        let address = match trimmed
            .strip_prefix("ether ")
            .or_else(|| trimmed.strip_prefix("lladdr "))
        {
            Some(rest) => rest.split_whitespace().next().unwrap_or(""),
            None => continue,
        };
        if !is_usable_mac(address) {
            continue;
        }

        // Strictly-less keeps the first address seen among equal priorities.
        let priority = interface_priority(current_iface);
        if best.as_ref().map_or(true, |(held, _)| priority < *held) {
            best = Some((priority, address.to_ascii_lowercase()));
        }
    }

    best.map(|(_, mac)| mac)
}

/// Ranks an interface name; lower values are preferred.
fn interface_priority(iface: &str) -> u32 {
    match iface {
        "en0" => 0,
        "en1" => 1,
        "en2" => 2,
        other if other.starts_with("en") => 3,
        _ => 100,
    }
}

/// Accepts six colon-separated hex octets, excluding the all-zero and broadcast addresses.
fn is_usable_mac(candidate: &str) -> bool {
    let well_formed = candidate.split(':').count() == 6
        && candidate
            .split(':')
            .all(|octet| octet.len() == 2 && octet.bytes().all(|b| b.is_ascii_hexdigit()));

    well_formed
        && !candidate.eq_ignore_ascii_case("00:00:00:00:00:00")
        && !candidate.eq_ignore_ascii_case("ff:ff:ff:ff:ff:ff")
}
|
||||
|
||||
/// Compute Birth UUID (Stable Identity with Location)
|
||||
|
||||
@@ -1,140 +0,0 @@
|
||||
use crate::core::chunk;
|
||||
use crate::core::db::PostgresDb;
|
||||
use sqlx::PgPool;
|
||||
use tokio::time::{sleep, Duration};
|
||||
use tracing;
|
||||
|
||||
/// Background worker that claims queued rows from `dev.monitor_jobs` one at a
/// time and runs rule-1 ingestion for the claimed asset.
pub struct JobWorker {
|
||||
/// Connection pool used for every query the worker issues.
pool: PgPool,
|
||||
/// How long `run` sleeps when no queued job was found.
poll_interval: Duration,
|
||||
}
|
||||
|
||||
impl JobWorker {
|
||||
pub fn new(pool: PgPool, poll_interval_secs: u64) -> Self {
|
||||
Self {
|
||||
pool,
|
||||
poll_interval: Duration::from_secs(poll_interval_secs),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run(&self) {
|
||||
tracing::info!(
|
||||
"🤖 Job Worker started (Polling every {}s)",
|
||||
self.poll_interval.as_secs()
|
||||
);
|
||||
|
||||
loop {
|
||||
match self.process_next_job().await {
|
||||
Ok(has_work) => {
|
||||
if !has_work {
|
||||
// No work found, wait before polling again
|
||||
sleep(self.poll_interval).await;
|
||||
}
|
||||
// If we processed a job, loop immediately to check for more
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("❌ Job Worker error: {}", e);
|
||||
sleep(Duration::from_secs(5)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn process_next_job(&self) -> anyhow::Result<bool> {
|
||||
// 1. Fetch a QUEUED job from monitor_jobs
|
||||
// Using sqlx::query_as to map to tuple.
|
||||
// Note: progress_total is int4 (i32).
|
||||
let job_row: Option<(i32, String, i32)> = sqlx::query_as(
|
||||
r#"
|
||||
UPDATE dev.monitor_jobs
|
||||
SET status = 'RUNNING', updated_at = NOW()
|
||||
WHERE id = (
|
||||
SELECT id FROM dev.monitor_jobs
|
||||
WHERE status = 'QUEUED'
|
||||
ORDER BY created_at ASC
|
||||
LIMIT 1
|
||||
FOR UPDATE SKIP LOCKED
|
||||
)
|
||||
RETURNING id, uuid, COALESCE(progress_total, 0)
|
||||
"#,
|
||||
)
|
||||
.fetch_optional(&self.pool)
|
||||
.await?;
|
||||
|
||||
if let Some((job_id, asset_uuid, total_frames)) = job_row {
|
||||
tracing::info!(
|
||||
"🚀 Processing Job {} for Asset {} (Frames: {})",
|
||||
job_id,
|
||||
asset_uuid,
|
||||
total_frames
|
||||
);
|
||||
|
||||
// 2. Execute Logic (Default to rule1 for now as monitor_jobs doesn't store rule type explicitly)
|
||||
let fps = self.get_asset_fps(&asset_uuid).await?;
|
||||
let db = PostgresDb::from_pool(self.pool.clone());
|
||||
|
||||
let result = chunk::rule1_ingest::execute_rule1(&db, &asset_uuid, fps).await;
|
||||
|
||||
// 3. Update Job Status
|
||||
match result {
|
||||
Ok(chunk_count) => {
|
||||
tracing::info!(
|
||||
"✅ Job {} completed. Processed {} items.",
|
||||
job_id,
|
||||
chunk_count
|
||||
);
|
||||
|
||||
// Update monitor_jobs
|
||||
// Using runtime query to avoid compile-time macro checks
|
||||
sqlx::query(
|
||||
"UPDATE dev.monitor_jobs SET status = 'COMPLETED', progress_current = progress_total, updated_at = NOW() WHERE id = $1"
|
||||
)
|
||||
.bind(job_id)
|
||||
.execute(&self.pool)
|
||||
.await?;
|
||||
|
||||
// Update video processing_status
|
||||
sqlx::query(
|
||||
"UPDATE dev.videos SET processing_status = $1::jsonb WHERE file_uuid = $2",
|
||||
)
|
||||
.bind(serde_json::json!({"status": "COMPLETED"}))
|
||||
.bind(asset_uuid)
|
||||
.execute(&self.pool)
|
||||
.await?;
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("❌ Job {} failed: {}", job_id, e);
|
||||
let err_msg = e.to_string();
|
||||
let safe_msg = if err_msg.len() > 500 {
|
||||
&err_msg[..500]
|
||||
} else {
|
||||
&err_msg
|
||||
};
|
||||
|
||||
sqlx::query(
|
||||
"UPDATE dev.monitor_jobs SET status = 'FAILED', last_error = $2, updated_at = NOW() WHERE id = $1"
|
||||
)
|
||||
.bind(job_id)
|
||||
.bind(safe_msg)
|
||||
.execute(&self.pool)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
return Ok(true); // Processed a job
|
||||
}
|
||||
|
||||
Ok(false) // No job found
|
||||
}
|
||||
|
||||
async fn get_asset_fps(&self, uuid: &str) -> anyhow::Result<f64> {
|
||||
// dev.videos now uses file_uuid and has a direct fps column
|
||||
let fps: Option<f64> =
|
||||
sqlx::query_scalar("SELECT fps FROM dev.videos WHERE file_uuid = $1")
|
||||
.bind(uuid)
|
||||
.fetch_optional(&self.pool)
|
||||
.await?;
|
||||
|
||||
// Fallback to 29.97 if not found
|
||||
Ok(fps.unwrap_or(29.97))
|
||||
}
|
||||
}
|
||||
@@ -1,2 +0,0 @@
|
||||
pub mod job_runner;
|
||||
pub use job_runner::JobWorker;
|
||||
@@ -978,6 +978,11 @@ async fn main() -> Result<()> {
|
||||
registration_time: None,
|
||||
total_frames: total_frames.unwrap_or(0),
|
||||
parent_uuid: None,
|
||||
cut_done: false,
|
||||
cut_count: 0,
|
||||
cut_max_duration: 0.0,
|
||||
scene_done: false,
|
||||
audio_tracks: None,
|
||||
};
|
||||
|
||||
let video_id = db.register_video(&record).await?;
|
||||
@@ -1936,6 +1941,7 @@ async fn main() -> Result<()> {
|
||||
momentry_core::core::processor::asrx::AsrxResult {
|
||||
language: None,
|
||||
segments: vec![],
|
||||
embeddings: None,
|
||||
}
|
||||
}
|
||||
},
|
||||
@@ -1944,6 +1950,7 @@ async fn main() -> Result<()> {
|
||||
momentry_core::core::processor::asrx::AsrxResult {
|
||||
language: None,
|
||||
segments: vec![],
|
||||
embeddings: None,
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -2123,7 +2130,7 @@ async fn main() -> Result<()> {
|
||||
let speaker_id = asrx_result
|
||||
.segments
|
||||
.iter()
|
||||
.find(|ax| ax.start <= seg.end && ax.end >= seg.start)
|
||||
.find(|ax| ax.start_time <= seg.end && ax.end_time >= seg.start)
|
||||
.and_then(|ax| ax.speaker_id.clone());
|
||||
|
||||
let content = if let Some(ref sid) = speaker_id {
|
||||
|
||||
@@ -9,6 +9,7 @@ use crate::core::chunk::{rule1_ingest, rule3_ingest};
|
||||
use crate::core::db::{MonitorJobStatus, PostgresDb, ProcessorJobStatus, RedisClient, VideoStatus};
|
||||
use crate::worker::config::WorkerConfig;
|
||||
use crate::worker::processor::{ProcessorPool, ProcessorTask};
|
||||
use crate::worker::resources::SystemResources;
|
||||
|
||||
pub struct JobWorker {
|
||||
db: Arc<PostgresDb>,
|
||||
@@ -51,15 +52,126 @@ impl JobWorker {
|
||||
}
|
||||
|
||||
async fn poll_and_process(&self) -> Result<()> {
|
||||
// Always check for completion of running jobs first
|
||||
// This ensures jobs with all processors in terminal states are marked complete/failed
|
||||
let running_jobs_done = self
|
||||
.db
|
||||
.get_running_jobs_with_all_processors_done(self.config.batch_size)
|
||||
.await?;
|
||||
for job in running_jobs_done {
|
||||
if let Err(e) = self.check_and_complete_job(job.id, &job.uuid).await {
|
||||
error!("Failed to complete job {}: {}", job.uuid, e);
|
||||
// 檢查系統資源並寫入 Redis
|
||||
let resources = SystemResources::check();
|
||||
let dynamic_max = resources.safe_max_concurrent(self.config.max_concurrent);
|
||||
|
||||
let health_key = format!("{}health", crate::core::config::REDIS_KEY_PREFIX.as_str());
|
||||
let now = chrono::Utc::now().to_rfc3339();
|
||||
let running_count = self.processor_pool.get_running_count().await;
|
||||
if let Ok(mut conn) = self.redis.get_conn().await {
|
||||
let gpu_util_str = resources
|
||||
.gpu_utilization
|
||||
.map(|v| format!("{:.1}", v))
|
||||
.unwrap_or_default();
|
||||
let gpu_mem_str = resources
|
||||
.gpu_memory_used_pct
|
||||
.map(|v| format!("{:.1}", v))
|
||||
.unwrap_or_default();
|
||||
let gpu_avail_str = if resources.gpu_available {
|
||||
"true"
|
||||
} else {
|
||||
"false"
|
||||
};
|
||||
let _: Option<String> = redis::cmd("HMSET")
|
||||
.arg(&[
|
||||
(&health_key as &str),
|
||||
"status",
|
||||
if dynamic_max > 0 {
|
||||
"healthy"
|
||||
} else {
|
||||
"throttled"
|
||||
},
|
||||
"cpu_idle_pct",
|
||||
&format!("{:.1}", resources.cpu_idle_percent),
|
||||
"memory_available_mb",
|
||||
&resources.memory_available_mb.to_string(),
|
||||
"memory_total_mb",
|
||||
&resources.memory_total_mb.to_string(),
|
||||
"memory_used_pct",
|
||||
&format!("{:.1}", resources.memory_used_percent),
|
||||
"gpu_available",
|
||||
gpu_avail_str,
|
||||
"gpu_utilization_pct",
|
||||
&gpu_util_str,
|
||||
"gpu_memory_used_pct",
|
||||
&gpu_mem_str,
|
||||
"dynamic_concurrency",
|
||||
&dynamic_max.to_string(),
|
||||
"config_concurrency",
|
||||
&self.config.max_concurrent.to_string(),
|
||||
"running_processors",
|
||||
&running_count.to_string(),
|
||||
"updated_at",
|
||||
&now,
|
||||
])
|
||||
.query_async(&mut conn)
|
||||
.await
|
||||
.ok();
|
||||
}
|
||||
|
||||
let gpu_log = match (
|
||||
resources.gpu_available,
|
||||
resources.gpu_utilization,
|
||||
resources.gpu_memory_used_pct,
|
||||
) {
|
||||
(true, Some(util), Some(mem)) => format!("GPU util={:.1}% mem={:.1}%", util, mem),
|
||||
(true, Some(util), None) => format!("GPU util={:.1}%", util),
|
||||
(true, None, _) => "GPU available".to_string(),
|
||||
(false, _, _) => "No GPU".to_string(),
|
||||
};
|
||||
info!(
|
||||
"System: CPU idle={:.1}%, Memory={}MB/{}MB ({:.1}%), {}. Dynamic concurrency: {} (config: {})",
|
||||
resources.cpu_idle_percent,
|
||||
resources.memory_available_mb,
|
||||
resources.memory_total_mb,
|
||||
resources.memory_used_percent,
|
||||
gpu_log,
|
||||
dynamic_max,
|
||||
self.config.max_concurrent,
|
||||
);
|
||||
|
||||
// Check for running jobs that may have pending dependencies satisfied
|
||||
// First: jobs with all processors done (正常完成檢查)
|
||||
{
|
||||
let running_jobs_done = self
|
||||
.db
|
||||
.get_running_jobs_with_all_processors_done(self.config.batch_size)
|
||||
.await?;
|
||||
for job in running_jobs_done {
|
||||
let should_retry = self.check_and_complete_job(job.id, &job.uuid).await.is_ok();
|
||||
if should_retry && self.processor_pool.can_start().await {
|
||||
if let Err(e) = self.process_job(job.clone()).await {
|
||||
error!("Failed to reprocess job {}: {}", job.uuid, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Second: check running jobs with pending/deferred processors
|
||||
{
|
||||
let running_jobs = self.db.get_all_running_jobs(self.config.batch_size).await?;
|
||||
for job in running_jobs {
|
||||
if !self.processor_pool.can_start().await {
|
||||
break;
|
||||
}
|
||||
let results = self
|
||||
.db
|
||||
.get_processor_results_by_job(job.id)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
// 若有任何 processor 是 pending/skipped(未真正啟動),重新處理 job
|
||||
let has_unstarted = results.iter().any(|r| {
|
||||
matches!(
|
||||
r.status,
|
||||
crate::core::db::ProcessorJobStatus::Pending
|
||||
| crate::core::db::ProcessorJobStatus::Skipped
|
||||
)
|
||||
});
|
||||
if has_unstarted {
|
||||
if let Err(e) = self.process_job(job.clone()).await {
|
||||
error!("Failed to reprocess job {}: {}", job.uuid, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -88,18 +200,51 @@ impl JobWorker {
|
||||
info!("Processing job: {} ({})", job.uuid, job.id);
|
||||
|
||||
// Determine which processors to run based on job.processors field
|
||||
let processors_to_run: Vec<crate::core::db::ProcessorType> = if job.processors.is_empty() {
|
||||
info!("No processors specified, running all processors");
|
||||
crate::core::db::ProcessorType::all()
|
||||
} else {
|
||||
info!("Processors specified: {:?}", job.processors);
|
||||
job.processors
|
||||
.iter()
|
||||
.filter_map(|p| crate::core::db::ProcessorType::from_db_str(p))
|
||||
.collect()
|
||||
};
|
||||
let mut processors_to_run: Vec<crate::core::db::ProcessorType> =
|
||||
if job.processors.is_empty() {
|
||||
info!("No processors specified, running all processors");
|
||||
crate::core::db::ProcessorType::all()
|
||||
} else {
|
||||
info!("Processors specified: {:?}", job.processors);
|
||||
job.processors
|
||||
.iter()
|
||||
.filter_map(|p| crate::core::db::ProcessorType::from_db_str(p))
|
||||
.collect()
|
||||
};
|
||||
|
||||
let total_processors = processors_to_run.len() as i32;
|
||||
// 長影片動態調整:若 CUT 場景過長,Face 需在 ASR 之前執行
|
||||
if let Ok(Some(video)) = self.db.get_video_by_uuid(&job.uuid).await {
|
||||
// 條件:cut_done 且場景數 <= 3 且最長場景 > 600s(10分鐘)
|
||||
if video.cut_done && video.cut_count <= 3 && video.cut_max_duration > 600.0 {
|
||||
info!(
|
||||
"[DYNAMIC] Long cut detected: {} scenes, max_dur={:.0}s for {}. Moving Face before ASR.",
|
||||
video.cut_count, video.cut_max_duration, job.uuid
|
||||
);
|
||||
// 確保 Face 在 ASR 之前
|
||||
if let Some(asr_pos) = processors_to_run
|
||||
.iter()
|
||||
.position(|p| matches!(p, crate::core::db::ProcessorType::Asr))
|
||||
{
|
||||
if let Some(face_pos) = processors_to_run
|
||||
.iter()
|
||||
.position(|p| matches!(p, crate::core::db::ProcessorType::Face))
|
||||
{
|
||||
if face_pos > asr_pos {
|
||||
// 將 Face 移到 ASR 前面
|
||||
let face = processors_to_run.remove(face_pos);
|
||||
let insert_pos = processors_to_run
|
||||
.iter()
|
||||
.position(|p| matches!(p, crate::core::db::ProcessorType::Asr))
|
||||
.unwrap();
|
||||
processors_to_run.insert(insert_pos, face);
|
||||
info!("[DYNAMIC] Reordered processors: Face now ahead of ASR");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let total_processor_types = processors_to_run.len() as i32;
|
||||
|
||||
// Get video total_frames for progress tracking
|
||||
let video = self.db.get_video_by_uuid(&job.uuid).await?;
|
||||
@@ -116,7 +261,7 @@ impl JobWorker {
|
||||
.await?;
|
||||
|
||||
self.redis
|
||||
.update_worker_job_status(&job.uuid, job.id, "running", None, 0, total_processors)
|
||||
.update_worker_job_status(&job.uuid, job.id, "running", None, 0, total_processor_types)
|
||||
.await?;
|
||||
|
||||
// Get existing processor results for this job
|
||||
@@ -126,7 +271,8 @@ impl JobWorker {
|
||||
result_map.insert(result.processor_type, result);
|
||||
}
|
||||
|
||||
for processor_type in processors_to_run {
|
||||
let mut started_count = 0i32;
|
||||
for processor_type in &processors_to_run {
|
||||
// Update processor status to running
|
||||
self.db
|
||||
.update_processor_progress(
|
||||
@@ -139,17 +285,19 @@ impl JobWorker {
|
||||
.await?;
|
||||
|
||||
// Check if processor already in terminal state
|
||||
if let Some(result) = result_map.get(&processor_type) {
|
||||
if let Some(result) = result_map.get(processor_type) {
|
||||
match result.status {
|
||||
ProcessorJobStatus::Completed | ProcessorJobStatus::Skipped => {
|
||||
ProcessorJobStatus::Completed => {
|
||||
info!(
|
||||
"Processor {} already completed, skipping",
|
||||
processor_type.as_str()
|
||||
);
|
||||
started_count += 1;
|
||||
continue;
|
||||
}
|
||||
ProcessorJobStatus::Failed => {
|
||||
info!("Processor {} failed, skipping", processor_type.as_str());
|
||||
started_count += 1;
|
||||
continue;
|
||||
}
|
||||
ProcessorJobStatus::Running => {
|
||||
@@ -157,41 +305,136 @@ impl JobWorker {
|
||||
"Processor {} already running, skipping",
|
||||
processor_type.as_str()
|
||||
);
|
||||
started_count += 1;
|
||||
continue;
|
||||
}
|
||||
ProcessorJobStatus::Pending => {
|
||||
// Skipped 不視為 terminal — 允許重新啟動
|
||||
ProcessorJobStatus::Skipped | ProcessorJobStatus::Pending => {
|
||||
// Continue to start processor
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check dependencies: all dependent processors must be completed
|
||||
let deps = processor_type.dependencies();
|
||||
if !deps.is_empty() {
|
||||
let mut any_dep_failed = false;
|
||||
let all_deps_met =
|
||||
deps.iter()
|
||||
.all(|dep| match result_map.get(dep).map(|r| &r.status) {
|
||||
Some(ProcessorJobStatus::Completed) => true,
|
||||
Some(ProcessorJobStatus::Failed) => {
|
||||
any_dep_failed = true;
|
||||
false
|
||||
}
|
||||
_ => false,
|
||||
});
|
||||
if any_dep_failed {
|
||||
info!(
|
||||
"Processor {} dependency failed (need {:?}), skipping",
|
||||
processor_type.as_str(),
|
||||
deps.iter().map(|d| d.as_str()).collect::<Vec<_>>(),
|
||||
);
|
||||
// 創建 skipped 記錄讓 job 可以正確完成
|
||||
if let Err(e) = self
|
||||
.db
|
||||
.create_processor_result(job.id, *processor_type, &job.uuid)
|
||||
.await
|
||||
{
|
||||
error!("Failed to create skipped processor result: {}", e);
|
||||
}
|
||||
// 同時更新 Redis 狀態
|
||||
let _ = self
|
||||
.redis
|
||||
.update_worker_processor_status(
|
||||
&job.uuid,
|
||||
processor_type.as_str(),
|
||||
"skipped",
|
||||
None,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
)
|
||||
.await;
|
||||
started_count += 1;
|
||||
continue;
|
||||
}
|
||||
if !all_deps_met {
|
||||
info!(
|
||||
"Processor {} dependencies not met (need {:?}), deferring",
|
||||
processor_type.as_str(),
|
||||
deps.iter().map(|d| d.as_str()).collect::<Vec<_>>(),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Check capacity before starting processor
|
||||
if !self.processor_pool.can_start().await {
|
||||
info!(
|
||||
"Max concurrent processors reached, skipping remaining processors for job {}",
|
||||
job.uuid
|
||||
);
|
||||
// 為所有未啟動的 processors 創建 Skipped 記錄
|
||||
for skipped_type in processors_to_run.iter().skip(started_count as usize) {
|
||||
if let Err(e) = self
|
||||
.db
|
||||
.create_processor_result(job.id, *skipped_type, &job.uuid)
|
||||
.await
|
||||
{
|
||||
error!("Failed to create skipped processor result: {}", e);
|
||||
}
|
||||
let _ = self
|
||||
.redis
|
||||
.update_worker_processor_status(
|
||||
&job.uuid,
|
||||
skipped_type.as_str(),
|
||||
"skipped",
|
||||
None,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
let processor_result_id = self
|
||||
.db
|
||||
.create_processor_result(job.id, processor_type, &job.uuid)
|
||||
.create_processor_result(job.id, *processor_type, &job.uuid)
|
||||
.await?;
|
||||
|
||||
self.redis
|
||||
.update_worker_processor_status(&job.uuid, processor_type.as_str(), "pending", None)
|
||||
.update_worker_processor_status(
|
||||
&job.uuid,
|
||||
processor_type.as_str(),
|
||||
"pending",
|
||||
None,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let task = ProcessorTask {
|
||||
job: job.clone(),
|
||||
processor_type,
|
||||
processor_type: *processor_type,
|
||||
processor_result_id,
|
||||
};
|
||||
|
||||
self.processor_pool.start_processor(task).await?;
|
||||
started_count += 1;
|
||||
}
|
||||
|
||||
// 總是檢查是否可以完成 job(check_and_complete_job 內部會判斷)
|
||||
// processor_results 不足時它會自動略過
|
||||
self.check_and_complete_job(job.id, &job.uuid).await?;
|
||||
|
||||
Ok(())
|
||||
@@ -200,14 +443,32 @@ impl JobWorker {
|
||||
async fn check_and_complete_job(&self, job_id: i32, uuid: &str) -> Result<()> {
|
||||
let results = self.db.get_processor_results_by_job(job_id).await?;
|
||||
|
||||
let all_completed = results.iter().all(|r| {
|
||||
matches!(
|
||||
r.status,
|
||||
crate::core::db::ProcessorJobStatus::Completed
|
||||
| crate::core::db::ProcessorJobStatus::Skipped
|
||||
)
|
||||
// 如果 processor_results 筆數少於總 processor 數,代表有 processor 尚未啟動(如依賴未滿足)
|
||||
let all_processor_types = crate::core::db::ProcessorType::all().len();
|
||||
if results.len() < all_processor_types {
|
||||
info!(
|
||||
"Job {} has {}/{} processor results, not all processors created yet. Skipping completion check.",
|
||||
uuid,
|
||||
results.len(),
|
||||
all_processor_types
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// 定義必要 processor(必須完成的才算 job 成功)
|
||||
let essential_processors = ["cut", "asr", "yolo"];
|
||||
|
||||
let essential_completed = essential_processors.iter().all(|ep| {
|
||||
results.iter().any(|r| {
|
||||
r.processor_type.as_str() == *ep
|
||||
&& matches!(r.status, crate::core::db::ProcessorJobStatus::Completed)
|
||||
})
|
||||
});
|
||||
|
||||
let all_completed = results
|
||||
.iter()
|
||||
.all(|r| matches!(r.status, crate::core::db::ProcessorJobStatus::Completed));
|
||||
|
||||
let any_failed = results
|
||||
.iter()
|
||||
.any(|r| matches!(r.status, crate::core::db::ProcessorJobStatus::Failed));
|
||||
@@ -242,7 +503,7 @@ impl JobWorker {
|
||||
.map(|r| r.processor_type.as_str().to_string())
|
||||
.collect();
|
||||
|
||||
// Check prerequisites for Rule 1 Chunking BEFORE moving arrays
|
||||
// Check prerequisites for post-processing triggers
|
||||
let has_asr = completed_processors.iter().any(|p| p == "asr");
|
||||
let has_asrx = completed_processors.iter().any(|p| p == "asrx");
|
||||
let has_cut = completed_processors.iter().any(|p| p == "cut");
|
||||
@@ -251,33 +512,33 @@ impl JobWorker {
|
||||
|
||||
// Update processor arrays in job record
|
||||
self.db
|
||||
.update_job_processors_arrays(job_id, completed_processors, failed_processors)
|
||||
.update_job_processors_arrays(job_id, completed_processors, failed_processors.clone())
|
||||
.await?;
|
||||
|
||||
if all_completed && !any_failed {
|
||||
// 🚀 P1 Trigger: Rule 1 Chunking
|
||||
if has_asr && has_asrx {
|
||||
info!("📝 Prerequisites met for Rule 1 Chunking. Starting ingestion...");
|
||||
let db_clone = self.db.clone();
|
||||
let uuid_clone = uuid.to_string();
|
||||
tokio::spawn(async move {
|
||||
match db_clone.get_video_by_uuid(&uuid_clone).await {
|
||||
Ok(Some(video)) => {
|
||||
let fps = video.fps;
|
||||
match rule1_ingest::execute_rule1(&db_clone, &uuid_clone, fps).await {
|
||||
Ok(count) => info!(
|
||||
"✅ Rule 1 Ingestion completed: {} chunks inserted.",
|
||||
count
|
||||
),
|
||||
Err(e) => error!("❌ Rule 1 Ingestion failed: {}", e),
|
||||
// 🚀 P1 Trigger: Rule 1 Chunking(僅需 ASR + ASRX)
|
||||
if has_asr && has_asrx {
|
||||
info!("📝 Prerequisites met for Rule 1 Chunking. Starting ingestion...");
|
||||
let db_clone = self.db.clone();
|
||||
let uuid_clone = uuid.to_string();
|
||||
tokio::spawn(async move {
|
||||
match db_clone.get_video_by_uuid(&uuid_clone).await {
|
||||
Ok(Some(video)) => {
|
||||
let fps = video.fps;
|
||||
match rule1_ingest::execute_rule1(&db_clone, &uuid_clone, fps).await {
|
||||
Ok(count) => {
|
||||
info!("✅ Rule 1 Ingestion completed: {} chunks inserted.", count)
|
||||
}
|
||||
Err(e) => error!("❌ Rule 1 Ingestion failed: {}", e),
|
||||
}
|
||||
Ok(None) => error!("Video not found for chunking: {}", uuid_clone),
|
||||
Err(e) => error!("Failed to get video info for chunking: {}", e),
|
||||
}
|
||||
});
|
||||
}
|
||||
Ok(None) => error!("Video not found for chunking: {}", uuid_clone),
|
||||
Err(e) => error!("Failed to get video info for chunking: {}", e),
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Rule 3 / Trace / Identity 需要 all_completed(含非必要 processor 失敗也可)
|
||||
if all_completed {
|
||||
// 🚀 P1 Trigger: Rule 3 Scene Chunking
|
||||
if has_cut && has_asr {
|
||||
info!("📝 Prerequisites met for Rule 3 Scene Chunking. Starting ingestion...");
|
||||
@@ -294,6 +555,35 @@ impl JobWorker {
|
||||
});
|
||||
}
|
||||
|
||||
// 🚀 P2 Trigger: Trace Face Aggregation (after Face)
|
||||
if has_face {
|
||||
info!("📝 Face completed, triggering trace_face aggregation...");
|
||||
let db_clone = self.db.clone();
|
||||
let uuid_clone = uuid.to_string();
|
||||
tokio::spawn(async move {
|
||||
let executor = match crate::core::processor::PythonExecutor::new() {
|
||||
Ok(ex) => ex,
|
||||
Err(e) => {
|
||||
error!("Failed to create PythonExecutor for trace_face: {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
match executor
|
||||
.run(
|
||||
"trace_face_aggregator.py",
|
||||
&["--file-uuid", &uuid_clone],
|
||||
Some(&uuid_clone),
|
||||
"TRACE_FACE",
|
||||
Some(std::time::Duration::from_secs(300)),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => info!("✅ Trace Face aggregation completed for {}", uuid_clone),
|
||||
Err(e) => error!("❌ Trace Face aggregation failed: {}", e),
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// 🚀 P3 Trigger: Identity Agent (Face + ASRX)
|
||||
if has_face && has_asrx {
|
||||
info!("📝 Prerequisites met for Identity Agent. Starting analysis...");
|
||||
@@ -336,6 +626,38 @@ impl JobWorker {
|
||||
self.redis.delete_worker_job(uuid).await?;
|
||||
|
||||
info!("Job {} completed successfully", job_id);
|
||||
} else if essential_completed && !all_completed {
|
||||
// 必要 processor 完成但部分非必要失敗 → 仍算完成
|
||||
info!(
|
||||
"Job {} completed with non-essential failures. Essential: {:?}",
|
||||
job_id, essential_processors
|
||||
);
|
||||
self.db
|
||||
.update_job_status(job_id, MonitorJobStatus::Completed)
|
||||
.await?;
|
||||
|
||||
self.db
|
||||
.update_video_status(uuid, VideoStatus::Completed)
|
||||
.await?;
|
||||
|
||||
let video = self.db.get_video_by_uuid(uuid).await?;
|
||||
let total_frames = video.map(|v| v.total_frames).unwrap_or(0);
|
||||
|
||||
self.db
|
||||
.update_processing_status_completed(uuid, total_frames)
|
||||
.await?;
|
||||
|
||||
self.redis
|
||||
.update_worker_job_status(uuid, job_id, "completed", None, completed_count, 7)
|
||||
.await?;
|
||||
|
||||
self.redis.delete_worker_job(uuid).await?;
|
||||
|
||||
info!(
|
||||
"Job {} completed with {} non-essential failures",
|
||||
job_id,
|
||||
failed_processors.len()
|
||||
);
|
||||
} else if any_failed {
|
||||
self.db
|
||||
.update_job_status(job_id, MonitorJobStatus::Failed)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
pub mod config;
|
||||
pub mod job_worker;
|
||||
pub mod processor;
|
||||
pub mod resources;
|
||||
|
||||
pub use config::WorkerConfig;
|
||||
pub use job_worker::JobWorker;
|
||||
|
||||
@@ -6,7 +6,9 @@ use tokio::sync::{mpsc, RwLock};
|
||||
use tracing::{error, info};
|
||||
|
||||
use crate::core::config::{OUTPUT_DIR, PYTHON_PATH, SCRIPTS_DIR};
|
||||
use crate::core::db::{MonitorJob, PostgresDb, ProcessorJobStatus, ProcessorType, RedisClient};
|
||||
use crate::core::db::{
|
||||
MonitorJob, PostgresDb, ProcessorJobStatus, ProcessorType, QdrantDb, RedisClient,
|
||||
};
|
||||
use crate::core::processor;
|
||||
use crate::core::processor::asr::AsrResult;
|
||||
use crate::core::processor::asrx::AsrxResult;
|
||||
@@ -17,12 +19,16 @@ use crate::core::processor::pose::PoseResult;
|
||||
use crate::core::processor::scene_classification::SceneClassificationResult;
|
||||
use crate::core::processor::visual_chunk::VisualChunkResult;
|
||||
use crate::core::processor::yolo::YoloResult;
|
||||
use crate::worker::resources::SystemResources;
|
||||
|
||||
/// Outcome of a single processor run.
///
/// On success the pool forwards `frames_processed`, `chunks_produced`, `total_frames`,
/// `retry_count` and `pid` to the Redis worker-processor status update.
#[derive(Debug)]
|
||||
struct ProcessorOutput {
|
||||
/// Processor result serialized with `serde_json::to_value`
/// (`Value::Null` when a previously failed scene run is skipped).
data: serde_json::Value,
|
||||
/// Number of items the processor produced (e.g. the scene count for CUT and Scene).
chunks_produced: i32,
|
||||
/// Frames handled by this run; the visible processor arms set it to `total_frames`,
/// or to 0 when the scene step is skipped.
frames_processed: i32,
|
||||
/// Total frame count reported alongside the result.
total_frames: i32,
|
||||
/// Retry counter; every visible construction site sets it to 0.
retry_count: i32,
|
||||
/// Process id slot; every visible construction site sets it to 0.
pid: i32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -35,7 +41,7 @@ pub struct ProcessorTask {
|
||||
pub struct ProcessorPool {
|
||||
db: Arc<PostgresDb>,
|
||||
redis: Arc<RedisClient>,
|
||||
max_concurrent: usize,
|
||||
config_max: usize,
|
||||
running: Arc<RwLock<HashMap<i32, ProcessorHandle>>>,
|
||||
running_count: Arc<RwLock<usize>>,
|
||||
}
|
||||
@@ -51,15 +57,22 @@ impl ProcessorPool {
|
||||
Self {
|
||||
db,
|
||||
redis,
|
||||
max_concurrent,
|
||||
config_max: max_concurrent,
|
||||
running: Arc::new(RwLock::new(HashMap::new())),
|
||||
running_count: Arc::new(RwLock::new(0)),
|
||||
}
|
||||
}
|
||||
|
||||
/// 根據系統資源計算當前安全的並發上限
|
||||
pub async fn current_max(&self) -> usize {
|
||||
let resources = SystemResources::check();
|
||||
resources.safe_max_concurrent(self.config_max).max(1)
|
||||
}
|
||||
|
||||
pub async fn can_start(&self) -> bool {
|
||||
let count = *self.running_count.read().await;
|
||||
count < self.max_concurrent
|
||||
let max = self.current_max().await;
|
||||
count < max
|
||||
}
|
||||
|
||||
pub async fn start_processor(&self, task: ProcessorTask) -> Result<()> {
|
||||
@@ -67,10 +80,14 @@ impl ProcessorPool {
|
||||
let job_id = task.job.id;
|
||||
let processor_type = task.processor_type;
|
||||
|
||||
let current_limit = self.current_max().await;
|
||||
{
|
||||
let mut count = self.running_count.write().await;
|
||||
if *count >= self.max_concurrent {
|
||||
anyhow::bail!("Max concurrent processors reached");
|
||||
if *count >= current_limit {
|
||||
anyhow::bail!(
|
||||
"Max concurrent processors reached (dynamic limit: {})",
|
||||
current_limit
|
||||
);
|
||||
}
|
||||
*count += 1;
|
||||
}
|
||||
@@ -104,7 +121,17 @@ impl ProcessorPool {
|
||||
.await;
|
||||
|
||||
let _ = redis
|
||||
.update_worker_processor_status(&job.uuid, &processor_name, "running", None)
|
||||
.update_worker_processor_status(
|
||||
&job.uuid,
|
||||
&processor_name,
|
||||
"running",
|
||||
None,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
)
|
||||
.await;
|
||||
|
||||
let result = Self::run_processor(&db, &redis, &job, processor_type, cancel_rx).await;
|
||||
@@ -142,6 +169,11 @@ impl ProcessorPool {
|
||||
&processor_name,
|
||||
"completed",
|
||||
None,
|
||||
output.frames_processed,
|
||||
output.chunks_produced,
|
||||
output.total_frames,
|
||||
output.retry_count,
|
||||
output.pid,
|
||||
)
|
||||
.await
|
||||
{
|
||||
@@ -173,6 +205,11 @@ impl ProcessorPool {
|
||||
&processor_name,
|
||||
"failed",
|
||||
Some(&e.to_string()),
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
)
|
||||
.await
|
||||
{
|
||||
@@ -196,12 +233,8 @@ impl ProcessorPool {
|
||||
|
||||
// Generate output path
|
||||
let output_dir = PathBuf::from(OUTPUT_DIR.as_str());
|
||||
let output_path = output_dir.join(format!(
|
||||
"job_{}_{}_{}.json",
|
||||
job.id,
|
||||
processor_type.as_str(),
|
||||
chrono::Utc::now().timestamp_millis()
|
||||
));
|
||||
let output_path =
|
||||
output_dir.join(format!("{}.{}.json", job.uuid, processor_type.as_str(),));
|
||||
|
||||
// Ensure output directory exists
|
||||
if let Some(parent) = output_path.parent() {
|
||||
@@ -229,11 +262,22 @@ impl ProcessorPool {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
frames_processed: total_frames,
|
||||
total_frames,
|
||||
retry_count: 0,
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
ProcessorType::Cut => {
|
||||
let result =
|
||||
processor::process_cut(video_path, output_path.to_str().unwrap(), uuid).await?;
|
||||
let cut_path =
|
||||
std::path::Path::new(&output_dir).join(format!("{}.cut.json", job.uuid));
|
||||
let result = if cut_path.exists() {
|
||||
// CUT 在 register 階段已完成,直接載入
|
||||
let content =
|
||||
std::fs::read_to_string(&cut_path).context("Failed to read cut.json")?;
|
||||
serde_json::from_str(&content).context("Failed to parse cut.json")?
|
||||
} else {
|
||||
processor::process_cut(video_path, output_path.to_str().unwrap(), uuid).await?
|
||||
};
|
||||
let chunks_produced = result.scenes.len() as i32;
|
||||
tracing::info!(
|
||||
"CUT completed, storing {} scenes for {}",
|
||||
@@ -247,6 +291,9 @@ impl ProcessorPool {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
frames_processed: total_frames,
|
||||
total_frames,
|
||||
retry_count: 0,
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
ProcessorType::Yolo => {
|
||||
@@ -266,6 +313,9 @@ impl ProcessorPool {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
frames_processed: total_frames,
|
||||
total_frames,
|
||||
retry_count: 0,
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
ProcessorType::Ocr => {
|
||||
@@ -284,6 +334,9 @@ impl ProcessorPool {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
frames_processed: total_frames,
|
||||
total_frames,
|
||||
retry_count: 0,
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
ProcessorType::Face => {
|
||||
@@ -299,10 +352,17 @@ impl ProcessorPool {
|
||||
if let Err(e) = Self::store_face_chunks(db, &job.uuid, &result).await {
|
||||
tracing::error!("Failed to store FACE chunks for {}: {}", job.uuid, e);
|
||||
}
|
||||
// 將 face embedding 寫入 Qdrant
|
||||
if let Err(e) = Self::store_face_embeddings_to_qdrant(&job.uuid, &result).await {
|
||||
tracing::error!("Failed to store face embeddings to Qdrant: {}", e);
|
||||
}
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
frames_processed: total_frames,
|
||||
total_frames,
|
||||
retry_count: 0,
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
ProcessorType::Pose => {
|
||||
@@ -322,6 +382,9 @@ impl ProcessorPool {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
frames_processed: total_frames,
|
||||
total_frames,
|
||||
retry_count: 0,
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
ProcessorType::Asrx => {
|
||||
@@ -337,10 +400,17 @@ impl ProcessorPool {
|
||||
if let Err(e) = Self::store_asrx_chunks(db, &job.uuid, &result).await {
|
||||
tracing::error!("Failed to store ASRX chunks for {}: {}", job.uuid, e);
|
||||
}
|
||||
// 將 voice embeddings 寫入 Qdrant
|
||||
if let Err(e) = Self::store_voice_embeddings_to_qdrant(&job.uuid, &result).await {
|
||||
tracing::error!("Failed to store voice embeddings to Qdrant: {}", e);
|
||||
}
|
||||
Ok(ProcessorOutput {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
frames_processed: total_frames,
|
||||
total_frames,
|
||||
retry_count: 0,
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
ProcessorType::VisualChunk => {
|
||||
@@ -363,15 +433,44 @@ impl ProcessorPool {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
frames_processed: total_frames,
|
||||
total_frames,
|
||||
retry_count: 0,
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
ProcessorType::Scene => {
|
||||
let result = processor::process_scene_classification(
|
||||
video_path,
|
||||
output_path.to_str().unwrap(),
|
||||
uuid,
|
||||
)
|
||||
.await?;
|
||||
let scene_path =
|
||||
std::path::Path::new(&output_dir).join(format!("{}.scene.json", job.uuid));
|
||||
let scene_err =
|
||||
std::path::Path::new(&output_dir).join(format!("{}.scene.err", job.uuid));
|
||||
let scene_tmp =
|
||||
std::path::Path::new(&output_dir).join(format!("{}.scene.tmp", job.uuid));
|
||||
// 優先順序:.err(跳過)→ .json(載入)→ .tmp(等待或重新執行)
|
||||
let result = if scene_err.exists() {
|
||||
tracing::warn!("Scene previously failed for {}, skipping", job.uuid);
|
||||
return Ok(ProcessorOutput {
|
||||
data: serde_json::Value::Null,
|
||||
chunks_produced: 0,
|
||||
frames_processed: 0,
|
||||
total_frames,
|
||||
retry_count: 0,
|
||||
pid: 0,
|
||||
});
|
||||
} else if scene_path.exists() {
|
||||
tracing::info!("Scene JSON exists for {}, loading from file", job.uuid);
|
||||
crate::core::processor::load_scene_from_file(scene_path.to_str().unwrap())?
|
||||
} else if scene_tmp.exists() {
|
||||
tracing::warn!("Scene tmp exists for {}, waiting for completion", job.uuid);
|
||||
// 生產環境應等待,此處直接跳過避免卡住
|
||||
crate::core::processor::SceneClassificationResult::default()
|
||||
} else {
|
||||
processor::process_scene_classification(
|
||||
video_path,
|
||||
output_path.to_str().unwrap(),
|
||||
uuid,
|
||||
)
|
||||
.await?
|
||||
};
|
||||
let chunks_produced = result.scenes.len() as i32;
|
||||
tracing::info!(
|
||||
"Scene classification completed, storing {} scenes for {}",
|
||||
@@ -385,186 +484,14 @@ impl ProcessorPool {
|
||||
data: serde_json::to_value(result)?,
|
||||
chunks_produced,
|
||||
frames_processed: total_frames,
|
||||
total_frames,
|
||||
retry_count: 0,
|
||||
pid: 0,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
async fn run_asr(
|
||||
_db: &PostgresDb,
|
||||
_redis: &RedisClient,
|
||||
video_path: &str,
|
||||
_cancel_rx: &mut mpsc::Receiver<()>,
|
||||
) -> Result<serde_json::Value> {
|
||||
let script_path = std::env::var("MOMENTRY_ASR_SCRIPT")
|
||||
.unwrap_or_else(|_| format!("{}/asr_processor.py", SCRIPTS_DIR.as_str()));
|
||||
|
||||
let output = tokio::process::Command::new(PYTHON_PATH.as_str())
|
||||
.arg(&script_path)
|
||||
.arg(video_path)
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
anyhow::bail!("ASR script failed: {}", stderr);
|
||||
}
|
||||
|
||||
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
async fn run_cut(
|
||||
_db: &PostgresDb,
|
||||
_redis: &RedisClient,
|
||||
video_path: &str,
|
||||
_cancel_rx: &mut mpsc::Receiver<()>,
|
||||
) -> Result<serde_json::Value> {
|
||||
let script_path = std::env::var("MOMENTRY_CUT_SCRIPT")
|
||||
.unwrap_or_else(|_| format!("{}/cut_processor.py", SCRIPTS_DIR.as_str()));
|
||||
|
||||
let output = tokio::process::Command::new(PYTHON_PATH.as_str())
|
||||
.arg(&script_path)
|
||||
.arg(video_path)
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
anyhow::bail!("CUT script failed: {}", stderr);
|
||||
}
|
||||
|
||||
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
async fn run_yolo(
|
||||
_db: &PostgresDb,
|
||||
_redis: &RedisClient,
|
||||
video_path: &str,
|
||||
_cancel_rx: &mut mpsc::Receiver<()>,
|
||||
) -> Result<serde_json::Value> {
|
||||
let script_path = std::env::var("MOMENTRY_YOLO_SCRIPT")
|
||||
.unwrap_or_else(|_| format!("{}/yolo_processor.py", SCRIPTS_DIR.as_str()));
|
||||
|
||||
let output = tokio::process::Command::new(PYTHON_PATH.as_str())
|
||||
.arg(&script_path)
|
||||
.arg(video_path)
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
anyhow::bail!("YOLO script failed: {}", stderr);
|
||||
}
|
||||
|
||||
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
async fn run_ocr(
|
||||
_db: &PostgresDb,
|
||||
_redis: &RedisClient,
|
||||
video_path: &str,
|
||||
_cancel_rx: &mut mpsc::Receiver<()>,
|
||||
) -> Result<serde_json::Value> {
|
||||
let script_path = std::env::var("MOMENTRY_OCR_SCRIPT")
|
||||
.unwrap_or_else(|_| format!("{}/ocr_processor.py", SCRIPTS_DIR.as_str()));
|
||||
|
||||
let output = tokio::process::Command::new(PYTHON_PATH.as_str())
|
||||
.arg(&script_path)
|
||||
.arg(video_path)
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
anyhow::bail!("OCR script failed: {}", stderr);
|
||||
}
|
||||
|
||||
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
async fn run_face(
|
||||
_db: &PostgresDb,
|
||||
_redis: &RedisClient,
|
||||
video_path: &str,
|
||||
_cancel_rx: &mut mpsc::Receiver<()>,
|
||||
) -> Result<serde_json::Value> {
|
||||
let script_path = std::env::var("MOMENTRY_FACE_SCRIPT")
|
||||
.unwrap_or_else(|_| format!("{}/face_processor.py", SCRIPTS_DIR.as_str()));
|
||||
|
||||
let output = tokio::process::Command::new(PYTHON_PATH.as_str())
|
||||
.arg(&script_path)
|
||||
.arg(video_path)
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
anyhow::bail!("Face script failed: {}", stderr);
|
||||
}
|
||||
|
||||
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
async fn run_pose(
|
||||
_db: &PostgresDb,
|
||||
_redis: &RedisClient,
|
||||
video_path: &str,
|
||||
_cancel_rx: &mut mpsc::Receiver<()>,
|
||||
) -> Result<serde_json::Value> {
|
||||
let script_path = std::env::var("MOMENTRY_POSE_SCRIPT")
|
||||
.unwrap_or_else(|_| format!("{}/pose_processor.py", SCRIPTS_DIR.as_str()));
|
||||
|
||||
let output = tokio::process::Command::new(PYTHON_PATH.as_str())
|
||||
.arg(&script_path)
|
||||
.arg(video_path)
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
anyhow::bail!("Pose script failed: {}", stderr);
|
||||
}
|
||||
|
||||
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
async fn run_asrx(
|
||||
_db: &PostgresDb,
|
||||
_redis: &RedisClient,
|
||||
video_path: &str,
|
||||
_cancel_rx: &mut mpsc::Receiver<()>,
|
||||
) -> Result<serde_json::Value> {
|
||||
let script_path = std::env::var("MOMENTRY_ASRX_SCRIPT")
|
||||
.unwrap_or_else(|_| format!("{}/asrx_processor_custom.py", SCRIPTS_DIR.as_str()));
|
||||
|
||||
let output = tokio::process::Command::new(PYTHON_PATH.as_str())
|
||||
.arg(&script_path)
|
||||
.arg(video_path)
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
anyhow::bail!("ASRX script failed: {}", stderr);
|
||||
}
|
||||
|
||||
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub async fn store_asr_chunks(
|
||||
db: &PostgresDb,
|
||||
uuid: &str,
|
||||
@@ -726,14 +653,7 @@ impl ProcessorPool {
|
||||
"timestamp": frame.timestamp,
|
||||
});
|
||||
|
||||
// We could potentially parse identity_id if it's already matched, but for raw ingestion it's None.
|
||||
pre_chunks_to_store.push((
|
||||
frame.frame as i64,
|
||||
Some(frame.timestamp),
|
||||
data,
|
||||
None, // identity_id
|
||||
None, // confidence
|
||||
));
|
||||
pre_chunks_to_store.push((frame.frame as i64, Some(frame.timestamp), data, None, None));
|
||||
}
|
||||
|
||||
db.store_raw_pre_chunks_batch(uuid, "face", &pre_chunks_to_store)
|
||||
@@ -741,6 +661,118 @@ impl ProcessorPool {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 將 face embeddings 寫入 Qdrant momentry_dev_face collection
|
||||
pub async fn store_face_embeddings_to_qdrant(
|
||||
uuid: &str,
|
||||
face_result: &FaceResult,
|
||||
) -> Result<()> {
|
||||
let qdrant = QdrantDb::new();
|
||||
let collection = format!(
|
||||
"{}{}",
|
||||
crate::core::config::REDIS_KEY_PREFIX
|
||||
.as_str()
|
||||
.trim_end_matches(':'),
|
||||
"_face"
|
||||
);
|
||||
|
||||
let mut count = 0;
|
||||
for frame in &face_result.frames {
|
||||
for face in &frame.faces {
|
||||
if let Some(embedding) = &face.embedding {
|
||||
if embedding.len() != 512 {
|
||||
continue;
|
||||
}
|
||||
// 使用 hash 作為 Qdrant point ID(需要 unsigned integer)
|
||||
// 使用 frame number 作為 Qdrant point ID(u64)
|
||||
let point_id = frame.frame as u64;
|
||||
|
||||
let payload = serde_json::json!({
|
||||
"file_uuid": uuid,
|
||||
"face_id": face.face_id,
|
||||
"frame": frame.frame,
|
||||
"timestamp": frame.timestamp,
|
||||
"x": face.x,
|
||||
"y": face.y,
|
||||
"width": face.width,
|
||||
"height": face.height,
|
||||
"confidence": face.confidence,
|
||||
});
|
||||
|
||||
if let Err(e) = qdrant
|
||||
.upsert_vector_to_collection(
|
||||
&collection,
|
||||
point_id,
|
||||
embedding,
|
||||
Some(payload),
|
||||
)
|
||||
.await
|
||||
{
|
||||
tracing::error!("Failed to upsert face vector {}: {}", point_id, e);
|
||||
} else {
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if count > 0 {
|
||||
tracing::info!("Stored {} face embeddings to Qdrant for {}", count, uuid);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 將 voice embeddings 寫入 Qdrant momentry_dev_voice collection
|
||||
pub async fn store_voice_embeddings_to_qdrant(
|
||||
uuid: &str,
|
||||
asrx_result: &AsrxResult,
|
||||
) -> Result<()> {
|
||||
let qdrant = QdrantDb::new();
|
||||
let collection = format!(
|
||||
"{}{}",
|
||||
crate::core::config::REDIS_KEY_PREFIX
|
||||
.as_str()
|
||||
.trim_end_matches(':'),
|
||||
"_voice"
|
||||
);
|
||||
|
||||
let embeddings = match &asrx_result.embeddings {
|
||||
Some(e) => e,
|
||||
None => return Ok(()),
|
||||
};
|
||||
|
||||
let mut count = 0;
|
||||
for (i, segment) in asrx_result.segments.iter().enumerate() {
|
||||
if let Some(emb) = embeddings.get(i) {
|
||||
if emb.len() != 192 {
|
||||
continue;
|
||||
}
|
||||
let payload = serde_json::json!({
|
||||
"file_uuid": uuid,
|
||||
"speaker_id": segment.speaker_id,
|
||||
"segment_index": i,
|
||||
"start_frame": segment.start_frame,
|
||||
"end_frame": segment.end_frame,
|
||||
"start_time": segment.start_time,
|
||||
"end_time": segment.end_time,
|
||||
});
|
||||
|
||||
if let Err(e) = qdrant
|
||||
.upsert_vector_to_collection(&collection, i as u64, emb, Some(payload))
|
||||
.await
|
||||
{
|
||||
tracing::error!("Failed to upsert voice vector {}: {}", i, e);
|
||||
} else {
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if count > 0 {
|
||||
tracing::info!("Stored {} voice embeddings to Qdrant for {}", count, uuid);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn store_pose_chunks(
|
||||
db: &PostgresDb,
|
||||
uuid: &str,
|
||||
@@ -787,12 +819,11 @@ impl ProcessorPool {
|
||||
let data = serde_json::json!({
|
||||
"text": segment.text,
|
||||
"speaker_id": segment.speaker_id,
|
||||
"timestamp": segment.start,
|
||||
"timestamp": segment.start_time,
|
||||
});
|
||||
|
||||
// ASRX is time-based, so we use segment index or start time as coordinate.
|
||||
// Let's use index for simplicity in pre_chunks, or start time.
|
||||
pre_chunks_to_store.push((i as i64, Some(segment.start), data, None, None));
|
||||
pre_chunks_to_store.push((i as i64, Some(segment.start_time), data, None, None));
|
||||
}
|
||||
|
||||
db.store_raw_pre_chunks_batch(uuid, "asrx", &pre_chunks_to_store)
|
||||
|
||||
279
src/worker/resources.rs
Normal file
279
src/worker/resources.rs
Normal file
@@ -0,0 +1,279 @@
|
||||
use tracing::{info, warn};
|
||||
|
||||
/// Point-in-time snapshot of host CPU, memory and GPU load, used to decide how many
/// processors may safely run at once.
#[derive(Debug, Clone, PartialEq)]
pub struct SystemResources {
    /// Percentage of CPU time reported idle.
    pub cpu_idle_percent: f64,
    /// Percentage of CPU time in use; `check` sets it to `100.0 - cpu_idle_percent`.
    pub cpu_used_percent: f64,
    /// Memory currently available, in MiB.
    pub memory_available_mb: u64,
    /// Total installed memory, in MiB (0 when it could not be determined).
    pub memory_total_mb: u64,
    /// Share of total memory in use, as a percentage.
    pub memory_used_percent: f64,
    /// Whether a GPU was detected. NOTE(review): the probe is outside this view; confirm.
    pub gpu_available: bool,
    /// GPU utilization as a percentage, when it could be read.
    pub gpu_utilization: Option<f64>,
    /// GPU memory in use as a percentage, when it could be read.
    pub gpu_memory_used_pct: Option<f64>,
}
|
||||
|
||||
impl SystemResources {
|
||||
pub fn check() -> Self {
|
||||
let cpu_idle = Self::get_cpu_idle();
|
||||
let (mem_available, mem_total) = Self::get_memory_info();
|
||||
let mem_used_pct = if mem_total > 0 && mem_available <= mem_total {
|
||||
((mem_total - mem_available) as f64 / mem_total as f64) * 100.0
|
||||
} else if mem_total > 0 {
|
||||
100.0
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
let (gpu_avail, gpu_util, gpu_mem) = Self::get_gpu_info();
|
||||
|
||||
Self {
|
||||
cpu_idle_percent: cpu_idle,
|
||||
cpu_used_percent: 100.0 - cpu_idle,
|
||||
memory_available_mb: mem_available,
|
||||
memory_total_mb: mem_total,
|
||||
memory_used_percent: mem_used_pct,
|
||||
gpu_available: gpu_avail,
|
||||
gpu_utilization: gpu_util,
|
||||
gpu_memory_used_pct: gpu_mem,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn safe_max_concurrent(&self, config_max: usize) -> usize {
|
||||
let mut limit = config_max;
|
||||
|
||||
// 記憶體限制
|
||||
if self.memory_available_mb < 1000 {
|
||||
limit = limit.min(1);
|
||||
warn!(
|
||||
"Low memory ({}MB available), reducing concurrency to 1",
|
||||
self.memory_available_mb
|
||||
);
|
||||
} else if self.memory_available_mb < 2000 {
|
||||
limit = limit.min(2);
|
||||
info!(
|
||||
"Moderate memory ({}MB available), limiting concurrency to 2",
|
||||
self.memory_available_mb
|
||||
);
|
||||
} else if self.memory_available_mb < 4000 {
|
||||
limit = limit.min(3);
|
||||
info!(
|
||||
"Adequate memory ({}MB available), limiting concurrency to 3",
|
||||
self.memory_available_mb
|
||||
);
|
||||
} else if self.memory_available_mb < 8000 {
|
||||
limit = limit.min(4);
|
||||
info!(
|
||||
"Good memory ({}MB available), limiting concurrency to 4",
|
||||
self.memory_available_mb
|
||||
);
|
||||
}
|
||||
|
||||
// CPU 限制
|
||||
if self.cpu_idle_percent < 15.0 {
|
||||
limit = limit.min(1);
|
||||
warn!(
|
||||
"High CPU load (idle={:.1}%), reducing concurrency to 1",
|
||||
self.cpu_idle_percent
|
||||
);
|
||||
} else if self.cpu_idle_percent < 30.0 {
|
||||
limit = limit.min(2);
|
||||
info!(
|
||||
"Moderate CPU load (idle={:.1}%), limiting concurrency to 2",
|
||||
self.cpu_idle_percent
|
||||
);
|
||||
}
|
||||
|
||||
// GPU 限制:利用率 > 80% 或記憶體 > 90% 時降並發
|
||||
if let Some(util) = self.gpu_utilization {
|
||||
if util > 80.0 {
|
||||
limit = limit.min(1);
|
||||
warn!(
|
||||
"High GPU utilization ({:.1}%), reducing concurrency to 1",
|
||||
util
|
||||
);
|
||||
} else if util > 60.0 {
|
||||
limit = limit.min(2);
|
||||
info!(
|
||||
"Moderate GPU utilization ({:.1}%), limiting concurrency to 2",
|
||||
util
|
||||
);
|
||||
}
|
||||
}
|
||||
if let Some(mem) = self.gpu_memory_used_pct {
|
||||
if mem > 90.0 {
|
||||
limit = limit.min(1);
|
||||
warn!(
|
||||
"High GPU memory usage ({:.1}%), reducing concurrency to 1",
|
||||
mem
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
limit.max(1)
|
||||
}
|
||||
|
||||
/// Returns the system-wide CPU idle percentage as reported by `top -l 1 -n 1`.
///
/// Falls back to the neutral value 50.0 when `top` cannot be run or its output contains
/// no parsable idle figure. The parsed value is clamped to `0.0..=100.0`.
///
/// The idle figure is printed as two whitespace-separated tokens (`84.61% idle`), so the
/// percentage is taken from the token preceding `idle`. Matching a single token ending in
/// `%idle` never succeeded on that layout and made this function always return 50.0.
fn get_cpu_idle() -> f64 {
    use std::process::Command;

    /// Value reported when the real idle percentage is unavailable.
    const FALLBACK_IDLE: f64 = 50.0;

    /// Extracts the idle percentage from `top` output, if present.
    fn parse_idle(top_output: &str) -> Option<f64> {
        let line = top_output.lines().find(|l| l.contains("idle"))?;
        let tokens: Vec<&str> = line.split_whitespace().collect();

        tokens
            .iter()
            // Fused form: "84.61%idle".
            .find_map(|tok| tok.strip_suffix("%idle")?.trim().parse::<f64>().ok())
            // Split form: "84.61%" followed by "idle".
            .or_else(|| {
                tokens.windows(2).find_map(|pair| {
                    if pair[1].starts_with("idle") {
                        pair[0].strip_suffix('%')?.parse::<f64>().ok()
                    } else {
                        None
                    }
                })
            })
    }

    Command::new("top")
        .args(["-l", "1", "-n", "1"])
        .output()
        .ok()
        .and_then(|out| parse_idle(&String::from_utf8_lossy(&out.stdout)))
        .filter(|pct| pct.is_finite())
        .map(|pct| pct.clamp(0.0, 100.0))
        .unwrap_or(FALLBACK_IDLE)
}
|
||||
|
||||
/// Returns `(available_mb, total_mb)` for system memory, using macOS command-line tools.
///
/// * Total memory comes from `sysctl hw.memsize`; the value is divided by
///   1024 * 1024, and is `0` when the command fails or cannot be parsed.
/// * Available memory is derived, in order of preference, from:
///   1. the "free percentage" line printed by `memory_pressure`, applied to the total;
///   2. `vm_stat`: (free + inactive + speculative pages) * page size;
///   3. a quarter of the total, when neither tool yields a usable figure.
///
/// The `vm_stat` page size is read from the header line
/// (`... (page size of N bytes)`) instead of being assumed to be 16384, so the
/// figure is also correct on machines that use a different page size. 16384 is
/// still used when the header cannot be parsed.
fn get_memory_info() -> (u64, u64) {
    use std::process::Command;

    // Page size assumed when the `vm_stat` header does not state one.
    const DEFAULT_PAGE_SIZE: u64 = 16384;
    const BYTES_PER_MB: u64 = 1024 * 1024;

    // Runs `program` and returns its stdout (lossily decoded), or None if it cannot be spawned.
    fn stdout_of(program: &str, args: &[&str]) -> Option<String> {
        Command::new(program)
            .args(args)
            .output()
            .ok()
            .map(|out| String::from_utf8_lossy(&out.stdout).into_owned())
    }

    // Parses the percentage from a "System-wide memory free percentage: XX%" line.
    fn parse_free_percentage(text: &str) -> Option<f64> {
        let value = text
            .lines()
            .filter(|line| line.contains("free percentage"))
            .find_map(|line| line.split(':').nth(1))?;
        value.trim().trim_end_matches('%').parse::<f64>().ok()
    }

    // Converts `vm_stat` output into reclaimable megabytes; None when no counter is present.
    fn parse_vm_stat_mb(text: &str) -> Option<u64> {
        let page_size = text
            .lines()
            .next()
            .and_then(|header| header.split("page size of").nth(1))
            .and_then(|rest| rest.split_whitespace().next())
            .and_then(|n| n.parse::<u64>().ok())
            .filter(|&n| n > 0)
            .unwrap_or(DEFAULT_PAGE_SIZE);

        // Each counter line looks like "Pages free:      12345." (note the trailing dot).
        let pages = |label: &str| -> Option<u64> {
            text.lines()
                .find(|line| line.contains(label))
                .and_then(|line| line.split_whitespace().last())
                .and_then(|v| v.trim_end_matches('.').parse::<u64>().ok())
        };

        let free = pages("Pages free:");
        let inactive = pages("Pages inactive:");
        let speculative = pages("Pages speculative:");
        if free.is_none() && inactive.is_none() && speculative.is_none() {
            // Nothing recognisable: let the caller use its own fallback rather than report 0 MB.
            return None;
        }

        let total_pages = free
            .unwrap_or(0)
            .saturating_add(inactive.unwrap_or(0))
            .saturating_add(speculative.unwrap_or(0));
        Some(total_pages.saturating_mul(page_size) / BYTES_PER_MB)
    }

    // Total memory: output has the form "hw.memsize: <bytes>".
    let total = stdout_of("sysctl", &["hw.memsize"])
        .and_then(|s| {
            s.split_whitespace()
                .nth(1)
                .and_then(|v| v.parse::<u64>().ok())
        })
        .unwrap_or(0)
        / BYTES_PER_MB;

    // Prefer memory_pressure for the available figure; on macOS the usable
    // memory is free + inactive + speculative, which vm_stat provides as a fallback.
    let available = stdout_of("memory_pressure", &[])
        .and_then(|s| parse_free_percentage(&s))
        .map(|pct| (total as f64 * pct / 100.0) as u64)
        .or_else(|| stdout_of("vm_stat", &[]).and_then(|s| parse_vm_stat_mb(&s)))
        .unwrap_or(total / 4);

    (available, total)
}
|
||||
|
||||
/// Probes for a GPU and returns `(gpu_found, utilization_pct, memory_used_pct)`.
///
/// Two sources are tried in order:
///
/// 1. `ioreg -r -c AppleM2ScalerCSCDriver` (Apple Silicon). If the output
///    contains `PerformanceStatistics`, the values following `=` on the lines
///    mentioning `GPU Utilization` and `GPU Memory Utilization` are returned.
/// 2. `nvidia-smi --query-gpu=utilization.gpu,memory.used,memory.total
///    --format=csv,noheader,nounits`. Only the first non-empty row is parsed:
///    with several GPUs installed `nvidia-smi` prints one row per GPU, and
///    splitting the whole output on commas (as the previous code did) fused
///    the last field of one row with the first field of the next, which
///    silently discarded the memory figure.
///
/// # Returns
///
/// `(false, None, None)` when neither tool reports a GPU. When a GPU is found,
/// either metric may still be `None` if its field is missing or not numeric.
fn get_gpu_info() -> (bool, Option<f64>, Option<f64>) {
    use std::process::Command;

    // Returns the first parseable number after `=` on an ioreg line containing `key`.
    fn ioreg_stat(text: &str, key: &str) -> Option<f64> {
        text.lines()
            .filter(|line| line.contains(key))
            .find_map(|line| {
                line.split('=').nth(1).and_then(|value| {
                    value
                        .trim()
                        .trim_matches('"')
                        .trim_end_matches('}')
                        .parse::<f64>()
                        .ok()
                })
            })
    }

    // Parses one "util, mem_used, mem_total" CSV row into (util_pct, mem_used_pct).
    // Returns None when the output has no row with at least three fields.
    fn parse_nvidia_smi(text: &str) -> Option<(Option<f64>, Option<f64>)> {
        let row = text.lines().map(str::trim).find(|line| !line.is_empty())?;
        let fields: Vec<&str> = row.split(',').map(str::trim).collect();
        if fields.len() < 3 {
            return None;
        }
        let util = fields[0].parse::<f64>().ok();
        let used = fields[1].parse::<f64>().ok();
        let total = fields[2].parse::<f64>().ok();
        let mem_pct = match (used, total) {
            // Guard against division by zero when the total is reported as 0.
            (Some(u), Some(t)) if t > 0.0 => Some(u / t * 100.0),
            _ => None,
        };
        Some((util, mem_pct))
    }

    // Apple Silicon (MPS): read GPU utilization from ioreg.
    if let Ok(out) = Command::new("ioreg")
        .args(["-r", "-c", "AppleM2ScalerCSCDriver"])
        .output()
    {
        let text = String::from_utf8_lossy(&out.stdout);
        if text.contains("PerformanceStatistics") {
            let util = ioreg_stat(&text, "GPU Utilization");
            let mem = ioreg_stat(&text, "GPU Memory Utilization");
            return (true, util, mem);
        }
    }

    // NVIDIA GPU via nvidia-smi.
    if let Ok(out) = Command::new("nvidia-smi")
        .args([
            "--query-gpu=utilization.gpu,memory.used,memory.total",
            "--format=csv,noheader,nounits",
        ])
        .output()
    {
        if out.status.success() {
            let text = String::from_utf8_lossy(&out.stdout);
            if let Some((util, mem_pct)) = parse_nvidia_smi(&text) {
                return (true, util, mem_pct);
            }
        }
    }

    (false, None, None)
}
|
||||
}
|
||||
Reference in New Issue
Block a user