feat: file dedup — content_hash SHA256 + /files/lookup API + auto-rename on name collision
This commit is contained in:
@@ -7,6 +7,7 @@ use axum::{
|
||||
};
|
||||
use once_cell::sync::OnceCell;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use sha2::{Digest, Sha256};
|
||||
use sqlx::Row;
|
||||
use std::time::Instant;
|
||||
@@ -161,6 +162,93 @@ struct CacheToggleResponse {
|
||||
|
||||
// Missing structs added
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[derive(Serialize)]
|
||||
struct FileLookupMatch {
|
||||
file_uuid: String,
|
||||
file_name: String,
|
||||
file_type: Option<String>,
|
||||
status: String,
|
||||
content_hash: Option<String>,
|
||||
file_size: Option<i64>,
|
||||
duration: Option<f64>,
|
||||
width: Option<i32>,
|
||||
height: Option<i32>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct FileLookupResponse {
|
||||
file_name: String,
|
||||
exists: bool,
|
||||
matches: Vec<FileLookupMatch>,
|
||||
next_name: String,
|
||||
}
|
||||
|
||||
async fn lookup_file_by_name(
|
||||
State(state): State<AppState>,
|
||||
Query(params): Query<HashMap<String, String>>,
|
||||
) -> Result<Json<FileLookupResponse>, StatusCode> {
|
||||
let base = params.get("file_name").map(|s| s.trim().to_string()).unwrap_or_default();
|
||||
if base.is_empty() {
|
||||
return Ok(Json(FileLookupResponse {
|
||||
file_name: String::new(),
|
||||
exists: false,
|
||||
matches: vec![],
|
||||
next_name: String::new(),
|
||||
}));
|
||||
}
|
||||
let table = schema::table_name("videos");
|
||||
let dot_pos = base.rfind('.');
|
||||
let (stem, ext) = match dot_pos {
|
||||
Some(p) => (base[..p].to_string(), base[p..].to_string()),
|
||||
None => (base.clone(), String::new()),
|
||||
};
|
||||
let pattern = format!("{}%%", &stem);
|
||||
|
||||
let query_sql = format!("SELECT file_uuid, file_name, file_type, status, content_hash, duration, width, height FROM {} WHERE file_name = $1 OR file_name LIKE $2 ORDER BY file_name", table);
|
||||
let rows = sqlx::query(&query_sql)
|
||||
.bind(&base)
|
||||
.bind(&pattern)
|
||||
.fetch_all(state.db.pool())
|
||||
.await
|
||||
.map_err(|e| { tracing::error!("lookup query error: {}", e); StatusCode::INTERNAL_SERVER_ERROR })?;
|
||||
|
||||
let exists = rows.iter().any(|r| r.get::<String, _>("file_name") == base);
|
||||
let matches: Vec<FileLookupMatch> = rows.iter().map(|r| {
|
||||
FileLookupMatch {
|
||||
file_uuid: r.get("file_uuid"),
|
||||
file_name: r.get("file_name"),
|
||||
file_type: r.get("file_type"),
|
||||
status: r.get("status"),
|
||||
content_hash: r.get("content_hash"),
|
||||
file_size: None,
|
||||
duration: r.get("duration"),
|
||||
width: r.get("width"),
|
||||
height: r.get("height"),
|
||||
}
|
||||
}).collect();
|
||||
|
||||
let max_n: usize = rows.iter().filter_map(|r| {
|
||||
let n: String = r.get("file_name");
|
||||
if n == base { return Some(0usize); }
|
||||
let rest = n.strip_prefix(&stem).and_then(|r| r.strip_suffix(&ext))?;
|
||||
let inner = rest.trim().strip_prefix('(').and_then(|r| r.strip_suffix(')'))?;
|
||||
inner.parse::<usize>().ok()
|
||||
}).max().unwrap_or(0);
|
||||
let next_name = if max_n == 0 && !exists {
|
||||
base.clone()
|
||||
} else {
|
||||
format!("{} ({}){}", stem, max_n + 1, ext)
|
||||
};
|
||||
|
||||
Ok(Json(FileLookupResponse {
|
||||
file_name: base,
|
||||
exists,
|
||||
matches,
|
||||
next_name,
|
||||
}))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct RegisterFileRequest {
|
||||
file_path: String,
|
||||
@@ -684,6 +772,45 @@ fn generate_visual_search_hash(
|
||||
format!("{:x}", hasher.finalize())[..16].to_string()
|
||||
}
|
||||
|
||||
/// Compute SHA256 for dedup. Returns hex string.
|
||||
fn sha256_file(path: &std::path::Path) -> Option<String> {
|
||||
crate::core::storage::content_hash::compute_sha256(path).ok()
|
||||
}
|
||||
|
||||
/// Resolve name conflict: if file_name collides with existing but content differs,
|
||||
/// append ` (N)` suffix. Returns the resolved file_name.
|
||||
async fn resolve_filename(
|
||||
db: &PostgresDb,
|
||||
file_name: &str,
|
||||
content_hash: &str,
|
||||
) -> String {
|
||||
let table = schema::table_name("videos");
|
||||
let base = file_name.to_string();
|
||||
let dot_pos = base.rfind('.');
|
||||
let (stem, ext) = match dot_pos {
|
||||
Some(p) => (base[..p].to_string(), base[p..].to_string()),
|
||||
None => (base.clone(), String::new()),
|
||||
};
|
||||
let mut candidate = base.clone();
|
||||
let mut attempt = 0usize;
|
||||
loop {
|
||||
// Check if candidate name exists with a DIFFERENT hash (same content = OK)
|
||||
let conflict: Option<String> = sqlx::query_scalar(
|
||||
&format!("SELECT file_uuid FROM {} WHERE file_name = $1 AND (content_hash IS DISTINCT FROM $2 OR content_hash IS NULL)", table)
|
||||
)
|
||||
.bind(&candidate)
|
||||
.bind(content_hash)
|
||||
.fetch_optional(db.pool())
|
||||
.await
|
||||
.unwrap_or(None);
|
||||
if conflict.is_none() {
|
||||
return candidate;
|
||||
}
|
||||
attempt += 1;
|
||||
candidate = format!("{} ({}){}", stem, attempt, ext);
|
||||
}
|
||||
}
|
||||
|
||||
/// 註冊單一檔案(內部函數,不處理 pattern)
|
||||
async fn register_single_file(
|
||||
state: &AppState,
|
||||
@@ -743,11 +870,50 @@ async fn register_single_file(
|
||||
}
|
||||
};
|
||||
|
||||
// Step 1: Compute SHA256 of full file
|
||||
let content_hash = sha256_file(&path).unwrap_or_default();
|
||||
|
||||
// Step 2: Hash check — same content = already registered (regardless of name)
|
||||
let videos_table = schema::table_name("videos");
|
||||
if !content_hash.is_empty() {
|
||||
if let Ok(Some(existing_uuid)) = sqlx::query_scalar::<_, String>(
|
||||
&format!("SELECT file_uuid FROM {} WHERE content_hash = $1 LIMIT 1", videos_table)
|
||||
)
|
||||
.bind(&content_hash)
|
||||
.fetch_optional(db.pool())
|
||||
.await
|
||||
{
|
||||
tracing::info!("[REGISTER] Content hash collision → already registered: {}", existing_uuid);
|
||||
let existing_name: Option<String> = sqlx::query_scalar(
|
||||
&format!("SELECT file_name FROM {} WHERE file_uuid = $1", videos_table)
|
||||
).bind(&existing_uuid).fetch_optional(db.pool()).await.unwrap_or(None);
|
||||
return RegisterFileResponse {
|
||||
success: true,
|
||||
file_uuid: existing_uuid,
|
||||
file_name: existing_name.unwrap_or(file_name),
|
||||
file_path: canonical_path,
|
||||
file_type: None,
|
||||
duration: 0.0,
|
||||
width: 0,
|
||||
height: 0,
|
||||
fps: 0.0,
|
||||
total_frames: 0,
|
||||
registration_time: None,
|
||||
already_exists: true,
|
||||
message: "Content already registered (identical file)".to_string(),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Step 3: Name check — same name but different content → auto-rename
|
||||
let final_name = resolve_filename(&db, &file_name, &content_hash).await;
|
||||
|
||||
// Step 4: Compute UUID (using final resolved name)
|
||||
let videos_table = schema::table_name("videos");
|
||||
let birthday = sqlx::query_scalar::<_, chrono::DateTime<chrono::Utc>>(
|
||||
&format!("SELECT registration_time FROM {} WHERE file_name = $1 AND registration_time IS NOT NULL LIMIT 1", videos_table)
|
||||
)
|
||||
.bind(&file_name)
|
||||
.bind(&final_name)
|
||||
.fetch_optional(db.pool())
|
||||
.await
|
||||
.unwrap_or(None)
|
||||
@@ -759,30 +925,10 @@ async fn register_single_file(
|
||||
&mac_address,
|
||||
&birthday,
|
||||
&canonical_path,
|
||||
&file_name,
|
||||
&final_name,
|
||||
);
|
||||
|
||||
// Check if already exists
|
||||
if let Ok(Some(_)) = db.get_video_by_uuid(&file_uuid).await {
|
||||
tracing::info!("[REGISTER] File already registered: {}", file_uuid);
|
||||
return RegisterFileResponse {
|
||||
success: true,
|
||||
file_uuid,
|
||||
file_name,
|
||||
file_path: canonical_path,
|
||||
file_type: None,
|
||||
duration: 0.0,
|
||||
width: 0,
|
||||
height: 0,
|
||||
fps: 0.0,
|
||||
total_frames: 0,
|
||||
registration_time: None,
|
||||
already_exists: true,
|
||||
message: "File already registered".to_string(),
|
||||
};
|
||||
}
|
||||
|
||||
// Probe
|
||||
// Step 5: Probe
|
||||
let probe_result = match crate::core::probe::probe_video(&canonical_path) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
@@ -857,12 +1003,12 @@ async fn register_single_file(
|
||||
let probe_json = serde_json::to_value(&probe_result).ok();
|
||||
let status = "pending";
|
||||
let _ = sqlx::query(&format!(
|
||||
"INSERT INTO {} (file_uuid, file_path, file_name, file_type, duration, width, height, fps, probe_json, status, registration_time) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, NOW()) ON CONFLICT (file_uuid) DO UPDATE SET file_path = EXCLUDED.file_path, file_name = EXCLUDED.file_name, status = EXCLUDED.status",
|
||||
"INSERT INTO {} (file_uuid, file_path, file_name, file_type, duration, width, height, fps, probe_json, status, content_hash, registration_time) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, NOW()) ON CONFLICT (file_uuid) DO UPDATE SET file_path = EXCLUDED.file_path, file_name = EXCLUDED.file_name, status = EXCLUDED.status, content_hash = EXCLUDED.content_hash",
|
||||
videos_table
|
||||
))
|
||||
.bind(&file_uuid).bind(&canonical_path).bind(&file_name).bind(&final_file_type)
|
||||
.bind(&file_uuid).bind(&canonical_path).bind(&final_name).bind(&final_file_type)
|
||||
.bind(duration).bind(width as i32).bind(height as i32).bind(fps)
|
||||
.bind(&probe_json).bind(status)
|
||||
.bind(&probe_json).bind(status).bind(&content_hash)
|
||||
.execute(db.pool()).await;
|
||||
|
||||
// 若是 video 類型,同步執行 CUT + Scene 分類
|
||||
@@ -2614,6 +2760,7 @@ pub async fn start_server(host: &str, port: u16) -> anyhow::Result<()> {
|
||||
|
||||
let protected_routes = Router::new()
|
||||
.route("/api/v1/files/register", post(register_file))
|
||||
.route("/api/v1/files/lookup", get(lookup_file_by_name))
|
||||
.route("/api/v1/unregister", post(unregister))
|
||||
.route("/api/v1/files/scan", get(scan_files))
|
||||
.route("/api/v1/file/:file_uuid/probe", get(probe_by_uuid))
|
||||
|
||||
Reference in New Issue
Block a user