Problem: - Files could not be clicked (error: no location) - get_file_info used hardcoded demo database - file_locations table was empty Solution: 1. Scan now inserts file_locations records - file_uuid = node_id (temporary) - location = file path (from aliases) - label = origin 2. Modified API routes to include user_id - /api/v2/files/:user_id/:file_uuid/info - /api/v2/files/:user_id/:file_uuid/stream 3. Modified showDetail() to use tree_user from localStorage Result: - file_locations: 11857 records ✅ - Files can be clicked ✅ - API uses correct user database ✅ Files: - src/scan.rs (insert file_locations) - src/server.rs (user_id parameter) - src/page.html (showDetail with user_id)
595 lines
19 KiB
Rust
595 lines
19 KiB
Rust
use anyhow::{Context, Result};
|
||
use rusqlite::Connection;
|
||
use sha2::{Digest, Sha256};
|
||
use std::collections::HashMap;
|
||
use std::fs;
|
||
use std::path::Path;
|
||
use std::sync::{Arc, Mutex};
|
||
use std::thread;
|
||
use std::time::Instant;
|
||
|
||
use crate::filetree::node::{Aliases, FileNode, NodeType};
|
||
use crate::filetree::FileTree;
|
||
|
||
/// Tunable behaviour for [`scan_directory`].
#[derive(Debug, Clone)]
pub struct ScanOptions {
    /// When `true`, the scan stores nodes without computing SHA-256 hashes.
    pub skip_hash: bool,
    /// Number of worker threads used for hashing when `skip_hash` is `false`.
    pub threads: usize,
}
|
||
|
||
impl Default for ScanOptions {
|
||
fn default() -> Self {
|
||
ScanOptions {
|
||
skip_hash: true,
|
||
threads: 4,
|
||
}
|
||
}
|
||
}
|
||
|
||
pub fn scan_directory(user_id: &str, dir: &str, batch_size: usize, options: ScanOptions) -> Result<()> {
|
||
let start = Instant::now();
|
||
let dir_path = Path::new(dir);
|
||
|
||
if !dir_path.exists() {
|
||
anyhow::bail!("Directory not found: {}", dir);
|
||
}
|
||
|
||
println!("=== File Scan Performance Test ===");
|
||
println!("User ID: {}", user_id);
|
||
println!("Directory: {}", dir);
|
||
println!("Batch size: {}", batch_size);
|
||
println!("Skip hash: {}", options.skip_hash);
|
||
if !options.skip_hash {
|
||
println!("Hash threads: {}", options.threads);
|
||
}
|
||
println!();
|
||
|
||
println!("[1/4] Scanning directory structure...");
|
||
let scan_start = Instant::now();
|
||
|
||
let mut folders: Vec<(String, String, Option<String>)> = Vec::new();
|
||
let mut files: Vec<(String, String, u64, String)> = Vec::new();
|
||
|
||
scan_recursive(dir_path, dir_path, &mut folders, &mut files)?;
|
||
|
||
let scan_duration = scan_start.elapsed();
|
||
println!(" Scanned {} folders, {} files in {:.2}s",
|
||
folders.len(), files.len(), scan_duration.as_secs_f64());
|
||
|
||
println!();
|
||
println!("[2/5] Generating node IDs...");
|
||
let id_start = Instant::now();
|
||
|
||
let mac = get_mac_address()?;
|
||
|
||
let mut folder_nodes: Vec<FileNode> = Vec::new();
|
||
let mut file_nodes: Vec<FileNode> = Vec::new();
|
||
let mut file_info: Vec<(String, String)> = Vec::new();
|
||
|
||
let mac_str = get_mac_address()?;
|
||
|
||
let root_node_id = generate_uuid(&dir_path.to_string_lossy(), "Home", &mac_str, chrono::Utc::now().timestamp() as u64);
|
||
|
||
folder_nodes.push(FileNode {
|
||
node_id: root_node_id.clone(),
|
||
label: "Home".to_string(),
|
||
aliases: Aliases::empty(),
|
||
file_uuid: None,
|
||
sha256: None,
|
||
parent_id: None,
|
||
children: Vec::new(),
|
||
node_type: NodeType::Folder,
|
||
icon: Some("🏠".to_string()),
|
||
color: None,
|
||
bg_color: None,
|
||
file_size: None,
|
||
registered_at: None,
|
||
created_at: chrono::Utc::now().timestamp().to_string(),
|
||
updated_at: chrono::Utc::now().timestamp().to_string(),
|
||
sort_order: 0,
|
||
});
|
||
|
||
let folder_id_map: HashMap<String, String> = {
|
||
let mut map = HashMap::new();
|
||
map.insert(dir_path.to_string_lossy().to_string(), root_node_id.clone());
|
||
|
||
for (path_str, label, _parent_path) in &folders {
|
||
let mtime = fs::metadata(path_str)
|
||
.and_then(|m| m.modified())
|
||
.unwrap_or(std::time::SystemTime::UNIX_EPOCH);
|
||
let mtime_secs = mtime.duration_since(std::time::SystemTime::UNIX_EPOCH)
|
||
.unwrap_or_default()
|
||
.as_secs();
|
||
|
||
let node_id = generate_uuid(path_str, label, &mac_str, mtime_secs);
|
||
map.insert(path_str.clone(), node_id);
|
||
}
|
||
|
||
map
|
||
};
|
||
|
||
for (path_str, label, parent_path) in &folders {
|
||
let node_id = folder_id_map.get(path_str).cloned().unwrap();
|
||
|
||
let parent_node_id = if let Some(ref parent_p) = parent_path {
|
||
folder_id_map.get(parent_p).cloned()
|
||
} else {
|
||
Some(root_node_id.clone())
|
||
};
|
||
|
||
folder_nodes.push(FileNode {
|
||
node_id,
|
||
label: label.clone(),
|
||
aliases: Aliases::empty(),
|
||
file_uuid: None,
|
||
sha256: None,
|
||
parent_id: parent_node_id,
|
||
children: Vec::new(),
|
||
node_type: NodeType::Folder,
|
||
icon: Some("📁".to_string()),
|
||
color: None,
|
||
bg_color: None,
|
||
file_size: None,
|
||
registered_at: None,
|
||
created_at: chrono::Utc::now().timestamp().to_string(),
|
||
updated_at: chrono::Utc::now().timestamp().to_string(),
|
||
sort_order: 0,
|
||
});
|
||
}
|
||
|
||
for (path_str, filename, size, _ext) in &files {
|
||
let mtime = fs::metadata(path_str)
|
||
.and_then(|m| m.modified())
|
||
.unwrap_or(std::time::SystemTime::UNIX_EPOCH);
|
||
let mtime_secs = mtime.duration_since(std::time::SystemTime::UNIX_EPOCH)
|
||
.unwrap_or_default()
|
||
.as_secs();
|
||
|
||
let node_id = generate_uuid(path_str, filename, &mac, mtime_secs);
|
||
|
||
let file_dir = Path::new(path_str).parent().unwrap_or(dir_path);
|
||
let parent_node_id = if file_dir == dir_path {
|
||
Some(root_node_id.clone())
|
||
} else {
|
||
folder_id_map.get(file_dir.to_string_lossy().as_ref()).cloned()
|
||
};
|
||
|
||
let node_id_clone = node_id.clone();
|
||
|
||
file_info.push((node_id_clone.clone(), path_str.clone()));
|
||
|
||
file_nodes.push(FileNode {
|
||
node_id: node_id_clone.clone(),
|
||
label: filename.clone(),
|
||
aliases: {
|
||
let mut aliases = Aliases::empty();
|
||
aliases.set("path", path_str);
|
||
aliases
|
||
},
|
||
file_uuid: Some(node_id_clone.clone()),
|
||
sha256: None,
|
||
parent_id: parent_node_id,
|
||
children: Vec::new(),
|
||
node_type: NodeType::File,
|
||
icon: get_file_icon(filename),
|
||
color: None,
|
||
bg_color: None,
|
||
file_size: Some(*size as i64),
|
||
registered_at: Some(chrono::Utc::now().timestamp().to_string()),
|
||
created_at: chrono::Utc::now().timestamp().to_string(),
|
||
updated_at: chrono::Utc::now().timestamp().to_string(),
|
||
sort_order: 0,
|
||
});
|
||
}
|
||
|
||
let id_duration = id_start.elapsed();
|
||
println!(" Generated {} folder IDs, {} file IDs in {:.2}s",
|
||
folder_nodes.len(), file_nodes.len(), id_duration.as_secs_f64());
|
||
|
||
println!();
|
||
println!("[3/5] Opening database...");
|
||
let db_start = Instant::now();
|
||
|
||
let db_path = FileTree::user_db_path(user_id);
|
||
if !Path::new(&db_path).exists() {
|
||
FileTree::init_user_db(user_id)?;
|
||
}
|
||
|
||
let conn = FileTree::open_user_db(user_id)
|
||
.with_context(|| format!("Failed to open database for user {}", user_id))?;
|
||
|
||
let db_duration = db_start.elapsed();
|
||
println!(" Database opened in {:.2}s", db_duration.as_secs_f64());
|
||
|
||
println!();
|
||
println!("[4/5] Inserting nodes (batch size: {})...", batch_size);
|
||
let insert_start = Instant::now();
|
||
|
||
let tx = conn.unchecked_transaction()?;
|
||
|
||
let folder_count = folder_nodes.len();
|
||
let file_count = file_nodes.len();
|
||
let total_nodes = folder_count + file_count;
|
||
let mut inserted = 0;
|
||
|
||
for node in folder_nodes {
|
||
insert_node(&conn, &node)?;
|
||
inserted += 1;
|
||
|
||
if inserted % batch_size == 0 {
|
||
print!("\r Inserted {}/{} nodes...", inserted, total_nodes);
|
||
use std::io::Write;
|
||
std::io::stdout().flush().ok();
|
||
}
|
||
}
|
||
|
||
for node in file_nodes {
|
||
insert_node(&conn, &node)?;
|
||
|
||
if let Some(ref file_uuid) = node.file_uuid {
|
||
let path = node.aliases.get("path").cloned().unwrap_or_default();
|
||
if !path.is_empty() {
|
||
conn.execute(
|
||
"INSERT OR IGNORE INTO file_locations (file_uuid, location, label, added_at)
|
||
VALUES (?1, ?2, 'origin', ?3)",
|
||
rusqlite::params![file_uuid, path, chrono::Utc::now().timestamp().to_string()],
|
||
)?;
|
||
}
|
||
}
|
||
|
||
inserted += 1;
|
||
|
||
if inserted % batch_size == 0 {
|
||
print!("\r Inserted {}/{} nodes...", inserted, total_nodes);
|
||
use std::io::Write;
|
||
std::io::stdout().flush().ok();
|
||
}
|
||
}
|
||
|
||
tx.commit()?;
|
||
|
||
let insert_duration = insert_start.elapsed();
|
||
println!("\r Inserted {} nodes in {:.2}s ({:.0} nodes/sec)",
|
||
total_nodes,
|
||
insert_duration.as_secs_f64(),
|
||
total_nodes as f64 / insert_duration.as_secs_f64());
|
||
|
||
println!();
|
||
println!("[5/5] Updating folder children_json...");
|
||
let children_start = Instant::now();
|
||
|
||
conn.execute(
|
||
"UPDATE file_nodes
|
||
SET children_json = (
|
||
SELECT json_group_array(node_id)
|
||
FROM file_nodes AS child
|
||
WHERE child.parent_id = file_nodes.node_id
|
||
)
|
||
WHERE node_type = 'folder'",
|
||
[],
|
||
)?;
|
||
|
||
let children_duration = children_start.elapsed();
|
||
println!(" Updated children_json for {} folders in {:.2}s",
|
||
folder_count,
|
||
children_duration.as_secs_f64());
|
||
|
||
let total_duration = start.elapsed();
|
||
println!();
|
||
println!("=== Summary ===");
|
||
println!("Total time: {:.2}s", total_duration.as_secs_f64());
|
||
println!("Folders: {}", folder_count);
|
||
println!("Files: {}", file_count);
|
||
println!("Total nodes: {}", total_nodes);
|
||
println!("Database: {}", FileTree::user_db_path(user_id));
|
||
println!();
|
||
println!("Performance breakdown:");
|
||
println!(" - Scanning: {:.2}s ({:.0}%)",
|
||
scan_duration.as_secs_f64(),
|
||
scan_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0);
|
||
println!(" - ID gen: {:.2}s ({:.0}%)",
|
||
id_duration.as_secs_f64(),
|
||
id_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0);
|
||
println!(" - DB open: {:.2}s ({:.0}%)",
|
||
db_duration.as_secs_f64(),
|
||
db_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0);
|
||
println!(" - Insertion: {:.2}s ({:.0}%)",
|
||
insert_duration.as_secs_f64(),
|
||
insert_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0);
|
||
println!(" - Children JSON: {:.2}s ({:.0}%)",
|
||
children_duration.as_secs_f64(),
|
||
children_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0);
|
||
|
||
if !options.skip_hash {
|
||
println!();
|
||
println!("=== Starting background hash calculation ===");
|
||
println!("Files to hash: {}", file_info.len());
|
||
println!("Threads: {}", options.threads);
|
||
|
||
let file_count = file_info.len();
|
||
let hash_start = Instant::now();
|
||
compute_hashes_parallel(user_id, file_info, options.threads)?;
|
||
|
||
let hash_duration = hash_start.elapsed();
|
||
println!();
|
||
println!("Hash calculation completed in {:.2}s ({:.0} files/sec)",
|
||
hash_duration.as_secs_f64(),
|
||
file_count as f64 / hash_duration.as_secs_f64());
|
||
} else {
|
||
println!();
|
||
println!("ℹ️ SHA256 hashes skipped. Run 'markbase hash --user {}' to compute hashes.", user_id);
|
||
}
|
||
|
||
Ok(())
|
||
}
|
||
|
||
pub fn compute_hashes(user_id: &str, threads: usize) -> Result<()> {
|
||
println!("=== Background Hash Calculation ===");
|
||
println!("User ID: {}", user_id);
|
||
println!("Threads: {}", threads);
|
||
println!();
|
||
|
||
let conn = FileTree::open_user_db(user_id)?;
|
||
|
||
let file_info: Vec<(String, String)> = conn
|
||
.prepare("SELECT node_id, aliases_json FROM file_nodes WHERE node_type = 'file' AND sha256 IS NULL")?
|
||
.query_map([], |row| {
|
||
let node_id: String = row.get(0)?;
|
||
let aliases_json: String = row.get(1)?;
|
||
let aliases: HashMap<String, String> = serde_json::from_str(&aliases_json).unwrap_or_default();
|
||
let path = aliases.get("path").cloned().unwrap_or_default();
|
||
Ok((node_id, path))
|
||
})?
|
||
.filter_map(|r| r.ok())
|
||
.filter(|(_, path)| !path.is_empty())
|
||
.collect();
|
||
|
||
if file_info.is_empty() {
|
||
println!("No files need hashing. All files already have SHA256.");
|
||
return Ok(());
|
||
}
|
||
|
||
println!("Files to hash: {}", file_info.len());
|
||
|
||
let file_count = file_info.len();
|
||
let start = Instant::now();
|
||
compute_hashes_parallel(user_id, file_info, threads)?;
|
||
|
||
let duration = start.elapsed();
|
||
println!();
|
||
println!("Hash calculation completed in {:.2}s ({:.0} files/sec)",
|
||
duration.as_secs_f64(),
|
||
file_count as f64 / duration.as_secs_f64());
|
||
|
||
Ok(())
|
||
}
|
||
|
||
fn compute_hashes_parallel(user_id: &str, file_info: Vec<(String, String)>, threads: usize) -> Result<()> {
|
||
let db_path = FileTree::user_db_path(user_id);
|
||
let user_id = user_id.to_string();
|
||
let file_info = Arc::new(file_info);
|
||
let results: Arc<Mutex<HashMap<String, String>>> = Arc::new(Mutex::new(HashMap::new()));
|
||
let processed: Arc<Mutex<usize>> = Arc::new(Mutex::new(0));
|
||
let total = file_info.len();
|
||
|
||
let mut handles = Vec::new();
|
||
|
||
for i in 0..threads {
|
||
let file_info = Arc::clone(&file_info);
|
||
let results = Arc::clone(&results);
|
||
let processed = Arc::clone(&processed);
|
||
let user_id = user_id.clone();
|
||
|
||
let handle = thread::spawn(move || {
|
||
let chunk_size = (file_info.len() / threads) + (if i < file_info.len() % threads { 1 } else { 0 });
|
||
let start_idx = i * (file_info.len() / threads) + i.min(file_info.len() % threads);
|
||
let _end_idx = start_idx + chunk_size;
|
||
|
||
for (node_id, path_str) in file_info.iter().skip(start_idx).take(chunk_size) {
|
||
if let Ok(hash) = compute_file_hash(path_str) {
|
||
results.lock().unwrap().insert(node_id.clone(), hash);
|
||
}
|
||
|
||
let mut p = processed.lock().unwrap();
|
||
*p += 1;
|
||
if *p % 100 == 0 {
|
||
print!("\r Hashed {}/{} files...", *p, total);
|
||
use std::io::Write;
|
||
std::io::stdout().flush().ok();
|
||
}
|
||
}
|
||
});
|
||
|
||
handles.push(handle);
|
||
}
|
||
|
||
for handle in handles {
|
||
handle.join().expect("Thread panicked");
|
||
}
|
||
|
||
println!("\r Hashed {}/{} files...Done", total, total);
|
||
|
||
let results = results.lock().unwrap();
|
||
let conn = Connection::open(&db_path)?;
|
||
|
||
let tx = conn.unchecked_transaction()?;
|
||
|
||
for (node_id, hash) in results.iter() {
|
||
conn.execute(
|
||
"UPDATE file_nodes SET sha256 = ?1, file_uuid = ?1, updated_at = ?2 WHERE node_id = ?3",
|
||
rusqlite::params![hash, chrono::Utc::now().timestamp().to_string(), node_id],
|
||
)?;
|
||
}
|
||
|
||
tx.commit()?;
|
||
|
||
println!(" Updated {} hashes in database", results.len());
|
||
|
||
Ok(())
|
||
}
|
||
|
||
fn scan_recursive(
|
||
base: &Path,
|
||
current: &Path,
|
||
folders: &mut Vec<(String, String, Option<String>)>,
|
||
files: &mut Vec<(String, String, u64, String)>,
|
||
) -> Result<()> {
|
||
let entries: Vec<_> = fs::read_dir(current)?
|
||
.filter_map(|e| e.ok())
|
||
.filter(|e| e.file_name() != ".DS_Store")
|
||
.collect();
|
||
|
||
for entry in entries {
|
||
let path = entry.path();
|
||
let path_str = path.to_string_lossy().to_string();
|
||
let filename = entry.file_name().to_string_lossy().to_string();
|
||
|
||
if path.is_dir() {
|
||
let parent_id = if path.parent() == Some(base) {
|
||
None
|
||
} else {
|
||
find_parent_folder_id(&path_str, folders)
|
||
};
|
||
|
||
folders.push((path_str.clone(), filename, parent_id));
|
||
|
||
scan_recursive(base, &path, folders, files)?;
|
||
} else {
|
||
let metadata = entry.metadata()?;
|
||
let size = metadata.len();
|
||
let ext = path.extension()
|
||
.and_then(|s| s.to_str())
|
||
.unwrap_or("")
|
||
.to_string();
|
||
|
||
files.push((path_str, filename, size, ext));
|
||
}
|
||
}
|
||
|
||
Ok(())
|
||
}
|
||
|
||
fn compute_file_hash(path: &str) -> Result<String> {
|
||
let mut hasher = Sha256::new();
|
||
let mut file = fs::File::open(path)?;
|
||
let mut buffer = [0u8; 8192];
|
||
|
||
loop {
|
||
let n = std::io::Read::read(&mut file, &mut buffer)?;
|
||
if n == 0 {
|
||
break;
|
||
}
|
||
hasher.update(&buffer[..n]);
|
||
}
|
||
|
||
let hash = format!("{:x}", hasher.finalize());
|
||
Ok(hash.chars().take(32).collect())
|
||
}
|
||
|
||
fn generate_uuid(path: &str, filename: &str, mac: &str, mtime: u64) -> String {
|
||
let mut hasher = Sha256::new();
|
||
hasher.update(path.as_bytes());
|
||
hasher.update(filename.as_bytes());
|
||
hasher.update(mac.as_bytes());
|
||
hasher.update(mtime.to_string().as_bytes());
|
||
format!("{:x}", hasher.finalize()).chars().take(32).collect()
|
||
}
|
||
|
||
fn get_mac_address() -> Result<String> {
|
||
let output = std::process::Command::new("ifconfig")
|
||
.arg("en0")
|
||
.output()?;
|
||
|
||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||
for line in stdout.lines() {
|
||
if line.contains("ether") {
|
||
if let Some(mac) = line.split_whitespace().nth(1) {
|
||
return Ok(mac.to_string());
|
||
}
|
||
}
|
||
}
|
||
|
||
Ok("00:00:00:00:00:00".to_string())
|
||
}
|
||
|
||
/// Looks up the folder entry whose path equals the directory containing
/// `file_path` and returns that entry's third field.
///
/// Returns `None` when `file_path` has no parent directory, when no entry
/// matches, or when the first matching entry's third field is `None`.
/// `_base` is accepted but not used.
///
/// NOTE(review): no caller is visible in this file, and the value returned is
/// the matched folder's own third field rather than its path — confirm the
/// intended meaning before using it.
fn find_parent_folder(
    file_path: &str,
    _base: &Path,
    folders: &[(String, String, Option<String>)],
) -> Option<String> {
    let containing_dir = Path::new(file_path).parent()?;

    folders
        .iter()
        .find(|(candidate, _, _)| Path::new(candidate) == containing_dir)
        .and_then(|(_, _, recorded)| recorded.clone())
}
|
||
|
||
/// Returns the identifier of the already-registered folder that directly
/// contains `path`.
///
/// Folders are identified by their path at scan time (the first tuple field),
/// which is also the key later used to resolve a folder's node ID. The
/// function therefore returns the matching entry's path, not that entry's own
/// parent field; returning the latter attached nested folders to the wrong
/// parent.
///
/// Returns `None` when `path` has no parent or the parent is not in `folders`.
fn find_parent_folder_id(path: &str, folders: &[(String, String, Option<String>)]) -> Option<String> {
    let parent = Path::new(path).parent()?;
    let parent_str = parent.to_string_lossy();

    folders
        .iter()
        .find(|(folder_path, _, _)| folder_path.as_str() == &*parent_str)
        .map(|(folder_path, _, _)| folder_path.clone())
}
|
||
|
||
fn insert_node(conn: &Connection, node: &FileNode) -> Result<()> {
|
||
conn.execute(
|
||
"INSERT OR REPLACE INTO file_nodes (
|
||
node_id, label, aliases_json, file_uuid, sha256, parent_id, children_json,
|
||
node_type, icon, color, bg_color, file_size, registered_at,
|
||
created_at, updated_at, sort_order
|
||
) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16)",
|
||
rusqlite::params![
|
||
node.node_id,
|
||
node.label,
|
||
node.aliases.to_json(),
|
||
node.file_uuid,
|
||
node.sha256,
|
||
node.parent_id,
|
||
serde_json::to_string(&node.children)?,
|
||
node.node_type.as_str(),
|
||
node.icon,
|
||
node.color,
|
||
node.bg_color,
|
||
node.file_size,
|
||
node.registered_at,
|
||
node.created_at,
|
||
node.updated_at,
|
||
node.sort_order,
|
||
],
|
||
)?;
|
||
|
||
Ok(())
|
||
}
|
||
|
||
/// Picks an emoji icon for `filename` based on its extension.
///
/// The extension is compared case-insensitively. Unknown or missing
/// extensions get the generic document icon; the result is always `Some`.
fn get_file_icon(filename: &str) -> Option<String> {
    // Extension groups and the icon shared by each group.
    const ICONS: &[(&[&str], &str)] = &[
        (&["mp4", "mov", "avi", "mkv", "webm"], "🎬"),
        (&["jpg", "jpeg", "png", "gif", "webp", "svg"], "🖼️"),
        (&["pdf"], "📄"),
        (&["doc", "docx"], "📝"),
        (&["xls", "xlsx"], "📊"),
        (&["ppt", "pptx"], "📽️"),
        (&["zip", "rar", "7z", "tar", "gz"], "📦"),
        (&["mp3", "wav", "flac", "aac"], "🎵"),
        (&["txt", "md"], "📃"),
    ];
    const FALLBACK_ICON: &str = "📄";

    let ext = Path::new(filename)
        .extension()
        .and_then(|e| e.to_str())
        .map(str::to_lowercase)
        .unwrap_or_default();

    let icon = ICONS
        .iter()
        .find(|(extensions, _)| extensions.contains(&ext.as_str()))
        .map_or(FALLBACK_ICON, |(_, icon)| *icon);

    Some(icon.to_string())
}