Files
markbase/src/scan.rs
Warren 89aa4989da feat: Add file_locations to scan and fix file info API
Problem:
- Files could not be clicked (error: no location)
- get_file_info used hardcoded demo database
- file_locations table was empty

Solution:
1. Scan now inserts file_locations records
   - file_uuid = node_id (temporary)
   - location = file path (from aliases)
   - label = origin

2. Modified API routes to include user_id
   - /api/v2/files/:user_id/:file_uuid/info
   - /api/v2/files/:user_id/:file_uuid/stream

3. Modified showDetail() to use tree_user from localStorage

Result:
- file_locations: 11857 records 
- Files can be clicked 
- API uses correct user database 

Files:
- src/scan.rs (insert file_locations)
- src/server.rs (user_id parameter)
- src/page.html (showDetail with user_id)
2026-05-17 04:29:46 +08:00

595 lines
19 KiB
Rust
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
use anyhow::{Context, Result};
use rusqlite::Connection;
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::fs;
use std::path::Path;
use std::sync::{Arc, Mutex};
use std::thread;
use std::time::Instant;
use crate::filetree::node::{Aliases, FileNode, NodeType};
use crate::filetree::FileTree;
/// Tunable settings for [`scan_directory`].
#[derive(Debug, Clone)]
pub struct ScanOptions {
    /// When `true`, the scan does not compute content hashes; they can be
    /// filled in afterwards with [`compute_hashes`].
    pub skip_hash: bool,
    /// Number of worker threads used for hashing when hashing is enabled.
    pub threads: usize,
}

impl Default for ScanOptions {
    /// Hashing is skipped by default; four threads are used if it is enabled.
    fn default() -> Self {
        Self {
            skip_hash: true,
            threads: 4,
        }
    }
}
pub fn scan_directory(user_id: &str, dir: &str, batch_size: usize, options: ScanOptions) -> Result<()> {
let start = Instant::now();
let dir_path = Path::new(dir);
if !dir_path.exists() {
anyhow::bail!("Directory not found: {}", dir);
}
println!("=== File Scan Performance Test ===");
println!("User ID: {}", user_id);
println!("Directory: {}", dir);
println!("Batch size: {}", batch_size);
println!("Skip hash: {}", options.skip_hash);
if !options.skip_hash {
println!("Hash threads: {}", options.threads);
}
println!();
println!("[1/4] Scanning directory structure...");
let scan_start = Instant::now();
let mut folders: Vec<(String, String, Option<String>)> = Vec::new();
let mut files: Vec<(String, String, u64, String)> = Vec::new();
scan_recursive(dir_path, dir_path, &mut folders, &mut files)?;
let scan_duration = scan_start.elapsed();
println!(" Scanned {} folders, {} files in {:.2}s",
folders.len(), files.len(), scan_duration.as_secs_f64());
println!();
println!("[2/5] Generating node IDs...");
let id_start = Instant::now();
let mac = get_mac_address()?;
let mut folder_nodes: Vec<FileNode> = Vec::new();
let mut file_nodes: Vec<FileNode> = Vec::new();
let mut file_info: Vec<(String, String)> = Vec::new();
let mac_str = get_mac_address()?;
let root_node_id = generate_uuid(&dir_path.to_string_lossy(), "Home", &mac_str, chrono::Utc::now().timestamp() as u64);
folder_nodes.push(FileNode {
node_id: root_node_id.clone(),
label: "Home".to_string(),
aliases: Aliases::empty(),
file_uuid: None,
sha256: None,
parent_id: None,
children: Vec::new(),
node_type: NodeType::Folder,
icon: Some("🏠".to_string()),
color: None,
bg_color: None,
file_size: None,
registered_at: None,
created_at: chrono::Utc::now().timestamp().to_string(),
updated_at: chrono::Utc::now().timestamp().to_string(),
sort_order: 0,
});
let folder_id_map: HashMap<String, String> = {
let mut map = HashMap::new();
map.insert(dir_path.to_string_lossy().to_string(), root_node_id.clone());
for (path_str, label, _parent_path) in &folders {
let mtime = fs::metadata(path_str)
.and_then(|m| m.modified())
.unwrap_or(std::time::SystemTime::UNIX_EPOCH);
let mtime_secs = mtime.duration_since(std::time::SystemTime::UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
let node_id = generate_uuid(path_str, label, &mac_str, mtime_secs);
map.insert(path_str.clone(), node_id);
}
map
};
for (path_str, label, parent_path) in &folders {
let node_id = folder_id_map.get(path_str).cloned().unwrap();
let parent_node_id = if let Some(ref parent_p) = parent_path {
folder_id_map.get(parent_p).cloned()
} else {
Some(root_node_id.clone())
};
folder_nodes.push(FileNode {
node_id,
label: label.clone(),
aliases: Aliases::empty(),
file_uuid: None,
sha256: None,
parent_id: parent_node_id,
children: Vec::new(),
node_type: NodeType::Folder,
icon: Some("📁".to_string()),
color: None,
bg_color: None,
file_size: None,
registered_at: None,
created_at: chrono::Utc::now().timestamp().to_string(),
updated_at: chrono::Utc::now().timestamp().to_string(),
sort_order: 0,
});
}
for (path_str, filename, size, _ext) in &files {
let mtime = fs::metadata(path_str)
.and_then(|m| m.modified())
.unwrap_or(std::time::SystemTime::UNIX_EPOCH);
let mtime_secs = mtime.duration_since(std::time::SystemTime::UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
let node_id = generate_uuid(path_str, filename, &mac, mtime_secs);
let file_dir = Path::new(path_str).parent().unwrap_or(dir_path);
let parent_node_id = if file_dir == dir_path {
Some(root_node_id.clone())
} else {
folder_id_map.get(file_dir.to_string_lossy().as_ref()).cloned()
};
let node_id_clone = node_id.clone();
file_info.push((node_id_clone.clone(), path_str.clone()));
file_nodes.push(FileNode {
node_id: node_id_clone.clone(),
label: filename.clone(),
aliases: {
let mut aliases = Aliases::empty();
aliases.set("path", path_str);
aliases
},
file_uuid: Some(node_id_clone.clone()),
sha256: None,
parent_id: parent_node_id,
children: Vec::new(),
node_type: NodeType::File,
icon: get_file_icon(filename),
color: None,
bg_color: None,
file_size: Some(*size as i64),
registered_at: Some(chrono::Utc::now().timestamp().to_string()),
created_at: chrono::Utc::now().timestamp().to_string(),
updated_at: chrono::Utc::now().timestamp().to_string(),
sort_order: 0,
});
}
let id_duration = id_start.elapsed();
println!(" Generated {} folder IDs, {} file IDs in {:.2}s",
folder_nodes.len(), file_nodes.len(), id_duration.as_secs_f64());
println!();
println!("[3/5] Opening database...");
let db_start = Instant::now();
let db_path = FileTree::user_db_path(user_id);
if !Path::new(&db_path).exists() {
FileTree::init_user_db(user_id)?;
}
let conn = FileTree::open_user_db(user_id)
.with_context(|| format!("Failed to open database for user {}", user_id))?;
let db_duration = db_start.elapsed();
println!(" Database opened in {:.2}s", db_duration.as_secs_f64());
println!();
println!("[4/5] Inserting nodes (batch size: {})...", batch_size);
let insert_start = Instant::now();
let tx = conn.unchecked_transaction()?;
let folder_count = folder_nodes.len();
let file_count = file_nodes.len();
let total_nodes = folder_count + file_count;
let mut inserted = 0;
for node in folder_nodes {
insert_node(&conn, &node)?;
inserted += 1;
if inserted % batch_size == 0 {
print!("\r Inserted {}/{} nodes...", inserted, total_nodes);
use std::io::Write;
std::io::stdout().flush().ok();
}
}
for node in file_nodes {
insert_node(&conn, &node)?;
if let Some(ref file_uuid) = node.file_uuid {
let path = node.aliases.get("path").cloned().unwrap_or_default();
if !path.is_empty() {
conn.execute(
"INSERT OR IGNORE INTO file_locations (file_uuid, location, label, added_at)
VALUES (?1, ?2, 'origin', ?3)",
rusqlite::params![file_uuid, path, chrono::Utc::now().timestamp().to_string()],
)?;
}
}
inserted += 1;
if inserted % batch_size == 0 {
print!("\r Inserted {}/{} nodes...", inserted, total_nodes);
use std::io::Write;
std::io::stdout().flush().ok();
}
}
tx.commit()?;
let insert_duration = insert_start.elapsed();
println!("\r Inserted {} nodes in {:.2}s ({:.0} nodes/sec)",
total_nodes,
insert_duration.as_secs_f64(),
total_nodes as f64 / insert_duration.as_secs_f64());
println!();
println!("[5/5] Updating folder children_json...");
let children_start = Instant::now();
conn.execute(
"UPDATE file_nodes
SET children_json = (
SELECT json_group_array(node_id)
FROM file_nodes AS child
WHERE child.parent_id = file_nodes.node_id
)
WHERE node_type = 'folder'",
[],
)?;
let children_duration = children_start.elapsed();
println!(" Updated children_json for {} folders in {:.2}s",
folder_count,
children_duration.as_secs_f64());
let total_duration = start.elapsed();
println!();
println!("=== Summary ===");
println!("Total time: {:.2}s", total_duration.as_secs_f64());
println!("Folders: {}", folder_count);
println!("Files: {}", file_count);
println!("Total nodes: {}", total_nodes);
println!("Database: {}", FileTree::user_db_path(user_id));
println!();
println!("Performance breakdown:");
println!(" - Scanning: {:.2}s ({:.0}%)",
scan_duration.as_secs_f64(),
scan_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0);
println!(" - ID gen: {:.2}s ({:.0}%)",
id_duration.as_secs_f64(),
id_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0);
println!(" - DB open: {:.2}s ({:.0}%)",
db_duration.as_secs_f64(),
db_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0);
println!(" - Insertion: {:.2}s ({:.0}%)",
insert_duration.as_secs_f64(),
insert_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0);
println!(" - Children JSON: {:.2}s ({:.0}%)",
children_duration.as_secs_f64(),
children_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0);
if !options.skip_hash {
println!();
println!("=== Starting background hash calculation ===");
println!("Files to hash: {}", file_info.len());
println!("Threads: {}", options.threads);
let file_count = file_info.len();
let hash_start = Instant::now();
compute_hashes_parallel(user_id, file_info, options.threads)?;
let hash_duration = hash_start.elapsed();
println!();
println!("Hash calculation completed in {:.2}s ({:.0} files/sec)",
hash_duration.as_secs_f64(),
file_count as f64 / hash_duration.as_secs_f64());
} else {
println!();
println!(" SHA256 hashes skipped. Run 'markbase hash --user {}' to compute hashes.", user_id);
}
Ok(())
}
/// Computes hashes for every file node of `user_id` that has no `sha256`
/// value yet, using `threads` worker threads.
///
/// File paths are read from the `path` entry of each node's `aliases_json`.
/// Rows that cannot be read, whose aliases cannot be parsed, or that have no
/// path are skipped. Returns early when nothing needs hashing.
///
/// # Errors
///
/// Returns an error if the user database cannot be opened or queried, or if
/// the hashing pass itself fails.
pub fn compute_hashes(user_id: &str, threads: usize) -> Result<()> {
    println!("=== Background Hash Calculation ===");
    println!("User ID: {}", user_id);
    println!("Threads: {}", threads);
    println!();

    let conn = FileTree::open_user_db(user_id)?;
    let pending: Vec<(String, String)> = {
        let mut stmt = conn
            .prepare("SELECT node_id, aliases_json FROM file_nodes WHERE node_type = 'file' AND sha256 IS NULL")?;
        let rows = stmt.query_map([], |row| {
            let node_id: String = row.get(0)?;
            let aliases_json: String = row.get(1)?;
            // Unparseable aliases behave like an empty map, i.e. no path.
            let mut aliases: HashMap<String, String> =
                serde_json::from_str(&aliases_json).unwrap_or_default();
            let path = aliases.remove("path").unwrap_or_default();
            Ok((node_id, path))
        })?;

        let mut collected = Vec::new();
        for row in rows {
            if let Ok((node_id, path)) = row {
                if !path.is_empty() {
                    collected.push((node_id, path));
                }
            }
        }
        collected
    };

    if pending.is_empty() {
        println!("No files need hashing. All files already have SHA256.");
        return Ok(());
    }

    let pending_count = pending.len();
    println!("Files to hash: {}", pending_count);

    let timer = Instant::now();
    compute_hashes_parallel(user_id, pending, threads)?;
    let elapsed_secs = timer.elapsed().as_secs_f64();

    println!();
    println!("Hash calculation completed in {:.2}s ({:.0} files/sec)",
        elapsed_secs,
        pending_count as f64 / elapsed_secs);
    Ok(())
}
fn compute_hashes_parallel(user_id: &str, file_info: Vec<(String, String)>, threads: usize) -> Result<()> {
let db_path = FileTree::user_db_path(user_id);
let user_id = user_id.to_string();
let file_info = Arc::new(file_info);
let results: Arc<Mutex<HashMap<String, String>>> = Arc::new(Mutex::new(HashMap::new()));
let processed: Arc<Mutex<usize>> = Arc::new(Mutex::new(0));
let total = file_info.len();
let mut handles = Vec::new();
for i in 0..threads {
let file_info = Arc::clone(&file_info);
let results = Arc::clone(&results);
let processed = Arc::clone(&processed);
let user_id = user_id.clone();
let handle = thread::spawn(move || {
let chunk_size = (file_info.len() / threads) + (if i < file_info.len() % threads { 1 } else { 0 });
let start_idx = i * (file_info.len() / threads) + i.min(file_info.len() % threads);
let _end_idx = start_idx + chunk_size;
for (node_id, path_str) in file_info.iter().skip(start_idx).take(chunk_size) {
if let Ok(hash) = compute_file_hash(path_str) {
results.lock().unwrap().insert(node_id.clone(), hash);
}
let mut p = processed.lock().unwrap();
*p += 1;
if *p % 100 == 0 {
print!("\r Hashed {}/{} files...", *p, total);
use std::io::Write;
std::io::stdout().flush().ok();
}
}
});
handles.push(handle);
}
for handle in handles {
handle.join().expect("Thread panicked");
}
println!("\r Hashed {}/{} files...Done", total, total);
let results = results.lock().unwrap();
let conn = Connection::open(&db_path)?;
let tx = conn.unchecked_transaction()?;
for (node_id, hash) in results.iter() {
conn.execute(
"UPDATE file_nodes SET sha256 = ?1, file_uuid = ?1, updated_at = ?2 WHERE node_id = ?3",
rusqlite::params![hash, chrono::Utc::now().timestamp().to_string(), node_id],
)?;
}
tx.commit()?;
println!(" Updated {} hashes in database", results.len());
Ok(())
}
/// Walks `current` depth-first and appends what it finds to `folders` and
/// `files`.
///
/// * `folders` receives `(path, name, parent)` for every directory. `parent`
///   is `None` for directories directly below `base`; otherwise it is
///   whatever `find_parent_folder_id` returns for the directory's path. A
///   directory is pushed before its contents are visited.
/// * `files` receives `(path, name, size_in_bytes, extension)` for every
///   other entry; the extension is empty when there is none or it is not
///   valid UTF-8.
///
/// Entries named `.DS_Store` and entries that cannot be read from the
/// directory listing are ignored.
///
/// # Errors
///
/// Returns an error if a directory cannot be listed or a file's metadata
/// cannot be read.
fn scan_recursive(
    base: &Path,
    current: &Path,
    folders: &mut Vec<(String, String, Option<String>)>,
    files: &mut Vec<(String, String, u64, String)>,
) -> Result<()> {
    // The listing is collected up front, before any recursion starts.
    let listing: Vec<_> = fs::read_dir(current)?
        .flatten()
        .filter(|e| e.file_name() != ".DS_Store")
        .collect();

    for item in listing {
        let item_path = item.path();
        let item_path_str = item_path.to_string_lossy().to_string();
        let item_name = item.file_name().to_string_lossy().to_string();

        if !item_path.is_dir() {
            let byte_len = item.metadata()?.len();
            let extension = match item_path.extension().and_then(|s| s.to_str()) {
                Some(ext) => ext.to_string(),
                None => String::new(),
            };
            files.push((item_path_str, item_name, byte_len, extension));
            continue;
        }

        let parent = match item_path.parent() {
            Some(dir) if dir == base => None,
            _ => find_parent_folder_id(&item_path_str, folders),
        };
        folders.push((item_path_str, item_name, parent));
        scan_recursive(base, &item_path, folders, files)?;
    }
    Ok(())
}
/// Returns the first 32 lowercase hex characters of the SHA-256 digest of the
/// file at `path`.
///
/// The file is read in 8 KiB chunks, so it is never loaded into memory as a
/// whole.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or read.
fn compute_file_hash(path: &str) -> Result<String> {
    let mut digest = Sha256::new();
    let mut reader = fs::File::open(path)?;
    let mut chunk = [0u8; 8192];
    loop {
        match std::io::Read::read(&mut reader, &mut chunk)? {
            0 => break,
            len => digest.update(&chunk[..len]),
        }
    }
    // The hex digest is pure ASCII, so truncating by bytes keeps 32 chars.
    let mut hex = format!("{:x}", digest.finalize());
    hex.truncate(32);
    Ok(hex)
}
/// Derives a 32-character hex identifier from a path, a file name, a MAC
/// address and a modification time.
///
/// The four inputs are fed to SHA-256 in that order, with `mtime` in decimal
/// text form and no separators in between; the identifier is the first 32 hex
/// characters of the digest. Identical inputs always give the same result.
fn generate_uuid(path: &str, filename: &str, mac: &str, mtime: u64) -> String {
    let mtime_text = mtime.to_string();
    let mut digest = Sha256::new();
    for part in [path, filename, mac, mtime_text.as_str()].iter() {
        digest.update(part.as_bytes());
    }
    // The hex digest is pure ASCII, so truncating by bytes keeps 32 chars.
    let mut hex = format!("{:x}", digest.finalize());
    hex.truncate(32);
    hex
}
/// Returns the hardware address that `ifconfig en0` reports.
///
/// The address is the second whitespace-separated token of the first output
/// line that contains `ether` and has such a token. If no line qualifies,
/// the placeholder `00:00:00:00:00:00` is returned.
///
/// # Errors
///
/// Returns an error if the `ifconfig` command cannot be run.
fn get_mac_address() -> Result<String> {
    let output = std::process::Command::new("ifconfig")
        .arg("en0")
        .output()?;
    let report = String::from_utf8_lossy(&output.stdout);

    let address = report
        .lines()
        .filter(|line| line.contains("ether"))
        .filter_map(|line| line.split_whitespace().nth(1))
        .next();

    Ok(match address {
        Some(mac) => mac.to_string(),
        None => "00:00:00:00:00:00".to_string(),
    })
}
/// Looks up the folder entry whose path equals the directory containing
/// `file_path` and returns that entry's third field.
///
/// Returns `None` when `file_path` has no parent directory, when no entry in
/// `folders` matches, or when the first matching entry's third field is
/// `None`. `_base` is accepted but not used.
///
/// NOTE(review): no caller of this function is visible in this file.
fn find_parent_folder(
    file_path: &str,
    _base: &Path,
    folders: &[(String, String, Option<String>)],
) -> Option<String> {
    let containing_dir = Path::new(file_path).parent()?;
    folders
        .iter()
        .find(|(candidate, _, _)| Path::new(candidate) == containing_dir)
        .and_then(|(_, _, third)| third.clone())
}
/// Returns the lookup key of the folder that directly contains `path`.
///
/// `folders` holds `(path, name, parent)` entries collected so far. The
/// result is the stored path of the entry whose path equals the parent
/// directory of `path`; `scan_directory` resolves that path to a node ID via
/// its path-to-ID map. Returns `None` when `path` has no parent directory or
/// the parent is not among `folders`.
///
/// The previous implementation returned the matching entry's own `parent`
/// field instead. That field is `None` for top-level folders and therefore
/// became `None` for every deeper level too, which attached all nested
/// folders directly to the root.
fn find_parent_folder_id(path: &str, folders: &[(String, String, Option<String>)]) -> Option<String> {
    let parent = Path::new(path).parent()?.to_string_lossy();
    folders
        .iter()
        .find(|(folder_path, _, _)| folder_path.as_str() == &*parent)
        .map(|(folder_path, _, _)| folder_path.clone())
}
fn insert_node(conn: &Connection, node: &FileNode) -> Result<()> {
conn.execute(
"INSERT OR REPLACE INTO file_nodes (
node_id, label, aliases_json, file_uuid, sha256, parent_id, children_json,
node_type, icon, color, bg_color, file_size, registered_at,
created_at, updated_at, sort_order
) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16)",
rusqlite::params![
node.node_id,
node.label,
node.aliases.to_json(),
node.file_uuid,
node.sha256,
node.parent_id,
serde_json::to_string(&node.children)?,
node.node_type.as_str(),
node.icon,
node.color,
node.bg_color,
node.file_size,
node.registered_at,
node.created_at,
node.updated_at,
node.sort_order,
],
)?;
Ok(())
}
/// Picks an emoji icon for `filename` based on its extension.
///
/// The extension is compared case-insensitively. Names without an extension
/// or with an unrecognised one get the generic document icon, so the result
/// is always `Some`.
fn get_file_icon(filename: &str) -> Option<String> {
    // Extension groups and the icon shared by each group.
    const ICON_TABLE: &[(&[&str], &str)] = &[
        (&["mp4", "mov", "avi", "mkv", "webm"], "🎬"),
        (&["jpg", "jpeg", "png", "gif", "webp", "svg"], "🖼️"),
        (&["pdf"], "📄"),
        (&["doc", "docx"], "📝"),
        (&["xls", "xlsx"], "📊"),
        (&["ppt", "pptx"], "📽️"),
        (&["zip", "rar", "7z", "tar", "gz"], "📦"),
        (&["mp3", "wav", "flac", "aac"], "🎵"),
        (&["txt", "md"], "📃"),
    ];

    // Full Unicode lowercasing, not ASCII-only, to match every input the
    // same way as before.
    let extension = match Path::new(filename).extension().and_then(|s| s.to_str()) {
        Some(ext) => ext.to_lowercase(),
        None => String::new(),
    };

    let icon = ICON_TABLE
        .iter()
        .find(|(group, _)| group.iter().any(|known| *known == extension.as_str()))
        .map(|(_, icon)| *icon)
        .unwrap_or("📄");
    Some(icon.to_string())
}