use anyhow::{Context, Result}; use rusqlite::Connection; use sha2::{Digest, Sha256}; use std::collections::HashMap; use std::fs; use std::path::Path; use std::sync::{Arc, Mutex}; use std::thread; use std::time::Instant; use crate::filetree::node::{Aliases, FileNode, NodeType}; use crate::filetree::FileTree; pub struct ScanOptions { pub skip_hash: bool, pub threads: usize, } impl Default for ScanOptions { fn default() -> Self { ScanOptions { skip_hash: true, threads: 4, } } } pub fn scan_directory(user_id: &str, dir: &str, batch_size: usize, options: ScanOptions) -> Result<()> { let start = Instant::now(); let dir_path = Path::new(dir); if !dir_path.exists() { anyhow::bail!("Directory not found: {}", dir); } println!("=== File Scan Performance Test ==="); println!("User ID: {}", user_id); println!("Directory: {}", dir); println!("Batch size: {}", batch_size); println!("Skip hash: {}", options.skip_hash); if !options.skip_hash { println!("Hash threads: {}", options.threads); } println!(); println!("[1/4] Scanning directory structure..."); let scan_start = Instant::now(); let mut folders: Vec<(String, String, Option)> = Vec::new(); let mut files: Vec<(String, String, u64, String)> = Vec::new(); scan_recursive(dir_path, dir_path, &mut folders, &mut files)?; let scan_duration = scan_start.elapsed(); println!(" Scanned {} folders, {} files in {:.2}s", folders.len(), files.len(), scan_duration.as_secs_f64()); println!(); println!("[2/5] Generating node IDs..."); let id_start = Instant::now(); let mac = get_mac_address()?; let mut folder_nodes: Vec = Vec::new(); let mut file_nodes: Vec = Vec::new(); let mut file_info: Vec<(String, String)> = Vec::new(); let mac_str = get_mac_address()?; let root_node_id = generate_uuid(&dir_path.to_string_lossy(), "Home", &mac_str, chrono::Utc::now().timestamp() as u64); folder_nodes.push(FileNode { node_id: root_node_id.clone(), label: "Home".to_string(), aliases: Aliases::empty(), file_uuid: None, sha256: None, parent_id: None, children: Vec::new(), node_type: NodeType::Folder, icon: Some("🏠".to_string()), color: None, bg_color: None, file_size: None, registered_at: None, created_at: chrono::Utc::now().timestamp().to_string(), updated_at: chrono::Utc::now().timestamp().to_string(), sort_order: 0, }); let folder_id_map: HashMap = { let mut map = HashMap::new(); map.insert(dir_path.to_string_lossy().to_string(), root_node_id.clone()); for (path_str, label, _parent_path) in &folders { let mtime = fs::metadata(path_str) .and_then(|m| m.modified()) .unwrap_or(std::time::SystemTime::UNIX_EPOCH); let mtime_secs = mtime.duration_since(std::time::SystemTime::UNIX_EPOCH) .unwrap_or_default() .as_secs(); let node_id = generate_uuid(path_str, label, &mac_str, mtime_secs); map.insert(path_str.clone(), node_id); } map }; for (path_str, label, parent_path) in &folders { let node_id = folder_id_map.get(path_str).cloned().unwrap(); let parent_node_id = if let Some(ref parent_p) = parent_path { folder_id_map.get(parent_p).cloned() } else { Some(root_node_id.clone()) }; folder_nodes.push(FileNode { node_id, label: label.clone(), aliases: Aliases::empty(), file_uuid: None, sha256: None, parent_id: parent_node_id, children: Vec::new(), node_type: NodeType::Folder, icon: Some("📁".to_string()), color: None, bg_color: None, file_size: None, registered_at: None, created_at: chrono::Utc::now().timestamp().to_string(), updated_at: chrono::Utc::now().timestamp().to_string(), sort_order: 0, }); } for (path_str, filename, size, _ext) in &files { let mtime = fs::metadata(path_str) .and_then(|m| m.modified()) .unwrap_or(std::time::SystemTime::UNIX_EPOCH); let mtime_secs = mtime.duration_since(std::time::SystemTime::UNIX_EPOCH) .unwrap_or_default() .as_secs(); let node_id = generate_uuid(path_str, filename, &mac, mtime_secs); let file_dir = Path::new(path_str).parent().unwrap_or(dir_path); let parent_node_id = if file_dir == dir_path { Some(root_node_id.clone()) } else { folder_id_map.get(file_dir.to_string_lossy().as_ref()).cloned() }; let node_id_clone = node_id.clone(); file_info.push((node_id_clone.clone(), path_str.clone())); file_nodes.push(FileNode { node_id: node_id_clone.clone(), label: filename.clone(), aliases: { let mut aliases = Aliases::empty(); aliases.set("path", path_str); aliases }, file_uuid: Some(node_id_clone.clone()), sha256: None, parent_id: parent_node_id, children: Vec::new(), node_type: NodeType::File, icon: get_file_icon(filename), color: None, bg_color: None, file_size: Some(*size as i64), registered_at: Some(chrono::Utc::now().timestamp().to_string()), created_at: chrono::Utc::now().timestamp().to_string(), updated_at: chrono::Utc::now().timestamp().to_string(), sort_order: 0, }); } let id_duration = id_start.elapsed(); println!(" Generated {} folder IDs, {} file IDs in {:.2}s", folder_nodes.len(), file_nodes.len(), id_duration.as_secs_f64()); println!(); println!("[3/5] Opening database..."); let db_start = Instant::now(); let db_path = FileTree::user_db_path(user_id); if !Path::new(&db_path).exists() { FileTree::init_user_db(user_id)?; } let conn = FileTree::open_user_db(user_id) .with_context(|| format!("Failed to open database for user {}", user_id))?; let db_duration = db_start.elapsed(); println!(" Database opened in {:.2}s", db_duration.as_secs_f64()); println!(); println!("[4/5] Inserting nodes (batch size: {})...", batch_size); let insert_start = Instant::now(); let tx = conn.unchecked_transaction()?; let folder_count = folder_nodes.len(); let file_count = file_nodes.len(); let total_nodes = folder_count + file_count; let mut inserted = 0; for node in folder_nodes { insert_node(&conn, &node)?; inserted += 1; if inserted % batch_size == 0 { print!("\r Inserted {}/{} nodes...", inserted, total_nodes); use std::io::Write; std::io::stdout().flush().ok(); } } for node in file_nodes { insert_node(&conn, &node)?; if let Some(ref file_uuid) = node.file_uuid { let path = node.aliases.get("path").cloned().unwrap_or_default(); if !path.is_empty() { conn.execute( "INSERT OR IGNORE INTO file_locations (file_uuid, location, label, added_at) VALUES (?1, ?2, 'origin', ?3)", rusqlite::params![file_uuid, path, chrono::Utc::now().timestamp().to_string()], )?; } } inserted += 1; if inserted % batch_size == 0 { print!("\r Inserted {}/{} nodes...", inserted, total_nodes); use std::io::Write; std::io::stdout().flush().ok(); } } tx.commit()?; let insert_duration = insert_start.elapsed(); println!("\r Inserted {} nodes in {:.2}s ({:.0} nodes/sec)", total_nodes, insert_duration.as_secs_f64(), total_nodes as f64 / insert_duration.as_secs_f64()); println!(); println!("[5/5] Updating folder children_json..."); let children_start = Instant::now(); conn.execute( "UPDATE file_nodes SET children_json = ( SELECT json_group_array(node_id) FROM file_nodes AS child WHERE child.parent_id = file_nodes.node_id ) WHERE node_type = 'folder'", [], )?; let children_duration = children_start.elapsed(); println!(" Updated children_json for {} folders in {:.2}s", folder_count, children_duration.as_secs_f64()); let total_duration = start.elapsed(); println!(); println!("=== Summary ==="); println!("Total time: {:.2}s", total_duration.as_secs_f64()); println!("Folders: {}", folder_count); println!("Files: {}", file_count); println!("Total nodes: {}", total_nodes); println!("Database: {}", FileTree::user_db_path(user_id)); println!(); println!("Performance breakdown:"); println!(" - Scanning: {:.2}s ({:.0}%)", scan_duration.as_secs_f64(), scan_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0); println!(" - ID gen: {:.2}s ({:.0}%)", id_duration.as_secs_f64(), id_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0); println!(" - DB open: {:.2}s ({:.0}%)", db_duration.as_secs_f64(), db_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0); println!(" - Insertion: {:.2}s ({:.0}%)", insert_duration.as_secs_f64(), insert_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0); println!(" - Children JSON: {:.2}s ({:.0}%)", children_duration.as_secs_f64(), children_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0); if !options.skip_hash { println!(); println!("=== Starting background hash calculation ==="); println!("Files to hash: {}", file_info.len()); println!("Threads: {}", options.threads); let file_count = file_info.len(); let hash_start = Instant::now(); compute_hashes_parallel(user_id, file_info, options.threads)?; let hash_duration = hash_start.elapsed(); println!(); println!("Hash calculation completed in {:.2}s ({:.0} files/sec)", hash_duration.as_secs_f64(), file_count as f64 / hash_duration.as_secs_f64()); } else { println!(); println!("â„šī¸ SHA256 hashes skipped. Run 'markbase hash --user {}' to compute hashes.", user_id); } Ok(()) } pub fn compute_hashes(user_id: &str, threads: usize) -> Result<()> { println!("=== Background Hash Calculation ==="); println!("User ID: {}", user_id); println!("Threads: {}", threads); println!(); let conn = FileTree::open_user_db(user_id)?; let file_info: Vec<(String, String)> = conn .prepare("SELECT node_id, aliases_json FROM file_nodes WHERE node_type = 'file' AND sha256 IS NULL")? .query_map([], |row| { let node_id: String = row.get(0)?; let aliases_json: String = row.get(1)?; let aliases: HashMap = serde_json::from_str(&aliases_json).unwrap_or_default(); let path = aliases.get("path").cloned().unwrap_or_default(); Ok((node_id, path)) })? .filter_map(|r| r.ok()) .filter(|(_, path)| !path.is_empty()) .collect(); if file_info.is_empty() { println!("No files need hashing. All files already have SHA256."); return Ok(()); } println!("Files to hash: {}", file_info.len()); let file_count = file_info.len(); let start = Instant::now(); compute_hashes_parallel(user_id, file_info, threads)?; let duration = start.elapsed(); println!(); println!("Hash calculation completed in {:.2}s ({:.0} files/sec)", duration.as_secs_f64(), file_count as f64 / duration.as_secs_f64()); Ok(()) } fn compute_hashes_parallel(user_id: &str, file_info: Vec<(String, String)>, threads: usize) -> Result<()> { let db_path = FileTree::user_db_path(user_id); let user_id = user_id.to_string(); let file_info = Arc::new(file_info); let results: Arc>> = Arc::new(Mutex::new(HashMap::new())); let processed: Arc> = Arc::new(Mutex::new(0)); let total = file_info.len(); let mut handles = Vec::new(); for i in 0..threads { let file_info = Arc::clone(&file_info); let results = Arc::clone(&results); let processed = Arc::clone(&processed); let user_id = user_id.clone(); let handle = thread::spawn(move || { let chunk_size = (file_info.len() / threads) + (if i < file_info.len() % threads { 1 } else { 0 }); let start_idx = i * (file_info.len() / threads) + i.min(file_info.len() % threads); let _end_idx = start_idx + chunk_size; for (node_id, path_str) in file_info.iter().skip(start_idx).take(chunk_size) { if let Ok(hash) = compute_file_hash(path_str) { results.lock().unwrap().insert(node_id.clone(), hash); } let mut p = processed.lock().unwrap(); *p += 1; if *p % 100 == 0 { print!("\r Hashed {}/{} files...", *p, total); use std::io::Write; std::io::stdout().flush().ok(); } } }); handles.push(handle); } for handle in handles { handle.join().expect("Thread panicked"); } println!("\r Hashed {}/{} files...Done", total, total); let results = results.lock().unwrap(); let conn = Connection::open(&db_path)?; let tx = conn.unchecked_transaction()?; for (node_id, hash) in results.iter() { conn.execute( "UPDATE file_nodes SET sha256 = ?1, file_uuid = ?1, updated_at = ?2 WHERE node_id = ?3", rusqlite::params![hash, chrono::Utc::now().timestamp().to_string(), node_id], )?; } tx.commit()?; println!(" Updated {} hashes in database", results.len()); Ok(()) } fn scan_recursive( base: &Path, current: &Path, folders: &mut Vec<(String, String, Option)>, files: &mut Vec<(String, String, u64, String)>, ) -> Result<()> { let entries: Vec<_> = fs::read_dir(current)? .filter_map(|e| e.ok()) .filter(|e| e.file_name() != ".DS_Store") .collect(); for entry in entries { let path = entry.path(); let path_str = path.to_string_lossy().to_string(); let filename = entry.file_name().to_string_lossy().to_string(); if path.is_dir() { let parent_id = if path.parent() == Some(base) { None } else { find_parent_folder_id(&path_str, folders) }; folders.push((path_str.clone(), filename, parent_id)); scan_recursive(base, &path, folders, files)?; } else { let metadata = entry.metadata()?; let size = metadata.len(); let ext = path.extension() .and_then(|s| s.to_str()) .unwrap_or("") .to_string(); files.push((path_str, filename, size, ext)); } } Ok(()) } fn compute_file_hash(path: &str) -> Result { let mut hasher = Sha256::new(); let mut file = fs::File::open(path)?; let mut buffer = [0u8; 8192]; loop { let n = std::io::Read::read(&mut file, &mut buffer)?; if n == 0 { break; } hasher.update(&buffer[..n]); } let hash = format!("{:x}", hasher.finalize()); Ok(hash.chars().take(32).collect()) } fn generate_uuid(path: &str, filename: &str, mac: &str, mtime: u64) -> String { let mut hasher = Sha256::new(); hasher.update(path.as_bytes()); hasher.update(filename.as_bytes()); hasher.update(mac.as_bytes()); hasher.update(mtime.to_string().as_bytes()); format!("{:x}", hasher.finalize()).chars().take(32).collect() } fn get_mac_address() -> Result { let output = std::process::Command::new("ifconfig") .arg("en0") .output()?; let stdout = String::from_utf8_lossy(&output.stdout); for line in stdout.lines() { if line.contains("ether") { if let Some(mac) = line.split_whitespace().nth(1) { return Ok(mac.to_string()); } } } Ok("00:00:00:00:00:00".to_string()) } fn find_parent_folder( file_path: &str, _base: &Path, folders: &[(String, String, Option)], ) -> Option { let file_dir = Path::new(file_path).parent()?; for (folder_path, _, folder_id) in folders { if Path::new(folder_path) == file_dir { return folder_id.clone(); } } None } fn find_parent_folder_id(path: &str, folders: &[(String, String, Option)]) -> Option { let current = Path::new(path); let parent = current.parent()?; let parent_str = parent.to_string_lossy(); for (folder_path, _, folder_id) in folders { if folder_path == &parent_str { return folder_id.clone(); } } None } fn insert_node(conn: &Connection, node: &FileNode) -> Result<()> { conn.execute( "INSERT OR REPLACE INTO file_nodes ( node_id, label, aliases_json, file_uuid, sha256, parent_id, children_json, node_type, icon, color, bg_color, file_size, registered_at, created_at, updated_at, sort_order ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16)", rusqlite::params![ node.node_id, node.label, node.aliases.to_json(), node.file_uuid, node.sha256, node.parent_id, serde_json::to_string(&node.children)?, node.node_type.as_str(), node.icon, node.color, node.bg_color, node.file_size, node.registered_at, node.created_at, node.updated_at, node.sort_order, ], )?; Ok(()) } fn get_file_icon(filename: &str) -> Option { let ext = Path::new(filename) .extension() .and_then(|s| s.to_str()) .unwrap_or("") .to_lowercase(); let icon = match ext.as_str() { "mp4" | "mov" | "avi" | "mkv" | "webm" => "đŸŽŦ", "jpg" | "jpeg" | "png" | "gif" | "webp" | "svg" => "đŸ–ŧī¸", "pdf" => "📄", "doc" | "docx" => "📝", "xls" | "xlsx" => "📊", "ppt" | "pptx" => "đŸ“Ŋī¸", "zip" | "rar" | "7z" | "tar" | "gz" => "đŸ“Ļ", "mp3" | "wav" | "flac" | "aac" => "đŸŽĩ", "txt" | "md" => "📃", _ => "📄", }; Some(icon.to_string()) }