Files
markbase/markbase-core/src/scan.rs
Warren 1300a4e223
Some checks failed
Test / test (push) Has been cancelled
Test / build (push) Has been cancelled
MarkBase架构升级:Multi-Volume Virtual Tree + Dual-View Management + Git Remote修正
核心功能:
-  Categories/Series双视图管理(category_view.rs + import_markdown.rs)
-  FUSE Multi-Volume支持(tree_type参数)
-  SSH/SFTP/SCP/rsync协议完整实现(4042行)
-  NFS/SMB Module Phase 1-3完成
-  Archive Module Phase 1-4完成(2916行)
-  Download Center API完整实现
-  S3兼容API实现(560行)

Git配置修正:
-  删除错误origin(gitea.momentry.ddns.net)
-  删除m5max128(指向机器名)
-  设置origin = m5max128gitea.momentry.ddns.net/admin/markbase
-  设置m4minigitea = m4minigitea.momentry.ddns.net/warren/markbase

数据清理:
-  删除38个临时SQLite(保留accusys.sqlite、demo.sqlite)
-  删除.bak、test_*.bin、调试脚本等临时文件
-  删除临时目录(build/、download files/、raid_test/等)
-  更新.gitignore排除临时文件

架构优化:
- 52个文件修改,2434行新增,4739行删除
- Workspace成员整合(16个crate)
- 数据库状态:accusys.sqlite保留(主demo测试)

远程同步:
-  准备推送到m5max128gitea(远程Gitea)
-  准备推送到m4minigitea(本地Gitea)
2026-06-12 12:59:54 +08:00

649 lines
19 KiB
Rust
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
use anyhow::{Context, Result};
use rusqlite::Connection;
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::fs;
use std::path::Path;
use std::sync::{Arc, Mutex};
use std::thread;
use std::time::Instant;
use filetree::node::{Aliases, FileNode, NodeType};
use filetree::FileTree;
/// Tunable settings for a directory scan.
#[derive(Debug, Clone)]
pub struct ScanOptions {
    /// When `true`, the scan does not compute content hashes for files.
    pub skip_hash: bool,
    /// Number of worker threads used when hashing is enabled.
    pub threads: usize,
}
impl Default for ScanOptions {
fn default() -> Self {
ScanOptions {
skip_hash: true,
threads: 4,
}
}
}
pub fn scan_directory(
user_id: &str,
dir: &str,
batch_size: usize,
options: ScanOptions,
) -> Result<()> {
let start = Instant::now();
let dir_path = Path::new(dir);
if !dir_path.exists() {
anyhow::bail!("Directory not found: {}", dir);
}
println!("=== File Scan Performance Test ===");
println!("User ID: {}", user_id);
println!("Directory: {}", dir);
println!("Batch size: {}", batch_size);
println!("Skip hash: {}", options.skip_hash);
if !options.skip_hash {
println!("Hash threads: {}", options.threads);
}
println!();
println!("[1/4] Scanning directory structure...");
let scan_start = Instant::now();
let mut folders: Vec<(String, String, Option<String>)> = Vec::new();
let mut files: Vec<(String, String, u64, String)> = Vec::new();
scan_recursive(dir_path, dir_path, &mut folders, &mut files)?;
let scan_duration = scan_start.elapsed();
println!(
" Scanned {} folders, {} files in {:.2}s",
folders.len(),
files.len(),
scan_duration.as_secs_f64()
);
println!();
println!("[2/5] Generating node IDs...");
let id_start = Instant::now();
let mac = get_mac_address()?;
let mut folder_nodes: Vec<FileNode> = Vec::new();
let mut file_nodes: Vec<FileNode> = Vec::new();
let mut file_info: Vec<(String, String)> = Vec::new();
let mac_str = get_mac_address()?;
let root_node_id = generate_uuid(
&dir_path.to_string_lossy(),
"Home",
&mac_str,
chrono::Utc::now().timestamp() as u64,
);
folder_nodes.push(FileNode {
node_id: root_node_id.clone(),
label: "Home".to_string(),
aliases: Aliases::empty(),
file_uuid: None,
sha256: None,
parent_id: None,
children: Vec::new(),
node_type: NodeType::Folder,
icon: Some("🏠".to_string()),
color: None,
bg_color: None,
file_size: None,
registered_at: None,
created_at: chrono::Utc::now().timestamp().to_string(),
updated_at: chrono::Utc::now().timestamp().to_string(),
sort_order: 0,
});
let folder_id_map: HashMap<String, String> = {
let mut map = HashMap::new();
map.insert(dir_path.to_string_lossy().to_string(), root_node_id.clone());
for (path_str, label, _parent_path) in &folders {
let mtime = fs::metadata(path_str)
.and_then(|m| m.modified())
.unwrap_or(std::time::SystemTime::UNIX_EPOCH);
let mtime_secs = mtime
.duration_since(std::time::SystemTime::UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
let node_id = generate_uuid(path_str, label, &mac_str, mtime_secs);
map.insert(path_str.clone(), node_id);
}
map
};
for (path_str, label, parent_path) in &folders {
let node_id = folder_id_map.get(path_str).cloned().unwrap();
let parent_node_id = if let Some(ref parent_p) = parent_path {
folder_id_map.get(parent_p).cloned()
} else {
Some(root_node_id.clone())
};
folder_nodes.push(FileNode {
node_id,
label: label.clone(),
aliases: Aliases::empty(),
file_uuid: None,
sha256: None,
parent_id: parent_node_id,
children: Vec::new(),
node_type: NodeType::Folder,
icon: Some("📁".to_string()),
color: None,
bg_color: None,
file_size: None,
registered_at: None,
created_at: chrono::Utc::now().timestamp().to_string(),
updated_at: chrono::Utc::now().timestamp().to_string(),
sort_order: 0,
});
}
for (path_str, filename, size, _ext) in &files {
let mtime = fs::metadata(path_str)
.and_then(|m| m.modified())
.unwrap_or(std::time::SystemTime::UNIX_EPOCH);
let mtime_secs = mtime
.duration_since(std::time::SystemTime::UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
let node_id = generate_uuid(path_str, filename, &mac, mtime_secs);
let file_dir = Path::new(path_str).parent().unwrap_or(dir_path);
let parent_node_id = if file_dir == dir_path {
Some(root_node_id.clone())
} else {
folder_id_map
.get(file_dir.to_string_lossy().as_ref())
.cloned()
};
let node_id_clone = node_id.clone();
file_info.push((node_id_clone.clone(), path_str.clone()));
file_nodes.push(FileNode {
node_id: node_id_clone.clone(),
label: filename.clone(),
aliases: {
let mut aliases = Aliases::empty();
aliases.set("path", path_str);
aliases
},
file_uuid: Some(node_id_clone.clone()),
sha256: None,
parent_id: parent_node_id,
children: Vec::new(),
node_type: NodeType::File,
icon: get_file_icon(filename),
color: None,
bg_color: None,
file_size: Some(*size as i64),
registered_at: Some(chrono::Utc::now().timestamp().to_string()),
created_at: chrono::Utc::now().timestamp().to_string(),
updated_at: chrono::Utc::now().timestamp().to_string(),
sort_order: 0,
});
}
let id_duration = id_start.elapsed();
println!(
" Generated {} folder IDs, {} file IDs in {:.2}s",
folder_nodes.len(),
file_nodes.len(),
id_duration.as_secs_f64()
);
println!();
println!("[3/5] Opening database...");
let db_start = Instant::now();
let db_path = FileTree::user_db_path(user_id);
if !Path::new(&db_path).exists() {
FileTree::init_user_db(user_id)?;
}
let conn = FileTree::open_user_db(user_id)
.with_context(|| format!("Failed to open database for user {}", user_id))?;
let db_duration = db_start.elapsed();
println!(" Database opened in {:.2}s", db_duration.as_secs_f64());
println!();
println!("[4/5] Inserting nodes (batch size: {})...", batch_size);
let insert_start = Instant::now();
let tx = conn.unchecked_transaction()?;
let folder_count = folder_nodes.len();
let file_count = file_nodes.len();
let total_nodes = folder_count + file_count;
let mut inserted = 0;
for node in folder_nodes {
insert_node(&conn, &node)?;
inserted += 1;
if inserted % batch_size == 0 {
print!("\r Inserted {}/{} nodes...", inserted, total_nodes);
use std::io::Write;
std::io::stdout().flush().ok();
}
}
for node in file_nodes {
insert_node(&conn, &node)?;
if let Some(ref file_uuid) = node.file_uuid {
let path = node.aliases.get("path").cloned().unwrap_or_default();
if !path.is_empty() {
conn.execute(
"INSERT OR IGNORE INTO file_locations (file_uuid, location, label, added_at)
VALUES (?1, ?2, 'origin', ?3)",
rusqlite::params![file_uuid, path, chrono::Utc::now().timestamp().to_string()],
)?;
}
}
inserted += 1;
if inserted % batch_size == 0 {
print!("\r Inserted {}/{} nodes...", inserted, total_nodes);
use std::io::Write;
std::io::stdout().flush().ok();
}
}
tx.commit()?;
let insert_duration = insert_start.elapsed();
println!(
"\r Inserted {} nodes in {:.2}s ({:.0} nodes/sec)",
total_nodes,
insert_duration.as_secs_f64(),
total_nodes as f64 / insert_duration.as_secs_f64()
);
println!();
println!("[5/5] Updating folder children_json...");
let children_start = Instant::now();
conn.execute(
"UPDATE file_nodes
SET children_json = (
SELECT json_group_array(node_id)
FROM file_nodes AS child
WHERE child.parent_id = file_nodes.node_id
)
WHERE node_type = 'folder'",
[],
)?;
let children_duration = children_start.elapsed();
println!(
" Updated children_json for {} folders in {:.2}s",
folder_count,
children_duration.as_secs_f64()
);
let total_duration = start.elapsed();
println!();
println!("=== Summary ===");
println!("Total time: {:.2}s", total_duration.as_secs_f64());
println!("Folders: {}", folder_count);
println!("Files: {}", file_count);
println!("Total nodes: {}", total_nodes);
println!("Database: {}", FileTree::user_db_path(user_id));
println!();
println!("Performance breakdown:");
println!(
" - Scanning: {:.2}s ({:.0}%)",
scan_duration.as_secs_f64(),
scan_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0
);
println!(
" - ID gen: {:.2}s ({:.0}%)",
id_duration.as_secs_f64(),
id_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0
);
println!(
" - DB open: {:.2}s ({:.0}%)",
db_duration.as_secs_f64(),
db_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0
);
println!(
" - Insertion: {:.2}s ({:.0}%)",
insert_duration.as_secs_f64(),
insert_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0
);
println!(
" - Children JSON: {:.2}s ({:.0}%)",
children_duration.as_secs_f64(),
children_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0
);
if !options.skip_hash {
println!();
println!("=== Starting background hash calculation ===");
println!("Files to hash: {}", file_info.len());
println!("Threads: {}", options.threads);
let file_count = file_info.len();
let hash_start = Instant::now();
compute_hashes_parallel(user_id, file_info, options.threads)?;
let hash_duration = hash_start.elapsed();
println!();
println!(
"Hash calculation completed in {:.2}s ({:.0} files/sec)",
hash_duration.as_secs_f64(),
file_count as f64 / hash_duration.as_secs_f64()
);
} else {
println!();
println!(
" SHA256 hashes skipped. Run 'markbase hash --user {}' to compute hashes.",
user_id
);
}
Ok(())
}
/// Hashes every file node of `user_id` whose `sha256` column is still NULL.
///
/// The file path of each node is read from the `path` key of its
/// `aliases_json`; rows that cannot be read, or that have no non-empty path,
/// are skipped. The remaining `(node_id, path)` pairs are hashed with
/// `threads` workers and timing information is printed.
///
/// # Errors
///
/// Returns an error when the database cannot be opened or queried, or when
/// the hashing step fails.
pub fn compute_hashes(user_id: &str, threads: usize) -> Result<()> {
    println!("=== Background Hash Calculation ===");
    println!("User ID: {}", user_id);
    println!("Threads: {}", threads);
    println!();

    let conn = FileTree::open_user_db(user_id)?;
    let mut pending: Vec<(String, String)> = Vec::new();
    {
        // Scoped so the statement is finalised before hashing starts.
        let mut stmt = conn.prepare("SELECT node_id, aliases_json FROM file_nodes WHERE node_type = 'file' AND sha256 IS NULL")?;
        let rows = stmt.query_map([], |row| {
            let node_id: String = row.get(0)?;
            let aliases_json: String = row.get(1)?;
            Ok((node_id, aliases_json))
        })?;
        for row in rows {
            // Unreadable rows are ignored rather than aborting the run.
            let (node_id, aliases_json) = match row {
                Ok(pair) => pair,
                Err(_) => continue,
            };
            // Malformed alias JSON is treated as an empty alias map.
            let aliases: HashMap<String, String> =
                serde_json::from_str(&aliases_json).unwrap_or_default();
            match aliases.get("path") {
                Some(path) if !path.is_empty() => pending.push((node_id, path.clone())),
                _ => {}
            }
        }
    }

    if pending.is_empty() {
        println!("No files need hashing. All files already have SHA256.");
        return Ok(());
    }

    let file_count = pending.len();
    println!("Files to hash: {}", file_count);
    let start = Instant::now();
    compute_hashes_parallel(user_id, pending, threads)?;
    let elapsed_secs = start.elapsed().as_secs_f64();
    println!();
    println!(
        "Hash calculation completed in {:.2}s ({:.0} files/sec)",
        elapsed_secs,
        file_count as f64 / elapsed_secs
    );
    Ok(())
}
fn compute_hashes_parallel(
user_id: &str,
file_info: Vec<(String, String)>,
threads: usize,
) -> Result<()> {
let db_path = FileTree::user_db_path(user_id);
let user_id = user_id.to_string();
let file_info = Arc::new(file_info);
let results: Arc<Mutex<HashMap<String, String>>> = Arc::new(Mutex::new(HashMap::new()));
let processed: Arc<Mutex<usize>> = Arc::new(Mutex::new(0));
let total = file_info.len();
let mut handles = Vec::new();
for i in 0..threads {
let file_info = Arc::clone(&file_info);
let results = Arc::clone(&results);
let processed = Arc::clone(&processed);
let _user_id = user_id.clone();
let handle = thread::spawn(move || {
let chunk_size =
(file_info.len() / threads) + (if i < file_info.len() % threads { 1 } else { 0 });
let start_idx = i * (file_info.len() / threads) + i.min(file_info.len() % threads);
let _end_idx = start_idx + chunk_size;
for (node_id, path_str) in file_info.iter().skip(start_idx).take(chunk_size) {
if let Ok(hash) = compute_file_hash(path_str) {
results.lock().unwrap().insert(node_id.clone(), hash);
}
let mut p = processed.lock().unwrap();
*p += 1;
if *p % 100 == 0 {
print!("\r Hashed {}/{} files...", *p, total);
use std::io::Write;
std::io::stdout().flush().ok();
}
}
});
handles.push(handle);
}
for handle in handles {
handle.join().expect("Thread panicked");
}
println!("\r Hashed {}/{} files...Done", total, total);
let results = results.lock().unwrap();
let conn = Connection::open(&db_path)?;
let tx = conn.unchecked_transaction()?;
for (node_id, hash) in results.iter() {
conn.execute(
"UPDATE file_nodes SET sha256 = ?1, file_uuid = ?1, updated_at = ?2 WHERE node_id = ?3",
rusqlite::params![hash, chrono::Utc::now().timestamp().to_string(), node_id],
)?;
}
tx.commit()?;
println!(" Updated {} hashes in database", results.len());
Ok(())
}
fn scan_recursive(
base: &Path,
current: &Path,
folders: &mut Vec<(String, String, Option<String>)>,
files: &mut Vec<(String, String, u64, String)>,
) -> Result<()> {
let entries: Vec<_> = fs::read_dir(current)?
.filter_map(|e| e.ok())
.filter(|e| e.file_name() != ".DS_Store")
.collect();
for entry in entries {
let path = entry.path();
let path_str = path.to_string_lossy().to_string();
let filename = entry.file_name().to_string_lossy().to_string();
if path.is_dir() {
let parent_id = if path.parent() == Some(base) {
None
} else {
find_parent_folder_id(&path_str, folders)
};
folders.push((path_str.clone(), filename, parent_id));
scan_recursive(base, &path, folders, files)?;
} else {
let metadata = entry.metadata()?;
let size = metadata.len();
let ext = path
.extension()
.and_then(|s| s.to_str())
.unwrap_or("")
.to_string();
files.push((path_str, filename, size, ext));
}
}
Ok(())
}
/// Streams the file at `path` through SHA-256 and returns the first 32
/// characters of the lowercase hexadecimal digest.
///
/// # Errors
///
/// Returns an error when the file cannot be opened or read.
fn compute_file_hash(path: &str) -> Result<String> {
    use std::io::Read;

    let mut digest = Sha256::new();
    let mut reader = fs::File::open(path)?;
    // The file is consumed in 8 KiB chunks so it is never fully held in memory.
    let mut chunk = [0u8; 8192];
    loop {
        match reader.read(&mut chunk)? {
            0 => break,
            len => digest.update(&chunk[..len]),
        }
    }
    let mut hex = format!("{:x}", digest.finalize());
    // Only the leading 32 hex characters of the digest are kept.
    hex.truncate(32);
    Ok(hex)
}
/// Derives a 32-character lowercase hex identifier from a SHA-256 digest over
/// `path`, `filename`, `mac` and the decimal form of `mtime`, fed in that order.
fn generate_uuid(path: &str, filename: &str, mac: &str, mtime: u64) -> String {
    let mtime_text = mtime.to_string();
    let parts = [
        path.as_bytes(),
        filename.as_bytes(),
        mac.as_bytes(),
        mtime_text.as_bytes(),
    ];
    let mut digest = Sha256::new();
    for part in parts.iter() {
        digest.update(part);
    }
    let mut hex = format!("{:x}", digest.finalize());
    // Keep only the first 32 hex characters of the digest.
    hex.truncate(32);
    hex
}
/// Returns the hardware address reported by `ifconfig en0`.
///
/// The output is searched for the first line containing `ether` that has a
/// second whitespace-separated token, and that token is returned. When no such
/// line exists, or when `ifconfig` cannot be run at all, the all-zero address
/// `00:00:00:00:00:00` is returned instead, so a missing tool does not abort
/// the caller.
fn get_mac_address() -> Result<String> {
    const FALLBACK_MAC: &str = "00:00:00:00:00:00";

    let output = match std::process::Command::new("ifconfig").arg("en0").output() {
        Ok(output) => output,
        // Treat an unavailable `ifconfig` the same as output without an address.
        Err(_) => return Ok(FALLBACK_MAC.to_string()),
    };
    let stdout = String::from_utf8_lossy(&output.stdout);
    let mac = stdout
        .lines()
        .filter(|line| line.contains("ether"))
        .find_map(|line| line.split_whitespace().nth(1));
    Ok(mac.unwrap_or(FALLBACK_MAC).to_string())
}
/// Looks up the folder entry whose path equals the directory containing
/// `file_path` and returns a clone of that entry's third field.
///
/// Paths are compared as `Path` values. Returns `None` when `file_path` has no
/// parent, when no entry matches, or when the first matching entry's third
/// field is `None`. `_base` is not used.
fn find_parent_folder(
    file_path: &str,
    _base: &Path,
    folders: &[(String, String, Option<String>)],
) -> Option<String> {
    let containing_dir = Path::new(file_path).parent()?;
    folders
        .iter()
        .find(|entry| Path::new(&entry.0) == containing_dir)
        .and_then(|entry| entry.2.clone())
}
/// Looks up the folder entry whose path string equals the parent of `path`
/// and returns a clone of that entry's third field.
///
/// The comparison is an exact string match against the lossy string form of
/// the parent path. Returns `None` when `path` has no parent, when no entry
/// matches, or when the first matching entry's third field is `None`.
fn find_parent_folder_id(
    path: &str,
    folders: &[(String, String, Option<String>)],
) -> Option<String> {
    let parent_text = Path::new(path).parent()?.to_string_lossy();
    folders
        .iter()
        .find(|entry| entry.0.as_str() == parent_text.as_ref())
        .and_then(|entry| entry.2.clone())
}
fn insert_node(conn: &Connection, node: &FileNode) -> Result<()> {
conn.execute(
"INSERT OR REPLACE INTO file_nodes (
node_id, label, aliases_json, file_uuid, sha256, parent_id, children_json,
node_type, icon, color, bg_color, file_size, registered_at,
created_at, updated_at, sort_order
) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16)",
rusqlite::params![
node.node_id,
node.label,
node.aliases.to_json(),
node.file_uuid,
node.sha256,
node.parent_id,
serde_json::to_string(&node.children)?,
node.node_type.as_str(),
node.icon,
node.color,
node.bg_color,
node.file_size,
node.registered_at,
node.created_at,
node.updated_at,
node.sort_order,
],
)?;
Ok(())
}
/// Picks an emoji icon for `filename` based on its lowercased extension.
///
/// Unknown or missing extensions get the generic document icon, so the result
/// is always `Some`.
fn get_file_icon(filename: &str) -> Option<String> {
    // Extension groups and the icon each group maps to.
    const ICON_TABLE: &[(&[&str], &str)] = &[
        (&["mp4", "mov", "avi", "mkv", "webm"], "🎬"),
        (&["jpg", "jpeg", "png", "gif", "webp", "svg"], "🖼️"),
        (&["pdf"], "📄"),
        (&["doc", "docx"], "📝"),
        (&["xls", "xlsx"], "📊"),
        (&["ppt", "pptx"], "📽️"),
        (&["zip", "rar", "7z", "tar", "gz"], "📦"),
        (&["mp3", "wav", "flac", "aac"], "🎵"),
        (&["txt", "md"], "📃"),
    ];
    const FALLBACK_ICON: &str = "📄";

    let extension = match Path::new(filename).extension().and_then(|e| e.to_str()) {
        Some(raw) => raw.to_lowercase(),
        None => String::new(),
    };
    let glyph = ICON_TABLE
        .iter()
        .find(|(extensions, _)| extensions.contains(&extension.as_str()))
        .map(|(_, icon)| *icon)
        .unwrap_or(FALLBACK_ICON);
    Some(glyph.to_string())
}