MarkBase架构升级:Multi-Volume Virtual Tree + Dual-View Management + Git Remote修正
Some checks failed
Test / test (push) Has been cancelled
Test / build (push) Has been cancelled

核心功能:
-  Categories/Series双视图管理(category_view.rs + import_markdown.rs)
-  FUSE Multi-Volume支持(tree_type参数)
-  SSH/SFTP/SCP/rsync协议完整实现(4042行)
-  NFS/SMB Module Phase 1-3完成
-  Archive Module Phase 1-4完成(2916行)
-  Download Center API完整实现
-  S3兼容API实现(560行)

Git配置修正:
-  删除错误origin(gitea.momentry.ddns.net)
-  删除m5max128(指向机器名)
-  设置origin = m5max128gitea.momentry.ddns.net/admin/markbase
-  设置m4minigitea = m4minigitea.momentry.ddns.net/warren/markbase

数据清理:
-  删除38个临时SQLite(保留accusys.sqlite、demo.sqlite)
-  删除.bak、test_*.bin、调试脚本等临时文件
-  删除临时目录(build/、download files/、raid_test/等)
-  更新.gitignore排除临时文件

架构优化:
- 52个文件修改,2434行新增,4739行删除
- Workspace成员整合(16个crate)
- 数据库状态:accusys.sqlite保留(主demo测试)

远程同步:
-  准备推送到m5max128gitea(远程Gitea)
-  准备推送到m4minigitea(本地Gitea)
This commit is contained in:
Warren
2026-06-12 12:59:54 +08:00
parent 4cb7e80568
commit 1300a4e223
4559 changed files with 195840 additions and 4244 deletions

View File

@@ -8,8 +8,8 @@ use std::sync::{Arc, Mutex};
use std::thread;
use std::time::Instant;
use crate::filetree::node::{Aliases, FileNode, NodeType};
use crate::filetree::FileTree;
use filetree::node::{Aliases, FileNode, NodeType};
use filetree::FileTree;
pub struct ScanOptions {
pub skip_hash: bool,
@@ -25,14 +25,19 @@ impl Default for ScanOptions {
}
}
pub fn scan_directory(user_id: &str, dir: &str, batch_size: usize, options: ScanOptions) -> Result<()> {
pub fn scan_directory(
user_id: &str,
dir: &str,
batch_size: usize,
options: ScanOptions,
) -> Result<()> {
let start = Instant::now();
let dir_path = Path::new(dir);
if !dir_path.exists() {
anyhow::bail!("Directory not found: {}", dir);
}
println!("=== File Scan Performance Test ===");
println!("User ID: {}", user_id);
println!("Directory: {}", dir);
@@ -42,33 +47,42 @@ pub fn scan_directory(user_id: &str, dir: &str, batch_size: usize, options: Scan
println!("Hash threads: {}", options.threads);
}
println!();
println!("[1/4] Scanning directory structure...");
let scan_start = Instant::now();
let mut folders: Vec<(String, String, Option<String>)> = Vec::new();
let mut files: Vec<(String, String, u64, String)> = Vec::new();
scan_recursive(dir_path, dir_path, &mut folders, &mut files)?;
let scan_duration = scan_start.elapsed();
println!(" Scanned {} folders, {} files in {:.2}s",
folders.len(), files.len(), scan_duration.as_secs_f64());
println!(
" Scanned {} folders, {} files in {:.2}s",
folders.len(),
files.len(),
scan_duration.as_secs_f64()
);
println!();
println!("[2/5] Generating node IDs...");
let id_start = Instant::now();
let mac = get_mac_address()?;
let mut folder_nodes: Vec<FileNode> = Vec::new();
let mut file_nodes: Vec<FileNode> = Vec::new();
let mut file_info: Vec<(String, String)> = Vec::new();
let mac_str = get_mac_address()?;
let root_node_id = generate_uuid(&dir_path.to_string_lossy(), "Home", &mac_str, chrono::Utc::now().timestamp() as u64);
let root_node_id = generate_uuid(
&dir_path.to_string_lossy(),
"Home",
&mac_str,
chrono::Utc::now().timestamp() as u64,
);
folder_nodes.push(FileNode {
node_id: root_node_id.clone(),
label: "Home".to_string(),
@@ -87,35 +101,36 @@ pub fn scan_directory(user_id: &str, dir: &str, batch_size: usize, options: Scan
updated_at: chrono::Utc::now().timestamp().to_string(),
sort_order: 0,
});
let folder_id_map: HashMap<String, String> = {
let mut map = HashMap::new();
map.insert(dir_path.to_string_lossy().to_string(), root_node_id.clone());
for (path_str, label, _parent_path) in &folders {
let mtime = fs::metadata(path_str)
.and_then(|m| m.modified())
.unwrap_or(std::time::SystemTime::UNIX_EPOCH);
let mtime_secs = mtime.duration_since(std::time::SystemTime::UNIX_EPOCH)
let mtime_secs = mtime
.duration_since(std::time::SystemTime::UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
let node_id = generate_uuid(path_str, label, &mac_str, mtime_secs);
map.insert(path_str.clone(), node_id);
}
map
};
for (path_str, label, parent_path) in &folders {
let node_id = folder_id_map.get(path_str).cloned().unwrap();
let parent_node_id = if let Some(ref parent_p) = parent_path {
folder_id_map.get(parent_p).cloned()
} else {
Some(root_node_id.clone())
};
folder_nodes.push(FileNode {
node_id,
label: label.clone(),
@@ -135,28 +150,31 @@ pub fn scan_directory(user_id: &str, dir: &str, batch_size: usize, options: Scan
sort_order: 0,
});
}
for (path_str, filename, size, _ext) in &files {
let mtime = fs::metadata(path_str)
.and_then(|m| m.modified())
.unwrap_or(std::time::SystemTime::UNIX_EPOCH);
let mtime_secs = mtime.duration_since(std::time::SystemTime::UNIX_EPOCH)
let mtime_secs = mtime
.duration_since(std::time::SystemTime::UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
let node_id = generate_uuid(path_str, filename, &mac, mtime_secs);
let file_dir = Path::new(path_str).parent().unwrap_or(dir_path);
let parent_node_id = if file_dir == dir_path {
Some(root_node_id.clone())
} else {
folder_id_map.get(file_dir.to_string_lossy().as_ref()).cloned()
folder_id_map
.get(file_dir.to_string_lossy().as_ref())
.cloned()
};
let node_id_clone = node_id.clone();
file_info.push((node_id_clone.clone(), path_str.clone()));
file_nodes.push(FileNode {
node_id: node_id_clone.clone(),
label: filename.clone(),
@@ -180,51 +198,55 @@ pub fn scan_directory(user_id: &str, dir: &str, batch_size: usize, options: Scan
sort_order: 0,
});
}
let id_duration = id_start.elapsed();
println!(" Generated {} folder IDs, {} file IDs in {:.2}s",
folder_nodes.len(), file_nodes.len(), id_duration.as_secs_f64());
println!(
" Generated {} folder IDs, {} file IDs in {:.2}s",
folder_nodes.len(),
file_nodes.len(),
id_duration.as_secs_f64()
);
println!();
println!("[3/5] Opening database...");
let db_start = Instant::now();
let db_path = FileTree::user_db_path(user_id);
if !Path::new(&db_path).exists() {
FileTree::init_user_db(user_id)?;
}
let conn = FileTree::open_user_db(user_id)
.with_context(|| format!("Failed to open database for user {}", user_id))?;
let db_duration = db_start.elapsed();
println!(" Database opened in {:.2}s", db_duration.as_secs_f64());
println!();
println!("[4/5] Inserting nodes (batch size: {})...", batch_size);
let insert_start = Instant::now();
let tx = conn.unchecked_transaction()?;
let folder_count = folder_nodes.len();
let file_count = file_nodes.len();
let total_nodes = folder_count + file_count;
let mut inserted = 0;
for node in folder_nodes {
insert_node(&conn, &node)?;
inserted += 1;
if inserted % batch_size == 0 {
print!("\r Inserted {}/{} nodes...", inserted, total_nodes);
use std::io::Write;
std::io::stdout().flush().ok();
}
}
for node in file_nodes {
insert_node(&conn, &node)?;
if let Some(ref file_uuid) = node.file_uuid {
let path = node.aliases.get("path").cloned().unwrap_or_default();
if !path.is_empty() {
@@ -235,28 +257,30 @@ pub fn scan_directory(user_id: &str, dir: &str, batch_size: usize, options: Scan
)?;
}
}
inserted += 1;
if inserted % batch_size == 0 {
print!("\r Inserted {}/{} nodes...", inserted, total_nodes);
use std::io::Write;
std::io::stdout().flush().ok();
}
}
tx.commit()?;
let insert_duration = insert_start.elapsed();
println!("\r Inserted {} nodes in {:.2}s ({:.0} nodes/sec)",
println!(
"\r Inserted {} nodes in {:.2}s ({:.0} nodes/sec)",
total_nodes,
insert_duration.as_secs_f64(),
total_nodes as f64 / insert_duration.as_secs_f64());
total_nodes as f64 / insert_duration.as_secs_f64()
);
println!();
println!("[5/5] Updating folder children_json...");
let children_start = Instant::now();
conn.execute(
"UPDATE file_nodes
SET children_json = (
@@ -267,12 +291,14 @@ pub fn scan_directory(user_id: &str, dir: &str, batch_size: usize, options: Scan
WHERE node_type = 'folder'",
[],
)?;
let children_duration = children_start.elapsed();
println!(" Updated children_json for {} folders in {:.2}s",
println!(
" Updated children_json for {} folders in {:.2}s",
folder_count,
children_duration.as_secs_f64());
children_duration.as_secs_f64()
);
let total_duration = start.elapsed();
println!();
println!("=== Summary ===");
@@ -283,42 +309,57 @@ pub fn scan_directory(user_id: &str, dir: &str, batch_size: usize, options: Scan
println!("Database: {}", FileTree::user_db_path(user_id));
println!();
println!("Performance breakdown:");
println!(" - Scanning: {:.2}s ({:.0}%)",
println!(
" - Scanning: {:.2}s ({:.0}%)",
scan_duration.as_secs_f64(),
scan_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0);
println!(" - ID gen: {:.2}s ({:.0}%)",
scan_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0
);
println!(
" - ID gen: {:.2}s ({:.0}%)",
id_duration.as_secs_f64(),
id_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0);
println!(" - DB open: {:.2}s ({:.0}%)",
id_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0
);
println!(
" - DB open: {:.2}s ({:.0}%)",
db_duration.as_secs_f64(),
db_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0);
println!(" - Insertion: {:.2}s ({:.0}%)",
db_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0
);
println!(
" - Insertion: {:.2}s ({:.0}%)",
insert_duration.as_secs_f64(),
insert_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0);
println!(" - Children JSON: {:.2}s ({:.0}%)",
insert_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0
);
println!(
" - Children JSON: {:.2}s ({:.0}%)",
children_duration.as_secs_f64(),
children_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0);
children_duration.as_secs_f64() / total_duration.as_secs_f64() * 100.0
);
if !options.skip_hash {
println!();
println!("=== Starting background hash calculation ===");
println!("Files to hash: {}", file_info.len());
println!("Threads: {}", options.threads);
let file_count = file_info.len();
let hash_start = Instant::now();
compute_hashes_parallel(user_id, file_info, options.threads)?;
let hash_duration = hash_start.elapsed();
println!();
println!("Hash calculation completed in {:.2}s ({:.0} files/sec)",
println!(
"Hash calculation completed in {:.2}s ({:.0} files/sec)",
hash_duration.as_secs_f64(),
file_count as f64 / hash_duration.as_secs_f64());
file_count as f64 / hash_duration.as_secs_f64()
);
} else {
println!();
println!(" SHA256 hashes skipped. Run 'markbase hash --user {}' to compute hashes.", user_id);
println!(
" SHA256 hashes skipped. Run 'markbase hash --user {}' to compute hashes.",
user_id
);
}
Ok(())
}
@@ -327,9 +368,9 @@ pub fn compute_hashes(user_id: &str, threads: usize) -> Result<()> {
println!("User ID: {}", user_id);
println!("Threads: {}", threads);
println!();
let conn = FileTree::open_user_db(user_id)?;
let file_info: Vec<(String, String)> = conn
.prepare("SELECT node_id, aliases_json FROM file_nodes WHERE node_type = 'file' AND sha256 IS NULL")?
.query_map([], |row| {
@@ -342,53 +383,60 @@ pub fn compute_hashes(user_id: &str, threads: usize) -> Result<()> {
.filter_map(|r| r.ok())
.filter(|(_, path)| !path.is_empty())
.collect();
if file_info.is_empty() {
println!("No files need hashing. All files already have SHA256.");
return Ok(());
}
println!("Files to hash: {}", file_info.len());
let file_count = file_info.len();
let start = Instant::now();
compute_hashes_parallel(user_id, file_info, threads)?;
let duration = start.elapsed();
println!();
println!("Hash calculation completed in {:.2}s ({:.0} files/sec)",
println!(
"Hash calculation completed in {:.2}s ({:.0} files/sec)",
duration.as_secs_f64(),
file_count as f64 / duration.as_secs_f64());
file_count as f64 / duration.as_secs_f64()
);
Ok(())
}
fn compute_hashes_parallel(user_id: &str, file_info: Vec<(String, String)>, threads: usize) -> Result<()> {
fn compute_hashes_parallel(
user_id: &str,
file_info: Vec<(String, String)>,
threads: usize,
) -> Result<()> {
let db_path = FileTree::user_db_path(user_id);
let user_id = user_id.to_string();
let file_info = Arc::new(file_info);
let results: Arc<Mutex<HashMap<String, String>>> = Arc::new(Mutex::new(HashMap::new()));
let processed: Arc<Mutex<usize>> = Arc::new(Mutex::new(0));
let total = file_info.len();
let mut handles = Vec::new();
for i in 0..threads {
let file_info = Arc::clone(&file_info);
let results = Arc::clone(&results);
let processed = Arc::clone(&processed);
let _user_id = user_id.clone();
let handle = thread::spawn(move || {
let chunk_size = (file_info.len() / threads) + (if i < file_info.len() % threads { 1 } else { 0 });
let chunk_size =
(file_info.len() / threads) + (if i < file_info.len() % threads { 1 } else { 0 });
let start_idx = i * (file_info.len() / threads) + i.min(file_info.len() % threads);
let _end_idx = start_idx + chunk_size;
for (node_id, path_str) in file_info.iter().skip(start_idx).take(chunk_size) {
if let Ok(hash) = compute_file_hash(path_str) {
results.lock().unwrap().insert(node_id.clone(), hash);
}
let mut p = processed.lock().unwrap();
*p += 1;
if *p % 100 == 0 {
@@ -398,32 +446,32 @@ fn compute_hashes_parallel(user_id: &str, file_info: Vec<(String, String)>, thre
}
}
});
handles.push(handle);
}
for handle in handles {
handle.join().expect("Thread panicked");
}
println!("\r Hashed {}/{} files...Done", total, total);
let results = results.lock().unwrap();
let conn = Connection::open(&db_path)?;
let tx = conn.unchecked_transaction()?;
for (node_id, hash) in results.iter() {
conn.execute(
"UPDATE file_nodes SET sha256 = ?1, file_uuid = ?1, updated_at = ?2 WHERE node_id = ?3",
rusqlite::params![hash, chrono::Utc::now().timestamp().to_string(), node_id],
)?;
}
tx.commit()?;
println!(" Updated {} hashes in database", results.len());
Ok(())
}
@@ -437,34 +485,35 @@ fn scan_recursive(
.filter_map(|e| e.ok())
.filter(|e| e.file_name() != ".DS_Store")
.collect();
for entry in entries {
let path = entry.path();
let path_str = path.to_string_lossy().to_string();
let filename = entry.file_name().to_string_lossy().to_string();
if path.is_dir() {
let parent_id = if path.parent() == Some(base) {
None
} else {
find_parent_folder_id(&path_str, folders)
};
folders.push((path_str.clone(), filename, parent_id));
scan_recursive(base, &path, folders, files)?;
} else {
let metadata = entry.metadata()?;
let size = metadata.len();
let ext = path.extension()
let ext = path
.extension()
.and_then(|s| s.to_str())
.unwrap_or("")
.to_string();
files.push((path_str, filename, size, ext));
}
}
Ok(())
}
@@ -472,7 +521,7 @@ fn compute_file_hash(path: &str) -> Result<String> {
let mut hasher = Sha256::new();
let mut file = fs::File::open(path)?;
let mut buffer = [0u8; 8192];
loop {
let n = std::io::Read::read(&mut file, &mut buffer)?;
if n == 0 {
@@ -480,7 +529,7 @@ fn compute_file_hash(path: &str) -> Result<String> {
}
hasher.update(&buffer[..n]);
}
let hash = format!("{:x}", hasher.finalize());
Ok(hash.chars().take(32).collect())
}
@@ -491,14 +540,15 @@ fn generate_uuid(path: &str, filename: &str, mac: &str, mtime: u64) -> String {
hasher.update(filename.as_bytes());
hasher.update(mac.as_bytes());
hasher.update(mtime.to_string().as_bytes());
format!("{:x}", hasher.finalize()).chars().take(32).collect()
format!("{:x}", hasher.finalize())
.chars()
.take(32)
.collect()
}
fn get_mac_address() -> Result<String> {
let output = std::process::Command::new("ifconfig")
.arg("en0")
.output()?;
let output = std::process::Command::new("ifconfig").arg("en0").output()?;
let stdout = String::from_utf8_lossy(&output.stdout);
for line in stdout.lines() {
if line.contains("ether") {
@@ -507,7 +557,7 @@ fn get_mac_address() -> Result<String> {
}
}
}
Ok("00:00:00:00:00:00".to_string())
}
@@ -517,27 +567,30 @@ fn find_parent_folder(
folders: &[(String, String, Option<String>)],
) -> Option<String> {
let file_dir = Path::new(file_path).parent()?;
for (folder_path, _, folder_id) in folders {
if Path::new(folder_path) == file_dir {
return folder_id.clone();
}
}
None
}
/// Finds the already-recorded folder that directly contains `path`.
///
/// `folders` holds `(folder_path, label, parent_reference)` tuples collected
/// during the directory walk. The returned value is the *path* of the matching
/// parent folder, which the caller stores as the new entry's parent reference
/// and later resolves through a path-keyed node-id map.
///
/// Returns `None` when `path` has no parent component or when the parent
/// directory has not been recorded in `folders`.
fn find_parent_folder_id(
    path: &str,
    folders: &[(String, String, Option<String>)],
) -> Option<String> {
    let parent = Path::new(path).parent()?;
    let parent_str = parent.to_string_lossy();

    // Return the matched folder's own path rather than its stored parent
    // reference: handing back the stored reference would propagate the `None`
    // of top-level folders to every descendant and flatten the hierarchy.
    folders
        .iter()
        .find(|(folder_path, _, _)| folder_path.as_str() == parent_str.as_ref())
        .map(|(folder_path, _, _)| folder_path.clone())
}
@@ -567,7 +620,7 @@ fn insert_node(conn: &Connection, node: &FileNode) -> Result<()> {
node.sort_order,
],
)?;
Ok(())
}
@@ -577,7 +630,7 @@ fn get_file_icon(filename: &str) -> Option<String> {
.and_then(|s| s.to_str())
.unwrap_or("")
.to_lowercase();
let icon = match ext.as_str() {
"mp4" | "mov" | "avi" | "mkv" | "webm" => "🎬",
"jpg" | "jpeg" | "png" | "gif" | "webp" | "svg" => "🖼️",
@@ -590,6 +643,6 @@ fn get_file_icon(filename: &str) -> Option<String> {
"txt" | "md" => "📃",
_ => "📄",
};
Some(icon.to_string())
}
}