Files
markbase/filetree-hybrid/src/lib.rs
Warren 1300a4e223
Some checks failed
Test / test (push) Has been cancelled
Test / build (push) Has been cancelled
MarkBase架构升级:Multi-Volume Virtual Tree + Dual-View Management + Git Remote修正
核心功能:
-  Categories/Series双视图管理(category_view.rs + import_markdown.rs)
-  FUSE Multi-Volume支持(tree_type参数)
-  SSH/SFTP/SCP/rsync协议完整实现(4042行)
-  NFS/SMB Module Phase 1-3完成
-  Archive Module Phase 1-4完成(2916行)
-  Download Center API完整实现
-  S3兼容API实现(560行)

Git配置修正:
-  删除错误origin(gitea.momentry.ddns.net)
-  删除m5max128(指向机器名)
-  设置origin = m5max128gitea.momentry.ddns.net/admin/markbase
-  设置m4minigitea = m4minigitea.momentry.ddns.net/warren/markbase

数据清理:
-  删除38个临时SQLite(保留accusys.sqlite、demo.sqlite)
-  删除.bak、test_*.bin、调试脚本等临时文件
-  删除临时目录(build/、download files/、raid_test/等)
-  更新.gitignore排除临时文件

架构优化:
- 52个文件修改,2434行新增,4739行删除
- Workspace成员整合(16个crate)
- 数据库状态:accusys.sqlite保留(主demo测试)

远程同步:
-  准备推送到m5max128gitea(远程Gitea)
-  准备推送到m4minigitea(本地Gitea)
2026-06-12 12:59:54 +08:00

655 lines
20 KiB
Rust

use anyhow::{Context, Result};
use rusqlite::Connection;
use serde::{Deserialize, Serialize};
use sled::Db;
use std::collections::HashMap;
use std::str::FromStr;
use std::sync::Arc;
use std::time::{Duration, Instant};
use uuid::Uuid;
/// One row of the `file_nodes` SQLite table: a node of the file tree.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileNode {
/// Primary key of the row; `HybridRouter::new_folder` generates it as a hyphen-less UUID v4.
pub node_id: String,
/// Node label; this is the column matched by `warmup_cache_by_pattern`.
pub label: String,
/// String-to-string alias map, persisted as a JSON object in `aliases_json`.
pub aliases: Aliases,
/// Value of the indexed `file_uuid` column — presumably links to a stored file; TODO confirm.
pub file_uuid: Option<String>,
/// Value of the indexed `sha256` column — presumably a content hash; TODO confirm.
pub sha256: Option<String>,
/// ID of the parent node; `get_children` selects rows by this column.
pub parent_id: Option<String>,
/// Persisted as a JSON array in `children_json` — presumably child node IDs; TODO confirm.
pub children: Vec<String>,
/// Kind of node, stored as the string returned by `NodeType::as_str`.
pub node_type: NodeType,
/// Optional presentation attribute, stored as text.
pub icon: Option<String>,
/// Optional presentation attribute, stored as text.
pub color: Option<String>,
/// Optional presentation attribute, stored as text.
pub bg_color: Option<String>,
/// Optional size (`INTEGER` column); unit is not established here — presumably bytes.
pub file_size: Option<i64>,
/// Optional timestamp text; no visible code in this file sets it to `Some`.
pub registered_at: Option<String>,
/// Creation timestamp text; `new_folder` writes RFC 3339 UTC.
pub created_at: String,
/// Last-update timestamp text; `new_folder` writes RFC 3339 UTC.
pub updated_at: String,
/// First ordering key used by `get_children` (ascending).
pub sort_order: i32,
}
/// String-to-string alias map attached to a `FileNode`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Aliases {
/// The alias entries. `flatten` makes serde (de)serialize them as this
/// struct's own fields rather than under a nested `map` key.
#[serde(flatten)]
pub map: HashMap<String, String>,
}
impl Aliases {
    /// Creates an alias set with no entries.
    pub fn empty() -> Self {
        Self {
            map: HashMap::new(),
        }
    }

    /// Encodes the alias map as a JSON object string.
    ///
    /// Falls back to `"{}"` if serialization fails.
    pub fn to_json(&self) -> String {
        match serde_json::to_string(&self.map) {
            Ok(encoded) => encoded,
            Err(_) => "{}".to_string(),
        }
    }

    /// Decodes a JSON object string into an alias set.
    ///
    /// Any parse failure yields an empty map instead of an error.
    pub fn from_json(s: &str) -> Self {
        let parsed = serde_json::from_str::<HashMap<String, String>>(s);
        Self {
            map: parsed.unwrap_or_default(),
        }
    }
}
/// Kind of a tree node.
///
/// Serde uses snake_case names (`folder`, `file`, `dynamic_layer`), which are
/// the same strings produced by `as_str` and accepted by `from_str`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum NodeType {
/// Stored as `"folder"`; also the SQLite column default for `node_type`.
Folder,
/// Stored as `"file"`.
File,
/// Stored as `"dynamic_layer"`.
DynamicLayer,
}
impl NodeType {
pub fn as_str(&self) -> &'static str {
match self {
NodeType::Folder => "folder",
NodeType::File => "file",
NodeType::DynamicLayer => "dynamic_layer",
}
}
}
impl FromStr for NodeType {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"folder" => Ok(NodeType::Folder),
"file" => Ok(NodeType::File),
"dynamic_layer" => Ok(NodeType::DynamicLayer),
_ => Ok(NodeType::Folder),
}
}
}
/// Subset of a `FileNode` that is stored as JSON in the sled `metadata_cache` tree.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedMetadata {
/// ID of the cached node; also used as the sled key.
pub node_id: String,
/// Copy of `FileNode::label`.
pub label: String,
/// Copy of `FileNode::parent_id`.
pub parent_id: Option<String>,
/// Copy of `FileNode::node_type`.
pub node_type: NodeType,
/// Copy of `FileNode::file_size`.
pub file_size: Option<i64>,
/// Copy of `FileNode::sha256`.
pub sha256: Option<String>,
/// RFC 3339 UTC timestamp taken when the entry was built by `from_node`.
pub cached_at: String,
/// Set to 3600 by `from_node`. The visible code only compares it with fixed
/// values and thresholds and never with `cached_at`; the unit is presumably
/// seconds — TODO confirm.
pub ttl: u32,
}
impl CachedMetadata {
    /// Builds a cache entry from `node`, stamped with the current UTC time
    /// and a `ttl` of 3600.
    pub fn from_node(node: &FileNode) -> Self {
        let FileNode {
            node_id,
            label,
            parent_id,
            node_type,
            file_size,
            sha256,
            ..
        } = node;
        Self {
            node_id: node_id.clone(),
            label: label.clone(),
            parent_id: parent_id.clone(),
            node_type: *node_type,
            file_size: *file_size,
            sha256: sha256.clone(),
            cached_at: chrono::Utc::now().to_rfc3339(),
            ttl: 3600,
        }
    }

    /// Expands the cache entry back into a `FileNode`.
    ///
    /// Only the cached fields are restored. Aliases and children come back
    /// empty, the optional fields not held in the cache come back as `None`,
    /// `sort_order` is 0, and both timestamps are set to `cached_at`.
    pub fn to_file_node(&self) -> FileNode {
        let stamp = self.cached_at.clone();
        FileNode {
            node_id: self.node_id.clone(),
            label: self.label.clone(),
            aliases: Aliases::empty(),
            file_uuid: None,
            sha256: self.sha256.clone(),
            parent_id: self.parent_id.clone(),
            children: Vec::new(),
            node_type: self.node_type,
            icon: None,
            color: None,
            bg_color: None,
            file_size: self.file_size,
            registered_at: None,
            created_at: stamp.clone(),
            updated_at: stamp,
            sort_order: 0,
        }
    }
}
/// Snapshot of the `metadata_cache` tree as computed by `HybridRouter::get_cache_stats`.
#[derive(Debug, Clone)]
pub struct CacheStats {
/// Number of entries in the cache tree.
pub cache_size: usize,
/// Entries whose `ttl` is at or above `CacheConfig::hot_threshold`.
pub hot_count: usize,
/// Entries that are not hot and whose `ttl` is at or below `CacheConfig::cold_threshold`.
pub cold_count: usize,
/// Entries whose `ttl` is 0.
pub expired_count: usize,
/// Mean `ttl` over all entries, or 0.0 for an empty cache.
pub avg_ttl: f64,
}
/// Category of a lookup, used by `HybridRouter::route_query` to pick a backend.
///
/// Derives the common traits so values can be logged, copied and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QueryType {
    /// Listing the children of a parent node.
    ParentChildren,
    /// Lookup that joins on `file_uuid`.
    FileUuidJoin,
    /// Query with an arbitrary `WHERE` filter.
    WhereFilter,
    /// Lookup by content hash.
    ContentHashLookup,
    /// Access to the hot-file cache.
    HotFileCache,
    /// Access to the metadata cache.
    MetadataCache,
    /// Single-node lookup by ID.
    NodeLookup,
}
/// Backend chosen by `HybridRouter::route_query`.
///
/// Derives the common traits so routing decisions can be logged and asserted on.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DatabaseType {
    /// Serve the query from SQLite.
    SQLite,
    /// Serve the query from sled.
    Sled,
    /// Use both stores.
    Hybrid,
}
/// Tuning values for the sled metadata cache.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so a configuration can be
/// logged, duplicated and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CacheConfig {
    /// Entry count above which `lru_eviction` starts removing entries.
    pub max_cache_size: usize,
    /// Default TTL for new entries. NOTE(review): `CachedMetadata::from_node`
    /// hard-codes 3600 and does not read this field.
    pub default_ttl: u32,
    /// `ttl` at or above this value counts as "hot" in `get_cache_stats`.
    pub hot_threshold: u32,
    /// `ttl` at or below this value counts as "cold" in `get_cache_stats`.
    pub cold_threshold: u32,
    /// Not read by any code in this file — presumably seconds between
    /// cleanup passes; TODO confirm.
    pub cleanup_interval: u32,
}

impl Default for CacheConfig {
    /// Returns the stock configuration: 10000 entries, TTL 3600,
    /// hot threshold 50, cold threshold 5, cleanup interval 300.
    fn default() -> Self {
        Self {
            max_cache_size: 10000,
            default_ttl: 3600,
            hot_threshold: 50,
            cold_threshold: 5,
            cleanup_interval: 300,
        }
    }
}
/// Counters and latency figures describing cache effectiveness.
#[derive(Debug, Clone, Default)]
pub struct CacheMetrics {
    /// Cache entry count. NOTE(review): initialised to 0 by `init_user_db`
    /// and not updated by any code visible in this file.
    pub cache_size: usize,
    /// Number of lookups answered from the cache.
    pub cache_hits: u64,
    /// Number of lookups that fell through to SQLite.
    pub cache_misses: u64,
    /// Latency figure for cache hits, maintained by `get_node`.
    pub avg_cache_latency: Duration,
    /// Latency figure for SQLite lookups, maintained by `get_node`.
    pub avg_sqlite_latency: Duration,
}

impl CacheMetrics {
    /// Returns the fraction of lookups served from the cache, in `[0.0, 1.0]`.
    ///
    /// Returns 0.0 when no lookups have been recorded. The total is summed in
    /// `u128` so very large counters cannot overflow.
    pub fn hit_rate(&self) -> f64 {
        let total = u128::from(self.cache_hits) + u128::from(self.cache_misses);
        if total == 0 {
            return 0.0;
        }
        self.cache_hits as f64 / total as f64
    }
}
/// Per-user store that pairs a SQLite database (the `file_nodes` table) with a
/// sled database used for cached node metadata.
pub struct HybridRouter {
/// SQLite connection holding the `file_nodes` table.
sqlite_conn: Connection,
/// sled database holding the `metadata_cache`, `hot_files_cache` and `import_queue` trees.
sled_db: Db,
/// Cache tuning values; `init_user_db` installs `CacheConfig::default()`.
config: CacheConfig,
/// Hit/miss counters and latency figures updated by `get_node`.
metrics: Arc<std::sync::Mutex<CacheMetrics>>,
}
impl HybridRouter {
/// Returns the base path of a user's hybrid store:
/// `data/users_hybrid/<user_id>.hybrid`.
///
/// The path is relative and `user_id` is inserted verbatim.
pub fn user_db_path(user_id: &str) -> String {
    let mut path = String::from("data/users_hybrid/");
    path.push_str(user_id);
    path.push_str(".hybrid");
    path
}
pub fn init_user_db(user_id: &str) -> Result<Self> {
let db_path = Self::user_db_path(user_id);
let parent = std::path::Path::new(&db_path).parent().unwrap();
std::fs::create_dir_all(parent)?;
let sqlite_path = format!("{}.sqlite", db_path);
let sled_path = format!("{}.sled", db_path);
let sqlite_conn = Connection::open(&sqlite_path)?;
Self::init_sqlite_tables(&sqlite_conn)?;
let sled_db = sled::open(&sled_path)?;
Self::init_sled_trees(&sled_db)?;
let metrics = Arc::new(std::sync::Mutex::new(CacheMetrics {
cache_size: 0,
cache_hits: 0,
cache_misses: 0,
avg_cache_latency: Duration::from_secs(0),
avg_sqlite_latency: Duration::from_secs(0),
}));
Ok(HybridRouter {
sqlite_conn,
sled_db,
config: CacheConfig::default(),
metrics,
})
}
/// Creates the `file_nodes` table and its three lookup indexes
/// (`parent_id`, `sha256`, `file_uuid`) if they do not exist yet.
fn init_sqlite_tables(conn: &Connection) -> Result<()> {
    let schema = "CREATE TABLE IF NOT EXISTS file_nodes (
node_id TEXT PRIMARY KEY,
label TEXT NOT NULL,
aliases_json TEXT NOT NULL DEFAULT '{}',
file_uuid TEXT,
sha256 TEXT,
parent_id TEXT,
children_json TEXT NOT NULL DEFAULT '[]',
node_type TEXT NOT NULL DEFAULT 'folder',
icon TEXT,
color TEXT,
bg_color TEXT,
file_size INTEGER,
registered_at TEXT,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
sort_order INTEGER NOT NULL DEFAULT 0
);
CREATE INDEX IF NOT EXISTS idx_parent_id ON file_nodes(parent_id);
CREATE INDEX IF NOT EXISTS idx_sha256 ON file_nodes(sha256);
CREATE INDEX IF NOT EXISTS idx_file_uuid ON file_nodes(file_uuid);
";
    conn.execute_batch(schema).map_err(Into::into)
}
/// Opens the three sled trees this crate uses so they exist before first use.
fn init_sled_trees(db: &Db) -> Result<()> {
    const TREES: [&str; 3] = ["metadata_cache", "hot_files_cache", "import_queue"];
    for name in TREES.iter() {
        db.open_tree(*name)?;
    }
    Ok(())
}
/// Maps a query category to the backend that should serve it.
///
/// Relational queries go to SQLite, hash and cache lookups go to sled, and
/// single-node lookups use both.
pub fn route_query(&self, query_type: QueryType) -> DatabaseType {
    match query_type {
        QueryType::ParentChildren | QueryType::FileUuidJoin | QueryType::WhereFilter => {
            DatabaseType::SQLite
        }
        QueryType::ContentHashLookup | QueryType::HotFileCache | QueryType::MetadataCache => {
            DatabaseType::Sled
        }
        QueryType::NodeLookup => DatabaseType::Hybrid,
    }
}
pub fn get_node(&self, node_id: &str) -> Result<Option<FileNode>> {
let cache_tree = self.sled_db.open_tree("metadata_cache")?;
let start = Instant::now();
if let Some(cache_data) = cache_tree.get(node_id.as_bytes())? {
let cache: CachedMetadata = serde_json::from_slice(&cache_data)?;
if cache.ttl > 0 {
let mut metrics = self.metrics.lock().unwrap();
metrics.cache_hits += 1;
metrics.avg_cache_latency = start.elapsed();
return Ok(Some(cache.to_file_node()));
}
}
let mut metrics = self.metrics.lock().unwrap();
metrics.cache_misses += 1;
let start_sqlite = Instant::now();
let node = self.sqlite_query_node(node_id)?;
metrics.avg_sqlite_latency = start_sqlite.elapsed();
if let Some(n) = &node {
let cache = CachedMetadata::from_node(n);
cache_tree.insert(node_id.as_bytes(), serde_json::to_vec(&cache)?)?;
}
Ok(node)
}
fn sqlite_query_node(&self, node_id: &str) -> Result<Option<FileNode>> {
let mut stmt = self.sqlite_conn.prepare(
"SELECT node_id, label, aliases_json, file_uuid, sha256, parent_id, children_json,
node_type, icon, color, bg_color, file_size, registered_at,
created_at, updated_at, sort_order
FROM file_nodes WHERE node_id = ?",
)?;
let nodes = stmt.query_map([node_id], |row| {
let children_json: String = row.get(6)?;
let children: Vec<String> = serde_json::from_str(&children_json).unwrap_or_default();
Ok(FileNode {
node_id: row.get(0)?,
label: row.get(1)?,
aliases: Aliases::from_json(&row.get::<_, String>(2)?),
file_uuid: row.get(3)?,
sha256: row.get(4)?,
parent_id: row.get(5)?,
children,
node_type: NodeType::from_str(&row.get::<_, String>(7)?)
.unwrap_or(NodeType::Folder),
icon: row.get(8)?,
color: row.get(9)?,
bg_color: row.get(10)?,
file_size: row.get(11)?,
registered_at: row.get(12)?,
created_at: row.get(13)?,
updated_at: row.get(14)?,
sort_order: row.get(15)?,
})
})?;
let node = nodes.into_iter().next();
match node {
Some(n) => Ok(Some(n?)),
None => Ok(None),
}
}
pub fn get_children(&self, parent_id: &str) -> Result<Vec<FileNode>> {
let mut stmt = self.sqlite_conn.prepare(
"SELECT node_id, label, aliases_json, file_uuid, sha256, parent_id, children_json,
node_type, icon, color, bg_color, file_size, registered_at,
created_at, updated_at, sort_order
FROM file_nodes WHERE parent_id = ? ORDER BY sort_order ASC, created_at ASC",
)?;
let nodes = stmt.query_map([parent_id], |row| {
let children_json: String = row.get(6)?;
let children: Vec<String> = serde_json::from_str(&children_json).unwrap_or_default();
Ok(FileNode {
node_id: row.get(0)?,
label: row.get(1)?,
aliases: Aliases::from_json(&row.get::<_, String>(2)?),
file_uuid: row.get(3)?,
sha256: row.get(4)?,
parent_id: row.get(5)?,
children,
node_type: NodeType::from_str(&row.get::<_, String>(7)?)
.unwrap_or(NodeType::Folder),
icon: row.get(8)?,
color: row.get(9)?,
bg_color: row.get(10)?,
file_size: row.get(11)?,
registered_at: row.get(12)?,
created_at: row.get(13)?,
updated_at: row.get(14)?,
sort_order: row.get(15)?,
})
})?;
Ok(nodes.collect::<Result<Vec<_>, _>>()?)
}
/// Writes `node` to SQLite and then refreshes its metadata-cache entry.
///
/// The cache is only touched once the SQLite write has succeeded.
pub fn insert_node(&self, node: &FileNode) -> Result<()> {
    self.sqlite_insert_node(node)
        .and_then(|()| self.sled_update_cache(node))
}
fn sqlite_insert_node(&self, node: &FileNode) -> Result<()> {
self.sqlite_conn.execute(
"INSERT OR REPLACE INTO file_nodes (
node_id, label, aliases_json, file_uuid, sha256, parent_id, children_json,
node_type, icon, color, bg_color, file_size, registered_at,
created_at, updated_at, sort_order
) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16)",
rusqlite::params![
node.node_id,
node.label,
node.aliases.to_json(),
node.file_uuid,
node.sha256,
node.parent_id,
serde_json::to_string(&node.children)?,
node.node_type.as_str(),
node.icon,
node.color,
node.bg_color,
node.file_size,
node.registered_at,
node.created_at,
node.updated_at,
node.sort_order,
],
)?;
Ok(())
}
/// Stores a fresh `CachedMetadata` for `node` in the `metadata_cache` tree,
/// keyed by the node ID and overwriting any previous entry.
fn sled_update_cache(&self, node: &FileNode) -> Result<()> {
    let tree = self.sled_db.open_tree("metadata_cache")?;
    let payload = serde_json::to_vec(&CachedMetadata::from_node(node))?;
    tree.insert(node.node_id.as_bytes(), payload)?;
    Ok(())
}
/// Inserts or replaces many nodes in one SQLite transaction, then refreshes
/// their metadata-cache entries.
///
/// The INSERT statement is prepared once and reused for every row. An empty
/// slice returns immediately without touching either store. If any row fails,
/// the transaction is rolled back on drop and the cache is left untouched.
///
/// # Errors
///
/// Returns an error if serialization, any SQLite step, or a sled write fails.
pub fn insert_node_batch(&self, nodes: &[FileNode]) -> Result<()> {
    if nodes.is_empty() {
        return Ok(());
    }

    let tx = self.sqlite_conn.unchecked_transaction()?;
    {
        // Scoped so the statement's borrow of `tx` ends before the commit.
        let mut stmt = tx.prepare(
            "INSERT OR REPLACE INTO file_nodes (
node_id, label, aliases_json, file_uuid, sha256, parent_id, children_json,
node_type, icon, color, bg_color, file_size, registered_at,
created_at, updated_at, sort_order
) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16)",
        )?;
        for node in nodes {
            stmt.execute(rusqlite::params![
                node.node_id,
                node.label,
                node.aliases.to_json(),
                node.file_uuid,
                node.sha256,
                node.parent_id,
                serde_json::to_string(&node.children)?,
                node.node_type.as_str(),
                node.icon,
                node.color,
                node.bg_color,
                node.file_size,
                node.registered_at,
                node.created_at,
                node.updated_at,
                node.sort_order,
            ])?;
        }
    }
    tx.commit()?;

    // The cache is refreshed only after the rows are durably committed.
    let cache_tree = self.sled_db.open_tree("metadata_cache")?;
    for node in nodes {
        let cache = CachedMetadata::from_node(node);
        cache_tree.insert(node.node_id.as_bytes(), serde_json::to_vec(&cache)?)?;
    }
    Ok(())
}
/// Removes the metadata-cache entry for `node_id`, if one exists.
pub fn invalidate_cache(&self, node_id: &str) -> Result<()> {
    self.sled_db
        .open_tree("metadata_cache")?
        .remove(node_id.as_bytes())?;
    Ok(())
}
/// Loads the given nodes from SQLite into the metadata cache.
///
/// IDs with no matching row are skipped. Returns the number of entries
/// actually written.
pub fn warmup_cache(&self, hot_node_ids: &[String]) -> Result<usize> {
    let tree = self.sled_db.open_tree("metadata_cache")?;
    let mut warmed = 0;
    for id in hot_node_ids.iter() {
        let node = match self.sqlite_query_node(id)? {
            Some(found) => found,
            None => continue,
        };
        let payload = serde_json::to_vec(&CachedMetadata::from_node(&node))?;
        tree.insert(id.as_bytes(), payload)?;
        warmed += 1;
    }
    Ok(warmed)
}
/// Warms the cache with up to 1000 nodes whose label matches the SQL `LIKE`
/// pattern, taken in ascending `sort_order`.
///
/// Returns the number of cache entries written.
pub fn warmup_cache_by_pattern(&self, pattern: &str) -> Result<usize> {
    let mut stmt = self.sqlite_conn.prepare(
        "SELECT node_id FROM file_nodes WHERE label LIKE ? ORDER BY sort_order ASC LIMIT 1000",
    )?;
    let rows = stmt.query_map([pattern], |row| row.get::<_, String>(0))?;
    let mut matched: Vec<String> = Vec::new();
    for id in rows {
        matched.push(id?);
    }
    self.warmup_cache(&matched)
}
/// Writes a fresh metadata-cache entry for every node in `nodes`.
///
/// SQLite is not touched. Stops at the first failing write.
pub fn batch_update_cache(&self, nodes: &[FileNode]) -> Result<()> {
    let tree = self.sled_db.open_tree("metadata_cache")?;
    nodes.iter().try_for_each(|node| -> Result<()> {
        let payload = serde_json::to_vec(&CachedMetadata::from_node(node))?;
        tree.insert(node.node_id.as_bytes(), payload)?;
        Ok(())
    })
}
/// Shrinks the metadata cache towards `max_cache_size`.
///
/// Does nothing when the cache is within its limit. Otherwise it walks the
/// tree in key order and removes entries whose `ttl` is at most 1, until the
/// excess is gone or the tree is exhausted. Returns the number removed.
///
/// NOTE(review): despite the name, no access recency is tracked. Only
/// low-`ttl` entries are eligible, so the cache can stay above the limit
/// after this call.
///
/// # Errors
///
/// Returns an error if sled fails or a cache entry cannot be decoded.
pub fn lru_eviction(&self) -> Result<usize> {
    let cache_tree = self.sled_db.open_tree("metadata_cache")?;
    // sled's `Tree::len` scans the whole tree, so take it once and derive
    // both the guard and the eviction target from the same value.
    let current = cache_tree.len();
    let to_evict = current.saturating_sub(self.config.max_cache_size);
    if to_evict == 0 {
        return Ok(0);
    }

    let mut evicted = 0;
    for item in cache_tree.iter() {
        let (key, value) = item?;
        let cache: CachedMetadata = serde_json::from_slice(&value)?;
        if cache.ttl <= 1 {
            cache_tree.remove(key)?;
            evicted += 1;
            if evicted >= to_evict {
                break;
            }
        }
    }
    Ok(evicted)
}
/// Removes every cache entry whose `ttl` is 0 and returns how many were removed.
///
/// Fails on the first entry that cannot be read or decoded.
pub fn cleanup_expired_cache(&self) -> Result<usize> {
    let tree = self.sled_db.open_tree("metadata_cache")?;
    let mut cleaned = 0;
    for entry in tree.iter() {
        let (key, raw) = entry?;
        let meta = serde_json::from_slice::<CachedMetadata>(&raw)?;
        if meta.ttl != 0 {
            continue;
        }
        tree.remove(key)?;
        cleaned += 1;
    }
    Ok(cleaned)
}
/// Overwrites the `ttl` of an existing cache entry.
///
/// Does nothing when `node_id` has no entry in the cache.
pub fn update_cache_ttl(&self, node_id: &str, ttl: u32) -> Result<()> {
    let tree = self.sled_db.open_tree("metadata_cache")?;
    let raw = match tree.get(node_id.as_bytes())? {
        Some(bytes) => bytes,
        None => return Ok(()),
    };
    let mut entry: CachedMetadata = serde_json::from_slice(&raw)?;
    entry.ttl = ttl;
    tree.insert(node_id.as_bytes(), serde_json::to_vec(&entry)?)?;
    Ok(())
}
/// Computes a snapshot of the metadata cache in a single pass over the tree.
///
/// An entry is "hot" when its `ttl` is at or above `hot_threshold`, otherwise
/// "cold" when it is at or below `cold_threshold`. Entries with `ttl == 0`
/// are also counted as expired. The entry count comes from the same pass, so
/// the average always matches the entries that were summed.
///
/// # Errors
///
/// Returns an error if sled fails or a cache entry cannot be decoded.
pub fn get_cache_stats(&self) -> Result<CacheStats> {
    let cache_tree = self.sled_db.open_tree("metadata_cache")?;
    let mut cache_size: usize = 0;
    let mut hot_count = 0;
    let mut cold_count = 0;
    let mut expired_count = 0;
    // Summed in u64: a u32 sum overflows once the cache holds enough
    // entries at the default ttl of 3600.
    let mut total_ttl: u64 = 0;

    for item in cache_tree.iter() {
        let (_, value) = item?;
        let cache: CachedMetadata = serde_json::from_slice(&value)?;
        cache_size += 1;
        if cache.ttl >= self.config.hot_threshold {
            hot_count += 1;
        } else if cache.ttl <= self.config.cold_threshold {
            cold_count += 1;
        }
        if cache.ttl == 0 {
            expired_count += 1;
        }
        total_ttl += u64::from(cache.ttl);
    }

    let avg_ttl = if cache_size > 0 {
        total_ttl as f64 / cache_size as f64
    } else {
        0.0
    };

    Ok(CacheStats {
        cache_size,
        hot_count,
        cold_count,
        expired_count,
        avg_ttl,
    })
}
/// Returns a copy of the current cache metrics.
///
/// # Panics
///
/// Panics if the metrics mutex is poisoned.
pub fn get_metrics(&self) -> CacheMetrics {
    let guard = self.metrics.lock().unwrap();
    let CacheMetrics {
        cache_size,
        cache_hits,
        cache_misses,
        avg_cache_latency,
        avg_sqlite_latency,
    } = &*guard;
    CacheMetrics {
        cache_size: *cache_size,
        cache_hits: *cache_hits,
        cache_misses: *cache_misses,
        avg_cache_latency: *avg_cache_latency,
        avg_sqlite_latency: *avg_sqlite_latency,
    }
}
/// Returns the number of rows in the `file_nodes` table.
pub fn count_nodes(&self) -> Result<usize> {
    let total = self.sqlite_conn.query_row(
        "SELECT COUNT(*) FROM file_nodes",
        [],
        |row| row.get::<_, i64>(0),
    )?;
    Ok(total as usize)
}
/// Returns the number of entries currently in the `metadata_cache` tree.
pub fn cache_size(&self) -> Result<usize> {
    let entries = self.sled_db.open_tree("metadata_cache")?.len();
    Ok(entries)
}
/// Builds an in-memory folder node with a fresh ID; nothing is persisted.
///
/// The ID is a UUID v4 with the hyphens removed. The clock is read once, so
/// `created_at` and `updated_at` of a new folder are always identical.
pub fn new_folder(label: &str, parent_id: Option<&str>) -> FileNode {
    let now = chrono::Utc::now().to_rfc3339();
    FileNode {
        node_id: Uuid::new_v4().to_string().replace('-', ""),
        label: label.to_string(),
        aliases: Aliases::empty(),
        file_uuid: None,
        sha256: None,
        parent_id: parent_id.map(str::to_owned),
        children: Vec::new(),
        node_type: NodeType::Folder,
        icon: None,
        color: None,
        bg_color: None,
        file_size: None,
        registered_at: None,
        created_at: now.clone(),
        updated_at: now,
        sort_order: 0,
    }
}
}