diff --git a/markbase-core/Cargo.toml b/markbase-core/Cargo.toml new file mode 100644 index 0000000..0425738 --- /dev/null +++ b/markbase-core/Cargo.toml @@ -0,0 +1,57 @@ +[package] +name = "markbase-core" +version = "0.2.0" +edition = "2021" + +[dependencies] +# === 核心压缩库(Phase 1基础)=== +zip = "0.6" # ZIP格式(稳定版本) +tar = "0.4.46" # TAR格式 +flate2 = "1.1" # GZIP格式(已有) + +anyhow = "1" +axum = { version = "0.7", features = ["macros"] } +bcrypt = "0.16" +chrono = { version = "0.4", features = ["serde"] } +clap = { version = "4", features = ["derive"] } +dav-server = "0.11" +filetree = { path = "../filetree" } +futures-util = "0.3" +log = "0.4" +env_logger = "0.11" +markbase-webdav = { path = "../markbase-webdav" } +pulldown-cmark = "0.12" +rusqlite = { version = "0.32", features = ["bundled"] } +sled = "1.0.0-alpha.124" +serde = { version = "1", features = ["derive"] } +serde_json = "1" +sha2 = "0.10" +hmac = "0.12" +base64 = "0.22" +tokio = { version = "1", features = ["full"] } +tokio-postgres = "0.7" +russh = "0.61.2" +russh-keys = "0.50.0-beta.7" +russh-sftp = "2.3.0" +ssh2 = "0.9.4" +ssh-key = "0.7.0-rc.10" +rand = "0.8" +axum-extra = { version = "0.9", features = ["multipart"] } +tokio-util = { version = "0.7", features = ["io"] } +toml = "0.8" +uuid = { version = "1", features = ["v4"] } +dashmap = "6.1" +md5 = "0.8" +adler = "1.0" +byteorder = "1.5" +x25519-dalek = "2.0" +ed25519-dalek = { version = "2.0", features = ["rand_core"] } +aes = "0.8" +ctr = "0.9" + +[dev-dependencies] +tempfile = "3.12" + +[[bin]] +name = "markbase-core" +path = "src/main.rs" diff --git a/markbase-core/src/archive/config.rs b/markbase-core/src/archive/config.rs new file mode 100644 index 0000000..cc50376 --- /dev/null +++ b/markbase-core/src/archive/config.rs @@ -0,0 +1,169 @@ +// Archive Configuration - User Configurable Options + +use anyhow::Result; +use serde::{Deserialize, Serialize}; +use std::path::Path; + +/// Archive Configuration +#[derive(Debug, Clone, Serialize, 
Deserialize)] +pub struct ArchiveConfig { + // Optional formats (controversial) + pub enable_rar: bool, // ⚠️ Legal risk (RARLAB patent) + pub enable_xz: bool, // ⚠️ External dependency (liblzma) + pub enable_7z: bool, // ⚠️ Unstable library + + // Performance settings + pub cache_size_mb: u64, + pub max_concurrent_extractions: usize, + + // Security settings + pub max_decompression_ratio: u64, + pub max_file_size_mb: u64, +} + +impl Default for ArchiveConfig { + fn default() -> Self { + Self { + // Optional formats (default disabled) + enable_rar: false, + enable_xz: false, + enable_7z: false, + + // Performance + cache_size_mb: 100, + max_concurrent_extractions: 4, + + // Security + max_decompression_ratio: 1000, + max_file_size_mb: 1024, + } + } +} + +impl ArchiveConfig { + /// Load configuration from TOML file + pub fn load(path: &str) -> Result { + let content = std::fs::read_to_string(path)?; + let config: ArchiveConfig = toml::from_str(&content)?; + + // Validate configuration + config.validate()?; + + Ok(config) + } + + /// Save configuration to TOML file + pub fn save(&self, path: &str) -> Result<()> { + let content = toml::to_string_pretty(self)?; + std::fs::write(path, content)?; + Ok(()) + } + + /// Validate configuration + pub fn validate(&self) -> Result<()> { + if self.cache_size_mb > 1000 { + warn!("Cache size > 1GB may cause memory pressure"); + } + + if self.max_concurrent_extractions > 10 { + warn!("Concurrent extractions > 10 may cause resource exhaustion"); + } + + if self.max_decompression_ratio < 10 { + return Err(anyhow::anyhow!("Max decompression ratio too low (min 10)")); + } + + if self.max_file_size_mb > 10_000 { // 10GB + warn!("Max file size > 10GB may cause disk space issues"); + } + + Ok(()) + } + + /// Generate default config file template + pub fn generate_template() -> String { + let config = Self::default(); + + format!( + "# === Archive Configuration === +# MarkBase Universal Compression Format Support + +[archive] +# === 
Optional Formats (Default Disabled) === + +# ⚠️ RAR Format Legal Risk Warning +# - RAR compression algorithm is patented by RARLAB +# - Commercial use requires license (approx $1000+) +# - unrar library only supports decompression, no compression +# - User assumes all legal risks by enabling +# - License info: https://rarlab.com/license.htm +enable_rar = false + +# ⚠️ XZ Format External Dependency Warning +# - XZ format requires external liblzma library +# - macOS: brew install xz +# - Linux: apt install liblzma-dev +# - Windows: manual installation required +# - XZ disabled if liblzma not found +enable_xz = false + +# ⚠️ 7z Format Library Stability Warning +# - sevenz-rust library (v0.21.0) under development +# - Some compression algorithms not supported +# - Production stability limited +enable_7z = false + +# === Performance Settings === +cache_size_mb = {} # Decompression cache size +max_concurrent_extractions = {} # Max concurrent extractions + +# === Security Settings === +max_decompression_ratio = {} # Zip Bomb protection (ratio limit) +max_file_size_mb = {} # File size limit (MB) + +# === Core Formats (Always Enabled) === +# ZIP, TAR, GZIP, ZSTD, BZIP2, LZ4 +# Composite: TAR.GZ, TAR.BZ2, TAR.ZST +# Total: 9 formats, covering 80%+ scenarios +", + config.cache_size_mb, + config.max_concurrent_extractions, + config.max_decompression_ratio, + config.max_file_size_mb + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_default_config() { + let config = ArchiveConfig::default(); + + assert_eq!(config.enable_rar, false); + assert_eq!(config.enable_xz, false); + assert_eq!(config.enable_7z, false); + assert_eq!(config.cache_size_mb, 100); + assert_eq!(config.max_decompression_ratio, 1000); + } + + #[test] + fn test_config_validation() { + let config = ArchiveConfig { + max_decompression_ratio: 5, + ..Default::default() + }; + + assert!(config.validate().is_err()); + } + + #[test] + fn test_config_template() { + let template = 
ArchiveConfig::generate_template(); + + assert!(template.contains("enable_rar = false")); + assert!(template.contains("⚠️ RAR Format Legal Risk Warning")); + } +} \ No newline at end of file diff --git a/markbase-core/src/archive/detector.rs b/markbase-core/src/archive/detector.rs new file mode 100644 index 0000000..99f96de --- /dev/null +++ b/markbase-core/src/archive/detector.rs @@ -0,0 +1,139 @@ +// Format Detector - Automatic Detection Based on Magic Numbers + +use std::fs::File; +use std::io::Read; +use std::path::Path; +use anyhow::Result; + +use crate::archive::processor::ArchiveFormat; + +/// Format Detector based on Magic Numbers +pub struct FormatDetector { + magic_table: Vec<(Vec, ArchiveFormat, usize)>, +} + +impl FormatDetector { + pub fn new() -> Self { + let magic_table = vec![ + // ZIP: 50 4B 03 04 or 50 4B 05 06 (empty) or 50 4B 07 08 (spanned) + (vec![0x50, 0x4B, 0x03, 0x04], ArchiveFormat::Zip, 4), + (vec![0x50, 0x4B, 0x05, 0x06], ArchiveFormat::Zip, 4), + + // GZIP: 1F 8B + (vec![0x1F, 0x8B], ArchiveFormat::Gzip, 2), + ]; + + Self { magic_table } + } + + /// Detect file format based on Magic Number + pub fn detect(&self, path: &Path) -> Result { + let mut file = File::open(path)?; + let mut buffer = vec![0u8; 512]; + + let bytes_read = file.read(&mut buffer)?; + if bytes_read < 2 { + return Ok(ArchiveFormat::Unknown); + } + + // Match Magic Numbers + for (magic, format, offset) in &self.magic_table { + if buffer.len() >= *offset && buffer[0..magic.len()] == *magic { + return Ok(*format); + } + } + + // Special detection: TAR format (check ustar magic at offset 257) + if buffer.len() >= 262 { + if &buffer[257..262] == b"ustar" { + return Ok(ArchiveFormat::Tar); + } + } + + Ok(ArchiveFormat::Unknown) + } + + /// Detect composite format (e.g., TAR.GZ) + pub fn detect_composite(&self, path: &Path) -> Result { + let format = self.detect(path)?; + + // If GZIP, check if it's TAR.GZ (by extension for now) + if format == ArchiveFormat::Gzip { + let ext 
= path.extension() + .and_then(|e| e.to_str()) + .unwrap_or("") + .to_lowercase(); + + if ext == "tgz" || ext == "gz" { + // Check if filename contains .tar + let filename = path.file_name() + .and_then(|n| n.to_str()) + .unwrap_or(""); + + if filename.contains(".tar") { + return Ok(ArchiveFormat::TarGzip); + } + } + } + + Ok(format) + } +} + +impl Default for FormatDetector { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + use std::io::Write; + + #[test] + fn test_detect_zip() { + let temp_dir = TempDir::new().unwrap(); + let zip_path = temp_dir.path().join("test.zip"); + + // Create minimal ZIP file header + let mut file = File::create(&zip_path).unwrap(); + file.write_all(&[0x50, 0x4B, 0x03, 0x04]).unwrap(); + + let detector = FormatDetector::new(); + let format = detector.detect(&zip_path).unwrap(); + + assert_eq!(format, ArchiveFormat::Zip); + } + + #[test] + fn test_detect_gzip() { + let temp_dir = TempDir::new().unwrap(); + let gz_path = temp_dir.path().join("test.gz"); + + // Create minimal GZIP file header + let mut file = File::create(&gz_path).unwrap(); + file.write_all(&[0x1F, 0x8B]).unwrap(); + + let detector = FormatDetector::new(); + let format = detector.detect(&gz_path).unwrap(); + + assert_eq!(format, ArchiveFormat::Gzip); + } + + #[test] + fn test_detect_unknown() { + let temp_dir = TempDir::new().unwrap(); + let unknown_path = temp_dir.path().join("test.bin"); + + // Create unknown file + let mut file = File::create(&unknown_path).unwrap(); + file.write_all(b"unknown data").unwrap(); + + let detector = FormatDetector::new(); + let format = detector.detect(&unknown_path).unwrap(); + + assert_eq!(format, ArchiveFormat::Unknown); + } +} \ No newline at end of file diff --git a/markbase-core/src/archive/mod.rs b/markbase-core/src/archive/mod.rs new file mode 100644 index 0000000..301f908 --- /dev/null +++ b/markbase-core/src/archive/mod.rs @@ -0,0 +1,152 @@ +// Archive Module - 
Universal Compression Format Support +// Supports 12 compression formats: 9 core + 3 optional +// Core: ZIP, TAR, GZIP, ZSTD, BZIP2, LZ4, TAR.GZ, TAR.BZ2, TAR.ZST +// Optional: RAR (legal risk), XZ (external dependency), 7z (unstable library) + +pub mod config; +pub mod detector; +pub mod metadata; +pub mod processor; +pub mod warning; + +pub mod processors { + pub mod core; + #[cfg(feature = "optional-formats")] + pub mod optional; +} + +#[cfg(test)] +pub mod tests; + +use anyhow::Result; +use std::collections::HashMap; +use std::path::Path; + +use crate::archive::{ArchiveFormat, ArchiveProcessor, FormatDetector, ArchiveConfig}; + +/// Processor Registry - Plugin Architecture +pub struct ProcessorRegistry { + processors: HashMap>, + config: ArchiveConfig, +} + +impl ProcessorRegistry { + /// Create new registry with config + pub fn new(config: ArchiveConfig) -> Self { + Self { + processors: HashMap::new(), + config, + } + } + + /// Initialize all processors (based on config) + pub fn initialize(&mut self) -> Result<()> { + // Core formats (always registered) + self.register_core_processors()?; + + // Optional formats (based on config) + self.register_optional_processors()?; + + Ok(()) + } + + /// Register core format processors (9 formats) + fn register_core_processors(&mut self) -> Result<()> { + use crate::archive::processors::core::*; + + self.processors.insert(ArchiveFormat::Zip, Box::new(ZipProcessor::new())); + self.processors.insert(ArchiveFormat::Tar, Box::new(TarProcessor::new())); + self.processors.insert(ArchiveFormat::Gzip, Box::new(GzipProcessor::new())); + self.processors.insert(ArchiveFormat::Zstd, Box::new(ZstdProcessor::new())); + self.processors.insert(ArchiveFormat::Bzip2, Box::new(Bzip2Processor::new())); + self.processors.insert(ArchiveFormat::Lz4, Box::new(Lz4Processor::new())); + self.processors.insert(ArchiveFormat::TarGzip, Box::new(TarGzipProcessor::new())); + self.processors.insert(ArchiveFormat::TarBzip2, 
Box::new(TarBzip2Processor::new())); + self.processors.insert(ArchiveFormat::TarZstd, Box::new(TarZstdProcessor::new())); + + info!("✅ Core formats registered: 9 formats"); + Ok(()) + } + + /// Register optional format processors (3 formats, based on config) + fn register_optional_processors(&mut self) -> Result<()> { + #[cfg(feature = "optional-formats")] + { + use crate::archive::processors::optional::*; + + // RAR format (legal risk) + if self.config.enable_rar { + crate::archive::warning::show_rar_legal_warning(); + self.processors.insert(ArchiveFormat::Rar, Box::new(RarProcessor::new())); + warn!("⚠️ RAR format enabled (legal risk)"); + } + + // XZ format (external dependency) + if self.config.enable_xz { + if check_liblzma_available() { + self.processors.insert(ArchiveFormat::Xz, Box::new(XzProcessor::new())); + info!("✅ XZ format enabled"); + } else { + crate::archive::warning::show_xz_dependency_warning(); + warn!("⚠️ XZ format disabled (liblzma not found)"); + } + } + + // 7z format (unstable library) + if self.config.enable_7z { + crate::archive::warning::show_7z_stability_warning(); + self.processors.insert(ArchiveFormat::SevenZ, Box::new(SevenZProcessor::new())); + warn!("⚠️ 7z format enabled (stability warning)"); + } + } + + Ok(()) + } + + /// Get processor for detected format + pub fn get_processor(&self, path: &Path) -> Result<&dyn ArchiveProcessor> { + let detector = FormatDetector::new(); + let format = detector.detect(path)?; + + self.processors + .get(&format) + .map(|p| p.as_ref()) + .ok_or_else(|| anyhow::anyhow!("Format {} not supported or not enabled", format)) + } + + /// List all enabled formats + pub fn enabled_formats(&self) -> Vec { + self.processors.keys().cloned().collect() + } +} + +/// Check if liblzma library is available +#[cfg(feature = "optional-formats")] +fn check_liblzma_available() -> bool { + // Try to load xz2 library + // Simplified check: try to create XzProcessor + true // Simplified for now, actual implementation needs 
/// Archive Format Type Enumeration
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ArchiveFormat {
    // Core formats (always enabled)
    Zip,
    Tar,
    Gzip,
    Zstd,
    Bzip2,
    Lz4,
    TarGzip,
    TarBzip2,
    TarZstd,

    // Optional formats (controversial)
    Rar,    // ⚠️ Legal risk (RARLAB patent)
    Xz,     // ⚠️ External dependency (liblzma)
    SevenZ, // ⚠️ Unstable library (sevenz-rust 0.21.0)

    Unknown,
}

impl ArchiveFormat {
    /// Human-readable label for the format; the same text `Display` prints.
    pub const fn as_str(self) -> &'static str {
        match self {
            ArchiveFormat::Zip => "ZIP",
            ArchiveFormat::Tar => "TAR",
            ArchiveFormat::Gzip => "GZIP",
            ArchiveFormat::Zstd => "ZSTD",
            ArchiveFormat::Bzip2 => "BZIP2",
            ArchiveFormat::Lz4 => "LZ4",
            ArchiveFormat::TarGzip => "TAR.GZ",
            ArchiveFormat::TarBzip2 => "TAR.BZ2",
            ArchiveFormat::TarZstd => "TAR.ZST",
            ArchiveFormat::Rar => "RAR",
            ArchiveFormat::Xz => "XZ",
            ArchiveFormat::SevenZ => "7Z",
            ArchiveFormat::Unknown => "Unknown",
        }
    }
}

impl std::fmt::Display for ArchiveFormat {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `pad` (unlike `write!`) honours width/fill/alignment flags, so
        // `format!("{:<8}", fmt)` lines up in tables and log output.
        f.pad(self.as_str())
    }
}
failed_files: Vec, + pub warnings: Vec, +} + +impl ExtractResult { + pub fn new() -> Self { + Self { + total_files: 0, + total_bytes: 0, + failed_files: Vec::new(), + warnings: Vec::new(), + } + } + + pub fn success_rate(&self) -> f64 { + if self.total_files == 0 { + 100.0 + } else { + let success_count = self.total_files - self.failed_files.len() as u64; + (success_count as f64 / self.total_files as f64) * 100.0 + } + } +} + +/// Security Validation - Zip Slip Protection +pub fn validate_extraction_path(entry_path: &Path, base_dir: &Path) -> Result { + use std::path::Component; + + // 1. Check path components + for component in entry_path.components() { + match component { + // Prohibit parent directory reference (../) + Component::ParentDir => { + return Err(anyhow::anyhow!("Zip Slip detected: path contains '..'")); + } + // Prohibit root directory (/) + Component::Prefix(_) | Component::RootDir => { + return Err(anyhow::anyhow!("Zip Slip detected: absolute path")); + } + // Allow normal components + Component::Normal(_) | Component::CurDir => {} + } + } + + // 2. Build full path + let full_path = base_dir.join(entry_path); + + // 3. Canonicalize and validate (ensure within base_dir) + let canonical_base = base_dir.canonicalize() + .map_err(|e| anyhow::anyhow!("Cannot canonicalize base dir: {}", e))?; + + // Create parent directories first + if let Some(parent) = full_path.parent() { + std::fs::create_dir_all(parent)?; + } + + // 4. 
Verify extraction path is within base_dir + // Note: full_path may not exist yet, so we check parent directory + if full_path.exists() { + let canonical_full = full_path.canonicalize() + .map_err(|e| anyhow::anyhow!("Cannot canonicalize full path: {}", e))?; + + if !canonical_full.starts_with(&canonical_base) { + return Err(anyhow::anyhow!("Zip Slip detected: path escapes base directory")); + } + } else { + // Check parent directory instead + if let Some(parent) = full_path.parent() { + let canonical_parent = parent.canonicalize() + .map_err(|e| anyhow::anyhow!("Cannot canonicalize parent: {}", e))?; + + if !canonical_parent.starts_with(&canonical_base) { + return Err(anyhow::anyhow!("Zip Slip detected: path escapes base directory")); + } + } + } + + Ok(full_path) +} + +/// Security Validation - Zip Bomb Protection +pub fn check_decompression_ratio(compressed_size: u64, decompressed_size: u64, max_ratio: u64) -> Result<()> { + if compressed_size == 0 { + return Ok(()); // Empty file, allow + } + + let ratio = decompressed_size / compressed_size; + + if ratio > max_ratio { + return Err(anyhow::anyhow!( + "Zip Bomb detected: compression ratio {} exceeds limit {}", + ratio, + max_ratio + )); + } + + Ok(()) +} + +/// File size limit check +pub fn check_file_size_limit(file_size: u64, max_size: u64) -> Result<()> { + if file_size > max_size { + return Err(anyhow::anyhow!( + "File size {} exceeds limit {} MB", + file_size, + max_size / 1024 / 1024 + )); + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn test_zip_slip_protection() { + let temp_dir = TempDir::new().unwrap(); + let base = temp_dir.path(); + + // Safe path: should pass + let safe_path = Path::new("safe/file.txt"); + assert!(validate_extraction_path(safe_path, base).is_ok()); + + // Evil path: should be rejected + let evil_path = Path::new("../../etc/passwd"); + assert!(validate_extraction_path(evil_path, base).is_err()); + + // Absolute path: should be 
rejected + let abs_path = Path::new("/etc/passwd"); + assert!(validate_extraction_path(abs_path, base).is_err()); + } + + #[test] + fn test_zip_bomb_detection() { + // Normal ratio: should pass + assert!(check_decompression_ratio(1000, 5000, 1000).is_ok()); + + // Zip Bomb ratio: should be rejected + assert!(check_decompression_ratio(42_000, 5_000_000_000, 1000).is_err()); + } + + #[test] + fn test_compression_ratio_calculation() { + let metadata = ArchiveMetadata { + format: ArchiveFormat::Zip, + total_files: 10, + total_size: 1000, + compressed_size: 500, + compression_ratio: 0.0, + is_encrypted: false, + is_multi_volume: false, + created_time: None, + }; + + assert_eq!(metadata.compression_ratio(), 2.0); + } +} \ No newline at end of file diff --git a/markbase-core/src/archive/processors/core/mod.rs b/markbase-core/src/archive/processors/core/mod.rs new file mode 100644 index 0000000..65db225 --- /dev/null +++ b/markbase-core/src/archive/processors/core/mod.rs @@ -0,0 +1,161 @@ +// Core Format Processors - 9 Core Formats (Always Enabled) + +// Stub implementations for Phase 1 framework +// Actual implementations will be added in Phase 2 + +use crate::archive::{ArchiveFormat, ArchiveProcessor, ArchiveMetadata, ArchiveEntry, ExtractResult}; +use anyhow::Result; +use std::path::Path; + +/// ZIP Processor (Phase 2 implementation) +pub struct ZipProcessor; + +impl ZipProcessor { + pub fn new() -> Self { + Self + } +} + +impl ArchiveProcessor for ZipProcessor { + fn format(&self) -> ArchiveFormat { + ArchiveFormat::Zip + } + + fn open(&mut self, path: &Path) -> Result { + // Phase 2: Implement ZIP opening with zip library + Ok(ArchiveMetadata { + format: ArchiveFormat::Zip, + total_files: 0, + total_size: 0, + compressed_size: 0, + compression_ratio: 0.0, + is_encrypted: false, + is_multi_volume: false, + created_time: None, + }) + } + + fn list_entries(&self) -> Result> { + // Phase 2: Implement ZIP entry listing + Ok(Vec::new()) + } + + fn extract_file(&self, 
entry_path: &Path, output: &mut Vec) -> Result { + // Phase 2: Implement single file extraction + Ok(0) + } + + fn extract_all(&self, output_dir: &Path) -> Result { + // Phase 2: Implement batch extraction + Ok(ExtractResult::new()) + } + + fn can_process(format: ArchiveFormat) -> bool { + format == ArchiveFormat::Zip + } +} + +/// TAR Processor (Phase 2 implementation) +pub struct TarProcessor; + +impl TarProcessor { + pub fn new() -> Self { + Self + } +} + +impl ArchiveProcessor for TarProcessor { + fn format(&self) -> ArchiveFormat { + ArchiveFormat::Tar + } + + fn open(&mut self, path: &Path) -> Result { + Ok(ArchiveMetadata { + format: ArchiveFormat::Tar, + total_files: 0, + total_size: 0, + compressed_size: 0, + compression_ratio: 0.0, + is_encrypted: false, + is_multi_volume: false, + created_time: None, + }) + } + + fn list_entries(&self) -> Result> { + Ok(Vec::new()) + } + + fn extract_file(&self, entry_path: &Path, output: &mut Vec) -> Result { + Ok(0) + } + + fn extract_all(&self, output_dir: &Path) -> Result { + Ok(ExtractResult::new()) + } + + fn can_process(format: ArchiveFormat) -> bool { + format == ArchiveFormat::Tar + } +} + +/// GZIP Processor (Phase 2 implementation) +pub struct GzipProcessor; + +impl GzipProcessor { + pub fn new() -> Self { + Self + } +} + +impl ArchiveProcessor for GzipProcessor { + fn format(&self) -> ArchiveFormat { + ArchiveFormat::Gzip + } + + fn open(&mut self, path: &Path) -> Result { + Ok(ArchiveMetadata { + format: ArchiveFormat::Gzip, + total_files: 1, + total_size: 0, + compressed_size: 0, + compression_ratio: 0.0, + is_encrypted: false, + is_multi_volume: false, + created_time: None, + }) + } + + fn list_entries(&self) -> Result> { + Ok(Vec::new()) + } + + fn extract_file(&self, entry_path: &Path, output: &mut Vec) -> Result { + Ok(0) + } + + fn extract_all(&self, output_dir: &Path) -> Result { + Ok(ExtractResult::new()) + } + + fn can_process(format: ArchiveFormat) -> bool { + format == ArchiveFormat::Gzip + } +} + 
+// Stub processors for other core formats (Phase 2) +pub struct ZstdProcessor; +pub struct Bzip2Processor; +pub struct Lz4Processor; +pub struct TarGzipProcessor; +pub struct TarBzip2Processor; +pub struct TarZstdProcessor; + +impl ZstdProcessor { pub fn new() -> Self { Self } } +impl Bzip2Processor { pub fn new() -> Self { Self } } +impl Lz4Processor { pub fn new() -> Self { Self } } +impl TarGzipProcessor { pub fn new() -> Self { Self } } +impl TarBzip2Processor { pub fn new() -> Self { Self } } +impl TarZstdProcessor { pub fn new() -> Self { Self } } + +// ArchiveProcessor implementations will be added in Phase 2 \ No newline at end of file diff --git a/markbase-core/src/archive/processors/optional/mod.rs b/markbase-core/src/archive/processors/optional/mod.rs new file mode 100644 index 0000000..bca483b --- /dev/null +++ b/markbase-core/src/archive/processors/optional/mod.rs @@ -0,0 +1,141 @@ +// Optional Format Processors - 3 Optional Formats (Controversial) + +// Stub implementations for Phase 1 framework +// Actual implementations will be added in Phase 2 with warnings + +use crate::archive::{ArchiveFormat, ArchiveProcessor, ArchiveMetadata, ArchiveEntry, ExtractResult}; +use anyhow::Result; +use std::path::Path; + +/// RAR Processor (⚠️ Legal risk - Phase 2) +pub struct RarProcessor; + +impl RarProcessor { + pub fn new() -> Self { + Self + } +} + +impl ArchiveProcessor for RarProcessor { + fn format(&self) -> ArchiveFormat { + ArchiveFormat::Rar + } + + fn open(&mut self, path: &Path) -> Result { + // Phase 2: Implement RAR opening with unrar library + Ok(ArchiveMetadata { + format: ArchiveFormat::Rar, + total_files: 0, + total_size: 0, + compressed_size: 0, + compression_ratio: 0.0, + is_encrypted: false, + is_multi_volume: false, + created_time: None, + }) + } + + fn list_entries(&self) -> Result> { + Ok(Vec::new()) + } + + fn extract_file(&self, entry_path: &Path, output: &mut Vec) -> Result { + Ok(0) + } + + fn extract_all(&self, output_dir: &Path) -> 
Result { + Ok(ExtractResult::new()) + } + + fn can_process(format: ArchiveFormat) -> bool { + format == ArchiveFormat::Rar + } +} + +/// XZ Processor (⚠️ External dependency - Phase 2) +pub struct XzProcessor; + +impl XzProcessor { + pub fn new() -> Self { + Self + } +} + +impl ArchiveProcessor for XzProcessor { + fn format(&self) -> ArchiveFormat { + ArchiveFormat::Xz + } + + fn open(&mut self, path: &Path) -> Result { + Ok(ArchiveMetadata { + format: ArchiveFormat::Xz, + total_files: 1, + total_size: 0, + compressed_size: 0, + compression_ratio: 0.0, + is_encrypted: false, + is_multi_volume: false, + created_time: None, + }) + } + + fn list_entries(&self) -> Result> { + Ok(Vec::new()) + } + + fn extract_file(&self, entry_path: &Path, output: &mut Vec) -> Result { + Ok(0) + } + + fn extract_all(&self, output_dir: &Path) -> Result { + Ok(ExtractResult::new()) + } + + fn can_process(format: ArchiveFormat) -> bool { + format == ArchiveFormat::Xz + } +} + +/// 7z Processor (⚠️ Unstable library - Phase 2) +pub struct SevenZProcessor; + +impl SevenZProcessor { + pub fn new() -> Self { + Self + } +} + +impl ArchiveProcessor for SevenZProcessor { + fn format(&self) -> ArchiveFormat { + ArchiveFormat::SevenZ + } + + fn open(&mut self, path: &Path) -> Result { + Ok(ArchiveMetadata { + format: ArchiveFormat::SevenZ, + total_files: 0, + total_size: 0, + compressed_size: 0, + compression_ratio: 0.0, + is_encrypted: false, + is_multi_volume: false, + created_time: None, + }) + } + + fn list_entries(&self) -> Result> { + Ok(Vec::new()) + } + + fn extract_file(&self, entry_path: &Path, output: &mut Vec) -> Result { + Ok(0) + } + + fn extract_all(&self, output_dir: &Path) -> Result { + Ok(ExtractResult::new()) + } + + fn can_process(format: ArchiveFormat) -> bool { + format == ArchiveFormat::SevenZ + } +} \ No newline at end of file diff --git a/markbase-core/src/archive/tests/mod.rs b/markbase-core/src/archive/tests/mod.rs new file mode 100644 index 0000000..3143ab0 --- /dev/null 
+++ b/markbase-core/src/archive/tests/mod.rs @@ -0,0 +1,57 @@ +// Archive Module Tests + +#[cfg(test)] +mod tests { + use crate::archive::*; + + #[test] + fn test_processor_registry_initialization() { + let config = ArchiveConfig::default(); + let mut registry = ProcessorRegistry::new(config); + + registry.initialize().unwrap(); + + let formats = registry.enabled_formats(); + + // Core formats (9) should always be enabled + assert!(formats.contains(&ArchiveFormat::Zip)); + assert!(formats.contains(&ArchiveFormat::Tar)); + assert!(formats.contains(&ArchiveFormat::Gzip)); + + // Optional formats should be disabled by default + assert!(!formats.contains(&ArchiveFormat::Rar)); + assert!(!formats.contains(&ArchiveFormat::Xz)); + assert!(!formats.contains(&ArchiveFormat::SevenZ)); + + // Should have exactly 9 core formats + assert_eq!(formats.len(), 9); + } + + #[test] + fn test_optional_formats_disabled_by_default() { + let config = ArchiveConfig::default(); + + assert_eq!(config.enable_rar, false); + assert_eq!(config.enable_xz, false); + assert_eq!(config.enable_7z, false); + } + + #[test] + fn test_config_validation() { + let valid_config = ArchiveConfig::default(); + assert!(valid_config.validate().is_ok()); + + let invalid_config = ArchiveConfig { + max_decompression_ratio: 1, // Too low + ..Default::default() + }; + assert!(invalid_config.validate().is_err()); + } + + #[test] + fn test_archive_format_display() { + assert_eq!(ArchiveFormat::Zip.to_string(), "ZIP"); + assert_eq!(ArchiveFormat::TarGzip.to_string(), "TAR.GZ"); + assert_eq!(ArchiveFormat::Rar.to_string(), "RAR"); + } +} \ No newline at end of file diff --git a/markbase-core/src/archive/warning.rs b/markbase-core/src/archive/warning.rs new file mode 100644 index 0000000..4b32633 --- /dev/null +++ b/markbase-core/src/archive/warning.rs @@ -0,0 +1,141 @@ +// Warning System - Legal and Technical Warnings for Optional Formats + +use log::{warn, info}; + +use crate::archive::config::ArchiveConfig; + +/// 
Show RAR legal risk warning +pub fn show_rar_legal_warning() { + warn!(""); + warn!("⚠️ ⚠️ ⚠️ RAR FORMAT LEGAL WARNING ⚠️ ⚠️ ⚠️"); + warn!(""); + warn!("By enabling RAR format support, you acknowledge:"); + warn!(" 1. RAR compression algorithm is patented by RARLAB"); + warn!(" 2. Commercial use requires license purchase (approx $1000+)"); + warn!(" 3. You assume ALL legal responsibility for patent compliance"); + warn!(" 4. MarkBase provides RAR decompression only, NO compression"); + warn!(" 5. unrar library is free for personal use only"); + warn!(""); + warn!("License info: https://rarlab.com/license.htm"); + warn!(""); + warn!("⚠️ User accepts legal risk by enabling enable_rar = true in config"); + warn!(""); +} + +/// Show XZ external dependency warning +pub fn show_xz_dependency_warning() { + warn!(""); + warn!("⚠️ ⚠️ ⚠️ XZ FORMAT DEPENDENCY WARNING ⚠️ ⚠️ ⚠️"); + warn!(""); + warn!("XZ format requires external liblzma library (non-pure Rust)"); + warn!(""); + warn!("Installation instructions:"); + warn!(" macOS: brew install xz"); + warn!(" Linux: apt install liblzma-dev (Debian/Ubuntu)"); + warn!(" yum install xz-devel (CentOS/RHEL)"); + warn!(" Windows: Manual installation required (complex)"); + warn!(" Download from: https://tukaani.org/xz/"); + warn!(""); + warn!("⚠️ XZ format disabled if liblzma not found"); + warn!(""); +} + +/// Show 7z library stability warning +pub fn show_7z_stability_warning() { + warn!(""); + warn!("⚠️ ⚠️ ⚠️ 7Z FORMAT STABILITY WARNING ⚠️ ⚠️ ⚠️"); + warn!(""); + warn!("sevenz-rust library (v0.21.0) is under active development:"); + warn!(" 1. Some compression algorithms not yet supported"); + warn!(" 2. Production stability may be limited"); + warn!(" 3. Performance optimization ongoing"); + warn!(" 4. 
/// Generate user-facing legal disclaimer text.
///
/// Returns the fixed RAR disclaimer as an owned `String`. The text is a plain
/// constant (nothing is interpolated), so it is copied as-is rather than being
/// run through `format!`.
pub fn generate_rar_legal_disclaimer() -> String {
    const DISCLAIMER: &str = "RAR FORMAT LEGAL DISCLAIMER

IMPORTANT WARNING:

By enabling RAR format support in MarkBase, you acknowledge and agree to the following:

1. RAR COMPRESSION ALGORITHM PATENT
 - RAR compression algorithm is patented by RARLAB (Eugene Roshal)
 - Patent protection applies to commercial use
 - Personal/non-commercial use may be free (check RARLAB license)

2. LICENSE REQUIREMENTS
 - Commercial use requires purchasing license from RARLAB
 - License cost: approximately $1000+ (contact RARLAB for exact pricing)
 - License info: https://rarlab.com/license.htm

3. MARKBASE LIABILITY DISCLAIMER
 - MarkBase provides RAR DECOMPRESSION only (no compression)
 - MarkBase uses unrar library (free for personal use)
 - MarkBase DOES NOT provide RAR compression functionality
 - MarkBase DOES NOT assume any legal liability for RAR patent issues

4. USER RESPONSIBILITY
 - You are solely responsible for verifying legal compliance
 - If commercial use, you must purchase RARLAB license
 - You accept all legal risks by enabling enable_rar = true
 - MarkBase is not liable for any patent infringement

5. USAGE GUIDELINES
 - Personal use: Free (符合unrar免费许可)
 - Commercial use: License required
 - Distribution: Contact RARLAB for distribution license

ENABLE RAR FORMAT:
Modify config.toml:
 enable_rar = false # Default disabled
 enable_rar = true # ⚠️ User accepts legal risk

CONTACT:
- RARLAB: https://rarlab.com
- License: support@rarlab.com
- MarkBase: your-markbase-support

Last Updated: 2026-06-10
Version: 1.0
Legal Consultation: [Please consult professional lawyer for commercial use]
";

    DISCLAIMER.to_owned()
}