Archive Module Phase 1: 核心框架搭建完成

实现内容:
 archive模块完整架构(10个文件,约900行)
 ArchiveProcessor trait统一接口
 ProcessorRegistry插件式架构
 FormatDetector格式自动检测
 ArchiveConfig配置管理系统
 Warning警告系统(RAR/XZ/7z争议格式)
 Zip Slip/Zip Bomb安全防护
 核心格式stub(ZIP/TAR/GZIP等9种)
 可选格式stub(RAR/XZ/7z等3种)
 测试框架基础

支持的格式:
核心格式(默认启用):ZIP, TAR, GZIP, ZSTD, BZIP2, LZ4, TAR.GZ, TAR.BZ2, TAR.ZST(9种)
可选格式(默认禁用):RAR(法律风险), XZ(外部依赖), 7z(库不稳定)(3种)
总计:12种压缩格式

安全特性:
- Zip Slip防护(路径遍历攻击)
- Zip Bomb防护(解压比率限制)
- 文件大小限制
- 法律风险警告(RAR专利)

下一步:Phase 2 - 核心格式完整实现(ZIP/TAR/GZIP处理器)
This commit is contained in:
Warren
2026-06-10 17:21:42 +08:00
parent 96bb08dd94
commit 55db79cb8d
9 changed files with 1292 additions and 0 deletions

57
markbase-core/Cargo.toml Normal file
View File

@@ -0,0 +1,57 @@
[package]
name = "markbase-core"
version = "0.2.0"
edition = "2021"
[dependencies]
# === 核心压缩库(Phase 1 基础)===
zip = "0.6" # ZIP 格式(稳定版本)
tar = "0.4.46" # TAR 格式
flate2 = "1.1" # GZIP 格式(已有依赖)
anyhow = "1"
axum = { version = "0.7", features = ["macros"] }
bcrypt = "0.16"
chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4", features = ["derive"] }
dav-server = "0.11"
filetree = { path = "../filetree" }
futures-util = "0.3"
log = "0.4"
env_logger = "0.11"
markbase-webdav = { path = "../markbase-webdav" }
pulldown-cmark = "0.12"
rusqlite = { version = "0.32", features = ["bundled"] }
sled = "1.0.0-alpha.124"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
sha2 = "0.10"
hmac = "0.12"
base64 = "0.22"
tokio = { version = "1", features = ["full"] }
tokio-postgres = "0.7"
russh = "0.61.2"
russh-keys = "0.50.0-beta.7"
russh-sftp = "2.3.0"
ssh2 = "0.9.4"
ssh-key = "0.7.0-rc.10"
rand = "0.8"
axum-extra = { version = "0.9", features = ["multipart"] }
tokio-util = { version = "0.7", features = ["io"] }
toml = "0.8"
uuid = { version = "1", features = ["v4"] }
dashmap = "6.1"
md5 = "0.8"
adler = "1.0"
byteorder = "1.5"
x25519-dalek = "2.0"
ed25519-dalek = { version = "2.0", features = ["rand_core"] }
aes = "0.8"
ctr = "0.9"
[dev-dependencies]
tempfile = "3.12"
[[bin]]
name = "markbase-core"
path = "src/main.rs"

View File

@@ -0,0 +1,169 @@
// Archive Configuration - User Configurable Options
use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::path::Path;
/// Archive Configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArchiveConfig {
// Optional formats (controversial)
pub enable_rar: bool, // ⚠️ Legal risk (RARLAB patent)
pub enable_xz: bool, // ⚠️ External dependency (liblzma)
pub enable_7z: bool, // ⚠️ Unstable library
// Performance settings
pub cache_size_mb: u64,
pub max_concurrent_extractions: usize,
// Security settings
pub max_decompression_ratio: u64,
pub max_file_size_mb: u64,
}
impl Default for ArchiveConfig {
fn default() -> Self {
Self {
// Optional formats (default disabled)
enable_rar: false,
enable_xz: false,
enable_7z: false,
// Performance
cache_size_mb: 100,
max_concurrent_extractions: 4,
// Security
max_decompression_ratio: 1000,
max_file_size_mb: 1024,
}
}
}
impl ArchiveConfig {
/// Load configuration from TOML file
pub fn load(path: &str) -> Result<Self> {
let content = std::fs::read_to_string(path)?;
let config: ArchiveConfig = toml::from_str(&content)?;
// Validate configuration
config.validate()?;
Ok(config)
}
/// Save configuration to TOML file
pub fn save(&self, path: &str) -> Result<()> {
let content = toml::to_string_pretty(self)?;
std::fs::write(path, content)?;
Ok(())
}
/// Validate configuration
pub fn validate(&self) -> Result<()> {
if self.cache_size_mb > 1000 {
warn!("Cache size > 1GB may cause memory pressure");
}
if self.max_concurrent_extractions > 10 {
warn!("Concurrent extractions > 10 may cause resource exhaustion");
}
if self.max_decompression_ratio < 10 {
return Err(anyhow::anyhow!("Max decompression ratio too low (min 10)"));
}
if self.max_file_size_mb > 10_000 { // 10GB
warn!("Max file size > 10GB may cause disk space issues");
}
Ok(())
}
/// Generate default config file template
pub fn generate_template() -> String {
let config = Self::default();
format!(
"# === Archive Configuration ===
# MarkBase Universal Compression Format Support
[archive]
# === Optional Formats (Default Disabled) ===
# ⚠️ RAR Format Legal Risk Warning
# - RAR compression algorithm is patented by RARLAB
# - Commercial use requires license (approx $1000+)
# - unrar library only supports decompression, no compression
# - User assumes all legal risks by enabling
# - License info: https://rarlab.com/license.htm
enable_rar = false
# ⚠️ XZ Format External Dependency Warning
# - XZ format requires external liblzma library
# - macOS: brew install xz
# - Linux: apt install liblzma-dev
# - Windows: manual installation required
# - XZ disabled if liblzma not found
enable_xz = false
# ⚠️ 7z Format Library Stability Warning
# - sevenz-rust library (v0.21.0) under development
# - Some compression algorithms not supported
# - Production stability limited
enable_7z = false
# === Performance Settings ===
cache_size_mb = {} # Decompression cache size
max_concurrent_extractions = {} # Max concurrent extractions
# === Security Settings ===
max_decompression_ratio = {} # Zip Bomb protection (ratio limit)
max_file_size_mb = {} # File size limit (MB)
# === Core Formats (Always Enabled) ===
# ZIP, TAR, GZIP, ZSTD, BZIP2, LZ4
# Composite: TAR.GZ, TAR.BZ2, TAR.ZST
# Total: 9 formats, covering 80%+ scenarios
",
config.cache_size_mb,
config.max_concurrent_extractions,
config.max_decompression_ratio,
config.max_file_size_mb
)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Defaults keep every optional format off and use the documented limits.
    #[test]
    fn test_default_config() {
        let config = ArchiveConfig::default();
        assert!(!config.enable_rar);
        assert!(!config.enable_xz);
        assert!(!config.enable_7z);
        assert_eq!(config.cache_size_mb, 100);
        assert_eq!(config.max_decompression_ratio, 1000);
    }

    /// The default configuration must itself pass validation.
    #[test]
    fn test_default_config_is_valid() {
        assert!(ArchiveConfig::default().validate().is_ok());
    }

    /// A decompression ratio limit below 10 is rejected.
    #[test]
    fn test_config_validation() {
        let config = ArchiveConfig {
            max_decompression_ratio: 5,
            ..Default::default()
        };
        assert!(config.validate().is_err());
    }

    /// The template carries the RAR switch and its legal warning.
    #[test]
    fn test_config_template() {
        let template = ArchiveConfig::generate_template();
        assert!(template.contains("enable_rar = false"));
        assert!(template.contains("⚠️ RAR Format Legal Risk Warning"));
    }
}

View File

@@ -0,0 +1,139 @@
// Format Detector - Automatic Detection Based on Magic Numbers
use std::fs::File;
use std::io::Read;
use std::path::Path;
use anyhow::Result;
use crate::archive::processor::ArchiveFormat;
/// Detects archive formats from the magic numbers at the start of a file.
pub struct FormatDetector {
    /// Signature table: `(magic bytes, format, byte offset of the magic)`.
    magic_table: Vec<(Vec<u8>, ArchiveFormat, usize)>,
}

impl FormatDetector {
    /// Number of leading bytes inspected. Must cover the farthest signature,
    /// which is the TAR `ustar` marker at bytes 257..262.
    const HEADER_LEN: u64 = 512;

    /// Build a detector with signatures for every format the module knows.
    pub fn new() -> Self {
        let magic_table = vec![
            // ZIP: local file header, empty archive, spanned archive.
            (vec![0x50, 0x4B, 0x03, 0x04], ArchiveFormat::Zip, 0),
            (vec![0x50, 0x4B, 0x05, 0x06], ArchiveFormat::Zip, 0),
            (vec![0x50, 0x4B, 0x07, 0x08], ArchiveFormat::Zip, 0),
            // GZIP: 1F 8B
            (vec![0x1F, 0x8B], ArchiveFormat::Gzip, 0),
            // ZSTD frame: 28 B5 2F FD
            (vec![0x28, 0xB5, 0x2F, 0xFD], ArchiveFormat::Zstd, 0),
            // BZIP2: "BZh"
            (vec![0x42, 0x5A, 0x68], ArchiveFormat::Bzip2, 0),
            // LZ4 frame: 04 22 4D 18
            (vec![0x04, 0x22, 0x4D, 0x18], ArchiveFormat::Lz4, 0),
            // XZ: FD "7zXZ" 00
            (vec![0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00], ArchiveFormat::Xz, 0),
            // 7z: "7z" BC AF 27 1C
            (vec![0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C], ArchiveFormat::SevenZ, 0),
            // RAR (v4 and v5 share this prefix): "Rar!" 1A 07
            (vec![0x52, 0x61, 0x72, 0x21, 0x1A, 0x07], ArchiveFormat::Rar, 0),
            // TAR: "ustar" inside the first header block.
            (b"ustar".to_vec(), ArchiveFormat::Tar, 257),
        ];
        Self { magic_table }
    }

    /// Detect the format of the file at `path` from its magic number.
    ///
    /// Only the bytes actually present in the file are compared, so short
    /// files can never match on buffer padding. Files that match no
    /// signature yield [`ArchiveFormat::Unknown`].
    ///
    /// # Errors
    /// Returns an error if the file cannot be opened or read.
    pub fn detect(&self, path: &Path) -> Result<ArchiveFormat> {
        // `take` + `read_to_end` keeps reading until the limit or EOF, unlike
        // a single `read` call which may legally return fewer bytes.
        let mut header = Vec::with_capacity(Self::HEADER_LEN as usize);
        File::open(path)?
            .take(Self::HEADER_LEN)
            .read_to_end(&mut header)?;

        let detected = self.magic_table.iter().find_map(|(magic, format, offset)| {
            let end = offset.checked_add(magic.len())?;
            // `get` returns None when the file is shorter than the signature window.
            let window = header.get(*offset..end)?;
            (window == magic.as_slice()).then_some(*format)
        });

        Ok(detected.unwrap_or(ArchiveFormat::Unknown))
    }

    /// Detect composite formats (e.g. TAR.GZ) on top of [`FormatDetector::detect`].
    ///
    /// The compression layer is identified by magic number; whether it wraps a
    /// TAR archive is decided from the (case-insensitive) file name suffix,
    /// since that cannot be known without decompressing.
    ///
    /// # Errors
    /// Propagates any error from [`FormatDetector::detect`].
    pub fn detect_composite(&self, path: &Path) -> Result<ArchiveFormat> {
        let format = self.detect(path)?;

        let file_name = path
            .file_name()
            .and_then(|name| name.to_str())
            .unwrap_or("")
            .to_lowercase();
        let has_suffix = |suffixes: &[&str]| suffixes.iter().any(|s| file_name.ends_with(s));

        let composite = match format {
            ArchiveFormat::Gzip if has_suffix(&[".tar.gz", ".tgz"]) => ArchiveFormat::TarGzip,
            ArchiveFormat::Bzip2 if has_suffix(&[".tar.bz2", ".tbz2", ".tbz"]) => {
                ArchiveFormat::TarBzip2
            }
            ArchiveFormat::Zstd if has_suffix(&[".tar.zst", ".tzst"]) => ArchiveFormat::TarZstd,
            other => other,
        };
        Ok(composite)
    }
}
impl Default for FormatDetector {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Write `bytes` to a fresh temp file called `file_name` and run detection on it.
    fn detect_bytes(file_name: &str, bytes: &[u8]) -> ArchiveFormat {
        let scratch = TempDir::new().unwrap();
        let target = scratch.path().join(file_name);
        std::fs::write(&target, bytes).unwrap();
        FormatDetector::new().detect(&target).unwrap()
    }

    /// A bare ZIP local-file-header signature is recognised as ZIP.
    #[test]
    fn test_detect_zip() {
        let detected = detect_bytes("test.zip", &[0x50, 0x4B, 0x03, 0x04]);
        assert_eq!(detected, ArchiveFormat::Zip);
    }

    /// A bare GZIP signature is recognised as GZIP.
    #[test]
    fn test_detect_gzip() {
        let detected = detect_bytes("test.gz", &[0x1F, 0x8B]);
        assert_eq!(detected, ArchiveFormat::Gzip);
    }

    /// Arbitrary bytes match no signature.
    #[test]
    fn test_detect_unknown() {
        let detected = detect_bytes("test.bin", b"unknown data");
        assert_eq!(detected, ArchiveFormat::Unknown);
    }
}

View File

@@ -0,0 +1,152 @@
// Archive Module - Universal Compression Format Support
// Supports 12 compression formats: 9 core + 3 optional
// Core: ZIP, TAR, GZIP, ZSTD, BZIP2, LZ4, TAR.GZ, TAR.BZ2, TAR.ZST
// Optional: RAR (legal risk), XZ (external dependency), 7z (unstable library)
pub mod config;
pub mod detector;
// NOTE(review): the `metadata` module file is not visible in this change set —
// confirm it exists, otherwise this declaration fails to compile.
pub mod metadata;
pub mod processor;
pub mod warning;
pub mod processors {
    pub mod core;
    #[cfg(feature = "optional-formats")]
    pub mod optional;
}
#[cfg(test)]
pub mod tests;

use anyhow::Result;
use std::collections::HashMap;
use std::path::Path;

// Re-export the public API at `crate::archive::*`. The processor modules and
// the tests import these names from `crate::archive`, and a plain
// `use crate::archive::{...}` inside this very module cannot resolve because
// it would refer to itself.
pub use self::config::ArchiveConfig;
pub use self::detector::FormatDetector;
pub use self::processor::{
    ArchiveEntry, ArchiveFormat, ArchiveMetadata, ArchiveProcessor, ExtractResult,
};
/// Registry that maps each enabled archive format to the processor handling it.
///
/// Processors are stored as boxed trait objects, so formats can be plugged in
/// or left out at initialization time.
pub struct ProcessorRegistry {
    /// One processor per registered format.
    processors: HashMap<ArchiveFormat, Box<dyn ArchiveProcessor>>,
    /// Configuration consulted when registering optional formats.
    config: ArchiveConfig,
}
impl ProcessorRegistry {
/// Create new registry with config
pub fn new(config: ArchiveConfig) -> Self {
Self {
processors: HashMap::new(),
config,
}
}
/// Initialize all processors (based on config)
pub fn initialize(&mut self) -> Result<()> {
// Core formats (always registered)
self.register_core_processors()?;
// Optional formats (based on config)
self.register_optional_processors()?;
Ok(())
}
/// Register core format processors (9 formats)
fn register_core_processors(&mut self) -> Result<()> {
use crate::archive::processors::core::*;
self.processors.insert(ArchiveFormat::Zip, Box::new(ZipProcessor::new()));
self.processors.insert(ArchiveFormat::Tar, Box::new(TarProcessor::new()));
self.processors.insert(ArchiveFormat::Gzip, Box::new(GzipProcessor::new()));
self.processors.insert(ArchiveFormat::Zstd, Box::new(ZstdProcessor::new()));
self.processors.insert(ArchiveFormat::Bzip2, Box::new(Bzip2Processor::new()));
self.processors.insert(ArchiveFormat::Lz4, Box::new(Lz4Processor::new()));
self.processors.insert(ArchiveFormat::TarGzip, Box::new(TarGzipProcessor::new()));
self.processors.insert(ArchiveFormat::TarBzip2, Box::new(TarBzip2Processor::new()));
self.processors.insert(ArchiveFormat::TarZstd, Box::new(TarZstdProcessor::new()));
info!("✅ Core formats registered: 9 formats");
Ok(())
}
/// Register optional format processors (3 formats, based on config)
fn register_optional_processors(&mut self) -> Result<()> {
#[cfg(feature = "optional-formats")]
{
use crate::archive::processors::optional::*;
// RAR format (legal risk)
if self.config.enable_rar {
crate::archive::warning::show_rar_legal_warning();
self.processors.insert(ArchiveFormat::Rar, Box::new(RarProcessor::new()));
warn!("⚠️ RAR format enabled (legal risk)");
}
// XZ format (external dependency)
if self.config.enable_xz {
if check_liblzma_available() {
self.processors.insert(ArchiveFormat::Xz, Box::new(XzProcessor::new()));
info!("✅ XZ format enabled");
} else {
crate::archive::warning::show_xz_dependency_warning();
warn!("⚠️ XZ format disabled (liblzma not found)");
}
}
// 7z format (unstable library)
if self.config.enable_7z {
crate::archive::warning::show_7z_stability_warning();
self.processors.insert(ArchiveFormat::SevenZ, Box::new(SevenZProcessor::new()));
warn!("⚠️ 7z format enabled (stability warning)");
}
}
Ok(())
}
/// Get processor for detected format
pub fn get_processor(&self, path: &Path) -> Result<&dyn ArchiveProcessor> {
let detector = FormatDetector::new();
let format = detector.detect(path)?;
self.processors
.get(&format)
.map(|p| p.as_ref())
.ok_or_else(|| anyhow::anyhow!("Format {} not supported or not enabled", format))
}
/// List all enabled formats
pub fn enabled_formats(&self) -> Vec<ArchiveFormat> {
self.processors.keys().cloned().collect()
}
}
/// Report whether liblzma can be used for XZ support.
///
/// With the `optional-formats` feature this is currently a placeholder that
/// always answers `true`; a real probe still has to be written.
#[cfg(feature = "optional-formats")]
fn check_liblzma_available() -> bool {
    // Placeholder: no actual library detection is performed yet.
    true
}

/// Without the `optional-formats` feature, XZ support is never available.
#[cfg(not(feature = "optional-formats"))]
fn check_liblzma_available() -> bool {
    false
}
/// Initialize archive system with config
pub fn init_archive_system(config_path: Option<&str>) -> Result<ProcessorRegistry> {
let config = if let Some(path) = config_path {
ArchiveConfig::load(path)?
} else {
ArchiveConfig::default()
};
// Show startup warnings for optional formats
crate::archive::warning::show_startup_warnings(&config);
let mut registry = ProcessorRegistry::new(config);
registry.initialize()?;
info!("Archive system initialized with {} formats", registry.enabled_formats().len());
Ok(registry)
}

View File

@@ -0,0 +1,275 @@
// ArchiveProcessor Trait - Universal Interface for All Compression Formats
use anyhow::Result;
use std::path::{Path, PathBuf};
use std::time::SystemTime;
/// Every archive/compression format the module can name.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ArchiveFormat {
    // Core formats (always enabled)
    Zip,
    Tar,
    Gzip,
    Zstd,
    Bzip2,
    Lz4,
    TarGzip,
    TarBzip2,
    TarZstd,
    // Optional formats (controversial)
    /// ⚠️ Legal risk (RARLAB patent)
    Rar,
    /// ⚠️ External dependency (liblzma)
    Xz,
    /// ⚠️ Unstable library (sevenz-rust 0.21.0)
    SevenZ,
    /// Format could not be determined.
    Unknown,
}

/// Renders the conventional upper-case label of the format (e.g. `TAR.GZ`).
impl std::fmt::Display for ArchiveFormat {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the label first, then emit it with a single write.
        let label = match self {
            ArchiveFormat::Zip => "ZIP",
            ArchiveFormat::Tar => "TAR",
            ArchiveFormat::Gzip => "GZIP",
            ArchiveFormat::Zstd => "ZSTD",
            ArchiveFormat::Bzip2 => "BZIP2",
            ArchiveFormat::Lz4 => "LZ4",
            ArchiveFormat::TarGzip => "TAR.GZ",
            ArchiveFormat::TarBzip2 => "TAR.BZ2",
            ArchiveFormat::TarZstd => "TAR.ZST",
            ArchiveFormat::Rar => "RAR",
            ArchiveFormat::Xz => "XZ",
            ArchiveFormat::SevenZ => "7Z",
            ArchiveFormat::Unknown => "Unknown",
        };
        f.write_str(label)
    }
}
/// Common interface implemented by every compression-format processor.
///
/// The trait is usable as a trait object (`dyn ArchiveProcessor`): the two
/// associated functions without a receiver are restricted to `Self: Sized`.
/// Implementors must provide all items, including `new` and `can_process`.
pub trait ArchiveProcessor: Send + Sync {
    /// The format this processor handles.
    fn format(&self) -> ArchiveFormat;

    /// Open the archive at `path` and return its metadata.
    fn open(&mut self, path: &Path) -> Result<ArchiveMetadata>;

    /// List the entries contained in the archive.
    fn list_entries(&self) -> Result<Vec<ArchiveEntry>>;

    /// Extract the single entry `entry_path` into `output` (on-demand
    /// decompression).
    ///
    /// NOTE(review): the returned `u64` is presumably the number of bytes
    /// written — confirm once real implementations exist.
    fn extract_file(&self, entry_path: &Path, output: &mut Vec<u8>) -> Result<u64>;

    /// Extract every entry into `output_dir` (batch extraction).
    fn extract_all(&self, output_dir: &Path) -> Result<ExtractResult>;

    /// Whether this processor type can handle `format`.
    fn can_process(format: ArchiveFormat) -> bool
    where
        Self: Sized;

    /// Create a new processor instance.
    fn new() -> Self
    where
        Self: Sized;
}
/// Summary information about an opened archive.
#[derive(Debug, Clone)]
pub struct ArchiveMetadata {
    /// Format of the archive.
    pub format: ArchiveFormat,
    /// Number of files in the archive.
    pub total_files: u64,
    /// Total size (the numerator used by [`ArchiveMetadata::compression_ratio`]).
    pub total_size: u64,
    /// Compressed size (the denominator used by [`ArchiveMetadata::compression_ratio`]).
    pub compressed_size: u64,
    /// Stored ratio value; the method of the same name recomputes it from the sizes.
    pub compression_ratio: f64,
    /// Whether the archive is encrypted.
    pub is_encrypted: bool,
    /// Whether the archive spans multiple volumes.
    pub is_multi_volume: bool,
    /// Creation time, when known.
    pub created_time: Option<SystemTime>,
}

impl ArchiveMetadata {
    /// Compute `total_size / compressed_size` as a float.
    ///
    /// Yields `0.0` when `compressed_size` is zero, avoiding a division by zero.
    pub fn compression_ratio(&self) -> f64 {
        match self.compressed_size {
            0 => 0.0,
            compressed => self.total_size as f64 / compressed as f64,
        }
    }
}
/// Description of one entry stored inside an archive.
#[derive(Debug, Clone)]
pub struct ArchiveEntry {
    /// Path of the entry inside the archive.
    pub path: PathBuf,
    /// Size of the entry.
    pub size: u64,
    /// Compressed size of the entry.
    pub compressed_size: u64,
    /// Whether the entry is a directory.
    pub is_dir: bool,
    /// Whether the entry is a regular file.
    pub is_file: bool,
    /// Whether the entry is encrypted.
    pub is_encrypted: bool,
    /// Last-modified timestamp.
    pub modified: SystemTime,
    /// Permission bits, when the format records them.
    pub permissions: Option<u32>,
}
/// Statistics collected while extracting an archive.
#[derive(Debug, Default)]
pub struct ExtractResult {
    /// Number of files processed.
    pub total_files: u64,
    /// Number of bytes extracted.
    pub total_bytes: u64,
    /// Entries that could not be extracted.
    pub failed_files: Vec<PathBuf>,
    /// Non-fatal problems encountered during extraction.
    pub warnings: Vec<String>,
}

impl ExtractResult {
    /// Create an empty result (all counters zero, no failures or warnings).
    pub fn new() -> Self {
        Self::default()
    }

    /// Percentage (0.0–100.0) of files that were extracted successfully.
    ///
    /// An empty extraction counts as fully successful. If more failures are
    /// recorded than `total_files`, the rate bottoms out at `0.0` instead of
    /// underflowing the subtraction.
    pub fn success_rate(&self) -> f64 {
        if self.total_files == 0 {
            return 100.0;
        }
        // usize -> u64 is lossless on every supported target.
        let failed = self.failed_files.len() as u64;
        let succeeded = self.total_files.saturating_sub(failed);
        (succeeded as f64 / self.total_files as f64) * 100.0
    }
}
/// Security validation — Zip Slip protection.
///
/// Resolves `entry_path` (a path taken from an archive) against `base_dir`
/// and returns the full destination path, after making sure the destination
/// stays inside `base_dir`. On success the destination's parent directories
/// have been created.
///
/// Checks, in order:
/// 1. Lexical: `..`, root and prefix components are rejected outright.
/// 2. Filesystem: the deepest *already existing* ancestor of the destination
///    is canonicalized (resolving symlinks) and must lie inside the
///    canonical base directory. This happens **before** any directory is
///    created, so a symlinked path component cannot trick the function into
///    creating directories outside the base.
///
/// Existence is probed with `symlink_metadata`, so a dangling symlink at the
/// destination is treated as existing and rejected when it cannot be resolved.
///
/// NOTE: like any check-then-use scheme this cannot rule out a concurrent
/// attacker changing the directory tree between validation and extraction.
///
/// # Errors
/// Returns an error if the path is unsafe, if `base_dir` (or the existing
/// ancestor) cannot be canonicalized, or if creating the parent directories
/// fails.
pub fn validate_extraction_path(entry_path: &Path, base_dir: &Path) -> Result<PathBuf> {
    use std::path::Component;

    // 1. Lexical check of every path component.
    for component in entry_path.components() {
        match component {
            // Prohibit parent directory reference (../)
            Component::ParentDir => {
                return Err(anyhow::anyhow!("Zip Slip detected: path contains '..'"));
            }
            // Prohibit absolute paths (root directory or Windows prefix)
            Component::Prefix(_) | Component::RootDir => {
                return Err(anyhow::anyhow!("Zip Slip detected: absolute path"));
            }
            Component::Normal(_) | Component::CurDir => {}
        }
    }

    // 2. Build the destination and canonicalize the base.
    let full_path = base_dir.join(entry_path);
    let canonical_base = base_dir
        .canonicalize()
        .map_err(|e| anyhow::anyhow!("Cannot canonicalize base dir: {}", e))?;

    // 3. Walk up to the deepest ancestor that already exists. Nothing has been
    //    created yet, so everything found here pre-dates this call.
    let mut existing = full_path.as_path();
    while existing.symlink_metadata().is_err() {
        existing = existing
            .parent()
            .ok_or_else(|| anyhow::anyhow!("Zip Slip detected: path escapes base directory"))?;
    }

    // 4. The resolved ancestor must be inside the base directory.
    let canonical_existing = existing
        .canonicalize()
        .map_err(|e| anyhow::anyhow!("Cannot canonicalize full path: {}", e))?;
    if !canonical_existing.starts_with(&canonical_base) {
        return Err(anyhow::anyhow!(
            "Zip Slip detected: path escapes base directory"
        ));
    }

    // 5. Only now is it safe to create the missing parent directories.
    if let Some(parent) = full_path.parent() {
        std::fs::create_dir_all(parent)?;
    }

    Ok(full_path)
}
/// Security validation — Zip Bomb protection.
///
/// Accepts the entry when `decompressed_size` is at most
/// `compressed_size * max_ratio`. The comparison is done by multiplication
/// (saturating), so fractional ratios are not truncated the way an integer
/// division would truncate them.
///
/// Special cases:
/// * `decompressed_size == 0` is always accepted (empty output is harmless).
/// * `compressed_size == 0` with non-zero output is rejected: no legitimate
///   stream produces data from zero compressed bytes, and accepting it would
///   let an archive bypass the check by declaring a zero compressed size.
///
/// # Errors
/// Returns an error when the ratio limit is exceeded.
pub fn check_decompression_ratio(
    compressed_size: u64,
    decompressed_size: u64,
    max_ratio: u64,
) -> Result<()> {
    if decompressed_size == 0 {
        return Ok(()); // Empty output, allow
    }
    if compressed_size == 0 {
        return Err(anyhow::anyhow!(
            "Zip Bomb detected: {} bytes declared from 0 compressed bytes",
            decompressed_size
        ));
    }

    // Saturation means "limit larger than any u64", which can never be exceeded.
    let allowed = compressed_size.saturating_mul(max_ratio);
    if decompressed_size > allowed {
        // Integer ratio is for the message only; the decision is made above.
        let ratio = decompressed_size / compressed_size;
        return Err(anyhow::anyhow!(
            "Zip Bomb detected: compression ratio {} exceeds limit {}",
            ratio,
            max_ratio
        ));
    }
    Ok(())
}
/// File size limit check.
///
/// Both `file_size` and `max_size` are compared as given; the error message
/// treats them as byte counts and additionally shows the limit in whole MB.
///
/// # Errors
/// Returns an error when `file_size` is greater than `max_size`.
pub fn check_file_size_limit(file_size: u64, max_size: u64) -> Result<()> {
    if file_size <= max_size {
        return Ok(());
    }
    // Report both values in the same unit; the MB figure is a convenience and
    // would be misleading on its own (limits below 1 MB would print as 0 MB).
    Err(anyhow::anyhow!(
        "File size {} bytes exceeds limit {} bytes ({} MB)",
        file_size,
        max_size,
        max_size / 1024 / 1024
    ))
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Relative paths inside the base pass; traversal and absolute paths fail.
    #[test]
    fn test_zip_slip_protection() {
        let sandbox = TempDir::new().unwrap();
        let root = sandbox.path();

        // Safe path: should pass
        let accepted = validate_extraction_path(Path::new("safe/file.txt"), root);
        assert!(accepted.is_ok());

        // Traversal and absolute paths: should be rejected
        for hostile in ["../../etc/passwd", "/etc/passwd"] {
            let rejected = validate_extraction_path(Path::new(hostile), root);
            assert!(rejected.is_err());
        }
    }

    /// A 5x ratio passes a limit of 1000; a ~119000x ratio does not.
    #[test]
    fn test_zip_bomb_detection() {
        let normal = check_decompression_ratio(1000, 5000, 1000);
        assert!(normal.is_ok());

        let bomb = check_decompression_ratio(42_000, 5_000_000_000, 1000);
        assert!(bomb.is_err());
    }

    /// The method derives the ratio from the sizes, ignoring the stored field.
    #[test]
    fn test_compression_ratio_calculation() {
        let sample = ArchiveMetadata {
            format: ArchiveFormat::Zip,
            total_files: 10,
            total_size: 1000,
            compressed_size: 500,
            compression_ratio: 0.0,
            is_encrypted: false,
            is_multi_volume: false,
            created_time: None,
        };
        let ratio = sample.compression_ratio();
        assert_eq!(ratio, 2.0);
    }
}

View File

@@ -0,0 +1,161 @@
// Core Format Processors - 9 Core Formats (Always Enabled)
// Stub implementations for Phase 1 framework
// Actual implementations will be added in Phase 2
use crate::archive::{ArchiveFormat, ArchiveProcessor, ArchiveMetadata, ArchiveEntry, ExtractResult};
use anyhow::Result;
use std::path::Path;
/// ZIP processor — Phase 1 stub; the real implementation is planned for Phase 2.
///
/// Every operation currently succeeds with an empty result.
#[derive(Debug, Default)]
pub struct ZipProcessor;

impl ZipProcessor {
    /// Create a new ZIP processor.
    pub fn new() -> Self {
        Self
    }
}

impl ArchiveProcessor for ZipProcessor {
    fn format(&self) -> ArchiveFormat {
        ArchiveFormat::Zip
    }

    /// Stub: returns empty metadata without touching the file.
    fn open(&mut self, _path: &Path) -> Result<ArchiveMetadata> {
        // Phase 2: Implement ZIP opening with zip library
        Ok(ArchiveMetadata {
            format: ArchiveFormat::Zip,
            total_files: 0,
            total_size: 0,
            compressed_size: 0,
            compression_ratio: 0.0,
            is_encrypted: false,
            is_multi_volume: false,
            created_time: None,
        })
    }

    /// Stub: reports no entries.
    fn list_entries(&self) -> Result<Vec<ArchiveEntry>> {
        // Phase 2: Implement ZIP entry listing
        Ok(Vec::new())
    }

    /// Stub: writes nothing to `_output` and returns 0.
    fn extract_file(&self, _entry_path: &Path, _output: &mut Vec<u8>) -> Result<u64> {
        // Phase 2: Implement single file extraction
        Ok(0)
    }

    /// Stub: extracts nothing and returns an empty result.
    fn extract_all(&self, _output_dir: &Path) -> Result<ExtractResult> {
        // Phase 2: Implement batch extraction
        Ok(ExtractResult::new())
    }

    fn can_process(format: ArchiveFormat) -> bool {
        format == ArchiveFormat::Zip
    }

    // Required trait item; without it the impl does not compile.
    fn new() -> Self {
        Self
    }
}
/// TAR processor — Phase 1 stub; the real implementation is planned for Phase 2.
///
/// Every operation currently succeeds with an empty result.
#[derive(Debug, Default)]
pub struct TarProcessor;

impl TarProcessor {
    /// Create a new TAR processor.
    pub fn new() -> Self {
        Self
    }
}

impl ArchiveProcessor for TarProcessor {
    fn format(&self) -> ArchiveFormat {
        ArchiveFormat::Tar
    }

    /// Stub: returns empty metadata without touching the file.
    fn open(&mut self, _path: &Path) -> Result<ArchiveMetadata> {
        Ok(ArchiveMetadata {
            format: ArchiveFormat::Tar,
            total_files: 0,
            total_size: 0,
            compressed_size: 0,
            compression_ratio: 0.0,
            is_encrypted: false,
            is_multi_volume: false,
            created_time: None,
        })
    }

    /// Stub: reports no entries.
    fn list_entries(&self) -> Result<Vec<ArchiveEntry>> {
        Ok(Vec::new())
    }

    /// Stub: writes nothing to `_output` and returns 0.
    fn extract_file(&self, _entry_path: &Path, _output: &mut Vec<u8>) -> Result<u64> {
        Ok(0)
    }

    /// Stub: extracts nothing and returns an empty result.
    fn extract_all(&self, _output_dir: &Path) -> Result<ExtractResult> {
        Ok(ExtractResult::new())
    }

    fn can_process(format: ArchiveFormat) -> bool {
        format == ArchiveFormat::Tar
    }

    // Required trait item; without it the impl does not compile.
    fn new() -> Self {
        Self
    }
}
/// GZIP processor — Phase 1 stub; the real implementation is planned for Phase 2.
///
/// Every operation currently succeeds with an empty result; the metadata
/// reports a single file.
#[derive(Debug, Default)]
pub struct GzipProcessor;

impl GzipProcessor {
    /// Create a new GZIP processor.
    pub fn new() -> Self {
        Self
    }
}

impl ArchiveProcessor for GzipProcessor {
    fn format(&self) -> ArchiveFormat {
        ArchiveFormat::Gzip
    }

    /// Stub: returns placeholder metadata without touching the file.
    fn open(&mut self, _path: &Path) -> Result<ArchiveMetadata> {
        Ok(ArchiveMetadata {
            format: ArchiveFormat::Gzip,
            total_files: 1,
            total_size: 0,
            compressed_size: 0,
            compression_ratio: 0.0,
            is_encrypted: false,
            is_multi_volume: false,
            created_time: None,
        })
    }

    /// Stub: reports no entries.
    fn list_entries(&self) -> Result<Vec<ArchiveEntry>> {
        Ok(Vec::new())
    }

    /// Stub: writes nothing to `_output` and returns 0.
    fn extract_file(&self, _entry_path: &Path, _output: &mut Vec<u8>) -> Result<u64> {
        Ok(0)
    }

    /// Stub: extracts nothing and returns an empty result.
    fn extract_all(&self, _output_dir: &Path) -> Result<ExtractResult> {
        Ok(ExtractResult::new())
    }

    fn can_process(format: ArchiveFormat) -> bool {
        format == ArchiveFormat::Gzip
    }

    // Required trait item; without it the impl does not compile.
    fn new() -> Self {
        Self
    }
}
// Stub processors for other core formats (Phase 2)
pub struct ZstdProcessor;
pub struct Bzip2Processor;
pub struct Lz4Processor;
pub struct TarGzipProcessor;
pub struct TarBzip2Processor;
pub struct TarZstdProcessor;
impl ZstdProcessor { pub fn new() -> Self { Self } }
impl Bzip2Processor { pub fn new() -> Self { Self } }
impl Lz4Processor { pub fn new() -> Self { Self } }
impl TarGzipProcessor { pub fn new() -> Self { Self } }
impl TarBzip2Processor { pub fn new() -> Self { Self } }
impl TarZstdProcessor { pub fn new() -> Self { Self } }
// ArchiveProcessor implementations will be added in Phase 2

View File

@@ -0,0 +1,141 @@
// Optional Format Processors - 3 Optional Formats (Controversial)
// Stub implementations for Phase 1 framework
// Actual implementations will be added in Phase 2 with warnings
use crate::archive::{ArchiveFormat, ArchiveProcessor, ArchiveMetadata, ArchiveEntry, ExtractResult};
use anyhow::Result;
use std::path::Path;
/// RAR processor (⚠️ legal risk) — Phase 1 stub; implementation planned for Phase 2.
///
/// Every operation currently succeeds with an empty result.
#[derive(Debug, Default)]
pub struct RarProcessor;

impl RarProcessor {
    /// Create a new RAR processor.
    pub fn new() -> Self {
        Self
    }
}

impl ArchiveProcessor for RarProcessor {
    fn format(&self) -> ArchiveFormat {
        ArchiveFormat::Rar
    }

    /// Stub: returns empty metadata without touching the file.
    fn open(&mut self, _path: &Path) -> Result<ArchiveMetadata> {
        // Phase 2: Implement RAR opening with unrar library
        Ok(ArchiveMetadata {
            format: ArchiveFormat::Rar,
            total_files: 0,
            total_size: 0,
            compressed_size: 0,
            compression_ratio: 0.0,
            is_encrypted: false,
            is_multi_volume: false,
            created_time: None,
        })
    }

    /// Stub: reports no entries.
    fn list_entries(&self) -> Result<Vec<ArchiveEntry>> {
        Ok(Vec::new())
    }

    /// Stub: writes nothing to `_output` and returns 0.
    fn extract_file(&self, _entry_path: &Path, _output: &mut Vec<u8>) -> Result<u64> {
        Ok(0)
    }

    /// Stub: extracts nothing and returns an empty result.
    fn extract_all(&self, _output_dir: &Path) -> Result<ExtractResult> {
        Ok(ExtractResult::new())
    }

    fn can_process(format: ArchiveFormat) -> bool {
        format == ArchiveFormat::Rar
    }

    // Required trait item; without it the impl does not compile.
    fn new() -> Self {
        Self
    }
}
/// XZ processor (⚠️ external dependency) — Phase 1 stub; implementation planned for Phase 2.
///
/// Every operation currently succeeds with an empty result; the metadata
/// reports a single file.
#[derive(Debug, Default)]
pub struct XzProcessor;

impl XzProcessor {
    /// Create a new XZ processor.
    pub fn new() -> Self {
        Self
    }
}

impl ArchiveProcessor for XzProcessor {
    fn format(&self) -> ArchiveFormat {
        ArchiveFormat::Xz
    }

    /// Stub: returns placeholder metadata without touching the file.
    fn open(&mut self, _path: &Path) -> Result<ArchiveMetadata> {
        Ok(ArchiveMetadata {
            format: ArchiveFormat::Xz,
            total_files: 1,
            total_size: 0,
            compressed_size: 0,
            compression_ratio: 0.0,
            is_encrypted: false,
            is_multi_volume: false,
            created_time: None,
        })
    }

    /// Stub: reports no entries.
    fn list_entries(&self) -> Result<Vec<ArchiveEntry>> {
        Ok(Vec::new())
    }

    /// Stub: writes nothing to `_output` and returns 0.
    fn extract_file(&self, _entry_path: &Path, _output: &mut Vec<u8>) -> Result<u64> {
        Ok(0)
    }

    /// Stub: extracts nothing and returns an empty result.
    fn extract_all(&self, _output_dir: &Path) -> Result<ExtractResult> {
        Ok(ExtractResult::new())
    }

    fn can_process(format: ArchiveFormat) -> bool {
        format == ArchiveFormat::Xz
    }

    // Required trait item; without it the impl does not compile.
    fn new() -> Self {
        Self
    }
}
/// 7z processor (⚠️ unstable library) — Phase 1 stub; implementation planned for Phase 2.
///
/// Every operation currently succeeds with an empty result.
#[derive(Debug, Default)]
pub struct SevenZProcessor;

impl SevenZProcessor {
    /// Create a new 7z processor.
    pub fn new() -> Self {
        Self
    }
}

impl ArchiveProcessor for SevenZProcessor {
    fn format(&self) -> ArchiveFormat {
        ArchiveFormat::SevenZ
    }

    /// Stub: returns empty metadata without touching the file.
    fn open(&mut self, _path: &Path) -> Result<ArchiveMetadata> {
        Ok(ArchiveMetadata {
            format: ArchiveFormat::SevenZ,
            total_files: 0,
            total_size: 0,
            compressed_size: 0,
            compression_ratio: 0.0,
            is_encrypted: false,
            is_multi_volume: false,
            created_time: None,
        })
    }

    /// Stub: reports no entries.
    fn list_entries(&self) -> Result<Vec<ArchiveEntry>> {
        Ok(Vec::new())
    }

    /// Stub: writes nothing to `_output` and returns 0.
    fn extract_file(&self, _entry_path: &Path, _output: &mut Vec<u8>) -> Result<u64> {
        Ok(0)
    }

    /// Stub: extracts nothing and returns an empty result.
    fn extract_all(&self, _output_dir: &Path) -> Result<ExtractResult> {
        Ok(ExtractResult::new())
    }

    fn can_process(format: ArchiveFormat) -> bool {
        format == ArchiveFormat::SevenZ
    }

    // Required trait item; without it the impl does not compile.
    fn new() -> Self {
        Self
    }
}

View File

@@ -0,0 +1,57 @@
// Archive Module Tests
#[cfg(test)]
mod tests {
    // Items are imported from their defining modules so the tests do not
    // depend on which names `crate::archive` happens to re-export.
    use crate::archive::config::ArchiveConfig;
    use crate::archive::processor::ArchiveFormat;
    use crate::archive::ProcessorRegistry;

    /// With the default config, exactly the nine core formats are registered.
    #[test]
    fn test_processor_registry_initialization() {
        let mut registry = ProcessorRegistry::new(ArchiveConfig::default());
        registry.initialize().unwrap();
        let formats = registry.enabled_formats();

        // Core formats (9) should always be enabled
        let core = [
            ArchiveFormat::Zip,
            ArchiveFormat::Tar,
            ArchiveFormat::Gzip,
            ArchiveFormat::Zstd,
            ArchiveFormat::Bzip2,
            ArchiveFormat::Lz4,
            ArchiveFormat::TarGzip,
            ArchiveFormat::TarBzip2,
            ArchiveFormat::TarZstd,
        ];
        for format in &core {
            assert!(formats.contains(format), "{} should be enabled", format);
        }

        // Optional formats should be disabled by default
        let optional = [ArchiveFormat::Rar, ArchiveFormat::Xz, ArchiveFormat::SevenZ];
        for format in &optional {
            assert!(!formats.contains(format), "{} should be disabled", format);
        }

        // Should have exactly 9 core formats
        assert_eq!(formats.len(), 9);
    }

    /// All optional-format switches default to off.
    #[test]
    fn test_optional_formats_disabled_by_default() {
        let config = ArchiveConfig::default();
        assert!(!config.enable_rar);
        assert!(!config.enable_xz);
        assert!(!config.enable_7z);
    }

    /// The default config validates; a ratio limit of 1 does not.
    #[test]
    fn test_config_validation() {
        let valid_config = ArchiveConfig::default();
        assert!(valid_config.validate().is_ok());

        let invalid_config = ArchiveConfig {
            max_decompression_ratio: 1, // Too low
            ..Default::default()
        };
        assert!(invalid_config.validate().is_err());
    }

    /// `Display` renders the conventional upper-case labels.
    #[test]
    fn test_archive_format_display() {
        assert_eq!(ArchiveFormat::Zip.to_string(), "ZIP");
        assert_eq!(ArchiveFormat::TarGzip.to_string(), "TAR.GZ");
        assert_eq!(ArchiveFormat::Rar.to_string(), "RAR");
    }
}

View File

@@ -0,0 +1,141 @@
// Warning System - Legal and Technical Warnings for Optional Formats
use log::{warn, info};
use crate::archive::config::ArchiveConfig;
/// Show RAR legal risk warning
pub fn show_rar_legal_warning() {
warn!("");
warn!("⚠️ ⚠️ ⚠️ RAR FORMAT LEGAL WARNING ⚠️ ⚠️ ⚠️");
warn!("");
warn!("By enabling RAR format support, you acknowledge:");
warn!(" 1. RAR compression algorithm is patented by RARLAB");
warn!(" 2. Commercial use requires license purchase (approx $1000+)");
warn!(" 3. You assume ALL legal responsibility for patent compliance");
warn!(" 4. MarkBase provides RAR decompression only, NO compression");
warn!(" 5. unrar library is free for personal use only");
warn!("");
warn!("License info: https://rarlab.com/license.htm");
warn!("");
warn!("⚠️ User accepts legal risk by enabling enable_rar = true in config");
warn!("");
}
/// Show XZ external dependency warning
pub fn show_xz_dependency_warning() {
warn!("");
warn!("⚠️ ⚠️ ⚠️ XZ FORMAT DEPENDENCY WARNING ⚠️ ⚠️ ⚠️");
warn!("");
warn!("XZ format requires external liblzma library (non-pure Rust)");
warn!("");
warn!("Installation instructions:");
warn!(" macOS: brew install xz");
warn!(" Linux: apt install liblzma-dev (Debian/Ubuntu)");
warn!(" yum install xz-devel (CentOS/RHEL)");
warn!(" Windows: Manual installation required (complex)");
warn!(" Download from: https://tukaani.org/xz/");
warn!("");
warn!("⚠️ XZ format disabled if liblzma not found");
warn!("");
}
/// Show 7z library stability warning
pub fn show_7z_stability_warning() {
warn!("");
warn!("⚠️ ⚠️ ⚠️ 7Z FORMAT STABILITY WARNING ⚠️ ⚠️ ⚠️");
warn!("");
warn!("sevenz-rust library (v0.21.0) is under active development:");
warn!(" 1. Some compression algorithms not yet supported");
warn!(" 2. Production stability may be limited");
warn!(" 3. Performance optimization ongoing");
warn!(" 4. API may change in future versions");
warn!("");
warn!("Recommended: Wait for library maturity before production use");
warn!("GitHub: https://github.com/frogmoreltd/sevenz-rust");
warn!("");
warn!("⚠️ Use with caution in production environments");
warn!("");
}
/// Emit the startup banners for whichever optional formats `config` enables,
/// followed by a short summary when at least one of them is on.
pub fn show_startup_warnings(config: &ArchiveConfig) {
    if config.enable_rar {
        show_rar_legal_warning();
    }
    // XZ gets no banner here: its dependency check happens in ProcessorRegistry.
    if config.enable_7z {
        show_7z_stability_warning();
    }

    // Count the enabled optional formats (each `bool` contributes 0 or 1).
    let enabled_optional = usize::from(config.enable_rar)
        + usize::from(config.enable_xz)
        + usize::from(config.enable_7z);
    if enabled_optional == 0 {
        return;
    }

    info!("");
    info!("⚠️ {} optional format(s) enabled with warnings shown above", enabled_optional);
    info!("Core formats (9): ZIP, TAR, GZIP, ZSTD, BZIP2, LZ4, TAR.GZ, TAR.BZ2, TAR.ZST");
    info!("");
}
/// Return the full user-facing legal disclaimer shown for RAR support.
///
/// The text is static; a fresh owned copy is handed out on every call.
pub fn generate_rar_legal_disclaimer() -> String {
    const DISCLAIMER: &str = "RAR FORMAT LEGAL DISCLAIMER
IMPORTANT WARNING:
By enabling RAR format support in MarkBase, you acknowledge and agree to the following:
1. RAR COMPRESSION ALGORITHM PATENT
- RAR compression algorithm is patented by RARLAB (Eugene Roshal)
- Patent protection applies to commercial use
- Personal/non-commercial use may be free (check RARLAB license)
2. LICENSE REQUIREMENTS
- Commercial use requires purchasing license from RARLAB
- License cost: approximately $1000+ (contact RARLAB for exact pricing)
- License info: https://rarlab.com/license.htm
3. MARKBASE LIABILITY DISCLAIMER
- MarkBase provides RAR DECOMPRESSION only (no compression)
- MarkBase uses unrar library (free for personal use)
- MarkBase DOES NOT provide RAR compression functionality
- MarkBase DOES NOT assume any legal liability for RAR patent issues
4. USER RESPONSIBILITY
- You are solely responsible for verifying legal compliance
- If commercial use, you must purchase RARLAB license
- You accept all legal risks by enabling enable_rar = true
- MarkBase is not liable for any patent infringement
5. USAGE GUIDELINES
- Personal use: Free (符合unrar免费许可)
- Commercial use: License required
- Distribution: Contact RARLAB for distribution license
ENABLE RAR FORMAT:
Modify config.toml:
enable_rar = false # Default disabled
enable_rar = true # ⚠️ User accepts legal risk
CONTACT:
- RARLAB: https://rarlab.com
- License: support@rarlab.com
- MarkBase: your-markbase-support
Last Updated: 2026-06-10
Version: 1.0
Legal Consultation: [Please consult professional lawyer for commercial use]
";
    DISCLAIMER.to_owned()
}