//! Block-level Checksum for Data Integrity //! //! Reference: ZFS/Btrfs checksum verification //! - ZFS: Fletcher4/SHA256 per-block checksum //! - Btrfs: CRC32C per-block checksum //! //! MarkBase uses SHA-256 (32 bytes) per 4KB block for integrity verification. use std::path::PathBuf; use std::io::{Read, Write}; use sha2::{Sha256, Digest}; use serde::{Serialize, Deserialize}; use super::{VfsBackend, VfsFile, VfsError}; pub const BLOCK_SIZE: usize = 4096; pub const HASH_SIZE: usize = 32; // SHA-256 pub const CHECKSUM_DIR: &str = ".checksums"; pub const CHECKSUM_EXT: &str = ".checksums"; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct VfsBlockChecksum { pub offset: u64, // Block offset (multiple of BLOCK_SIZE) pub hash: Vec, // SHA-256 hash (32 bytes) } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct VfsChecksumFile { pub block_size: usize, pub algorithm: String, // "sha256" pub blocks: Vec, pub file_size: u64, // Original file size } impl VfsChecksumFile { pub fn new(file_size: u64) -> Self { Self { block_size: BLOCK_SIZE, algorithm: "sha256".to_string(), blocks: Vec::new(), file_size, } } pub fn from_bytes(data: &[u8]) -> Result { serde_json::from_slice(data) .map_err(|e| VfsError::Io(format!("checksum parse failed: {}", e))) } pub fn to_bytes(&self) -> Result, VfsError> { serde_json::to_vec(self) .map_err(|e| VfsError::Io(format!("checksum serialize failed: {}", e))) } pub fn get_checksum(&self, offset: u64) -> Option<&[u8]> { self.blocks.iter() .find(|b| b.offset == offset) .map(|b| b.hash.as_slice()) } pub fn set_checksum(&mut self, offset: u64, hash: Vec) { if let Some(block) = self.blocks.iter_mut().find(|b| b.offset == offset) { block.hash = hash; } else { self.blocks.push(VfsBlockChecksum { offset, hash }); self.blocks.sort_by_key(|b| b.offset); } } pub fn block_count(&self) -> usize { (self.file_size as usize / BLOCK_SIZE) + if !(self.file_size as usize).is_multiple_of(BLOCK_SIZE) { 1 } else { 0 } } } pub fn compute_block_hash(data: 
&[u8]) -> Vec { let mut hasher = Sha256::new(); hasher.update(data); hasher.finalize().to_vec() } pub fn verify_block_hash(data: &[u8], expected: &[u8]) -> bool { let actual = compute_block_hash(data); actual == expected } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ChecksumMode { Lazy, // Only verify on scrub (default) OnRead, // Verify every read } #[derive(Debug, Clone)] pub struct ChecksumConfig { pub mode: ChecksumMode, pub cache_verified: bool, } impl Default for ChecksumConfig { fn default() -> Self { Self { mode: ChecksumMode::Lazy, cache_verified: true, } } } #[derive(Debug)] pub struct ScrubResult { pub path: PathBuf, pub total_blocks: usize, pub verified_blocks: usize, pub corrupted_blocks: Vec, pub repaired_blocks: Vec, pub repair_failed: bool, } impl ScrubResult { pub fn is_clean(&self) -> bool { self.corrupted_blocks.is_empty() } pub fn repair_success_rate(&self) -> f64 { if self.corrupted_blocks.is_empty() { 1.0 } else { self.repaired_blocks.len() as f64 / self.corrupted_blocks.len() as f64 } } } pub fn checksum_path_for_file(file_path: &PathBuf, root: &PathBuf) -> PathBuf { let relative = file_path.strip_prefix(root) .unwrap_or(file_path); root.join(CHECKSUM_DIR) .join(relative) .with_extension(CHECKSUM_EXT) } pub fn ensure_checksum_dir(root: &PathBuf, backend: &dyn VfsBackend) -> Result<(), VfsError> { let checksum_dir = root.join(CHECKSUM_DIR); if !backend.exists(&checksum_dir) { backend.create_dir(&checksum_dir, 0o755)?; } Ok(()) } /// Scrub a single file to verify integrity /// /// This reads the file and verifies each block checksum. /// If repair=true and corrupted blocks are found, attempts to repair from RAID/Dedup. 
///
/// # Behavior
///
/// - Returns an all-zero, clean result when the file has no checksum file.
/// - The number of blocks checked comes from the size recorded in the
///   checksum file; block lengths come from the file's current size. A block
///   that lies beyond the current end of file is read as zero bytes and
///   therefore fails verification whenever a hash was recorded for it.
/// - A block that cannot be read in full is reported as corrupted.
/// - A block without a recorded hash is counted as verified.
/// - `repair_failed` is set when `repair` is true and fewer blocks were
///   repaired than were found corrupted.
///
/// # Errors
///
/// Propagates backend errors from opening, stat-ing or reading either file,
/// and the parse error from a malformed checksum file.
pub fn scrub_file(
    backend: &dyn VfsBackend,
    file_path: &PathBuf,
    root_path: &PathBuf,
    repair: bool,
) -> Result<ScrubResult, VfsError> {
    let checksum_path = checksum_path_for_file(file_path, root_path);

    // Nothing recorded for this file: report an empty, clean result.
    if !backend.exists(&checksum_path) {
        return Ok(ScrubResult {
            path: file_path.clone(),
            total_blocks: 0,
            verified_blocks: 0,
            corrupted_blocks: vec![],
            repaired_blocks: vec![],
            repair_failed: false,
        });
    }

    // Scoped so the checksum file handle is dropped before the data file is opened.
    let checksum_file_data = {
        let mut checksum_file =
            backend.open_file(&checksum_path, &super::open_flags::OpenFlags::new().read())?;
        checksum_file.read_all()?
    };
    let checksum_data = VfsChecksumFile::from_bytes(&checksum_file_data)?;

    let mut file_handle =
        backend.open_file(file_path, &super::open_flags::OpenFlags::new().read())?;
    let stat = file_handle.stat()?;
    let file_size = stat.size;

    let block_count = checksum_data.block_count();
    let mut verified_blocks = 0;
    let mut corrupted_blocks: Vec<u64> = vec![];
    let mut repaired_blocks: Vec<u64> = vec![];

    for block_idx in 0..block_count {
        let offset = (block_idx as u64) * BLOCK_SIZE as u64;

        // The file may have shrunk since its checksums were recorded, putting
        // `offset` past the current end of file. A plain `file_size - offset`
        // would underflow there, so clamp the remaining length to zero.
        let remaining = file_size.saturating_sub(offset);
        let block_size = remaining.min(BLOCK_SIZE as u64) as usize;

        let mut buffer = vec![0u8; block_size];
        let bytes_read = file_handle.read_at(&mut buffer, offset)?;
        if bytes_read != block_size {
            corrupted_blocks.push(offset);
            continue;
        }

        // Blocks without a recorded hash cannot be checked and count as verified.
        let Some(expected_hash) = checksum_data.get_checksum(offset) else {
            verified_blocks += 1;
            continue;
        };

        if verify_block_hash(&buffer, expected_hash) {
            verified_blocks += 1;
            continue;
        }

        corrupted_blocks.push(offset);
        // Hand the *expected* checksum to the repair routine; the corrupted
        // buffer contents are of no use for locating a good copy.
        if repair && repair_block(backend, file_path, offset, expected_hash).is_ok() {
            repaired_blocks.push(offset);
        }
    }

    let repair_failed = repair && repaired_blocks.len() < corrupted_blocks.len();

    Ok(ScrubResult {
        path: file_path.clone(),
        total_blocks: block_count,
        verified_blocks,
        corrupted_blocks,
        repaired_blocks,
        repair_failed,
    })
}

/// Scrub all files in a directory
///
Recursively walks the directory and scrubs all files with checksums. pub fn scrub_all( backend: &dyn VfsBackend, root_path: &PathBuf, repair: bool, ) -> Result, VfsError> { let mut results = vec![]; let checksum_dir = root_path.join(CHECKSUM_DIR); if !backend.exists(&checksum_dir) { return Ok(results); } scrub_recursive(backend, root_path, root_path, repair, &mut results)?; Ok(results) } fn scrub_recursive( backend: &dyn VfsBackend, current_path: &PathBuf, root_path: &PathBuf, repair: bool, results: &mut Vec, ) -> Result<(), VfsError> { let entries = backend.read_dir(current_path)?; for entry in entries { let entry_path = current_path.join(&entry.name); if entry.stat.is_dir { if entry.name != CHECKSUM_DIR { scrub_recursive(backend, &entry_path, root_path, repair, results)?; } } else if !entry.name.ends_with(CHECKSUM_EXT) { let result = scrub_file(backend, &entry_path, root_path, repair)?; results.push(result); } } Ok(()) } /// Attempt to repair a corrupted block /// /// Tries RAID repair first (if backend is RAID), then Dedup repair. pub fn repair_block( _backend: &dyn VfsBackend, _file_path: &PathBuf, _offset: u64, _expected_checksum: &[u8], ) -> Result, VfsError> { // Try Dedup repair first (check if block exists in dedup store) // This requires the backend to have dedup integration // For now, return error - RAID/Dedup repair requires specific backend types Err(VfsError::Io("block repair requires RAID or Dedup backend (Phase 4/6)".to_string())) } /// Repair block from DedupStore /// /// This is called when checksum detects corruption and dedup store is available. pub fn repair_block_from_dedup( dedup_store: &super::dedup::DedupStore, checksum_hash: &[u8], ) -> Result, VfsError> { dedup_store.repair_from_checksum(checksum_hash) } /// Create checksums for a file /// /// This reads the file and computes checksums for all blocks. 
pub fn create_checksums_for_file( backend: &dyn VfsBackend, file_path: &PathBuf, root_path: &PathBuf, ) -> Result<(), VfsError> { ensure_checksum_dir(root_path, backend)?; let mut file_handle = backend.open_file(file_path, &super::open_flags::OpenFlags::new().read())?; let stat = file_handle.stat()?; let file_size = stat.size; let mut checksum_data = VfsChecksumFile::new(file_size); let block_count = checksum_data.block_count(); for block_idx in 0..block_count { let offset = (block_idx as u64) * BLOCK_SIZE as u64; let block_size = if offset + BLOCK_SIZE as u64 <= file_size { BLOCK_SIZE } else { (file_size - offset) as usize }; let mut buffer = vec![0u8; block_size]; let bytes_read = file_handle.read_at(&mut buffer, offset)?; if bytes_read > 0 { let hash = compute_block_hash(&buffer[..bytes_read]); checksum_data.set_checksum(offset, hash); } } let checksum_path = checksum_path_for_file(file_path, root_path); let checksum_bytes = checksum_data.to_bytes()?; let mut checksum_file = backend.open_file( &checksum_path, &super::open_flags::OpenFlags::new().write().create().truncate(), )?; checksum_file.write_all(&checksum_bytes)?; checksum_file.flush()?; Ok(()) } #[cfg(test)] mod tests { use super::*; #[test] fn test_compute_block_hash() { let data = b"test block data for hashing"; let hash = compute_block_hash(data); assert_eq!(hash.len(), HASH_SIZE); let hash2 = compute_block_hash(data); assert_eq!(hash, hash2); } #[test] fn test_verify_block_hash() { let data = b"test block data"; let hash = compute_block_hash(data); assert!(verify_block_hash(data, &hash)); let wrong_data = b"wrong block data"; assert!(!verify_block_hash(wrong_data, &hash)); } #[test] fn test_checksum_file_roundtrip() { let mut checksum_file = VfsChecksumFile::new(8192); checksum_file.set_checksum(0, compute_block_hash(b"block0")); checksum_file.set_checksum(4096, compute_block_hash(b"block1")); let bytes = checksum_file.to_bytes().unwrap(); let decoded = VfsChecksumFile::from_bytes(&bytes).unwrap(); 
assert_eq!(decoded.block_size, BLOCK_SIZE); assert_eq!(decoded.blocks.len(), 2); assert_eq!(decoded.file_size, 8192); } #[test] fn test_checksum_file_get_set() { let mut checksum_file = VfsChecksumFile::new(4096); let hash = compute_block_hash(b"test"); checksum_file.set_checksum(0, hash.clone()); let retrieved = checksum_file.get_checksum(0); assert!(retrieved.is_some()); assert_eq!(retrieved.unwrap(), hash.as_slice()); checksum_file.set_checksum(0, compute_block_hash(b"new")); let updated = checksum_file.get_checksum(0).unwrap(); assert_ne!(updated, hash.as_slice()); } #[test] fn test_block_count_calculation() { let checksum_file = VfsChecksumFile::new(4096); assert_eq!(checksum_file.block_count(), 1); let checksum_file = VfsChecksumFile::new(8192); assert_eq!(checksum_file.block_count(), 2); let checksum_file = VfsChecksumFile::new(4097); assert_eq!(checksum_file.block_count(), 2); let checksum_file = VfsChecksumFile::new(0); assert_eq!(checksum_file.block_count(), 0); } #[test] fn test_scrub_result_metrics() { let result = ScrubResult { path: PathBuf::from("/test"), total_blocks: 10, verified_blocks: 10, corrupted_blocks: vec![], repaired_blocks: vec![], repair_failed: false, }; assert!(result.is_clean()); assert_eq!(result.repair_success_rate(), 1.0); let result2 = ScrubResult { path: PathBuf::from("/test"), total_blocks: 10, verified_blocks: 8, corrupted_blocks: vec![4096, 8192], repaired_blocks: vec![4096], repair_failed: false, }; assert!(!result2.is_clean()); assert_eq!(result2.repair_success_rate(), 0.5); } }