Files
markbase/markbase-core/src/vfs/checksum.rs
Warren 1418e9958b
Some checks failed
Test / build (push) Has been cancelled
Test / test (push) Has been cancelled
Apply clippy fixes for code quality
Clippy Fixes Applied:
- Removed unused imports
- Fixed manual implementation of .is_multiple_of()
- Fixed unnecessary_sort_by suggestions
- Added missing Ipv4Addr imports

Files Modified:
- forward_acl.rs: Add Ipv4Addr import
- known_hosts.rs: Add Ipv4Addr import
- Various files: Remove unused imports

Build:  markbase-core
Tests: 495 passed
2026-06-24 11:18:02 +08:00

448 lines
13 KiB
Rust

//! Block-level Checksum for Data Integrity
//!
//! Reference: ZFS/Btrfs checksum verification
//! - ZFS: Fletcher4/SHA256 per-block checksum
//! - Btrfs: CRC32C per-block checksum
//!
//! MarkBase uses SHA-256 (32 bytes) per 4KB block for integrity verification.
use std::path::PathBuf;
use std::io::{Read, Write};
use sha2::{Sha256, Digest};
use serde::{Serialize, Deserialize};
use super::{VfsBackend, VfsFile, VfsError};
/// Size in bytes of each checksummed block (4 KiB).
pub const BLOCK_SIZE: usize = 4096;
/// Length in bytes of a single block hash (one SHA-256 digest).
pub const HASH_SIZE: usize = 32; // SHA-256
/// Name of the directory, joined directly onto the root path, that holds checksum files.
pub const CHECKSUM_DIR: &str = ".checksums";
/// Suffix (leading dot included) that marks a checksum file; the recursive
/// scrub skips any file whose name ends with it.
pub const CHECKSUM_EXT: &str = ".checksums";
/// Checksum record for a single block of a file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VfsBlockChecksum {
/// Byte offset of the block within the file.
pub offset: u64, // Block offset (multiple of BLOCK_SIZE)
/// Digest of the block's bytes.
pub hash: Vec<u8>, // SHA-256 hash (32 bytes)
}
/// Per-file checksum table; `from_bytes` / `to_bytes` (de)serialize it as JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VfsChecksumFile {
/// Block size in bytes recorded when the table was created (`BLOCK_SIZE` for tables built by `new`).
pub block_size: usize,
/// Name of the hash algorithm; `new` records `"sha256"`.
pub algorithm: String, // "sha256"
/// One entry per checksummed block.
pub blocks: Vec<VfsBlockChecksum>,
/// Size in bytes of the file at the time the table was created.
pub file_size: u64, // Original file size
}
impl VfsChecksumFile {
    /// Creates an empty checksum table for a file that is `file_size` bytes long.
    ///
    /// The table records `BLOCK_SIZE` as its block size and `"sha256"` as its
    /// algorithm; block entries are added afterwards with [`Self::set_checksum`].
    pub fn new(file_size: u64) -> Self {
        Self {
            block_size: BLOCK_SIZE,
            algorithm: "sha256".to_string(),
            blocks: Vec::new(),
            file_size,
        }
    }

    /// Parses a checksum table from its JSON representation.
    ///
    /// The parsed entries are sorted by offset so that the lookups in
    /// [`Self::get_checksum`] and [`Self::set_checksum`] can use binary search
    /// even if the serialized data was not ordered.
    ///
    /// # Errors
    ///
    /// Returns `VfsError::Io` if `data` is not a valid serialized table.
    pub fn from_bytes(data: &[u8]) -> Result<Self, VfsError> {
        let mut parsed: Self = serde_json::from_slice(data)
            .map_err(|e| VfsError::Io(format!("checksum parse failed: {}", e)))?;
        // Stable sort: entries that share an offset keep their serialized order.
        parsed.blocks.sort_by_key(|b| b.offset);
        Ok(parsed)
    }

    /// Serializes the table to JSON bytes.
    ///
    /// # Errors
    ///
    /// Returns `VfsError::Io` if serialization fails.
    pub fn to_bytes(&self) -> Result<Vec<u8>, VfsError> {
        serde_json::to_vec(self)
            .map_err(|e| VfsError::Io(format!("checksum serialize failed: {}", e)))
    }

    /// Returns the stored hash for the block starting at `offset`, if any.
    ///
    /// Relies on `blocks` being sorted by offset, which `from_bytes` and
    /// `set_checksum` maintain. Code that edits the public `blocks` field
    /// directly must keep it sorted.
    pub fn get_checksum(&self, offset: u64) -> Option<&[u8]> {
        self.blocks
            .binary_search_by_key(&offset, |b| b.offset)
            .ok()
            .map(|idx| self.blocks[idx].hash.as_slice())
    }

    /// Stores `hash` for the block starting at `offset`.
    ///
    /// An existing entry for that offset is overwritten; otherwise a new entry
    /// is inserted at its sorted position. This replaces a linear scan plus a
    /// full re-sort per call, which made checksumming an n-block file O(n²).
    pub fn set_checksum(&mut self, offset: u64, hash: Vec<u8>) {
        match self.blocks.binary_search_by_key(&offset, |b| b.offset) {
            Ok(idx) => self.blocks[idx].hash = hash,
            Err(idx) => self.blocks.insert(idx, VfsBlockChecksum { offset, hash }),
        }
    }

    /// Returns how many `BLOCK_SIZE` blocks are needed to cover `file_size`
    /// bytes (a trailing partial block counts as one; an empty file has none).
    pub fn block_count(&self) -> usize {
        // Divide in u64 first so a large file size is not truncated by a cast
        // to a 32-bit `usize` before the division happens.
        self.file_size.div_ceil(BLOCK_SIZE as u64) as usize
    }
}
/// Computes the SHA-256 digest of `data` and returns it as an owned byte vector.
pub fn compute_block_hash(data: &[u8]) -> Vec<u8> {
    // One-shot form of "new hasher, update, finalize".
    Sha256::digest(data).to_vec()
}
/// Returns `true` when the hash of `data` (as produced by `compute_block_hash`)
/// equals `expected` byte for byte.
pub fn verify_block_hash(data: &[u8], expected: &[u8]) -> bool {
    compute_block_hash(data).as_slice() == expected
}
/// When block checksums are verified.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChecksumMode {
    /// Only verify on scrub (the mode chosen by `ChecksumConfig::default`).
    Lazy,
    /// Verify every read.
    OnRead,
}

/// Settings for checksum verification.
#[derive(Debug, Clone)]
pub struct ChecksumConfig {
    /// Verification strategy.
    pub mode: ChecksumMode,
    /// Presumably enables caching of verification results; it is not read
    /// anywhere in the visible part of this file — TODO confirm.
    pub cache_verified: bool,
}

impl Default for ChecksumConfig {
    /// Lazy verification with `cache_verified` switched on.
    fn default() -> Self {
        ChecksumConfig {
            cache_verified: true,
            mode: ChecksumMode::Lazy,
        }
    }
}
/// Outcome of scrubbing one file.
#[derive(Debug)]
pub struct ScrubResult {
    /// Path of the file that was scrubbed.
    pub path: PathBuf,
    /// Number of blocks the scrub iterated over.
    pub total_blocks: usize,
    /// Number of blocks counted as verified.
    pub verified_blocks: usize,
    /// Byte offsets of the blocks found to be corrupted.
    pub corrupted_blocks: Vec<u64>,
    /// Byte offsets of the corrupted blocks for which repair succeeded.
    pub repaired_blocks: Vec<u64>,
    /// Set when repair was requested but not every corrupted block was repaired.
    pub repair_failed: bool,
}

impl ScrubResult {
    /// Returns `true` when no corrupted block was recorded.
    pub fn is_clean(&self) -> bool {
        self.corrupted_blocks.is_empty()
    }

    /// Fraction of corrupted blocks that were repaired.
    ///
    /// A result without corrupted blocks reports `1.0`.
    pub fn repair_success_rate(&self) -> f64 {
        match self.corrupted_blocks.len() {
            0 => 1.0,
            corrupted => self.repaired_blocks.len() as f64 / corrupted as f64,
        }
    }
}
/// Maps a data file to the path of its checksum file.
///
/// The file's path relative to `root` is mirrored under `root/CHECKSUM_DIR`,
/// and `CHECKSUM_EXT` is appended to the complete file name, e.g.
/// `root/docs/a.txt` -> `root/.checksums/docs/a.txt.checksums`.
///
/// The suffix is appended rather than applied with `Path::with_extension`,
/// because `with_extension` replaces the file's own extension (so `a.txt` and
/// `a.md` would share one checksum file) and inserts its own dot in front of
/// the already-dotted `CHECKSUM_EXT`.
///
/// NOTE(review): this changes the on-disk name of checksum files; files
/// written under the old naming scheme need to be regenerated.
///
/// If `file_path` is not located under `root`, it is joined as-is; for an
/// absolute `file_path` that yields a path next to the file itself.
pub fn checksum_path_for_file(file_path: &PathBuf, root: &PathBuf) -> PathBuf {
    let relative = file_path.strip_prefix(root).unwrap_or(file_path);
    let mut checksum_path = root.join(CHECKSUM_DIR).join(relative).into_os_string();
    checksum_path.push(CHECKSUM_EXT);
    PathBuf::from(checksum_path)
}
/// Makes sure the `CHECKSUM_DIR` directory exists directly under `root`.
///
/// Nothing happens when the backend reports that the directory already
/// exists; otherwise it is created with mode `0o755`.
///
/// # Errors
///
/// Propagates any error returned by the backend's `create_dir`.
pub fn ensure_checksum_dir(root: &PathBuf, backend: &dyn VfsBackend) -> Result<(), VfsError> {
    let dir = root.join(CHECKSUM_DIR);
    if backend.exists(&dir) {
        return Ok(());
    }
    backend.create_dir(&dir, 0o755)?;
    Ok(())
}
/// Scrub a single file to verify integrity
///
/// Loads the file's checksum table, reads the file block by block and compares
/// each block's hash with the recorded one.
///
/// * A file without a checksum file yields an all-zero, clean [`ScrubResult`].
/// * A block that lies beyond the current end of the file, or that cannot be
///   read in full, is reported as corrupted (the file shrank or the read was
///   short); no repair is attempted for such blocks.
/// * A block that has no entry in the checksum table is counted as verified
///   (existing behaviour, kept for compatibility).
/// * If `repair` is `true`, `repair_block` is called for every block whose hash
///   does not match.
///
/// NOTE(review): the bytes returned by `repair_block` are not written back to
/// the file here; a block is listed as repaired as soon as `repair_block`
/// returns `Ok`.
///
/// # Errors
///
/// Propagates backend errors from opening, stat-ing or reading the files and
/// parse errors from the checksum table.
pub fn scrub_file(
    backend: &dyn VfsBackend,
    file_path: &PathBuf,
    root_path: &PathBuf,
    repair: bool,
) -> Result<ScrubResult, VfsError> {
    let checksum_path = checksum_path_for_file(file_path, root_path);
    if !backend.exists(&checksum_path) {
        // Nothing to verify against.
        return Ok(ScrubResult {
            path: file_path.clone(),
            total_blocks: 0,
            verified_blocks: 0,
            corrupted_blocks: vec![],
            repaired_blocks: vec![],
            repair_failed: false,
        });
    }

    let checksum_file_data = {
        let mut checksum_file =
            backend.open_file(&checksum_path, &super::open_flags::OpenFlags::new().read())?;
        checksum_file.read_all()?
    };
    let checksum_data = VfsChecksumFile::from_bytes(&checksum_file_data)?;

    let mut file_handle =
        backend.open_file(file_path, &super::open_flags::OpenFlags::new().read())?;
    let file_size = file_handle.stat()?.size;

    // The block count comes from the size recorded in the checksum table,
    // which may differ from the file's current size.
    let block_count = checksum_data.block_count();
    let mut verified_blocks = 0;
    let mut corrupted_blocks: Vec<u64> = vec![];
    let mut repaired_blocks: Vec<u64> = vec![];
    // One buffer reused for every block instead of a fresh allocation each time.
    let mut buffer: Vec<u8> = Vec::with_capacity(BLOCK_SIZE);

    for block_idx in 0..block_count {
        let offset = (block_idx as u64) * BLOCK_SIZE as u64;

        // `file_size - offset` would underflow if the file has shrunk since the
        // checksums were recorded; such blocks no longer exist on disk.
        let remaining = file_size.saturating_sub(offset);
        if remaining == 0 {
            corrupted_blocks.push(offset);
            continue;
        }
        let block_len = remaining.min(BLOCK_SIZE as u64) as usize;

        buffer.clear();
        buffer.resize(block_len, 0);
        let bytes_read = file_handle.read_at(&mut buffer, offset)?;
        if bytes_read != block_len {
            corrupted_blocks.push(offset);
            continue;
        }

        let Some(expected_hash) = checksum_data.get_checksum(offset) else {
            // No recorded hash for this block: counted as verified.
            verified_blocks += 1;
            continue;
        };

        if verify_block_hash(&buffer, expected_hash) {
            verified_blocks += 1;
            continue;
        }

        corrupted_blocks.push(offset);
        // The last argument of `repair_block` is the expected checksum, so the
        // recorded hash is passed rather than the corrupted block contents.
        if repair && repair_block(backend, file_path, offset, expected_hash).is_ok() {
            repaired_blocks.push(offset);
        }
    }

    let corrupted_count = corrupted_blocks.len();
    let repaired_count = repaired_blocks.len();
    Ok(ScrubResult {
        path: file_path.clone(),
        total_blocks: block_count,
        verified_blocks,
        corrupted_blocks,
        repaired_blocks,
        repair_failed: repair && repaired_count < corrupted_count,
    })
}
/// Scrub every file below `root_path`.
///
/// Returns an empty list when `root_path` has no `CHECKSUM_DIR` directory;
/// otherwise walks the tree recursively and collects one [`ScrubResult`] per
/// visited file. `repair` is forwarded to each per-file scrub.
///
/// # Errors
///
/// Propagates the first error raised while walking or scrubbing.
pub fn scrub_all(
    backend: &dyn VfsBackend,
    root_path: &PathBuf,
    repair: bool,
) -> Result<Vec<ScrubResult>, VfsError> {
    let mut results = Vec::new();
    if backend.exists(&root_path.join(CHECKSUM_DIR)) {
        scrub_recursive(backend, root_path, root_path, repair, &mut results)?;
    }
    Ok(results)
}
/// Depth-first walk used by `scrub_all`.
///
/// Scrubs every file found under `current_path` and appends its result to
/// `results`. Directories named `CHECKSUM_DIR` are not descended into, and
/// files whose names end with `CHECKSUM_EXT` are skipped. The first error
/// aborts the walk.
fn scrub_recursive(
    backend: &dyn VfsBackend,
    current_path: &PathBuf,
    root_path: &PathBuf,
    repair: bool,
    results: &mut Vec<ScrubResult>,
) -> Result<(), VfsError> {
    for entry in backend.read_dir(current_path)? {
        let entry_path = current_path.join(&entry.name);

        if entry.stat.is_dir {
            // The checksum store itself is never scrubbed.
            if entry.name == CHECKSUM_DIR {
                continue;
            }
            scrub_recursive(backend, &entry_path, root_path, repair, results)?;
            continue;
        }

        if entry.name.ends_with(CHECKSUM_EXT) {
            continue;
        }
        results.push(scrub_file(backend, &entry_path, root_path, repair)?);
    }
    Ok(())
}
/// Attempt to repair a corrupted block
///
/// Placeholder: no repair source is wired in yet, so every call fails. The
/// parameters describe the block to recover (owning backend, file, byte offset
/// and the checksum the recovered data is expected to have) but are currently
/// unused.
///
/// # Errors
///
/// Always returns `VfsError::Io`, because block repair needs a RAID or Dedup
/// backend that this function does not have access to.
pub fn repair_block(
    _backend: &dyn VfsBackend,
    _file_path: &PathBuf,
    _offset: u64,
    _expected_checksum: &[u8],
) -> Result<Vec<u8>, VfsError> {
    // RAID/Dedup repair requires specific backend types; until that
    // integration exists the only possible outcome is an error.
    let reason = String::from("block repair requires RAID or Dedup backend (Phase 4/6)");
    Err(VfsError::Io(reason))
}
/// Repair block from DedupStore
///
/// Thin wrapper that forwards `checksum_hash` to
/// `DedupStore::repair_from_checksum` and returns its result unchanged.
/// Intended for use when a checksum mismatch is detected and a dedup store is
/// available; presumably the returned bytes are the block content matching
/// `checksum_hash` — confirm against `DedupStore`.
///
/// # Errors
///
/// Returns whatever error `repair_from_checksum` reports.
pub fn repair_block_from_dedup(
dedup_store: &super::dedup::DedupStore,
checksum_hash: &[u8],
) -> Result<Vec<u8>, VfsError> {
dedup_store.repair_from_checksum(checksum_hash)
}
/// Create checksums for a file
///
/// This reads the file and computes checksums for all blocks.
pub fn create_checksums_for_file(
backend: &dyn VfsBackend,
file_path: &PathBuf,
root_path: &PathBuf,
) -> Result<(), VfsError> {
ensure_checksum_dir(root_path, backend)?;
let mut file_handle = backend.open_file(file_path, &super::open_flags::OpenFlags::new().read())?;
let stat = file_handle.stat()?;
let file_size = stat.size;
let mut checksum_data = VfsChecksumFile::new(file_size);
let block_count = checksum_data.block_count();
for block_idx in 0..block_count {
let offset = (block_idx as u64) * BLOCK_SIZE as u64;
let block_size = if offset + BLOCK_SIZE as u64 <= file_size {
BLOCK_SIZE
} else {
(file_size - offset) as usize
};
let mut buffer = vec![0u8; block_size];
let bytes_read = file_handle.read_at(&mut buffer, offset)?;
if bytes_read > 0 {
let hash = compute_block_hash(&buffer[..bytes_read]);
checksum_data.set_checksum(offset, hash);
}
}
let checksum_path = checksum_path_for_file(file_path, root_path);
let checksum_bytes = checksum_data.to_bytes()?;
let mut checksum_file = backend.open_file(
&checksum_path,
&super::open_flags::OpenFlags::new().write().create().truncate(),
)?;
checksum_file.write_all(&checksum_bytes)?;
checksum_file.flush()?;
Ok(())
}
// Unit tests for the pure (backend-independent) parts of this module.
#[cfg(test)]
mod tests {
use super::*;
// Hashing yields HASH_SIZE bytes and is deterministic for equal input.
#[test]
fn test_compute_block_hash() {
let data = b"test block data for hashing";
let hash = compute_block_hash(data);
assert_eq!(hash.len(), HASH_SIZE);
let hash2 = compute_block_hash(data);
assert_eq!(hash, hash2);
}
// Verification accepts the data a hash was computed from and rejects other data.
#[test]
fn test_verify_block_hash() {
let data = b"test block data";
let hash = compute_block_hash(data);
assert!(verify_block_hash(data, &hash));
let wrong_data = b"wrong block data";
assert!(!verify_block_hash(wrong_data, &hash));
}
// A table survives a to_bytes / from_bytes round trip with its metadata intact.
#[test]
fn test_checksum_file_roundtrip() {
let mut checksum_file = VfsChecksumFile::new(8192);
checksum_file.set_checksum(0, compute_block_hash(b"block0"));
checksum_file.set_checksum(4096, compute_block_hash(b"block1"));
let bytes = checksum_file.to_bytes().unwrap();
let decoded = VfsChecksumFile::from_bytes(&bytes).unwrap();
assert_eq!(decoded.block_size, BLOCK_SIZE);
assert_eq!(decoded.blocks.len(), 2);
assert_eq!(decoded.file_size, 8192);
}
// set_checksum stores a hash and a second call for the same offset overwrites it.
#[test]
fn test_checksum_file_get_set() {
let mut checksum_file = VfsChecksumFile::new(4096);
let hash = compute_block_hash(b"test");
checksum_file.set_checksum(0, hash.clone());
let retrieved = checksum_file.get_checksum(0);
assert!(retrieved.is_some());
assert_eq!(retrieved.unwrap(), hash.as_slice());
checksum_file.set_checksum(0, compute_block_hash(b"new"));
let updated = checksum_file.get_checksum(0).unwrap();
assert_ne!(updated, hash.as_slice());
}
// block_count rounds up: exact multiples, one byte over a boundary, and the empty file.
#[test]
fn test_block_count_calculation() {
let checksum_file = VfsChecksumFile::new(4096);
assert_eq!(checksum_file.block_count(), 1);
let checksum_file = VfsChecksumFile::new(8192);
assert_eq!(checksum_file.block_count(), 2);
let checksum_file = VfsChecksumFile::new(4097);
assert_eq!(checksum_file.block_count(), 2);
let checksum_file = VfsChecksumFile::new(0);
assert_eq!(checksum_file.block_count(), 0);
}
// is_clean / repair_success_rate for a clean result and for one with 1 of 2 blocks repaired.
#[test]
fn test_scrub_result_metrics() {
let result = ScrubResult {
path: PathBuf::from("/test"),
total_blocks: 10,
verified_blocks: 10,
corrupted_blocks: vec![],
repaired_blocks: vec![],
repair_failed: false,
};
assert!(result.is_clean());
assert_eq!(result.repair_success_rate(), 1.0);
let result2 = ScrubResult {
path: PathBuf::from("/test"),
total_blocks: 10,
verified_blocks: 8,
corrupted_blocks: vec![4096, 8192],
repaired_blocks: vec![4096],
repair_failed: false,
};
assert!(!result2.is_clean());
assert_eq!(result2.repair_success_rate(), 0.5);
}
}