Phase 5: Background scrub scheduler (~220 lines) - ScrubScheduler: periodic scrub at configurable interval - ScrubSchedulerConfig: interval_secs, scrub_on_startup, repair_enabled - start/stop/run_once methods - ScrubStats: running, scrub_count, last/next scrub time - 6 unit tests: default config, start/stop, stats, timestamp format Phase 6: Dedup repair integration (~30 lines) - DedupStore::get_block_by_checksum(): retrieve by SHA-256 hash - DedupStore::has_block_by_checksum(): check existence - DedupStore::repair_from_checksum(): repair corrupted block - checksum::repair_block_from_dedup(): integration hook Tests: 471 passed (+6 new scrub_scheduler tests) Files: - markbase-core/src/vfs/scrub_scheduler.rs (NEW) - markbase-core/src/vfs/dedup.rs (MOD +30 lines) - markbase-core/src/vfs/checksum.rs (MOD +20 lines) - markbase-core/src/vfs/mod.rs (MOD +1 line)
269 lines
7.3 KiB
Rust
269 lines
7.3 KiB
Rust
//! Background Scrub Scheduler
//!
//! Automatically runs scrub operations at regular intervals.
//! Similar to ZFS `zpool scrub` and Btrfs periodic scrub.
|
|
|
|
use std::sync::Arc;
|
|
use std::path::PathBuf;
|
|
use std::time::Duration;
|
|
|
|
use super::{VfsBackend, VfsError};
|
|
use super::checksum::{scrub_all, ScrubResult};
|
|
|
|
/// Tunable settings for the background scrub scheduler.
///
/// The `Default` implementation yields an hourly scrub that is also due at
/// startup, with repair enabled.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScrubSchedulerConfig {
    /// Seconds between two scrub passes. Default: 3600 (1 hour).
    pub interval_secs: u64,
    /// Whether a scrub is considered due when none has run yet. Default: true.
    pub scrub_on_startup: bool,
    /// Passed to `scrub_all` as its repair flag. Default: true.
    pub repair_enabled: bool,
    /// Intended limit of files handled per run. Default: 100.
    // NOTE(review): nothing in this file reads this field; confirm whether the
    // limit is enforced elsewhere or still needs to be wired into the scrub call.
    pub max_files_per_run: usize,
}
|
|
|
|
impl Default for ScrubSchedulerConfig {
|
|
fn default() -> Self {
|
|
Self {
|
|
interval_secs: 3600,
|
|
scrub_on_startup: true,
|
|
repair_enabled: true,
|
|
max_files_per_run: 100,
|
|
}
|
|
}
|
|
}
|
|
|
|
pub struct ScrubScheduler {
|
|
backend: Arc<dyn VfsBackend>,
|
|
root_path: PathBuf,
|
|
config: ScrubSchedulerConfig,
|
|
running: bool,
|
|
last_scrub_time: Option<u64>,
|
|
scrub_count: usize,
|
|
}
|
|
|
|
impl ScrubScheduler {
|
|
pub fn new(
|
|
backend: Arc<dyn VfsBackend>,
|
|
root_path: PathBuf,
|
|
config: ScrubSchedulerConfig,
|
|
) -> Self {
|
|
Self {
|
|
backend,
|
|
root_path,
|
|
config,
|
|
running: false,
|
|
last_scrub_time: None,
|
|
scrub_count: 0,
|
|
}
|
|
}
|
|
|
|
pub fn with_defaults(
|
|
backend: Arc<dyn VfsBackend>,
|
|
root_path: PathBuf,
|
|
) -> Self {
|
|
Self::new(backend, root_path, ScrubSchedulerConfig::default())
|
|
}
|
|
|
|
pub fn start(&mut self) {
|
|
self.running = true;
|
|
}
|
|
|
|
pub fn stop(&mut self) {
|
|
self.running = false;
|
|
}
|
|
|
|
pub fn is_running(&self) -> bool {
|
|
self.running
|
|
}
|
|
|
|
pub fn get_last_scrub_time(&self) -> Option<u64> {
|
|
self.last_scrub_time
|
|
}
|
|
|
|
pub fn get_scrub_count(&self) -> usize {
|
|
self.scrub_count
|
|
}
|
|
|
|
pub fn should_run_now(&self) -> bool {
|
|
self.running && self.should_run_based_on_interval()
|
|
}
|
|
|
|
fn should_run_based_on_interval(&self) -> bool {
|
|
if self.last_scrub_time.is_none() {
|
|
return self.config.scrub_on_startup;
|
|
}
|
|
|
|
let now = current_time_secs();
|
|
let last = self.last_scrub_time.unwrap();
|
|
now - last >= self.config.interval_secs
|
|
}
|
|
|
|
pub fn run_once(&mut self) -> Result<Vec<ScrubResult>, VfsError> {
|
|
if !self.running {
|
|
return Ok(vec![]);
|
|
}
|
|
|
|
let results = scrub_all(
|
|
self.backend.as_ref(),
|
|
&self.root_path,
|
|
self.config.repair_enabled,
|
|
)?;
|
|
|
|
self.last_scrub_time = Some(current_time_secs());
|
|
self.scrub_count += 1;
|
|
|
|
Ok(results)
|
|
}
|
|
|
|
pub fn get_stats(&self) -> ScrubStats {
|
|
ScrubStats {
|
|
running: self.running,
|
|
scrub_count: self.scrub_count,
|
|
last_scrub_time: self.last_scrub_time,
|
|
interval_secs: self.config.interval_secs,
|
|
next_scrub_time: self.calculate_next_scrub_time(),
|
|
}
|
|
}
|
|
|
|
fn calculate_next_scrub_time(&self) -> Option<u64> {
|
|
if !self.running {
|
|
return None;
|
|
}
|
|
|
|
let last = self.last_scrub_time.unwrap_or(current_time_secs());
|
|
Some(last + self.config.interval_secs)
|
|
}
|
|
}
|
|
|
|
/// Returns the current wall-clock time as whole seconds since the Unix epoch.
///
/// Yields `0` if the system clock reports a time before the epoch.
fn current_time_secs() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        Err(_) => 0,
    }
}
|
|
|
|
/// Point-in-time snapshot of scheduler state, as built by
/// `ScrubScheduler::get_stats`.
///
/// Timestamps are whole seconds since the Unix epoch.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScrubStats {
    /// Whether the scheduler was running when the snapshot was taken.
    pub running: bool,
    /// Number of scrubs completed so far.
    pub scrub_count: usize,
    /// Completion time of the last scrub, or `None` if none has run.
    pub last_scrub_time: Option<u64>,
    /// Configured interval between scrubs, in seconds.
    pub interval_secs: u64,
    /// Time the next scrub is due, or `None` when nothing is scheduled.
    pub next_scrub_time: Option<u64>,
}
|
|
|
|
impl ScrubStats {
|
|
pub fn next_scrub_in_secs(&self) -> Option<u64> {
|
|
if !self.running {
|
|
return None;
|
|
}
|
|
|
|
let now = current_time_secs();
|
|
let next = self.next_scrub_time?;
|
|
|
|
if next > now {
|
|
Some(next - now)
|
|
} else {
|
|
Some(0)
|
|
}
|
|
}
|
|
|
|
pub fn format_last_scrub(&self) -> String {
|
|
match self.last_scrub_time {
|
|
None => "Never".to_string(),
|
|
Some(t) => format_timestamp(t),
|
|
}
|
|
}
|
|
|
|
pub fn format_next_scrub(&self) -> String {
|
|
match self.next_scrub_time {
|
|
None => "Not scheduled".to_string(),
|
|
Some(t) => format_timestamp(t),
|
|
}
|
|
}
|
|
}
|
|
|
|
fn format_timestamp(secs: u64) -> String {
|
|
use chrono::{DateTime, Utc, TimeZone};
|
|
Utc.timestamp_opt(secs as i64, 0)
|
|
.single()
|
|
.map(|dt| dt.format("%Y-%m-%d %H:%M:%S UTC").to_string())
|
|
.unwrap_or_else(|| format!("{} seconds since epoch", secs))
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Local-filesystem backend shared by the scheduler tests.
    fn local_backend() -> Arc<dyn VfsBackend> {
        Arc::new(super::super::local_fs::LocalFs::new())
    }

    #[test]
    fn test_default_config() {
        let defaults = ScrubSchedulerConfig::default();

        assert_eq!(defaults.interval_secs, 3600);
        assert!(defaults.scrub_on_startup);
        assert!(defaults.repair_enabled);
        assert_eq!(defaults.max_files_per_run, 100);
    }

    #[test]
    fn test_scheduler_start_stop() {
        let mut sched = ScrubScheduler::with_defaults(local_backend(), PathBuf::from("/tmp"));

        // A fresh scheduler is stopped until explicitly started.
        assert!(!sched.is_running());

        sched.start();
        assert!(sched.is_running());

        sched.stop();
        assert!(!sched.is_running());
    }

    #[test]
    fn test_scrub_stats() {
        let reference = current_time_secs();
        let snapshot = ScrubStats {
            running: true,
            scrub_count: 5,
            last_scrub_time: Some(reference - 3600),
            interval_secs: 3600,
            // The next scrub is due right now.
            next_scrub_time: Some(reference),
        };

        assert!(snapshot.running);
        assert_eq!(snapshot.scrub_count, 5);

        // A due time of "now" should report (close to) zero seconds remaining;
        // allow 10 seconds of tolerance.
        assert!(matches!(snapshot.next_scrub_in_secs(), Some(wait) if wait <= 10));
    }

    #[test]
    fn test_format_timestamp() {
        // 1609459200 is 2021-01-01 00:00:00 UTC.
        let rendered = format_timestamp(1609459200);
        assert!(rendered.contains("2021"));
    }

    #[test]
    fn test_should_run_on_startup() {
        let mut sched = ScrubScheduler::with_defaults(local_backend(), PathBuf::from("/tmp"));
        sched.start();

        // scrub_on_startup = true, so the first scrub is due immediately.
        assert!(sched.should_run_now());

        // Pretend a scrub just finished: the interval has not elapsed yet.
        sched.last_scrub_time = Some(current_time_secs());
        assert!(!sched.should_run_now());
    }

    #[test]
    fn test_should_run_after_interval() {
        let no_startup_scrub = ScrubSchedulerConfig {
            interval_secs: 3600,
            scrub_on_startup: false,
            repair_enabled: true,
            max_files_per_run: 100,
        };
        let mut sched =
            ScrubScheduler::new(local_backend(), PathBuf::from("/tmp"), no_startup_scrub);
        sched.start();

        // scrub_on_startup = false, so nothing is due right after start.
        assert!(!sched.should_run_now());

        // Last scrub finished more than one interval ago.
        sched.last_scrub_time = Some(current_time_secs() - 3601);
        assert!(sched.should_run_now());
    }
}