use crate::checkpoint::Checkpoint;
use crate::error::Result;
use crate::merkle::MerkleTree;
use crate::storage::Storage;
use crate::timeline::Timeline;
use crate::types::FileEntry;
use crate::utils;
use serde::{Deserialize, Serialize};
use crate::collections::{HashMap, HashMapExt};
use std::collections::HashSet;
use std::time::Instant;
use tracing::{debug, error, info, warn};
/// Deep integrity verifier for checkpoints, backed by a borrowed [`Storage`].
///
/// All checks are read-only; the verifier never mutates storage.
#[derive(Debug)]
pub struct CheckpointVerifier<'a> {
// Storage backend from which checkpoints, manifests and content objects are read.
storage: &'a Storage,
}
impl<'a> CheckpointVerifier<'a> {
pub fn new(storage: &'a Storage) -> Self {
Self { storage }
}
pub fn verify_complete(&self, checkpoint: &Checkpoint) -> Result<VerificationReport> {
let start = Instant::now();
let mut report = VerificationReport::new(checkpoint.id.clone());
debug!("Verifying checkpoint metadata for {}", checkpoint.short_id());
report.metadata_valid = checkpoint.verify_integrity()?;
let computed_state = checkpoint.compute_state_hash();
report.state_hash_valid = computed_state == checkpoint.state_hash;
if !report.state_hash_valid {
report.errors.push(format!(
"State hash mismatch: expected {}, got {}",
checkpoint.state_hash, computed_state
));
}
let manifest = match self.storage.load_manifest(&checkpoint.id) {
Ok(m) => m,
Err(e) => {
report.errors.push(format!("Failed to load manifest: {}", e));
report.verification_time_ms = start.elapsed().as_millis() as u64;
return Ok(report);
}
};
debug!("Building merkle tree for verification");
let merkle_tree = MerkleTree::from_entries(&manifest.files)?;
if let Some(root_hash) = merkle_tree.root_hash() {
report.merkle_root_valid = root_hash == checkpoint.content_merkle_root;
if !report.merkle_root_valid {
report.errors.push(format!(
"Merkle root mismatch: expected {}, got {}",
checkpoint.content_merkle_root, root_hash
));
}
} else {
report.merkle_root_valid = checkpoint.content_merkle_root.is_empty();
}
debug!("Verifying {} files (excluding directories)", manifest.files.len());
for file_entry in &manifest.files {
if file_entry.is_directory {
continue;
}
let file_check = self.verify_file_entry(file_entry)?;
if !file_check.is_valid() {
report.errors.push(format!(
"File verification failed: {:?}",
file_entry.path
));
}
report.file_checks.push(file_check);
}
if let Some(parent_id) = &checkpoint.parent_id {
report.parent_valid = self.storage.checkpoint_exists(parent_id);
if !report.parent_valid {
report.errors.push(format!("Parent checkpoint {} not found", parent_id));
}
} else {
report.parent_valid = true; }
report.orphaned_objects = self.find_orphaned_objects_for_checkpoint(checkpoint)?;
report.total_files_checked = report.file_checks.len();
report.files_valid = report.file_checks.iter().filter(|f| f.is_valid()).count();
report.verification_time_ms = start.elapsed().as_millis() as u64;
info!(
"Verified checkpoint {} in {}ms: {} / {} files valid",
checkpoint.short_id(),
report.verification_time_ms,
report.files_valid,
report.total_files_checked
);
Ok(report)
}
fn verify_file_entry(&self, entry: &FileEntry) -> Result<FileVerification> {
let mut verification = FileVerification {
path: entry.path.clone(),
content_hash_valid: false,
metadata_hash_valid: false,
object_exists: false,
size_matches: false,
error: None,
};
verification.object_exists = self.storage.object_exists(&entry.content_hash)?;
if !verification.object_exists {
verification.error = Some(format!("Object {} not found", &entry.content_hash[..8]));
return Ok(verification);
}
match self.storage.load_object(&entry.content_hash) {
Ok(content) => {
let computed_hash = utils::hash_data(&content);
verification.content_hash_valid = computed_hash == entry.content_hash;
verification.size_matches = content.len() as u64 == entry.size;
verification.metadata_hash_valid = verification.content_hash_valid;
}
Err(e) => {
verification.error = Some(format!("Failed to load object: {}", e));
}
}
Ok(verification)
}
fn find_orphaned_objects_for_checkpoint(&self, _checkpoint: &Checkpoint) -> Result<Vec<String>> {
let all_objects = self.storage.list_all_objects()?;
let mut referenced_objects = HashSet::new();
for checkpoint_id in self.storage.list_checkpoints()? {
if let Ok(manifest) = self.storage.load_manifest(&checkpoint_id) {
for file in &manifest.files {
if !file.is_directory {
referenced_objects.insert(file.content_hash.clone());
}
}
}
}
let orphaned: Vec<String> = all_objects
.into_iter()
.filter(|hash| !referenced_objects.contains(hash))
.collect();
debug!(
"Found {} orphaned objects out of {} total references",
orphaned.len(),
referenced_objects.len()
);
Ok(orphaned)
}
pub fn verify_timeline(&self, timeline: &Timeline) -> Result<TimelineVerificationReport> {
let start = Instant::now();
let mut report = TimelineVerificationReport::default();
report.total_checkpoints = timeline.checkpoints.len();
for checkpoint in timeline.checkpoints.values() {
debug!("Verifying checkpoint {} in timeline", checkpoint.short_id());
match self.verify_complete(checkpoint) {
Ok(checkpoint_report) => {
if checkpoint_report.is_valid() {
report.valid_checkpoints += 1;
} else {
report.invalid_checkpoints += 1;
report.checkpoint_errors.insert(
checkpoint.id.clone(),
checkpoint_report.errors.clone(),
);
}
report.checkpoint_reports.push(checkpoint_report);
}
Err(e) => {
report.invalid_checkpoints += 1;
report.checkpoint_errors.insert(
checkpoint.id.clone(),
vec![format!("Verification failed: {}", e)],
);
}
}
}
report.timeline_structure_valid = self.verify_timeline_structure(timeline)?;
report.no_hash_conflicts = self.check_hash_conflicts(timeline)?;
report.deduplication_working = self.verify_deduplication(timeline)?;
report.verification_time_ms = start.elapsed().as_millis() as u64;
info!(
"Timeline verification complete in {}ms: {}/{} checkpoints valid",
report.verification_time_ms,
report.valid_checkpoints,
report.total_checkpoints
);
Ok(report)
}
fn verify_timeline_structure(&self, timeline: &Timeline) -> Result<bool> {
for checkpoint in timeline.checkpoints.values() {
if let Some(parent_id) = &checkpoint.parent_id {
if !timeline.checkpoints.contains_key(parent_id) {
error!(
"Checkpoint {} references non-existent parent {}",
checkpoint.short_id(),
&parent_id[..8]
);
return Ok(false);
}
}
}
Ok(true)
}
fn check_hash_conflicts(&self, timeline: &Timeline) -> Result<bool> {
let mut content_hashes = HashMap::new();
for checkpoint in timeline.checkpoints.values() {
if let Ok(manifest) = self.storage.load_manifest(&checkpoint.id) {
for file in &manifest.files {
if let Some(existing) = content_hashes.get(&file.content_hash) {
if existing != &file.size {
warn!(
"Hash conflict detected: {} has different sizes {} vs {}",
&file.content_hash[..8],
existing,
file.size
);
return Ok(false);
}
} else {
content_hashes.insert(file.content_hash.clone(), file.size);
}
}
}
}
Ok(true)
}
fn verify_deduplication(&self, _timeline: &Timeline) -> Result<bool> {
Ok(true)
}
}
/// Outcome of a full integrity verification of one checkpoint.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VerificationReport {
// Identifier of the checkpoint this report describes.
pub checkpoint_id: String,
// Result of the checkpoint's own metadata integrity check.
pub metadata_valid: bool,
// Whether the recomputed state hash matches the recorded one.
pub state_hash_valid: bool,
// Whether the rebuilt merkle root matches the recorded content root.
pub merkle_root_valid: bool,
// Per-file verification results (directories are excluded).
pub file_checks: Vec<FileVerification>,
// True when the parent checkpoint exists (or there is no parent).
pub parent_valid: bool,
// Content hashes present in storage but referenced by no manifest.
pub orphaned_objects: Vec<String>,
// Wall-clock duration of the verification run, in milliseconds.
pub verification_time_ms: u64,
// Number of file entries actually checked.
pub total_files_checked: usize,
// Number of checked files that passed every sub-check.
pub files_valid: usize,
// Human-readable descriptions of every failure encountered.
pub errors: Vec<String>,
}
impl VerificationReport {
    /// Creates an empty (all-failing) report for the given checkpoint id;
    /// fields are filled in as verification progresses.
    pub fn new(checkpoint_id: String) -> Self {
        Self {
            checkpoint_id,
            metadata_valid: false,
            state_hash_valid: false,
            merkle_root_valid: false,
            file_checks: Vec::new(),
            parent_valid: false,
            orphaned_objects: Vec::new(),
            verification_time_ms: 0,
            total_files_checked: 0,
            files_valid: 0,
            errors: Vec::new(),
        }
    }

    /// True only when every check passed, all files verified, and no
    /// orphaned objects or errors were recorded.
    pub fn is_valid(&self) -> bool {
        self.metadata_valid
            && self.state_hash_valid
            && self.merkle_root_valid
            && self.parent_valid
            && self.files_valid == self.total_files_checked
            && self.orphaned_objects.is_empty()
            && self.errors.is_empty()
    }

    /// One-line human-readable description of the verification outcome.
    pub fn summary(&self) -> String {
        // Truncate defensively: ids shorter than 8 chars must not panic
        // (the previous `&id[..8]` slicing did).
        let short_id = self.checkpoint_id.get(..8).unwrap_or(&self.checkpoint_id);
        if self.is_valid() {
            format!(
                "Checkpoint {} is valid ({} files verified in {}ms)",
                short_id,
                self.total_files_checked,
                self.verification_time_ms
            )
        } else {
            // Mirror every condition `is_valid` checks — previously the
            // `errors` condition was missing, so a report invalid only via
            // recorded errors produced an empty issue list.
            let issues = [
                (!self.metadata_valid).then_some("invalid metadata"),
                (!self.state_hash_valid).then_some("state hash mismatch"),
                (!self.merkle_root_valid).then_some("merkle root mismatch"),
                (!self.parent_valid).then_some("parent missing"),
                (self.files_valid < self.total_files_checked)
                    .then_some("file verification failures"),
                (!self.orphaned_objects.is_empty()).then_some("orphaned objects"),
                (!self.errors.is_empty()).then_some("recorded errors"),
            ]
            .into_iter()
            .flatten()
            .collect::<Vec<_>>()
            .join(", ");
            format!(
                "Checkpoint {} is invalid: {} ({}/{} files valid)",
                short_id,
                issues,
                self.files_valid,
                self.total_files_checked
            )
        }
    }
}
/// Verification result for a single manifest file entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileVerification {
// Path of the file as recorded in the manifest.
pub path: std::path::PathBuf,
// Whether the stored content re-hashes to the recorded content hash.
pub content_hash_valid: bool,
// Mirrors `content_hash_valid`; no independent metadata re-hash is done.
pub metadata_hash_valid: bool,
// Whether the content object exists in storage at all.
pub object_exists: bool,
// Whether the loaded content length matches the recorded size.
pub size_matches: bool,
// Description of the failure, if any sub-check could not run.
pub error: Option<String>,
}
impl FileVerification {
    /// A file entry passes only when no error was recorded and every
    /// individual flag (existence, content hash, metadata hash, size) is set.
    pub fn is_valid(&self) -> bool {
        let checks = [
            self.content_hash_valid,
            self.metadata_hash_valid,
            self.object_exists,
            self.size_matches,
        ];
        self.error.is_none() && checks.into_iter().all(|passed| passed)
    }
}
/// Aggregate outcome of verifying every checkpoint in a timeline plus the
/// timeline-wide structural invariants.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct TimelineVerificationReport {
// Number of checkpoints in the timeline.
pub total_checkpoints: usize,
// Checkpoints whose full verification passed.
pub valid_checkpoints: usize,
// Checkpoints that failed verification or errored out.
pub invalid_checkpoints: usize,
// Whether every parent reference resolves within the timeline.
pub timeline_structure_valid: bool,
// Whether no content hash maps to conflicting file sizes.
pub no_hash_conflicts: bool,
// Result of the deduplication health check.
pub deduplication_working: bool,
// Per-checkpoint reports for checkpoints that produced one.
pub checkpoint_reports: Vec<VerificationReport>,
// Error messages keyed by checkpoint id, for failed checkpoints.
pub checkpoint_errors: HashMap<String, Vec<String>>,
// Wall-clock duration of the whole run, in milliseconds.
pub verification_time_ms: u64,
}
impl TimelineVerificationReport {
    /// The timeline is healthy when all structural invariants hold and
    /// every single checkpoint verified successfully.
    pub fn is_valid(&self) -> bool {
        let structure_ok = self.timeline_structure_valid
            && self.no_hash_conflicts
            && self.deduplication_working;
        structure_ok && self.valid_checkpoints == self.total_checkpoints
    }

    /// One-line human-readable description of the timeline verification run.
    pub fn summary(&self) -> String {
        match self.is_valid() {
            true => format!(
                "Timeline is valid: {} checkpoints verified in {}ms",
                self.total_checkpoints, self.verification_time_ms
            ),
            false => format!(
                "Timeline has issues: {}/{} checkpoints valid, {} errors in {}ms",
                self.valid_checkpoints,
                self.total_checkpoints,
                self.checkpoint_errors.len(),
                self.verification_time_ms
            ),
        }
    }
}
/// Stateless fast-path checks that need no storage access.
#[derive(Debug)]
pub struct QuickVerifier;
impl QuickVerifier {
    /// Fast integrity check: recomputes only the state hash and compares it
    /// to the recorded one. Does not touch file content.
    pub fn verify_checkpoint(checkpoint: &Checkpoint) -> Result<bool> {
        Ok(checkpoint.compute_state_hash() == checkpoint.state_hash)
    }

    /// Fast content check: hashes `content` and compares it to the hash
    /// recorded in the manifest entry.
    pub fn verify_file(entry: &FileEntry, content: &[u8]) -> bool {
        entry.content_hash == utils::hash_data(content)
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::checkpoint::CheckpointMetadataBuilder;
use crate::compression::{CompressionEngine, CompressionStrategy};
use crate::types::{FileManifest, TitorConfig};
use chrono::Utc;
use tempfile::TempDir;
use std::path::PathBuf;
// Builds a throwaway Storage in a temp directory. The TempDir is returned
// alongside the Storage so it isn't dropped (and deleted) early.
fn create_test_storage() -> (Storage, TempDir) {
let temp_dir = TempDir::new().unwrap();
let path = temp_dir.path().to_path_buf();
// Remove the directory so Storage::init can create it fresh; failure is
// fine if it does not exist yet.
std::fs::remove_dir_all(&path).ok();
let config = TitorConfig {
root_path: PathBuf::from("/test"),
storage_path: path.clone(),
max_file_size: 0,
parallel_workers: 4,
ignore_patterns: vec![],
compression_strategy: "fast".to_string(),
follow_symlinks: false,
version: env!("CARGO_PKG_VERSION").to_string(),
};
let compression = CompressionEngine::new(CompressionStrategy::Fast);
let storage = Storage::init(
path,
config,
compression,
).unwrap();
(storage, temp_dir)
}
// End-to-end verification of an empty checkpoint: metadata, state hash
// and (absent) parent should all validate, with zero files checked.
#[test]
fn test_checkpoint_verification() {
let (storage, _temp_dir) = create_test_storage();
let checkpoint = Checkpoint::new(
None,
Some("Test checkpoint".to_string()),
CheckpointMetadataBuilder::new()
.file_count(0)
.total_size(0)
.build(),
"merkle_root".to_string(),
);
storage.store_checkpoint(&checkpoint).unwrap();
// An empty manifest is still required for verification to proceed past
// the manifest-loading step.
let manifest = FileManifest {
checkpoint_id: checkpoint.id.clone(),
files: vec![],
total_size: 0,
file_count: 0,
merkle_root: "".to_string(),
created_at: Utc::now(),
};
storage.store_manifest(&manifest).unwrap();
let verifier = CheckpointVerifier::new(&storage);
let report = verifier.verify_complete(&checkpoint).unwrap();
assert!(report.metadata_valid);
assert!(report.state_hash_valid);
assert!(report.parent_valid);
assert_eq!(report.total_files_checked, 0);
assert_eq!(report.files_valid, 0);
}
// QuickVerifier should accept an untouched checkpoint and reject one
// whose merkle root has been tampered with (state hash covers it).
#[test]
fn test_quick_verification() {
let checkpoint = Checkpoint::new(
None,
Some("Test".to_string()),
CheckpointMetadataBuilder::new().build(),
"merkle".to_string(),
);
assert!(QuickVerifier::verify_checkpoint(&checkpoint).unwrap());
let mut tampered = checkpoint.clone();
tampered.content_merkle_root = "tampered".to_string();
assert!(!QuickVerifier::verify_checkpoint(&tampered).unwrap());
}
// QuickVerifier::verify_file matches content against the recorded hash
// and must reject any other content.
#[test]
fn test_file_verification() {
let content = b"test content";
let hash = utils::hash_data(content);
let entry = FileEntry {
path: PathBuf::from("test.txt"),
content_hash: hash,
size: content.len() as u64,
permissions: 0o644,
modified: Utc::now(),
is_compressed: false,
metadata_hash: "meta".to_string(),
combined_hash: "combined".to_string(),
is_symlink: false,
symlink_target: None,
is_directory: false,
};
assert!(QuickVerifier::verify_file(&entry, content));
assert!(!QuickVerifier::verify_file(&entry, b"different content"));
}
}