use crate::{ArchiveError, ArchiveResult};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
/// Serializable snapshot of a partially completed checksum run over one file.
///
/// Built by `create_checkpoint`, stored in `CheckpointStore` keyed by
/// `file_path`, and accepted by `compute_incremental` to resume a run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChecksumCheckpoint {
/// Path of the file being checksummed; used as the key in `CheckpointStore`.
pub file_path: PathBuf,
/// Total size of the file in bytes; the denominator of `progress()`.
pub file_size: u64,
/// Number of leading bytes already folded into the saved hash states.
pub bytes_processed: u64,
/// Slot for a partial BLAKE3 value. `create_checkpoint` always stores `None`
/// and nothing in this file reads the field.
pub blake3_partial_hex: Option<String>,
/// Saved SHA-256 working state, restorable via `ResumableSha256::from_state`.
pub sha256_state: Option<Sha256State>,
/// Running CRC32 over the first `bytes_processed` bytes, if one was recorded.
pub crc32_value: Option<u32>,
/// Chunk size recorded when the checkpoint was created.
pub chunk_size: usize,
/// Creation time as whole seconds since the Unix epoch.
pub created_at_secs: u64,
}
/// Serializable form of a `ResumableSha256` hasher, produced by
/// `ResumableSha256::save_state` and consumed by `ResumableSha256::from_state`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Sha256State {
/// The eight 32-bit chaining words after the last compressed block.
pub h: [u32; 8],
/// Total number of message bytes fed to the hasher so far.
pub total_bytes: u64,
/// Message bytes received but not yet compressed into `h`.
pub pending: Vec<u8>,
}
impl ChecksumCheckpoint {
    /// Renders this checkpoint as pretty-printed JSON.
    ///
    /// # Errors
    ///
    /// Returns `ArchiveError::Validation` carrying the serializer's message
    /// when `serde_json` fails.
    pub fn to_json(&self) -> ArchiveResult<String> {
        match serde_json::to_string_pretty(self) {
            Ok(text) => Ok(text),
            Err(e) => Err(ArchiveError::Validation(format!(
                "checkpoint serialization failed: {e}"
            ))),
        }
    }

    /// Parses a checkpoint from a JSON document.
    ///
    /// # Errors
    ///
    /// Returns `ArchiveError::Validation` carrying the parser's message when
    /// `json` cannot be deserialized into a checkpoint.
    pub fn from_json(json: &str) -> ArchiveResult<Self> {
        match serde_json::from_str::<Self>(json) {
            Ok(parsed) => Ok(parsed),
            Err(e) => Err(ArchiveError::Validation(format!(
                "checkpoint deserialization failed: {e}"
            ))),
        }
    }

    /// Fraction of the file already processed (`bytes_processed / file_size`).
    ///
    /// A zero-length file is reported as `1.0`. The ratio is not clamped, so it
    /// exceeds `1.0` when `bytes_processed` is larger than `file_size`.
    pub fn progress(&self) -> f64 {
        match self.file_size {
            0 => 1.0,
            total => self.bytes_processed as f64 / total as f64,
        }
    }

    /// Returns `true` once at least `file_size` bytes have been processed.
    pub fn is_complete(&self) -> bool {
        self.file_size <= self.bytes_processed
    }
}
/// Options controlling which checksums `compute_incremental` produces and how
/// the input is fed to the hashers.
#[derive(Debug, Clone)]
pub struct IncrementalConfig {
/// Compute a BLAKE3 digest.
pub enable_blake3: bool,
/// Compute a SHA-256 digest.
pub enable_sha256: bool,
/// Compute a CRC32 checksum.
pub enable_crc32: bool,
/// Number of bytes fed to the hashers per step; `compute_incremental`
/// treats `0` as `1`.
pub chunk_size: usize,
/// Not read by any code in this file.
/// NOTE(review): presumably the number of bytes a caller should process
/// between saved checkpoints — confirm against callers.
pub checkpoint_interval_bytes: u64,
}
impl Default for IncrementalConfig {
fn default() -> Self {
Self {
enable_blake3: true,
enable_sha256: true,
enable_crc32: true,
chunk_size: 1024 * 1024, checkpoint_interval_bytes: 64 * 1024 * 1024, }
}
}
/// Outcome of a `compute_incremental` run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IncrementalResult {
/// Path copied from the checkpoint, or an empty path when none was supplied.
pub file_path: PathBuf,
/// Length in bytes of the data that was checksummed.
pub file_size: u64,
/// BLAKE3 digest as hex, or `None` when BLAKE3 was disabled.
pub blake3_hex: Option<String>,
/// SHA-256 digest as lowercase hex, when one was computed.
pub sha256_hex: Option<String>,
/// CRC32 as eight lowercase hex digits, or `None` when CRC32 was disabled.
pub crc32_hex: Option<String>,
/// `true` when a checkpoint was passed to `compute_incremental`.
pub was_resumed: bool,
/// Number of bytes covered by the reported checksums.
pub bytes_processed: u64,
}
/// SHA-256 round constants `K[0..64]` (FIPS 180-4, section 4.2.2), one per
/// round of `sha256_compress_block`.
// The constants are kept in the standard's plain 8-hex-digit form, so the
// digit-grouping lint is silenced rather than inserting `_` separators.
#[allow(clippy::unreadable_literal)]
const SHA256_K: [u32; 64] = [
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
];
/// SHA-256 initial hash value `H(0)` (FIPS 180-4, section 5.3.3); the starting
/// state of a fresh `ResumableSha256`.
// Kept in the standard's plain 8-hex-digit form, hence the lint exemption.
#[allow(clippy::unreadable_literal)]
const SHA256_H_INIT: [u32; 8] = [
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
];
/// Applies the SHA-256 compression function to one 64-byte block, updating
/// `state` in place.
///
/// Only the first 64 bytes of `block` are read.
///
/// # Panics
///
/// Panics if `block` holds fewer than 64 bytes.
fn sha256_compress_block(state: &mut [u32; 8], block: &[u8]) {
    let block = &block[..64];

    // Message schedule: the first 16 words are the block read big-endian, the
    // remaining 48 are derived from earlier words.
    let mut schedule = [0u32; 64];
    for (word, bytes) in schedule.iter_mut().zip(block.chunks_exact(4)) {
        *word = u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
    }
    for t in 16..64 {
        let x = schedule[t - 15];
        let y = schedule[t - 2];
        let small_sigma0 = x.rotate_right(7) ^ x.rotate_right(18) ^ (x >> 3);
        let small_sigma1 = y.rotate_right(17) ^ y.rotate_right(19) ^ (y >> 10);
        schedule[t] = schedule[t - 16]
            .wrapping_add(small_sigma0)
            .wrapping_add(schedule[t - 7])
            .wrapping_add(small_sigma1);
    }

    // Working variables, held as [a, b, c, d, e, f, g, h].
    let mut vars = *state;
    for (&k, &w) in SHA256_K.iter().zip(schedule.iter()) {
        let [a, b, c, d, e, f, g, h] = vars;
        let big_sigma1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25);
        let choose = (e & f) ^ (!e & g);
        let t1 = h
            .wrapping_add(big_sigma1)
            .wrapping_add(choose)
            .wrapping_add(k)
            .wrapping_add(w);
        let big_sigma0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22);
        let majority = (a & b) ^ (a & c) ^ (b & c);
        let t2 = big_sigma0.wrapping_add(majority);
        vars = [t1.wrapping_add(t2), a, b, c, d.wrapping_add(t1), e, f, g];
    }

    // Fold the round output back into the chaining value.
    for (slot, add) in state.iter_mut().zip(vars.iter()) {
        *slot = slot.wrapping_add(*add);
    }
}
/// Streaming SHA-256 hasher whose internal state can be exported with
/// `save_state` and rebuilt with `from_state`.
#[derive(Debug, Clone)]
pub struct ResumableSha256 {
// Chaining value after the last compressed 64-byte block.
state: [u32; 8],
// Total number of message bytes passed to `update`.
total_bytes: u64,
// Trailing message bytes that have not yet been compressed.
pending: Vec<u8>,
}
impl ResumableSha256 {
    /// Size in bytes of one SHA-256 message block.
    const BLOCK_LEN: usize = 64;

    /// Creates a hasher in the SHA-256 initial state with no data absorbed.
    pub fn new() -> Self {
        Self {
            state: SHA256_H_INIT,
            total_bytes: 0,
            pending: Vec::with_capacity(Self::BLOCK_LEN),
        }
    }

    /// Rebuilds a hasher from a snapshot produced by [`Self::save_state`].
    ///
    /// Snapshots may come from deserialized data, so `saved.pending` is not
    /// trusted to be shorter than one block: any whole 64-byte blocks it holds
    /// are compressed immediately. This is what the next `update` call would
    /// do anyway, and it keeps `finalize_hex` correct when it is called
    /// straight after restoring.
    pub fn from_state(saved: &Sha256State) -> Self {
        let mut state = saved.h;
        let mut blocks = saved.pending.chunks_exact(Self::BLOCK_LEN);
        for block in blocks.by_ref() {
            sha256_compress_block(&mut state, block);
        }
        let mut pending = Vec::with_capacity(Self::BLOCK_LEN);
        pending.extend_from_slice(blocks.remainder());
        Self {
            state,
            total_bytes: saved.total_bytes,
            pending,
        }
    }

    /// Absorbs `data` into the hash.
    ///
    /// Whole blocks are compressed straight from `data`; only a trailing
    /// partial block (fewer than 64 bytes) is buffered for the next call.
    pub fn update(&mut self, data: &[u8]) {
        // SHA-256 encodes the length modulo 2^64 bits, so wrapping is the
        // defined behaviour (and matches `finalize_hex`).
        self.total_bytes = self.total_bytes.wrapping_add(data.len() as u64);

        let mut input = data;

        // Top up a previously buffered partial block first.
        if !self.pending.is_empty() {
            let missing = Self::BLOCK_LEN - self.pending.len();
            let (head, rest) = input.split_at(missing.min(input.len()));
            self.pending.extend_from_slice(head);
            input = rest;
            if self.pending.len() < Self::BLOCK_LEN {
                return;
            }
            sha256_compress_block(&mut self.state, &self.pending);
            self.pending.clear();
        }

        let mut blocks = input.chunks_exact(Self::BLOCK_LEN);
        for block in blocks.by_ref() {
            sha256_compress_block(&mut self.state, block);
        }
        self.pending.extend_from_slice(blocks.remainder());
    }

    /// Exports the current state so hashing can be resumed later with
    /// [`Self::from_state`].
    pub fn save_state(&self) -> Sha256State {
        Sha256State {
            h: self.state,
            total_bytes: self.total_bytes,
            pending: self.pending.clone(),
        }
    }

    /// Returns the SHA-256 digest of everything absorbed so far as 64
    /// lowercase hex characters.
    ///
    /// The hasher is not consumed or modified, so more data may be absorbed
    /// afterwards.
    pub fn finalize_hex(&self) -> String {
        let mut state = self.state;
        let bit_len = self.total_bytes.wrapping_mul(8);

        // `pending` always holds fewer than 64 bytes here, so the padding
        // (0x80, zeros, 8-byte big-endian bit length) fits in one block when
        // fewer than 56 bytes are pending and in two blocks otherwise.
        let rem = self.pending.len();
        let mut padded = [0u8; 128];
        padded[..rem].copy_from_slice(&self.pending);
        padded[rem] = 0x80;
        let pad_len = if rem < 56 { 64 } else { 128 };
        padded[pad_len - 8..pad_len].copy_from_slice(&bit_len.to_be_bytes());

        for block in padded[..pad_len].chunks_exact(Self::BLOCK_LEN) {
            sha256_compress_block(&mut state, block);
        }

        // Each state word printed as 8 hex digits is its big-endian byte form.
        state.iter().map(|word| format!("{word:08x}")).collect()
    }
}
impl Default for ResumableSha256 {
fn default() -> Self {
Self::new()
}
}
pub fn compute_incremental(
data: &[u8],
config: &IncrementalConfig,
checkpoint: Option<&ChecksumCheckpoint>,
) -> ArchiveResult<IncrementalResult> {
let start_offset = checkpoint
.map(|cp| cp.bytes_processed as usize)
.unwrap_or(0);
if start_offset > data.len() {
return Err(ArchiveError::Validation(
"checkpoint offset exceeds data length".to_string(),
));
}
let was_resumed = checkpoint.is_some();
let mut blake3_hasher = if config.enable_blake3 {
Some(blake3::Hasher::new())
} else {
None
};
let mut sha256_hasher = if config.enable_sha256 {
if let Some(cp) = checkpoint {
cp.sha256_state.as_ref().map(|s| ResumableSha256::from_state(s))
} else {
Some(ResumableSha256::new())
}
} else {
None
};
let mut crc32_val: Option<u32> = if config.enable_crc32 {
Some(
checkpoint
.and_then(|cp| cp.crc32_value)
.unwrap_or(0),
)
} else {
None
};
if config.enable_blake3 {
if let Some(ref mut hasher) = blake3_hasher {
hasher.update(&data[..start_offset]);
}
}
let remaining = &data[start_offset..];
let chunk_size = config.chunk_size.max(1);
for chunk in remaining.chunks(chunk_size) {
if let Some(ref mut hasher) = blake3_hasher {
hasher.update(chunk);
}
if let Some(ref mut hasher) = sha256_hasher {
hasher.update(chunk);
}
if let Some(ref mut crc) = crc32_val {
*crc = crc32fast::hash_with_seed(chunk, *crc);
}
}
let blake3_hex = blake3_hasher.map(|h| h.finalize().to_hex().to_string());
let sha256_hex = sha256_hasher.map(|h| h.finalize_hex());
let crc32_hex = crc32_val.map(|v| format!("{v:08x}"));
Ok(IncrementalResult {
file_path: checkpoint
.map(|cp| cp.file_path.clone())
.unwrap_or_default(),
file_size: data.len() as u64,
blake3_hex,
sha256_hex,
crc32_hex,
was_resumed,
bytes_processed: data.len() as u64,
})
}
/// Builds a [`ChecksumCheckpoint`] from the given progress information.
///
/// The SHA-256 state is snapshotted from `sha256_hasher` when one is supplied.
/// `blake3_partial_hex` is always left as `None`. The creation time is the
/// current system time in seconds since the Unix epoch, or `0` if the clock
/// reports a time before the epoch.
pub fn create_checkpoint(
    file_path: &Path,
    file_size: u64,
    bytes_processed: u64,
    sha256_hasher: Option<&ResumableSha256>,
    crc32_value: Option<u32>,
    chunk_size: usize,
) -> ChecksumCheckpoint {
    let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
    let created_at_secs = match since_epoch {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    };
    let sha256_state = match sha256_hasher {
        Some(hasher) => Some(hasher.save_state()),
        None => None,
    };

    ChecksumCheckpoint {
        file_path: file_path.to_owned(),
        file_size,
        bytes_processed,
        blake3_partial_hex: None,
        sha256_state,
        crc32_value,
        chunk_size,
        created_at_secs,
    }
}
/// In-memory collection of checkpoints, holding at most one per file path.
#[derive(Debug, Default)]
pub struct CheckpointStore {
// Checkpoints keyed by their own `file_path`.
checkpoints: HashMap<PathBuf, ChecksumCheckpoint>,
}
impl CheckpointStore {
pub fn new() -> Self {
Self::default()
}
pub fn save(&mut self, checkpoint: ChecksumCheckpoint) {
self.checkpoints
.insert(checkpoint.file_path.clone(), checkpoint);
}
pub fn load(&self, path: &Path) -> Option<&ChecksumCheckpoint> {
self.checkpoints.get(path)
}
pub fn remove(&mut self, path: &Path) -> Option<ChecksumCheckpoint> {
self.checkpoints.remove(path)
}
pub fn len(&self) -> usize {
self.checkpoints.len()
}
pub fn is_empty(&self) -> bool {
self.checkpoints.is_empty()
}
pub fn paths(&self) -> Vec<&PathBuf> {
self.checkpoints.keys().collect()
}
pub fn clear(&mut self) {
self.checkpoints.clear();
}
}
// Unit tests for the resumable SHA-256 hasher, `compute_incremental`,
// checkpoint (de)serialization and the in-memory checkpoint store.
#[cfg(test)]
mod tests {
use super::*;
// Shorthand for the default configuration (all three checksums enabled).
fn default_config() -> IncrementalConfig {
IncrementalConfig::default()
}
// SHA-256 of the empty message must match the published digest.
#[test]
fn test_resumable_sha256_empty() {
let h = ResumableSha256::new();
assert_eq!(
h.finalize_hex(),
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
);
}
// SHA-256 of "abc" must match the published digest.
#[test]
fn test_resumable_sha256_abc() {
let mut h = ResumableSha256::new();
h.update(b"abc");
assert_eq!(
h.finalize_hex(),
"ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
);
}
// Feeding the same bytes in several pieces must not change the digest.
#[test]
fn test_resumable_sha256_chunked_matches_whole() {
let data = b"The quick brown fox jumps over the lazy dog";
let mut h1 = ResumableSha256::new();
h1.update(data);
let mut h2 = ResumableSha256::new();
h2.update(&data[..10]);
h2.update(&data[10..25]);
h2.update(&data[25..]);
assert_eq!(h1.finalize_hex(), h2.finalize_hex());
}
// A hasher rebuilt from a saved state must continue to the same digest as
// one that processed the whole input in a single pass.
#[test]
fn test_resumable_sha256_save_restore() {
let data = b"Hello world of incremental checksumming";
let mut h1 = ResumableSha256::new();
h1.update(&data[..20]);
let saved = h1.save_state();
let mut h2 = ResumableSha256::from_state(&saved);
h2.update(&data[20..]);
let mut h_full = ResumableSha256::new();
h_full.update(data);
assert_eq!(h2.finalize_hex(), h_full.finalize_hex());
}
// Multi-block input: checks digest length and run-to-run determinism only;
// the digest is not compared against a reference value.
#[test]
fn test_resumable_sha256_large_data() {
let data: Vec<u8> = (0u8..=255).cycle().take(2048).collect();
let mut h = ResumableSha256::new();
h.update(&data);
let hex = h.finalize_hex();
assert_eq!(hex.len(), 64);
let mut h2 = ResumableSha256::new();
h2.update(&data);
assert_eq!(h2.finalize_hex(), hex);
}
// A fresh run with the default config reports all three checksums.
#[test]
fn test_compute_incremental_fresh() {
let data = b"test data for incremental checksumming";
let config = default_config();
let result = compute_incremental(data, &config, None)
.expect("compute_incremental failed");
assert!(!result.was_resumed);
assert_eq!(result.bytes_processed, data.len() as u64);
assert!(result.blake3_hex.is_some());
assert!(result.sha256_hex.is_some());
assert!(result.crc32_hex.is_some());
}
// The SHA-256 produced by `compute_incremental` matches the published
// digest of "abc".
#[test]
fn test_compute_incremental_sha256_matches_standard() {
let data = b"abc";
let config = IncrementalConfig {
enable_blake3: false,
enable_sha256: true,
enable_crc32: false,
..default_config()
};
let result = compute_incremental(data, &config, None)
.expect("compute_incremental failed");
assert_eq!(
result.sha256_hex.as_deref(),
Some("ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad")
);
}
// Resuming from a checkpoint taken after 20 bytes must give the same
// SHA-256 and CRC32 as a full, uninterrupted run.
#[test]
fn test_compute_incremental_resume_sha256() {
let data = b"Hello world of incremental checksumming!";
let config = IncrementalConfig {
enable_blake3: false,
enable_sha256: true,
enable_crc32: true,
chunk_size: 8,
..default_config()
};
let full_result = compute_incremental(data, &config, None)
.expect("full compute failed");
let mut sha_hasher = ResumableSha256::new();
sha_hasher.update(&data[..20]);
let crc_val = crc32fast::hash(&data[..20]);
let checkpoint = create_checkpoint(
Path::new("/test/file.bin"),
data.len() as u64,
20,
Some(&sha_hasher),
Some(crc_val),
8,
);
let resumed_result = compute_incremental(data, &config, Some(&checkpoint))
.expect("resumed compute failed");
assert!(resumed_result.was_resumed);
assert_eq!(resumed_result.sha256_hex, full_result.sha256_hex);
assert_eq!(resumed_result.crc32_hex, full_result.crc32_hex);
}
// Disabled algorithms are reported as `None`.
#[test]
fn test_compute_incremental_crc32_only() {
let data = b"crc32 only test";
let config = IncrementalConfig {
enable_blake3: false,
enable_sha256: false,
enable_crc32: true,
..default_config()
};
let result = compute_incremental(data, &config, None)
.expect("compute_incremental failed");
assert!(result.blake3_hex.is_none());
assert!(result.sha256_hex.is_none());
assert!(result.crc32_hex.is_some());
}
// Empty input succeeds and reports zero sizes.
#[test]
fn test_compute_incremental_empty_data() {
let data: &[u8] = b"";
let config = default_config();
let result = compute_incremental(data, &config, None)
.expect("compute_incremental failed");
assert_eq!(result.bytes_processed, 0);
assert_eq!(result.file_size, 0);
}
// A checkpoint survives a JSON round trip with its fields intact.
#[test]
fn test_checkpoint_json_roundtrip() {
let cp = create_checkpoint(
Path::new("/archive/video.mkv"),
1_000_000,
500_000,
None,
Some(0xDEADBEEF),
1024 * 1024,
);
let json = cp.to_json().expect("serialization failed");
let restored = ChecksumCheckpoint::from_json(&json)
.expect("deserialization failed");
assert_eq!(restored.file_path, cp.file_path);
assert_eq!(restored.file_size, cp.file_size);
assert_eq!(restored.bytes_processed, cp.bytes_processed);
assert_eq!(restored.crc32_value, cp.crc32_value);
assert_eq!(restored.chunk_size, cp.chunk_size);
}
// 250 of 1000 bytes processed is reported as 25% progress.
#[test]
fn test_checkpoint_progress() {
let cp = create_checkpoint(
Path::new("/test"),
1000,
250,
None,
None,
1024,
);
assert!((cp.progress() - 0.25).abs() < 1e-10);
}
// A zero-length file counts as fully processed rather than dividing by zero.
#[test]
fn test_checkpoint_progress_empty_file() {
let cp = create_checkpoint(
Path::new("/test"),
0,
0,
None,
None,
1024,
);
assert!((cp.progress() - 1.0).abs() < 1e-10);
}
// All bytes processed means the checkpoint is complete.
#[test]
fn test_checkpoint_is_complete() {
let cp = create_checkpoint(
Path::new("/test"),
100,
100,
None,
None,
1024,
);
assert!(cp.is_complete());
}
// Half the bytes processed means the checkpoint is not complete.
#[test]
fn test_checkpoint_not_complete() {
let cp = create_checkpoint(
Path::new("/test"),
100,
50,
None,
None,
1024,
);
assert!(!cp.is_complete());
}
// A saved checkpoint can be loaded back by its path.
#[test]
fn test_checkpoint_store_save_load() {
let mut store = CheckpointStore::new();
assert!(store.is_empty());
let cp = create_checkpoint(
Path::new("/archive/a.mkv"),
1000,
500,
None,
Some(42),
1024,
);
store.save(cp);
assert_eq!(store.len(), 1);
let loaded = store.load(Path::new("/archive/a.mkv"));
assert!(loaded.is_some());
assert_eq!(loaded.map(|c| c.bytes_processed), Some(500));
}
// Removing a checkpoint returns it and empties the store.
#[test]
fn test_checkpoint_store_remove() {
let mut store = CheckpointStore::new();
let cp = create_checkpoint(Path::new("/a"), 100, 50, None, None, 1024);
store.save(cp);
assert_eq!(store.len(), 1);
let removed = store.remove(Path::new("/a"));
assert!(removed.is_some());
assert!(store.is_empty());
}
// `clear` drops every stored checkpoint.
#[test]
fn test_checkpoint_store_clear() {
let mut store = CheckpointStore::new();
store.save(create_checkpoint(Path::new("/a"), 100, 50, None, None, 1024));
store.save(create_checkpoint(Path::new("/b"), 200, 100, None, None, 1024));
assert_eq!(store.len(), 2);
store.clear();
assert!(store.is_empty());
}
// `paths` lists one entry per stored checkpoint.
#[test]
fn test_checkpoint_store_paths() {
let mut store = CheckpointStore::new();
store.save(create_checkpoint(Path::new("/x"), 100, 50, None, None, 1024));
store.save(create_checkpoint(Path::new("/y"), 200, 100, None, None, 1024));
let paths = store.paths();
assert_eq!(paths.len(), 2);
}
// Saving twice under the same path keeps only the latest checkpoint.
#[test]
fn test_checkpoint_store_overwrite() {
let mut store = CheckpointStore::new();
store.save(create_checkpoint(Path::new("/a"), 100, 25, None, None, 1024));
store.save(create_checkpoint(Path::new("/a"), 100, 75, None, None, 1024));
assert_eq!(store.len(), 1);
let loaded = store.load(Path::new("/a"));
assert_eq!(loaded.map(|c| c.bytes_processed), Some(75));
}
// A saved SHA-256 state survives a JSON round trip unchanged.
#[test]
fn test_sha256_state_json_roundtrip() {
let mut hasher = ResumableSha256::new();
hasher.update(b"partial data");
let state = hasher.save_state();
let json = serde_json::to_string(&state).expect("serialize failed");
let restored: Sha256State = serde_json::from_str(&json).expect("deserialize failed");
assert_eq!(restored.h, state.h);
assert_eq!(restored.total_bytes, state.total_bytes);
assert_eq!(restored.pending, state.pending);
}
// A checkpoint offset beyond the end of the data is rejected with an error.
#[test]
fn test_invalid_checkpoint_offset() {
let data = b"short";
let config = default_config();
let cp = ChecksumCheckpoint {
file_path: PathBuf::from("/test"),
file_size: data.len() as u64,
bytes_processed: 9999,
blake3_partial_hex: None,
sha256_state: None,
crc32_value: None,
chunk_size: 1024,
created_at_secs: 0,
};
let result = compute_incremental(data, &config, Some(&cp));
assert!(result.is_err());
}
}