use serde::{Deserialize, Serialize};
use super::constants::{CDC_AVG, CDC_MAX, CDC_MIN};
use crate::aerocrypt::KEY_SIZE;
/// Size limits, in bytes, that steer content-defined chunking.
///
/// `chunk_ranges_with` never cuts a chunk shorter than `min`, always cuts once
/// a chunk reaches `max`, and derives its boundary mask from `avg`.
/// `validate` checks that the three values are mutually consistent.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct CdcBounds {
/// Smallest length at which a chunk boundary may be placed.
pub min: usize,
/// Value the boundary mask (`avg - 1`) is derived from; `validate`
/// requires it to be a power of two.
pub avg: usize,
/// Length at which a chunk is cut even without a boundary hit.
pub max: usize,
}
impl CdcBounds {
    /// Returns the bounds built from the `CDC_MIN`, `CDC_AVG` and `CDC_MAX`
    /// constants.
    pub fn defaults() -> Self {
        let (min, avg, max) = (CDC_MIN, CDC_AVG, CDC_MAX);
        Self { min, avg, max }
    }

    /// Picks the bounds profile for `level`.
    ///
    /// Levels of 19 and above get the large profile (1 MiB / 4 MiB / 16 MiB);
    /// every other level falls back to [`CdcBounds::defaults`].
    // NOTE(review): `level` is presumably the zstd compression level — confirm
    // against callers.
    pub fn for_level(level: i32) -> Self {
        const MIB: usize = 1024 * 1024;
        if level < 19 {
            return Self::defaults();
        }
        Self {
            min: MIB,
            avg: 4 * MIB,
            max: 16 * MIB,
        }
    }

    /// Checks that the bounds are usable.
    ///
    /// The bounds are accepted when `4096 <= min <= avg <= max <= 256 MiB` and
    /// `avg` is a power of two.
    ///
    /// # Errors
    ///
    /// Returns a message listing all three values when any rule is violated.
    pub fn validate(&self) -> Result<(), String> {
        const MIN_FLOOR: usize = 4096;
        const MAX_CEILING: usize = 256 * 1024 * 1024;

        let Self { min, avg, max } = *self;
        let well_ordered = MIN_FLOOR <= min && min <= avg && avg <= max && max <= MAX_CEILING;
        if well_ordered && avg.is_power_of_two() {
            Ok(())
        } else {
            Err(format!(
                "Invalid AeroVault v3 CDC bounds: min={} avg={} max={}",
                min, avg, max
            ))
        }
    }
}
/// Derives a chunk identifier from `plaintext` under `key`.
///
/// The identifier is the hex encoding of the first 16 bytes of the keyed
/// BLAKE3 hash of `plaintext`.
pub fn keyed_chunk_id(key: &[u8; KEY_SIZE], plaintext: &[u8]) -> String {
    let digest = blake3::keyed_hash(key, plaintext);
    // Only the leading 16 bytes of the digest take part in the identifier.
    let truncated = &digest.as_bytes()[..16];
    hex::encode(truncated)
}
/// Builds the 256-entry gear table used by the chunker.
///
/// Entry `i` is the first 8 bytes, read as a little-endian `u64`, of the
/// BLAKE3 hash of the fixed domain string followed by the single byte `i`.
/// The table depends on nothing else, so every call yields the same values.
pub fn gear_table() -> [u64; 256] {
    const DOMAIN: &[u8] = b"AeroVault v3 gear-cdc table";

    // One buffer holds the domain string plus a trailing byte that is
    // overwritten for each table entry.
    let mut preimage = Vec::with_capacity(DOMAIN.len() + 1);
    preimage.extend_from_slice(DOMAIN);
    preimage.push(0u8);
    let tail = DOMAIN.len();

    let mut gears = [0u64; 256];
    for (byte, gear) in (0..=u8::MAX).zip(gears.iter_mut()) {
        preimage[tail] = byte;
        let digest = blake3::hash(&preimage);
        let prefix: [u8; 8] = digest.as_bytes()[0..8].try_into().expect("slice length");
        *gear = u64::from_le_bytes(prefix);
    }
    gears
}
/// Splits `data` into contiguous half-open `(start, end)` byte ranges using a
/// gear rolling hash.
///
/// * Empty input yields no ranges.
/// * Input no longer than `bounds.min` yields the single range `(0, len)`.
/// * Otherwise a cut is placed after a byte once the current chunk is at least
///   `bounds.min` long and either the rolling hash masked with
///   `bounds.avg - 1` is zero or the chunk has reached `bounds.max`.
///   Any bytes left after the last cut form the final range.
///
/// `bounds.avg` is expected to be a non-zero power of two (the rule
/// [`CdcBounds::validate`] enforces); the mask is computed as `avg - 1`
/// without further checks.
pub fn chunk_ranges_with(data: &[u8], bounds: &CdcBounds) -> Vec<(usize, usize)> {
    let total = data.len();
    if total == 0 {
        return Vec::new();
    }
    if total <= bounds.min {
        return vec![(0, total)];
    }

    let gears = gear_table();
    let mask = (bounds.avg as u64) - 1;

    let mut cuts = Vec::new();
    let mut chunk_start = 0usize;
    let mut fingerprint = 0u64;
    for (pos, &byte) in data.iter().enumerate() {
        fingerprint = fingerprint
            .rotate_left(1)
            .wrapping_add(gears[usize::from(byte)]);

        let end = pos + 1;
        let chunk_len = end - chunk_start;
        if chunk_len < bounds.min {
            // Too short to cut yet; keep feeding the rolling hash.
            continue;
        }

        let boundary_hit = (fingerprint & mask) == 0;
        if boundary_hit || chunk_len >= bounds.max {
            cuts.push((chunk_start, end));
            chunk_start = end;
            // Each chunk's hash starts from a clean state.
            fingerprint = 0;
        }
    }

    // Trailing bytes that never reached a cut become the last chunk.
    if chunk_start < total {
        cuts.push((chunk_start, total));
    }
    cuts
}
/// Compresses `chunk` with zstd at the given `level`.
///
/// # Errors
///
/// Returns the encoder's error rendered as a string, prefixed with
/// `zstd compress failed`.
pub fn zstd_compress(chunk: &[u8], level: i32) -> Result<Vec<u8>, String> {
    match zstd::stream::encode_all(chunk, level) {
        Ok(compressed) => Ok(compressed),
        Err(e) => Err(format!("zstd compress failed: {e}")),
    }
}
/// Decompresses a zstd stream while refusing to produce more than
/// `plaintext_len` bytes.
///
/// The decoder is read through a limit of `plaintext_len + 1` bytes, so a
/// stream that expands beyond the declared length is detected after at most
/// one extra byte instead of being decoded in full.
///
/// # Errors
///
/// Returns a string error when the decoder cannot be initialised, when
/// decoding fails, or when the stream yields more than `plaintext_len` bytes.
pub fn zstd_decompress_bounded(compressed: &[u8], plaintext_len: u64) -> Result<Vec<u8>, String> {
    use std::io::Read;

    let decoder = zstd::stream::read::Decoder::new(compressed)
        .map_err(|e| format!("zstd decoder init failed: {e}"))?;

    // Cap the up-front reservation at 1 MiB so a bogus declared length cannot
    // trigger a huge allocation before any data has been decoded.
    let mut out = Vec::with_capacity(plaintext_len.min(1 << 20) as usize);

    // One byte of slack makes an over-long stream observable; saturating_add
    // keeps `u64::MAX` from overflowing (which would wrap the limit to 0 in
    // release builds and panic in debug builds).
    let limit = plaintext_len.saturating_add(1);
    decoder
        .take(limit)
        .read_to_end(&mut out)
        .map_err(|e| format!("zstd decompress failed: {e}"))?;

    // The extra byte was only read to detect overrun; never hand it back.
    if (out.len() as u64) > plaintext_len {
        return Err(format!(
            "zstd decompress failed: output exceeds declared plaintext length {plaintext_len}"
        ));
    }
    Ok(out)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Compression level used by the zstd round-trip test.
    const DEFAULT_LEVEL: i32 = 9;

    /// Both built-in profiles validate, and malformed bounds are rejected.
    #[test]
    fn cdc_bounds_validate_and_profiles() {
        let high_profile = CdcBounds::for_level(19);
        assert!(CdcBounds::defaults().validate().is_ok());
        assert!(high_profile.validate().is_ok());
        assert_eq!(CdcBounds::for_level(9).avg, CDC_AVG);
        assert_eq!(high_profile.avg, 4 * 1024 * 1024);

        // `avg` is below `min` and not a power of two.
        let bad_avg = CdcBounds {
            min: 4096,
            avg: 3000,
            max: 4096,
        };
        assert!(bad_avg.validate().is_err());

        // `min` exceeds `avg`.
        let min_above_avg = CdcBounds {
            min: 8192,
            avg: 4096,
            max: 8192,
        };
        assert!(min_above_avg.validate().is_err());
    }

    /// Empty input gives no ranges; a 1024-byte input gives one range.
    #[test]
    fn empty_and_small_inputs() {
        let bounds = CdcBounds::defaults();
        let nothing: [u8; 0] = [];
        assert!(chunk_ranges_with(&nothing, &bounds).is_empty());

        let tiny = [0u8; 1024];
        assert_eq!(chunk_ranges_with(&tiny, &bounds), vec![(0, 1024)]);
    }

    /// Ranges tile the whole input without gaps and respect min/max.
    #[test]
    fn chunk_ranges_cover_and_are_contiguous() {
        // Fill 8 MiB with xorshift output so the test data is reproducible.
        let mut state = 0x9e3779b97f4a7c15u64;
        let data: Vec<u8> = (0..8 * 1024 * 1024)
            .map(|_| {
                state ^= state << 13;
                state ^= state >> 7;
                state ^= state << 17;
                (state & 0xff) as u8
            })
            .collect();

        let bounds = CdcBounds::defaults();
        let ranges = chunk_ranges_with(&data, &bounds);
        assert!(ranges.len() > 1, "large input must split");
        assert_eq!(ranges.first().unwrap().0, 0);
        assert_eq!(ranges.last().unwrap().1, data.len());

        for pair in ranges.windows(2) {
            assert_eq!(pair[0].1, pair[1].0);
        }

        let final_idx = ranges.len() - 1;
        for (i, &(start, end)) in ranges.iter().enumerate() {
            let len = end - start;
            assert!(len <= bounds.max, "chunk over max");
            // Only the trailing chunk is allowed to be shorter than `min`.
            if i != final_idx {
                assert!(len >= bounds.min, "non-final chunk under min");
            }
        }
    }

    /// Entry 42 matches an independent recomputation of its derivation.
    #[test]
    fn gear_table_is_deterministic_known_answer() {
        let table = gear_table();
        let preimage = [&b"AeroVault v3 gear-cdc table"[..], &[42u8][..]].concat();
        let digest = blake3::hash(&preimage);
        let expected = u64::from_le_bytes(digest.as_bytes()[0..8].try_into().unwrap());
        assert_eq!(table[42], expected);
    }

    /// The id is 32 hex characters, matches a recomputation, and changes
    /// when the input changes.
    #[test]
    fn keyed_chunk_id_known_answer() {
        let key = [0x11u8; KEY_SIZE];
        let message = b"the quick brown fox";
        let id = keyed_chunk_id(&key, message);
        assert_eq!(id.len(), 32, "16 bytes hex");

        let digest = blake3::keyed_hash(&key, message);
        assert_eq!(id, hex::encode(&digest.as_bytes()[..16]));
        assert_ne!(id, keyed_chunk_id(&key, b"the quick brown foy"));
    }

    /// Compress then bounded-decompress returns the original payload.
    #[test]
    fn zstd_round_trip_bounded() {
        let payload = b"AeroVault zstd round-trip payload".repeat(4096);
        let declared_len = payload.len() as u64;
        let compressed = zstd_compress(&payload, DEFAULT_LEVEL).unwrap();
        let restored = zstd_decompress_bounded(&compressed, declared_len).unwrap();
        assert_eq!(restored, payload);
    }
}