use hexz_common::constants::{DICT_TRAINING_SIZE, ENTROPY_THRESHOLD};
use hexz_common::crypto::KeyDerivationParams;
use hexz_common::{Error, Result};
use hexz_core::api::file::Archive;
use hexz_core::format::header::Header;
use std::fs::File;
use std::io::{Read, Seek, SeekFrom, Write};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use walkdir::WalkDir;
use ignore::WalkBuilder;
use crate::parallel_pack::{CompressedChunk, RawChunk};
use crate::archive_writer::ArchiveWriter;
use hexz_core::algo::compression::{create_compressor_from_str, zstd::ZstdCompressor};
use hexz_core::algo::dedup::cdc::{StreamChunker, analyze_stream};
use hexz_core::algo::dedup::dcam::{DedupeParams, optimize_params};
use hexz_core::algo::encryption::{aes_gcm::AesGcmEncryptor, Encryptor};
use hexz_core::api::manifest::{ArchiveManifest, FileEntry};
#[cfg(unix)]
use std::os::unix::fs::MetadataExt;
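/// Data transformations applied while packing: encryption, zstd dictionary
/// training, and parallel compression.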
#[derive(Debug, Clone, Default)]
pub struct PackTransformFlags {
pub encrypt: bool,
pub train_dict: bool,
pub parallel: bool,
}
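/// Analysis and reporting options: progress display and DCAM-based
/// chunk-parameter detection.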
#[derive(Debug, Clone, Default)]
pub struct PackAnalysisFlags {
pub show_progress: bool,
pub use_dcam: bool,
pub dcam_optimal: bool,
}
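/// Configuration for a pack operation.
///
/// A `num_workers` of 0 means "use all available CPUs"; chunk-size fields
/// left as `None` are resolved by `resolve_cdc_params`.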
#[derive(Debug, Clone)]
pub struct PackConfig {
pub input: PathBuf,
pub base: Option<PathBuf>,
pub output: PathBuf,
pub compression: String,
pub password: Option<String>,
pub block_size: u32,
pub min_chunk: Option<u32>,
pub avg_chunk: Option<u32>,
pub max_chunk: Option<u32>,
pub num_workers: usize,
pub transform: PackTransformFlags,
pub analysis: PackAnalysisFlags,
}
impl Default for PackConfig {
fn default() -> Self {
Self {
input: PathBuf::new(),
base: None,
output: PathBuf::from("output.hxz"),
compression: "lz4".to_string(),
password: None,
block_size: 65536,
min_chunk: None,
avg_chunk: None,
max_chunk: None,
            num_workers: 0,
            transform: PackTransformFlags {
                encrypt: false,
                train_dict: false,
                parallel: true,
            },
            analysis: PackAnalysisFlags {
                show_progress: true,
                use_dcam: false,
                dcam_optimal: false,
            },
}
}
}
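/// Computes the Shannon entropy of `data` in bits per byte:
/// `H = -Σ pᵢ · log₂(pᵢ)`, ranging from 0.0 (a single repeated byte) to 8.0
/// (all 256 byte values equally likely). Empty input yields 0.0.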
pub fn calculate_entropy(data: &[u8]) -> f64 {
if data.is_empty() {
return 0.0;
}
let mut frequencies = [0u32; 256];
for &byte in data {
frequencies[byte as usize] += 1;
}
let len = data.len() as f64;
let mut entropy = 0.0;
for &count in &frequencies {
if count > 0 {
let p = count as f64 / len;
entropy = p.mul_add(-p.log2(), entropy);
}
}
entropy
}
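// Default content-defined chunking bounds in bytes (min / average / max).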
pub const CDC_DEFAULT_MIN: u32 = 16_384;
pub const CDC_DEFAULT_AVG: u32 = 65_536;
pub const CDC_DEFAULT_MAX: u32 = 262_144;
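/// Resolves content-defined chunking parameters, in decreasing precedence:
/// all three `min`/`avg`/`max` values given explicitly, parameters inherited
/// from the base archive's header, and a baseline from DCAM analysis (when
/// enabled) or the built-in defaults, onto which partial config overrides
/// are applied.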
pub fn resolve_cdc_params(path: &Path, config: &PackConfig) -> Result<DedupeParams> {
fn from_sizes(min: u32, avg: u32, max: u32) -> DedupeParams {
DedupeParams {
f: (avg as f64).log2().round() as u32,
m: min,
z: max,
w: 48,
v: 52,
}
}
if let (Some(min), Some(avg), Some(max)) =
(config.min_chunk, config.avg_chunk, config.max_chunk)
{
return Ok(from_sizes(min, avg, max));
}
if let Some(ref base_path) = config.base {
if let Ok(base_file) = File::open(base_path) {
let mut reader = std::io::BufReader::new(base_file);
if let Ok(header) = Header::read_from(&mut reader) {
if let Some((f, m, z)) = header.cdc_params {
let avg = 1u32 << f;
tracing::debug!("Inheriting CDC params from base archive: f={} m={} z={}", f, m, z);
return Ok(from_sizes(m, avg, z));
}
}
}
}
let (base_min, base_avg, base_max) = if config.analysis.use_dcam {
let baseline = DedupeParams::lbfs_baseline();
let file = File::open(path)?;
let file_size = file.metadata()?.len();
if file_size == 0 {
(CDC_DEFAULT_MIN, CDC_DEFAULT_AVG, CDC_DEFAULT_MAX)
} else {
let stats = analyze_stream(file, &baseline)?;
let optimized = optimize_params(file_size, stats.unique_bytes, &baseline, config.analysis.dcam_optimal);
let p = &optimized.params;
let avg = (2u32).pow(p.f);
tracing::debug!(
"DCAM auto-detected CDC params: f={} m={} z={} (change_rate={:.4}, predicted_ratio={:.4})",
p.f,
p.m,
p.z,
optimized.change_rate,
optimized.predicted_ratio,
);
(p.m, avg, p.z)
}
} else {
(CDC_DEFAULT_MIN, CDC_DEFAULT_AVG, CDC_DEFAULT_MAX)
};
let min = config.min_chunk.unwrap_or(base_min);
let avg = config.avg_chunk.unwrap_or(base_avg);
let max = config.max_chunk.unwrap_or(base_max);
Ok(from_sizes(min, avg, max))
}
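/// Packs `config.input` (a single file or a directory) into an archive at
/// `config.output`, reporting progress through `progress_callback` as
/// `(bytes_processed, total_bytes)` when one is supplied.
///
/// A minimal usage sketch, assuming this module is reachable as
/// `hexz_cli::pack` (the crate path here is illustrative):
///
/// ```no_run
/// use hexz_cli::pack::{pack_archive, PackConfig};
///
/// let config = PackConfig {
///     input: "data.img".into(),
///     output: "data.hxz".into(),
///     ..PackConfig::default()
/// };
/// // No progress callback; the fn-pointer type parameter is spelled out.
/// pack_archive(&config, None::<&fn(u64, u64)>).expect("pack failed");
/// ```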
pub fn pack_archive<F>(config: &PackConfig, progress_callback: Option<&F>) -> Result<()>
where
F: Fn(u64, u64) + Send + Sync,
{
let input_path = &config.input;
let parent = if let Some(ref base_path) = config.base {
Some(hexz_store::open_local(base_path, None)?)
} else {
None
};
let dictionary = if config.compression == "zstd" && config.transform.train_dict {
let sample_path = if input_path.is_dir() {
WalkDir::new(input_path)
.into_iter()
.filter_map(std::result::Result::ok)
.find(|e| e.file_type().is_file())
.ok_or_else(|| Error::Io(std::io::Error::new(std::io::ErrorKind::NotFound, "No files found for dictionary training")))?
.path()
.to_path_buf()
} else {
input_path.clone()
};
Some(train_dictionary(&sample_path, config.block_size)?)
} else {
None
};
let (compressor, compression_type) =
create_compressor_from_str(&config.compression, None, dictionary.as_deref())?;
let (encryptor, enc_params): (Option<Box<dyn Encryptor>>, _) = if config.transform.encrypt {
let password = config.password.clone().ok_or_else(|| {
Error::Io(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"Password required for encryption",
))
})?;
let params = KeyDerivationParams::default();
        let enc = AesGcmEncryptor::new(password.as_bytes(), &params.salt, params.iterations)?;
(Some(Box::new(enc) as Box<dyn Encryptor>), Some(params))
} else {
(None, None)
};
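    // CDC parameters are resolved (including DCAM analysis) for single-file
    // inputs only; directory packs currently use the LBFS baseline.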
let cdc_params = if input_path.is_file() {
resolve_cdc_params(input_path, config)?
} else {
DedupeParams::lbfs_baseline()
};
let mut builder = ArchiveWriter::builder(&config.output, compressor, compression_type)
.block_size(config.block_size)
.variable_blocks(true)
.cdc_params(Some((cdc_params.f, cdc_params.m, cdc_params.z)));
if let Some(parent_snap) = parent {
builder = builder.parent(parent_snap);
}
if let (Some(enc), Some(params)) = (encryptor, enc_params) {
builder = builder.encryption(enc, params);
}
let mut writer = builder.build()?;
if let Some(d) = &dictionary {
writer.write_dictionary(d)?;
}
let dict_ref = dictionary.as_deref();
let manifest = if input_path.is_dir() {
Some(pack_directory(
input_path,
&mut writer,
&cdc_params,
config,
dict_ref,
progress_callback,
)?)
} else {
let total_size = input_path.metadata()?.len();
let progress_bar = if config.analysis.show_progress && progress_callback.is_none() && total_size > 0 {
Some(crate::progress::PackProgress::new(total_size, "Packing"))
} else {
None
};
let cb = |pos: u64, total: u64| {
if let Some(ref pb) = progress_bar {
pb.set_position(pos);
}
if let Some(ref cb) = progress_callback {
cb(pos, total);
}
};
process_stream(
input_path,
true,
&mut writer,
&cdc_params,
config,
dict_ref,
Some(&cb),
)?;
if let Some(ref pb) = progress_bar {
pb.finish();
}
None
};
let metadata = if let Some(m) = manifest {
Some(serde_json::to_vec(&m).map_err(|e| Error::Format(e.to_string()))?)
} else {
None
};
let parent_paths = if let Some(ref base) = config.base {
vec![base.to_string_lossy().into_owned()]
} else {
Vec::new()
};
writer.finalize(parent_paths, metadata.as_deref())?;
Ok(())
}
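/// Walks `root` (honoring standard ignore files plus `.hexzignore`), packs
/// every regular file, and returns a manifest recording each file's logical
/// offset, size, mode, and mtime. Entries inside a `.hexz` directory are
/// skipped, and files are split between the main stream and an auxiliary
/// stream.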
fn pack_directory<F>(
root: &Path,
writer: &mut ArchiveWriter,
cdc_params: &DedupeParams,
config: &PackConfig,
dictionary: Option<&[u8]>,
progress_callback: Option<&F>,
) -> Result<ArchiveManifest>
where
F: Fn(u64, u64) + Send + Sync,
{
let mut main_entries: Vec<(PathBuf, String, std::fs::Metadata)> = Vec::new();
let mut aux_entries: Vec<(PathBuf, String, std::fs::Metadata)> = Vec::new();
let walker = WalkBuilder::new(root)
.standard_filters(true)
.add_custom_ignore_filename(".hexzignore")
.hidden(false)
.build();
for entry in walker.filter_map(std::result::Result::ok) {
if !entry.file_type().is_some_and(|ft| ft.is_file()) {
continue;
}
let path = entry.path().to_path_buf();
if path.components().any(|c| c.as_os_str() == ".hexz") {
continue;
}
let rel_path = path.strip_prefix(root)
.map_err(|e| Error::Format(e.to_string()))?
.to_string_lossy().into_owned();
let metadata = entry.metadata().map_err(|e| Error::Io(std::io::Error::other(e.to_string())))?;
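        // A top-level file named "memory" (presumably a saved memory image
        // alongside a disk image) is routed to the auxiliary stream; all
        // other files go to the main stream.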
if rel_path == "memory" {
aux_entries.push((path, rel_path, metadata));
} else {
main_entries.push((path, rel_path, metadata));
}
}
let main_size: u64 = main_entries.iter().map(|(_, _, m)| m.len()).sum();
let aux_size: u64 = aux_entries.iter().map(|(_, _, m)| m.len()).sum();
let total_size = main_size + aux_size;
let progress_bar = if config.analysis.show_progress && progress_callback.is_none() && total_size > 0 {
Some(crate::progress::PackProgress::new(total_size, "Packing Directory"))
} else {
None
};
let mut files = Vec::new();
let mut current_logical_offset = 0u64;
let mut global_progress = 0u64;
writer.begin_stream(true, main_size);
for (path, rel_path, metadata) in &main_entries {
let size = metadata.len();
let file_entry = FileEntry {
path: rel_path.clone(),
offset: current_logical_offset,
size,
            #[cfg(unix)]
            mode: metadata.mode(),
            #[cfg(not(unix))]
            mode: 0o644,
            mtime: metadata
                .modified()?
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap_or_default()
                .as_secs(),
};
let cur_offset = current_logical_offset;
let cb = |pos: u64, _total: u64| {
let gp = global_progress + pos;
if let Some(ref pb) = progress_bar {
pb.set_position(gp);
}
if let Some(ref cb) = progress_callback {
cb(cur_offset + pos, total_size);
}
};
pack_file_to_stream(path, writer, cdc_params, config, dictionary, Some(&cb))?;
writer.flush_stream()?;
files.push(file_entry);
current_logical_offset = writer.current_logical_pos();
global_progress += size;
}
writer.end_stream()?;
if !aux_entries.is_empty() {
writer.begin_stream(false, aux_size);
for (path, rel_path, metadata) in &aux_entries {
let size = metadata.len();
let file_entry = FileEntry {
path: rel_path.clone(),
offset: 0,
size,
                #[cfg(unix)]
                mode: metadata.mode(),
                #[cfg(not(unix))]
                mode: 0o644,
                mtime: metadata
                    .modified()?
                    .duration_since(std::time::UNIX_EPOCH)
                    .unwrap_or_default()
                    .as_secs(),
};
let cb = |pos: u64, _total: u64| {
let gp = global_progress + pos;
if let Some(ref pb) = progress_bar {
pb.set_position(gp);
}
if let Some(ref cb) = progress_callback {
cb(gp, total_size);
}
};
pack_file_to_stream(path, writer, cdc_params, config, dictionary, Some(&cb))?;
writer.flush_stream()?;
files.push(file_entry);
global_progress += size;
}
writer.end_stream()?;
}
if let Some(ref pb) = progress_bar {
pb.finish();
}
Ok(ArchiveManifest { files })
}
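/// Packs a single file into the writer's currently open stream. Unlike
/// `process_stream`, this does not open or close the stream itself, so
/// `pack_directory` can concatenate many files into one stream.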
fn pack_file_to_stream<F>(
path: &Path,
writer: &mut ArchiveWriter,
cdc_params: &DedupeParams,
config: &PackConfig,
dictionary: Option<&[u8]>,
progress_callback: Option<&F>,
) -> Result<()>
where
F: Fn(u64, u64),
{
let f = File::open(path)?;
let len = f.metadata()?.len();
if config.transform.parallel && !config.transform.encrypt {
process_stream_parallel(
f,
len,
writer,
cdc_params,
config,
dictionary,
progress_callback,
)?;
} else {
process_stream_serial(f, len, writer, cdc_params, progress_callback)?;
}
Ok(())
}
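/// Extracts an archive to `output_path`.
///
/// If the archive metadata contains a directory manifest, `output_path` is
/// created as a directory and each file is restored beneath it (with Unix
/// permissions where available). Otherwise the main stream is written to
/// `output_path` as a single file in 1 MiB reads. Parent archives are
/// resolved first by their recorded path, then relative to the archive's
/// own directory.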
pub fn extract_archive(
input_path: &Path,
output_path: &Path,
password: Option<String>,
) -> Result<()> {
use hexz_core::algo::encryption::aes_gcm::AesGcmEncryptor;
use hexz_core::algo::compression::create_compressor;
use hexz_core::ArchiveStream;
use hexz_store::local::MmapBackend;
use hexz_core::format::header::Header;
use hexz_core::api::file::ParentLoader;
let backend = Arc::new(MmapBackend::new(input_path)?);
let header = Header::read_from_backend(backend.as_ref())?;
let encryptor = if let (Some(params), Some(pass)) = (header.encryption.as_ref(), password) {
        let enc = AesGcmEncryptor::new(pass.as_bytes(), &params.salt, params.iterations)?;
Some(Box::new(enc) as Box<dyn hexz_core::algo::encryption::Encryptor>)
} else {
None
};
let dictionary = header.load_dictionary(backend.as_ref())?;
let compressor = create_compressor(header.compression, None, dictionary.as_deref());
let archive_dir = input_path.parent().unwrap_or_else(|| Path::new(".")).to_path_buf();
let loader: ParentLoader = Box::new(move |parent_path: &str| {
let p = Path::new(parent_path);
let full_parent_path = if p.exists() {
p.to_path_buf()
} else {
let rel = archive_dir.join(parent_path);
if rel.exists() {
rel
} else {
p.to_path_buf()
}
};
let pb: Arc<dyn hexz_core::store::StorageBackend> = Arc::new(MmapBackend::new(&full_parent_path)?);
Archive::open(pb, None)
});
let archive = Archive::with_cache_and_loader(
backend,
compressor,
encryptor,
None,
None,
Some(&loader),
)?;
if let Some(metadata) = &archive.metadata {
if let Ok(manifest) = serde_json::from_slice::<ArchiveManifest>(metadata) {
std::fs::create_dir_all(output_path)?;
for file in manifest.files {
let out_path = output_path.join(&file.path);
if let Some(parent) = out_path.parent() {
std::fs::create_dir_all(parent)?;
}
let mut out_file = File::create(&out_path)?;
let data = archive.read_at(ArchiveStream::Main, file.offset, file.size as usize)?;
out_file.write_all(&data)?;
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
std::fs::set_permissions(&out_path, std::fs::Permissions::from_mode(file.mode))?;
}
}
return Ok(());
}
}
let mut out_file = File::create(output_path)?;
let size = archive.size(ArchiveStream::Main);
    let chunk_size = 1024 * 1024;
    let mut pos = 0u64;
while pos < size {
let len = std::cmp::min(chunk_size as u64, size - pos) as usize;
let data = archive.read_at(ArchiveStream::Main, pos, len)?;
out_file.write_all(&data)?;
pos += len as u64;
}
Ok(())
}
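/// Trains a zstd dictionary from evenly spaced samples of `input_path`.
///
/// Blocks that are all zeros or whose Shannon entropy is at or above
/// `ENTROPY_THRESHOLD` are skipped, since high-entropy data gains little
/// from dictionary compression. Returns an empty dictionary when no
/// suitable samples are found.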
fn train_dictionary(input_path: &Path, block_size: u32) -> Result<Vec<u8>> {
let mut f = File::open(input_path)?;
let file_len = f.metadata()?.len();
let mut samples = Vec::new();
let mut buffer = vec![0u8; block_size as usize];
let target_samples = DICT_TRAINING_SIZE;
let step = if file_len > 0 {
(file_len / target_samples as u64).max(block_size as u64)
} else {
0
};
let mut attempts = 0;
while samples.len() < target_samples && attempts < target_samples * 2 {
let offset = attempts as u64 * step;
if offset >= file_len {
break;
}
_ = f.seek(SeekFrom::Start(offset))?;
let n = f.read(&mut buffer)?;
if n == 0 {
break;
}
let chunk = &buffer[..n];
let is_zeros = chunk.iter().all(|&b| b == 0);
if !is_zeros {
let entropy = calculate_entropy(chunk);
if entropy < ENTROPY_THRESHOLD {
samples.push(chunk.to_vec());
}
}
attempts += 1;
}
if samples.is_empty() {
tracing::warn!("Input seems to be empty or high entropy. Dictionary will be empty.");
Ok(Vec::new())
} else {
let dict_bytes = ZstdCompressor::train(&samples, DICT_TRAINING_SIZE)?;
tracing::info!("Dictionary trained: {} bytes", dict_bytes.len());
Ok(dict_bytes)
}
}
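/// Packs one input stream end to end: opens the stream on the writer,
/// dispatches to the parallel or serial path, then closes the stream. The
/// parallel path is only taken when encryption is disabled.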
fn process_stream<F>(
path: &Path,
is_disk: bool,
writer: &mut ArchiveWriter,
cdc_params: &DedupeParams,
config: &PackConfig,
dictionary: Option<&[u8]>,
progress_callback: Option<&F>,
) -> Result<()>
where
F: Fn(u64, u64),
{
let f = File::open(path)?;
let len = f.metadata()?.len();
writer.begin_stream(is_disk, len);
if config.transform.parallel && !config.transform.encrypt {
process_stream_parallel(
f,
len,
writer,
cdc_params,
config,
dictionary,
progress_callback,
)?;
} else {
process_stream_serial(f, len, writer, cdc_params, progress_callback)?;
}
writer.end_stream()?;
Ok(())
}
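/// Compresses a stream on the current thread, writing one CDC chunk at a
/// time and reporting progress after each block.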
fn process_stream_serial<F>(
f: File,
len: u64,
writer: &mut ArchiveWriter,
cdc_params: &DedupeParams,
progress_callback: Option<&F>,
) -> Result<()>
where
F: Fn(u64, u64),
{
let mut logical_pos = 0u64;
let mut chunk_buf = Vec::with_capacity(cdc_params.z as usize);
let mut chunker = StreamChunker::new(f, *cdc_params);
while let Some(res) = chunker.next_into(&mut chunk_buf) {
let n = res?;
logical_pos += n as u64;
writer.write_data_block(&chunk_buf)?;
if let Some(callback) = progress_callback {
callback(logical_pos, len);
}
}
Ok(())
}
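/// Compresses a stream with a reader thread, a pool of compression workers,
/// and an in-order writer on the current thread.
///
/// The reader splits the file into CDC chunks and tags each with a sequence
/// number; workers compress and BLAKE3-hash chunks concurrently; results are
/// drained through a `BTreeMap` reorder buffer so blocks land in the archive
/// in their original order. Bounded channels keep memory use proportional to
/// the worker count.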
fn process_stream_parallel<F>(
f: File,
len: u64,
writer: &mut ArchiveWriter,
cdc_params: &DedupeParams,
config: &PackConfig,
dictionary: Option<&[u8]>,
progress_callback: Option<&F>,
) -> Result<()>
where
F: Fn(u64, u64),
{
use crossbeam::channel::bounded;
use hexz_core::algo::compression::Compressor;
use std::collections::BTreeMap;
use std::sync::Arc;
let num_workers = if config.num_workers > 0 {
config.num_workers
} else {
num_cpus::get()
};
let (compressor, _) = create_compressor_from_str(&config.compression, None, dictionary)?;
let compressor: Arc<Box<dyn Compressor + Send + Sync>> = Arc::new(compressor);
let channel_size = num_workers * 4;
let (tx_raw, rx_raw) = bounded::<(u64, RawChunk)>(channel_size);
let (tx_compressed, rx_compressed) = bounded::<(u64, CompressedChunk)>(channel_size);
let mut workers = Vec::with_capacity(num_workers);
for _ in 0..num_workers {
let rx = rx_raw.clone();
let tx = tx_compressed.clone();
let comp = compressor.clone();
workers.push(std::thread::spawn(move || -> Result<()> {
for (seq, chunk) in rx {
let compressed_data = comp.compress(&chunk.data)?;
let hash = blake3::hash(&chunk.data);
if tx
.send((
seq,
CompressedChunk {
compressed: compressed_data,
hash: hash.into(),
logical_offset: chunk.logical_offset,
original_size: chunk.data.len(),
},
))
.is_err()
{
                    break;
                }
}
Ok(())
}));
}
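    // Drop the original handles so each channel closes once the reader and
    // all worker clones are finished.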
drop(rx_raw);
drop(tx_compressed);
let reader_cdc_params = *cdc_params;
let reader = std::thread::spawn(move || -> Result<()> {
let mut logical_pos = 0u64;
let chunker = StreamChunker::new(f, reader_cdc_params);
for (seq, chunk_res) in chunker.enumerate() {
let chunk = chunk_res?;
let chunk_len = chunk.len();
if tx_raw
.send((
seq as u64,
RawChunk {
data: chunk,
logical_offset: logical_pos,
},
))
.is_err()
{
                break;
            }
logical_pos += chunk_len as u64;
}
Ok(())
});
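    // Drain compressed chunks, using a reorder buffer keyed by sequence
    // number so blocks are written to the archive in their original order.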
let mut next_seq = 0u64;
let mut reorder_buf: BTreeMap<u64, CompressedChunk> = BTreeMap::new();
let mut write_error: Option<Error> = None;
for (seq, compressed) in &rx_compressed {
_ = reorder_buf.insert(seq, compressed);
while let Some(chunk) = reorder_buf.remove(&next_seq) {
match writer.write_precompressed_block(
&chunk.compressed,
&chunk.hash,
chunk.original_size as u32,
) {
Ok(()) => {
if let Some(callback) = progress_callback {
callback(chunk.logical_offset + chunk.original_size as u64, len);
}
next_seq += 1;
}
Err(e) => {
write_error = Some(e);
break;
}
}
}
if write_error.is_some() {
break;
}
}
drop(rx_compressed);
let reader_result = reader
.join()
.map_err(|_| Error::Io(std::io::Error::other("Reader thread panicked")))?;
    for worker in workers {
        // Propagate worker errors (e.g. compression failures) rather than
        // discarding them, which would silently truncate the archive.
        worker
            .join()
            .map_err(|_| Error::Io(std::io::Error::other("Worker thread panicked")))??;
    }
if let Some(e) = write_error {
return Err(e);
}
reader_result?;
Ok(())
}
#[cfg(test)]
#[allow(clippy::float_cmp)]
mod tests {
use super::*;
#[test]
fn test_calculate_entropy_empty() {
assert_eq!(calculate_entropy(&[]), 0.0);
}
#[test]
fn test_calculate_entropy_uniform() {
let data = vec![0x42; 1000];
let entropy = calculate_entropy(&data);
assert!(
entropy < 0.01,
"Entropy should be near 0.0 for uniform data"
);
}
#[test]
fn test_calculate_entropy_binary() {
let mut data = vec![0u8; 500];
data.extend(vec![1u8; 500]);
let entropy = calculate_entropy(&data);
assert!(
entropy > 0.9 && entropy < 1.1,
"Entropy should be ~1.0 for binary data"
);
}
#[test]
fn test_calculate_entropy_random() {
let data: Vec<u8> = (0..=255).cycle().take(256 * 4).collect();
let entropy = calculate_entropy(&data);
assert!(
entropy > 7.5,
"Entropy should be high for all byte values: got {entropy}"
);
}
#[test]
fn test_calculate_entropy_single_byte() {
assert_eq!(calculate_entropy(&[42]), 0.0);
}
#[test]
fn test_calculate_entropy_two_different_bytes() {
let data = vec![0, 255];
let entropy = calculate_entropy(&data);
assert!(entropy > 0.9 && entropy < 1.1, "Entropy should be ~1.0");
}
#[test]
fn test_pack_config_default() {
let config = PackConfig::default();
assert_eq!(config.compression, "lz4");
assert!(!config.transform.encrypt);
assert_eq!(config.password, None);
assert!(!config.transform.train_dict);
assert_eq!(config.block_size, 65536);
assert_eq!(config.min_chunk, None);
assert_eq!(config.avg_chunk, None);
assert_eq!(config.max_chunk, None);
}
#[test]
fn test_pack_config_clone() {
let config1 = PackConfig {
input: PathBuf::from("/dev/sda"),
output: PathBuf::from("output.hxz"),
compression: "zstd".to_string(),
password: Some("secret".to_string()),
transform: PackTransformFlags { encrypt: true, ..Default::default() },
..Default::default()
};
let config2 = config1.clone();
assert_eq!(config2.input, config1.input);
assert_eq!(config2.output, config1.output);
assert_eq!(config2.compression, config1.compression);
assert_eq!(config2.transform.encrypt, config1.transform.encrypt);
assert_eq!(config2.password, config1.password);
}
#[test]
fn test_pack_config_debug() {
let config = PackConfig::default();
let debug_str = format!("{config:?}");
assert!(debug_str.contains("PackConfig"));
assert!(debug_str.contains("lz4"));
}
#[test]
fn test_entropy_threshold_filtering() {
let low_entropy_data = vec![0u8; 1024];
assert!(calculate_entropy(&low_entropy_data) < ENTROPY_THRESHOLD);
let high_entropy_data: Vec<u8> = (0..1024).map(|i| ((i * 7) % 256) as u8).collect();
let entropy = calculate_entropy(&high_entropy_data);
assert!((0.0..=8.0).contains(&entropy));
}
#[test]
fn test_entropy_calculation_properties() {
let data1 = vec![0u8; 100];
let data2 = [0u8, 1u8].repeat(50);
let mut data3 = Vec::new();
for i in 0..100 {
data3.push((i % 10) as u8);
}
let entropy1 = calculate_entropy(&data1);
let entropy2 = calculate_entropy(&data2);
let entropy3 = calculate_entropy(&data3);
assert!(
entropy1 < entropy2,
"More unique values should increase entropy"
);
assert!(
entropy2 < entropy3,
"Even more unique values should further increase entropy"
);
}
}