use std::collections::HashMap;
/// Codec scaffold combining entropy-driven level selection and pattern-based
/// dictionary construction with `compress` / `decompress` entry points.
///
/// `compress` and `decompress` are currently pass-through: they return a copy
/// of their input and do not consult any of the fields below.
#[derive(Debug, Clone)]
pub struct AdvancedZSTDCodec {
    /// Dictionary buffer; `new` creates it empty with 128 KiB reserved.
    dictionary: Vec<u8>,
    /// Configured compression level (`new` sets 19).
    compression_level: u32,
    /// Configured entropy threshold (`new` sets 0.7).
    entropy_threshold: f64,
}

impl AdvancedZSTDCodec {
    /// Upper bound, in bytes, on the size of a generated dictionary (128 KiB).
    const MAX_DICTIONARY_SIZE: usize = 131_072;

    /// Creates a codec with an empty dictionary (128 KiB reserved),
    /// compression level 19 and entropy threshold 0.7.
    pub fn new() -> Self {
        Self {
            dictionary: Vec::with_capacity(Self::MAX_DICTIONARY_SIZE),
            compression_level: 19,
            entropy_threshold: 0.7,
        }
    }

    /// Picks a compression level from the byte entropy of `data`.
    ///
    /// Lower entropy (more redundancy) maps to a higher level:
    ///
    /// | entropy (bits/byte) | level |
    /// |---------------------|-------|
    /// | `< 2.0`             | 22    |
    /// | `< 3.0`             | 21    |
    /// | `< 4.0`             | 20    |
    /// | `< 5.0`             | 19    |
    /// | `< 6.0`             | 18    |
    /// | otherwise           | 17    |
    ///
    /// Empty input has entropy 0 and therefore yields 22.
    pub fn calculate_compression_level(data: &[u8]) -> u32 {
        let entropy = Self::calculate_entropy(data);
        match entropy {
            e if e < 2.0 => 22,
            e if e < 3.0 => 21,
            e if e < 4.0 => 20,
            e if e < 5.0 => 19,
            e if e < 6.0 => 18,
            _ => 17,
        }
    }

    /// Returns the Shannon entropy of `data` in bits per byte (0.0 ..= 8.0).
    ///
    /// Empty input returns 0.0: no byte value has a non-zero count, so no
    /// term is accumulated and the division by the zero length never runs.
    fn calculate_entropy(data: &[u8]) -> f64 {
        let mut freq = [0usize; 256];
        for &byte in data {
            freq[usize::from(byte)] += 1;
        }

        let len = data.len() as f64;
        let mut entropy = 0.0;
        for &count in freq.iter().filter(|&&count| count > 0) {
            let p = count as f64 / len;
            entropy -= p * p.log2();
        }
        entropy
    }

    /// Builds a dictionary by concatenating the most frequent byte patterns
    /// found in `samples`.
    ///
    /// Every window of 4, 8, 16 and 32 bytes in every sample is counted.
    /// Patterns are then appended in order of descending frequency until the
    /// next pattern would push the dictionary past 128 KiB. Patterns with the
    /// same frequency are ordered by their bytes (lexicographically), so the
    /// result is fully determined by the input rather than by hash-map
    /// iteration order.
    ///
    /// Samples shorter than a window size contribute no patterns of that
    /// size; with no patterns at all the dictionary is empty.
    pub fn build_dictionary_from_patterns(samples: &[&[u8]]) -> Vec<u8> {
        const WINDOW_SIZES: [usize; 4] = [4, 8, 16, 32];

        // Key on borrowed slices: the samples outlive this function, so no
        // per-window allocation is needed while counting.
        let mut pattern_freq: HashMap<&[u8], usize> = HashMap::new();
        for sample in samples {
            for window_size in WINDOW_SIZES {
                for window in sample.windows(window_size) {
                    *pattern_freq.entry(window).or_insert(0) += 1;
                }
            }
        }

        let mut patterns: Vec<(&[u8], usize)> = pattern_freq.into_iter().collect();
        // Keys are unique, so (count desc, bytes asc) is a total order and an
        // unstable sort still yields a deterministic sequence.
        patterns.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));

        let mut dictionary = Vec::with_capacity(Self::MAX_DICTIONARY_SIZE);
        for (pattern, _) in patterns {
            if dictionary.len() + pattern.len() > Self::MAX_DICTIONARY_SIZE {
                break;
            }
            dictionary.extend_from_slice(pattern);
        }
        dictionary
    }

    /// Returns a copy of `data`.
    ///
    /// Placeholder: no compression is applied yet, and the level suggested by
    /// [`Self::calculate_compression_level`] is not used here.
    pub fn compress(&self, data: &[u8]) -> Vec<u8> {
        data.to_vec()
    }

    /// Returns a copy of `compressed`; the inverse of the pass-through
    /// [`Self::compress`].
    pub fn decompress(&self, compressed: &[u8]) -> Vec<u8> {
        compressed.to_vec()
    }
}

impl Default for AdvancedZSTDCodec {
    /// Equivalent to [`AdvancedZSTDCodec::new`].
    fn default() -> Self {
        Self::new()
    }
}
/// Delta (successive-difference) encoding for integer sequences, plus a
/// fixed-point variant for floating-point sequences.
pub struct DeltaEncoder;

impl DeltaEncoder {
    /// Encodes `data` as its first element followed by the difference between
    /// each element and its predecessor.
    ///
    /// Differences use wrapping (two's-complement) arithmetic, so inputs whose
    /// true difference does not fit in an `i64` neither panic nor lose
    /// information: [`Self::decode_i64`] restores them exactly.
    ///
    /// Returns an empty vector for empty input.
    pub fn encode_i64(data: &[i64]) -> Vec<i64> {
        let mut encoded = Vec::with_capacity(data.len());
        encoded.extend(data.first().copied());
        encoded.extend(data.windows(2).map(|pair| pair[1].wrapping_sub(pair[0])));
        encoded
    }

    /// Quantizes each value to fixed point with `quantization_bits` fractional
    /// bits, then delta-encodes the result.
    ///
    /// Each value is multiplied by `2^quantization_bits` and converted with
    /// `as i64`, which truncates toward zero, saturates at the `i64` range and
    /// maps NaN to 0.
    pub fn encode_f64(data: &[f64], quantization_bits: u32) -> Vec<i64> {
        let scale = Self::quantization_scale(quantization_bits);
        let quantized: Vec<i64> = data.iter().map(|&v| (v * scale) as i64).collect();
        Self::encode_i64(&quantized)
    }

    /// Reverses [`Self::encode_i64`] by accumulating a wrapping running sum.
    ///
    /// Returns an empty vector for empty input.
    pub fn decode_i64(encoded: &[i64]) -> Vec<i64> {
        let mut decoded = Vec::with_capacity(encoded.len());
        // Starting from zero makes the first "delta" reproduce the first
        // element itself, so no special case is needed.
        let mut current = 0i64;
        for &delta in encoded {
            current = current.wrapping_add(delta);
            decoded.push(current);
        }
        decoded
    }

    /// Reverses [`Self::encode_f64`]: delta-decodes, then divides every value
    /// by `2^quantization_bits`.
    ///
    /// `quantization_bits` must match the value used for encoding.
    pub fn decode_f64(encoded: &[i64], quantization_bits: u32) -> Vec<f64> {
        let scale = Self::quantization_scale(quantization_bits);
        Self::decode_i64(encoded)
            .into_iter()
            .map(|v| v as f64 / scale)
            .collect()
    }

    /// Returns `2^quantization_bits` as an `f64` without a shift overflow.
    fn quantization_scale(quantization_bits: u32) -> f64 {
        match 1u64.checked_shl(quantization_bits) {
            Some(scale) => scale as f64,
            // 64 or more bits cannot be produced by shifting a u64; compute
            // the power of two in floating point instead (infinite once the
            // exponent exceeds the f64 range).
            None => 2f64.powi(quantization_bits.min(i32::MAX as u32) as i32),
        }
    }
}
/// Fixed-width bit packing of unsigned integers.
///
/// Values are stored back to back, least-significant bit first, filling each
/// output byte from its least-significant bit upward.
pub struct BitPacker;

impl BitPacker {
    /// Returns the number of bits required to represent `max_value`.
    ///
    /// Always at least 1, so that a sequence of zeros still occupies one bit
    /// per value.
    pub fn bits_needed(max_value: u64) -> u32 {
        (u64::BITS - max_value.leading_zeros()).max(1)
    }

    /// Packs `values` using `bits_needed(max_value)` bits per value.
    ///
    /// Only the low `bits_needed(max_value)` bits of each value are stored;
    /// any higher bits of a value larger than `max_value` are dropped.
    /// The output is `ceil(values.len() * bits / 8)` bytes long, with unused
    /// trailing bits left as zero.
    pub fn pack(values: &[u64], max_value: u64) -> Vec<u8> {
        let bits_per_value = Self::bits_needed(max_value);
        let total_bits = values.len() as u64 * u64::from(bits_per_value);
        let mut packed = vec![0u8; ((total_bits + 7) / 8) as usize];

        let mut bit_offset = 0usize;
        for &value in values {
            for bit in 0..bits_per_value {
                if (value >> bit) & 1 == 1 {
                    packed[bit_offset / 8] |= 1 << (bit_offset % 8);
                }
                bit_offset += 1;
            }
        }
        packed
    }

    /// Unpacks `count` values of `bits_per_value` bits each from `packed`.
    ///
    /// `bits_per_value` should be the width used when packing, i.e.
    /// `bits_needed(max_value)`. Bits that lie beyond the end of `packed` are
    /// read as zero. If `bits_per_value` exceeds 64, the extra bits of each
    /// value are consumed from the stream but discarded, since they cannot be
    /// represented in a `u64`.
    pub fn unpack(packed: &[u8], count: usize, bits_per_value: u32) -> Vec<u64> {
        let mut unpacked = Vec::with_capacity(count);
        let mut bit_offset = 0usize;
        for _ in 0..count {
            let mut value = 0u64;
            for bit in 0..bits_per_value {
                let is_set = packed
                    .get(bit_offset / 8)
                    .map_or(false, |byte| (byte >> (bit_offset % 8)) & 1 == 1);
                // Guard the shift: `1u64 << bit` overflows for bit >= 64.
                if is_set && bit < u64::BITS {
                    value |= 1u64 << bit;
                }
                bit_offset += 1;
            }
            unpacked.push(value);
        }
        unpacked
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A sequence increasing by one encodes to its first value followed by ones.
    #[test]
    fn test_delta_encoding_sorted_i64() {
        let sorted_input = vec![1000000, 1000001, 1000002, 1000003, 1000004];
        let deltas = DeltaEncoder::encode_i64(&sorted_input);

        assert_eq!(deltas[0], 1000000);
        for delta in &deltas[1..5] {
            assert_eq!(*delta, 1);
        }
    }

    /// Decoding an encoded sequence yields the original, including a decrease.
    #[test]
    fn test_delta_decoding() {
        let original = vec![100, 102, 103, 110, 105];
        let round_tripped = DeltaEncoder::decode_i64(&DeltaEncoder::encode_i64(&original));
        assert_eq!(original, round_tripped);
    }

    /// Eight one-bit values occupy a single byte and unpack unchanged.
    #[test]
    fn test_bit_packing() {
        let values = vec![0, 1, 1, 0, 1, 0, 1, 1];

        let packed = BitPacker::pack(&values, 1);
        assert_eq!(packed.len(), 1);

        let unpacked = BitPacker::unpack(&packed, 8, 1);
        assert_eq!(unpacked, values);
    }

    /// A constant buffer has near-zero entropy; eight equally frequent
    /// symbols have entropy above one bit.
    #[test]
    fn test_entropy_calculation() {
        let constant = vec![42u8; 100];
        assert!(AdvancedZSTDCodec::calculate_entropy(&constant) < 0.1);

        let mixed = vec![1u8, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
        assert!(AdvancedZSTDCodec::calculate_entropy(&mixed) > 1.0);
    }

    /// Low-entropy data selects the top level; data containing every byte
    /// value once selects a level no higher than 19.
    #[test]
    fn test_compression_level_selection() {
        let low_entropy = vec![1u8; 100];
        assert_eq!(
            AdvancedZSTDCodec::calculate_compression_level(&low_entropy),
            22
        );

        let high_entropy: Vec<u8> = (0..=255u8).collect();
        assert!(AdvancedZSTDCodec::calculate_compression_level(&high_entropy) <= 19);
    }
}