#![allow(clippy::cast_possible_truncation)]
#![allow(clippy::cast_precision_loss)]
use rustc_hash::FxHashMap;
use std::hash::Hash;
use std::mem::size_of;
/// Summary of how well a dictionary encoding compresses the values seen so far.
///
/// All byte counts are estimates derived from type sizes, not measured
/// allocations. The `Default` value has every counter at zero and a ratio of
/// `0.0`.
#[derive(Debug, Clone, Default)]
pub struct CompressionStats {
    /// Number of distinct values held in the dictionary.
    pub unique_values: usize,
    /// Number of values that have been encoded, duplicates included.
    pub total_values: usize,
    /// Estimated size of the dictionary itself, in bytes.
    pub dictionary_size_bytes: usize,
    /// Estimated size of the emitted code stream, in bytes.
    pub encoded_size_bytes: usize,
    /// Estimated original size divided by estimated compressed size
    /// (dictionary plus codes); values above `1.0` mean the encoding is
    /// smaller than the raw data.
    pub compression_ratio: f64,
}
/// Two-way mapping between values and their `u32` dictionary codes.
///
/// The forward direction is a hash map keyed by value; the reverse direction
/// is a vector indexed by code.
#[derive(Debug, Clone)]
pub struct DictCodebook<V> {
    /// Forward lookup: value -> assigned code.
    value_to_code: FxHashMap<V, u32>,
    /// Reverse lookup: the entry at index `code` is the value for that code.
    code_to_value: Vec<V>,
}
/// An empty codebook: no values, no codes.
///
/// Written by hand rather than derived, so `V` itself is not required to
/// implement `Default`.
impl<V> Default for DictCodebook<V>
where
    V: Hash + Eq + Clone,
{
    fn default() -> Self {
        let value_to_code = FxHashMap::default();
        let code_to_value = Vec::new();
        Self {
            value_to_code,
            code_to_value,
        }
    }
}
/// Dictionary encoder that replaces values with compact `u32` codes.
///
/// Holds the codebook built so far together with a running count of how many
/// values have been encoded.
#[derive(Debug, Clone)]
pub struct DictionaryEncoder<V>
where
    V: Hash + Eq + Clone,
{
    /// Value <-> code mapping accumulated by the encoder.
    codebook: DictCodebook<V>,
    /// Number of values encoded so far, duplicates included.
    total_encoded: usize,
}
impl<V: Hash + Eq + Clone> DictionaryEncoder<V> {
#[must_use]
pub fn new() -> Self {
Self {
codebook: DictCodebook::default(),
total_encoded: 0,
}
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.codebook.code_to_value.is_empty()
}
#[must_use]
pub fn len(&self) -> usize {
self.codebook.code_to_value.len()
}
pub fn encode(&mut self, value: V) -> u32 {
self.total_encoded += 1;
if let Some(&code) = self.codebook.value_to_code.get(&value) {
return code;
}
let code = u32::try_from(self.codebook.code_to_value.len()).unwrap_or(u32::MAX);
self.codebook.value_to_code.insert(value.clone(), code);
self.codebook.code_to_value.push(value);
code
}
#[must_use]
pub fn decode(&self, code: u32) -> Option<&V> {
self.codebook.code_to_value.get(code as usize)
}
pub fn encode_batch(&mut self, values: &[V]) -> Vec<u32> {
values.iter().map(|v| self.encode(v.clone())).collect()
}
#[must_use]
pub fn decode_batch(&self, codes: &[u32]) -> Vec<V> {
codes
.iter()
.filter_map(|&code| self.decode(code).cloned())
.collect()
}
pub fn clear(&mut self) {
self.codebook.value_to_code.clear();
self.codebook.code_to_value.clear();
self.total_encoded = 0;
}
#[must_use]
pub fn stats(&self) -> CompressionStats {
let unique = self.len();
let total = self.total_encoded;
let value_size = size_of::<V>();
let original_size = total * value_size;
let dict_size = unique * value_size + unique * 4; let encoded_size = total * 4; let compressed_size = dict_size + encoded_size;
let ratio = if compressed_size > 0 {
original_size as f64 / compressed_size as f64
} else {
0.0
};
CompressionStats {
unique_values: unique,
total_values: total,
dictionary_size_bytes: dict_size,
encoded_size_bytes: encoded_size,
compression_ratio: ratio,
}
}
#[must_use]
pub fn codebook(&self) -> &DictCodebook<V> {
&self.codebook
}
}
impl<V: Hash + Eq + Clone> Default for DictionaryEncoder<V> {
fn default() -> Self {
Self::new()
}
}