velesdb_core/compression/
dictionary.rs1#![allow(clippy::cast_possible_truncation)]
7#![allow(clippy::cast_precision_loss)]
8
9use rustc_hash::FxHashMap;
10use std::hash::Hash;
11use std::mem::size_of;
12
/// Size and ratio figures describing a dictionary encoding.
///
/// Produced by [`DictionaryEncoder::stats`]; every field defaults to zero.
#[derive(Clone, Debug, Default)]
pub struct CompressionStats {
    /// Number of distinct values held in the dictionary.
    pub unique_values: usize,
    /// Number of values passed through the encoder, repeats included.
    pub total_values: usize,
    /// Estimated bytes taken by the dictionary itself.
    pub dictionary_size_bytes: usize,
    /// Bytes taken by the emitted codes (one `u32` per encoded value).
    pub encoded_size_bytes: usize,
    /// Estimated original size divided by estimated compressed size;
    /// `0.0` when nothing has been encoded.
    pub compression_ratio: f64,
}
27
28#[derive(Debug, Clone)]
30pub struct DictCodebook<V> {
31 value_to_code: FxHashMap<V, u32>,
33 code_to_value: Vec<V>,
35}
36
37impl<V: Hash + Eq + Clone> Default for DictCodebook<V> {
38 fn default() -> Self {
39 Self {
40 value_to_code: FxHashMap::default(),
41 code_to_value: Vec::new(),
42 }
43 }
44}
45
46#[derive(Debug, Clone)]
50pub struct DictionaryEncoder<V: Hash + Eq + Clone> {
51 codebook: DictCodebook<V>,
53 total_encoded: usize,
55}
56
57impl<V: Hash + Eq + Clone> DictionaryEncoder<V> {
58 #[must_use]
60 pub fn new() -> Self {
61 Self {
62 codebook: DictCodebook::default(),
63 total_encoded: 0,
64 }
65 }
66
67 #[must_use]
69 pub fn is_empty(&self) -> bool {
70 self.codebook.code_to_value.is_empty()
71 }
72
73 #[must_use]
75 pub fn len(&self) -> usize {
76 self.codebook.code_to_value.len()
77 }
78
79 pub fn encode(&mut self, value: V) -> u32 {
83 self.total_encoded += 1;
84
85 if let Some(&code) = self.codebook.value_to_code.get(&value) {
86 return code;
87 }
88
89 let code = self.codebook.code_to_value.len() as u32;
90 self.codebook.value_to_code.insert(value.clone(), code);
91 self.codebook.code_to_value.push(value);
92 code
93 }
94
95 #[must_use]
97 pub fn decode(&self, code: u32) -> Option<&V> {
98 self.codebook.code_to_value.get(code as usize)
99 }
100
101 pub fn encode_batch(&mut self, values: &[V]) -> Vec<u32> {
103 values.iter().map(|v| self.encode(v.clone())).collect()
104 }
105
106 #[must_use]
108 pub fn decode_batch(&self, codes: &[u32]) -> Vec<V> {
109 codes
110 .iter()
111 .filter_map(|&code| self.decode(code).cloned())
112 .collect()
113 }
114
115 pub fn clear(&mut self) {
117 self.codebook.value_to_code.clear();
118 self.codebook.code_to_value.clear();
119 self.total_encoded = 0;
120 }
121
122 #[must_use]
124 pub fn stats(&self) -> CompressionStats {
125 let unique = self.len();
126 let total = self.total_encoded;
127
128 let value_size = size_of::<V>();
130 let original_size = total * value_size;
131 let dict_size = unique * value_size + unique * 4; let encoded_size = total * 4; let compressed_size = dict_size + encoded_size;
134
135 let ratio = if compressed_size > 0 {
136 original_size as f64 / compressed_size as f64
137 } else {
138 0.0
139 };
140
141 CompressionStats {
142 unique_values: unique,
143 total_values: total,
144 dictionary_size_bytes: dict_size,
145 encoded_size_bytes: encoded_size,
146 compression_ratio: ratio,
147 }
148 }
149
150 #[must_use]
152 pub fn codebook(&self) -> &DictCodebook<V> {
153 &self.codebook
154 }
155}
156
157impl<V: Hash + Eq + Clone> Default for DictionaryEncoder<V> {
158 fn default() -> Self {
159 Self::new()
160 }
161}