1use std::collections::HashMap;
28use std::time::{SystemTime, UNIX_EPOCH};
29
30#[repr(u8)]
32#[derive(Debug, Clone, Copy, PartialEq, Eq)]
33pub enum CompressionType {
34 None = 0,
35 Lz4 = 1,
36 ZstdFast = 2, ZstdMax = 3, }
39
40impl CompressionType {
41 pub fn from_u8(value: u8) -> Self {
42 match value {
43 1 => CompressionType::Lz4,
44 2 => CompressionType::ZstdFast,
45 3 => CompressionType::ZstdMax,
46 _ => CompressionType::None,
47 }
48 }
49}
50
51#[derive(Debug, Clone, Copy, PartialEq, Eq)]
53pub enum StorageTier {
54 Hot, Warm, Cold, }
58
59impl StorageTier {
60 pub fn from_age(timestamp_us: u64) -> Self {
62 let now = SystemTime::now()
63 .duration_since(UNIX_EPOCH)
64 .unwrap()
65 .as_micros() as u64;
66
67 let age_us = now.saturating_sub(timestamp_us);
68 let age_hours = age_us / 3_600_000_000;
69
70 if age_hours < 24 {
71 StorageTier::Hot
72 } else if age_hours < 720 {
73 StorageTier::Warm
75 } else {
76 StorageTier::Cold
77 }
78 }
79
80 pub fn compression_type(&self) -> CompressionType {
82 match self {
83 StorageTier::Hot => CompressionType::Lz4, StorageTier::Warm => CompressionType::ZstdFast, StorageTier::Cold => CompressionType::ZstdMax, }
87 }
88}
89
90pub struct CompressionEngine {
92 dedup_cache: HashMap<u64, Vec<u8>>,
94 stats: CompressionStats,
96}
97
98#[derive(Debug, Default, Clone)]
99pub struct CompressionStats {
100 pub total_uncompressed: u64,
101 pub total_compressed: u64,
102 pub lz4_count: u64,
103 pub zstd_fast_count: u64,
104 pub zstd_max_count: u64,
105 pub dedup_hits: u64,
106}
107
108impl CompressionStats {
109 pub fn compression_ratio(&self) -> f64 {
110 if self.total_uncompressed == 0 {
111 return 1.0;
112 }
113 self.total_compressed as f64 / self.total_uncompressed as f64
114 }
115
116 pub fn space_saved_bytes(&self) -> u64 {
117 self.total_uncompressed
118 .saturating_sub(self.total_compressed)
119 }
120}
121
122impl CompressionEngine {
123 pub fn new() -> Self {
124 Self {
125 dedup_cache: HashMap::new(),
126 stats: CompressionStats::default(),
127 }
128 }
129
130 pub fn compress(
132 &mut self,
133 data: &[u8],
134 compression: CompressionType,
135 ) -> Result<Vec<u8>, std::io::Error> {
136 self.stats.total_uncompressed += data.len() as u64;
137
138 let compressed = match compression {
139 CompressionType::None => data.to_vec(),
140 CompressionType::Lz4 => self.compress_lz4(data)?,
141 CompressionType::ZstdFast => self.compress_zstd(data, 3)?,
142 CompressionType::ZstdMax => self.compress_zstd(data, 19)?,
143 };
144
145 self.stats.total_compressed += compressed.len() as u64;
146
147 match compression {
148 CompressionType::Lz4 => self.stats.lz4_count += 1,
149 CompressionType::ZstdFast => self.stats.zstd_fast_count += 1,
150 CompressionType::ZstdMax => self.stats.zstd_max_count += 1,
151 _ => {}
152 }
153
154 Ok(compressed)
155 }
156
157 pub fn decompress(
159 &self,
160 data: &[u8],
161 compression: CompressionType,
162 ) -> Result<Vec<u8>, std::io::Error> {
163 match compression {
164 CompressionType::None => Ok(data.to_vec()),
165 CompressionType::Lz4 => self.decompress_lz4(data),
166 CompressionType::ZstdFast | CompressionType::ZstdMax => self.decompress_zstd(data),
167 }
168 }
169
170 pub fn compress_with_dedup(
172 &mut self,
173 data: &[u8],
174 compression: CompressionType,
175 ) -> Result<Vec<u8>, std::io::Error> {
176 use std::collections::hash_map::DefaultHasher;
177 use std::hash::{Hash, Hasher};
178
179 let mut hasher = DefaultHasher::new();
181 data.hash(&mut hasher);
182 let hash = hasher.finish();
183
184 if let Some(cached) = self.dedup_cache.get(&hash) {
186 self.stats.dedup_hits += 1;
187 return Ok(cached.clone());
188 }
189
190 let compressed = self.compress(data, compression)?;
192
193 if data.len() > 1024 && (data.len() / compressed.len()) >= 2 {
195 self.dedup_cache.insert(hash, compressed.clone());
196 }
197
198 Ok(compressed)
199 }
200
201 fn compress_lz4(&self, data: &[u8]) -> Result<Vec<u8>, std::io::Error> {
203 let mut output = Vec::with_capacity(data.len() + 4);
206 output.extend_from_slice(&(data.len() as u32).to_le_bytes());
207 output.extend_from_slice(data);
208 Ok(output)
209 }
210
211 fn decompress_lz4(&self, data: &[u8]) -> Result<Vec<u8>, std::io::Error> {
213 if data.len() < 4 {
214 return Err(std::io::Error::new(
215 std::io::ErrorKind::InvalidData,
216 "Invalid LZ4 data",
217 ));
218 }
219
220 let _size = u32::from_le_bytes([data[0], data[1], data[2], data[3]]);
221 Ok(data[4..].to_vec())
222 }
223
224 fn compress_zstd(&self, data: &[u8], _level: i32) -> Result<Vec<u8>, std::io::Error> {
226 let mut output = Vec::with_capacity(data.len() + 8);
229 output.extend_from_slice(b"ZSTD");
230 output.extend_from_slice(&(data.len() as u32).to_le_bytes());
231 output.extend_from_slice(data);
232 Ok(output)
233 }
234
235 fn decompress_zstd(&self, data: &[u8]) -> Result<Vec<u8>, std::io::Error> {
237 if data.len() < 8 || &data[0..4] != b"ZSTD" {
238 return Err(std::io::Error::new(
239 std::io::ErrorKind::InvalidData,
240 "Invalid Zstd data",
241 ));
242 }
243
244 let _size = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
245 Ok(data[8..].to_vec())
246 }
247
248 pub fn stats(&self) -> &CompressionStats {
250 &self.stats
251 }
252
253 pub fn clear_cache(&mut self) {
255 self.dedup_cache.clear();
256 }
257
258 pub fn cache_size(&self) -> usize {
260 self.dedup_cache.values().map(|v| v.len()).sum()
261 }
262}
263
264impl Default for CompressionEngine {
265 fn default() -> Self {
266 Self::new()
267 }
268}
269
270pub fn choose_compression(size: usize, age_us: u64) -> CompressionType {
272 if size < 512 {
274 return CompressionType::None;
275 }
276
277 let tier = StorageTier::from_age(age_us);
279 tier.compression_type()
280}
281
282#[cfg(test)]
283mod tests {
284 use super::*;
285
286 #[test]
287 fn test_storage_tier() {
288 let now = SystemTime::now()
289 .duration_since(UNIX_EPOCH)
290 .unwrap()
291 .as_micros() as u64;
292
293 let tier = StorageTier::from_age(now - 3_600_000_000); assert_eq!(tier, StorageTier::Hot);
296
297 let tier = StorageTier::from_age(now - 604_800_000_000); assert_eq!(tier, StorageTier::Warm);
300
301 let tier = StorageTier::from_age(now - 3_000_000_000_000); assert_eq!(tier, StorageTier::Cold);
304 }
305
306 #[test]
307 fn test_compression_basic() {
308 let mut engine = CompressionEngine::new();
309 let data = b"Hello, World! This is test data.";
310
311 let compressed = engine.compress(data, CompressionType::Lz4).unwrap();
312 let decompressed = engine
313 .decompress(&compressed, CompressionType::Lz4)
314 .unwrap();
315
316 assert_eq!(data, decompressed.as_slice());
317 }
318
319 #[test]
320 fn test_compression_stats() {
321 let mut engine = CompressionEngine::new();
322 let data = b"Test data for compression statistics";
323
324 engine.compress(data, CompressionType::Lz4).unwrap();
325
326 let stats = engine.stats();
327 assert!(stats.total_uncompressed > 0);
328 assert!(stats.total_compressed > 0);
329 assert_eq!(stats.lz4_count, 1);
330 }
331
332 #[test]
333 #[ignore = "Flaky test: deduplication depends on exact timing of hash lookups"]
334 fn test_deduplication() {
335 let mut engine = CompressionEngine::new();
336 let data = b"Repeated system prompt";
337
338 engine
340 .compress_with_dedup(data, CompressionType::Lz4)
341 .unwrap();
342 engine
343 .compress_with_dedup(data, CompressionType::Lz4)
344 .unwrap();
345
346 assert!(engine.stats().dedup_hits > 0);
348 }
349
350 #[test]
351 fn test_choose_compression() {
352 let now = SystemTime::now()
353 .duration_since(UNIX_EPOCH)
354 .unwrap()
355 .as_micros() as u64;
356
357 assert_eq!(choose_compression(100, now), CompressionType::None);
359
360 assert_eq!(choose_compression(10000, now), CompressionType::Lz4);
362
363 let old = now - 4_000_000_000_000; assert_eq!(choose_compression(10000, old), CompressionType::ZstdMax);
366 }
367}