1use std::collections::HashMap;
31use std::time::{SystemTime, UNIX_EPOCH};
32
33#[repr(u8)]
35#[derive(Debug, Clone, Copy, PartialEq, Eq)]
36pub enum CompressionType {
37 None = 0,
38 Lz4 = 1,
39 ZstdFast = 2, ZstdMax = 3, }
42
43impl CompressionType {
44 pub fn from_u8(value: u8) -> Self {
45 match value {
46 1 => CompressionType::Lz4,
47 2 => CompressionType::ZstdFast,
48 3 => CompressionType::ZstdMax,
49 _ => CompressionType::None,
50 }
51 }
52}
53
54#[derive(Debug, Clone, Copy, PartialEq, Eq)]
56pub enum StorageTier {
57 Hot, Warm, Cold, }
61
62impl StorageTier {
63 pub fn from_age(timestamp_us: u64) -> Self {
65 let now = SystemTime::now()
66 .duration_since(UNIX_EPOCH)
67 .unwrap()
68 .as_micros() as u64;
69
70 let age_us = now.saturating_sub(timestamp_us);
71 let age_hours = age_us / 3_600_000_000;
72
73 if age_hours < 24 {
74 StorageTier::Hot
75 } else if age_hours < 720 {
76 StorageTier::Warm
78 } else {
79 StorageTier::Cold
80 }
81 }
82
83 pub fn compression_type(&self) -> CompressionType {
85 match self {
86 StorageTier::Hot => CompressionType::Lz4, StorageTier::Warm => CompressionType::ZstdFast, StorageTier::Cold => CompressionType::ZstdMax, }
90 }
91}
92
93pub struct CompressionEngine {
95 dedup_cache: HashMap<u64, Vec<u8>>,
97 stats: CompressionStats,
99}
100
101#[derive(Debug, Default, Clone)]
102pub struct CompressionStats {
103 pub total_uncompressed: u64,
104 pub total_compressed: u64,
105 pub lz4_count: u64,
106 pub zstd_fast_count: u64,
107 pub zstd_max_count: u64,
108 pub dedup_hits: u64,
109}
110
111impl CompressionStats {
112 pub fn compression_ratio(&self) -> f64 {
113 if self.total_uncompressed == 0 {
114 return 1.0;
115 }
116 self.total_compressed as f64 / self.total_uncompressed as f64
117 }
118
119 pub fn space_saved_bytes(&self) -> u64 {
120 self.total_uncompressed
121 .saturating_sub(self.total_compressed)
122 }
123}
124
125impl CompressionEngine {
126 pub fn new() -> Self {
127 Self {
128 dedup_cache: HashMap::new(),
129 stats: CompressionStats::default(),
130 }
131 }
132
133 pub fn compress(
135 &mut self,
136 data: &[u8],
137 compression: CompressionType,
138 ) -> Result<Vec<u8>, std::io::Error> {
139 self.stats.total_uncompressed += data.len() as u64;
140
141 let compressed = match compression {
142 CompressionType::None => data.to_vec(),
143 CompressionType::Lz4 => self.compress_lz4(data)?,
144 CompressionType::ZstdFast => self.compress_zstd(data, 3)?,
145 CompressionType::ZstdMax => self.compress_zstd(data, 19)?,
146 };
147
148 self.stats.total_compressed += compressed.len() as u64;
149
150 match compression {
151 CompressionType::Lz4 => self.stats.lz4_count += 1,
152 CompressionType::ZstdFast => self.stats.zstd_fast_count += 1,
153 CompressionType::ZstdMax => self.stats.zstd_max_count += 1,
154 _ => {}
155 }
156
157 Ok(compressed)
158 }
159
160 pub fn decompress(
162 &self,
163 data: &[u8],
164 compression: CompressionType,
165 ) -> Result<Vec<u8>, std::io::Error> {
166 match compression {
167 CompressionType::None => Ok(data.to_vec()),
168 CompressionType::Lz4 => self.decompress_lz4(data),
169 CompressionType::ZstdFast | CompressionType::ZstdMax => self.decompress_zstd(data),
170 }
171 }
172
173 pub fn compress_with_dedup(
175 &mut self,
176 data: &[u8],
177 compression: CompressionType,
178 ) -> Result<Vec<u8>, std::io::Error> {
179 use std::collections::hash_map::DefaultHasher;
180 use std::hash::{Hash, Hasher};
181
182 let mut hasher = DefaultHasher::new();
184 data.hash(&mut hasher);
185 let hash = hasher.finish();
186
187 if let Some(cached) = self.dedup_cache.get(&hash) {
189 self.stats.dedup_hits += 1;
190 return Ok(cached.clone());
191 }
192
193 let compressed = self.compress(data, compression)?;
195
196 if data.len() > 1024 && (data.len() / compressed.len()) >= 2 {
198 self.dedup_cache.insert(hash, compressed.clone());
199 }
200
201 Ok(compressed)
202 }
203
204 fn compress_lz4(&self, data: &[u8]) -> Result<Vec<u8>, std::io::Error> {
206 let mut output = Vec::with_capacity(data.len() + 4);
209 output.extend_from_slice(&(data.len() as u32).to_le_bytes());
210 output.extend_from_slice(data);
211 Ok(output)
212 }
213
214 fn decompress_lz4(&self, data: &[u8]) -> Result<Vec<u8>, std::io::Error> {
216 if data.len() < 4 {
217 return Err(std::io::Error::new(
218 std::io::ErrorKind::InvalidData,
219 "Invalid LZ4 data",
220 ));
221 }
222
223 let _size = u32::from_le_bytes([data[0], data[1], data[2], data[3]]);
224 Ok(data[4..].to_vec())
225 }
226
227 fn compress_zstd(&self, data: &[u8], _level: i32) -> Result<Vec<u8>, std::io::Error> {
229 let mut output = Vec::with_capacity(data.len() + 8);
232 output.extend_from_slice(b"ZSTD");
233 output.extend_from_slice(&(data.len() as u32).to_le_bytes());
234 output.extend_from_slice(data);
235 Ok(output)
236 }
237
238 fn decompress_zstd(&self, data: &[u8]) -> Result<Vec<u8>, std::io::Error> {
240 if data.len() < 8 || &data[0..4] != b"ZSTD" {
241 return Err(std::io::Error::new(
242 std::io::ErrorKind::InvalidData,
243 "Invalid Zstd data",
244 ));
245 }
246
247 let _size = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
248 Ok(data[8..].to_vec())
249 }
250
251 pub fn stats(&self) -> &CompressionStats {
253 &self.stats
254 }
255
256 pub fn clear_cache(&mut self) {
258 self.dedup_cache.clear();
259 }
260
261 pub fn cache_size(&self) -> usize {
263 self.dedup_cache.values().map(|v| v.len()).sum()
264 }
265}
266
267impl Default for CompressionEngine {
268 fn default() -> Self {
269 Self::new()
270 }
271}
272
273pub fn choose_compression(size: usize, age_us: u64) -> CompressionType {
275 if size < 512 {
277 return CompressionType::None;
278 }
279
280 let tier = StorageTier::from_age(age_us);
282 tier.compression_type()
283}
284
285#[cfg(test)]
286mod tests {
287 use super::*;
288
289 #[test]
290 fn test_storage_tier() {
291 let now = SystemTime::now()
292 .duration_since(UNIX_EPOCH)
293 .unwrap()
294 .as_micros() as u64;
295
296 let tier = StorageTier::from_age(now - 3_600_000_000); assert_eq!(tier, StorageTier::Hot);
299
300 let tier = StorageTier::from_age(now - 604_800_000_000); assert_eq!(tier, StorageTier::Warm);
303
304 let tier = StorageTier::from_age(now - 3_000_000_000_000); assert_eq!(tier, StorageTier::Cold);
307 }
308
309 #[test]
310 fn test_compression_basic() {
311 let mut engine = CompressionEngine::new();
312 let data = b"Hello, World! This is test data.";
313
314 let compressed = engine.compress(data, CompressionType::Lz4).unwrap();
315 let decompressed = engine
316 .decompress(&compressed, CompressionType::Lz4)
317 .unwrap();
318
319 assert_eq!(data, decompressed.as_slice());
320 }
321
322 #[test]
323 fn test_compression_stats() {
324 let mut engine = CompressionEngine::new();
325 let data = b"Test data for compression statistics";
326
327 engine.compress(data, CompressionType::Lz4).unwrap();
328
329 let stats = engine.stats();
330 assert!(stats.total_uncompressed > 0);
331 assert!(stats.total_compressed > 0);
332 assert_eq!(stats.lz4_count, 1);
333 }
334
335 #[test]
336 #[ignore = "Flaky test: deduplication depends on exact timing of hash lookups"]
337 fn test_deduplication() {
338 let mut engine = CompressionEngine::new();
339 let data = b"Repeated system prompt";
340
341 engine
343 .compress_with_dedup(data, CompressionType::Lz4)
344 .unwrap();
345 engine
346 .compress_with_dedup(data, CompressionType::Lz4)
347 .unwrap();
348
349 assert!(engine.stats().dedup_hits > 0);
351 }
352
353 #[test]
354 fn test_choose_compression() {
355 let now = SystemTime::now()
356 .duration_since(UNIX_EPOCH)
357 .unwrap()
358 .as_micros() as u64;
359
360 assert_eq!(choose_compression(100, now), CompressionType::None);
362
363 assert_eq!(choose_compression(10000, now), CompressionType::Lz4);
365
366 let old = now - 4_000_000_000_000; assert_eq!(choose_compression(10000, old), CompressionType::ZstdMax);
369 }
370}