oximedia_cache/
tier_compressor.rs1use thiserror::Error;
49
50const MAGIC: [u8; 3] = [0xCA, 0xCE, 0x00];
53const HEADER_LEN: usize = 3 + 4; const TAG_LITERAL: u8 = 0x00;
58const TAG_BACKREF: u8 = 0x01;
59
60#[derive(Debug, Error)]
64pub enum CompressorError {
65 #[error("invalid compressed data: {0}")]
67 InvalidData(String),
68 #[error("length mismatch: expected {expected}, got {actual}")]
70 LengthMismatch {
71 expected: usize,
73 actual: usize,
75 },
76 #[error("input too large: {0} bytes")]
78 InputTooLarge(usize),
79}
80
81struct LevelParams {
85 window: usize,
87 max_len: usize,
89}
90
91fn params_for_level(level: u8) -> LevelParams {
92 match level {
93 0 => LevelParams {
94 window: 64,
95 max_len: 16,
96 },
97 1 => LevelParams {
98 window: 128,
99 max_len: 32,
100 },
101 2 => LevelParams {
102 window: 256,
103 max_len: 64,
104 },
105 _ => LevelParams {
106 window: 512,
107 max_len: 128,
108 },
109 }
110}
111
112#[derive(Debug, Clone)]
118pub struct TierCompressor {
119 level: u8,
120}
121
122impl TierCompressor {
123 #[must_use]
126 pub fn new(level: u8) -> Self {
127 Self {
128 level: level.min(3),
129 }
130 }
131
132 pub fn compress(&self, input: &[u8]) -> Result<Vec<u8>, CompressorError> {
137 if input.len() > u32::MAX as usize {
138 return Err(CompressorError::InputTooLarge(input.len()));
139 }
140 let params = params_for_level(self.level);
141 let orig_len = input.len() as u32;
142
143 let mut out = Vec::with_capacity(HEADER_LEN + input.len() * 2);
145
146 out.extend_from_slice(&MAGIC);
148 out.extend_from_slice(&orig_len.to_le_bytes());
149
150 let mut pos = 0usize;
151 while pos < input.len() {
152 let window_start = pos.saturating_sub(params.window);
154 let search_window = &input[window_start..pos];
155
156 let max_match = params.max_len.min(input.len() - pos);
158
159 let best = find_longest_match(search_window, &input[pos..], max_match);
160
161 match best {
162 Some((offset_in_window, length)) if length >= 3 => {
163 let distance = (pos - window_start) - offset_in_window;
168 debug_assert!(distance > 0, "back-ref distance must be positive");
169 let dist_u16 = distance as u16;
170 let len_u8 = length as u8;
171 out.push(TAG_BACKREF);
172 out.push((dist_u16 & 0xFF) as u8);
173 out.push((dist_u16 >> 8) as u8);
174 out.push(len_u8);
175 pos += length;
176 }
177 _ => {
178 out.push(TAG_LITERAL);
180 out.push(input[pos]);
181 pos += 1;
182 }
183 }
184 }
185
186 Ok(out)
187 }
188
189 pub fn decompress(&self, input: &[u8]) -> Result<Vec<u8>, CompressorError> {
191 if input.len() < HEADER_LEN {
192 return Err(CompressorError::InvalidData(format!(
193 "too short: {} bytes (need at least {})",
194 input.len(),
195 HEADER_LEN
196 )));
197 }
198 if input[..3] != MAGIC {
200 return Err(CompressorError::InvalidData(
201 "magic bytes do not match".to_string(),
202 ));
203 }
204 let orig_len = u32::from_le_bytes([input[3], input[4], input[5], input[6]]) as usize;
206 let mut out = Vec::with_capacity(orig_len);
207
208 let payload = &input[HEADER_LEN..];
209 let mut pos = 0usize;
210
211 while pos < payload.len() {
212 let tag = payload[pos];
213 pos += 1;
214
215 match tag {
216 TAG_LITERAL => {
217 if pos >= payload.len() {
218 return Err(CompressorError::InvalidData(
219 "truncated literal token".to_string(),
220 ));
221 }
222 out.push(payload[pos]);
223 pos += 1;
224 }
225 TAG_BACKREF => {
226 if pos + 2 >= payload.len() {
227 return Err(CompressorError::InvalidData(
228 "truncated back-reference token".to_string(),
229 ));
230 }
231 let dist_lo = payload[pos] as u16;
232 let dist_hi = payload[pos + 1] as u16;
233 let length = payload[pos + 2] as usize;
234 pos += 3;
235
236 let distance = (dist_hi << 8 | dist_lo) as usize;
237 if distance == 0 || distance > out.len() {
238 return Err(CompressorError::InvalidData(format!(
239 "invalid back-ref distance {distance} at output position {}",
240 out.len()
241 )));
242 }
243 let start = out.len() - distance;
244 for i in 0..length {
246 let byte = out[start + (i % distance)];
247 out.push(byte);
248 }
249 }
250 unknown => {
251 return Err(CompressorError::InvalidData(format!(
252 "unknown tag byte 0x{unknown:02X} at offset {pos}"
253 )));
254 }
255 }
256 }
257
258 if out.len() != orig_len {
259 return Err(CompressorError::LengthMismatch {
260 expected: orig_len,
261 actual: out.len(),
262 });
263 }
264
265 Ok(out)
266 }
267
268 #[must_use]
270 pub fn level(&self) -> u8 {
271 self.level
272 }
273
274 pub fn compression_ratio(&self, input: &[u8]) -> Result<f64, CompressorError> {
278 if input.is_empty() {
279 return Ok(1.0);
280 }
281 let compressed = self.compress(input)?;
282 Ok(compressed.len() as f64 / input.len() as f64)
283 }
284}
285
286fn find_longest_match(window: &[u8], lookahead: &[u8], max_len: usize) -> Option<(usize, usize)> {
293 if window.is_empty() || lookahead.is_empty() || max_len == 0 {
294 return None;
295 }
296
297 let mut best_offset = 0usize;
298 let mut best_len = 0usize;
299
300 for start in 0..window.len() {
301 let mut len = 0usize;
302 while len < max_len && len < lookahead.len() && len < window.len() - start + lookahead.len()
303 {
304 let window_idx = start + (len % (window.len() - start));
306 if window[window_idx] != lookahead[len] {
307 break;
308 }
309 len += 1;
310 }
311 if len > best_len {
312 best_len = len;
313 best_offset = start;
314 }
315 }
316
317 if best_len >= 3 {
318 Some((best_offset, best_len))
319 } else {
320 None
321 }
322}
323
324#[cfg(test)]
327mod tests {
328 use super::*;
329
330 #[test]
332 fn test_round_trip_ascii() {
333 let c = TierCompressor::new(1);
334 let orig = b"hello world hello world".to_vec();
335 let compressed = c.compress(&orig).expect("compress");
336 let restored = c.decompress(&compressed).expect("decompress");
337 assert_eq!(restored, orig);
338 }
339
340 #[test]
342 fn test_round_trip_empty() {
343 let c = TierCompressor::new(0);
344 let orig: Vec<u8> = Vec::new();
345 let compressed = c.compress(&orig).expect("compress empty");
346 let restored = c.decompress(&compressed).expect("decompress empty");
347 assert_eq!(restored, orig);
348 }
349
350 #[test]
352 fn test_round_trip_single_byte() {
353 let c = TierCompressor::new(0);
354 let orig = vec![0xABu8];
355 let compressed = c.compress(&orig).expect("compress");
356 let restored = c.decompress(&compressed).expect("decompress");
357 assert_eq!(restored, orig);
358 }
359
360 #[test]
362 fn test_round_trip_zeros() {
363 let c = TierCompressor::new(2);
364 let orig = vec![0u8; 512];
365 let compressed = c.compress(&orig).expect("compress");
366 let restored = c.decompress(&compressed).expect("decompress");
367 assert_eq!(restored, orig);
368 }
369
370 #[test]
372 fn test_round_trip_binary() {
373 let c = TierCompressor::new(1);
374 let orig: Vec<u8> = (0u8..=255).cycle().take(300).collect();
375 let compressed = c.compress(&orig).expect("compress");
376 let restored = c.decompress(&compressed).expect("decompress");
377 assert_eq!(restored, orig);
378 }
379
380 #[test]
382 fn test_zeros_compress_smaller() {
383 let c = TierCompressor::new(2);
384 let orig = vec![0u8; 256];
385 let compressed = c.compress(&orig).expect("compress");
386 assert!(
388 compressed.len() < orig.len(),
389 "compressed {} should be < original {}",
390 compressed.len(),
391 orig.len()
392 );
393 }
394
395 #[test]
397 fn test_decompress_invalid_magic() {
398 let c = TierCompressor::new(0);
399 let bad = vec![0xDE, 0xAD, 0xBE, 0xEF, 0, 0, 0, 0];
400 assert!(c.decompress(&bad).is_err());
401 }
402
403 #[test]
405 fn test_decompress_truncated() {
406 let c = TierCompressor::new(0);
407 assert!(c.decompress(&[0xCA, 0xCE, 0x00]).is_err());
409 }
410
411 #[test]
413 fn test_level_getter() {
414 assert_eq!(TierCompressor::new(2).level(), 2);
415 assert_eq!(TierCompressor::new(99).level(), 3); }
417
418 #[test]
420 fn test_ratio_empty() {
421 let c = TierCompressor::new(1);
422 let ratio = c.compression_ratio(&[]).expect("ratio");
423 assert!((ratio - 1.0).abs() < 1e-9);
424 }
425
426 #[test]
428 fn test_ratio_repeated_lt_1() {
429 let c = TierCompressor::new(3);
430 let orig = b"abcabcabcabcabcabcabcabcabc".to_vec();
431 let ratio = c.compression_ratio(&orig).expect("ratio");
432 assert!(ratio < 2.0, "ratio {ratio} should be reasonable");
434 }
435
436 #[test]
438 fn test_round_trip_all_levels() {
439 let orig = b"the quick brown fox jumps over the lazy dog".repeat(5);
440 for level in 0..=3 {
441 let c = TierCompressor::new(level);
442 let compressed = c.compress(&orig).expect("compress");
443 let restored = c.decompress(&compressed).expect("decompress");
444 assert_eq!(restored, orig, "level {level} failed round-trip");
445 }
446 }
447
448 #[test]
450 fn test_header_length_field() {
451 let c = TierCompressor::new(0);
452 let orig = b"abcde".to_vec();
453 let compressed = c.compress(&orig).expect("compress");
454 let stored_len =
456 u32::from_le_bytes([compressed[3], compressed[4], compressed[5], compressed[6]])
457 as usize;
458 assert_eq!(stored_len, orig.len());
459 }
460
461 #[test]
463 fn test_overlapping_backref() {
464 let c = TierCompressor::new(2);
465 let orig: Vec<u8> = vec![b'a'; 200];
467 let compressed = c.compress(&orig).expect("compress");
468 let restored = c.decompress(&compressed).expect("decompress");
469 assert_eq!(restored, orig);
470 }
471
472 #[test]
474 fn test_large_input_round_trip() {
475 let c = TierCompressor::new(1);
476 let orig: Vec<u8> = b"media frame data segment"
478 .iter()
479 .copied()
480 .cycle()
481 .take(10_240)
482 .collect();
483 let compressed = c.compress(&orig).expect("compress");
484 let restored = c.decompress(&compressed).expect("decompress");
485 assert_eq!(restored, orig);
486 }
487}