Skip to main content

proof_engine/save/
compression.rs

1//! Compression algorithms for save data: RLE, LZ4-style, Huffman, and delta encoding.
2//!
3//! `Compressor::compress_auto` tries every algorithm and picks the smallest output,
4//! storing the result in a self-describing `CompressedBlock`.
5
6use std::collections::{BinaryHeap, HashMap};
7
8// ─────────────────────────────────────────────────────────────────────────────
9//  CompressionAlgorithm
10// ─────────────────────────────────────────────────────────────────────────────
11
12/// Which compression algorithm was used to produce a `CompressedBlock`.
13#[derive(Debug, Clone, Copy, PartialEq, Eq)]
14#[repr(u8)]
15pub enum CompressionAlgorithm {
16    /// Data is stored verbatim.
17    None = 0,
18    /// Run-length encoding.
19    Rle = 1,
20    /// LZ4-style byte-level LZ77 with a hash chain.
21    Lz4Like = 2,
22    /// Integer delta encoding followed by RLE.
23    DeltaEncode = 3,
24    /// Canonical Huffman coding (code lengths ≤ 15 bits).
25    Huffman = 4,
26}
27
28impl CompressionAlgorithm {
29    fn from_u8(v: u8) -> Option<Self> {
30        match v {
31            0 => Some(Self::None),
32            1 => Some(Self::Rle),
33            2 => Some(Self::Lz4Like),
34            3 => Some(Self::DeltaEncode),
35            4 => Some(Self::Huffman),
36            _ => None,
37        }
38    }
39}
40
41// ─────────────────────────────────────────────────────────────────────────────
42//  CompressedBlock
43// ─────────────────────────────────────────────────────────────────────────────
44
45/// A self-describing compressed block.
46///
47/// Wire format: `[algorithm: u8][original_size: u32 LE][data…]`
48#[derive(Debug, Clone, PartialEq, Eq)]
49pub struct CompressedBlock {
50    pub algorithm: CompressionAlgorithm,
51    pub original_size: usize,
52    pub data: Vec<u8>,
53}
54
55impl CompressedBlock {
56    /// Serialise to bytes (includes the 5-byte header).
57    pub fn to_bytes(&self) -> Vec<u8> {
58        let mut out = Vec::with_capacity(5 + self.data.len());
59        out.push(self.algorithm as u8);
60        let sz = self.original_size as u32;
61        out.extend_from_slice(&sz.to_le_bytes());
62        out.extend_from_slice(&self.data);
63        out
64    }
65
66    /// Deserialise from bytes produced by `to_bytes`.
67    pub fn from_bytes(bytes: &[u8]) -> Result<Self, String> {
68        if bytes.len() < 5 {
69            return Err("CompressedBlock too short".into());
70        }
71        let algorithm = CompressionAlgorithm::from_u8(bytes[0])
72            .ok_or_else(|| format!("unknown algorithm byte {}", bytes[0]))?;
73        let original_size =
74            u32::from_le_bytes([bytes[1], bytes[2], bytes[3], bytes[4]]) as usize;
75        let data = bytes[5..].to_vec();
76        Ok(Self { algorithm, original_size, data })
77    }
78
79    /// Decompress the block back to the original bytes.
80    pub fn decompress(&self) -> Result<Vec<u8>, String> {
81        match self.algorithm {
82            CompressionAlgorithm::None => Ok(self.data.clone()),
83            CompressionAlgorithm::Rle => RunLengthEncoder::decode(&self.data),
84            CompressionAlgorithm::Lz4Like => Lz4Decoder::decompress(&self.data),
85            CompressionAlgorithm::DeltaEncode => DeltaDecoder::decode_bytes(&self.data),
86            CompressionAlgorithm::Huffman => HuffmanDecoder::decompress(&self.data),
87        }
88    }
89}
90
91// ─────────────────────────────────────────────────────────────────────────────
92//  RunLengthEncoder
93// ─────────────────────────────────────────────────────────────────────────────
94
95/// Byte-level run-length encoding.
96///
97/// Format: alternating `[count: u8][byte]` pairs.  Counts are 1-based (a
98/// count byte of 0 means 1 repetition).  Runs longer than 255 are split.
99pub struct RunLengthEncoder;
100
101impl RunLengthEncoder {
102    /// Encode `input` bytes into RLE form.
103    pub fn encode(input: &[u8]) -> Vec<u8> {
104        if input.is_empty() {
105            return Vec::new();
106        }
107        let mut out = Vec::with_capacity(input.len());
108        let mut i = 0;
109        while i < input.len() {
110            let byte = input[i];
111            let mut run = 1usize;
112            while i + run < input.len() && input[i + run] == byte && run < 255 {
113                run += 1;
114            }
115            out.push(run as u8);
116            out.push(byte);
117            i += run;
118        }
119        out
120    }
121
122    /// Decode RLE bytes back to the original stream.
123    pub fn decode(input: &[u8]) -> Result<Vec<u8>, String> {
124        if input.len() % 2 != 0 {
125            return Err("RLE input has odd length".into());
126        }
127        let mut out = Vec::new();
128        let mut i = 0;
129        while i + 1 < input.len() {
130            let count = input[i] as usize;
131            let byte = input[i + 1];
132            for _ in 0..count {
133                out.push(byte);
134            }
135            i += 2;
136        }
137        Ok(out)
138    }
139
140    /// Encode a u16 stream as bytes (little-endian pairs) then RLE-compress.
141    pub fn encode_u16(input: &[u16]) -> Vec<u8> {
142        let mut bytes = Vec::with_capacity(input.len() * 2);
143        for &v in input {
144            bytes.extend_from_slice(&v.to_le_bytes());
145        }
146        Self::encode(&bytes)
147    }
148
149    /// Decode an RLE-compressed byte stream back to u16 values.
150    pub fn decode_u16(input: &[u8]) -> Result<Vec<u16>, String> {
151        let bytes = Self::decode(input)?;
152        if bytes.len() % 2 != 0 {
153            return Err("decoded byte count is not even".into());
154        }
155        let mut out = Vec::with_capacity(bytes.len() / 2);
156        for chunk in bytes.chunks_exact(2) {
157            out.push(u16::from_le_bytes([chunk[0], chunk[1]]));
158        }
159        Ok(out)
160    }
161}
162
163// ─────────────────────────────────────────────────────────────────────────────
164//  Lz4Encoder / Lz4Decoder
165// ─────────────────────────────────────────────────────────────────────────────
166
167/// LZ4-style block compressor using a 4-byte hash chain.
168///
169/// Sequence format: `[token: u8][extra literal len…][literals…][offset: u16 LE][extra match len…]`
170/// The token's high nibble is the literal count (0-14, 15 = overflow), low nibble is
171/// match-length minus 4 (0-14, 15 = overflow).  Minimum match length is 4.
172pub struct Lz4Encoder;
173
174impl Lz4Encoder {
175    const HASH_LOG: usize = 16;
176    const HASH_SIZE: usize = 1 << Self::HASH_LOG;
177    const MIN_MATCH: usize = 4;
178    const WINDOW: usize = 65535;
179
180    fn hash4(data: &[u8], pos: usize) -> usize {
181        if pos + 4 > data.len() {
182            return 0;
183        }
184        let v = u32::from_le_bytes([data[pos], data[pos+1], data[pos+2], data[pos+3]]);
185        ((v.wrapping_mul(0x9E37_79B1)) >> (32 - Self::HASH_LOG)) as usize
186    }
187
188    fn write_varint(out: &mut Vec<u8>, mut n: usize) {
189        while n >= 255 {
190            out.push(255);
191            n -= 255;
192        }
193        out.push(n as u8);
194    }
195
196    /// Compress `input` using LZ4-style block format.
197    pub fn compress(input: &[u8]) -> Vec<u8> {
198        let n = input.len();
199        let mut out = Vec::with_capacity(n);
200        let mut hash_table = vec![0usize; Self::HASH_SIZE];
201        let mut pos = 0usize;
202        let mut anchor = 0usize;
203
204        while pos + Self::MIN_MATCH <= n {
205            let h = Self::hash4(input, pos);
206            let candidate = hash_table[h];
207            hash_table[h] = pos;
208
209            // Check for a valid match
210            let match_ok = candidate < pos
211                && pos - candidate <= Self::WINDOW
212                && pos + Self::MIN_MATCH <= n
213                && candidate + Self::MIN_MATCH <= n
214                && input[pos..pos+Self::MIN_MATCH] == input[candidate..candidate+Self::MIN_MATCH];
215
216            if !match_ok {
217                pos += 1;
218                continue;
219            }
220
221            // Extend match
222            let mut match_len = Self::MIN_MATCH;
223            while pos + match_len < n && candidate + match_len < pos {
224                if input[pos + match_len] != input[candidate + match_len] {
225                    break;
226                }
227                match_len += 1;
228            }
229
230            // Write sequence: literals + match
231            let lit_len = pos - anchor;
232            let ml_extra = match_len - Self::MIN_MATCH;
233
234            let lit_tok = lit_len.min(15) as u8;
235            let ml_tok  = ml_extra.min(15) as u8;
236            out.push((lit_tok << 4) | ml_tok);
237
238            if lit_len >= 15 {
239                Self::write_varint(&mut out, lit_len - 15);
240            }
241            out.extend_from_slice(&input[anchor..pos]);
242
243            let offset = (pos - candidate) as u16;
244            out.extend_from_slice(&offset.to_le_bytes());
245
246            if ml_extra >= 15 {
247                Self::write_varint(&mut out, ml_extra - 15);
248            }
249
250            pos += match_len;
251            anchor = pos;
252        }
253
254        // Last literals
255        let lit_len = n - anchor;
256        let lit_tok = lit_len.min(15) as u8;
257        out.push(lit_tok << 4);
258        if lit_len >= 15 {
259            Self::write_varint(&mut out, lit_len - 15);
260        }
261        out.extend_from_slice(&input[anchor..]);
262
263        out
264    }
265}
266
267/// LZ4-style block decompressor.
268pub struct Lz4Decoder;
269
270impl Lz4Decoder {
271    fn read_varint(data: &[u8], pos: &mut usize, base: usize) -> Result<usize, String> {
272        let mut total = base;
273        loop {
274            if *pos >= data.len() {
275                return Err("unexpected end of stream in varint".into());
276            }
277            let b = data[*pos] as usize;
278            *pos += 1;
279            total += b;
280            if b < 255 {
281                break;
282            }
283        }
284        Ok(total)
285    }
286
287    /// Decompress an LZ4-style block.
288    pub fn decompress(input: &[u8]) -> Result<Vec<u8>, String> {
289        let mut out: Vec<u8> = Vec::new();
290        let mut pos = 0usize;
291
292        while pos < input.len() {
293            let token = input[pos];
294            pos += 1;
295
296            // Literal length
297            let mut lit_len = (token >> 4) as usize;
298            if lit_len == 15 {
299                lit_len = Self::read_varint(input, &mut pos, 15)?;
300            }
301
302            // Copy literals
303            if pos + lit_len > input.len() {
304                return Err("literal copy out of bounds".into());
305            }
306            out.extend_from_slice(&input[pos..pos + lit_len]);
307            pos += lit_len;
308
309            // End of block: last sequence has no match part
310            if pos >= input.len() {
311                break;
312            }
313
314            // Match offset
315            if pos + 2 > input.len() {
316                return Err("truncated match offset".into());
317            }
318            let offset = u16::from_le_bytes([input[pos], input[pos+1]]) as usize;
319            pos += 2;
320            if offset == 0 || offset > out.len() {
321                return Err(format!("invalid match offset {offset}"));
322            }
323
324            // Match length
325            let mut match_len = (token & 0x0F) as usize + 4;
326            if (token & 0x0F) == 15 {
327                match_len = Self::read_varint(input, &mut pos, match_len)?;
328            }
329
330            // Copy match (may overlap)
331            let match_start = out.len() - offset;
332            for i in 0..match_len {
333                let b = out[match_start + i];
334                out.push(b);
335            }
336        }
337
338        Ok(out)
339    }
340}
341
342// ─────────────────────────────────────────────────────────────────────────────
343//  HuffmanEncoder / HuffmanDecoder
344// ─────────────────────────────────────────────────────────────────────────────
345
346/// Node used when building the Huffman tree.
347#[derive(Debug, Clone, Eq, PartialEq)]
348struct HuffNode {
349    freq: u64,
350    symbol: Option<u8>,
351    left: Option<Box<HuffNode>>,
352    right: Option<Box<HuffNode>>,
353}
354
355impl Ord for HuffNode {
356    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
357        other.freq.cmp(&self.freq) // min-heap
358    }
359}
360
361impl PartialOrd for HuffNode {
362    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
363        Some(self.cmp(other))
364    }
365}
366
367/// Canonical Huffman encoder.
368///
369/// Output format:
370/// `[original_len: u32 LE][code_lengths: 256 bytes][bit_stream…][padding_bits: u8]`
371pub struct HuffmanEncoder;
372
373impl HuffmanEncoder {
374    const MAX_BITS: u32 = 15;
375
376    /// Build frequency table.
377    fn freq_table(data: &[u8]) -> [u64; 256] {
378        let mut freq = [0u64; 256];
379        for &b in data {
380            freq[b as usize] += 1;
381        }
382        freq
383    }
384
385    /// Build the Huffman tree using a min-heap.
386    fn build_tree(freq: &[u64; 256]) -> HuffNode {
387        let mut heap: BinaryHeap<HuffNode> = BinaryHeap::new();
388        for (sym, &f) in freq.iter().enumerate() {
389            if f > 0 {
390                heap.push(HuffNode { freq: f, symbol: Some(sym as u8), left: None, right: None });
391            }
392        }
393        if heap.is_empty() {
394            // All zeros – just push a dummy so the tree is non-empty
395            heap.push(HuffNode { freq: 1, symbol: Some(0), left: None, right: None });
396        }
397        while heap.len() > 1 {
398            let a = heap.pop().unwrap();
399            let b = heap.pop().unwrap();
400            heap.push(HuffNode {
401                freq: a.freq + b.freq,
402                symbol: None,
403                left: Some(Box::new(a)),
404                right: Some(Box::new(b)),
405            });
406        }
407        heap.pop().unwrap()
408    }
409
410    /// Walk the tree to assign code lengths.
411    fn assign_lengths(node: &HuffNode, depth: u32, lengths: &mut [u32; 256]) {
412        if let Some(sym) = node.symbol {
413            lengths[sym as usize] = depth.max(1);
414        } else {
415            if let Some(l) = &node.left  { Self::assign_lengths(l, depth + 1, lengths); }
416            if let Some(r) = &node.right { Self::assign_lengths(r, depth + 1, lengths); }
417        }
418    }
419
420    /// Limit code lengths to `MAX_BITS` using the package-merge approach (simplified: clamp + rebalance).
421    fn limit_lengths(lengths: &mut [u32; 256]) {
422        let mut over: i32 = 0;
423        for l in lengths.iter_mut() {
424            if *l > Self::MAX_BITS {
425                over += (1 << (*l - Self::MAX_BITS)) - 1;
426                *l = Self::MAX_BITS;
427            }
428        }
429        // Remove `over` worth of codes by bumping some short lengths up by 1
430        for l in lengths.iter_mut() {
431            if *l > 0 && *l < Self::MAX_BITS && over > 0 {
432                over -= 1;
433                *l += 1;
434            }
435        }
436    }
437
438    /// Assign canonical codes from lengths.
439    fn canonical_codes(lengths: &[u32; 256]) -> [u32; 256] {
440        let mut bl_count = [0u32; 16];
441        for &l in lengths.iter() {
442            if l > 0 {
443                bl_count[l as usize] += 1;
444            }
445        }
446        let mut next_code = [0u32; 16];
447        let mut code = 0u32;
448        for bits in 1..=15usize {
449            code = (code + bl_count[bits - 1]) << 1;
450            next_code[bits] = code;
451        }
452        let mut codes = [0u32; 256];
453        let mut order: Vec<usize> = (0..256).filter(|&i| lengths[i] > 0).collect();
454        order.sort_by_key(|&i| (lengths[i], i));
455        for i in order {
456            let l = lengths[i] as usize;
457            codes[i] = next_code[l];
458            next_code[l] += 1;
459        }
460        codes
461    }
462
463    /// Compress `data` using canonical Huffman coding.
464    pub fn compress(data: &[u8]) -> Vec<u8> {
465        if data.is_empty() {
466            let mut out = vec![0u8; 4 + 256 + 1];
467            return out;
468        }
469        let freq = Self::freq_table(data);
470        let tree = Self::build_tree(&freq);
471        let mut lengths = [0u32; 256];
472        Self::assign_lengths(&tree, 0, &mut lengths);
473        Self::limit_lengths(&mut lengths);
474        let codes = Self::canonical_codes(&lengths);
475
476        let orig_len = data.len() as u32;
477        let mut out = Vec::with_capacity(4 + 256 + data.len());
478        out.extend_from_slice(&orig_len.to_le_bytes());
479        for &l in &lengths {
480            out.push(l as u8);
481        }
482
483        // Bit-pack the encoded stream
484        let mut bit_buf = 0u32;
485        let mut bit_count = 0u32;
486        let mut encoded_bits: Vec<u8> = Vec::new();
487
488        for &byte in data {
489            let code = codes[byte as usize];
490            let len  = lengths[byte as usize];
491            bit_buf |= code << (32 - len - bit_count);
492            bit_count += len;
493            while bit_count >= 8 {
494                encoded_bits.push((bit_buf >> 24) as u8);
495                bit_buf <<= 8;
496                bit_count -= 8;
497            }
498        }
499        let padding = if bit_count > 0 { 8 - bit_count } else { 0 };
500        if bit_count > 0 {
501            encoded_bits.push((bit_buf >> 24) as u8);
502        }
503
504        out.extend_from_slice(&encoded_bits);
505        out.push(padding as u8);
506        out
507    }
508}
509
510/// Canonical Huffman decoder.
511pub struct HuffmanDecoder;
512
513impl HuffmanDecoder {
514    /// Decompress data produced by `HuffmanEncoder::compress`.
515    pub fn decompress(input: &[u8]) -> Result<Vec<u8>, String> {
516        if input.len() < 4 + 256 + 1 {
517            return Err("Huffman block too short".into());
518        }
519        let orig_len =
520            u32::from_le_bytes([input[0], input[1], input[2], input[3]]) as usize;
521        if orig_len == 0 {
522            return Ok(Vec::new());
523        }
524        let mut lengths = [0u32; 256];
525        for i in 0..256 {
526            lengths[i] = input[4 + i] as u32;
527        }
528
529        // Rebuild canonical codes
530        let codes = HuffmanEncoder::canonical_codes(&lengths);
531
532        // Build decode table: (code, length) → symbol
533        let mut decode_table: HashMap<(u32, u32), u8> = HashMap::new();
534        for (sym, &l) in lengths.iter().enumerate() {
535            if l > 0 {
536                decode_table.insert((codes[sym], l), sym as u8);
537            }
538        }
539
540        let payload = &input[4 + 256..];
541        if payload.is_empty() {
542            return Err("Huffman block has no payload".into());
543        }
544        let padding = *payload.last().unwrap() as u32;
545        let bit_data = &payload[..payload.len() - 1];
546
547        let mut out = Vec::with_capacity(orig_len);
548        let mut bit_buf = 0u32;
549        let mut bits_available = 0u32;
550        let mut byte_idx = 0usize;
551
552        let mut fill = |bit_buf: &mut u32, bits_available: &mut u32| {
553            while *bits_available < 24 && byte_idx < bit_data.len() {
554                *bit_buf |= (bit_data[byte_idx] as u32) << (24 - *bits_available);
555                *bits_available += 8;
556                byte_idx += 1;
557            }
558        };
559
560        while out.len() < orig_len {
561            fill(&mut bit_buf, &mut bits_available);
562            if bits_available == 0 {
563                break;
564            }
565            let mut found = false;
566            for try_len in 1..=15u32 {
567                if bits_available < try_len {
568                    if bits_available + padding >= try_len {
569                        // padding bits
570                        break;
571                    }
572                    break;
573                }
574                let candidate = bit_buf >> (32 - try_len);
575                if let Some(&sym) = decode_table.get(&(candidate, try_len)) {
576                    out.push(sym);
577                    bit_buf <<= try_len;
578                    bits_available -= try_len;
579                    found = true;
580                    break;
581                }
582            }
583            if !found {
584                break;
585            }
586        }
587
588        if out.len() != orig_len {
589            return Err(format!("Huffman decode produced {} bytes, expected {}", out.len(), orig_len));
590        }
591        Ok(out)
592    }
593}
594
595// ─────────────────────────────────────────────────────────────────────────────
596//  DeltaEncoder / DeltaDecoder
597// ─────────────────────────────────────────────────────────────────────────────
598
599/// Delta-encode sorted integer arrays then apply RLE on the deltas.
600///
601/// Useful for compact storage of entity ID lists.
602pub struct DeltaEncoder;
603
604impl DeltaEncoder {
605    /// Delta-encode a sorted slice of `u32` values, then RLE-compress the result.
606    ///
607    /// Wire: first encode `[count: u32 LE]`, then for each delta `[delta: u32 LE]`,
608    /// wrap the whole byte stream with RLE.
609    pub fn encode(values: &[u32]) -> Vec<u8> {
610        let mut raw = Vec::with_capacity(4 + values.len() * 4);
611        let count = values.len() as u32;
612        raw.extend_from_slice(&count.to_le_bytes());
613        let mut prev = 0u32;
614        for &v in values {
615            let delta = v.wrapping_sub(prev);
616            raw.extend_from_slice(&delta.to_le_bytes());
617            prev = v;
618        }
619        RunLengthEncoder::encode(&raw)
620    }
621
622    /// Encode bytes directly with delta+RLE (treats bytes as u8 deltas).
623    pub fn encode_bytes(data: &[u8]) -> Vec<u8> {
624        let mut raw = Vec::with_capacity(data.len());
625        let mut prev = 0u8;
626        for &b in data {
627            raw.push(b.wrapping_sub(prev));
628            prev = b;
629        }
630        RunLengthEncoder::encode(&raw)
631    }
632}
633
634/// Delta decoder.
635pub struct DeltaDecoder;
636
637impl DeltaDecoder {
638    /// Decode a stream produced by `DeltaEncoder::encode`.
639    pub fn decode(input: &[u8]) -> Result<Vec<u32>, String> {
640        let raw = RunLengthEncoder::decode(input)?;
641        if raw.len() < 4 {
642            return Err("DeltaDecoder: too short".into());
643        }
644        let count = u32::from_le_bytes([raw[0], raw[1], raw[2], raw[3]]) as usize;
645        if raw.len() < 4 + count * 4 {
646            return Err("DeltaDecoder: truncated".into());
647        }
648        let mut out = Vec::with_capacity(count);
649        let mut prev = 0u32;
650        for i in 0..count {
651            let off = 4 + i * 4;
652            let delta = u32::from_le_bytes([raw[off], raw[off+1], raw[off+2], raw[off+3]]);
653            let v = prev.wrapping_add(delta);
654            out.push(v);
655            prev = v;
656        }
657        Ok(out)
658    }
659
660    /// Decode a byte stream produced by `DeltaEncoder::encode_bytes`.
661    pub fn decode_bytes(input: &[u8]) -> Result<Vec<u8>, String> {
662        let raw = RunLengthEncoder::decode(input)?;
663        let mut out = Vec::with_capacity(raw.len());
664        let mut prev = 0u8;
665        for &delta in &raw {
666            let v = prev.wrapping_add(delta);
667            out.push(v);
668            prev = v;
669        }
670        Ok(out)
671    }
672}
673
674// ─────────────────────────────────────────────────────────────────────────────
675//  Compressor  (compress_auto)
676// ─────────────────────────────────────────────────────────────────────────────
677
678/// High-level compressor: tries every algorithm and picks the smallest result.
679pub struct Compressor;
680
681impl Compressor {
682    /// Compress `data` using each available algorithm, return the smallest `CompressedBlock`.
683    pub fn compress_auto(data: &[u8]) -> CompressedBlock {
684        let original_size = data.len();
685
686        let candidates: Vec<(CompressionAlgorithm, Vec<u8>)> = vec![
687            (CompressionAlgorithm::None,        data.to_vec()),
688            (CompressionAlgorithm::Rle,         RunLengthEncoder::encode(data)),
689            (CompressionAlgorithm::Lz4Like,     Lz4Encoder::compress(data)),
690            (CompressionAlgorithm::DeltaEncode, DeltaEncoder::encode_bytes(data)),
691            (CompressionAlgorithm::Huffman,     HuffmanEncoder::compress(data)),
692        ];
693
694        let best = candidates
695            .into_iter()
696            .min_by_key(|(_, d)| d.len())
697            .unwrap();
698
699        CompressedBlock { algorithm: best.0, original_size, data: best.1 }
700    }
701
702    /// Compress with a specific algorithm.
703    pub fn compress_with(data: &[u8], algo: CompressionAlgorithm) -> CompressedBlock {
704        let original_size = data.len();
705        let compressed = match algo {
706            CompressionAlgorithm::None        => data.to_vec(),
707            CompressionAlgorithm::Rle         => RunLengthEncoder::encode(data),
708            CompressionAlgorithm::Lz4Like     => Lz4Encoder::compress(data),
709            CompressionAlgorithm::DeltaEncode => DeltaEncoder::encode_bytes(data),
710            CompressionAlgorithm::Huffman     => HuffmanEncoder::compress(data),
711        };
712        CompressedBlock { algorithm: algo, original_size, data: compressed }
713    }
714}
715
716// ─────────────────────────────────────────────────────────────────────────────
717//  Tests
718// ─────────────────────────────────────────────────────────────────────────────
719
720#[cfg(test)]
721mod tests {
722    use super::*;
723
724    fn roundtrip_rle(input: &[u8]) {
725        let encoded = RunLengthEncoder::encode(input);
726        let decoded = RunLengthEncoder::decode(&encoded).expect("RLE decode failed");
727        assert_eq!(&decoded, input, "RLE roundtrip failed for {:?}", input);
728    }
729
730    #[test]
731    fn test_rle_empty() {
732        roundtrip_rle(&[]);
733    }
734
735    #[test]
736    fn test_rle_no_runs() {
737        roundtrip_rle(&[1, 2, 3, 4, 5]);
738    }
739
740    #[test]
741    fn test_rle_long_run() {
742        let input: Vec<u8> = vec![42u8; 300];
743        roundtrip_rle(&input);
744    }
745
746    #[test]
747    fn test_rle_mixed() {
748        roundtrip_rle(&[0, 0, 0, 1, 2, 2, 3]);
749    }
750
751    #[test]
752    fn test_rle_u16_roundtrip() {
753        let values: Vec<u16> = vec![0, 100, 200, 200, 300, 300, 300];
754        let encoded = RunLengthEncoder::encode_u16(&values);
755        let decoded = RunLengthEncoder::decode_u16(&encoded).expect("u16 decode failed");
756        assert_eq!(decoded, values);
757    }
758
759    #[test]
760    fn test_lz4_empty() {
761        let compressed = Lz4Encoder::compress(&[]);
762        let decompressed = Lz4Decoder::decompress(&compressed).expect("lz4 decompress failed");
763        assert_eq!(decompressed, &[]);
764    }
765
766    #[test]
767    fn test_lz4_roundtrip_simple() {
768        let input = b"hello world hello world hello world";
769        let compressed = Lz4Encoder::compress(input);
770        let decompressed = Lz4Decoder::decompress(&compressed).expect("lz4 decompress failed");
771        assert_eq!(&decompressed, input);
772    }
773
774    #[test]
775    fn test_lz4_roundtrip_random_like() {
776        let input: Vec<u8> = (0u8..=255).cycle().take(512).collect();
777        let compressed = Lz4Encoder::compress(&input);
778        let decompressed = Lz4Decoder::decompress(&compressed).expect("lz4 decompress failed");
779        assert_eq!(decompressed, input);
780    }
781
782    #[test]
783    fn test_huffman_roundtrip() {
784        let input = b"abracadabra";
785        let compressed = HuffmanEncoder::compress(input);
786        let decompressed = HuffmanDecoder::decompress(&compressed).expect("huffman decompress failed");
787        assert_eq!(&decompressed, input);
788    }
789
790    #[test]
791    fn test_huffman_uniform() {
792        let input: Vec<u8> = vec![0xAAu8; 100];
793        let compressed = HuffmanEncoder::compress(&input);
794        let decompressed = HuffmanDecoder::decompress(&compressed).expect("huffman decompress failed");
795        assert_eq!(decompressed, input);
796    }
797
798    #[test]
799    fn test_delta_encode_roundtrip() {
800        let values = vec![0u32, 5, 10, 15, 100, 200, 201];
801        let encoded = DeltaEncoder::encode(&values);
802        let decoded = DeltaDecoder::decode(&encoded).expect("delta decode failed");
803        assert_eq!(decoded, values);
804    }
805
806    #[test]
807    fn test_delta_bytes_roundtrip() {
808        let data: Vec<u8> = (0u8..50).collect();
809        let encoded = DeltaEncoder::encode_bytes(&data);
810        let decoded = DeltaDecoder::decode_bytes(&encoded).expect("delta bytes decode failed");
811        assert_eq!(decoded, data);
812    }
813
814    #[test]
815    fn test_compressed_block_serialization() {
816        let data = b"save data payload for block test";
817        let block = Compressor::compress_auto(data);
818        let bytes = block.to_bytes();
819        let restored = CompressedBlock::from_bytes(&bytes).expect("from_bytes failed");
820        let decompressed = restored.decompress().expect("decompress failed");
821        assert_eq!(&decompressed, data);
822    }
823
824    #[test]
825    fn test_compress_auto_correctness() {
826        let data: Vec<u8> = b"the quick brown fox jumps over the lazy dog".to_vec();
827        let block = Compressor::compress_auto(&data);
828        let decompressed = block.decompress().expect("auto decompress failed");
829        assert_eq!(decompressed, data);
830    }
831
832    #[test]
833    fn test_compress_with_all_algorithms() {
834        let data: Vec<u8> = vec![1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5];
835        for algo in [
836            CompressionAlgorithm::None,
837            CompressionAlgorithm::Rle,
838            CompressionAlgorithm::Lz4Like,
839            CompressionAlgorithm::DeltaEncode,
840            CompressionAlgorithm::Huffman,
841        ] {
842            let block = Compressor::compress_with(&data, algo);
843            let decompressed = block.decompress()
844                .unwrap_or_else(|e| panic!("decompress failed for {:?}: {}", algo, e));
845            assert_eq!(decompressed, data, "mismatch for {:?}", algo);
846        }
847    }
848}