1#[derive(Debug, PartialEq)]
16pub enum CompressError {
17 TooShort,
19 Truncated,
21 InvalidOffset,
23 SizeMismatch { expected: usize, got: usize },
25}
26
27impl core::fmt::Display for CompressError {
28 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
29 match self {
30 CompressError::TooShort => write!(f, "compressed data too short"),
31 CompressError::Truncated => write!(f, "compressed stream truncated"),
32 CompressError::InvalidOffset => write!(f, "invalid back-reference offset"),
33 CompressError::SizeMismatch { expected, got } => {
34 write!(f, "size mismatch: expected {expected}, got {got}")
35 }
36 }
37 }
38}
39
40#[inline]
42fn trigram_hash(a: u8, b: u8, c: u8) -> usize {
43 (((a as usize) << 4) ^ ((b as usize) << 2) ^ (c as usize)) & 0xFFF
44}
45
46fn flush_literals(output: &mut Vec<u8>, literals: &[u8]) {
48 let mut offset = 0;
49 while offset < literals.len() {
50 let chunk = core::cmp::min(128, literals.len() - offset);
51 output.push((chunk - 1) as u8); output.extend_from_slice(&literals[offset..offset + chunk]);
53 offset += chunk;
54 }
55}
56
57pub fn compress(input: &[u8]) -> Vec<u8> {
61 let mut output = Vec::with_capacity(input.len());
62
63 output.extend_from_slice(&(input.len() as u32).to_le_bytes());
65
66 if input.is_empty() {
67 return output;
68 }
69
70 let mut table = [0u32; 4096];
72 let mut literals: Vec<u8> = Vec::new();
73 let mut pos = 0;
74
75 while pos < input.len() {
76 let mut best_len = 0usize;
77 let mut best_offset = 0usize;
78
79 if pos + 3 <= input.len() {
80 let hash = trigram_hash(input[pos], input[pos + 1], input[pos + 2]);
81 let candidate = table[hash] as usize;
82 table[hash] = pos as u32;
83
84 if candidate < pos && pos - candidate <= 4096 {
85 let max_len = core::cmp::min(10, input.len() - pos);
86 let mut match_len = 0;
87 while match_len < max_len
88 && input[candidate + match_len] == input[pos + match_len]
89 {
90 match_len += 1;
91 }
92 if match_len >= 3 {
93 best_len = match_len;
94 best_offset = pos - candidate;
95 }
96 }
97 }
98
99 if best_len >= 3 {
100 flush_literals(&mut output, &literals);
102 literals.clear();
103
104 let len_code = (best_len - 3) as u8; let offset_val = (best_offset - 1) as u16; let offset_hi = ((offset_val >> 8) & 0x0F) as u8;
108 let offset_lo = (offset_val & 0xFF) as u8;
109
110 output.push(0x80 | (len_code << 4) | offset_hi);
111 output.push(offset_lo);
112
113 for i in 1..best_len {
115 if pos + i + 3 <= input.len() {
116 let h = trigram_hash(input[pos + i], input[pos + i + 1], input[pos + i + 2]);
117 table[h] = (pos + i) as u32;
118 }
119 }
120
121 pos += best_len;
122 } else {
123 literals.push(input[pos]);
124 pos += 1;
125 }
126 }
127
128 flush_literals(&mut output, &literals);
130
131 output
132}
133
134pub fn decompress(compressed: &[u8]) -> Result<Vec<u8>, CompressError> {
136 if compressed.len() < 4 {
137 return Err(CompressError::TooShort);
138 }
139
140 let original_size = u32::from_le_bytes([
141 compressed[0],
142 compressed[1],
143 compressed[2],
144 compressed[3],
145 ]) as usize;
146
147 let mut output = Vec::with_capacity(original_size);
148 let mut pos = 4;
149
150 while output.len() < original_size && pos < compressed.len() {
151 let control = compressed[pos];
152 pos += 1;
153
154 if control & 0x80 == 0 {
155 let count = (control as usize) + 1;
157 if pos + count > compressed.len() {
158 return Err(CompressError::Truncated);
159 }
160 output.extend_from_slice(&compressed[pos..pos + count]);
161 pos += count;
162 } else {
163 if pos >= compressed.len() {
165 return Err(CompressError::Truncated);
166 }
167 let length = (((control >> 4) & 0x07) as usize) + 3;
168 let offset_hi = (control & 0x0F) as usize;
169 let offset_lo = compressed[pos] as usize;
170 pos += 1;
171 let offset = (offset_hi << 8 | offset_lo) + 1;
172
173 if offset > output.len() {
174 return Err(CompressError::InvalidOffset);
175 }
176
177 let start = output.len() - offset;
178 for i in 0..length {
179 let byte = output[start + i];
180 output.push(byte);
181 }
182 }
183 }
184
185 if output.len() != original_size {
186 return Err(CompressError::SizeMismatch {
187 expected: original_size,
188 got: output.len(),
189 });
190 }
191
192 Ok(output)
193}
194
195#[cfg(test)]
196mod tests {
197 use super::*;
198
199 #[test]
200 fn empty_round_trip() {
201 let compressed = compress(b"");
202 assert_eq!(compressed, [0, 0, 0, 0]); let decompressed = decompress(&compressed).unwrap();
204 assert!(decompressed.is_empty());
205 }
206
207 #[test]
208 fn short_literal_round_trip() {
209 let input = b"Hello, World!";
210 let compressed = compress(input);
211 let decompressed = decompress(&compressed).unwrap();
212 assert_eq!(&decompressed, input);
213 }
214
215 #[test]
216 fn repeated_data_compresses() {
217 let input: Vec<u8> = (0..1000).map(|i| (i % 7) as u8).collect();
219 let compressed = compress(&input);
220 assert!(
221 compressed.len() < input.len(),
222 "compressed {} >= original {}",
223 compressed.len(),
224 input.len()
225 );
226 let decompressed = decompress(&compressed).unwrap();
227 assert_eq!(decompressed, input);
228 }
229
230 #[test]
231 fn wasm_like_data_compresses() {
232 let mut wasm = Vec::new();
234 wasm.extend_from_slice(&[0x00, 0x61, 0x73, 0x6D, 0x01, 0x00, 0x00, 0x00]);
236 for _ in 0..100 {
238 wasm.extend_from_slice(&[0x01, 0x06, 0x01, 0x60, 0x01, 0x7F, 0x01, 0x7F]);
239 }
240 wasm.resize(wasm.len() + 500, 0x00);
242
243 let compressed = compress(&wasm);
244 assert!(
245 compressed.len() < wasm.len(),
246 "compressed {} >= original {}",
247 compressed.len(),
248 wasm.len()
249 );
250 let decompressed = decompress(&compressed).unwrap();
251 assert_eq!(decompressed, wasm);
252 }
253
254 #[test]
255 fn random_like_data_round_trips() {
256 let input: Vec<u8> = (0..500).map(|i| ((i * 131 + 17) % 256) as u8).collect();
258 let compressed = compress(&input);
259 let decompressed = decompress(&compressed).unwrap();
260 assert_eq!(decompressed, input);
261 }
262
263 #[test]
264 fn large_data_round_trip() {
265 let input: Vec<u8> = (0..8000).map(|i| ((i * 37 + i / 100) % 256) as u8).collect();
266 let compressed = compress(&input);
267 let decompressed = decompress(&compressed).unwrap();
268 assert_eq!(decompressed, input);
269 }
270
271 #[test]
272 fn all_zeros_compress_well() {
273 let input = vec![0u8; 4096];
274 let compressed = compress(&input);
275 assert!(compressed.len() < input.len() / 2);
277 let decompressed = decompress(&compressed).unwrap();
278 assert_eq!(decompressed, input);
279 }
280
281 #[test]
282 fn decompress_truncated_fails() {
283 let compressed = compress(b"test data for truncation");
284 let truncated = &compressed[..compressed.len() / 2];
286 assert!(decompress(truncated).is_err());
287 }
288
289 #[test]
290 fn decompress_too_short_fails() {
291 assert_eq!(decompress(&[0, 0]), Err(CompressError::TooShort));
292 }
293
294 #[test]
295 fn compress_error_display() {
296 let e = CompressError::SizeMismatch {
297 expected: 100,
298 got: 50,
299 };
300 assert!(format!("{e}").contains("100"));
301 }
302
303 #[test]
304 fn exactly_128_byte_literal_run() {
305 let input: Vec<u8> = (0..128).map(|i| (i * 2 + 1) as u8).collect();
307 let compressed = compress(&input);
308 let decompressed = decompress(&compressed).unwrap();
309 assert_eq!(decompressed, input);
310 }
311}