gistools/util/compression/
fflate.rs

1// Ported from https://github.com/101arrowz/fflate
2
3// DEFLATE is a complex format; to read this code, you should probably check the RFC first:
4// <https://tools.ietf.org/html/rfc1951>
5// You may also wish to take a look at the guide I made about this program:
6// <https://gist.github.com/101arrowz/253f31eb5abc3d9275ab943003ffecad>
7
8// Some of the following code is similar to that of UZIP.js:
9// <https://github.com/photopea/UZIP.js>
10// However, the vast majority of the codebase has diverged from UZIP.js to increase performance
11// and reduce bundle size.
12
13// Sometimes 0 will appear where -1 would be more appropriate. This is because using a uint
14// is better for memory in most engines (I *think*).
15
16use alloc::{vec, vec::Vec};
17use core::result::Result;
18
19/// Handles compression errors
20#[derive(Debug, PartialEq)]
21pub enum FFlateError {
22    /// Unexpected EOF
23    UnexpectedEof,
24    /// Invalid block type
25    InvalidBlockType,
26    /// Invalid length/literal
27    InvalidLengthLiteral,
28    /// Invalid distance
29    InvalidDistance,
30    /// Stream finished
31    StreamFinished,
32    /// No stream handler
33    NoStreamHandler,
34    /// No callback
35    NoCallback,
36    /// Other
37    Other,
38}
39
40/// fixed length extra bits
41const FLEB: [u8; 32] = [
42    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0,
43    // unused
44    0, 0, // impossible
45    0,
46];
47/// fixed distance extra bits
48const FDEB: [u8; 32] = [
49    0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13,
50    13, // unused
51    0, 0,
52];
53/// code length index map
54const CLIM: [u8; 19] = [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15];
55
56// fixed length map
57const FLRM: [usize; 512] = [
58    4103, 1288, 264, 4488, 4359, 1800, 776, 3081, 4231, 1544, 520, 2569, 8, 2056, 1032, 3593, 4167,
59    1416, 392, 2313, 4423, 1928, 904, 3337, 4295, 1672, 648, 2825, 136, 2184, 1160, 3849, 4135,
60    1352, 328, 4552, 4391, 1864, 840, 3209, 4263, 1608, 584, 2697, 72, 2120, 1096, 3721, 4199,
61    1480, 456, 2441, 4455, 1992, 968, 3465, 4327, 1736, 712, 2953, 200, 2248, 1224, 3977, 4119,
62    1320, 296, 4520, 4375, 1832, 808, 3145, 4247, 1576, 552, 2633, 40, 2088, 1064, 3657, 4183,
63    1448, 424, 2377, 4439, 1960, 936, 3401, 4311, 1704, 680, 2889, 168, 2216, 1192, 3913, 4151,
64    1384, 360, 4584, 4407, 1896, 872, 3273, 4279, 1640, 616, 2761, 104, 2152, 1128, 3785, 4215,
65    1512, 488, 2505, 4471, 2024, 1000, 3529, 4343, 1768, 744, 3017, 232, 2280, 1256, 4041, 4103,
66    1304, 280, 4504, 4359, 1816, 792, 3113, 4231, 1560, 536, 2601, 24, 2072, 1048, 3625, 4167,
67    1432, 408, 2345, 4423, 1944, 920, 3369, 4295, 1688, 664, 2857, 152, 2200, 1176, 3881, 4135,
68    1368, 344, 4568, 4391, 1880, 856, 3241, 4263, 1624, 600, 2729, 88, 2136, 1112, 3753, 4199,
69    1496, 472, 2473, 4455, 2008, 984, 3497, 4327, 1752, 728, 2985, 216, 2264, 1240, 4009, 4119,
70    1336, 312, 4536, 4375, 1848, 824, 3177, 4247, 1592, 568, 2665, 56, 2104, 1080, 3689, 4183,
71    1464, 440, 2409, 4439, 1976, 952, 3433, 4311, 1720, 696, 2921, 184, 2232, 1208, 3945, 4151,
72    1400, 376, 4600, 4407, 1912, 888, 3305, 4279, 1656, 632, 2793, 120, 2168, 1144, 3817, 4215,
73    1528, 504, 2537, 4471, 2040, 1016, 3561, 4343, 1784, 760, 3049, 248, 2296, 1272, 4073, 4103,
74    1288, 264, 4488, 4359, 1800, 776, 3097, 4231, 1544, 520, 2585, 8, 2056, 1032, 3609, 4167, 1416,
75    392, 2329, 4423, 1928, 904, 3353, 4295, 1672, 648, 2841, 136, 2184, 1160, 3865, 4135, 1352,
76    328, 4552, 4391, 1864, 840, 3225, 4263, 1608, 584, 2713, 72, 2120, 1096, 3737, 4199, 1480, 456,
77    2457, 4455, 1992, 968, 3481, 4327, 1736, 712, 2969, 200, 2248, 1224, 3993, 4119, 1320, 296,
78    4520, 4375, 1832, 808, 3161, 4247, 1576, 552, 2649, 40, 2088, 1064, 3673, 4183, 1448, 424,
79    2393, 4439, 1960, 936, 3417, 4311, 1704, 680, 2905, 168, 2216, 1192, 3929, 4151, 1384, 360,
80    4584, 4407, 1896, 872, 3289, 4279, 1640, 616, 2777, 104, 2152, 1128, 3801, 4215, 1512, 488,
81    2521, 4471, 2024, 1000, 3545, 4343, 1768, 744, 3033, 232, 2280, 1256, 4057, 4103, 1304, 280,
82    4504, 4359, 1816, 792, 3129, 4231, 1560, 536, 2617, 24, 2072, 1048, 3641, 4167, 1432, 408,
83    2361, 4423, 1944, 920, 3385, 4295, 1688, 664, 2873, 152, 2200, 1176, 3897, 4135, 1368, 344,
84    4568, 4391, 1880, 856, 3257, 4263, 1624, 600, 2745, 88, 2136, 1112, 3769, 4199, 1496, 472,
85    2489, 4455, 2008, 984, 3513, 4327, 1752, 728, 3001, 216, 2264, 1240, 4025, 4119, 1336, 312,
86    4536, 4375, 1848, 824, 3193, 4247, 1592, 568, 2681, 56, 2104, 1080, 3705, 4183, 1464, 440,
87    2425, 4439, 1976, 952, 3449, 4311, 1720, 696, 2937, 184, 2232, 1208, 3961, 4151, 1400, 376,
88    4600, 4407, 1912, 888, 3321, 4279, 1656, 632, 2809, 120, 2168, 1144, 3833, 4215, 1528, 504,
89    2553, 4471, 2040, 1016, 3577, 4343, 1784, 760, 3065, 248, 2296, 1272, 4089,
90];
91// fixed distance map
92const FDRM: [usize; 32] = [
93    5, 261, 133, 389, 69, 325, 197, 453, 37, 293, 165, 421, 101, 357, 229, 485, 21, 277, 149, 405,
94    85, 341, 213, 469, 53, 309, 181, 437, 117, 373, 245, 501,
95];
96
97const FL: [usize; 31] = [
98    3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131,
99    163, 195, 227, 258, 260, 261,
100];
101const FD: [usize; 31] = [
102    1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537,
103    2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 32769,
104];
105static REV: [usize; 32768] = compute_rev();
106const fn compute_rev() -> [usize; 32768] {
107    let mut rev = [0; 32768];
108    let mut i: usize = 0;
109    while i < 32768 {
110        let mut x: usize = ((i & 0xaaaa) >> 1) | ((i & 0x5555) << 1);
111        x = ((x & 0xcccc) >> 2) | ((x & 0x3333) << 2);
112        x = ((x & 0xf0f0) >> 4) | ((x & 0x0f0f) << 4);
113        x = (((x & 0xff00) >> 8) | ((x & 0x00ff) << 8)) >> 1;
114        rev[i] = x;
115        i += 1;
116    }
117    rev
118}
119
120/// Expands compressed GZIP, Zlib/DEFLATE, or DEFLATE_RAW data, automatically detecting the format
121pub fn decompress_fflate(data: &[u8], dict: Option<&[u8]>) -> Result<Vec<u8>, FFlateError> {
122    let data_0 = data[0] as usize;
123    let data_1 = data[1] as usize;
124    let data_2 = data[2] as usize;
125    if data_0 == 31 && data_1 == 139 && data_2 == 8 {
126        gunzip_sync(data, dict)
127    } else if (data_0 & 15) != 8 || data_0 >> 4 > 7 || !((data_0 << 8) | data_1).is_multiple_of(31)
128    {
129        inflate_sync(data, dict)
130    } else {
131        unzlib_sync(data, dict)
132    }
133}
134
135/// Expands GZIP data
136pub fn gunzip_sync(data: &[u8], dict: Option<&[u8]>) -> Result<Vec<u8>, FFlateError> {
137    let st = gzs(data)?;
138    if st + 8 > data.len() {
139        return Err(FFlateError::NoCallback);
140    }
141    inflt(&data[st..data.len() - 8], Some(&vec![0; gzl(data)]), dict)
142}
143
144/// Expands DEFLATE data with no wrapper
145pub fn inflate_sync(data: &[u8], dict: Option<&[u8]>) -> Result<Vec<u8>, FFlateError> {
146    inflt(data, None, dict)
147}
148
149/// Expands Zlib data
150pub fn unzlib_sync(data: &[u8], dict: Option<&[u8]>) -> Result<Vec<u8>, FFlateError> {
151    inflt(&data[zls(data, dict)?..data.len() - 4], None, dict)
152}
153
154/// expands raw DEFLATE data
155fn inflt(dat: &[u8], bf: Option<&[u8]>, dict: Option<&[u8]>) -> Result<Vec<u8>, FFlateError> {
156    // source lengt - dict length
157    let sl = dat.len();
158    let mut dl: usize = 0;
159    if let Some(d) = dict {
160        dl = d.len();
161    }
162    if sl == 0 {
163        return Ok(bf.unwrap_or(&[]).to_vec());
164    }
165    let no_buf = bf.is_none();
166    // have to estimate size
167    let resize = no_buf;
168    // Assumes roughly 33% compression ratio average
169    let mut buf: Vec<u8> = bf.unwrap_or(&vec![0; sl * 3]).to_vec();
170    // ensure buffer can fit at least l elements
171    let cbuf = |l: usize, buf: &mut Vec<u8>| {
172        let bl = buf.len();
173        if l > bl {
174            let new_size = usize::max(bl * 2, l);
175            buf.resize(new_size, 0); // Resize the buffer, filling new elements with `0`
176        }
177    };
178    //  last chunk - bitpos - bytes
179    let mut finl = 0;
180    let mut pos = 0;
181    let mut bt = 0;
182    let mut _lm_vec: Vec<usize> = vec![];
183    let mut lm: Option<&[usize]> = None;
184    let mut _dm_vec: Vec<usize> = vec![];
185    let mut dm: Option<&[usize]> = None;
186    let mut lbt = 0;
187    let mut dbt = 0;
188    // total bits
189    let tbts = sl * 8;
190    while finl == 0 || lm.is_some() {
191        if lm.is_none() {
192            // BFINAL - this is only 1 when last chunk is next
193            finl = bits(dat, pos, 1);
194            // type: 0 = no compression, 1 = fixed huffman, 2 = dynamic huffman
195            let c_type = bits(dat, pos + 1, 3);
196            pos += 3;
197            if c_type == 0 {
198                // go to end of byte boundary
199                let s = shft(pos) + 4;
200                let l: usize = (dat[s - 4] as usize) | ((dat[s - 3] as usize) << 8);
201                let t = s + l;
202                if t > sl {
203                    return Err(FFlateError::UnexpectedEof);
204                }
205                // ensure size
206                if resize {
207                    cbuf(bt + l, &mut buf);
208                }
209                // Copy over uncompressed data
210                // buf.set(dat.subarray(s, t), bt);
211                buf[bt..(t - s + bt)].copy_from_slice(&dat[s..t]);
212                // buf[(s + bt)..(t + bt)].copy_from_slice(&dat[s..t]);
213                // Get new bitpos, update byte count
214                bt += l;
215                pos = t * 8;
216                continue;
217            } else if c_type == 1 {
218                lm = Some(&FLRM);
219                dm = Some(&FDRM);
220                lbt = 9;
221                dbt = 5;
222            } else if c_type == 2 {
223                // literal & lengths
224                let h_lit = bits(dat, pos, 31) + 257;
225                let hc_len = bits(dat, pos + 10, 15) + 4;
226                let tl = h_lit + bits(dat, pos + 5, 31) + 1;
227
228                pos += 14;
229                // length + distance tree
230                let mut ldt: Vec<usize> = vec![0; tl];
231                // code length tree
232                let mut clt: Vec<usize> = vec![0; 19];
233                for i in 0..hc_len {
234                    // use index map to get real code
235                    clt[CLIM[i] as usize] = bits(dat, pos + i * 3, 7);
236                }
237                pos += hc_len * 3;
238                // code lengths bits
239                let clb = max(&clt);
240                let clbmsk = (1 << clb) - 1;
241                // code lengths map
242                let clm = h_map(&clt, clb, true);
243                let mut i = 0;
244                loop {
245                    let r = clm[bits(dat, pos, clbmsk)];
246                    // bits read
247                    pos += r & 15;
248                    // symbol
249                    let s = r >> 4;
250                    // code length to copy
251                    if s < 16 {
252                        ldt[i] = s;
253                        i += 1;
254                    } else {
255                        // copy & count
256                        let mut c = 0;
257                        let mut n = 0;
258                        if s == 16 {
259                            n = 3 + bits(dat, pos, 3);
260                            pos += 2;
261                            c = ldt[i - 1];
262                        } else if s == 17 {
263                            n = 3 + bits(dat, pos, 7);
264                            pos += 3;
265                        } else if s == 18 {
266                            n = 11 + bits(dat, pos, 127);
267                            pos += 7;
268                        }
269                        while n != 0 {
270                            n -= 1;
271                            ldt[i] = c;
272                            i += 1;
273                        }
274                    }
275                    if i >= tl {
276                        break;
277                    }
278                }
279                // length tree & distance tree
280                let lt = &ldt[0..h_lit];
281                let dt = &ldt[h_lit..];
282                // max length bits
283                lbt = max(lt);
284                // max dist bits
285                dbt = max(dt);
286                _lm_vec = h_map(lt, lbt, true);
287                lm = Some(&_lm_vec);
288                _dm_vec = h_map(dt, dbt, true);
289                dm = Some(&_dm_vec);
290            } else {
291                return Err(FFlateError::InvalidBlockType);
292            }
293            if pos > tbts {
294                return Err(FFlateError::UnexpectedEof);
295            }
296        }
297        // Make sure the buffer can hold this + the largest possible addition
298        // Maximum chunk size (practically, theoretically infinite) is 2^17
299        if resize {
300            cbuf(bt + 131072, &mut buf);
301        }
302        let lms = (1 << lbt) - 1;
303        let dms = (1 << dbt) - 1;
304        loop {
305            // bits read, code
306            let mut c = 0;
307            if let Some(lm) = lm {
308                c = lm[bits16(dat, pos) & lms]; // Safe access
309            }
310            let sym = c >> 4;
311            pos += c & 15;
312            if pos > tbts {
313                return Err(FFlateError::UnexpectedEof);
314            }
315            if c == 0 {
316                return Err(FFlateError::InvalidLengthLiteral);
317            }
318            match sym {
319                0..=255 => {
320                    buf[bt] = sym as u8;
321                    bt += 1;
322                }
323                256 => {
324                    lm = None;
325                    break;
326                }
327                _ => {
328                    let mut add = sym - 254;
329                    // no extra bits needed if less
330                    if sym > 264 {
331                        // index
332                        let i = sym - 257;
333                        let b = FLEB[i];
334                        add = bits(dat, pos, (1 << b) - 1) + FL[i];
335                        pos += b as usize;
336                    }
337                    // dist
338                    let mut d = 0;
339                    if let Some(dm) = dm {
340                        d = dm[bits16(dat, pos) & dms];
341                    }
342                    let dsym = d >> 4;
343                    if d == 0 {
344                        return Err(FFlateError::InvalidDistance);
345                    }
346                    pos += d & 15;
347                    let mut dt = FD[dsym];
348                    if dsym > 3 {
349                        let b = FDEB[dsym];
350                        dt += bits16(dat, pos) & ((1 << b) - 1);
351                        pos += b as usize;
352                    }
353                    if pos > tbts {
354                        return Err(FFlateError::UnexpectedEof);
355                    }
356                    if resize {
357                        cbuf(bt + 131072, &mut buf);
358                    }
359                    let end = bt + add;
360                    if bt < dt {
361                        let shift: isize = dl as isize - dt as isize;
362                        let dend = dt.min(end);
363                        if shift + (bt as isize) < 0 {
364                            return Err(FFlateError::InvalidDistance);
365                        }
366                        if let Some(dict) = dict {
367                            loop {
368                                if bt >= dend {
369                                    break;
370                                }
371                                let shift_bt_sum: usize =
372                                    (shift + (bt as isize)).try_into().unwrap();
373                                buf[bt] = dict[shift_bt_sum];
374                                bt += 1;
375                            }
376                        }
377                    }
378                    loop {
379                        if bt >= end {
380                            break;
381                        }
382                        if bt >= dt {
383                            buf[bt] = buf[bt - dt];
384                        }
385                        bt += 1;
386                    }
387                }
388            }
389        }
390    }
391    // don't reallocate for streams or user buffers
392    if bt != buf.len() && no_buf { Ok(slc(&buf, 0, bt).to_vec()) } else { Ok(buf[0..bt].to_vec()) }
393}
394
395/// create huffman tree from u8 "map": index -> code length for code index
396/// mb (max bits) must be at most 15
397/// if r is true, its a decoder, otherwise its an encoder
398fn h_map(cd: &[usize], mb: usize, r: bool) -> Vec<usize> {
399    let s = cd.len();
400    // u16 "map": index -> # of codes with bit length = index
401    let mut l: Vec<usize> = vec![0; mb];
402    // length of cd must be 288 (total # of codes)
403    for i in 0..s {
404        if cd[i] != 0 {
405            l[cd[i] - 1] += 1;
406        }
407    }
408    // u16 "map": index -> minimum code for bit length = index
409    let mut le = vec![0; mb];
410    for i in 1..mb {
411        le[i] = (le[i - 1] + l[i - 1]) << 1;
412    }
413    let mut co: Vec<usize>;
414    if r {
415        // u16 "map": index -> number of actual bits, symbol for code
416        co = vec![0; 1 << mb];
417        // bits to remove for reverser
418        let rvb = 15 - mb;
419        for i in 0..s {
420            // ignore 0 lengths
421            if cd[i] != 0 {
422                // num encoding both symbol and bits read
423                let sv = (i << 4) | (cd[i]);
424                // free bits
425                let r = mb - cd[i];
426                // start value
427                let mut v = le[cd[i] - 1] << r;
428                le[cd[i] - 1] += 1;
429                // m is end value
430                let m = v | ((1 << r) - 1);
431                loop {
432                    // every 16 bit value starting with the code yields the same result
433                    co[REV[v] >> rvb] = sv;
434                    v += 1;
435                    if v > m {
436                        break;
437                    }
438                }
439            }
440        }
441    } else {
442        co = vec![0; s];
443    }
444
445    co
446}
447
448/// find max of array
449fn max(a: &[usize]) -> usize {
450    a.iter().copied().max().unwrap()
451}
452
453/// read d, starting at bit p and mask with m
454fn bits(d: &[u8], p: usize, m: usize) -> usize {
455    let o = p / 8;
456    let d_0 = d.get(o).copied().unwrap_or(0) as usize;
457    let d_1 = d.get(o + 1).copied().unwrap_or(0) as usize;
458    ((d_0 | (d_1 << 8)) >> (p & 7)) & m
459}
460
461/// read d, starting at bit p continuing for at least 16 bits
462fn bits16(d: &[u8], p: usize) -> usize {
463    let o = p / 8;
464    let d_0 = d.get(o).copied().unwrap_or(0) as usize;
465    let d_1 = d.get(o + 1).copied().unwrap_or(0) as usize;
466    let d_2 = d.get(o + 2).copied().unwrap_or(0) as usize;
467    (d_0 | (d_1 << 8) | (d_2 << 16)) >> (p & 7)
468}
469
470/// get end of byte
471fn shft(p: usize) -> usize {
472    p.div_ceil(8)
473}
474
475/// typed array slice - allows garbage collector to free original reference,
476/// while being more compatible than .slice
477fn slc(v: &[u8], s: usize, mut e: usize) -> &[u8] {
478    e = e.min(v.len());
479
480    &v[s..e]
481}
482
483/// Determines the start position of gzip-compressed data.
484fn gzs(d: &[u8]) -> Result<usize, FFlateError> {
485    if d.len() < 10 || d[0] != 31 || d[1] != 139 || d[2] != 8 {
486        return Err(FFlateError::NoCallback);
487    }
488
489    let flg = d[3];
490    let mut st = 10;
491
492    if (flg & 4) != 0 {
493        st += (d[10] as usize | ((d[11] as usize) << 8)) + 2;
494    }
495
496    let mut zs = ((flg >> 3) & 1) + ((flg >> 4) & 1);
497    while zs > 0 {
498        if d[st] == 0 {
499            zs -= 1;
500        }
501        st += 1;
502    }
503
504    Ok(st + ((flg & 2) as usize))
505}
506
507/// gzip length
508fn gzl(d: &[u8]) -> usize {
509    let l = d.len();
510    (d[l - 4] as usize)
511        | ((d[l - 3] as usize) << 8)
512        | ((d[l - 2] as usize) << 16)
513        | ((d[l - 1] as usize) << 24)
514}
515
516/// zlib start
517fn zls(d: &[u8], dict: Option<&[u8]>) -> Result<usize, FFlateError> {
518    if (d[0] & 15) != 8
519        || d[0] >> 4 > 7
520        || !(((d[0] as u32) << 8_u32) | d[1] as u32).is_multiple_of(31)
521    {
522        return Err(FFlateError::NoCallback);
523    }
524    if ((d[1] >> 5) & 1) == 0 && dict.is_some() {
525        return Err(FFlateError::NoCallback);
526    }
527
528    Ok((((d[1] as usize) >> 3) & 4) + 2)
529}
530
531#[cfg(test)]
532mod tests {
533    use super::*;
534
535    #[test]
536    fn test_compute_rev() {
537        let rev_local = compute_rev();
538        assert_eq!(rev_local, REV);
539    }
540}