nydus_utils/compress/
zlib_random.rs

1// Copyright (C) 2022 Alibaba Cloud. All rights reserved.
2//
3// SPDX-License-Identifier: Apache-2.0
4
5//! Generate context information to randomly access gzip/zlib stream.
6
7use std::alloc::{self, Layout};
8use std::convert::TryFrom;
9use std::io::{Read, Result};
10use std::ops::DerefMut;
11use std::os::raw::{c_int, c_void};
12use std::sync::{Arc, Mutex};
13use std::{mem, ptr};
14
15use libz_sys::{
16    inflate, inflateEnd, inflateInit2_, inflatePrime, inflateReset, inflateReset2,
17    inflateSetDictionary, uInt, z_stream, zlibVersion, Z_BLOCK, Z_BUF_ERROR, Z_OK, Z_STREAM_END,
18};
19use sha2::{Digest, Sha256};
20
/// Size of inflate dictionary to support random access (`1 << 15` bytes, i.e. 32 KiB).
pub const ZRAN_DICT_WIN_SIZE: usize = 1 << 15;
/// Maximum number of random access slices per compression object.
pub const ZRAN_MAX_CI_ENTRIES: usize = 1 << 24;
/// Buffer size for ZRAN reader (64 KiB), used to size the compressed input buffer.
pub const ZRAN_READER_BUF_SIZE: usize = 64 * 1024;

// Default slice sizing thresholds installed by `ZranGenerator::new()`; each of them can be
// overridden through the corresponding `ZranGenerator::set_*()` method.
const ZRAN_MIN_COMP_SIZE: u64 = 768 * 1024;
const ZRAN_MAX_COMP_SIZE: u64 = 2048 * 1024;
const ZRAN_MAX_UNCOMP_SIZE: u64 = 2048 * 1024;
// Alignment used by the custom `zalloc`/`zfree` allocator callbacks, which store a `usize`
// size header in front of every allocation.
const ZLIB_ALIGN: usize = std::mem::align_of::<usize>();
32
/// Information to retrieve a data chunk from an associated random access slice.
///
/// Instances are produced by `ZranGenerator::end_read()` for the read transaction started by
/// the preceding `ZranGenerator::begin_read()`.
#[derive(Debug, Eq, PartialEq)]
pub struct ZranChunkInfo {
    /// Index into the inflate context array for the associated inflate context.
    pub ci_index: u32,
    /// Offset to get data chunk from the uncompressed content, relative to the start of the
    /// uncompressed data covered by the associated inflate context.
    pub ci_offset: u32,
    /// Size of the uncompressed chunk data.
    pub ci_len: u32,
    /// Position in the compressed data stream at the time the read transaction began.
    pub in_pos: u64,
    /// Size of compressed data in input stream consumed during the read transaction.
    pub in_len: u32,
}
47
/// Context information to decode data from a random access slice.
///
/// A context records where a slice starts in both the compressed and uncompressed streams,
/// plus the bit-level state and dictionary needed to restart inflation at that position.
pub struct ZranContext {
    /// Offset in the original compressed data stream.
    pub in_offset: u64,
    /// Offset in the uncompressed data stream.
    pub out_offset: u64,
    /// Size of original compressed data.
    pub in_len: u32,
    /// Size of uncompressed data.
    pub out_len: u32,
    /// Optional previous byte in the original compressed data stream, used when `ctx_bits` is non-zero.
    pub ctx_byte: u8,
    /// Number of bits from the previous byte to feed into the inflate context for random access.
    pub ctx_bits: u8,
    /// Inflate dictionary for random access.
    pub dict: Vec<u8>,
}
65
66impl ZranContext {
67    fn new(info: &ZranCompInfo, dict: Vec<u8>) -> Self {
68        ZranContext {
69            in_offset: info.in_pos,
70            out_offset: info.out_pos,
71            in_len: 0,
72            out_len: 0,
73            ctx_byte: info.previous_byte,
74            ctx_bits: info.pending_bits,
75            dict,
76        }
77    }
78}
79
/// Gzip/zlib decoder to randomly uncompress Gzip/zlib stream.
///
/// The decoder restarts inflation in the middle of a stream by using the position, pending
/// bits and dictionary recorded in a [`ZranContext`].
pub struct ZranDecoder {
    /// Inflate stream reused across `uncompress()` calls; it is reset at the start of each call.
    stream: ZranStream,
}
84
impl ZranDecoder {
    /// Create a new instance of `ZranDecoder`.
    ///
    /// The underlying stream is created in raw inflate mode (`ZranStream::new(true)`).
    ///
    /// # Errors
    /// Returns an error if the zlib inflate context cannot be initialized.
    pub fn new() -> Result<Self> {
        let stream = ZranStream::new(true)?;
        Ok(Self { stream })
    }

    /// Uncompress gzip/zlib compressed data chunk.
    ///
    /// # Arguments
    /// - ctx: context to random access compressed stream.
    /// - dict: use this dictionary instead of `ctx.dict` to decode data
    /// - input: input compressed data stream
    /// - output: buffer to receive uncompressed data
    ///
    /// # Returns
    /// The number of bytes written into `output`, which equals `ctx.out_len` on success.
    ///
    /// # Errors
    /// - `input.len()` differs from `ctx.in_len`, or `output` is smaller than `ctx.out_len`.
    /// - The inflate context cannot be reset, primed or given the dictionary.
    /// - zlib reports a decoding error, the decoded size does not match `ctx.out_len`, or a
    ///   gzip member ends without a complete 8-byte trailer in `input`.
    pub fn uncompress(
        &mut self,
        ctx: &ZranContext,
        dict: Option<&[u8]>,
        input: &[u8],
        output: &mut [u8],
    ) -> Result<usize> {
        if input.len() != ctx.in_len as usize {
            return Err(einval!("size of input buffer doesn't match"));
        } else if ctx.out_len as usize > output.len() {
            return Err(einval!("buffer to receive decompressed data is too small"));
        }

        // Restore the inflate state recorded in `ctx`: the pending bits of the previous byte
        // (if any) first, then the dictionary.
        self.stream.reset()?;
        if ctx.ctx_bits != 0 {
            let bits = ctx.ctx_bits & 0x7;
            self.stream.set_prime(bits, ctx.ctx_byte)?;
        }
        let dict = dict.unwrap_or(ctx.dict.as_slice());
        self.stream.set_dict(dict)?;

        self.stream.set_next_in(input);

        // `left` tracks how many of the `ctx.out_len` output bytes are still to be produced.
        let mut left = ctx.out_len;
        loop {
            let used = (ctx.out_len - left) as usize;
            self.stream.set_next_out(&mut output[used..]);
            self.stream.set_avail_out(left as uInt);
            // Measure the bytes produced by this inflate call from the change in `avail_out`.
            let mut got = self.stream.avail_out();
            let mut ret = self.stream.raw_inflate(0);
            got -= self.stream.avail_out();
            left -= got;

            match ret {
                Z_OK => {
                    // Inflate stopped without reaching the end of a stream; the call only
                    // succeeds if exactly `ctx.out_len` bytes have been produced in total.
                    let count = self.stream.next_out() as usize - output.as_ptr() as usize;
                    if count != ctx.out_len as usize {
                        return Err(eio!("failed to decode data from stream, size mismatch"));
                    } else {
                        return Ok(count);
                    }
                }
                Z_STREAM_END => {
                    // The current gzip member ended inside the slice; move on to the next one.
                    // Discard the gzip trailer.
                    let drop = 8;
                    if self.stream.avail_in() >= drop {
                        let avail_in = self.stream.avail_in();
                        let used = input.len() - avail_in as usize + drop as usize;
                        self.stream.set_next_in(&input[used..]);
                    } else {
                        // The input does not have a complete trailer.
                        return Err(eio!("the input does not have a complete gzip trailer"));
                    }
                    // Use inflate to skip the gzip header and resume the raw inflate there.
                    self.stream.reset2(true)?;
                    let mut discard = vec![0u8; ZRAN_DICT_WIN_SIZE];
                    loop {
                        self.stream.set_next_out(&mut discard);
                        self.stream.set_avail_out(ZRAN_DICT_WIN_SIZE as u32);
                        ret = self.stream.raw_inflate(Z_BLOCK); // stop at end of header
                        // Keep inflating until the 0x80 flag shows up in `data_type`.
                        if ret == Z_OK && (self.stream.data_type() & 0x80) == 0 {
                            continue;
                        }

                        if ret != Z_OK {
                            return Err(eio!(format!(
                                "failed to handle gzip multi member, ret: {:?}",
                                ret
                            )));
                        }
                        // Header skipped: switch back to raw inflate and continue the outer
                        // loop to decode the remaining output bytes.
                        self.stream.reset2(false)?;
                        break;
                    }
                }
                e => {
                    return Err(eio!(format!(
                        "failed to decode data from compressed data stream, ret: {}",
                        e
                    )))
                }
            }
        }
    }
}
183
/// Struct to generate random access information for OCIv1 image tarballs.
///
/// `ZranGenerator` generates decompression context information to support random access to the
/// tarball later. It only tracks information related to Tar file content, and ignores all other
/// tar headers and zlib headers when possible. The work flow is:
/// 1) create a `ZranGenerator` object `zran`.
/// 2) create a tar::Archive object from `zran`.
/// 3) walk all entries in the tarball, for each tar regular file:
///     3.1) get file size and split it into chunks, for each file data chunk
///     3.2) call zran.begin_read()
///     3.3) read file content from the tar Entry object
///     3.4) call zran.end_read() to get chunk decompression information
/// 4) call zran.get_compression_ctx_array() to get all decompression context information for
///    random access later
pub struct ZranGenerator<R> {
    /// Reader providing the uncompressed data and the inflate position information.
    reader: ZranReader<R>,
    /// Minimal compressed size of a slice, see `set_min_compressed_size()`.
    min_comp_size: u64,
    /// Maximum compressed size of a slice, see `set_max_compressed_size()`.
    max_comp_size: u64,
    /// Maximum uncompressed size of a slice, see `set_max_uncompressed_size()`.
    max_uncomp_size: u64,
    /// Compressed stream position of the inflate block at which the latest slice was created.
    curr_block_start: u64,
    /// Uncompressed stream position recorded by the latest `begin_read()`.
    curr_ci_offset: u64,
    /// Compressed stream position recorded by the latest `begin_read()`.
    curr_in_offset: u64,
    /// Index into `ci_array` of the slice currently in use, `None` before the first read.
    curr_ci_idx: Option<usize>,
    /// All random access contexts generated so far.
    ci_array: Vec<ZranContext>,
}
209
210impl<R: Read> ZranGenerator<R> {
211    /// Create a new instance of `ZranGenerator` from a reader.
212    pub fn new(reader: ZranReader<R>) -> Self {
213        Self {
214            reader,
215            min_comp_size: ZRAN_MIN_COMP_SIZE,
216            max_comp_size: ZRAN_MAX_COMP_SIZE,
217            max_uncomp_size: ZRAN_MAX_UNCOMP_SIZE,
218            curr_block_start: 0,
219            curr_ci_offset: 0,
220            curr_in_offset: 0,
221            curr_ci_idx: None,
222            ci_array: Vec::new(),
223        }
224    }
225
226    /// Begin a transaction to read data from the zlib stream.
227    ///
228    /// # Arguments
229    /// - `chunk_size`: size of data to be read from the zlib stream.
230    #[allow(clippy::if_same_then_else)]
231    pub fn begin_read(&mut self, chunk_size: u64) -> Result<u32> {
232        let info = self.reader.get_current_ctx_info();
233        let ci_idx = if let Some(idx) = self.curr_ci_idx {
234            let ctx = &self.ci_array[idx];
235            let comp_size = info.in_pos - ctx.in_offset;
236            let uncomp_size = info.out_pos - ctx.out_offset;
237            let first = self.is_first_block();
238            let enough = !first
239                && (comp_size >= self.max_comp_size / 2
240                    || uncomp_size + chunk_size >= self.max_uncomp_size);
241            if info.stream_switched != 0 || enough {
242                // The slice becomes too big after merging current data chunk.
243                self.new_ci_entry()?
244            } else if !first
245                && comp_size > 2 * ctx.in_len as u64
246                && ctx.in_len as u64 > self.min_comp_size
247            {
248                // The gap between current chunk and last chunk is too big.
249                self.new_ci_entry()?
250            } else {
251                idx
252            }
253        } else {
254            self.new_ci_entry()?
255        };
256
257        if ci_idx > ZRAN_MAX_CI_ENTRIES {
258            Err(einval!("too many compression information entries"))
259        } else {
260            self.curr_ci_idx = Some(ci_idx);
261            self.curr_ci_offset = info.out_pos;
262            self.curr_in_offset = info.in_pos;
263            Ok(ci_idx as u32)
264        }
265    }
266
267    /// Mark end of a data read operation and returns information to decode data from the random
268    /// access slice.
269    pub fn end_read(&mut self) -> Result<ZranChunkInfo> {
270        let info = self.reader.get_current_ctx_info();
271        if let Some(idx) = self.curr_ci_idx {
272            let ctx = &mut self.ci_array[idx];
273            let comp_size = info.in_pos - ctx.in_offset;
274            let uncomp_size = info.out_pos - ctx.out_offset;
275            let ci = ZranChunkInfo {
276                ci_index: idx as u32,
277                ci_offset: (self.curr_ci_offset - ctx.out_offset) as u32,
278                ci_len: (info.out_pos - self.curr_ci_offset) as u32,
279                in_pos: self.curr_in_offset,
280                in_len: (info.in_pos - self.curr_in_offset) as u32,
281            };
282            ctx.out_len = uncomp_size as u32;
283            ctx.in_len = comp_size as u32;
284            Ok(ci)
285        } else {
286            Err(einval!("invalid compression state"))
287        }
288    }
289
290    /// Get an immutable reference to the random access context information array.
291    pub fn get_compression_ctx_array(&self) -> &[ZranContext] {
292        &self.ci_array
293    }
294
295    /// Set minimal compressed size to emit an random access slice.
296    ///
297    /// Please ensure "min_compressed_size * 2 <= max_compressed_size".
298    pub fn set_min_compressed_size(&mut self, sz: u64) {
299        self.min_comp_size = sz;
300    }
301
302    /// Set maximum compressed size to emit an random access slice.
303    ///
304    /// Please ensure "min_compressed_size * 2 <= max_compressed_size".
305    pub fn set_max_compressed_size(&mut self, sz: u64) {
306        self.max_comp_size = sz;
307    }
308
309    /// Set maximum uncompressed size to emit an random access slice.
310    ///
311    /// Please ensure "min_compressed_size * 2 < max_compressed_size".
312    pub fn set_max_uncompressed_size(&mut self, sz: u64) {
313        self.max_uncomp_size = sz;
314    }
315
316    fn new_ci_entry(&mut self) -> Result<usize> {
317        let info = self.reader.get_block_ctx_info();
318        let dict = self.reader.get_block_ctx_dict();
319        self.ci_array.push(ZranContext::new(&info, dict));
320        self.curr_block_start = info.in_pos;
321        Ok(self.ci_array.len() - 1)
322    }
323
324    fn is_first_block(&self) -> bool {
325        let info = self.reader.get_block_ctx_info();
326        info.in_pos == self.curr_block_start
327    }
328}
329
330impl<R: Read> Read for ZranGenerator<R> {
331    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
332        self.reader.read(buf)
333    }
334}
335
/// A specialized gzip reader for OCI image tarballs.
///
/// This reader assumes that the compressed file is a tar file, and restricts access patterns.
///
/// Cloning a `ZranReader` yields another handle to the same underlying state.
pub struct ZranReader<R> {
    /// Reader state shared by all clones; every access takes the mutex.
    inner: Arc<Mutex<ZranReaderState<R>>>,
}
342
343impl<R> ZranReader<R> {
344    /// Create a `ZranReader` from a reader.
345    pub fn new(reader: R) -> Result<Self> {
346        let inner = ZranReaderState::new(reader)?;
347        Ok(Self {
348            inner: Arc::new(Mutex::new(inner)),
349        })
350    }
351
352    /// Copy data from the buffer into the internal input buffer.
353    pub fn set_initial_data(&self, buf: &[u8]) {
354        let mut state = self.inner.lock().unwrap();
355        assert_eq!(state.stream.avail_in(), 0);
356        assert!(buf.len() <= state.input.len());
357        let ptr = state.input.as_mut_ptr();
358        assert_eq!(state.stream.stream.next_in, ptr);
359
360        state.input[..buf.len()].copy_from_slice(buf);
361        state.reader_hash.update(buf);
362        state.reader_size += buf.len() as u64;
363        state.stream.set_avail_in(buf.len() as u32);
364    }
365
366    /// Get size of data read from the reader.
367    pub fn get_data_size(&self) -> u64 {
368        self.inner.lock().unwrap().reader_size
369    }
370
371    /// Get sha256 hash value of data read from the reader.
372    pub fn get_data_digest(&self) -> Sha256 {
373        self.inner.lock().unwrap().reader_hash.clone()
374    }
375
376    /// Get inflate context information for current inflate position.
377    fn get_current_ctx_info(&self) -> ZranCompInfo {
378        self.inner.lock().unwrap().get_compression_info()
379    }
380
381    /// Get inflate context information for current inflate block.
382    fn get_block_ctx_info(&self) -> ZranCompInfo {
383        self.inner.lock().unwrap().block_ctx_info
384    }
385
386    /// Get inflate dictionary for current inflate block.
387    fn get_block_ctx_dict(&self) -> Vec<u8> {
388        let state = self.inner.lock().unwrap();
389        state.block_ctx_dict[..state.block_ctx_dict_size].to_vec()
390    }
391}
392
393impl<R: Read> Read for ZranReader<R> {
394    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
395        self.inner.lock().unwrap().read(buf)
396    }
397}
398
399impl<R> Clone for ZranReader<R> {
400    fn clone(&self) -> Self {
401        Self {
402            inner: self.inner.clone(),
403        }
404    }
405}
406
/// Snapshot of the inflate position, captured by `ZranStream::get_compression_info()`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
struct ZranCompInfo {
    /// Number of compressed bytes consumed so far.
    in_pos: u64,
    /// Number of uncompressed bytes produced so far.
    out_pos: u64,
    /// Copy of the zlib stream's `data_type` field at the time of the snapshot.
    flags: u32,
    /// Byte preceding the next input byte when `pending_bits` is non-zero, otherwise 0.
    previous_byte: u8,
    /// Low three bits of the zlib stream's `data_type` field.
    pending_bits: u8,
    /// Non-zero when the reader reached the end of a compressed stream since the previous
    /// snapshot was taken through `ZranReaderState::get_compression_info()`.
    stream_switched: u8,
}
416
/// Mutable state behind a `ZranReader`.
struct ZranReaderState<R> {
    /// Inflate stream used to decode data from `input`.
    stream: ZranStream,
    /// Buffer holding compressed data read from `reader`.
    input: Vec<u8>,
    /// Source of the compressed data.
    reader: R,
    /// Running sha256 hasher over all compressed data consumed.
    reader_hash: Sha256,
    /// Number of compressed bytes consumed.
    reader_size: u64,
    /// Inflate position snapshot taken the last time the 0x80 flag was observed.
    block_ctx_info: ZranCompInfo,
    /// Inflate dictionary captured together with `block_ctx_info`.
    block_ctx_dict: Vec<u8>,
    /// Number of valid bytes in `block_ctx_dict`.
    block_ctx_dict_size: usize,
    /// Set to 1 when the end of a compressed stream is reached; cleared when a position
    /// snapshot is taken.
    stream_switched: u8,
}
428
429impl<R> ZranReaderState<R> {
430    fn new(reader: R) -> Result<Self> {
431        let mut stream = ZranStream::new(false)?;
432        let input = vec![0u8; ZRAN_READER_BUF_SIZE];
433        stream.set_next_in(&input[0..0]);
434
435        Ok(ZranReaderState {
436            stream,
437            input,
438            reader,
439            reader_hash: Sha256::new(),
440            reader_size: 0,
441            block_ctx_info: ZranCompInfo::default(),
442            block_ctx_dict: vec![0u8; ZRAN_DICT_WIN_SIZE],
443            block_ctx_dict_size: 0,
444            stream_switched: 0,
445        })
446    }
447
448    /// Get decompression information about the stream.
449    fn get_compression_info(&mut self) -> ZranCompInfo {
450        let stream_switched = self.stream_switched;
451        self.stream_switched = 0;
452        self.stream
453            .get_compression_info(&self.input, stream_switched)
454    }
455
456    fn get_compression_dict(&mut self) -> Result<()> {
457        self.block_ctx_dict_size = self.stream.get_compression_dict(&mut self.block_ctx_dict)?;
458        Ok(())
459    }
460}
461
impl<R: Read> Read for ZranReaderState<R> {
    /// Inflate data from the underlying reader into `buf`.
    ///
    /// Returns the number of uncompressed bytes written, or `Ok(0)` once the underlying
    /// reader has no more data. Errors from the underlying reader and decoding failures
    /// reported by zlib are returned as errors.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
        self.stream.set_next_out(buf);
        self.stream.set_avail_out(buf.len() as u32);

        loop {
            // Reload the input buffer when needed.
            if self.stream.avail_in() == 0 {
                // Remember the last consumed byte before the buffer gets overwritten, it may
                // be needed as `previous_byte` by `get_compression_info()`.
                if self.stream.stream.next_in > self.input.as_mut_ptr() {
                    self.stream.last_byte = unsafe { *self.stream.stream.next_in.sub(1) };
                }
                let sz = self.reader.read(self.input.as_mut_slice())?;
                if sz == 0 {
                    return Ok(0);
                }
                self.reader_hash.update(&self.input[0..sz]);
                self.reader_size += sz as u64;
                self.stream.set_next_in(&self.input[..sz]);
            }

            match self.stream.inflate(false) {
                Z_STREAM_END => {
                    // End of the current compressed stream: reset the inflate context and
                    // flag the switch so that the next position snapshot reports it.
                    self.stream.reset()?;
                    self.stream_switched = 1;
                    continue;
                }
                Z_OK => {
                    let count = self.stream.next_out() as usize - buf.as_ptr() as usize;
                    let info = self.get_compression_info();
                    // When the 0x80 flag is set, record the position and dictionary as the
                    // restart point for random access.
                    if info.flags & 0x80 != 0 {
                        self.get_compression_dict()?;
                        self.block_ctx_info = info;
                    }
                    if count == 0 {
                        // zlib/gzip compression header, continue for next data block.
                        continue;
                    } else {
                        return Ok(count);
                    }
                }
                Z_BUF_ERROR => {
                    if self.stream.avail_in() == 0 {
                        // Need more input data, continue to feed data into the input buffer.
                        continue;
                    } else {
                        return Err(eio!("failed to decode data from compressed data stream"));
                    }
                }
                e => {
                    return Err(eio!(format!(
                        "failed to decode data from compressed data stream, error code {}",
                        e
                    )));
                }
            }
        }
    }
}
520
/// Wrapper around a zlib inflate stream which also tracks 64-bit stream positions.
struct ZranStream {
    /// The zlib stream object, boxed and handed to zlib by raw pointer.
    stream: Box<z_stream>,
    /// Total number of input bytes consumed through `inflate()`.
    total_in: u64,
    /// Total number of output bytes produced through `inflate()`.
    total_out: u64,
    /// Last byte consumed from the previous content of the input buffer, saved by the reader
    /// before it refills the buffer.
    last_byte: u8,
}
527
528impl ZranStream {
529    fn new(decode: bool) -> Result<Self> {
530        let mut stream = Box::new(z_stream {
531            next_in: ptr::null_mut(),
532            avail_in: 0,
533            total_in: 0,
534            next_out: ptr::null_mut(),
535            avail_out: 0,
536            total_out: 0,
537            msg: ptr::null_mut(),
538            adler: 0,
539            data_type: 0,
540            reserved: 0,
541            opaque: ptr::null_mut(),
542            state: ptr::null_mut(),
543            zalloc,
544            zfree,
545        });
546        // windowBits can also be greater than 15 for optional gzip decoding.
547        // Add 32 to windowBits to enable zlib and gzip decoding with automatic header detection,
548        // or add 16 to decode only the gzip format (the zlib format will return a Z_DATA_ERROR).
549        // -15 means raw mode.
550        let mode = if decode { -15 } else { 31 };
551        let ret = unsafe {
552            inflateInit2_(
553                stream.deref_mut() as *mut z_stream,
554                mode,
555                zlibVersion(),
556                mem::size_of::<z_stream>() as c_int,
557            )
558        };
559        if ret != Z_OK {
560            return Err(einval!("failed to initialize zlib inflate context"));
561        }
562
563        Ok(Self {
564            stream,
565            total_in: 0,
566            total_out: 0,
567            last_byte: 0,
568        })
569    }
570
571    fn inflate(&mut self, decode: bool) -> i32 {
572        // Z_BLOCK requests that inflate() stop if and when it gets to the next deflate block
573        // boundary.  When decoding the zlib or gzip format, this will cause inflate() to return
574        // immediately after the header and before the first block.  When doing a raw inflate,
575        // inflate() will go ahead and process the first block, and will return when it gets to
576        // the end of that block, or when it runs out of data.
577        let mode = if decode { 0 } else { Z_BLOCK };
578        self.total_in += self.stream.avail_in as u64;
579        self.total_out += self.stream.avail_out as u64;
580        let ret = self.raw_inflate(mode);
581        self.total_in -= self.stream.avail_in as u64;
582        self.total_out -= self.stream.avail_out as u64;
583        ret
584    }
585
586    fn raw_inflate(&mut self, mode: i32) -> i32 {
587        unsafe { inflate(self.stream.deref_mut() as *mut z_stream, mode) }
588    }
589
590    fn reset(&mut self) -> Result<()> {
591        let ret = unsafe { inflateReset(self.stream.deref_mut() as *mut z_stream) };
592        if ret != Z_OK {
593            return Err(einval!("failed to reset zlib inflate context"));
594        }
595        Ok(())
596    }
597
598    fn reset2(&mut self, is_gzip: bool) -> Result<()> {
599        let winodw_bits = if is_gzip { 31 } else { -15 };
600        let ret = unsafe { inflateReset2(self.stream.deref_mut() as *mut z_stream, winodw_bits) };
601        if ret != Z_OK {
602            return Err(einval!("failed to reset zlib inflate context"));
603        }
604        Ok(())
605    }
606
607    fn get_compression_info(&mut self, buf: &[u8], stream_switched: u8) -> ZranCompInfo {
608        let previous_byte = if self.stream.data_type & 0x7 != 0 {
609            assert!(self.stream.next_in as usize >= buf.as_ptr() as usize);
610            if self.stream.next_in as usize == buf.as_ptr() as usize {
611                self.last_byte
612            } else {
613                unsafe { *self.stream.next_in.sub(1) }
614            }
615        } else {
616            0
617        };
618        ZranCompInfo {
619            in_pos: self.total_in,
620            out_pos: self.total_out,
621            flags: self.stream.data_type as u32,
622            previous_byte,
623            pending_bits: self.stream.data_type as u8 & 0x7,
624            stream_switched,
625        }
626    }
627
628    fn get_compression_dict(&mut self, buf: &mut [u8]) -> Result<usize> {
629        let mut len: uInt = 0;
630        assert_eq!(buf.len(), ZRAN_DICT_WIN_SIZE);
631
632        let ret = unsafe {
633            inflateGetDictionary(
634                self.stream.deref_mut() as *mut z_stream,
635                buf.as_mut_ptr(),
636                &mut len as *mut uInt,
637            )
638        };
639
640        if ret != Z_OK {
641            Err(einval!("failed to get inflate dictionary"))
642        } else {
643            Ok(len as usize)
644        }
645    }
646
647    fn set_dict(&mut self, dict: &[u8]) -> Result<()> {
648        let ret = unsafe {
649            inflateSetDictionary(self.stream.deref_mut(), dict.as_ptr(), dict.len() as uInt)
650        };
651        if ret != Z_OK {
652            return Err(einval!("failed to reset zlib inflate context"));
653        }
654        Ok(())
655    }
656
657    fn set_prime(&mut self, bits: u8, prime: u8) -> Result<()> {
658        let ret = unsafe {
659            inflatePrime(
660                self.stream.deref_mut(),
661                bits as c_int,
662                prime as c_int >> (8 - bits),
663            )
664        };
665        if ret != Z_OK {
666            return Err(einval!("failed to reset zlib inflate context"));
667        }
668        Ok(())
669    }
670
671    fn set_next_in(&mut self, buf: &[u8]) {
672        self.stream.next_in = buf.as_ptr() as *mut u8;
673        self.set_avail_in(buf.len() as u32);
674    }
675
676    fn avail_in(&self) -> u32 {
677        self.stream.avail_in
678    }
679
680    fn avail_out(&self) -> u32 {
681        self.stream.avail_out
682    }
683
684    fn data_type(&self) -> i32 {
685        self.stream.data_type
686    }
687
688    fn set_avail_in(&mut self, avail_in: u32) {
689        self.stream.avail_in = avail_in;
690    }
691
692    fn next_out(&self) -> *mut u8 {
693        self.stream.next_out
694    }
695
696    fn set_next_out(&mut self, buf: &mut [u8]) {
697        self.stream.next_out = buf.as_mut_ptr();
698    }
699
700    fn set_avail_out(&mut self, avail_out: u32) {
701        self.stream.avail_out = avail_out;
702    }
703}
704
705impl Drop for ZranStream {
706    fn drop(&mut self) {
707        unsafe { inflateEnd(self.stream.deref_mut() as *mut z_stream) };
708    }
709}
710
// Code from https://github.com/rust-lang/flate2-rs/blob/main/src/ffi/c.rs with modification.
/// Round `size` up to the next multiple of `align`.
///
/// `align` is expected to be a power of two, as the rounding is done by bit masking.
fn align_up(size: usize, align: usize) -> usize {
    let bumped = size + align - 1;
    let low_bits = align - 1;
    bumped & !low_bits
}
715
716#[allow(unused)]
717extern "C" fn zalloc(_ptr: *mut c_void, items: uInt, item_size: uInt) -> *mut c_void {
718    // We need to multiply `items` and `item_size` to get the actual desired
719    // allocation size. Since `zfree` doesn't receive a size argument we
720    // also need to allocate space for a `usize` as a header so we can store
721    // how large the allocation is to deallocate later.
722    let size = match items
723        .checked_mul(item_size)
724        .and_then(|i| usize::try_from(i).ok())
725        .map(|size| align_up(size, ZLIB_ALIGN))
726        .and_then(|i| i.checked_add(std::mem::size_of::<usize>()))
727    {
728        Some(i) => i,
729        None => return ptr::null_mut(),
730    };
731
732    // Make sure the `size` isn't too big to fail `Layout`'s restrictions
733    let layout = match Layout::from_size_align(size, ZLIB_ALIGN) {
734        Ok(layout) => layout,
735        Err(_) => return ptr::null_mut(),
736    };
737
738    unsafe {
739        // Allocate the data, and if successful store the size we allocated
740        // at the beginning and then return an offset pointer.
741        let ptr = alloc::alloc(layout) as *mut usize;
742        if ptr.is_null() {
743            return ptr as *mut c_void;
744        }
745        *ptr = size;
746        ptr.add(1) as *mut c_void
747    }
748}
749
750#[allow(unused)]
751extern "C" fn zfree(_ptr: *mut c_void, address: *mut c_void) {
752    unsafe {
753        // Move our address being freed back one pointer, read the size we
754        // stored in `zalloc`, and then free it using the standard Rust
755        // allocator.
756        let ptr = (address as *mut usize).offset(-1);
757        let size = *ptr;
758        let layout = Layout::from_size_align_unchecked(size, ZLIB_ALIGN);
759        alloc::dealloc(ptr as *mut u8, layout)
760    }
761}
762
// Manual declaration of zlib's `inflateGetDictionary()`, which is not among the functions
// imported from `libz_sys` at the top of this file.
// NOTE(review): this block uses the "system" ABI while the `zalloc`/`zfree` callbacks use "C";
// confirm this matches the zlib build on targets where the two ABIs differ.
extern "system" {
    /// Copy the current inflate dictionary into `dictionary` and store its length into
    /// `dictLength`; `ZranStream::get_compression_dict()` passes a buffer of
    /// `ZRAN_DICT_WIN_SIZE` bytes and treats any return value other than `Z_OK` as failure.
    pub fn inflateGetDictionary(
        strm: *mut z_stream,
        dictionary: *mut u8,
        dictLength: *mut uInt,
    ) -> c_int;
}
770
771#[cfg(test)]
772mod tests {
773    use super::*;
774    use std::fs::OpenOptions;
775    use std::io::{Seek, SeekFrom};
776    use std::path::PathBuf;
777    use tar::{Archive, EntryType};
778
779    #[test]
780    fn test_parse_single_gzip_object() {
781        let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR");
782        let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-single-stream.tar.gz");
783        let file = OpenOptions::new().read(true).open(path).unwrap();
784
785        let mut files = 0;
786        let mut objects = 0;
787        let reader = ZranReader::new(file).unwrap();
788        let mut tar = Archive::new(reader);
789        let entries = tar.entries().unwrap();
790        for entry in entries {
791            let entry = entry.unwrap();
792            objects += 1;
793            if entry.header().entry_type() == EntryType::Regular {
794                files += 1;
795            }
796        }
797
798        assert_eq!(objects, 7);
799        assert_eq!(files, 3);
800    }
801
802    #[test]
803    fn test_parse_first_gzip_object() {
804        let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR");
805        let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-two-streams.tar.gz");
806        let file = OpenOptions::new().read(true).open(path).unwrap();
807
808        let mut files = 0;
809        let mut objects = 0;
810        let reader = ZranReader::new(file).unwrap();
811        let mut tar = Archive::new(reader);
812
813        let entries = tar.entries().unwrap();
814        for entry in entries {
815            let entry = entry.unwrap();
816            objects += 1;
817            if entry.header().entry_type() == EntryType::Regular {
818                files += 1;
819            }
820        }
821
822        assert_eq!(objects, 7);
823        assert_eq!(files, 3);
824    }
825
826    #[test]
827    fn test_parse_two_gzip_objects() {
828        let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR");
829        let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-two-streams.tar.gz");
830        let file = OpenOptions::new().read(true).open(path).unwrap();
831
832        let mut files = 0;
833        let mut objects = 0;
834        let reader = ZranReader::new(file).unwrap();
835        let mut tar = Archive::new(reader);
836        tar.set_ignore_zeros(true);
837
838        let entries = tar.entries().unwrap();
839        for entry in entries {
840            let entry = entry.unwrap();
841            objects += 1;
842            if entry.header().entry_type() == EntryType::Regular {
843                files += 1;
844            }
845        }
846
847        assert_eq!(objects, 10);
848        assert_eq!(files, 5);
849    }
850
851    #[test]
852    fn test_parse_gzip_with_big_zero() {
853        let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR");
854        let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-zero-file.tar.gz");
855        let file = OpenOptions::new().read(true).open(path).unwrap();
856        let reader = ZranReader::new(file).unwrap();
857        let mut tar = Archive::new(reader.clone());
858        let entries = tar.entries().unwrap();
859
860        let mut last: Option<ZranCompInfo> = None;
861        for entry in entries {
862            let mut entry = entry.unwrap();
863            assert_eq!(entry.header().entry_type(), EntryType::Regular);
864            loop {
865                let mut buf = vec![0u8; 512];
866                let sz = entry.read(&mut buf).unwrap();
867                if sz == 0 {
868                    break;
869                }
870
871                let info = reader.get_current_ctx_info();
872                if let Some(prev) = last {
873                    assert_ne!(prev, info);
874                }
875                last = Some(info);
876            }
877        }
878    }
879
880    #[test]
881    fn test_generate_comp_info() {
882        let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR");
883        let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-two-streams.tar.gz");
884        let file = OpenOptions::new().read(true).open(path).unwrap();
885
886        let reader = ZranReader::new(file).unwrap();
887        let mut tar = Archive::new(reader.clone());
888        tar.set_ignore_zeros(true);
889        let mut generator = ZranGenerator::new(reader);
890        generator.set_min_compressed_size(1024);
891        generator.set_max_compressed_size(2048);
892        generator.set_max_uncompressed_size(4096);
893
894        let entries = tar.entries().unwrap();
895        for entry in entries {
896            let mut entry = entry.unwrap();
897            if entry.header().entry_type() == EntryType::Regular {
898                loop {
899                    let _start = generator.begin_read(512).unwrap();
900                    let mut buf = vec![0u8; 512];
901                    let sz = entry.read(&mut buf).unwrap();
902                    if sz == 0 {
903                        break;
904                    }
905                    let _info = generator.end_read().unwrap();
906                }
907            }
908        }
909
910        let ctx = generator.get_compression_ctx_array();
911        assert_eq!(ctx.len(), 3);
912    }
913
914    #[test]
915    fn test_zran_bgzip() {
916        let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR");
917        let path = PathBuf::from(root_dir).join("../tests/texture/zran/bgzip.tar.gz");
918        let file = OpenOptions::new().read(true).open(&path).unwrap();
919        let reader = ZranReader::new(file).unwrap();
920        let mut tar = Archive::new(reader.clone());
921        tar.set_ignore_zeros(true);
922        let mut generator = ZranGenerator::new(reader);
923        generator.set_min_compressed_size(1024);
924        generator.set_max_compressed_size(2048);
925        generator.set_max_uncompressed_size(4096);
926
927        let entries = tar.entries().unwrap();
928        for entry in entries {
929            let mut entry = entry.unwrap();
930            if entry.header().entry_type() == EntryType::Regular {
931                loop {
932                    let _start = generator.begin_read(512).unwrap();
933                    let mut buf = vec![0u8; 512];
934                    let sz = entry.read(&mut buf).unwrap();
935                    let _info = generator.end_read().unwrap();
936                    if sz == 0 {
937                        break;
938                    }
939                }
940            }
941        }
942
943        let ctx_array = generator.get_compression_ctx_array();
944        for ctx in ctx_array.iter() {
945            let mut c_buf = vec![0u8; ctx.in_len as usize];
946            let mut file = OpenOptions::new().read(true).open(&path).unwrap();
947            file.seek(SeekFrom::Start(ctx.in_offset)).unwrap();
948            file.read_exact(&mut c_buf).unwrap();
949
950            let mut d_buf = vec![0u8; ctx.out_len as usize];
951            let mut decoder = ZranDecoder::new().unwrap();
952            decoder.uncompress(ctx, None, &c_buf, &mut d_buf).unwrap();
953        }
954    }
955
956    #[test]
957    fn test_zran_decoder() {
958        let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR");
959        let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-two-streams.tar.gz");
960        let file = OpenOptions::new().read(true).open(&path).unwrap();
961
962        let reader = ZranReader::new(file).unwrap();
963        let mut tar = Archive::new(reader.clone());
964        tar.set_ignore_zeros(true);
965        let mut generator = ZranGenerator::new(reader);
966        generator.set_min_compressed_size(1024);
967        generator.set_max_compressed_size(2048);
968        generator.set_max_uncompressed_size(4096);
969
970        let entries = tar.entries().unwrap();
971        for entry in entries {
972            let mut entry = entry.unwrap();
973            if entry.header().entry_type() == EntryType::Regular {
974                loop {
975                    let _start = generator.begin_read(512).unwrap();
976                    let mut buf = vec![0u8; 512];
977                    let sz = entry.read(&mut buf).unwrap();
978                    let _info = generator.end_read().unwrap();
979                    if sz == 0 {
980                        break;
981                    }
982                }
983            }
984        }
985
986        let ctx_array = generator.get_compression_ctx_array();
987        assert_eq!(ctx_array.len(), 3);
988        for ctx in ctx_array.iter().take(3) {
989            let mut c_buf = vec![0u8; ctx.in_len as usize];
990            let mut file = OpenOptions::new().read(true).open(&path).unwrap();
991            file.seek(SeekFrom::Start(ctx.in_offset)).unwrap();
992            file.read_exact(&mut c_buf).unwrap();
993
994            let mut d_buf = vec![0u8; ctx.out_len as usize];
995            let mut decoder = ZranDecoder::new().unwrap();
996            decoder.uncompress(ctx, None, &c_buf, &mut d_buf).unwrap();
997        }
998    }
999
1000    #[test]
1001    fn test_zran_reader() {
1002        let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR");
1003        let path = PathBuf::from(root_dir).join("../tests/texture/zran/zran-two-streams.tar.gz");
1004        let file = OpenOptions::new().read(true).open(path).unwrap();
1005
1006        let reader = ZranReader::new(file).unwrap();
1007        assert_eq!(reader.get_data_size(), 0);
1008
1009        let buf = vec![0x0u8; 32];
1010        reader.set_initial_data(&buf);
1011        assert_eq!(reader.get_data_size(), 32);
1012    }
1013}