lz4_java_wrc/
lz4_block_input.rs

1use crate::common::{Checksum, ErrorChecksum, ErrorLz4WrongDecompressedSize, Result};
2use crate::compression::{Compression, Context};
3use crate::lz4_block_header::{CompressionMethod, Lz4BlockHeader};
4
5use std::cmp::min;
6use std::io::Read;
7
/// Wrapper around a [`Read`] object to decompress data.
///
/// The data read from [`Lz4BlockInput`] is first read from the wrapped [`Read`], decompressed and then returned.
///
/// This is an alias of [`Lz4BlockInputBase`] with [`Context`] as the compression implementation.
///
/// # Example
///
/// ```rust
/// use lz4_java_wrc::Lz4BlockInput;
/// use std::io::Read;
///
/// // &[u8] implements the Read trait
/// const D: [u8; 24] = [
///     76, 90, 52, 66, 108, 111, 99, 107, 16, 3, 0, 0, 0, 3, 0, 0, 0, 82, 228, 119, 6, 46, 46, 46,
/// ];
///
/// fn main() -> std::io::Result<()> {
///     let mut output = String::new();
///     Lz4BlockInput::new(&D[..]).read_to_string(&mut output)?;
///     println!("{}", output);
///     Ok(())
/// }
/// ```
pub type Lz4BlockInput<R> = Lz4BlockInputBase<R, Context>;
31
32impl<R: Read> Lz4BlockInput<R> {
33    /// Create a new [`Lz4BlockInput`] with the default [`Compression`] implementation.
34    ///
35    /// See [`Self::with_context()`]
36    pub fn new(r: R) -> Self {
37        Self::with_context(r, Context::default())
38    }
39}
40
41/// Wrapper around a [`Read`] object to decompress data.
42///
43/// Use this struct only if you want to provide your own Compression implementation. Otherwise use the alias [`Lz4BlockInput`].
44#[derive(Debug)]
45pub struct Lz4BlockInputBase<R: Read + Sized, C: Compression> {
46    reader: R,
47    compression: C,
48    compressed_buf: Vec<u8>,
49    decompressed_buf: Vec<u8>,
50    read_ptr: usize,
51    checksum: Checksum,
52    stop_on_empty_block: bool,
53}
54
55impl<R: Read, C: Compression> Lz4BlockInputBase<R, C> {
56    /// Create a new [`Lz4BlockInputBase`] with the default checksum implementation which matches the Java's default implementation, including the missing 4 bits bug.
57    ///
58    /// See [`Self::with_checksum()`]
59    pub fn with_context(r: R, c: C) -> Self {
60        Self::with_checksum(r, c, Lz4BlockHeader::default_checksum, true)
61    }
62
63    /// Create a new [`Lz4BlockInputBase`].
64    ///
65    /// The checksum must return a [`u32`].
66    pub fn with_checksum(
67        r: R,
68        c: C,
69        checksum: fn(&[u8]) -> u32,
70        stop_on_empty_block: bool,
71    ) -> Self {
72        Self {
73            reader: r,
74            compression: c,
75            compressed_buf: Vec::new(),
76            decompressed_buf: Vec::new(),
77            read_ptr: 0,
78            checksum: Checksum::new(checksum),
79            stop_on_empty_block,
80        }
81    }
82
83    fn read_header(&mut self) -> Result<Option<Lz4BlockHeader>> {
84        Ok(loop {
85            match Lz4BlockHeader::read(&mut self.reader)? {
86                None => break None,
87                Some(h) => {
88                    if h.decompressed_len > 0 {
89                        break Some(h);
90                    } else if self.stop_on_empty_block {
91                        break None;
92                    }
93                }
94            };
95        })
96    }
97
98    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
99        if self.read_ptr == self.decompressed_buf.len() {
100            let header = match self.read_header()? {
101                None => return Ok(0),
102                Some(h) => h,
103            };
104
105            ensure_vec(
106                &mut self.decompressed_buf,
107                header.compression_level.get_max_decompressed_buffer_len(),
108                header.decompressed_len,
109            );
110
111            match header.compression_method {
112                CompressionMethod::Raw => self.reader.read_exact(self.decompressed_buf.as_mut())?,
113                CompressionMethod::Lz4 => {
114                    ensure_vec(
115                        &mut self.compressed_buf,
116                        self.compression.get_maximum_compressed_buffer_len(
117                            header.compression_level.get_max_decompressed_buffer_len(),
118                        ),
119                        header.compressed_len,
120                    );
121                    self.reader.read_exact(self.compressed_buf.as_mut())?;
122                    match self
123                        .compression
124                        .decompress(self.compressed_buf.as_ref(), self.decompressed_buf.as_mut())
125                    {
126                        Ok(s) => {
127                            if s != self.decompressed_buf.len() {
128                                return ErrorLz4WrongDecompressedSize::new_error(
129                                    s,
130                                    self.decompressed_buf.len(),
131                                );
132                            }
133                        }
134                        Err(err) => {
135                            return Err(err.into());
136                        }
137                    };
138                }
139            }
140            let computed_checksum = self.checksum.run(self.decompressed_buf.as_ref());
141            if computed_checksum != header.checksum {
142                return ErrorChecksum::new_error(header.checksum, computed_checksum);
143            }
144            self.read_ptr = 0;
145        }
146
147        let size_to_copy = min(buf.len(), self.decompressed_buf.len() - self.read_ptr);
148        buf[..size_to_copy]
149            .copy_from_slice(&self.decompressed_buf[self.read_ptr..self.read_ptr + size_to_copy]);
150        self.read_ptr += size_to_copy;
151        Ok(size_to_copy)
152    }
153}
154
/// Prepare `v` to hold one block.
///
/// The capacity of `v` is raised to at least `max_block_size` if it is smaller, then the length
/// of `v` is set to `desired_len`: bytes already present below that length are kept, bytes added
/// by the growth are zeroed, and bytes above it are dropped.
fn ensure_vec(v: &mut Vec<u8>, max_block_size: usize, desired_len: u32) {
    if v.capacity() < max_block_size {
        // `reserve` counts from the current length. `len <= capacity < max_block_size` holds
        // here, so the subtraction cannot underflow.
        v.reserve(max_block_size - v.len());
    }
    v.resize(desired_len as usize, 0);
}
162
163impl<R: Read, C: Compression> Read for Lz4BlockInputBase<R, C> {
164    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
165        Ok(Self::read(self, buf)?)
166    }
167}
168
#[cfg(test)]
mod test_lz4_block_input {
    use super::Lz4BlockInput;
    use crate::compression::Context;
    use crate::lz4_block_header::data::{VALID_DATA, VALID_EMPTY};

    use std::io::Read;

    /// Constant returned by the custom checksum functions which are expected to be accepted.
    const FIXED_CHECKSUM: u32 = 0x0677e452;

    /// Read `input` to the end with the default configuration.
    fn decode_default(input: &[u8]) -> std::io::Result<Vec<u8>> {
        let mut out = Vec::<u8>::new();
        Lz4BlockInput::new(input).read_to_end(&mut out)?;
        Ok(out)
    }

    /// Read `input` to the end with a custom checksum and empty block policy.
    fn decode_with(
        input: &[u8],
        checksum: fn(&[u8]) -> u32,
        stop_on_empty_block: bool,
    ) -> std::io::Result<Vec<u8>> {
        let mut out = Vec::<u8>::new();
        Lz4BlockInput::with_checksum(input, Context::default(), checksum, stop_on_empty_block)
            .read_to_end(&mut out)?;
        Ok(out)
    }

    /// Build a stream made of `count` consecutive empty blocks.
    fn empty_blocks(count: usize) -> Vec<u8> {
        let mut bytes = Vec::<u8>::new();
        for _ in 0..count {
            bytes.extend_from_slice(&VALID_EMPTY);
        }
        bytes
    }

    #[test]
    fn read_empty() {
        let out = decode_default(&VALID_EMPTY[..]).unwrap();
        assert!(out.is_empty());
    }

    #[test]
    fn read_basic() {
        let out = decode_default(&VALID_DATA[..]).unwrap();
        assert_eq!(out, "...".as_bytes());
    }

    #[test]
    fn read_with_checksum_invalid() {
        let result = decode_with(&VALID_DATA[..], |_| 0x12345678, true);
        assert!(result.is_err());
    }

    #[test]
    fn read_with_checksum_valid() {
        let out = decode_with(&VALID_DATA[..], |_| FIXED_CHECKSUM, true).unwrap();
        assert_eq!(out, "...".as_bytes());
    }

    #[test]
    fn read_with_empty_block_stop() {
        // The bytes following the empty block are never read because the empty block stops the
        // stream.
        let mut input = empty_blocks(1);
        input.extend_from_slice(&[0; 21]);

        let out = decode_with(&input[..], |_| FIXED_CHECKSUM, true).unwrap();
        assert_eq!(out, "".as_bytes());
    }

    #[test]
    fn read_with_empty_block_no_stop() {
        let input = empty_blocks(4);

        let out = decode_with(&input[..], |_| FIXED_CHECKSUM, false).unwrap();
        assert_eq!(out, "".as_bytes());
    }

    #[test]
    fn read_with_empty_block_no_stop_with_error() {
        // Empty blocks are skipped here, so the trailing bytes are read as a block header.
        let mut input = empty_blocks(4);
        input.extend_from_slice(&[0; 21]);

        let result = decode_with(&input[..], |_| FIXED_CHECKSUM, false);
        assert!(result.is_err());
    }
}