1use alloc::vec::Vec;
2use core::cmp;
3use core::mem;
4
5use crate::io;
6use crate::io::prelude::*;
7
8use super::{corrupt, read_into, GzBuilder, GzHeader, GzHeaderParser};
9use crate::crc::CrcReader;
10use crate::deflate;
11use crate::Compression;
12
/// Copies as many bytes as fit from `from[*pos..]` into the front of `into`.
///
/// `pos` is advanced by the number of bytes copied, and that count is
/// returned. The count is the smaller of `into.len()` and the number of
/// bytes left in `from` after `*pos`.
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
    let start = *pos;
    let count = cmp::min(into.len(), from.len() - start);
    let end = start + count;
    into[..count].copy_from_slice(&from[start..end]);
    *pos = end;
    count
}
19
/// Reader-side gzip encoder.
///
/// Its `Read` implementation yields the stored header bytes first, then the
/// output of the wrapped deflate encoder, and finally an 8-byte footer built
/// from the CRC state of the wrapped reader.
///
// NOTE(review): the two `cfg_attr` doc attributes below open a code fence, but
// no example body or closing fence is visible in this file — confirm the doc
// example was not lost.
#[cfg_attr(not(feature = "std"), doc = "```ignore")]
#[cfg_attr(feature = "std", doc = "```")]
#[derive(Debug)]
pub struct GzEncoder<R> {
    /// Deflate encoder reading from `R` through a CRC-tracking wrapper.
    inner: deflate::bufread::DeflateEncoder<CrcReader<R>>,
    /// Pre-serialized header bytes emitted before any compressed data.
    header: Vec<u8>,
    /// Cursor into `header` while the header is being emitted; reset to 0 and
    /// reused as the cursor into the 8-byte footer once `eof` is set.
    pos: usize,
    /// Set once the inner encoder has reported end of stream.
    eof: bool,
}
58
59pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R> {
60 let crc = CrcReader::new(r);
61 GzEncoder {
62 inner: deflate::bufread::DeflateEncoder::new(crc, lvl),
63 header,
64 pos: 0,
65 eof: false,
66 }
67}
68
69impl<R: BufRead> GzEncoder<R> {
70 pub fn new(r: R, level: Compression) -> GzEncoder<R> {
78 GzBuilder::new().buf_read(r, level)
79 }
80
81 fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> {
82 if self.pos == 8 {
83 return Ok(0);
84 }
85 let crc = self.inner.get_ref().crc();
86 let calced_crc_bytes = crc.sum().to_le_bytes();
87 let arr = [
88 calced_crc_bytes[0],
89 calced_crc_bytes[1],
90 calced_crc_bytes[2],
91 calced_crc_bytes[3],
92 crc.amount() as u8,
93 (crc.amount() >> 8) as u8,
94 (crc.amount() >> 16) as u8,
95 (crc.amount() >> 24) as u8,
96 ];
97 Ok(copy(into, &arr, &mut self.pos))
98 }
99}
100
101impl<R> GzEncoder<R> {
102 pub fn get_ref(&self) -> &R {
104 self.inner.get_ref().get_ref()
105 }
106
107 pub fn get_mut(&mut self) -> &mut R {
112 self.inner.get_mut().get_mut()
113 }
114
115 pub fn into_inner(self) -> R {
117 self.inner.into_inner().into_inner()
118 }
119}
120
/// Splits an 8-byte trailer into its two little-endian `u32` fields.
///
/// Returns `(crc, amt)`: bytes 0..4 decoded as the first value and bytes
/// 4..8 as the second.
#[inline]
fn finish(buf: &[u8; 8]) -> (u32, u32) {
    let [c0, c1, c2, c3, a0, a1, a2, a3] = *buf;
    (
        u32::from_le_bytes([c0, c1, c2, c3]),
        u32::from_le_bytes([a0, a1, a2, a3]),
    )
}
133
134impl<R: BufRead> Read for GzEncoder<R> {
135 fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> {
136 let mut amt = 0;
137 if self.eof {
138 return self.read_footer(into);
139 } else if self.pos < self.header.len() {
140 amt += copy(into, &self.header, &mut self.pos);
141 if amt == into.len() {
142 return Ok(amt);
143 }
144 let tmp = into;
145 into = &mut tmp[amt..];
146 }
147 match self.inner.read(into)? {
148 0 => {
149 self.eof = true;
150 self.pos = 0;
151 self.read_footer(into)
152 }
153 n => Ok(amt + n),
154 }
155 }
156}
157
158impl<R: BufRead + Write> Write for GzEncoder<R> {
159 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
160 self.get_mut().write(buf)
161 }
162
163 fn flush(&mut self) -> io::Result<()> {
164 self.get_mut().flush()
165 }
166}
167
/// Reader-side gzip decoder.
///
/// Its `Read` implementation parses the header, yields the decompressed
/// body, then reads an 8-byte trailer and checks it against the CRC state
/// accumulated while reading the body.
///
// NOTE(review): the two `cfg_attr` doc attributes below open a code fence, but
// no example body or closing fence is visible in this file — confirm the doc
// example was not lost.
#[cfg_attr(not(feature = "std"), doc = "```ignore")]
#[cfg_attr(feature = "std", doc = "```")]
#[derive(Debug)]
pub struct GzDecoder<R> {
    /// Current phase of decoding; see `GzState`.
    state: GzState,
    /// Deflate decoder over `R`, wrapped so the CRC state tracks the
    /// decompressed bytes that are read out of it.
    reader: CrcReader<deflate::bufread::DeflateDecoder<R>>,
    /// When true, a verified trailer followed by more input restarts decoding
    /// at the header state instead of ending the stream.
    multi: bool,
}
219
/// Decoding phases of a `GzDecoder`; the decoder's `read` moves between them.
#[derive(Debug)]
enum GzState {
    /// The header has not been fully parsed yet; holds the in-progress parser.
    Header(GzHeaderParser),
    /// The header is parsed and decompressed body bytes are being read.
    Body(GzHeader),
    /// The body is exhausted and the 8-byte trailer is being collected: the
    /// `usize` is how many trailer bytes have been read into the array so far.
    Finished(GzHeader, usize, [u8; 8]),
    /// An error captured at construction time, returned by the next `read`.
    Err(io::Error),
    /// Terminal state: `read` returns `Ok(0)`. The header is kept when one was
    /// parsed and is `None` when this state was reached via `Err`.
    End(Option<GzHeader>),
}
228
229pub fn reset_decoder_data<R>(decoder: &mut GzDecoder<R>) {
230 decoder.state = GzState::Header(GzHeaderParser::new());
231 decoder.reader.reset(); decoder.reader.get_mut().reset_data(); }
234
235impl<R: BufRead> GzDecoder<R> {
236 pub fn new(mut r: R) -> GzDecoder<R> {
239 let mut header_parser = GzHeaderParser::new();
240
241 let state = match header_parser.parse(&mut r) {
242 Ok(_) => GzState::Body(GzHeader::from(header_parser)),
243 Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => {
244 GzState::Header(header_parser)
245 }
246 Err(err) => GzState::Err(err),
247 };
248
249 GzDecoder {
250 state,
251 reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)),
252 multi: false,
253 }
254 }
255
256 fn multi(mut self, flag: bool) -> GzDecoder<R> {
257 self.multi = flag;
258 self
259 }
260}
261
262impl<R> GzDecoder<R> {
263 pub fn header(&self) -> Option<&GzHeader> {
265 match &self.state {
266 GzState::Body(header) | GzState::Finished(header, _, _) => Some(header),
267 GzState::End(header) => header.as_ref(),
268 _ => None,
269 }
270 }
271
272 pub fn get_ref(&self) -> &R {
274 self.reader.get_ref().get_ref()
275 }
276
277 pub fn get_mut(&mut self) -> &mut R {
282 self.reader.get_mut().get_mut()
283 }
284
285 pub fn into_inner(self) -> R {
287 self.reader.into_inner().into_inner()
288 }
289
290 pub fn reset(&mut self, r: R) -> R {
298 reset_decoder_data(self);
299 self.reader.get_mut().reset(r)
300 }
301}
302
impl<R: BufRead> Read for GzDecoder<R> {
    /// Drives the decoder state machine until it can return decompressed
    /// bytes, an error, or end of stream (`Ok(0)`).
    ///
    /// Errors come from header parsing, from the inner readers, from a
    /// deferred construction error, or from `corrupt()` when the trailer does
    /// not match the CRC state.
    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
        loop {
            match &mut self.state {
                GzState::Header(parser) => {
                    // On error `?` returns before the state changes, so the
                    // parser stays in place for the next call.
                    parser.parse(self.reader.get_mut().get_mut())?;
                    self.state = GzState::Body(GzHeader::from(mem::take(parser)));
                }
                GzState::Body(header) => {
                    // An empty buffer must not reach the inner read: the `0`
                    // it yields would be taken as "body finished" below.
                    if into.is_empty() {
                        return Ok(0);
                    }
                    match self.reader.read(into)? {
                        0 => {
                            // Body exhausted; start collecting the 8-byte
                            // trailer from position 0.
                            self.state = GzState::Finished(mem::take(header), 0, [0; 8]);
                        }
                        n => {
                            return Ok(n);
                        }
                    }
                }
                GzState::Finished(header, pos, buf) => {
                    if *pos < buf.len() {
                        // Trailer bytes are read from the underlying reader,
                        // bypassing the deflate decoder and CRC wrapper.
                        *pos += read_into(self.reader.get_mut().get_mut(), &mut buf[*pos..])?;
                    } else {
                        let (crc, amt) = finish(buf);

                        // The trailer must match both the CRC sum and the
                        // byte count accumulated while reading the body.
                        if crc != self.reader.crc().sum() || amt != self.reader.crc().amount() {
                            self.state = GzState::End(Some(mem::take(header)));
                            return Err(corrupt());
                        } else if self.multi {
                            // Peek at the underlying reader to see whether any
                            // input follows the trailer.
                            let is_eof = self
                                .reader
                                .get_mut()
                                .get_mut()
                                .fill_buf()
                                .map(|buf| buf.is_empty())?;

                            if is_eof {
                                self.state = GzState::End(Some(mem::take(header)));
                            } else {
                                // More input follows: reset CRC and deflate
                                // state and go back to header parsing.
                                self.reader.reset();
                                self.reader.get_mut().reset_data();
                                self.state = GzState::Header(GzHeaderParser::new())
                            }
                        } else {
                            self.state = GzState::End(Some(mem::take(header)));
                        }
                    }
                }
                GzState::Err(err) => {
                    // Move the stored error out (leaving a placeholder behind)
                    // and report it once; later reads see `End(None)`.
                    let result = Err(mem::replace(err, io::ErrorKind::Other.into()));
                    self.state = GzState::End(None);
                    return result;
                }
                GzState::End(_) => return Ok(0),
            }
        }
    }
}
363
364impl<R: BufRead + Write> Write for GzDecoder<R> {
365 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
366 self.get_mut().write(buf)
367 }
368
369 fn flush(&mut self) -> io::Result<()> {
370 self.get_mut().flush()
371 }
372}
373
/// Newtype around `GzDecoder` whose constructor enables the decoder's
/// `multi` flag, so decoding restarts at a new header when more input
/// follows a verified trailer.
///
// NOTE(review): the two `cfg_attr` doc attributes below open a code fence, but
// no example body or closing fence is visible in this file — confirm the doc
// example was not lost.
#[cfg_attr(not(feature = "std"), doc = "```ignore")]
#[cfg_attr(feature = "std", doc = "```")]
#[derive(Debug)]
pub struct MultiGzDecoder<R>(GzDecoder<R>);
419
420impl<R: BufRead> MultiGzDecoder<R> {
421 pub fn new(r: R) -> MultiGzDecoder<R> {
425 MultiGzDecoder(GzDecoder::new(r).multi(true))
426 }
427}
428
429impl<R> MultiGzDecoder<R> {
430 pub fn header(&self) -> Option<&GzHeader> {
432 self.0.header()
433 }
434
435 pub fn get_ref(&self) -> &R {
437 self.0.get_ref()
438 }
439
440 pub fn get_mut(&mut self) -> &mut R {
445 self.0.get_mut()
446 }
447
448 pub fn into_inner(self) -> R {
450 self.0.into_inner()
451 }
452}
453
454impl<R: BufRead> Read for MultiGzDecoder<R> {
455 fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
456 self.0.read(into)
457 }
458}
459
#[cfg(all(test, feature = "std"))]
mod test {
    use crate::bufread::GzDecoder;
    use crate::gz::write;
    use crate::Compression;
    use std::io::{Read, Write};

    /// Bytes appended after a complete gzip stream are not consumed by the
    /// decoder and remain readable from the underlying reader.
    #[test]
    fn decode_extra_data() {
        let expected = "Hello World";

        // A valid gzip stream followed by one stray byte.
        let compressed = {
            let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
            e.write_all(expected.as_ref()).unwrap();
            let mut b = e.finish().unwrap();
            b.push(b'x');
            b
        };

        let mut output = Vec::new();
        let mut decoder = GzDecoder::new(compressed.as_slice());
        let decoded_bytes = decoder.read_to_end(&mut output).unwrap();
        assert_eq!(decoded_bytes, output.len());
        let actual = std::str::from_utf8(&output).expect("String parsing error");
        assert_eq!(
            actual, expected,
            "after decompression we obtain the original input"
        );

        output.clear();
        // Probe with a non-empty buffer. Reading into the cleared `Vec` would
        // pass a zero-length slice, for which `read` returns 0 regardless of
        // the decoder's state, so the assertion would check nothing.
        let mut probe = [0u8; 16];
        assert_eq!(
            decoder.read(&mut probe).unwrap(),
            0,
            "subsequent read of decoder returns 0, but inner reader can return additional data"
        );
        let mut reader = decoder.into_inner();
        assert_eq!(
            reader.read_to_end(&mut output).unwrap(),
            1,
            "extra data is accessible in underlying buf-read"
        );
        assert_eq!(output, b"x");
    }

    /// Gzip-compresses `data` with the default compression level.
    fn compress_data(data: &[u8]) -> Vec<u8> {
        use crate::write::GzEncoder;
        use crate::Compression;

        let mut e = GzEncoder::new(Vec::new(), Compression::default());
        e.write_all(data).unwrap();
        e.finish().unwrap()
    }

    /// After `reset`, the same decoder decodes a second, unrelated stream.
    #[test]
    fn decode_with_reset() {
        let data1 = b"Hello World";
        let data2 = b"Goodbye World";

        let compressed1 = compress_data(data1);
        let compressed2 = compress_data(data2);

        let mut output = Vec::new();
        let mut decoder = GzDecoder::new(compressed1.as_slice());
        decoder.read_to_end(&mut output).unwrap();
        assert_eq!(output, data1);

        output.clear();
        decoder.reset(compressed2.as_slice());
        decoder.read_to_end(&mut output).unwrap();
        assert_eq!(output, data2);
    }

    /// A decoder that failed on corrupt input works again after `reset`.
    #[test]
    fn decode_with_reset_after_corruption() {
        let valid_data = b"Hello World";
        let valid_compressed = compress_data(valid_data);

        // Flip two bytes at offsets 12 and 13 — presumably inside the
        // compressed payload, past the gzip header; confirm against the
        // header length the encoder emits.
        let mut corrupted = valid_compressed.clone();
        assert!(corrupted.len() >= 14);
        corrupted[12] ^= 0xFF;
        corrupted[13] ^= 0xFF;

        let mut decoder = GzDecoder::new(corrupted.as_slice());
        let mut output = Vec::new();
        let _ = decoder.read_to_end(&mut output).unwrap_err();

        decoder.reset(valid_compressed.as_slice());
        output.clear();
        decoder.read_to_end(&mut output).unwrap();
        assert_eq!(output, valid_data);
    }
}