1use alloc::boxed::Box;
2use alloc::ffi::CString;
3use alloc::vec;
4use alloc::vec::Vec;
5
6use crate::io::{BufRead, Error, ErrorKind, Read, Result, Write};
7
8use crate::bufreader::BufReader;
9use crate::{Compression, Crc};
10
/// Header flag bit: a two-byte header checksum follows the optional fields.
pub static FHCRC: u8 = 0b0000_0010;
/// Header flag bit: a length-prefixed "extra" field is present.
pub static FEXTRA: u8 = 0b0000_0100;
/// Header flag bit: a NUL-terminated file name is present.
pub static FNAME: u8 = 0b0000_1000;
/// Header flag bit: a NUL-terminated comment is present.
pub static FCOMMENT: u8 = 0b0001_0000;
/// Mask of the three highest flag bits; the header parser rejects a header that sets any of them.
pub static FRESERVED: u8 = 0b1110_0000;
16
// Gzip types built on `BufRead` sources; `GzBuilder::buf_read` returns `bufread::GzEncoder`.
pub mod bufread;
// Gzip types built on `Read` sources; `GzBuilder::read` returns `read::GzEncoder`.
pub mod read;
// Gzip types built on `Write` sinks; `GzBuilder::write` returns `write::GzEncoder`.
pub mod write;
20
/// Maximum number of bytes `read_to_nul` will accumulate for one NUL-terminated header
/// field (file name or comment) before failing with "gzip header field too long".
const MAX_HEADER_BUF: usize = 65535;
24
/// A structure representing the header of a gzip stream.
///
/// The fields are filled in by `GzHeaderParser` while it reads a header and are exposed
/// read-only through the accessor methods on this type.
#[derive(PartialEq, Clone, Debug, Default)]
pub struct GzHeader {
    // Raw bytes of the "extra" field; `None` when the header did not carry one.
    extra: Option<Vec<u8>>,
    // File-name bytes without the terminating NUL; `None` when absent.
    filename: Option<Vec<u8>>,
    // Comment bytes without the terminating NUL; `None` when absent.
    comment: Option<Vec<u8>>,
    // Operating-system identifier byte copied verbatim from the header.
    operating_system: u8,
    // Modification time, decoded from four little-endian header bytes.
    mtime: u32,
}
37
38impl GzHeader {
39 pub fn filename(&self) -> Option<&[u8]> {
41 self.filename.as_ref().map(|s| &s[..])
42 }
43
44 pub fn extra(&self) -> Option<&[u8]> {
46 self.extra.as_ref().map(|s| &s[..])
47 }
48
49 pub fn comment(&self) -> Option<&[u8]> {
51 self.comment.as_ref().map(|s| &s[..])
52 }
53
54 pub fn operating_system(&self) -> u8 {
59 self.operating_system
60 }
61
62 pub fn mtime(&self) -> u32 {
72 self.mtime
73 }
74
75 pub fn mtime_as_datetime<T: FromUnixTimestamp>(&self) -> Option<T> {
83 if self.mtime == 0 {
84 None
85 } else {
86 Some(T::from_unix_timestamp(u64::from(self.mtime)))
87 }
88 }
89}
90
/// Conversion from a count of seconds into a timestamp type.
///
/// `GzHeader::mtime_as_datetime` uses this to turn the header's `mtime` into whatever
/// time representation the caller picks.
pub trait FromUnixTimestamp {
    /// Builds `Self` from `secs`, the number of seconds handed over by the caller
    /// (the header `mtime`, widened to `u64`, when called from `GzHeader`).
    fn from_unix_timestamp(secs: u64) -> Self;
}
100
/// With the `std` feature, a timestamp converts to the instant `secs` whole seconds after
/// [`std::time::UNIX_EPOCH`].
#[cfg(feature = "std")]
impl FromUnixTimestamp for std::time::SystemTime {
    fn from_unix_timestamp(secs: u64) -> Self {
        let since_epoch = std::time::Duration::from_secs(secs);
        std::time::UNIX_EPOCH + since_epoch
    }
}
107
/// Position of `GzHeaderParser` within a gzip header.
///
/// Each variant carries the partial data needed to continue from that position. Where an
/// `Option<Box<Crc>>` is present it is the running header checksum, which is `Some` only
/// when the header's `FHCRC` flag is set.
#[derive(Debug, Default)]
pub enum GzHeaderState {
    /// Reading the fixed 10-byte prefix: number of bytes buffered so far, and the buffer.
    Start(u8, [u8; 10]),
    /// Reading the two-byte length of the extra field: checksum, bytes buffered, buffer.
    Xlen(Option<Box<Crc>>, u8, [u8; 2]),
    /// Reading the extra field body: checksum and number of bytes read so far.
    Extra(Option<Box<Crc>>, u16),
    /// Reading the NUL-terminated file name (skipped when `FNAME` is not set).
    Filename(Option<Box<Crc>>),
    /// Reading the NUL-terminated comment (skipped when `FCOMMENT` is not set).
    Comment(Option<Box<Crc>>),
    /// Reading and checking the two-byte header checksum: checksum, bytes buffered, buffer.
    Crc(Option<Box<Crc>>, u8, [u8; 2]),
    /// The header has been consumed; this is also the `Default` state.
    #[default]
    Complete,
}
119
/// Incremental gzip header parser.
///
/// Note that the derived `Default` starts in `GzHeaderState::Complete` with an empty
/// header, whereas `GzHeaderParser::new` starts in `GzHeaderState::Start`.
#[derive(Debug, Default)]
pub struct GzHeaderParser {
    // Current position in the header, plus any partially read bytes.
    state: GzHeaderState,
    // The header's flag byte, recorded once the fixed prefix has been read.
    flags: u8,
    // The header values collected so far.
    header: GzHeader,
}
126
impl GzHeaderParser {
    /// Creates a parser positioned at the start of a header, with nothing read yet.
    fn new() -> Self {
        GzHeaderParser {
            state: GzHeaderState::Start(0, [0; 10]),
            flags: 0,
            header: GzHeader::default(),
        }
    }

    /// Reads from `r` until the whole header has been consumed.
    ///
    /// Returns `Ok(())` once the state is `Complete` (immediately, if it already is).
    /// Progress is recorded in `self.state` and `self.header` as bytes arrive, so when a
    /// read fails part-way the bytes gathered before the failure are kept.
    ///
    /// # Errors
    ///
    /// * any error produced by `read_into` / `read_to_nul` (including unexpected EOF),
    /// * `bad_header()` when the first two bytes are not `0x1f 0x8b`, the third byte is
    ///   not `8`, or a reserved flag bit is set,
    /// * `corrupt()` when `FHCRC` is set and the stored checksum differs from the low 16
    ///   bits of the computed one.
    fn parse<R: BufRead>(&mut self, r: &mut R) -> Result<()> {
        loop {
            match &mut self.state {
                GzHeaderState::Start(count, buffer) => {
                    // Fill the fixed 10-byte prefix; `count` remembers how far we got.
                    while (*count as usize) < buffer.len() {
                        *count += read_into(r, &mut buffer[*count as usize..])? as u8;
                    }
                    // Identification bytes.
                    if buffer[0] != 0x1f || buffer[1] != 0x8b {
                        return Err(bad_header());
                    }
                    // Compression-method byte: only the value 8 is accepted.
                    if buffer[2] != 8 {
                        return Err(bad_header());
                    }
                    self.flags = buffer[3];
                    // Reserved flag bits must all be clear.
                    if self.flags & FRESERVED != 0 {
                        return Err(bad_header());
                    }
                    // Little-endian 32-bit modification time.
                    self.header.mtime = (buffer[4] as u32)
                        | ((buffer[5] as u32) << 8)
                        | ((buffer[6] as u32) << 16)
                        | ((buffer[7] as u32) << 24);
                    // Byte 8 is read but deliberately not stored.
                    let _xfl = buffer[8];
                    self.header.operating_system = buffer[9];
                    // Only track a checksum when the header says one will follow.
                    let crc = if self.flags & FHCRC != 0 {
                        let mut crc = Box::new(Crc::new());
                        crc.update(buffer);
                        Some(crc)
                    } else {
                        None
                    };
                    self.state = GzHeaderState::Xlen(crc, 0, [0; 2]);
                }
                GzHeaderState::Xlen(crc, count, buffer) => {
                    if self.flags & FEXTRA != 0 {
                        while (*count as usize) < buffer.len() {
                            *count += read_into(r, &mut buffer[*count as usize..])? as u8;
                        }
                        if let Some(crc) = crc {
                            crc.update(buffer);
                        }
                        let xlen = parse_le_u16(buffer);
                        // Pre-size the extra buffer so the next state can fill it in place.
                        self.header.extra = Some(vec![0; xlen as usize]);
                        self.state = GzHeaderState::Extra(crc.take(), 0);
                    } else {
                        // No extra field: go straight to the file name.
                        self.state = GzHeaderState::Filename(crc.take());
                    }
                }
                GzHeaderState::Extra(crc, count) => {
                    debug_assert!(self.header.extra.is_some());
                    let extra = self.header.extra.as_mut().unwrap();
                    // A zero-length extra field skips the loop, so `read_into` never sees
                    // an empty buffer here.
                    while (*count as usize) < extra.len() {
                        *count += read_into(r, &mut extra[*count as usize..])? as u16;
                    }
                    if let Some(crc) = crc {
                        crc.update(extra);
                    }
                    self.state = GzHeaderState::Filename(crc.take());
                }
                GzHeaderState::Filename(crc) => {
                    if self.flags & FNAME != 0 {
                        // `get_or_insert_with` keeps bytes collected by an earlier,
                        // failed attempt instead of starting over.
                        let filename = self.header.filename.get_or_insert_with(Vec::new);
                        read_to_nul(r, filename)?;
                        if let Some(crc) = crc {
                            crc.update(filename);
                            // The terminator is not stored but is part of the checksum.
                            crc.update(b"\0");
                        }
                    }
                    self.state = GzHeaderState::Comment(crc.take());
                }
                GzHeaderState::Comment(crc) => {
                    if self.flags & FCOMMENT != 0 {
                        let comment = self.header.comment.get_or_insert_with(Vec::new);
                        read_to_nul(r, comment)?;
                        if let Some(crc) = crc {
                            crc.update(comment);
                            // The terminator is not stored but is part of the checksum.
                            crc.update(b"\0");
                        }
                    }
                    self.state = GzHeaderState::Crc(crc.take(), 0, [0; 2]);
                }
                GzHeaderState::Crc(crc, count, buffer) => {
                    // `crc` is `Some` exactly when FHCRC was set in the Start state.
                    if let Some(crc) = crc {
                        debug_assert!(self.flags & FHCRC != 0);
                        while (*count as usize) < buffer.len() {
                            *count += read_into(r, &mut buffer[*count as usize..])? as u8;
                        }
                        let stored_crc = parse_le_u16(buffer);
                        // Only the low 16 bits of the running checksum are compared.
                        let calced_crc = crc.sum() as u16;
                        if stored_crc != calced_crc {
                            return Err(corrupt());
                        }
                    }
                    self.state = GzHeaderState::Complete;
                }
                GzHeaderState::Complete => {
                    return Ok(());
                }
            }
        }
    }

    /// Returns the parsed header, or `None` while parsing has not reached `Complete`.
    fn header(&self) -> Option<&GzHeader> {
        match self.state {
            GzHeaderState::Complete => Some(&self.header),
            _ => None,
        }
    }
}
247
248impl From<GzHeaderParser> for GzHeader {
249 fn from(parser: GzHeaderParser) -> Self {
250 debug_assert!(matches!(parser.state, GzHeaderState::Complete));
251 parser.header
252 }
253}
254
/// Performs one `read` into `buffer` and reports how many bytes arrived.
///
/// * A read of zero bytes becomes an `UnexpectedEof` error.
/// * An `Interrupted` error is reported as `Ok(0)`, so the caller's fill loop simply retries.
/// * Every other error is returned unchanged.
///
/// `buffer` must not be empty (checked with a debug assertion).
fn read_into<R: Read>(r: &mut R, buffer: &mut [u8]) -> Result<usize> {
    debug_assert!(!buffer.is_empty());
    let filled = match r.read(buffer) {
        Ok(filled) => filled,
        Err(err) => {
            return if err.kind() == ErrorKind::Interrupted {
                Ok(0)
            } else {
                Err(err)
            };
        }
    };
    if filled == 0 {
        Err(ErrorKind::UnexpectedEof.into())
    } else {
        Ok(filled)
    }
}
267
268fn read_to_nul<R: BufRead>(r: &mut R, buffer: &mut Vec<u8>) -> Result<()> {
270 let mut bytes = r.bytes();
271 loop {
272 match bytes.next().transpose()? {
273 Some(0) => return Ok(()),
274 Some(_) if buffer.len() == MAX_HEADER_BUF => {
275 return Err(Error::new(
276 ErrorKind::InvalidInput,
277 "gzip header field too long",
278 ));
279 }
280 Some(byte) => {
281 buffer.push(byte);
282 }
283 None => {
284 return Err(ErrorKind::UnexpectedEof.into());
285 }
286 }
287 }
288}
289
/// Decodes a little-endian `u16`: `buffer[0]` is the low byte, `buffer[1]` the high byte.
fn parse_le_u16(buffer: &[u8; 2]) -> u16 {
    let low = u16::from(buffer[0]);
    let high = u16::from(buffer[1]);
    (high << 8) | low
}
293
/// Builds the `InvalidInput` error returned when the fixed part of a gzip header is rejected.
fn bad_header() -> Error {
    let kind = ErrorKind::InvalidInput;
    Error::new(kind, "invalid gzip header")
}
297
/// Builds the `InvalidInput` error returned when a stored checksum does not match the
/// computed one.
fn corrupt() -> Error {
    let kind = ErrorKind::InvalidInput;
    Error::new(kind, "corrupt gzip stream does not have a matching checksum")
}
304
/// A builder structure to create a new gzip encoder.
///
/// It collects the optional header fields (extra data, file name, comment,
/// operating-system id, modification time) and is finished with `write`, `read` or
/// `buf_read`, which produce the matching encoder.
///
/// # Examples
///
#[cfg_attr(not(feature = "std"), doc = "```ignore")]
#[cfg_attr(feature = "std", doc = "```")]
/// // NOTE(review): the two attributes above open a code fence, but no example body was
/// // present in this copy of the file. Sketch of intended usage (not compiled):
/// //
/// // let gz = GzBuilder::new()
/// //     .filename("hello_world.txt")
/// //     .comment("test file, please delete")
/// //     .write(writer, Compression::default());
/// ```
#[derive(Debug, Default)]
pub struct GzBuilder {
    // Bytes for the header's "extra" field, if any.
    extra: Option<Vec<u8>>,
    // File name to store; `CString` guarantees it contains no interior NUL.
    filename: Option<CString>,
    // Comment to store; `CString` guarantees it contains no interior NUL.
    comment: Option<CString>,
    // Operating-system id byte; when `None` the header is written with 255.
    operating_system: Option<u8>,
    // Modification time written little-endian into the header (0 by default).
    mtime: u32,
}
340
341impl GzBuilder {
342 pub fn new() -> GzBuilder {
344 Self::default()
345 }
346
347 pub fn mtime(mut self, mtime: u32) -> GzBuilder {
349 self.mtime = mtime;
350 self
351 }
352
353 pub fn operating_system(mut self, os: u8) -> GzBuilder {
355 self.operating_system = Some(os);
356 self
357 }
358
359 pub fn extra<T: Into<Vec<u8>>>(mut self, extra: T) -> GzBuilder {
361 self.extra = Some(extra.into());
362 self
363 }
364
365 pub fn filename<T: Into<Vec<u8>>>(mut self, filename: T) -> GzBuilder {
371 self.filename = Some(CString::new(filename.into()).unwrap());
372 self
373 }
374
375 pub fn comment<T: Into<Vec<u8>>>(mut self, comment: T) -> GzBuilder {
381 self.comment = Some(CString::new(comment.into()).unwrap());
382 self
383 }
384
385 pub fn write<W: Write>(self, w: W, lvl: Compression) -> write::GzEncoder<W> {
390 write::gz_encoder(self.into_header(lvl), w, lvl)
391 }
392
393 pub fn read<R: Read>(self, r: R, lvl: Compression) -> read::GzEncoder<R> {
398 read::gz_encoder(self.buf_read(BufReader::new(r), lvl))
399 }
400
401 pub fn buf_read<R>(self, r: R, lvl: Compression) -> bufread::GzEncoder<R>
406 where
407 R: BufRead,
408 {
409 bufread::gz_encoder(self.into_header(lvl), r, lvl)
410 }
411
412 fn into_header(self, lvl: Compression) -> Vec<u8> {
413 let GzBuilder {
414 extra,
415 filename,
416 comment,
417 operating_system,
418 mtime,
419 } = self;
420 let mut flg = 0;
421 let mut header = vec![0u8; 10];
422 if let Some(v) = extra {
423 flg |= FEXTRA;
424 header.extend((v.len() as u16).to_le_bytes());
425 header.extend(v);
426 }
427 if let Some(filename) = filename {
428 flg |= FNAME;
429 header.extend(filename.as_bytes_with_nul().iter().copied());
430 }
431 if let Some(comment) = comment {
432 flg |= FCOMMENT;
433 header.extend(comment.as_bytes_with_nul().iter().copied());
434 }
435 header[0] = 0x1f;
436 header[1] = 0x8b;
437 header[2] = 8;
438 header[3] = flg;
439 header[4] = mtime as u8;
440 header[5] = (mtime >> 8) as u8;
441 header[6] = (mtime >> 16) as u8;
442 header[7] = (mtime >> 24) as u8;
443 header[8] = if lvl.0 >= Compression::best().0 {
444 2
445 } else if lvl.0 <= Compression::fast().0 {
446 4
447 } else {
448 0
449 };
450
451 header[9] = operating_system.unwrap_or(255);
456 header
457 }
458}
459
460#[cfg(all(test, feature = "std"))]
461mod tests {
462 use std::io::prelude::*;
463
464 use super::{read, write, GzBuilder, GzHeaderParser};
465 use crate::{Compression, GzHeader};
466 use rand::{rng, Rng};
467
468 #[test]
469 fn roundtrip() {
470 let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
471 e.write_all(b"foo bar baz").unwrap();
472 let inner = e.finish().unwrap();
473 let mut d = read::GzDecoder::new(&inner[..]);
474 let mut s = String::new();
475 d.read_to_string(&mut s).unwrap();
476 assert_eq!(s, "foo bar baz");
477 }
478
479 #[test]
480 fn roundtrip_zero() {
481 let e = write::GzEncoder::new(Vec::new(), Compression::default());
482 let inner = e.finish().unwrap();
483 let mut d = read::GzDecoder::new(&inner[..]);
484 let mut s = String::new();
485 d.read_to_string(&mut s).unwrap();
486 assert_eq!(s, "");
487 }
488
489 #[test]
490 fn roundtrip_big() {
491 let mut real = Vec::new();
492 let mut w = write::GzEncoder::new(Vec::new(), Compression::default());
493 let v = crate::random_bytes().take(1024).collect::<Vec<_>>();
494 for _ in 0..200 {
495 let to_write = &v[..rng().random_range(0..v.len())];
496 real.extend(to_write.iter().copied());
497 w.write_all(to_write).unwrap();
498 }
499 let result = w.finish().unwrap();
500 let mut r = read::GzDecoder::new(&result[..]);
501 let mut v = Vec::new();
502 r.read_to_end(&mut v).unwrap();
503 assert_eq!(v, real);
504 }
505
506 #[test]
507 fn roundtrip_big2() {
508 let v = crate::random_bytes().take(1024 * 1024).collect::<Vec<_>>();
509 let mut r = read::GzDecoder::new(read::GzEncoder::new(&v[..], Compression::default()));
510 let mut res = Vec::new();
511 r.read_to_end(&mut res).unwrap();
512 assert_eq!(res, v);
513 }
514
515 struct Rfc1952Crc {
518 crc_table: [u32; 256],
520 }
521
522 impl Rfc1952Crc {
523 fn new() -> Self {
524 let mut crc = Rfc1952Crc {
525 crc_table: [0; 256],
526 };
527 for n in 0usize..256 {
529 let mut c = n as u32;
530 for _k in 0..8 {
531 if c & 1 != 0 {
532 c = 0xedb88320 ^ (c >> 1);
533 } else {
534 c >>= 1;
535 }
536 }
537 crc.crc_table[n] = c;
538 }
539 crc
540 }
541
542 fn update_crc(&self, crc: u32, buf: &[u8]) -> u32 {
549 let mut c = crc ^ 0xffffffff;
550
551 for b in buf {
552 c = self.crc_table[(c as u8 ^ *b) as usize] ^ (c >> 8);
553 }
554 c ^ 0xffffffff
555 }
556
557 fn crc(&self, buf: &[u8]) -> u32 {
559 self.update_crc(0, buf)
560 }
561 }
562
563 #[test]
564 fn roundtrip_header() {
565 let mut header = GzBuilder::new()
566 .mtime(1234)
567 .operating_system(57)
568 .filename("filename")
569 .comment("comment")
570 .into_header(Compression::fast());
571
572 header[3] ^= super::FHCRC;
574 let rfc1952_crc = Rfc1952Crc::new();
575 let crc32 = rfc1952_crc.crc(&header);
576 let crc16 = crc32 as u16;
577 header.extend(&crc16.to_le_bytes());
578
579 let mut parser = GzHeaderParser::new();
580 parser.parse(&mut header.as_slice()).unwrap();
581 let actual = parser.header().unwrap();
582 assert_eq!(
583 actual,
584 &GzHeader {
585 extra: None,
586 filename: Some("filename".as_bytes().to_vec()),
587 comment: Some("comment".as_bytes().to_vec()),
588 operating_system: 57,
589 mtime: 1234
590 }
591 )
592 }
593
594 #[test]
595 fn gzip_encoder_matches_rfc1952() {
596 fn extract_zip_footer(compressed: &[u8]) -> (u32, u32) {
598 assert!(compressed.len() >= 8, "Gzip output too short");
599 let footer_start = compressed.len() - 8;
600
601 let crc = u32::from_le_bytes([
602 compressed[footer_start],
603 compressed[footer_start + 1],
604 compressed[footer_start + 2],
605 compressed[footer_start + 3],
606 ]);
607
608 let size = u32::from_le_bytes([
609 compressed[footer_start + 4],
610 compressed[footer_start + 5],
611 compressed[footer_start + 6],
612 compressed[footer_start + 7],
613 ]);
614
615 (crc, size)
616 }
617
618 #[track_caller]
619 fn test_crc_for_write(data: &[u8], expected_crc: u32, description: &str) {
620 let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
622 encoder.write_all(data).unwrap();
623 let compressed = encoder.finish().unwrap();
624
625 let expected_size = data.len() as u32;
626 let (actual_crc, actual_size) = extract_zip_footer(&compressed);
627
628 assert_eq!(
629 expected_crc, actual_crc,
630 "CRC32 mismatch for write {}: expected {:#08x}, got {:#08x}",
631 description, expected_crc, actual_crc
632 );
633 assert_eq!(
634 expected_size, actual_size,
635 "Size mismatch for write {}: expected {}, got {}",
636 description, expected_size, actual_size
637 );
638 }
639
640 #[track_caller]
641 fn test_crc_for_read(data: &[u8], expected_crc: u32, description: &str) {
642 let data_reader = std::io::Cursor::new(data);
644 let mut encoder = read::GzEncoder::new(data_reader, Compression::default());
645 let mut compressed = Vec::new();
646 encoder.read_to_end(&mut compressed).unwrap();
647
648 let expected_size = data.len() as u32;
649 let (actual_crc, actual_size) = extract_zip_footer(&compressed);
650
651 assert_eq!(
652 expected_crc, actual_crc,
653 "CRC32 mismatch for read {}: expected {:#08x}, got {:#08x}",
654 description, expected_crc, actual_crc
655 );
656 assert_eq!(
657 expected_size, actual_size,
658 "Size mismatch for read {}: expected {}, got {}",
659 description, expected_size, actual_size
660 );
661 }
662
663 #[track_caller]
664 fn test_crc_for_data(data: &[u8], description: &str) {
665 let rfc1952_crc = Rfc1952Crc::new();
666 let expected_crc = rfc1952_crc.crc(data);
667
668 test_crc_for_write(data, expected_crc, description);
669 test_crc_for_read(data, expected_crc, description);
670 }
671
672 test_crc_for_data(&[], "empty data");
674 test_crc_for_data(&[0x00], "single zero byte");
675 test_crc_for_data(&[0xFF], "single 0xFF byte");
676
677 test_crc_for_data(b"Hello World", "simple ASCII");
679 test_crc_for_data(b"AAAAAAA", "repeated 'A'");
680 test_crc_for_data(b"1234567890", "digits");
681
682 test_crc_for_data(&[0x00, 0x01, 0x02, 0x03, 0x04, 0x05], "sequential bytes");
684 test_crc_for_data(&[0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55], "alternating pattern");
685 test_crc_for_data(&[0x00; 10], "all zeros");
686 test_crc_for_data(&[0xFF; 10], "all ones");
687
688 let large_data = vec![0x42; 10240];
690 test_crc_for_data(&large_data, "10 kiB data");
691
692 {
694 let data = b"This is a test of multi-write CRC accumulation";
695 let rfc1952_crc = Rfc1952Crc::new();
696 let expected_crc = rfc1952_crc.crc(data);
697
698 let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
699 encoder.write_all(&data[..10]).unwrap();
701 encoder.write_all(&data[10..20]).unwrap();
702 encoder.write_all(&data[20..]).unwrap();
703 let compressed = encoder.finish().unwrap();
704
705 let expected_size = data.len() as u32;
706 let (actual_crc, actual_size) = extract_zip_footer(&compressed);
707
708 assert_eq!(
709 expected_crc, actual_crc,
710 "Multi-write CRC mismatch: expected {:#08x}, got {:#08x}",
711 expected_crc, actual_crc
712 );
713 assert_eq!(
714 expected_size, actual_size,
715 "Size mismatch for multi-write: expected {}, got {}",
716 expected_size, actual_size
717 );
718 }
719 }
720
721 fn gzip_corrupted_crc() -> Vec<u8> {
722 let test_data = b"The quick brown fox jumps over the lazy dog";
723
724 let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
725 encoder.write_all(test_data).unwrap();
726 let mut compressed = encoder.finish().unwrap();
727
728 let crc_offset = compressed.len() - 8;
730 compressed[crc_offset] ^= 0xFF;
731
732 compressed
733 }
734
735 #[test]
736 fn read_decoder_detects_corrupted_crc() {
737 let compressed = gzip_corrupted_crc();
738 let mut decoder = read::GzDecoder::new(&compressed[..]);
739 let mut output = Vec::new();
740 let error = decoder.read_to_end(&mut output).unwrap_err();
741 assert_eq!(error.kind(), std::io::ErrorKind::InvalidInput);
742 }
743
744 #[test]
745 fn write_decoder_detects_corrupted_crc() {
746 let compressed = gzip_corrupted_crc();
747 let mut decoder = write::GzDecoder::new(Vec::new());
748 decoder.write_all(&compressed).unwrap();
749 let error = decoder.finish().unwrap_err();
750 assert_eq!(error.kind(), std::io::ErrorKind::InvalidInput);
751 }
752
753 #[test]
754 fn fields() {
755 let r = [0, 2, 4, 6];
756 let e = GzBuilder::new()
757 .filename("foo.rs")
758 .comment("bar")
759 .extra(vec![0, 1, 2, 3])
760 .read(&r[..], Compression::default());
761 let mut d = read::GzDecoder::new(e);
762 assert_eq!(d.header().unwrap().filename(), Some(&b"foo.rs"[..]));
763 assert_eq!(d.header().unwrap().comment(), Some(&b"bar"[..]));
764 assert_eq!(d.header().unwrap().extra(), Some(&b"\x00\x01\x02\x03"[..]));
765 let mut res = Vec::new();
766 d.read_to_end(&mut res).unwrap();
767 assert_eq!(res, vec![0, 2, 4, 6]);
768 }
769
770 #[test]
771 fn keep_reading_after_end() {
772 let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
773 e.write_all(b"foo bar baz").unwrap();
774 let inner = e.finish().unwrap();
775 let mut d = read::GzDecoder::new(&inner[..]);
776 let mut s = String::new();
777 d.read_to_string(&mut s).unwrap();
778 assert_eq!(s, "foo bar baz");
779 d.read_to_string(&mut s).unwrap();
780 assert_eq!(s, "foo bar baz");
781 }
782
783 #[test]
784 fn qc_reader() {
785 ::quickcheck::quickcheck(test as fn(_) -> _);
786
787 fn test(v: Vec<u8>) -> bool {
788 let r = read::GzEncoder::new(&v[..], Compression::default());
789 let mut r = read::GzDecoder::new(r);
790 let mut v2 = Vec::new();
791 r.read_to_end(&mut v2).unwrap();
792 v == v2
793 }
794 }
795
796 #[test]
797 fn flush_after_write() {
798 let mut f = write::GzEncoder::new(Vec::new(), Compression::default());
799 write!(f, "Hello world").unwrap();
800 f.flush().unwrap();
801 }
802}