1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
use crate::common::get_cstring;
use crate::structures::common::{self, StructureError};
use std::collections::HashMap;
/// Summary of the fields extracted from a Gzip member header.
///
/// Every field defaults to its empty/zero value; `parse_gzip_header` fills in
/// whatever the header actually provides.
#[derive(Clone, Debug, Default)]
pub struct GzipHeader {
    /// Human-readable name of the operating system / filesystem ID found in the header.
    pub os: String,
    /// Total size of the header in bytes, including any optional fields; this is
    /// the offset at which the deflate data is expected to start.
    pub size: usize,
    /// Optional NULL-terminated comment (empty if the header carries none).
    pub comment: String,
    /// Raw 32-bit timestamp field taken from the header.
    pub timestamp: u32,
    /// Optional NULL-terminated original file name (empty if the header carries none).
    pub original_name: String,
}
/// Parses a Gzip file header.
///
/// `header_data` is expected to start at the first byte of the gzip header. The
/// fixed 10-byte portion is parsed first, then each optional field announced by
/// the flags byte (extra field, original file name, comment, header CRC) is
/// walked in order to compute the total header size.
///
/// The magic field is parsed but not validated here.
///
/// # Returns
///
/// A [`GzipHeader`] whose `size` is the offset at which the deflate stream
/// should begin.
///
/// # Errors
///
/// Returns [`StructureError`] when:
/// - the fixed header cannot be parsed;
/// - the compression method is not deflate, a reserved flag bit is set, or the
///   OS ID is not a known value;
/// - an optional field starts beyond the end of `header_data`;
/// - the computed header size exceeds `header_data.len()`.
pub fn parse_gzip_header(header_data: &[u8]) -> Result<GzipHeader, StructureError> {
    // Some expected constant values
    const CRC_SIZE: usize = 2;
    const NULL_BYTE_SIZE: usize = 1;
    const DEFLATE_COMPRESSION: usize = 8;

    const FLAG_CRC: usize = 0b0000_0010;
    const FLAG_EXTRA: usize = 0b0000_0100;
    const FLAG_NAME: usize = 0b0000_1000;
    const FLAG_COMMENT: usize = 0b0001_0000;
    const FLAG_RESERVED: usize = 0b1110_0000;

    let gzip_header_structure = vec![
        ("magic", "u16"),
        ("compression_method", "u8"),
        ("flags", "u8"),
        ("timestamp", "u32"),
        ("extra_flags", "u8"),
        ("osid", "u8"),
    ];

    // Per RFC 1952 section 2.3, the optional "extra" block is a 2-byte XLEN
    // followed by XLEN bytes of extra-field data. The subfield ID/length pairs
    // live *inside* those XLEN bytes and must not be mistaken for the length.
    let gzip_extra_header_structure = vec![("extra_data_len", "u16")];

    let known_os_ids: HashMap<usize, &str> = HashMap::from([
        (0, "FAT filesystem (MS-DOS, OS/2, NT/Win32"),
        (1, "Amiga"),
        (2, "VMS (or OpenVMS)"),
        (3, "Unix"),
        (4, "VM/CMS"),
        (5, "Atari TOS"),
        (6, "HPFS filesystem (OS/2, NT)"),
        (7, "Macintosh"),
        (8, "Z-System"),
        (9, "CP/M"),
        (10, "TOPS-20"),
        (11, "NTFS filesystem (NT)"),
        (12, "QDOS"),
        (13, "Acorn RISCOS"),
        (255, "unknown"),
    ]);

    // Parse the fixed-size portion of the gzip header
    let gzip_header = common::parse(header_data, &gzip_header_structure, "little")?;
    let flags = gzip_header["flags"];

    // Sanity check; compression type should be deflate and reserved flag bits should not be set
    if (flags & FLAG_RESERVED) != 0 || gzip_header["compression_method"] != DEFLATE_COMPRESSION {
        return Err(StructureError);
    }

    // Sanity check; OS ID should be a known value (single lookup doubles as the check)
    let os_name = known_os_ids
        .get(&gzip_header["osid"])
        .ok_or(StructureError)?;

    let mut header_info = GzipHeader {
        os: (*os_name).to_string(),
        // End of the fixed-size portion of the gzip header
        size: common::size(&gzip_header_structure),
        // The field was parsed as a u32, so this cast cannot truncate
        timestamp: gzip_header["timestamp"] as u32,
        ..Default::default()
    };

    // Check if the optional "extra" data follows the standard Gzip header
    if (flags & FLAG_EXTRA) != 0 {
        let xlen_start: usize = header_info.size;
        let xlen_end: usize = xlen_start + common::size(&gzip_extra_header_structure);

        let xlen_data = header_data
            .get(xlen_start..xlen_end)
            .ok_or(StructureError)?;
        let extra_header = common::parse(xlen_data, &gzip_extra_header_structure, "little")?;

        // Skip the XLEN field itself plus the XLEN bytes of extra data it announces
        header_info.size = xlen_end + extra_header["extra_data_len"];
    }

    // If the NULL-terminated original file name is included, it will be next
    if (flags & FLAG_NAME) != 0 {
        let file_name_bytes = header_data
            .get(header_info.size..)
            .ok_or(StructureError)?;
        header_info.original_name = get_cstring(file_name_bytes);
        // The value returned by get_cstring does not include the terminating NULL byte
        header_info.size += header_info.original_name.len() + NULL_BYTE_SIZE;
    }

    // If a NULL-terminated comment is included, it will be next
    if (flags & FLAG_COMMENT) != 0 {
        let comment_bytes = header_data
            .get(header_info.size..)
            .ok_or(StructureError)?;
        header_info.comment = get_cstring(comment_bytes);
        // The value returned by get_cstring does not include the terminating NULL byte
        header_info.size += header_info.comment.len() + NULL_BYTE_SIZE;
    }

    // Finally, a checksum field may be included
    if (flags & FLAG_CRC) != 0 {
        header_info.size += CRC_SIZE;
    }

    // Deflate data should start at header_info.size; make sure this offset is sane
    if header_info.size > header_data.len() {
        return Err(StructureError);
    }

    Ok(header_info)
}