stream_unpack/zip/structures/
central_directory.rs

1use std::io::Cursor;
2
3use byteorder::{ReadBytesExt, LittleEndian};
4use thiserror::Error;
5
6use crate::zip::ZipPosition;
7
8use super::{CompressionMethod, file_header::{FileHeaderExtraField, Zip64OriginalData, Zip64ProcessedData}};
9
10#[derive(Debug, Error)]
11pub enum CentralDirectoryError {
12    #[error("input too short")]
13    InputTooShort,
14
15    #[error("invalid central directory file header signature at {0}")]
16    InvalidSignature(usize),
17
18    #[error("malformed central directory file header at {0}")]
19    MalformedHeader(usize),
20
21    #[error("{0} bytes left over after reading entire central directory")]
22    LeftoverBytes(usize)
23}
24
25/// Represents a ZIP central directory.
26/// 
27/// The unpacker requires the central directory to be sorted
28/// in the order of ascending position. [CentralDirectory::sort] 
29/// can be used to obtain a [SortedCentralDirectory]
30#[derive(Debug)]
31pub struct CentralDirectory {
32    headers: Vec<CentralDirectoryFileHeader>
33}
34
35impl CentralDirectory {
36    /// Tries to read all CDFH from the central directory
37    pub fn from_bytes(data: impl AsRef<[u8]>) -> Result<Self, CentralDirectoryError> {
38        let data = data.as_ref();
39        if data.len() < 4 + CDFH_CONSTANT_SIZE {
40            return Err(CentralDirectoryError::InputTooShort);
41        }
42
43        let mut headers = Vec::new();
44
45        let mut offset = 0;
46        while offset < data.len() - 3 {
47            let signature = u32::from_le_bytes(data[offset..(offset + 4)].try_into().unwrap());
48            if signature != CDFH_SIGNATURE {
49                return Err(CentralDirectoryError::InvalidSignature(offset));
50            }
51
52            let Some(cdfh) = CentralDirectoryFileHeader::from_bytes(&data[(offset + 4)..]) else {
53                return Err(CentralDirectoryError::MalformedHeader(offset));
54            };
55
56            offset += cdfh.header_size + 4;
57            headers.push(cdfh);
58        }
59
60        if offset != data.len() {
61            // There is something left over
62            return Err(CentralDirectoryError::LeftoverBytes(data.len() - offset));
63        }
64
65        Ok(Self {
66            headers
67        })
68    }
69
70    /// Returns a reference to the CDFHs
71    pub fn headers_ref(&self) -> &[CentralDirectoryFileHeader] {
72        &self.headers
73    }
74
75    pub fn sort(mut self) -> SortedCentralDirectory {
76        self.headers.sort_by(|a, b| {
77            a.header_position().cmp(&b.header_position())    
78        });
79
80        SortedCentralDirectory {
81            headers: self.headers
82        }
83    }
84}
85
86/// Represents a sorted ZIP central directory
87#[derive(Debug)]
88pub struct SortedCentralDirectory {
89    headers: Vec<CentralDirectoryFileHeader>
90}
91
92impl SortedCentralDirectory {
93    /// Returns a reference to the CDFHs
94    pub fn headers_ref(&self) -> &[CentralDirectoryFileHeader] {
95        &self.headers
96    }
97}
98
/// Signature that introduces every central directory file header,
/// compared against the first 4 bytes read as a little-endian `u32`
pub const CDFH_SIGNATURE: u32 = 0x0201_4B50;

/// Size in bytes of the fixed-length part of a CDFH, not counting
/// the 4-byte signature
pub const CDFH_CONSTANT_SIZE: usize = 42;
101
102/// Represents the result of reading a central directory file header (CDFH)
103/// 
104/// The layout of this object does not follow the original ZIP CDFH structure
105#[derive(Debug, Clone)]
106pub struct CentralDirectoryFileHeader {
107    pub version_made_by: u16,
108    pub version_needed: u16,
109
110    pub flag: u16,
111
112    pub compression_method: Option<CompressionMethod>,
113
114    pub mod_time: u16,
115    pub mod_date: u16,
116
117    pub crc32: u32,
118
119    pub compressed_size: u64,
120    pub uncompressed_size: u64,
121
122    pub filename: String,
123    
124    pub extra_fields: Vec<FileHeaderExtraField>,
125
126    pub disk_number: u32,
127
128    pub internal_attributes: u16,
129    pub external_attributes: u32,
130
131    pub local_header_offset: u64,
132
133    #[cfg(feature = "zip-comments")]
134    pub comment: String,
135
136    pub header_size: usize
137}
138
139impl CentralDirectoryFileHeader {
140    /// Attempts to read a central directory file header from the provided
141    /// byte buffer. Returns None if there isn't enought data
142    pub fn from_bytes(data: impl AsRef<[u8]>) -> Option<Self> {
143        let data = data.as_ref();
144        if data.len() < CDFH_CONSTANT_SIZE {
145            return None;
146        }
147
148        let mut cursor = Cursor::new(data);
149
150        let version_made_by = cursor.read_u16::<LittleEndian>().unwrap();
151        let version_needed = cursor.read_u16::<LittleEndian>().unwrap();
152        let flag = cursor.read_u16::<LittleEndian>().unwrap();
153        let compression_method = cursor.read_u16::<LittleEndian>().unwrap();
154        let mod_time = cursor.read_u16::<LittleEndian>().unwrap();
155        let mod_date = cursor.read_u16::<LittleEndian>().unwrap();
156        let crc32 = cursor.read_u32::<LittleEndian>().unwrap();
157        let compressed_size = cursor.read_u32::<LittleEndian>().unwrap();
158        let uncompressed_size = cursor.read_u32::<LittleEndian>().unwrap();
159        let filename_length = cursor.read_u16::<LittleEndian>().unwrap();
160        let extra_fields_length = cursor.read_u16::<LittleEndian>().unwrap();
161        let comment_length = cursor.read_u16::<LittleEndian>().unwrap();
162        let disk_number = cursor.read_u16::<LittleEndian>().unwrap();
163        let internal_attributes = cursor.read_u16::<LittleEndian>().unwrap();
164        let external_attributes = cursor.read_u32::<LittleEndian>().unwrap();
165        let local_header_offset = cursor.read_u32::<LittleEndian>().unwrap();
166
167        let filename_length = filename_length as usize;
168        let extra_fields_length = extra_fields_length as usize;
169        let comment_length = comment_length as usize;
170        if data.len() < CDFH_CONSTANT_SIZE + filename_length + extra_fields_length + comment_length {
171            return None;
172        }
173
174        let compression_method = CompressionMethod::from_id(compression_method);
175
176        let filename_start = CDFH_CONSTANT_SIZE;
177        let filename_end = filename_start + filename_length;
178        let filename = String::from_utf8_lossy(&data[filename_start..filename_end]).to_string();
179
180        let extra_fields_start = filename_end;
181        let extra_fields_end = extra_fields_start + extra_fields_length;
182        let Some(extra_fields) = FileHeaderExtraField::read_extra_fields(&data[extra_fields_start..extra_fields_end]) else {
183            return None;
184        };
185
186        let comment_start = extra_fields_end;
187        let comment_end = comment_start + comment_length;
188
189        let original_zip64_data = Zip64OriginalData {
190            uncompressed_size,
191            compressed_size,
192            local_header_offset,
193            disk_number
194        };
195
196        let Some(Zip64ProcessedData {
197            uncompressed_size,
198            compressed_size,
199            local_header_offset,
200            disk_number
201        }) = original_zip64_data.process(&extra_fields) else {
202            return None;
203        };
204
205        Some(Self {
206            version_made_by,
207            version_needed,
208            flag,
209            compression_method,
210            mod_time,
211            mod_date,
212            crc32,
213            compressed_size,
214            uncompressed_size,
215            filename,
216            extra_fields,
217            disk_number,
218            internal_attributes,
219            external_attributes,
220            local_header_offset,
221
222            #[cfg(feature = "zip-comments")]
223            comment: String::from_utf8_lossy(&data[comment_start..comment_end]).to_string(),
224
225            header_size: comment_end
226        })
227    }
228
229    pub fn is_directory(&self) -> bool {
230        self.filename.ends_with('/')
231    }
232
233    /// Returns the [ZipPosition] of the LFH corresponding to this CDFH
234    pub fn header_position(&self) -> ZipPosition {
235        ZipPosition::new(
236            self.disk_number as usize,
237            self.local_header_offset as usize
238        )
239    }
240}