1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
use std::io::Cursor;

use byteorder::{ReadBytesExt, LittleEndian};
use thiserror::Error;

use crate::zip::ZipPosition;

use super::{CompressionMethod, file_header::{FileHeaderExtraField, Zip64OriginalData, Zip64ProcessedData}};

/// Errors reported by [CentralDirectory::from_bytes] while reading a
/// ZIP central directory.
#[derive(Debug, Error)]
pub enum CentralDirectoryError {
    /// Returned when the input is too short to hold a complete
    /// central directory file header (signature included).
    #[error("input too short")]
    InputTooShort,

    /// The four bytes at the contained offset (relative to the start of
    /// the input) do not match [CDFH_SIGNATURE].
    #[error("invalid central directory file header signature at {0}")]
    InvalidSignature(usize),

    /// The entry whose signature starts at the contained offset (relative
    /// to the start of the input) could not be parsed.
    #[error("malformed central directory file header at {0}")]
    MalformedHeader(usize),

    /// The contained number of bytes remained after the last complete
    /// entry and could not be interpreted as another entry.
    #[error("{0} bytes left over after reading entire central directory")]
    LeftoverBytes(usize)
}

/// Represents a ZIP central directory.
///
/// The unpacker requires the central directory to be sorted
/// in the order of ascending position. [CentralDirectory::sort]
/// can be used to obtain a [SortedCentralDirectory].
#[derive(Debug)]
pub struct CentralDirectory {
    // Headers in the order in which they were read from the input
    headers: Vec<CentralDirectoryFileHeader>
}

impl CentralDirectory {
    /// Tries to read all CDFHs from the central directory.
    ///
    /// `data` must contain exactly the central directory: a back-to-back
    /// sequence of entries, each introduced by the 4-byte little-endian
    /// [CDFH_SIGNATURE]. An empty `data` is accepted and yields a directory
    /// without headers, which is what an archive with no entries contains.
    ///
    /// # Errors
    ///
    /// * [CentralDirectoryError::InputTooShort] - `data` is not empty but is
    ///   shorter than one signature plus [CDFH_CONSTANT_SIZE] bytes.
    /// * [CentralDirectoryError::InvalidSignature] - the entry at the
    ///   reported offset does not start with [CDFH_SIGNATURE].
    /// * [CentralDirectoryError::MalformedHeader] - the entry at the reported
    ///   offset was rejected by [CentralDirectoryFileHeader::from_bytes].
    /// * [CentralDirectoryError::LeftoverBytes] - between one and three bytes
    ///   remain after the last complete entry.
    pub fn from_bytes(data: impl AsRef<[u8]>) -> Result<Self, CentralDirectoryError> {
        // Size in bytes of the signature that precedes every CDFH
        const SIGNATURE_SIZE: usize = 4;

        let data = data.as_ref();

        // An archive without entries has an empty central directory; that is
        // valid input rather than a truncated one.
        if data.is_empty() {
            return Ok(Self {
                headers: Vec::new()
            });
        }

        if data.len() < SIGNATURE_SIZE + CDFH_CONSTANT_SIZE {
            return Err(CentralDirectoryError::InputTooShort);
        }

        let mut headers = Vec::new();
        let mut offset = 0;

        // Continue for as long as a complete signature fits. A shorter tail
        // ends the loop and is reported as leftover bytes below.
        while let Some(signature) = data.get(offset..offset + SIGNATURE_SIZE) {
            let signature = u32::from_le_bytes(
                signature
                    .try_into()
                    .expect("the range above is exactly SIGNATURE_SIZE bytes long")
            );
            if signature != CDFH_SIGNATURE {
                return Err(CentralDirectoryError::InvalidSignature(offset));
            }

            // The header parser expects the data right after the signature
            let body_start = offset + SIGNATURE_SIZE;
            let cdfh = CentralDirectoryFileHeader::from_bytes(&data[body_start..])
                .ok_or(CentralDirectoryError::MalformedHeader(offset))?;

            // `header_size` does not include the signature
            offset = body_start + cdfh.header_size;
            headers.push(cdfh);
        }

        if offset != data.len() {
            // There is something left over
            return Err(CentralDirectoryError::LeftoverBytes(data.len() - offset));
        }

        Ok(Self {
            headers
        })
    }

    /// Returns a reference to the CDFHs
    pub fn headers_ref(&self) -> &[CentralDirectoryFileHeader] {
        &self.headers
    }

    /// Consumes the directory and returns a [SortedCentralDirectory] whose
    /// headers are ordered by ascending
    /// [CentralDirectoryFileHeader::header_position].
    ///
    /// The sort is stable: headers with equal positions keep their
    /// relative order.
    pub fn sort(mut self) -> SortedCentralDirectory {
        self.headers.sort_by_key(|header| header.header_position());

        SortedCentralDirectory {
            headers: self.headers
        }
    }
}

/// Represents a sorted ZIP central directory.
///
/// Produced by [CentralDirectory::sort], which orders the headers by
/// ascending [CentralDirectoryFileHeader::header_position].
#[derive(Debug)]
pub struct SortedCentralDirectory {
    // Headers in sorted order
    headers: Vec<CentralDirectoryFileHeader>
}

impl SortedCentralDirectory {
    /// Borrows the sorted CDFHs as a slice
    pub fn headers_ref(&self) -> &[CentralDirectoryFileHeader] {
        self.headers.as_slice()
    }
}

/// Signature that introduces every central directory file header.
/// It is compared against the first four bytes of an entry, read as a
/// little-endian `u32`.
pub const CDFH_SIGNATURE: u32 = 0x02014B50;
/// Size in bytes of the fixed-length part of a central directory file
/// header, not counting the 4-byte signature. The filename, extra fields
/// and comment follow this part.
pub const CDFH_CONSTANT_SIZE: usize = 42;

/// Represents the result of reading a central directory file header (CDFH)
///
/// The layout of this object does not follow the original ZIP CDFH structure
#[derive(Debug, Clone)]
pub struct CentralDirectoryFileHeader {
    /// "Version made by" value, as stored in the header
    pub version_made_by: u16,
    /// "Version needed" value, as stored in the header
    pub version_needed: u16,

    /// Flag value, as stored in the header (not decoded)
    pub flag: u16,

    /// Result of [CompressionMethod::from_id] for the stored method id
    // NOTE(review): presumably `None` means the id is not recognised — confirm
    // against `CompressionMethod::from_id`
    pub compression_method: Option<CompressionMethod>,

    /// Modification time, as the raw 16-bit value stored in the header
    pub mod_time: u16,
    /// Modification date, as the raw 16-bit value stored in the header
    pub mod_date: u16,

    /// CRC-32 value, as stored in the header
    pub crc32: u32,

    /// Compressed size after ZIP64 processing of the stored 32-bit value
    pub compressed_size: u64,
    /// Uncompressed size after ZIP64 processing of the stored 32-bit value
    pub uncompressed_size: u64,

    /// Filename, decoded from the stored bytes with lossy UTF-8 conversion
    pub filename: String,
    
    /// Extra fields parsed from the extra field area of the header
    pub extra_fields: Vec<FileHeaderExtraField>,

    /// Disk number after ZIP64 processing of the stored 16-bit value
    pub disk_number: u32,

    /// Internal attributes, as stored in the header
    pub internal_attributes: u16,
    /// External attributes, as stored in the header
    pub external_attributes: u32,

    /// Offset of the local file header after ZIP64 processing of the
    /// stored 32-bit value
    pub local_header_offset: u64,

    /// File comment, decoded from the stored bytes with lossy UTF-8
    /// conversion. Only present with the `zip-comments` feature.
    #[cfg(feature = "zip-comments")]
    pub comment: String,

    /// Total number of bytes this header occupies, not counting the
    /// 4-byte signature: the fixed-size part plus filename, extra fields
    /// and comment
    pub header_size: usize
}

impl CentralDirectoryFileHeader {
    /// Attempts to read a central directory file header from the provided
    /// byte buffer.
    ///
    /// `data` must start right after the 4-byte CDFH signature. Bytes that
    /// follow the header are ignored; the `header_size` field of the result
    /// tells how many bytes of `data` belong to this header.
    ///
    /// Returns `None` if there isn't enough data, if the extra fields cannot
    /// be read, or if the ZIP64 processing of the sizes, offset and disk
    /// number fails.
    pub fn from_bytes(data: impl AsRef<[u8]>) -> Option<Self> {
        let data = data.as_ref();
        if data.len() < CDFH_CONSTANT_SIZE {
            return None;
        }

        // The length check above guarantees that every fixed-size read below
        // succeeds; `.ok()?` merely keeps this path free of panics.
        let mut cursor = Cursor::new(data);

        let version_made_by = cursor.read_u16::<LittleEndian>().ok()?;
        let version_needed = cursor.read_u16::<LittleEndian>().ok()?;
        let flag = cursor.read_u16::<LittleEndian>().ok()?;
        let compression_method = cursor.read_u16::<LittleEndian>().ok()?;
        let mod_time = cursor.read_u16::<LittleEndian>().ok()?;
        let mod_date = cursor.read_u16::<LittleEndian>().ok()?;
        let crc32 = cursor.read_u32::<LittleEndian>().ok()?;
        let compressed_size = cursor.read_u32::<LittleEndian>().ok()?;
        let uncompressed_size = cursor.read_u32::<LittleEndian>().ok()?;
        let filename_length = usize::from(cursor.read_u16::<LittleEndian>().ok()?);
        let extra_fields_length = usize::from(cursor.read_u16::<LittleEndian>().ok()?);
        let comment_length = usize::from(cursor.read_u16::<LittleEndian>().ok()?);
        let disk_number = cursor.read_u16::<LittleEndian>().ok()?;
        let internal_attributes = cursor.read_u16::<LittleEndian>().ok()?;
        let external_attributes = cursor.read_u32::<LittleEndian>().ok()?;
        let local_header_offset = cursor.read_u32::<LittleEndian>().ok()?;

        // The variable-length parts must be completely present as well
        if data.len() < CDFH_CONSTANT_SIZE + filename_length + extra_fields_length + comment_length {
            return None;
        }

        let compression_method = CompressionMethod::from_id(compression_method);

        // Layout after the fixed-size part: filename, extra fields, comment
        let filename_start = CDFH_CONSTANT_SIZE;
        let filename_end = filename_start + filename_length;
        let filename = String::from_utf8_lossy(&data[filename_start..filename_end]).into_owned();

        let extra_fields_start = filename_end;
        let extra_fields_end = extra_fields_start + extra_fields_length;
        let extra_fields = FileHeaderExtraField::read_extra_fields(&data[extra_fields_start..extra_fields_end])?;

        let comment_start = extra_fields_end;
        let comment_end = comment_start + comment_length;

        // The values stored in the fixed-size part are narrower than the ones
        // exposed by this type; the ZIP64 processing widens them, taking the
        // extra fields into account.
        let original_zip64_data = Zip64OriginalData {
            uncompressed_size,
            compressed_size,
            local_header_offset,
            disk_number
        };

        let Zip64ProcessedData {
            uncompressed_size,
            compressed_size,
            local_header_offset,
            disk_number
        } = original_zip64_data.process(&extra_fields)?;

        Some(Self {
            version_made_by,
            version_needed,
            flag,
            compression_method,
            mod_time,
            mod_date,
            crc32,
            compressed_size,
            uncompressed_size,
            filename,
            extra_fields,
            disk_number,
            internal_attributes,
            external_attributes,
            local_header_offset,

            #[cfg(feature = "zip-comments")]
            comment: String::from_utf8_lossy(&data[comment_start..comment_end]).into_owned(),

            // The comment is the last part of the header, so its end is the
            // header size (signature not included)
            header_size: comment_end
        })
    }

    /// Returns `true` if the filename ends with `/`
    pub fn is_directory(&self) -> bool {
        self.filename.ends_with('/')
    }

    /// Returns the [ZipPosition] of the LFH corresponding to this CDFH
    pub fn header_position(&self) -> ZipPosition {
        // NOTE(review): `as usize` truncates `local_header_offset` (u64) on
        // targets where `usize` is narrower than 64 bits.
        ZipPosition::new(
            self.disk_number as usize,
            self.local_header_offset as usize
        )
    }
}