async_zip/base/read/
cd.rs

1use futures_lite::io::{AsyncRead, AsyncReadExt};
2
3use crate::base::read::counting::Counting;
4use crate::base::read::io::CombinedCentralDirectoryRecord;
5use crate::base::read::{detect_filename, get_zip64_extra_field, io};
6use crate::error::{Result, ZipError};
7use crate::spec::consts::{CDH_SIGNATURE, EOCDR_SIGNATURE, NON_ZIP64_MAX_SIZE, ZIP64_EOCDR_SIGNATURE};
8use crate::spec::header::{
9    CentralDirectoryRecord, EndOfCentralDirectoryHeader, Zip64EndOfCentralDirectoryLocator,
10    Zip64EndOfCentralDirectoryRecord,
11};
12use crate::spec::parse::parse_extra_fields;
13use crate::ZipString;
14
15/// An entry returned by the [`CentralDirectoryReader`].
16pub enum Entry {
17    CentralDirectoryEntry(CentralDirectoryEntry),
18    EndOfCentralDirectoryRecord {
19        /// The combined end-of-central-directory record, which may include ZIP64 information.
20        record: CombinedCentralDirectoryRecord,
21        /// The comment associated with the end-of-central-directory record.
22        comment: ZipString,
23        /// Whether the end-of-central-directory record contains extensible data.
24        extensible: bool,
25    },
26}
27
28/// An entry in the ZIP file's central directory.
29pub struct CentralDirectoryEntry {
30    /// The compressed size of the entry, taking into account ZIP64 if necessary.
31    pub(crate) compressed_size: u64,
32    /// The uncompressed size of the entry, taking into account ZIP64 if necessary.
33    pub(crate) uncompressed_size: u64,
34    /// The file offset of the entry in the ZIP file, taking into account ZIP64 if necessary.
35    pub(crate) lh_offset: u64,
36    /// The end-of-central-directory record header.
37    pub(crate) header: CentralDirectoryRecord,
38    /// The filename of the entry.
39    pub(crate) filename: ZipString,
40}
41
42impl CentralDirectoryEntry {
43    /// Returns the entry's filename.
44    ///
45    /// ## Note
46    /// This will return the raw filename stored during ZIP creation. If calling this method on entries retrieved from
47    /// untrusted ZIP files, the filename should be sanitised before being used as a path to prevent [directory
48    /// traversal attacks](https://en.wikipedia.org/wiki/Directory_traversal_attack).
49    pub fn filename(&self) -> &ZipString {
50        &self.filename
51    }
52
53    /// Returns whether or not the entry represents a directory.
54    pub fn dir(&self) -> Result<bool> {
55        Ok(self.filename.as_str()?.ends_with('/'))
56    }
57
58    /// Returns the entry's integer-based UNIX permissions.
59    pub fn unix_permissions(&self) -> Option<u32> {
60        Some((self.header.exter_attr) >> 16)
61    }
62
63    /// Returns the CRC32 checksum of the entry.
64    pub fn crc32(&self) -> u32 {
65        self.header.crc
66    }
67
68    /// Returns the file offset of the entry in the ZIP file.
69    pub fn file_offset(&self) -> u64 {
70        self.lh_offset
71    }
72
73    /// Returns the entry's compressed size.
74    pub fn compressed_size(&self) -> u64 {
75        self.compressed_size
76    }
77
78    /// Returns the entry's uncompressed size.
79    pub fn uncompressed_size(&self) -> u64 {
80        self.uncompressed_size
81    }
82}
83
/// A streaming reader that walks the records of a ZIP file's central directory one at a time.
#[derive(Clone)]
pub struct CentralDirectoryReader<R> {
    /// The source the records are read from.
    reader: R,
    /// `true` until the first call to `next`; while set, the leading record signature is assumed
    /// to have been consumed already and is not read again.
    initial: bool,
    /// The offset supplied at construction. It is added to the number of bytes read to derive
    /// positions, and is compared against the central directory offset stored in the
    /// end-of-central-directory record.
    offset: u64,
}
90
91impl<R> CentralDirectoryReader<Counting<R>>
92where
93    R: AsyncRead + Unpin,
94{
95    /// Constructs a new ZIP reader from a non-seekable source.
96    pub fn new(reader: R, offset: u64) -> Self {
97        Self { reader: Counting::new(reader), offset, initial: true }
98    }
99
100    /// Reads the next [`CentralDirectoryEntry`] from the underlying source, advancing the
101    /// reader to the next record.
102    ///
103    /// Returns `Ok(EndOfCentralDirectoryRecord)` if the end of the central directory record has
104    /// been reached.
105    pub async fn next(&mut self) -> Result<Entry> {
106        // Skip the first `CDH_SIGNATURE`. The `CentralDirectoryReader` is assumed to pick up from
107        // where the streaming `ZipFileReader` left off, which means that the first record's
108        // signature has already been read.
109        if self.initial {
110            self.initial = false;
111        } else {
112            let signature = {
113                let mut buffer = [0; 4];
114                self.reader.read_exact(&mut buffer).await?;
115                u32::from_le_bytes(buffer)
116            };
117            let offset = self.offset + self.reader.bytes_read();
118            match signature {
119                CDH_SIGNATURE => (),
120                EOCDR_SIGNATURE => {
121                    // Read the end-of-central-directory header.
122                    let eocdr = EndOfCentralDirectoryHeader::from_reader(&mut self.reader).await?;
123
124                    // Read the EOCDR comment.
125                    let comment =
126                        io::read_string(&mut self.reader, eocdr.file_comm_length.into(), crate::StringEncoding::Utf8)
127                            .await?;
128
129                    // Verify that the EOCDR offset matches the current reader offset.
130                    if eocdr.central_directory_offset() != self.offset {
131                        return Err(ZipError::InvalidEndOfCentralDirectoryOffset(
132                            eocdr.central_directory_offset(),
133                            offset,
134                        ));
135                    }
136
137                    return Ok(Entry::EndOfCentralDirectoryRecord {
138                        record: CombinedCentralDirectoryRecord::from(&eocdr),
139                        comment,
140                        extensible: false,
141                    });
142                }
143                ZIP64_EOCDR_SIGNATURE => {
144                    // Read the ZIP64 EOCDR.
145                    let zip64_eocdr = Zip64EndOfCentralDirectoryRecord::from_reader(&mut self.reader).await?;
146
147                    // Skip the extensible data field.
148                    let extensible = if zip64_eocdr.size_of_zip64_end_of_cd_record > 44 {
149                        let extensible_data_size = zip64_eocdr.size_of_zip64_end_of_cd_record - 44;
150                        io::skip_bytes(&mut self.reader, extensible_data_size).await?;
151                        true
152                    } else {
153                        false
154                    };
155
156                    // Read the ZIP64 EOCDR locator.
157                    let Some(zip64_eocdl) =
158                        Zip64EndOfCentralDirectoryLocator::try_from_reader(&mut self.reader).await?
159                    else {
160                        return Err(ZipError::MissingZip64EndOfCentralDirectoryLocator);
161                    };
162
163                    // Verify that the ZIP64 EOCDR locator points to the correct offset.
164                    if zip64_eocdl.relative_offset != offset {
165                        return Err(ZipError::InvalidZip64EndOfCentralDirectoryLocatorOffset(
166                            zip64_eocdl.relative_offset,
167                            offset,
168                        ));
169                    }
170
171                    // Read the EOCDR signature.
172                    let signature = {
173                        let mut buffer = [0; 4];
174                        self.reader.read_exact(&mut buffer).await?;
175                        u32::from_le_bytes(buffer)
176                    };
177                    if signature != EOCDR_SIGNATURE {
178                        return Err(ZipError::UnexpectedHeaderError(signature, EOCDR_SIGNATURE));
179                    }
180
181                    // Read the end-of-central-directory header.
182                    let eocdr = EndOfCentralDirectoryHeader::from_reader(&mut self.reader).await?;
183
184                    // Read the EOCDR comment.
185                    let comment =
186                        io::read_string(&mut self.reader, eocdr.file_comm_length.into(), crate::StringEncoding::Utf8)
187                            .await?;
188
189                    // Combine the EOCDR and ZIP64 EOCDR.
190                    let combined = CombinedCentralDirectoryRecord::combine(eocdr, zip64_eocdr);
191
192                    // Verify that the EOCDR offset matches the current reader offset.
193                    if combined.central_directory_offset() != self.offset {
194                        return Err(ZipError::InvalidEndOfCentralDirectoryOffset(
195                            combined.central_directory_offset(),
196                            offset,
197                        ));
198                    }
199
200                    return Ok(Entry::EndOfCentralDirectoryRecord { record: combined, comment, extensible });
201                }
202                actual => return Err(ZipError::UnexpectedHeaderError(actual, CDH_SIGNATURE)),
203            }
204        }
205
206        // Read the record.
207        let header = CentralDirectoryRecord::from_reader(&mut self.reader).await?;
208
209        // Read the file name, extra field, and comment, which also ensures that we advance the
210        // reader to the next record.
211        let filename_basic = io::read_bytes(&mut self.reader, header.file_name_length.into()).await?;
212        let extra_field = io::read_bytes(&mut self.reader, header.extra_field_length.into()).await?;
213        let extra_fields = parse_extra_fields(
214            extra_field,
215            header.uncompressed_size,
216            header.compressed_size,
217            Some(header.lh_offset),
218            Some(header.disk_start),
219        )?;
220        let zip64_extra_field = get_zip64_extra_field(&extra_fields);
221
222        // We read the comment but drop it, since we don't need it for anything.
223        io::skip_bytes(&mut self.reader, header.file_comment_length.into()).await?;
224
225        // Reconcile the compressed size, uncompressed size, and file offset, using ZIP64 if necessary.
226        let compressed_size = if let Some(compressed_size) = zip64_extra_field
227            .and_then(|zip64| zip64.compressed_size)
228            .filter(|_| header.compressed_size == NON_ZIP64_MAX_SIZE)
229        {
230            compressed_size
231        } else {
232            header.compressed_size as u64
233        };
234        let uncompressed_size = if let Some(uncompressed_size) = zip64_extra_field
235            .and_then(|zip64| zip64.uncompressed_size)
236            .filter(|_| header.uncompressed_size == NON_ZIP64_MAX_SIZE)
237        {
238            uncompressed_size
239        } else {
240            header.uncompressed_size as u64
241        };
242        let lh_offset = if let Some(lh_offset) = zip64_extra_field
243            .and_then(|zip64| zip64.relative_header_offset)
244            .filter(|_| header.lh_offset == NON_ZIP64_MAX_SIZE)
245        {
246            lh_offset
247        } else {
248            header.lh_offset as u64
249        };
250
251        // Parse out the filename.
252        let filename = detect_filename(filename_basic, header.flags.filename_unicode, extra_fields.as_ref());
253
254        Ok(Entry::CentralDirectoryEntry(CentralDirectoryEntry {
255            header,
256            compressed_size,
257            uncompressed_size,
258            lh_offset,
259            filename,
260        }))
261    }
262}