netzip/
lib.rs

1use std::io::Read;
2
3use bytes::Bytes;
4use flate2::bufread::DeflateDecoder;
5use netzip_parser::{CentralDirectoryEnd, CentralDirectoryRecord, LocalFile, ZipError};
6use thiserror::Error;
7
8#[derive(Debug, Error)]
9pub enum Error {
10    #[error("Error encountered while sending network request to '{0}': {1}")]
11    NetworkError(String, reqwest::Error),
12    #[error("Error encountered whiler parsing Zip from request to '{0}': {1}")]
13    ParserError(String, ZipError),
14    #[error("Error encountered while decompressing file from request to '{0}': {1}")]
15    DecompressionError(String, String),
16    #[error("Unable to decompress file with compression type {0}")]
17    UnsupportCompression(u16),
18}
19
20pub struct RemoteZip {
21    url: String,
22    http_client: reqwest::Client,
23    central_directory: Vec<CentralDirectoryRecord>,
24}
25
26impl RemoteZip {
27    /// Creates a new RemoteZip instance by fetching and parsing the ZIP directory structure from a remote URL
28    /// using the provided HTTP client.
29    ///
30    /// # Arguments
31    ///
32    /// * `url` - The URL of the remote ZIP file to access
33    /// * `http_client` - The reqwest HTTP client to use for making requests
34    ///
35    /// # Returns
36    ///
37    /// A Result containing either the initialized RemoteZip instance or an Error
38    pub async fn get_using(url: &str, http_client: reqwest::Client) -> Result<Self, Error> {
39        let min_cde_bytes = ranged_request(
40            url,
41            &format!("bytes=-{}", netzip_parser::EOCD_MIN_SIZE),
42            http_client.clone(),
43        )
44        .await?;
45
46        let cde = if let Ok(min_out) = CentralDirectoryEnd::parse(&min_cde_bytes) {
47            min_out
48        } else {
49            // There might be a comment, retry with an offset and search for the EOCD
50            let cde_haystack = ranged_request(
51                url,
52                &format!("bytes=-{}", netzip_parser::EOCD_MIN_SIZE + 1024),
53                http_client.clone(),
54            )
55            .await?;
56
57            CentralDirectoryEnd::find_and_parse(&cde_haystack)
58                .map_err(|e| Error::ParserError(url.into(), e))?
59        };
60
61        let cd_bytes = ranged_request(
62            url,
63            &format!(
64                "bytes={}-{}",
65                cde.central_directory_offset,
66                cde.central_directory_offset + cde.directory_size
67            ),
68            http_client.clone(),
69        )
70        .await?;
71
72        let cd_records = CentralDirectoryRecord::parse_many(&cd_bytes)
73            .map_err(|e| Error::ParserError(url.into(), e))?;
74
75        Ok(Self {
76            url: url.into(),
77            central_directory: cd_records,
78            http_client,
79        })
80    }
81
82    /// Creates a new RemoteZip instance by fetching and parsing the ZIP directory structure from a remote URL
83    /// using a default HTTP client.
84    ///
85    /// # Arguments
86    ///
87    /// * `url` - The URL of the remote ZIP file to access
88    ///
89    /// # Returns
90    ///
91    /// A Result containing either the initialized RemoteZip instance or an Error
92    pub async fn get(url: &str) -> Result<Self, Error> {
93        Self::get_using(url, reqwest::Client::new()).await
94    }
95
96    /// Returns a reference to the central directory records of the ZIP file.
97    ///
98    /// # Returns
99    ///
100    /// A reference to the vector of CentralDirectoryRecord entries
101    pub fn records(&self) -> &Vec<CentralDirectoryRecord> {
102        &self.central_directory
103    }
104
105    /// Returns a mutable reference to the central directory records of the ZIP file.
106    ///
107    /// # Returns
108    ///
109    /// A mutable reference to the vector of CentralDirectoryRecord entries
110    pub fn records_mut(&mut self) -> &mut Vec<CentralDirectoryRecord> {
111        &mut self.central_directory
112    }
113
114    /// Downloads and decompresses the specified files from the remote ZIP.
115    ///
116    /// # Arguments
117    ///
118    /// * `paths` - A vector of file paths/names to download from the ZIP
119    ///
120    /// # Returns
121    ///
122    /// A Result containing either a vector of tuples with (LocalFile metadata, file contents as bytes)
123    /// or an Error if any file could not be downloaded or decompressed
124    pub async fn download_files(
125        &self,
126        paths: Vec<String>,
127    ) -> Result<Vec<(LocalFile, Vec<u8>)>, Error> {
128        let needed_cd_records: Vec<&CentralDirectoryRecord> = self
129            .central_directory
130            .iter()
131            .filter(|x| paths.contains(&x.file_name))
132            .collect();
133
134        let mut out = Vec::new();
135
136        for cd_record in needed_cd_records {
137            let lfh_end_offset = cd_record.file_header_offset
138                + netzip_parser::LFH_MIN_SIZE as u32
139                + cd_record.extra_field_length as u32
140                + cd_record.file_name_length as u32
141                + cd_record.file_comment_length as u32;
142            let lfh_bytes = &self
143                .ranged_request(&format!(
144                    "bytes={}-{}",
145                    cd_record.file_header_offset, lfh_end_offset
146                ))
147                .await?;
148
149            let lfh = LocalFile::parse(&lfh_bytes)
150                .map_err(|e| Error::ParserError(self.url.clone(), e))?;
151
152            match lfh.compression_method {
153                netzip_parser::CompressionMethod::Deflate
154                | netzip_parser::CompressionMethod::Deflate64 => {
155                    let compressed_data: &[u8] = &self
156                        .ranged_request(&format!(
157                            "bytes={}-{}",
158                            lfh_end_offset,
159                            lfh_end_offset + lfh.compressed_size
160                        ))
161                        .await?;
162
163                    let mut decoder = DeflateDecoder::new(compressed_data);
164                    let mut decoded = Vec::with_capacity(lfh.uncompressed_size as usize);
165                    decoder
166                        .read_to_end(&mut decoded)
167                        .map_err(|e| Error::DecompressionError(self.url.clone(), e.to_string()))?;
168
169                    out.push((lfh, decoded));
170                }
171                netzip_parser::CompressionMethod::Stored => {
172                    let data = &self
173                        .ranged_request(&format!(
174                            "bytes={}-{}",
175                            lfh_end_offset,
176                            lfh_end_offset + lfh.uncompressed_size
177                        ))
178                        .await?;
179
180                    out.push((lfh, data.to_vec()));
181                }
182                netzip_parser::CompressionMethod::Unsupported(unsupported_id) => {
183                    return Err(Error::UnsupportCompression(unsupported_id));
184                }
185            }
186        }
187
188        return Ok(out);
189    }
190
191    async fn ranged_request(&self, range_string: &str) -> Result<Bytes, Error> {
192        self.http_client
193            .get(&self.url)
194            .header("Range", range_string)
195            .send()
196            .await
197            .map_err(|e| Error::NetworkError(self.url.clone(), e))?
198            .bytes()
199            .await
200            .map_err(|e| Error::NetworkError(self.url.clone(), e))
201    }
202}
203
204async fn ranged_request(
205    url: &str,
206    range_string: &str,
207    client: reqwest::Client,
208) -> Result<Bytes, Error> {
209    client
210        .get(url)
211        .header("Range", range_string)
212        .send()
213        .await
214        .map_err(|e| Error::NetworkError(url.into(), e))?
215        .bytes()
216        .await
217        .map_err(|e| Error::NetworkError(url.into(), e))
218}