zipit/
lib.rs

1//! ## Features
2//!
3//! - Stream on the fly an archive from multiple AsyncRead objects.
4//! - Single read / seek free implementation (the CRC and file size are calculated while streaming and are sent afterwards).
5//! - Archive size pre-calculation (useful if you want to set the `Content-Length` before streaming).
6//! - [futures](https://docs.rs/futures/latest/futures/) and [tokio](https://docs.rs/tokio/latest/tokio/io/index.html) `AsyncRead` / `AsyncWrite` compatible. Enable either the `futures-async-io` or the `tokio-async-io` feature accordingly.
7//!
8//! ## Limitations
9//!
10//! - No compression (stored method only).
11//! - Only files (no directories).
12//! - No customizable external file attributes.
13//!
14//! ## Examples
15//!
16//! ### [File system](examples/fs.rs)
17//!
18//! Write a zip archive to the file system using [`tokio::fs::File`](https://docs.rs/tokio/1.13.0/tokio/fs/struct.File.html):
19//!
20//! ```
21//! use std::io::Cursor;
22//! use tokio::fs::File;
23//! use zipit::{Archive, FileDateTime};
24//!
25//! #[tokio::main]
26//! async fn main() {
27//!     let file = File::from_std(tempfile::tempfile().unwrap());
28//!     let mut archive = Archive::new(file);
29//!     archive.append(
30//!         "file1.txt".to_owned(),
31//!         FileDateTime::now(),
32//!         &mut Cursor::new(b"hello\n".to_vec()),
33//!     ).await.unwrap();
34//!     archive.append(
35//!         "file2.txt".to_owned(),
36//!         FileDateTime::now(),
37//!         &mut Cursor::new(b"world\n".to_vec()),
38//!     ).await.unwrap();
39//!     archive.finalize().await.unwrap();
40//! }
41//! ```
42//!
43//! ### [Hyper](examples/hyper.rs)
44//!
45//! Stream a zip archive as a [`hyper`](https://docs.rs/hyper/0.14.14/hyper/) response:
46//!
47//! ```
48//! use std::io::Cursor;
49//! use hyper::{header, Body, Request, Response, Server, StatusCode};
50//! use tokio::io::duplex;
51//! use tokio_util::io::ReaderStream;
52//! use zipit::{archive_size, Archive, FileDateTime};
53//!
54//! async fn zip_archive(_req: Request<Body>) -> Result<Response<Body>, hyper::http::Error> {
55//!     let (filename_1, mut fd_1) = (String::from("file1.txt"), Cursor::new(b"hello\n".to_vec()));
56//!     let (filename_2, mut fd_2) = (String::from("file2.txt"), Cursor::new(b"world\n".to_vec()));
57//!     let archive_size = archive_size([
58//!         (filename_1.as_ref(), fd_1.get_ref().len()),
59//!         (filename_2.as_ref(), fd_2.get_ref().len()),
60//!     ]);
61//!
62//!     let (w, r) = duplex(4096);
63//!     tokio::spawn(async move {
64//!         let mut archive = Archive::new(w);
65//!         archive
66//!             .append(
67//!                 filename_1,
68//!                 FileDateTime::now(),
69//!                 &mut fd_1,
70//!             )
71//!             .await
72//!             .unwrap();
73//!         archive
74//!             .append(
75//!                 filename_2,
76//!                 FileDateTime::now(),
77//!                 &mut fd_2,
78//!             )
79//!             .await
80//!             .unwrap();
81//!         archive.finalize().await.unwrap();
82//!     });
83//!
84//!     Response::builder()
85//!         .status(StatusCode::OK)
86//!         .header(header::CONTENT_LENGTH, archive_size)
87//!         .header(header::CONTENT_TYPE, "application/zip")
88//!         .body(Body::wrap_stream(ReaderStream::new(r)))
89//! }
90//! ```
91
92#![deny(dead_code, unsafe_code, missing_docs)]
93#![cfg_attr(docsrs, feature(doc_auto_cfg))]
94
95#[cfg(any(feature = "futures-async-io", feature = "tokio-async-io"))]
96use std::io::Error as IoError;
97use std::mem::size_of;
98
99#[cfg(feature = "chrono-datetime")]
100use chrono::{DateTime, Datelike, Local, TimeZone, Timelike};
101#[cfg(any(feature = "futures-async-io", feature = "tokio-async-io"))]
102use crc32fast::Hasher;
103
104#[cfg(any(feature = "futures-async-io", feature = "tokio-async-io"))]
/// Bookkeeping kept for every appended file.
///
/// An instance is recorded once a file has been streamed to the sink and is
/// read back when the central directory is written.
#[derive(Debug)]
struct FileInfo {
    /// File name as written in the archive.
    name: String,
    /// Number of payload bytes streamed for the file.
    size: usize,
    /// CRC32 of the payload.
    crc: u32,
    /// Number of bytes written to the sink before this file's local header.
    offset: usize,
    /// MS-DOS `(date, time)` pair.
    datetime: (u16, u16),
}
113
/// The (timezone-less) date and time that will be written in the archive alongside the file.
///
/// - Use `FileDateTime::Zero` if the date and time are insignificant. This will set the value
///   to 0 which is 1980, January 1st, 12AM.
/// - Use `FileDateTime::Custom` if you need to set a custom date and time.
/// - Use `FileDateTime::now()` if you want to use the current date and time
///   (`chrono-datetime` feature required).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileDateTime {
    /// 1980, January 1st, 12AM.
    Zero,
    /// (year, month, day, hour, minute, second)
    Custom {
        /// Year.
        year: u16,
        /// Month.
        month: u16,
        /// Day.
        day: u16,
        /// Hour (24 format).
        hour: u16,
        /// Minute.
        minute: u16,
        /// Second.
        second: u16,
    },
}
139
#[cfg(any(feature = "futures-async-io", feature = "tokio-async-io"))]
impl FileDateTime {
    /// Flatten the value into `(year, month, day, hour, minute, second)`.
    ///
    /// `FileDateTime::Zero` yields all zeros.
    fn tuple(&self) -> (u16, u16, u16, u16, u16, u16) {
        match *self {
            FileDateTime::Zero => Default::default(),
            FileDateTime::Custom {
                year,
                month,
                day,
                hour,
                minute,
                second,
            } => (year, month, day, hour, minute, second),
        }
    }

    /// Encode the value as an MS-DOS `(date, time)` pair.
    ///
    /// Layout of the two 16-bit words:
    ///
    /// - date: day (bits 0-4), month (bits 5-8), years since 1980 (bits 9-15);
    /// - time: seconds / 2 (bits 0-4), minutes (bits 5-10), hours (bits 11-15).
    ///
    /// Every component is saturated to the largest value its field can
    /// legitimately hold. Without this, an out-of-range component would spill
    /// into the neighbouring bit field (e.g. month 16 would bump the year) or
    /// lose its high bits (year 2108 would be stored as 1980). Components that
    /// are already in range, including the all-zero `Zero` value, are encoded
    /// unchanged.
    fn ms_dos(&self) -> (u16, u16) {
        let (year, month, day, hour, minute, second) = self.tuple();

        // Years before 1980 saturate to 0, years after 2107 saturate to 127 (7 bits).
        let years_since_1980 = year.saturating_sub(1980).min(127);
        let month = month.min(12);
        let day = day.min(31);
        let hour = hour.min(23);
        let minute = minute.min(59);
        let second = second.min(59);

        (
            day | month << 5 | years_since_1980 << 9,
            (second / 2) | minute << 5 | hour << 11,
        )
    }
}
164
#[cfg(feature = "chrono-datetime")]
impl FileDateTime {
    /// Use the local date and time of the system.
    pub fn now() -> Self {
        Self::from_chrono_datetime(Local::now())
    }

    /// Use a custom date and time.
    ///
    /// The year is saturated to the `u16` range: years before 0 become 0 and years after 65535
    /// become 65535, instead of wrapping around to an unrelated value.
    pub fn from_chrono_datetime<Tz: TimeZone>(datetime: DateTime<Tz>) -> Self {
        // `year()` is a signed 32-bit value; a plain `as u16` would turn e.g. year -1 into 65535.
        let year = datetime.year().max(0).min(i32::from(u16::MAX)) as u16;

        Self::Custom {
            year,
            month: datetime.month() as u16,
            day: datetime.day() as u16,
            hour: datetime.hour() as u16,
            minute: datetime.minute() as u16,
            second: datetime.second() as u16,
        }
    }
}
184
185#[cfg(any(feature = "futures-async-io", feature = "tokio-async-io"))]
// Serialize a list of integer fields into a freshly allocated byte vector.
//
// Syntax: `header![capacity; field, field, ...]` (a trailing comma is accepted).
// The vector is pre-allocated with `capacity` bytes and each field is appended
// in order using its little-endian representation (`to_le_bytes`).
macro_rules! header {
    [$capacity:expr; $($elem:expr),*$(,)?] => {{
        let mut bytes = Vec::<u8>::with_capacity($capacity);
        $(
            bytes.extend_from_slice(&$elem.to_le_bytes());
        )*
        bytes
    }};
}
197
/// Size in bytes of a local file header, file name excluded (4 `u32` and 7 `u16` fields).
const FILE_HEADER_BASE_SIZE: usize = 4 * size_of::<u32>() + 7 * size_of::<u16>();
/// Size in bytes of the data descriptor written after each payload (4 `u32` fields).
const DESCRIPTOR_SIZE: usize = 4 * size_of::<u32>();
/// Size in bytes of a central directory entry, file name excluded (6 `u32` and 11 `u16` fields).
const CENTRAL_DIRECTORY_ENTRY_BASE_SIZE: usize = 6 * size_of::<u32>() + 11 * size_of::<u16>();
/// Size in bytes of the end of central directory record (3 `u32` and 5 `u16` fields).
const END_OF_CENTRAL_DIRECTORY_SIZE: usize = 3 * size_of::<u32>() + 5 * size_of::<u16>();
202
/// A streamed zip archive.
///
/// Build one with `new`, handing it the `AsyncWrite` that will receive the archive bytes.
/// Then add files one at a time with `append`, and call `finalize` once every file has been
/// appended.
///
/// ## Example
///
/// ```no_run
/// use std::io::Cursor;
/// use zipit::{Archive, FileDateTime};
///
/// #[tokio::main]
/// async fn main() {
///     let mut archive = Archive::new(Vec::new());
///     archive.append(
///         "file1.txt".to_owned(),
///         FileDateTime::now(),
///         &mut Cursor::new(b"hello\n".to_vec()),
///     ).await.unwrap();
///     archive.append(
///         "file2.txt".to_owned(),
///         FileDateTime::now(),
///         &mut Cursor::new(b"world\n".to_vec()),
///     ).await.unwrap();
///     let data = archive.finalize().await.unwrap();
///     println!("{:?}", data);
/// }
/// ```
#[cfg(any(feature = "futures-async-io", feature = "tokio-async-io"))]
#[derive(Debug)]
pub struct Archive<W> {
    // Destination of every byte of the archive.
    sink: W,
    // One record per appended file, in append order.
    files_info: Vec<FileInfo>,
    // Running count of bytes written for local headers, payloads and data descriptors.
    written: usize,
}
237
/// Build the `InvalidInput` error returned when `what` exceeds a limit of the zip format.
#[cfg(any(feature = "futures-async-io", feature = "tokio-async-io"))]
fn zip_limit_error(what: &str) -> IoError {
    IoError::new(
        std::io::ErrorKind::InvalidInput,
        format!(
            "{} exceeds the limit of the zip format (Zip64 is not supported)",
            what
        ),
    )
}

/// Convert `value` into a 16-bit zip field, failing instead of silently truncating.
#[cfg(any(feature = "futures-async-io", feature = "tokio-async-io"))]
fn zip_u16(value: usize, what: &str) -> Result<u16, IoError> {
    if value > usize::from(u16::MAX) {
        Err(zip_limit_error(what))
    } else {
        Ok(value as u16)
    }
}

/// Convert `value` into a 32-bit zip field, failing instead of silently truncating.
#[cfg(any(feature = "futures-async-io", feature = "tokio-async-io"))]
fn zip_u32(value: usize, what: &str) -> Result<u32, IoError> {
    if (value as u64) > u64::from(u32::MAX) {
        Err(zip_limit_error(what))
    } else {
        Ok(value as u32)
    }
}

// Generate the `append` / `finalize` pair of `Archive` for one async I/O flavour.
//
// Arguments, in order: attributes to put on both methods, the `AsyncWrite` and
// `AsyncRead` traits, the matching extension traits (providing `write_all` and
// `read`), and the names given to the generated `append` and `finalize` methods.
#[cfg(any(feature = "futures-async-io", feature = "tokio-async-io"))]
macro_rules! impl_methods {
    (
        $(#[$($attrss:tt)*])*,
        $w:path, $r:path,
        $we:path, $re: path,
        $fa:tt, $ff:tt,
    ) => {
        impl<W> Archive<W> {
            /// Append a new file to the archive using the provided name, date/time and `AsyncRead` object.
            /// Filename must be valid UTF-8. Some (very) old zip utilities might mess up filenames during extraction if they contain non-ascii characters.
            /// File's payload is not compressed and is given `rw-r--r--` permissions.
            ///
            /// # Errors
            ///
            /// This function will forward any error found while trying to read from the file stream or while writing to the underlying sink.
            ///
            /// An `InvalidInput` error is returned, instead of silently writing a corrupt archive, when a value does not fit in its zip field (Zip64 is not supported):
            /// the file name is longer than 65535 bytes, the archive already holds more than 4 GiB - 1 of data, or the payload is larger than 4 GiB - 1.
            /// The first two cases are detected before anything is written. The last one is detected while streaming: the archive is then incomplete and must be discarded.
            $(#[$($attrss)*])*
            pub async fn $fa<R>(
                &mut self,
                name: String,
                datetime: FileDateTime,
                reader: &mut R,
            ) -> Result<(), IoError> where W: $w + Unpin, R: $r + Unpin {
                use $we;
                use $re;

                // Validate everything known up front before touching the sink, so that
                // a rejected file leaves the archive untouched.
                let name_len = zip_u16(name.len(), "file name length")?;
                let offset = self.written;
                zip_u32(offset, "local file header offset")?;

                let (date, time) = datetime.ms_dos();
                let mut header = header![
                    FILE_HEADER_BASE_SIZE + name.len();
                    0x04034b50u32,          // Local file header signature.
                    10u16,                  // Version needed to extract.
                    1u16 << 3 | 1 << 11,    // General purpose flag (temporary crc and sizes + UTF-8 filename).
                    0u16,                   // Compression method (store).
                    time,                   // Modification time.
                    date,                   // Modification date.
                    0u32,                   // Temporary CRC32.
                    0u32,                   // Temporary compressed size.
                    0u32,                   // Temporary uncompressed size.
                    name_len,               // Filename length.
                    0u16,                   // Extra field length.
                ];
                header.extend_from_slice(name.as_bytes()); // Filename.
                self.sink.write_all(&header).await?;
                self.written += header.len();

                let mut total_read = 0usize;
                let mut size = 0u32;
                let mut hasher = Hasher::new();
                let mut buf = vec![0; 4096];
                loop {
                    let read = reader.read(&mut buf).await?;
                    if read == 0 {
                        break;
                    }

                    // Stop as soon as the payload outgrows the 32-bit size fields
                    // rather than streaming data that can never be described.
                    total_read = total_read
                        .checked_add(read)
                        .ok_or_else(|| zip_limit_error("file size"))?;
                    size = zip_u32(total_read, "file size")?;

                    hasher.update(&buf[..read]);
                    self.sink.write_all(&buf[..read]).await?; // Payload chunk.
                }
                let crc = hasher.finalize();
                self.written += total_read;

                let descriptor = header![
                    DESCRIPTOR_SIZE;
                    0x08074b50u32,      // Data descriptor signature.
                    crc,                // CRC32.
                    size,               // Compressed size.
                    size,               // Uncompressed size.
                ];
                self.sink.write_all(&descriptor).await?;
                self.written += descriptor.len();

                self.files_info.push(FileInfo {
                    name,
                    size: total_read,
                    crc,
                    offset,
                    datetime: (date, time),
                });

                Ok(())
            }

            /// Finalize the archive by writing the necessary metadata to the end of the archive.
            ///
            /// # Errors
            ///
            /// This function will forward any error found while writing to the underlying sink.
            ///
            /// An `InvalidInput` error is returned, before anything is written, when the archive holds more than 65535 files or when the offset or the size of the central directory exceeds 4 GiB - 1 (Zip64 is not supported).
            $(#[$($attrss)*])*
            pub async fn $ff(mut self) -> Result<W, IoError> where W: $w + Unpin {
                use $we;

                // Check every end-of-central-directory field before writing the first
                // entry, so that a failure never leaves a half-written central directory.
                let entry_count = zip_u16(self.files_info.len(), "number of files")?;
                let central_directory_offset = zip_u32(self.written, "central directory offset")?;
                let central_directory_size = zip_u32(
                    self.files_info
                        .iter()
                        .map(|file_info| CENTRAL_DIRECTORY_ENTRY_BASE_SIZE + file_info.name.len())
                        .sum::<usize>(),
                    "central directory size",
                )?;

                for file_info in &self.files_info {
                    // The `as` casts below cannot truncate: the append method
                    // validated these values before recording the file.
                    let mut entry = header![
                        CENTRAL_DIRECTORY_ENTRY_BASE_SIZE + file_info.name.len();
                        0x02014b50u32,                  // Central directory entry signature.
                        0x031eu16,                      // Version made by.
                        10u16,                          // Version needed to extract.
                        1u16 << 3 | 1 << 11,            // General purpose flag (temporary crc and sizes + UTF-8 filename).
                        0u16,                           // Compression method (store).
                        file_info.datetime.1,           // Modification time.
                        file_info.datetime.0,           // Modification date.
                        file_info.crc,                  // CRC32.
                        file_info.size as u32,          // Compressed size.
                        file_info.size as u32,          // Uncompressed size.
                        file_info.name.len() as u16,    // Filename length.
                        0u16,                           // Extra field length.
                        0u16,                           // File comment length.
                        0u16,                           // File's Disk number.
                        0u16,                           // Internal file attributes.
                        (0o100000u32 | 0o0000400 | 0o0000200 | 0o0000040 | 0o0000004) << 16, // External file attributes (regular file / rw-r--r--).
                        file_info.offset as u32,        // Offset from start of file to local file header.
                    ];
                    entry.extend_from_slice(file_info.name.as_bytes()); // Filename.
                    self.sink.write_all(&entry).await?;
                }

                let end_of_central_directory = header![
                    END_OF_CENTRAL_DIRECTORY_SIZE;
                    0x06054b50u32,              // End of central directory signature.
                    0u16,                       // Number of this disk.
                    0u16,                       // Number of the disk where central directory starts.
                    entry_count,                // Number of central directory records on this disk.
                    entry_count,                // Total number of central directory records.
                    central_directory_size,     // Size of central directory.
                    central_directory_offset,   // Offset from start of file to central directory.
                    0u16,                       // Comment length.
                ];
                self.sink.write_all(&end_of_central_directory).await?;

                Ok(self.sink)
            }
        }
    };
}
375
// Both I/O flavours enabled: generate one pair of methods per flavour, with
// prefixed names so that they can coexist on `Archive`.
#[cfg(all(feature = "futures-async-io", feature = "tokio-async-io"))]
impl_methods!(
    #[cfg(all(feature = "futures-async-io", feature = "tokio-async-io"))],
    futures_util::AsyncWrite,
    futures_util::AsyncRead,
    futures_util::AsyncWriteExt,
    futures_util::AsyncReadExt,
    futures_append,
    futures_finalize,
);
#[cfg(all(feature = "futures-async-io", feature = "tokio-async-io"))]
impl_methods!(
    #[cfg(all(feature = "futures-async-io", feature = "tokio-async-io"))],
    tokio::io::AsyncWrite,
    tokio::io::AsyncRead,
    tokio::io::AsyncWriteExt,
    tokio::io::AsyncReadExt,
    tokio_append,
    tokio_finalize,
);

// Only the futures flavour enabled: expose it under the plain names.
#[cfg(all(feature = "futures-async-io", not(feature = "tokio-async-io")))]
impl_methods!(
    #[cfg(all(feature = "futures-async-io", not(feature = "tokio-async-io")))],
    futures_util::AsyncWrite,
    futures_util::AsyncRead,
    futures_util::AsyncWriteExt,
    futures_util::AsyncReadExt,
    append,
    finalize,
);

// Only the tokio flavour enabled: expose it under the plain names.
#[cfg(all(not(feature = "futures-async-io"), feature = "tokio-async-io"))]
impl_methods!(
    #[cfg(all(not(feature = "futures-async-io"), feature = "tokio-async-io"))],
    tokio::io::AsyncWrite,
    tokio::io::AsyncRead,
    tokio::io::AsyncWriteExt,
    tokio::io::AsyncReadExt,
    append,
    finalize,
);
406
#[cfg(any(feature = "futures-async-io", feature = "tokio-async-io"))]
impl<W> Archive<W> {
    /// Create a new, empty zip archive whose headers and payloads will be written to `sink`
    /// (the underlying `AsyncWrite`).
    pub fn new(sink: W) -> Self {
        // Nothing has been written yet and no file is known.
        let files_info = Vec::new();
        let written = 0;

        Archive {
            sink,
            files_info,
            written,
        }
    }
}
418
419/// Calculate the size that an archive could be based on the names and sizes of files.
420///
421/// ## Example
422///
423/// ```
424/// assert_eq!(
425///     zipit::archive_size([
426///         ("file1.txt", b"hello\n".len()),
427///         ("file2.txt", b"world\n".len()),
428///     ]),
429///     254,
430/// );
431/// ```
432pub fn archive_size<'a, I: IntoIterator<Item = (&'a str, usize)>>(files: I) -> usize {
433    files
434        .into_iter()
435        .map(|(name, size)| {
436            FILE_HEADER_BASE_SIZE
437                + name.len()
438                + size
439                + DESCRIPTOR_SIZE
440                + CENTRAL_DIRECTORY_ENTRY_BASE_SIZE
441                + name.len()
442        })
443        .sum::<usize>()
444        + END_OF_CENTRAL_DIRECTORY_SIZE
445}
446
#[cfg(test)]
mod tests {
    use crate::{Archive, FileDateTime};
    use std::io::Cursor;

    /// The pre-computed size matches hand-computed totals for two and three files.
    #[test]
    fn archive_size() {
        let two_files = [
            ("file1.txt", b"hello\n".len()),
            ("file2.txt", b"world\n".len()),
        ];
        assert_eq!(crate::archive_size(two_files), 254);

        let three_files = [
            ("file1.txt", b"hello\n".len()),
            ("file2.txt", b"world\n".len()),
            ("file3.txt", b"how are you?\n".len()),
        ];
        assert_eq!(crate::archive_size(three_files), 377);
    }

    /// A two-file archive matches the reference archives byte for byte, apart from the
    /// date/time fields and the size fields of the local file headers.
    #[tokio::test]
    async fn archive_structure() {
        let mut archive = Archive::new(Vec::new());
        for (name, payload) in [("file1.txt", b"hello\n"), ("file2.txt", b"world\n")] {
            archive
                .tokio_append(
                    name.to_owned(),
                    FileDateTime::now(),
                    &mut Cursor::new(payload.to_vec()),
                )
                .await
                .unwrap();
        }
        let data = archive.tokio_finalize().await.unwrap();

        // Compare two archives while skipping the bytes that are allowed to differ.
        fn match_except_datetime(a1: &[u8], a2: &[u8]) -> bool {
            let ignored = [
                // First local file header: time, date, compressed and uncompressed sizes.
                10..12,
                12..14,
                18..22,
                22..26,
                // Second local file header: time, date, compressed and uncompressed sizes.
                71..73,
                73..75,
                79..83,
                83..87,
                // First central directory entry: time and date.
                134..136,
                136..138,
                // Second central directory entry: time and date.
                189..191,
                191..193,
            ];

            if a1.len() != a2.len() {
                return false;
            }
            a1.iter()
                .zip(a2)
                .enumerate()
                .all(|(i, (b1, b2))| b1 == b2 || ignored.iter().any(|range| range.contains(&i)))
        }

        let references: [&[u8]; 2] = [
            include_bytes!("timeless_test_archive.zip"),
            include_bytes!("zip_command_test_archive.zip"),
        ];
        for reference in references {
            assert!(match_except_datetime(&data, reference));
        }
    }
}