rc_zip_corpus/
lib.rs

1//! A corpus of zip files for testing.
2
3use std::{fs::File, path::PathBuf};
4
5use chrono::{DateTime, FixedOffset, TimeZone, Timelike, Utc};
6use rc_zip::{
7    encoding::Encoding,
8    error::{Error, FormatError},
9    parse::{Archive, Entry, EntryKind},
10};
11use temp_dir::TempDir;
12use tracing::span;
13
/// One archive of the corpus, together with what a reader is expected to
/// report for it.
pub struct Case {
    /// File name of the archive, resolved against [`zips_dir`] by
    /// [`Case::absolute_path`].
    pub name: &'static str,
    /// Encoding [`check_case`] asserts for the archive; `None` skips the check.
    pub expected_encoding: Option<Encoding>,
    /// Archive comment [`check_case`] asserts; `None` skips the check.
    pub comment: Option<&'static str>,
    /// Expected entries: either a full list or only a count.
    pub files: Files,
    /// When set, [`check_case`] requires reading to fail with an error whose
    /// debug representation matches this one.
    pub error: Option<Error>,
}
21
/// Expected contents of an archive, at one of two levels of detail.
pub enum Files {
    /// Every entry of the archive, described individually.
    ExhaustiveList(Vec<CaseFile>),
    /// Only the number of entries is known/checked.
    NumFiles(usize),
}
26
27impl Files {
28    pub fn len(&self) -> usize {
29        match self {
30            Self::ExhaustiveList(list) => list.len(),
31            Self::NumFiles(n) => *n,
32        }
33    }
34
35    pub fn is_empty(&self) -> bool {
36        self.len() == 0
37    }
38}
39
40impl Default for Files {
41    fn default() -> Self {
42        Self::NumFiles(0)
43    }
44}
45
46impl From<CaseFile> for Files {
47    fn from(file: CaseFile) -> Self {
48        vec![file].into()
49    }
50}
51
52impl From<Vec<CaseFile>> for Files {
53    fn from(files: Vec<CaseFile>) -> Self {
54        Self::ExhaustiveList(files)
55    }
56}
57
58impl From<usize> for Files {
59    fn from(num: usize) -> Self {
60        Self::NumFiles(num)
61    }
62}
63
64impl Default for Case {
65    fn default() -> Self {
66        Self {
67            name: "test.zip",
68            expected_encoding: None,
69            comment: None,
70            files: Files::default(),
71            error: None,
72        }
73    }
74}
75
/// A path to an archive on disk, plus whatever keeps that path valid.
///
/// This path may disappear on drop (if the zip is bz2-compressed), so be
/// careful: keep the `GuardedPath` alive for as long as `path` is in use.
pub struct GuardedPath {
    /// Location of the (decompressed, if applicable) archive.
    pub path: PathBuf,
    /// Temporary directory holding the decompressed archive; `None` when
    /// `path` points straight into the corpus directory.
    _guard: Option<TempDir>,
}
82
83impl Case {
84    pub fn absolute_path(&self) -> GuardedPath {
85        let path = zips_dir().join(self.name);
86        if let Some(dec_name) = self.name.strip_suffix(".bz2") {
87            let dir = TempDir::new().unwrap();
88            let dec_path = dir.path().join(dec_name);
89            std::io::copy(
90                &mut File::open(&path).unwrap(),
91                &mut bzip2::write::BzDecoder::new(File::create(&dec_path).unwrap()),
92            )
93            .unwrap();
94            tracing::trace!("decompressed {} to {}", path.display(), dec_path.display());
95            GuardedPath {
96                path: dec_path,
97                _guard: Some(dir),
98            }
99        } else {
100            GuardedPath { path, _guard: None }
101        }
102    }
103
104    pub fn bytes(&self) -> Vec<u8> {
105        let gp = self.absolute_path();
106        std::fs::read(gp.path).unwrap()
107    }
108
109    pub fn new(name: &'static str) -> Self {
110        Self {
111            name,
112            ..Default::default()
113        }
114    }
115
116    pub fn encoding(mut self, enc: Encoding) -> Self {
117        self.expected_encoding = Some(enc);
118        self
119    }
120
121    pub fn comment(mut self, comment: &'static str) -> Self {
122        self.comment = Some(comment);
123        self
124    }
125
126    pub fn files<F: Into<Files>>(mut self, files: F) -> Self {
127        self.files = files.into();
128        self
129    }
130
131    pub fn error<E: Into<Error>>(mut self, error: E) -> Self {
132        self.error = Some(error.into());
133        self
134    }
135}
136
/// Expectations for a single entry of an archive.
pub struct CaseFile {
    /// Name of the entry.
    pub name: &'static str,
    /// Expected permission bits; [`check_file_against`] compares them with the
    /// entry's mode masked by `0o777`. `None` skips the check.
    pub mode: Option<u32>,
    /// Expected modification time; `None` skips the check.
    pub modified: Option<DateTime<Utc>>,
    /// Expected contents of the entry.
    pub content: FileContent,
}
143
144impl CaseFile {
145    pub fn new(name: &'static str) -> Self {
146        Self {
147            name,
148            ..Default::default()
149        }
150    }
151
152    pub fn mode(mut self, mode: u32) -> Self {
153        self.mode = Some(mode);
154        self
155    }
156
157    pub fn modified(mut self, date: DateTime<Utc>) -> Self {
158        self.modified = Some(date);
159        self
160    }
161
162    pub fn content<C: Into<FileContent>>(mut self, content: C) -> Self {
163        self.content = content.into();
164        self
165    }
166}
167
/// What the contents of an entry are expected to be.
#[derive(Default)]
pub enum FileContent {
    /// Contents are not compared at all.
    #[default]
    Unchecked,
    /// Contents must equal these bytes exactly.
    Bytes(Vec<u8>),
    /// Contents must equal those of the named file, looked up relative to
    /// [`zips_dir`].
    File(&'static str),
}
175
176impl From<&str> for FileContent {
177    fn from(s: &str) -> Self {
178        s.as_bytes().into()
179    }
180}
181
182impl From<&[u8]> for FileContent {
183    fn from(bytes: &[u8]) -> Self {
184        bytes.to_vec().into()
185    }
186}
187
188impl From<String> for FileContent {
189    fn from(s: String) -> Self {
190        s.into_bytes().into()
191    }
192}
193
194impl From<Vec<u8>> for FileContent {
195    fn from(bytes: Vec<u8>) -> Self {
196        Self::Bytes(bytes)
197    }
198}
199
200impl Default for CaseFile {
201    fn default() -> Self {
202        Self {
203            name: "default",
204            mode: None,
205            modified: None,
206            content: FileContent::default(),
207        }
208    }
209}
210
211pub fn zips_dir() -> PathBuf {
212    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
213        .parent()
214        .unwrap()
215        .join("testdata")
216}
217
218fn time_zone(hours: i32) -> FixedOffset {
219    FixedOffset::east_opt(hours * 3600).unwrap()
220}
221
222#[track_caller]
223fn date(
224    (year, month, day): (i32, u32, u32),
225    (hour, min, sec): (u32, u32, u32),
226    nsec: u32,
227    offset: FixedOffset,
228) -> DateTime<Utc> {
229    offset
230        .with_ymd_and_hms(year, month, day, hour, min, sec)
231        .single()
232        .unwrap()
233        .with_nanosecond(nsec)
234        .unwrap()
235        .into()
236}
237
/// Returns the corpus of archives together with their expected contents.
///
/// Cases for optional compression methods are only included when the
/// matching cargo feature (`lzma`, `deflate64`, `bzip2`, `zstd`) is enabled.
pub fn test_cases() -> Vec<Case> {
    vec![
        // single README entry whose content, mtime and mode are all checked
        Case::new("zip64.zip").files(
            CaseFile::new("README")
                .content("This small file is in ZIP64 format.\n")
                .modified(date((2012, 8, 10), (14, 33, 32), 0, time_zone(0)))
                .mode(0o644),
        ),
        // archive comment, plus one entry compared against a file on disk
        Case::new("test.zip")
            .comment("This is a zipfile comment.")
            .encoding(Encoding::Utf8)
            .files(vec![
                CaseFile::new("test.txt")
                    .content("This is a test text file.\n")
                    .modified(date((2010, 9, 5), (12, 12, 1), 0, time_zone(10)))
                    .mode(0o644),
                CaseFile::new("gophercolor16x16.png")
                    .content(FileContent::File("gophercolor16x16.png"))
                    .modified(date((2010, 9, 5), (15, 52, 58), 0, time_zone(10)))
                    .mode(0o644),
            ]),
        // non-UTF-8 encodings
        Case::new("cp-437.zip")
            .encoding(Encoding::Cp437)
            .files(CaseFile::new("français")),
        Case::new("shift-jis.zip")
            .encoding(Encoding::ShiftJis)
            .files(vec![
                CaseFile::new("should-be-jis/"),
                CaseFile::new("should-be-jis/ot_運命のワルツネぞなぞ小さな楽しみ遊びま.longboi"),
            ]),
        // empty entry with a sub-second modification time
        Case::new("utf8-winrar.zip").encoding(Encoding::Utf8).files(
            CaseFile::new("世界").content("").modified(date(
                (2017, 11, 6),
                (21, 9, 27),
                867862500,
                time_zone(0),
            )),
        ),
        // stored bz2-compressed in the corpus; only the entry count is checked
        Case::new("wine-zeroed.zip.bz2")
            .encoding(Encoding::Utf8)
            .files(11372),
        Case::new("info-zip-unix-extra.zip").files(CaseFile::new("bun-darwin-x64/")),
        // expected to be readable here; `streaming_test_cases` expects this
        // same archive to fail
        Case::new("readme.trailingzip").files(CaseFile::new("README")),
        // one archive per optional compression method, all with the same
        // expected content
        #[cfg(feature = "lzma")]
        Case::new("found-me-lzma.zip")
            .encoding(Encoding::Utf8)
            .files(
                CaseFile::new("found-me.txt")
                    .content("Oh no, you found me\n".repeat(5000))
                    .modified(date((2024, 1, 26), (16, 14, 35), 46003100, time_zone(0))),
            ),
        #[cfg(feature = "deflate64")]
        Case::new("found-me-deflate64.zip")
            .encoding(Encoding::Utf8)
            .files(
                CaseFile::new("found-me.txt")
                    .content("Oh no, you found me\n".repeat(5000))
                    .modified(date((2024, 1, 26), (16, 14, 35), 46003100, time_zone(0))),
            ),
        // same with bzip2
        #[cfg(feature = "bzip2")]
        Case::new("found-me-bzip2.zip")
            .encoding(Encoding::Utf8)
            .files(
                CaseFile::new("found-me.txt")
                    .content("Oh no, you found me\n".repeat(5000))
                    .modified(date((2024, 1, 26), (16, 14, 35), 46003100, time_zone(0))),
            ),
        // same with zstd
        #[cfg(feature = "zstd")]
        Case::new("found-me-zstd.zip")
            .encoding(Encoding::Utf8)
            .files(
                CaseFile::new("found-me.txt")
                    .content("Oh no, you found me\n".repeat(5000))
                    .modified(date((2024, 1, 31), (6, 10, 25), 800491400, time_zone(0))),
            ),
    ]
}
317
318pub fn streaming_test_cases() -> Vec<Case> {
319    vec![
320        Case::new("meta.zip").files(33),
321        Case::new("info-zip-unix-extra.zip").files(CaseFile::new("bun-darwin-x64/")),
322        Case::new("readme.trailingzip").error(FormatError::InvalidLocalHeader),
323    ]
324}
325
326pub fn check_case(case: &Case, archive: Result<&Archive, &Error>) {
327    let case_bytes = case.bytes();
328
329    if let Some(expected) = &case.error {
330        let actual = match archive {
331            Err(e) => e,
332            Ok(_) => panic!("should have failed"),
333        };
334        let expected = format!("{:#?}", expected);
335        let actual = format!("{:#?}", actual);
336        assert_eq!(expected, actual);
337        return;
338    }
339    let archive = archive.unwrap_or_else(|e| {
340        panic!(
341            "{} should have succeeded, but instead: {e:?} ({e})",
342            case.name
343        )
344    });
345
346    assert_eq!(case_bytes.len() as u64, archive.size());
347
348    if let Some(expected) = case.comment {
349        assert_eq!(expected, archive.comment())
350    }
351
352    if let Some(exp_encoding) = case.expected_encoding {
353        assert_eq!(archive.encoding(), exp_encoding);
354    }
355
356    assert_eq!(
357        case.files.len(),
358        archive.entries().count(),
359        "{} should have {} entries files",
360        case.name,
361        case.files.len()
362    );
363
364    // then each implementation should check individual files
365}
366
367pub fn check_file_against(file: &CaseFile, entry: &Entry, actual_bytes: &[u8]) {
368    if let Some(expected) = file.modified {
369        assert_eq!(
370            expected, entry.modified,
371            "entry {} should have modified = {:?}",
372            entry.name, expected
373        )
374    }
375
376    if let Some(mode) = file.mode {
377        assert_eq!(entry.mode.0 & 0o777, mode);
378    }
379
380    // I have honestly yet to see a zip file _entry_ with a comment.
381    assert!(entry.comment.is_empty());
382
383    match entry.kind() {
384        EntryKind::File => {
385            match &file.content {
386                FileContent::Unchecked => {
387                    // ah well
388                }
389                FileContent::Bytes(expected_bytes) => {
390                    // first check length
391                    assert_eq!(actual_bytes.len(), expected_bytes.len());
392                    assert_eq!(actual_bytes, &expected_bytes[..])
393                }
394                FileContent::File(file_path) => {
395                    let expected_bytes = std::fs::read(zips_dir().join(file_path)).unwrap();
396                    // first check length
397                    assert_eq!(actual_bytes.len(), expected_bytes.len());
398                    assert_eq!(actual_bytes, &expected_bytes[..])
399                }
400            }
401        }
402        EntryKind::Symlink | EntryKind::Directory => {
403            assert!(matches!(file.content, FileContent::Unchecked));
404        }
405    }
406}
407
408// This test subscriber is used to suppress trace-level logs (yet executes
409// the code, for coverage reasons)
410pub fn install_test_subscriber() {
411    let env_filter = tracing_subscriber::EnvFilter::builder().from_env_lossy();
412    let sub = tracing_subscriber::FmtSubscriber::builder()
413        .with_env_filter(env_filter)
414        .with_test_writer()
415        .finish();
416    let sub = DebugOnlySubscriber { inner: sub };
417    // fails when called multiple times from the same process (like in `cargo test`), so ignore
418    // errors
419    let _ = tracing::subscriber::set_global_default(sub);
420}
421
/// Wraps another subscriber and drops trace-level events before they reach
/// it, while still reporting everything as enabled.
struct DebugOnlySubscriber<S> {
    /// The subscriber that receives everything that is not dropped.
    inner: S,
}
425
426impl<S> tracing::Subscriber for DebugOnlySubscriber<S>
427where
428    S: tracing::Subscriber,
429{
430    fn enabled(&self, _metadata: &tracing::Metadata<'_>) -> bool {
431        true
432    }
433
434    fn new_span(&self, span: &span::Attributes<'_>) -> span::Id {
435        self.inner.new_span(span)
436    }
437
438    fn record(&self, span: &span::Id, values: &span::Record<'_>) {
439        self.inner.record(span, values)
440    }
441
442    fn record_follows_from(&self, span: &span::Id, follows: &span::Id) {
443        self.inner.record_follows_from(span, follows)
444    }
445
446    fn event(&self, event: &tracing::Event<'_>) {
447        if *event.metadata().level() == tracing::Level::TRACE {
448            return;
449        }
450
451        self.inner.event(event)
452    }
453
454    fn enter(&self, span: &span::Id) {
455        self.inner.enter(span)
456    }
457
458    fn exit(&self, span: &span::Id) {
459        self.inner.exit(span)
460    }
461}