Skip to main content

virtual_filesystem/
zip_fs.rs

1use crate::file::{DirEntry, File, FileType, Metadata, OpenOptions};
2use crate::util::{make_relative, not_found, not_supported, parent_iter};
3use crate::{util, FileSystem};
4use parking_lot::Mutex;
5use std::collections::{HashMap, HashSet};
6use std::fmt::Debug;
7use std::io;
8use std::io::{Cursor, ErrorKind, Read, Seek, SeekFrom, Write};
9use std::path::{Path, PathBuf};
10use zip::read::ZipFile;
11use zip::result::{ZipError, ZipResult};
12use zip::ZipArchive;
13
14/// A virtual FileSystem backed by a ZIP file. Only supports read operations for now.
15#[derive(Debug)]
16pub struct ZipFS<R: Read + Seek> {
17    zip_file: Mutex<ZipArchive<R>>,
18    directories: HashSet<PathBuf>,
19    normalized_lower_to_path: HashMap<PathBuf, PathBuf>,
20    /// File sizes keyed by normalized lowercase path.
21    sizes: HashMap<PathBuf, u64>,
22}
23
24impl<R: Read + Seek> ZipFS<R> {
25    /// Mounts a ZIP file onto the local filesystem.
26    pub fn new(zip_file: R) -> ZipResult<Self> {
27        let mut zip_file = ZipArchive::new(zip_file)?;
28        // collect folders
29        let mut directories = HashSet::from_iter([Path::new("").to_owned()]);
30        let mut normalized_lower_to_path = HashMap::new();
31        let mut sizes = HashMap::new();
32        for i in 0..zip_file.len() {
33            let entry = zip_file.by_index(i)?;
34            let file_name = entry.name().to_owned();
35            let size = entry.size();
36            drop(entry);
37
38            for parent in parent_iter(Path::new(&file_name.to_lowercase())) {
39                directories.insert(parent.to_owned());
40            }
41
42            let normalized = Self::normalize_path(&file_name);
43            let lower = PathBuf::from(
44                normalized
45                    .to_str()
46                    .ok_or_else(not_supported)?
47                    .to_lowercase(),
48            );
49
50            sizes.insert(lower.clone(), size);
51            normalized_lower_to_path.insert(lower, normalized);
52        }
53
54        Ok(Self {
55            zip_file: Mutex::new(zip_file),
56            directories,
57            normalized_lower_to_path,
58            sizes,
59        })
60    }
61
62    fn convert_error<T>(maybe_error: ZipResult<T>) -> crate::Result<T> {
63        maybe_error.map_err(|err| match err {
64            ZipError::FileNotFound => {
65                io::Error::new(ErrorKind::NotFound, "File not found in zip archive")
66            }
67            ZipError::Io(io_error) => io_error,
68            ZipError::InvalidArchive(error_str) => {
69                io::Error::new(ErrorKind::InvalidData, error_str)
70            }
71            ZipError::UnsupportedArchive(error_str) => {
72                io::Error::new(ErrorKind::Unsupported, error_str)
73            }
74        })
75    }
76
77    /// Returns the cased path for the given normalized path.
78    fn get_cased_path(&self, normalized_path: &Path) -> Option<&PathBuf> {
79        // find the cased path
80        let lowercase_path = PathBuf::from(normalized_path.to_str()?.to_lowercase());
81        self.normalized_lower_to_path.get(&lowercase_path)
82    }
83
84    fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
85        // as far as I can tell, zip files are relative from the root
86        util::normalize_path(make_relative(path))
87    }
88
89    fn with_file<RV, F: FnOnce(ZipFile) -> RV>(
90        &self,
91        normalized_path: &Path,
92        f: F,
93    ) -> crate::Result<RV> {
94        // find the cased path
95        let cased_path = self.get_cased_path(normalized_path).ok_or_else(not_found)?;
96
97        let mut zip_file = self.zip_file.lock();
98
99        let entry =
100            Self::convert_error(zip_file.by_name(cased_path.to_str().ok_or_else(not_supported)?))?;
101        Ok(f(entry))
102    }
103}
104
105impl<R: Read + Seek> FileSystem for ZipFS<R> {
106    fn create_dir(&self, _path: &str) -> crate::Result<()> {
107        Err(not_supported())
108    }
109
110    fn metadata(&self, path: &str) -> crate::Result<Metadata> {
111        let normalized_path = Self::normalize_path(path);
112
113        // try directories first, which are lowercase
114        let lowercase_path = PathBuf::from(
115            normalized_path
116                .as_path()
117                .to_str()
118                .ok_or_else(not_supported)?
119                .to_lowercase(),
120        );
121        if self.directories.contains(&lowercase_path) {
122            return Ok(Metadata {
123                file_type: FileType::Directory,
124                len: 0,
125            });
126        }
127
128        // now files
129        self.with_file(normalized_path.as_path(), |file| Metadata {
130            file_type: FileType::File,
131            len: file.size(),
132        })
133    }
134
135    fn open_file_options(&self, path: &str, options: &OpenOptions) -> crate::Result<Box<dyn File>> {
136        // ensure we only want to read
137        if !options.read || options.write {
138            return Err(not_supported());
139        }
140
141        // open the file and read into a readable buffer
142        self.with_file::<crate::Result<Box<dyn File>>, _>(
143            &Self::normalize_path(path),
144            |mut entry| {
145                let mut contents = Vec::with_capacity(entry.size() as usize);
146                entry.read_to_end(&mut contents)?;
147                Ok(Box::new(ZipFileContents {
148                    inner: Cursor::new(contents),
149                }))
150            },
151        )?
152    }
153
154    fn read_dir(
155        &self,
156        path: &str,
157    ) -> crate::Result<Box<dyn Iterator<Item = crate::Result<DirEntry>>>> {
158        let directory = Self::normalize_path(path);
159
160        // if there are no folders with this path, error out
161        if !self.directories.contains(&directory) {
162            return Err(not_found());
163        }
164
165        let mut files: HashMap<PathBuf, Metadata> = HashMap::new();
166        for (lower, normalized_file) in &self.normalized_lower_to_path {
167            let mut add_parent = |normalized_path: &Path, metadata: Metadata| {
168                if normalized_path.parent()? == directory {
169                    let name = PathBuf::from(normalized_path.file_name()?);
170                    // directories take precedence over file entries (e.g. "folder/" zip entries)
171                    let entry = files.entry(name).or_insert(metadata.clone());
172                    if metadata.is_directory() {
173                        *entry = metadata;
174                    }
175                }
176                Some(())
177            };
178
179            // if the file's parent is the directory, it's in the directory
180            let size = self.sizes.get(lower).copied().unwrap_or(0);
181            add_parent(normalized_file, Metadata::file(size));
182
183            // if the file's parent directory is in the directory, add it
184            if let Some(file_parent) = normalized_file.parent() {
185                add_parent(file_parent, Metadata::directory());
186            }
187        }
188
189        Ok(Box::new(
190            files
191                .into_iter()
192                .map(|(path, metadata)| Ok(DirEntry { path, metadata })),
193        ))
194    }
195
196    fn remove_dir(&self, _path: &str) -> crate::Result<()> {
197        Err(not_supported())
198    }
199
200    fn remove_file(&self, _path: &str) -> crate::Result<()> {
201        Err(not_supported())
202    }
203}
204
205struct ZipFileContents {
206    inner: Cursor<Vec<u8>>,
207}
208
209impl Read for ZipFileContents {
210    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
211        self.inner.read(buf)
212    }
213}
214
215impl Seek for ZipFileContents {
216    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
217        self.inner.seek(pos)
218    }
219}
220
221impl Write for ZipFileContents {
222    fn write(&mut self, _buf: &[u8]) -> io::Result<usize> {
223        Err(not_supported())
224    }
225
226    fn flush(&mut self) -> io::Result<()> {
227        Err(not_supported())
228    }
229}
230
231impl File for ZipFileContents {
232    fn metadata(&self) -> crate::Result<Metadata> {
233        Ok(Metadata::file(self.inner.get_ref().len() as u64))
234    }
235}
236
#[cfg(test)]
mod test {
    use crate::file::{FileType, Metadata};
    use crate::zip_fs::ZipFS;
    use crate::FileSystem;
    use std::collections::BTreeMap;
    use std::fs::File;

    /// Opens the archive fixture shared by every test.
    fn zip_fs() -> ZipFS<File> {
        let archive = File::open("test/deep_fs.zip").unwrap();
        ZipFS::new(archive).unwrap()
    }

    /// Lists `path` and returns its entries keyed by name, ordered so assertions are stable.
    fn read_directory(fs: &ZipFS<File>, path: &str) -> crate::Result<BTreeMap<String, Metadata>> {
        let mut listing = BTreeMap::new();
        for entry in fs.read_dir(path)? {
            let entry = entry.unwrap();
            listing.insert(entry.path.to_str().unwrap().to_owned(), entry.metadata);
        }
        Ok(listing)
    }

    #[test]
    fn read_dir() {
        let fs = zip_fs();

        let root = read_directory(&fs, "").unwrap();
        itertools::assert_equal(root.keys(), vec!["file", "folder"]);
        itertools::assert_equal(
            root.values().map(|md| md.file_type),
            vec![FileType::File, FileType::Directory],
        );
        itertools::assert_equal(root.values().map(|md| md.len), vec![2571, 0]);

        // every one of these spellings has to resolve to the root
        for alias in [".", "///", "\\", "///test/../"] {
            let another_root = read_directory(&fs, alias).unwrap();
            assert_eq!(root, another_root);
        }

        let deeper_root = read_directory(&fs, "folder/and/it").unwrap();
        itertools::assert_equal(deeper_root.keys(), vec!["desc", "goes"]);

        // neither files nor missing paths can be listed
        for not_a_directory in ["file", "not_a_real_path"] {
            assert!(read_directory(&fs, not_a_directory).is_err());
        }
    }

    #[test]
    fn open_file() {
        let fs = zip_fs();

        let mut handle = fs.open_file("file").unwrap();
        let md = handle.metadata().unwrap();
        assert_eq!(md.file_type, FileType::File);
        assert_eq!(md.len, 2571);

        let contents = handle.read_into_string().unwrap();
        assert!(contents.starts_with("Lorem ipsum dolor"));

        // a roundabout path to the same file yields the same contents
        let mut indirect_handle = fs.open_file("///something/..\\file").unwrap();
        let indirect_contents = indirect_handle.read_into_string().unwrap();
        assert_eq!(indirect_contents, contents);

        let mut nested_handle = fs.open_file("folder/and/it/goes/deeper/desc").unwrap();
        let nested_contents = nested_handle.read_into_string().unwrap();
        assert_eq!(nested_contents, "deeper\n")
    }

    #[test]
    fn metadata() {
        let fs = zip_fs();

        let file_md = fs.metadata("file").unwrap();
        assert_eq!(file_md.file_type, FileType::File);
        assert_eq!(file_md.len, 2571);

        let folder_md = fs.metadata("folder").unwrap();
        assert_eq!(folder_md.file_type, FileType::Directory);
        assert_eq!(folder_md.len, 0);

        let nested_md = fs.metadata("folder/and/it/goes/desc").unwrap();
        assert_eq!(nested_md.file_type, FileType::File);
        assert_eq!(nested_md.len, 5);
    }

    #[test]
    fn exists() {
        let fs = zip_fs();

        let present = [
            "/",
            "",
            "file",
            "FiLe",
            "folder",
            "folDeR",
            "folder/and/it",
            "folder/anD/iT",
            "folder/and/it/desc",
            "///test/something_else/../../file",
            "///test/something_elsE/../../file",
        ];
        for path in present {
            assert!(fs.exists(path).unwrap());
        }

        let missing = ["no_file", "folder/and/it/does/not"];
        for path in missing {
            assert!(!fs.exists(path).unwrap());
        }
    }
}