pnp/
fs.rs

1use serde::Deserialize;
2use std::{
3    path::{Path, PathBuf},
4    str::Utf8Error,
5};
6
7use crate::zip::Zip;
8
/// Kind of entry reported by `file_type` lookups.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileType {
    /// The entry is a regular file.
    File,
    /// The entry is a directory.
    Directory,
}
14
15#[derive(Clone, Debug, Deserialize, PartialEq)]
16#[serde(rename_all = "camelCase")]
17pub struct ZipInfo {
18    pub base_path: String,
19    pub virtual_segments: Option<(String, String)>,
20    pub zip_path: String,
21}
22
23#[derive(Clone, Debug, Deserialize, PartialEq)]
24#[serde(rename_all = "camelCase")]
25pub struct VirtualInfo {
26    pub base_path: String,
27    pub virtual_segments: (String, String),
28}
29
/// Implemented by path descriptions that can report the physical location
/// they are rooted at.
pub trait VPathInfo {
    /// Returns the base path as a `PathBuf`, with any virtual indirection resolved.
    fn physical_base_path(&self) -> PathBuf;
}
33
34impl VPathInfo for ZipInfo {
35    fn physical_base_path(&self) -> PathBuf {
36        match &self.virtual_segments {
37            None => PathBuf::from(&self.base_path),
38            Some(segments) => PathBuf::from(&self.base_path).join(&segments.1),
39        }
40    }
41}
42
43impl VPathInfo for VirtualInfo {
44    fn physical_base_path(&self) -> PathBuf {
45        PathBuf::from(&self.base_path).join(&self.virtual_segments.1)
46    }
47}
48
49#[derive(Clone, Debug, Deserialize, PartialEq)]
50#[serde(untagged)]
51pub enum VPath {
52    Zip(ZipInfo),
53    Virtual(VirtualInfo),
54    Native(PathBuf),
55}
56
57impl VPath {
58    pub fn from(p: &Path) -> std::io::Result<VPath> {
59        vpath(p)
60    }
61}
62
63#[derive(thiserror::Error, Debug)]
64pub enum Error {
65    #[error("Entry not found")]
66    EntryNotFound,
67
68    #[error("Unsupported compression")]
69    UnsupportedCompression,
70
71    #[error("Decompression error")]
72    DecompressionError,
73
74    #[error(transparent)]
75    Utf8Error(#[from] Utf8Error),
76
77    #[error(transparent)]
78    IOError(#[from] std::io::Error),
79}
80
/// Opens the zip archive at `p` by memory-mapping the file instead of reading
/// it into memory.
///
/// # Errors
///
/// Returns an error when the file cannot be opened or its metadata cannot be
/// read, when its length does not fit in `usize`, when the memory mapping
/// cannot be set up, or when the mapped content cannot be parsed as a zip
/// archive. None of these conditions panic.
#[cfg(feature = "mmap")]
pub fn open_zip_via_mmap<P: AsRef<Path>>(p: P) -> Result<Zip<mmap_rs::Mmap>, std::io::Error> {
    let file = std::fs::File::open(p)?;

    // The mapping size is a `usize`, while file lengths are reported as `u64`.
    let len = usize::try_from(file.metadata()?.len()).map_err(std::io::Error::other)?;

    let mmap_builder = mmap_rs::MmapOptions::new(len).map_err(|err| {
        std::io::Error::other(format!("Failed to prepare the zip file mapping: {err:?}"))
    })?;

    // NOTE(review): a file-backed mapping is only sound as long as nothing
    // truncates or rewrites the file while the mapping is alive; that is
    // assumed here, not enforced.
    let mmap = unsafe { mmap_builder.with_file(&file, 0).map() }
        .map_err(|err| std::io::Error::other(format!("Failed to map the zip file: {err:?}")))?;

    let zip = Zip::new(mmap).map_err(|_| std::io::Error::other("Failed to read the zip file"))?;

    Ok(zip)
}
94
/// Non-generic form of `open_zip_via_mmap`; its signature matches the
/// `fn(&Path) -> std::io::Result<Zip<_>>` pointer type stored by `LruZipCache`.
#[cfg(feature = "mmap")]
pub fn open_zip_via_mmap_p(p: &Path) -> Result<Zip<mmap_rs::Mmap>, std::io::Error> {
    open_zip_via_mmap::<&Path>(p)
}
99
100pub fn open_zip_via_read<P: AsRef<Path>>(p: P) -> Result<Zip<Vec<u8>>, std::io::Error> {
101    let data = std::fs::read(p)?;
102
103    let zip = Zip::new(data).map_err(|_| std::io::Error::other("Failed to read the zip file"))?;
104
105    Ok(zip)
106}
107
108pub fn open_zip_via_read_p(p: &Path) -> Result<Zip<Vec<u8>>, std::io::Error> {
109    open_zip_via_read(p)
110}
111
112pub trait ZipCache<Storage>
113where
114    Storage: AsRef<[u8]> + Send + Sync,
115{
116    fn act<T, P: AsRef<Path>, F: FnOnce(&Zip<Storage>) -> T>(
117        &self,
118        p: P,
119        cb: F,
120    ) -> Result<T, std::io::Error>;
121
122    fn file_type<P: AsRef<Path>, S: AsRef<str>>(
123        &self,
124        zip_path: P,
125        sub: S,
126    ) -> Result<FileType, std::io::Error>;
127    fn read<P: AsRef<Path>, S: AsRef<str>>(
128        &self,
129        zip_path: P,
130        sub: S,
131    ) -> Result<Vec<u8>, std::io::Error>;
132    fn read_to_string<P: AsRef<Path>, S: AsRef<str>>(
133        &self,
134        zip_path: P,
135        sub: S,
136    ) -> Result<String, std::io::Error>;
137}
138
139#[derive(Debug)]
140pub struct LruZipCache<Storage>
141where
142    Storage: AsRef<[u8]> + Send + Sync,
143{
144    lru: concurrent_lru::sharded::LruCache<PathBuf, Zip<Storage>>,
145    open: fn(&Path) -> std::io::Result<Zip<Storage>>,
146}
147
148impl<Storage> LruZipCache<Storage>
149where
150    Storage: AsRef<[u8]> + Send + Sync,
151{
152    pub fn new(n: u64, open: fn(&Path) -> std::io::Result<Zip<Storage>>) -> LruZipCache<Storage> {
153        LruZipCache { lru: concurrent_lru::sharded::LruCache::new(n), open }
154    }
155}
156
157impl<Storage> ZipCache<Storage> for LruZipCache<Storage>
158where
159    Storage: AsRef<[u8]> + Send + Sync,
160{
161    fn act<T, P: AsRef<Path>, F: FnOnce(&Zip<Storage>) -> T>(
162        &self,
163        p: P,
164        cb: F,
165    ) -> Result<T, std::io::Error> {
166        let zip = self.lru.get_or_try_init(p.as_ref().to_path_buf(), 1, |p| (self.open)(p))?;
167
168        Ok(cb(zip.value()))
169    }
170
171    fn file_type<P: AsRef<Path>, S: AsRef<str>>(
172        &self,
173        zip_path: P,
174        p: S,
175    ) -> Result<FileType, std::io::Error> {
176        self.act(zip_path, |zip| zip.file_type(p.as_ref()))?
177    }
178
179    fn read<P: AsRef<Path>, S: AsRef<str>>(
180        &self,
181        zip_path: P,
182        p: S,
183    ) -> Result<Vec<u8>, std::io::Error> {
184        self.act(zip_path, |zip| zip.read(p.as_ref()))?
185    }
186
187    fn read_to_string<P: AsRef<Path>, S: AsRef<str>>(
188        &self,
189        zip_path: P,
190        p: S,
191    ) -> Result<String, std::io::Error> {
192        self.act(zip_path, |zip| zip.read_to_string(p.as_ref()))?
193    }
194}
195
196fn vpath(p: &Path) -> std::io::Result<VPath> {
197    let Some(p_str) = p.as_os_str().to_str() else {
198        return Ok(VPath::Native(p.to_path_buf()));
199    };
200
201    let normalized_path = crate::util::normalize_path(p_str);
202
203    // We remove potential leading slashes to avoid __virtual__ accidentally removing them
204    let normalized_relative_path = normalized_path.strip_prefix('/').unwrap_or(&normalized_path);
205
206    let mut segment_it = normalized_relative_path.split('/');
207
208    // `split` returns [""] if the path is empty; we need to remove it
209    if normalized_relative_path.is_empty() {
210        segment_it.next();
211    }
212
213    let mut base_items: Vec<&str> = Vec::with_capacity(10);
214
215    let mut virtual_items: Option<Vec<&str>> = None;
216    let mut internal_items: Option<Vec<&str>> = None;
217    let mut zip_items: Option<Vec<&str>> = None;
218
219    while let Some(segment) = segment_it.next() {
220        if let Some(zip_segments) = &mut zip_items {
221            zip_segments.push(segment);
222            continue;
223        }
224
225        if segment == "__virtual__" && virtual_items.is_none() {
226            let mut acc_segments = Vec::with_capacity(3);
227
228            acc_segments.push(segment);
229
230            // We just skip the arbitrary hash, it doesn't matter what it is
231            if let Some(hash_segment) = segment_it.next() {
232                acc_segments.push(hash_segment);
233            }
234
235            // We retrieve the depth
236            if let Some(depth_segment) = segment_it.next() {
237                let depth = depth_segment.parse::<usize>();
238
239                acc_segments.push(depth_segment);
240
241                // We extract the backward segments from the base ones
242                if let Ok(depth) = depth {
243                    let parent_segments =
244                        base_items.split_off(base_items.len().saturating_sub(depth));
245
246                    acc_segments.splice(0..0, parent_segments);
247                }
248            }
249
250            virtual_items = Some(acc_segments);
251            internal_items = Some(Vec::with_capacity(10));
252
253            continue;
254        }
255
256        if segment.len() > 4 && segment.ends_with(".zip") {
257            zip_items = Some(Vec::with_capacity(10));
258        }
259
260        if let Some(virtual_segments) = &mut virtual_items {
261            virtual_segments.push(segment);
262        }
263
264        if let Some(internal_segments) = &mut internal_items {
265            internal_segments.push(segment);
266        } else {
267            base_items.push(segment);
268        }
269    }
270
271    let virtual_segments = match (virtual_items, internal_items) {
272        (Some(virtual_segments), Some(internal_segments)) => {
273            Some((virtual_segments.join("/"), internal_segments.join("/")))
274        }
275
276        _ => None,
277    };
278
279    if let Some(zip_segments) = zip_items {
280        let mut base_path = base_items.join("/");
281
282        // Don't forget to add back the leading slash we removed earlier
283        if normalized_relative_path != normalized_path {
284            base_path.insert(0, '/');
285        }
286
287        if !zip_segments.is_empty() {
288            return Ok(VPath::Zip(ZipInfo {
289                base_path,
290                virtual_segments,
291                zip_path: zip_segments.join("/"),
292            }));
293        }
294    }
295
296    if let Some(virtual_segments) = virtual_segments {
297        let mut base_path = base_items.join("/");
298
299        // Don't forget to add back the leading slash we removed earlier
300        if normalized_relative_path != normalized_path {
301            base_path.insert(0, '/');
302        }
303
304        return Ok(VPath::Virtual(VirtualInfo { base_path, virtual_segments }));
305    }
306
307    Ok(VPath::Native(PathBuf::from(normalized_path)))
308}
309
#[cfg(test)]
mod tests {
    use rstest::rstest;
    use std::path::PathBuf;

    use crate::util;

    use super::*;

    /// Archive used by the zip tests.
    const BABEL_ARCHIVE: &str =
        "data/@babel-plugin-syntax-dynamic-import-npm-7.8.3-fb9ff5634a-8.zip";

    /// Opens the fixture archive, panicking when it cannot be loaded.
    fn open_fixture() -> Zip<Vec<u8>> {
        open_zip_via_read(PathBuf::from(BABEL_ARCHIVE)).unwrap()
    }

    #[test]
    fn test_zip_type_api() {
        let zip = open_fixture();

        assert_eq!(zip.file_type("node_modules").unwrap(), FileType::Directory);
        assert_eq!(zip.file_type("node_modules/").unwrap(), FileType::Directory);
    }

    #[test]
    #[should_panic(expected = "Kind(NotFound)")]
    fn test_zip_type_api_not_exist_dir_with_slash() {
        let zip = open_fixture();

        zip.file_type("not_exists/").unwrap();
    }

    #[test]
    #[should_panic(expected = "Kind(NotFound)")]
    fn test_zip_type_api_not_exist_dir_without_slash() {
        let zip = open_fixture();

        zip.file_type("not_exists").unwrap();
    }

    #[test]
    fn test_zip_list() {
        let zip = open_fixture();

        let mut dirs: Vec<&String> = zip.dirs.iter().collect();
        dirs.sort();

        let mut files: Vec<&String> = zip.files.keys().collect();
        files.sort();

        assert_eq!(
            dirs,
            vec![
                "node_modules/",
                "node_modules/@babel/",
                "node_modules/@babel/plugin-syntax-dynamic-import/",
                "node_modules/@babel/plugin-syntax-dynamic-import/lib/",
            ]
        );

        assert_eq!(
            files,
            vec![
                "node_modules/@babel/plugin-syntax-dynamic-import/LICENSE",
                "node_modules/@babel/plugin-syntax-dynamic-import/README.md",
                "node_modules/@babel/plugin-syntax-dynamic-import/lib/index.js",
                "node_modules/@babel/plugin-syntax-dynamic-import/package.json",
            ]
        );
    }

    #[test]
    fn test_zip_read() {
        let res = open_fixture()
            .read_to_string("node_modules/@babel/plugin-syntax-dynamic-import/package.json")
            .unwrap();

        assert_eq!(
            res,
            "{\n  \"name\": \"@babel/plugin-syntax-dynamic-import\",\n  \"version\": \"7.8.3\",\n  \"description\": \"Allow parsing of import()\",\n  \"repository\": \"https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-dynamic-import\",\n  \"license\": \"MIT\",\n  \"publishConfig\": {\n    \"access\": \"public\"\n  },\n  \"main\": \"lib/index.js\",\n  \"keywords\": [\n    \"babel-plugin\"\n  ],\n  \"dependencies\": {\n    \"@babel/helper-plugin-utils\": \"^7.8.0\"\n  },\n  \"peerDependencies\": {\n    \"@babel/core\": \"^7.0.0-0\"\n  },\n  \"devDependencies\": {\n    \"@babel/core\": \"^7.8.0\"\n  }\n}\n"
        );
    }

    // A `None` expectation means "native path equal to the normalized input".
    #[rstest]
    #[case(".zip", None)]
    #[case("foo", None)]
    #[case("foo.zip", None)]
    #[case("foo.zip/bar", Some(VPath::Zip(ZipInfo {
        base_path: "foo.zip".into(),
        virtual_segments: None,
        zip_path: "bar".into(),
    })))]
    #[case("foo.zip/bar/baz", Some(VPath::Zip(ZipInfo {
        base_path: "foo.zip".into(),
        virtual_segments: None,
        zip_path: "bar/baz".into(),
    })))]
    #[case("/a/b/c/foo.zip", None)]
    #[case("./a/b/c/foo.zip", None)]
    #[case("./a/b/__virtual__/foo-abcdef/0/c/d", Some(VPath::Virtual(VirtualInfo {
        base_path: "a/b".into(),
        virtual_segments: ("__virtual__/foo-abcdef/0/c/d".into(), "c/d".into()),
    })))]
    #[case("./a/b/__virtual__/foo-abcdef/1/c/d", Some(VPath::Virtual(VirtualInfo {
        base_path: "a".into(),
        virtual_segments: ("b/__virtual__/foo-abcdef/1/c/d".into(), "c/d".into()),
    })))]
    #[case("./a/b/__virtual__/foo-abcdef/0/c/foo.zip/bar", Some(VPath::Zip(ZipInfo {
        base_path: "a/b".into(),
        virtual_segments: Some(("__virtual__/foo-abcdef/0/c/foo.zip".into(), "c/foo.zip".into())),
        zip_path: "bar".into(),
    })))]
    #[case("./a/b/__virtual__/foo-abcdef/1/c/foo.zip/bar", Some(VPath::Zip(ZipInfo {
        base_path: "a".into(),
        virtual_segments: Some(("b/__virtual__/foo-abcdef/1/c/foo.zip".into(), "c/foo.zip".into())),
        zip_path: "bar".into(),
    })))]
    #[case("/a/b/__virtual__/foo-abcdef/1/c/foo.zip/bar", Some(VPath::Zip(ZipInfo {
        base_path: "/a".into(),
        virtual_segments: Some(("b/__virtual__/foo-abcdef/1/c/foo.zip".into(), "c/foo.zip".into())),
        zip_path: "bar".into(),
    })))]
    #[case("/a/b/__virtual__/foo-abcdef/2/c/foo.zip/bar", Some(VPath::Zip(ZipInfo {
        base_path: "/".into(),
        virtual_segments: Some(("a/b/__virtual__/foo-abcdef/2/c/foo.zip".into(), "c/foo.zip".into())),
        zip_path: "bar".into(),
    })))]
    #[case("/__virtual__/foo-abcdef/2/c/foo.zip/bar", Some(VPath::Zip(ZipInfo {
        base_path: "/".into(),
        virtual_segments: Some(("__virtual__/foo-abcdef/2/c/foo.zip".into(), "c/foo.zip".into())),
        zip_path: "bar".into(),
    })))]
    #[case("./a/b/c/.zip", None)]
    #[case("./a/b/c/foo.zipp", None)]
    #[case("./a/b/c/foo.zip/bar/baz/qux.zip", Some(VPath::Zip(ZipInfo {
        base_path: "a/b/c/foo.zip".into(),
        virtual_segments: None,
        zip_path: "bar/baz/qux.zip".into(),
    })))]
    #[case("./a/b/c/foo.zip-bar.zip", None)]
    #[case("./a/b/c/foo.zip-bar.zip/bar/baz/qux.zip", Some(VPath::Zip(ZipInfo {
        base_path: "a/b/c/foo.zip-bar.zip".into(),
        virtual_segments: None,
        zip_path: "bar/baz/qux.zip".into(),
    })))]
    #[case("./a/b/c/foo.zip-bar/foo.zip-bar/foo.zip-bar.zip/d", Some(VPath::Zip(ZipInfo {
        base_path: "a/b/c/foo.zip-bar/foo.zip-bar/foo.zip-bar.zip".into(),
        virtual_segments: None,
        zip_path: "d".into(),
    })))]
    fn test_path_to_pnp(#[case] input: &str, #[case] expected: Option<VPath>) {
        let expectation: VPath = expected
            .unwrap_or_else(|| VPath::Native(PathBuf::from(util::normalize_path(input))));

        let res = vpath(&PathBuf::from(input)).unwrap_or_else(|err| panic!("{input:?}: {err}"));

        assert_eq!(res, expectation, "input='{input:?}'");
    }
}
485}