unrspack_resolver/
fs_cache.rs

1use std::{
2    borrow::Cow,
3    cell::RefCell,
4    convert::AsRef,
5    hash::{BuildHasherDefault, Hash, Hasher},
6    io,
7    ops::Deref,
8    path::{Component, Path, PathBuf},
9    sync::{
10        Arc,
11        atomic::{AtomicU64, Ordering},
12    },
13};
14
15use cfg_if::cfg_if;
16use once_cell::sync::OnceCell as OnceLock;
17use papaya::{Equivalent, HashMap, HashSet};
18use rustc_hash::FxHasher;
19
20use crate::{
21    FileMetadata, FileSystem, PackageJsonSerde, ResolveError, ResolveOptions, TsConfig,
22    TsConfigSerde,
23    cache::{Cache, CachedPath},
24    context::ResolveContext as Ctx,
25    path::PathUtil,
26};
27
/// Counter that hands out per-thread ids. It starts at 1, so an id of 0 is never issued.
static THREAD_COUNT: AtomicU64 = AtomicU64::new(1);

thread_local! {
    /// Per-thread pre-allocated path that is used to perform operations on paths more quickly.
    /// Learned from parcel <https://github.com/parcel-bundler/parcel/blob/a53f8f3ba1025c7ea8653e9719e0a61ef9717079/crates/parcel-resolver/src/cache.rs#L394>
    pub static SCRATCH_PATH: RefCell<PathBuf> = RefCell::new(PathBuf::with_capacity(256));

    /// Id of the current thread, taken from `THREAD_COUNT` the first time the thread reads it.
    pub static THREAD_ID: u64 = THREAD_COUNT.fetch_add(1, Ordering::SeqCst);
}
36
37/// Cache implementation used for caching filesystem access.
38#[derive(Default)]
39pub struct FsCache<Fs> {
40    pub(crate) fs: Fs,
41    paths: HashSet<FsCachedPath, BuildHasherDefault<IdentityHasher>>,
42    tsconfigs: HashMap<PathBuf, Arc<TsConfigSerde>, BuildHasherDefault<FxHasher>>,
43}
44
45impl<Fs: FileSystem> Cache for FsCache<Fs> {
46    type Cp = FsCachedPath;
47    type Pj = PackageJsonSerde;
48    type Tc = TsConfigSerde;
49
50    fn clear(&self) {
51        self.paths.pin().clear();
52        self.tsconfigs.pin().clear();
53    }
54
55    #[allow(clippy::cast_possible_truncation)]
56    fn value(&self, path: &Path) -> FsCachedPath {
57        // `Path::hash` is slow: https://doc.rust-lang.org/std/path/struct.Path.html#impl-Hash-for-Path
58        // `path.as_os_str()` hash is not stable because we may joined a path like `foo/bar` and `foo\\bar` on windows.
59        let hash = {
60            let mut hasher = FxHasher::default();
61            path.as_os_str().hash(&mut hasher);
62            hasher.finish()
63        };
64        let paths = self.paths.pin();
65        if let Some(entry) = paths.get(&BorrowedCachedPath { hash, path }) {
66            return entry.clone();
67        }
68        let parent = path.parent().map(|p| self.value(p));
69        let cached_path = FsCachedPath(Arc::new(CachedPathImpl::new(
70            hash,
71            path.to_path_buf().into_boxed_path(),
72            parent,
73        )));
74        paths.insert(cached_path.clone());
75        cached_path
76    }
77
78    fn canonicalize(&self, path: &Self::Cp) -> Result<PathBuf, ResolveError> {
79        let cached_path = self.canonicalize_impl(path)?;
80        let path = cached_path.to_path_buf();
81        cfg_if! {
82            if #[cfg(windows)] {
83                let path = crate::FileSystemOs::strip_windows_prefix(path);
84            }
85        }
86        Ok(path)
87    }
88
89    fn is_file(&self, path: &Self::Cp, ctx: &mut Ctx) -> bool {
90        if let Some(meta) = path.meta(&self.fs) {
91            ctx.add_file_dependency(path.path());
92            meta.is_file
93        } else {
94            ctx.add_missing_dependency(path.path());
95            false
96        }
97    }
98
99    fn is_dir(&self, path: &Self::Cp, ctx: &mut Ctx) -> bool {
100        path.meta(&self.fs).map_or_else(
101            || {
102                ctx.add_missing_dependency(path.path());
103                false
104            },
105            |meta| meta.is_dir,
106        )
107    }
108
109    fn get_package_json(
110        &self,
111        path: &Self::Cp,
112        options: &ResolveOptions,
113        ctx: &mut Ctx,
114    ) -> Result<Option<(Self::Cp, Arc<PackageJsonSerde>)>, ResolveError> {
115        // Change to `std::sync::OnceLock::get_or_try_init` when it is stable.
116        let result = path
117            .package_json
118            .get_or_try_init(|| {
119                let package_json_path = path.path.join("package.json");
120                let Ok(package_json_string) = self.fs.read_to_string(&package_json_path) else {
121                    return Ok(None);
122                };
123                let real_path = if options.symlinks {
124                    self.canonicalize(path)?.join("package.json")
125                } else {
126                    package_json_path.clone()
127                };
128                PackageJsonSerde::parse(package_json_path.clone(), real_path, &package_json_string)
129                    .map(|package_json| Some((path.clone(), (Arc::new(package_json)))))
130                    .map_err(|error| {
131                        ResolveError::from_serde_json_error(
132                            package_json_path,
133                            &error,
134                            Some(package_json_string),
135                        )
136                    })
137            })
138            .cloned();
139        // https://github.com/webpack/enhanced-resolve/blob/58464fc7cb56673c9aa849e68e6300239601e615/lib/DescriptionFileUtils.js#L68-L82
140        match &result {
141            Ok(Some((_, package_json))) => {
142                ctx.add_file_dependency(&package_json.path);
143            }
144            Ok(None) => {
145                // Avoid an allocation by making this lazy
146                if let Some(deps) = &mut ctx.missing_dependencies {
147                    deps.push(path.path.join("package.json"));
148                }
149            }
150            Err(_) => {
151                if let Some(deps) = &mut ctx.file_dependencies {
152                    deps.push(path.path.join("package.json"));
153                }
154            }
155        }
156        result
157    }
158
159    fn get_tsconfig<F: FnOnce(&mut TsConfigSerde) -> Result<(), ResolveError>>(
160        &self,
161        root: bool,
162        path: &Path,
163        callback: F, // callback for modifying tsconfig with `extends`
164    ) -> Result<Arc<TsConfigSerde>, ResolveError> {
165        let tsconfigs = self.tsconfigs.pin();
166        if let Some(tsconfig) = tsconfigs.get(path) {
167            return Ok(Arc::clone(tsconfig));
168        }
169        let meta = self.fs.metadata(path).ok();
170        let tsconfig_path = if meta.is_some_and(|m| m.is_file) {
171            Cow::Borrowed(path)
172        } else if meta.is_some_and(|m| m.is_dir) {
173            Cow::Owned(path.join("tsconfig.json"))
174        } else {
175            let mut os_string = path.to_path_buf().into_os_string();
176            os_string.push(".json");
177            Cow::Owned(PathBuf::from(os_string))
178        };
179        let mut tsconfig_string = self
180            .fs
181            .read_to_string(&tsconfig_path)
182            .map_err(|_| ResolveError::TsconfigNotFound(path.to_path_buf()))?;
183        let mut tsconfig = TsConfigSerde::parse(root, &tsconfig_path, &mut tsconfig_string)
184            .map_err(|error| {
185                ResolveError::from_serde_json_error(
186                    tsconfig_path.to_path_buf(),
187                    &error,
188                    Some(tsconfig_string),
189                )
190            })?;
191        callback(&mut tsconfig)?;
192        tsconfig.expand_template_variables();
193        let tsconfig = Arc::new(tsconfig);
194        tsconfigs.insert(path.to_path_buf(), Arc::clone(&tsconfig));
195        Ok(tsconfig)
196    }
197}
198
199impl<Fs: FileSystem> FsCache<Fs> {
200    pub fn new(fs: Fs) -> Self {
201        Self {
202            fs,
203            paths: HashSet::builder()
204                .hasher(BuildHasherDefault::default())
205                .resize_mode(papaya::ResizeMode::Blocking)
206                .build(),
207            tsconfigs: HashMap::builder()
208                .hasher(BuildHasherDefault::default())
209                .resize_mode(papaya::ResizeMode::Blocking)
210                .build(),
211        }
212    }
213
214    /// Returns the canonical path, resolving all symbolic links.
215    ///
216    /// <https://github.com/parcel-bundler/parcel/blob/4d27ec8b8bd1792f536811fef86e74a31fa0e704/crates/parcel-resolver/src/cache.rs#L232>
217    fn canonicalize_impl(&self, path: &FsCachedPath) -> Result<FsCachedPath, ResolveError> {
218        // Check if this thread is already canonicalizing. If so, we have found a circular symlink.
219        // If a different thread is canonicalizing, OnceLock will queue this thread to wait for the result.
220        let tid = THREAD_ID.with(|t| *t);
221        if path.canonicalizing.load(Ordering::Acquire) == tid {
222            return Err(io::Error::new(io::ErrorKind::NotFound, "Circular symlink").into());
223        }
224
225        path.canonicalized
226            .get_or_init(|| {
227                path.canonicalizing.store(tid, Ordering::Release);
228
229                let res = path.parent().map_or_else(
230                    || Ok(path.normalize_root(self)),
231                    |parent| {
232                        self.canonicalize_impl(parent).and_then(|parent_canonical| {
233                            let normalized = parent_canonical.normalize_with(
234                                path.path().strip_prefix(parent.path()).unwrap(),
235                                self,
236                            );
237
238                            if self.fs.symlink_metadata(path.path()).is_ok_and(|m| m.is_symlink) {
239                                let link = self.fs.read_link(normalized.path())?;
240                                if link.is_absolute() {
241                                    return self.canonicalize_impl(&self.value(&link.normalize()));
242                                } else if let Some(dir) = normalized.parent() {
243                                    // Symlink is relative `../../foo.js`, use the path directory
244                                    // to resolve this symlink.
245                                    return self
246                                        .canonicalize_impl(&dir.normalize_with(&link, self));
247                                }
248                                debug_assert!(
249                                    false,
250                                    "Failed to get path parent for {:?}.",
251                                    normalized.path()
252                                );
253                            }
254
255                            Ok(normalized)
256                        })
257                    },
258                );
259
260                path.canonicalizing.store(0, Ordering::Release);
261                res
262            })
263            .clone()
264    }
265}
266
267#[derive(Clone)]
268pub struct FsCachedPath(Arc<CachedPathImpl>);
269
270pub struct CachedPathImpl {
271    hash: u64,
272    path: Box<Path>,
273    parent: Option<FsCachedPath>,
274    meta: OnceLock<Option<FileMetadata>>,
275    canonicalized: OnceLock<Result<FsCachedPath, ResolveError>>,
276    canonicalizing: AtomicU64,
277    node_modules: OnceLock<Option<FsCachedPath>>,
278    package_json: OnceLock<Option<(FsCachedPath, Arc<PackageJsonSerde>)>>,
279}
280
281impl CachedPathImpl {
282    const fn new(hash: u64, path: Box<Path>, parent: Option<FsCachedPath>) -> Self {
283        Self {
284            hash,
285            path,
286            parent,
287            meta: OnceLock::new(),
288            canonicalized: OnceLock::new(),
289            canonicalizing: AtomicU64::new(0),
290            node_modules: OnceLock::new(),
291            package_json: OnceLock::new(),
292        }
293    }
294}
295
296impl Deref for FsCachedPath {
297    type Target = CachedPathImpl;
298
299    fn deref(&self) -> &Self::Target {
300        self.0.as_ref()
301    }
302}
303
304impl CachedPath for FsCachedPath {
305    fn path(&self) -> &Path {
306        &self.0.path
307    }
308
309    fn to_path_buf(&self) -> PathBuf {
310        self.path.to_path_buf()
311    }
312
313    fn parent(&self) -> Option<&Self> {
314        self.0.parent.as_ref()
315    }
316
317    fn module_directory<C: Cache<Cp = Self>>(
318        &self,
319        module_name: &str,
320        cache: &C,
321        ctx: &mut Ctx,
322    ) -> Option<Self> {
323        let cached_path = cache.value(&self.path.join(module_name));
324        cache.is_dir(&cached_path, ctx).then_some(cached_path)
325    }
326
327    fn cached_node_modules<C: Cache<Cp = Self>>(&self, cache: &C, ctx: &mut Ctx) -> Option<Self> {
328        self.node_modules.get_or_init(|| self.module_directory("node_modules", cache, ctx)).clone()
329    }
330
331    /// Find package.json of a path by traversing parent directories.
332    ///
333    /// # Errors
334    ///
335    /// * [ResolveError::JSON]
336    fn find_package_json<C: Cache<Cp = Self>>(
337        &self,
338        options: &ResolveOptions,
339        cache: &C,
340        ctx: &mut Ctx,
341    ) -> Result<Option<(Self, Arc<C::Pj>)>, ResolveError> {
342        let mut cache_value = self;
343        // Go up directories when the querying path is not a directory
344        while !cache.is_dir(cache_value, ctx) {
345            if let Some(cv) = &cache_value.parent {
346                cache_value = cv;
347            } else {
348                break;
349            }
350        }
351        let mut cache_value = Some(cache_value);
352        while let Some(cv) = cache_value {
353            if let Some(package_json) = cache.get_package_json(cv, options, ctx)? {
354                return Ok(Some(package_json));
355            }
356            cache_value = cv.parent.as_ref();
357        }
358        Ok(None)
359    }
360
361    fn add_extension<C: Cache<Cp = Self>>(&self, ext: &str, cache: &C) -> Self {
362        SCRATCH_PATH.with_borrow_mut(|path| {
363            path.clear();
364            let s = path.as_mut_os_string();
365            s.push(self.path.as_os_str());
366            s.push(ext);
367            cache.value(path)
368        })
369    }
370
371    fn replace_extension<C: Cache<Cp = Self>>(&self, ext: &str, cache: &C) -> Self {
372        SCRATCH_PATH.with_borrow_mut(|path| {
373            path.clear();
374            let s = path.as_mut_os_string();
375            let self_len = self.path.as_os_str().len();
376            let self_bytes = self.path.as_os_str().as_encoded_bytes();
377            let slice_to_copy = self.path.extension().map_or(self_bytes, |previous_extension| {
378                &self_bytes[..self_len - previous_extension.len() - 1]
379            });
380            // SAFETY: ???
381            s.push(unsafe { std::ffi::OsStr::from_encoded_bytes_unchecked(slice_to_copy) });
382            s.push(ext);
383            cache.value(path)
384        })
385    }
386
387    /// Returns a new path by resolving the given subpath (including "." and ".." components) with this path.
388    fn normalize_with<C: Cache<Cp = Self>>(&self, subpath: impl AsRef<Path>, cache: &C) -> Self {
389        let subpath = subpath.as_ref();
390        let mut components = subpath.components();
391        let Some(head) = components.next() else { return cache.value(subpath) };
392        if matches!(head, Component::Prefix(..) | Component::RootDir) {
393            return cache.value(subpath);
394        }
395        SCRATCH_PATH.with_borrow_mut(|path| {
396            path.clear();
397            path.push(&self.path);
398            for component in std::iter::once(head).chain(components) {
399                match component {
400                    Component::CurDir => {}
401                    Component::ParentDir => {
402                        path.pop();
403                    }
404                    Component::Normal(c) => {
405                        cfg_if! {
406                            if #[cfg(target_family = "wasm")] {
407                                // Need to trim the extra \0 introduces by https://github.com/nodejs/uvwasi/issues/262
408                                path.push(c.to_string_lossy().trim_end_matches('\0'));
409                            } else {
410                                path.push(c);
411                            }
412                        }
413                    }
414                    Component::Prefix(..) | Component::RootDir => {
415                        unreachable!("Path {:?} Subpath {:?}", self.path, subpath)
416                    }
417                }
418            }
419
420            cache.value(path)
421        })
422    }
423
424    #[inline]
425    #[cfg(windows)]
426    fn normalize_root<C: Cache<Cp = Self>>(&self, cache: &C) -> Self {
427        if self.path().as_os_str().as_encoded_bytes().last() == Some(&b'/') {
428            let mut path_string = self.path.to_string_lossy().into_owned();
429            path_string.pop();
430            path_string.push('\\');
431            cache.value(&PathBuf::from(path_string))
432        } else {
433            self.clone()
434        }
435    }
436
437    #[inline]
438    #[cfg(not(windows))]
439    fn normalize_root<C: Cache<Cp = Self>>(&self, _cache: &C) -> Self {
440        self.clone()
441    }
442}
443
444impl FsCachedPath {
445    fn meta<Fs: FileSystem>(&self, fs: &Fs) -> Option<FileMetadata> {
446        *self.meta.get_or_init(|| fs.metadata(&self.path).ok())
447    }
448}
449
450impl Hash for FsCachedPath {
451    fn hash<H: Hasher>(&self, state: &mut H) {
452        self.hash.hash(state);
453    }
454}
455
456impl PartialEq for FsCachedPath {
457    fn eq(&self, other: &Self) -> bool {
458        self.path.as_os_str() == other.path.as_os_str()
459    }
460}
461
462impl Eq for FsCachedPath {}
463
/// Borrowed lookup key for the path set: a path plus its precomputed hash,
/// so a cache probe does not need to allocate an owned entry.
struct BorrowedCachedPath<'a> {
    /// Precomputed hash of `path`.
    hash: u64,
    /// The path being looked up.
    path: &'a Path,
}
468
469impl Equivalent<FsCachedPath> for BorrowedCachedPath<'_> {
470    fn equivalent(&self, other: &FsCachedPath) -> bool {
471        self.path.as_os_str() == other.path.as_os_str()
472    }
473}
474
475impl Hash for BorrowedCachedPath<'_> {
476    fn hash<H: Hasher>(&self, state: &mut H) {
477        self.hash.hash(state);
478    }
479}
480
481impl PartialEq for BorrowedCachedPath<'_> {
482    fn eq(&self, other: &Self) -> bool {
483        self.path.as_os_str() == other.path.as_os_str()
484    }
485}
486
/// Since the cache key already carries a memoized hash, use an identity
/// hasher to avoid hashing it a second time.
#[derive(Default)]
struct IdentityHasher(u64);

impl Hasher for IdentityHasher {
    /// Only whole `u64` hashes are expected; anything else is a misuse.
    fn write(&mut self, _bytes: &[u8]) {
        unreachable!("Invalid use of IdentityHasher")
    }

    /// Stores the value as is; the last value written wins.
    fn write_u64(&mut self, value: u64) {
        self.0 = value;
    }

    /// Returns the last value written, or 0 if nothing was written.
    fn finish(&self) -> u64 {
        self.0
    }
}