Skip to main content

components_rs/discovery/
node_modules.rs

//! Node.js `node_modules` directory traversal.
//!
//! Discovers npm package directories reachable from the project root in the spirit of Node's
//! module resolution algorithm: walk up the directory tree (stopping before shallow root-level
//! directories) collecting `node_modules/` entries, including scoped `@scope/pkg` packages,
//! optionally descend into each package's own nested `node_modules/`, and avoid recording the
//! same package directory URL twice.
7
8use std::collections::HashSet;
9
10use url::Url;
11
12use crate::error::Result;
13use crate::fs::Fs;
14
15/// Build all ancestor directory URLs from the main module URL, to be used as
16/// starting points for node_modules scanning.
17/// Mirrors `ModuleStateBuilder.buildNodeModuleImportPaths`.
18///
19/// `main_module_path` must be a directory URL (ending with `/`).
20pub fn build_node_module_import_paths(main_module_path: &Url) -> Vec<Url> {
21    let mut paths = Vec::new();
22    let mut current = main_module_path.clone();
23    paths.push(current.clone());
24    loop {
25        let parent = match current.join("..") {
26            Ok(p) => p,
27            Err(_) => break,
28        };
29        if parent == current {
30            break;
31        }
32        current = parent;
33        // Stop before shallow root-level directories (e.g. /home/, /).
34        // Globbing for node_modules from those paths causes VS Code's
35        // findFiles to scan the entire filesystem, which hangs the build.
36        // Two non-empty path segments (/home/user/) is the practical floor
37        // for any project-relevant node_modules location.
38        let depth = current
39            .path()
40            .split('/')
41            .filter(|s| !s.is_empty())
42            .count();
43        if depth < 2 {
44            break;
45        }
46        paths.push(current.clone());
47    }
48    paths
49}
50
51/// Discover all node module paths starting from the given import URLs.
52/// Mirrors `ModuleStateBuilder.buildNodeModulePaths`.
53///
54/// When `recursive` is `true`, each discovered package is also scanned for its own
55/// nested `node_modules/`.  Set it to `false` (the common case with npm v3+ hoisting)
56/// to avoid hundreds of extra `read_dir` calls.
57pub async fn build_node_module_paths(
58    fs: &dyn Fs,
59    import_paths: &[Url],
60    recursive: bool,
61) -> Result<Vec<Url>> {
62    let mut result = Vec::new();
63    let mut visited = HashSet::new();
64    for path in import_paths {
65        let before = result.len();
66        // The import path itself may be a package (e.g. the project root).
67        if let Ok(pkg_url) = path.join("package.json") {
68            if fs.read_to_string(&pkg_url).await.is_ok() && visited.insert(path.clone()) {
69                tracing::info!("[node_modules] import path is a package: {}", path.as_str());
70                result.push(path.clone());
71            }
72        }
73        scan_import_path(fs, path, &mut result, &mut visited, recursive).await?;
74        tracing::info!(
75            "[node_modules] import path {} added {} packages (total {})",
76            path.as_str(),
77            result.len() - before,
78            result.len(),
79        );
80    }
81    tracing::info!("[node_modules] build_node_module_paths returning {} paths", result.len());
82    Ok(result)
83}
84
85/// Scan `<import_path>/node_modules/` for packages using glob patterns.
86/// Flat packages match `node_modules/*/package.json`;
87/// scoped packages match `node_modules/@*/*/package.json`.
88async fn scan_import_path(
89    fs: &dyn Fs,
90    import_path: &Url,
91    result: &mut Vec<Url>,
92    visited: &mut HashSet<Url>,
93    recursive: bool,
94) -> Result<()> {
95    let flat = fs.glob(import_path, "node_modules/*/package.json")
96        .await
97        .unwrap_or_default();
98    let scoped = fs.glob(import_path, "node_modules/@*/*/package.json")
99        .await
100        .unwrap_or_default();
101
102    tracing::info!(
103        "[node_modules] scan_import_path {}: flat={} scoped={}",
104        import_path.as_str(), flat.len(), scoped.len(),
105    );
106
107    let mut added = 0usize;
108    for entry in flat.into_iter().chain(scoped) {
109        // Derive the package directory by stripping the trailing "package.json"
110        // from the glob result.  Using rfind('/') is unambiguous; url.join(".")
111        // can misbehave depending on whether the URL has a trailing slash.
112        let Some(pkg_dir) = package_json_url_to_dir(&entry.path) else {
113            tracing::warn!("[node_modules] failed to derive dir from {}", entry.path.as_str());
114            continue;
115        };
116        if visited.insert(pkg_dir.clone()) {
117            result.push(pkg_dir.clone());
118            added += 1;
119            if recursive {
120                Box::pin(scan_import_path(fs, &pkg_dir, result, visited, recursive)).await?;
121            }
122        }
123    }
124    tracing::info!("[node_modules] scan_import_path {} added {} new packages", import_path.as_str(), added);
125
126    Ok(())
127}
128
129/// Given a `package.json` file URL returned by glob, return the containing
130/// directory URL (with a guaranteed trailing `/`).
131///
132/// Slicing at the last `/` is simpler and more reliable than `url.join(".")`
133/// whose behaviour depends on whether the URL already has a trailing slash.
134fn package_json_url_to_dir(url: &Url) -> Option<Url> {
135    let s = url.as_str();
136    let slash = s.rfind('/')?;
137    Url::parse(&s[..=slash]).ok()
138}
139
#[cfg(test)]
mod tests {
    use super::*;

    /// The helper must drop the trailing `package.json` and keep the final slash,
    /// for both a flat package and a percent-encoded scoped package.
    #[test]
    fn test_package_json_url_to_dir() {
        let cases = [
            (
                "file:///home/user/project/node_modules/lodash/package.json",
                "file:///home/user/project/node_modules/lodash/",
            ),
            (
                "file:///home/user/project/node_modules/%40types/node/package.json",
                "file:///home/user/project/node_modules/%40types/node/",
            ),
        ];

        for (manifest, expected_dir) in cases {
            let manifest_url = Url::parse(manifest).unwrap();
            let dir = package_json_url_to_dir(&manifest_url).unwrap();
            assert_eq!(dir.as_str(), expected_dir);
        }
    }

    /// Starting from `/a/b/c/`, only `/a/b/c/` and `/a/b/` are returned.
    #[test]
    fn test_build_import_paths() {
        let dir_url = |path: &str| Url::from_directory_path(path).unwrap();

        let paths = build_node_module_import_paths(&dir_url("/a/b/c"));

        // Stops before shallow directories (depth < 2): /a/ (depth 1) and / (depth 0)
        // are excluded to avoid filesystem-wide glob scans.
        let expected = vec![dir_url("/a/b/c"), dir_url("/a/b")];
        assert_eq!(paths, expected);
    }
}