// components-rs 0.1.2
//
// Static analysis tooling for Components.js dependency injection projects
// Documentation
//! Node.js `node_modules` directory traversal.
//!
//! Discovers every npm package directory reachable from the project root by following Node's
//! module resolution algorithm: walk up the directory tree collecting `node_modules/` entries,
//! descend into each package directory (including scoped `@scope/pkg` packages), and avoid
//! revisiting the same canonical URL twice.

use std::collections::HashSet;

use url::Url;

use crate::error::Result;
use crate::fs::Fs;

/// Collect the main module directory together with its ancestor directories,
/// which serve as the starting points for `node_modules` scanning.
/// Mirrors `ModuleStateBuilder.buildNodeModuleImportPaths`.
///
/// `main_module_path` must be a directory URL (ending with `/`).
///
/// The returned list always begins with `main_module_path` itself and then
/// climbs one level at a time via `join("..")`.  Climbing stops as soon as
/// the join fails, the parent equals the current URL, or the parent has fewer
/// than two non-empty path segments.
pub fn build_node_module_import_paths(main_module_path: &Url) -> Vec<Url> {
    // Number of non-empty `/`-separated segments in the URL path.
    let segment_count = |url: &Url| {
        url.path()
            .split('/')
            .filter(|segment| !segment.is_empty())
            .count()
    };

    std::iter::successors(Some(main_module_path.clone()), |current| {
        let parent = current.join("..").ok()?;
        // A parent identical to the current URL is a fixed point: there is
        // nothing further up to visit.
        //
        // Shallow root-level directories (e.g. /home/, /) are excluded as
        // well.  Globbing for node_modules from those paths causes VS Code's
        // findFiles to scan the entire filesystem, which hangs the build.
        // Two non-empty path segments (/home/user/) is the practical floor
        // for any project-relevant node_modules location.
        let keep_climbing = parent != *current && segment_count(&parent) >= 2;
        keep_climbing.then_some(parent)
    })
    .collect()
}

/// Resolve every node module directory reachable from `import_paths`.
/// Mirrors `ModuleStateBuilder.buildNodeModulePaths`.
///
/// Each import path is handled in order:
/// 1. if `<import_path>/package.json` can be read, the import path itself is
///    recorded as a package (unless it was already recorded);
/// 2. its `node_modules/` directory is scanned for packages.
///
/// When `recursive` is `true`, each discovered package is also scanned for its own
/// nested `node_modules/`.  Set it to `false` (the common case with npm v3+ hoisting)
/// to avoid hundreds of extra `read_dir` calls.
///
/// # Errors
///
/// Propagates any error returned by the per-import-path scan.
pub async fn build_node_module_paths(
    fs: &dyn Fs,
    import_paths: &[Url],
    recursive: bool,
) -> Result<Vec<Url>> {
    let mut seen = HashSet::new();
    let mut packages = Vec::new();

    for import_path in import_paths {
        let count_before = packages.len();

        // The import path itself may be a package (e.g. the project root):
        // it counts as one when its `package.json` is readable.
        let is_package = match import_path.join("package.json") {
            Ok(manifest_url) => fs.read_to_string(&manifest_url).await.is_ok(),
            Err(_) => false,
        };
        if is_package && seen.insert(import_path.clone()) {
            tracing::info!(
                "[node_modules] import path is a package: {}",
                import_path.as_str()
            );
            packages.push(import_path.clone());
        }

        scan_import_path(fs, import_path, &mut packages, &mut seen, recursive).await?;

        let count_after = packages.len();
        tracing::info!(
            "[node_modules] import path {} added {} packages (total {})",
            import_path.as_str(),
            count_after - count_before,
            count_after,
        );
    }

    tracing::info!(
        "[node_modules] build_node_module_paths returning {} paths",
        packages.len()
    );
    Ok(packages)
}

/// Find the packages installed under `<import_path>/node_modules/` via glob patterns.
///
/// Two patterns are evaluated relative to `import_path`:
/// flat packages match `node_modules/*/package.json`, and
/// scoped packages match `node_modules/@*/*/package.json`.
///
/// Every package directory not yet present in `visited` is appended to
/// `result`; with `recursive` set, that directory is then scanned in turn for
/// its own nested `node_modules/`.
///
/// A glob call that does not produce a match list falls back to an empty one
/// (`unwrap_or_default`), so such a call never aborts the scan.
async fn scan_import_path(
    fs: &dyn Fs,
    import_path: &Url,
    result: &mut Vec<Url>,
    visited: &mut HashSet<Url>,
    recursive: bool,
) -> Result<()> {
    let flat_matches = fs
        .glob(import_path, "node_modules/*/package.json")
        .await
        .unwrap_or_default();
    let scoped_matches = fs
        .glob(import_path, "node_modules/@*/*/package.json")
        .await
        .unwrap_or_default();

    tracing::info!(
        "[node_modules] scan_import_path {}: flat={} scoped={}",
        import_path.as_str(),
        flat_matches.len(),
        scoped_matches.len(),
    );

    let mut newly_added = 0usize;
    for manifest in flat_matches.into_iter().chain(scoped_matches) {
        // The package directory is the manifest URL with the trailing
        // "package.json" removed.
        let package_dir = match package_json_url_to_dir(&manifest.path) {
            Some(dir) => dir,
            None => {
                tracing::warn!(
                    "[node_modules] failed to derive dir from {}",
                    manifest.path.as_str()
                );
                continue;
            }
        };

        // Skip directories that an earlier scan already recorded.
        if !visited.insert(package_dir.clone()) {
            continue;
        }

        result.push(package_dir.clone());
        newly_added += 1;

        if recursive {
            // The recursive async call is boxed and pinned before awaiting.
            Box::pin(scan_import_path(fs, &package_dir, result, visited, recursive)).await?;
        }
    }

    tracing::info!(
        "[node_modules] scan_import_path {} added {} new packages",
        import_path.as_str(),
        newly_added
    );

    Ok(())
}

/// Turn the URL of a `package.json` file (as returned by glob) into the URL
/// of the directory that contains it, always ending in `/`.
///
/// The URL string is cut just after its last `/` and re-parsed.  Returns
/// `None` when the string contains no `/` or the truncated string does not
/// parse as a URL.
///
/// String slicing was chosen over `url.join(".")`, whose result depends on
/// whether the URL already has a trailing slash.
///
/// NOTE(review): the search covers the whole serialized URL, so a `/` inside
/// a query or fragment would be picked as the cut point — presumably glob
/// results never carry those; confirm against the `Fs` implementations.
fn package_json_url_to_dir(url: &Url) -> Option<Url> {
    let raw = url.as_str();
    raw.rfind('/')
        .map(|last_slash| &raw[..=last_slash])
        .and_then(|dir| Url::parse(dir).ok())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Flat and scoped `package.json` URLs both map to their containing
    /// directory, trailing slash included.
    #[test]
    fn test_package_json_url_to_dir() {
        // (package.json URL, expected directory URL)
        let cases = [
            (
                "file:///home/user/project/node_modules/lodash/package.json",
                "file:///home/user/project/node_modules/lodash/",
            ),
            (
                "file:///home/user/project/node_modules/%40types/node/package.json",
                "file:///home/user/project/node_modules/%40types/node/",
            ),
        ];

        for (manifest, expected_dir) in cases {
            let manifest_url = Url::parse(manifest).unwrap();
            let dir = package_json_url_to_dir(&manifest_url).unwrap();
            assert_eq!(dir.as_str(), expected_dir);
        }
    }

    /// Ancestor collection starts at the given directory and stops before
    /// shallow directories.
    #[test]
    fn test_build_import_paths() {
        let dir_url = |path: &str| Url::from_directory_path(path).unwrap();

        let paths = build_node_module_import_paths(&dir_url("/a/b/c"));

        // Stops before shallow directories (depth < 2): /a/ (depth 1) and / (depth 0)
        // are excluded to avoid filesystem-wide glob scans.
        let expected = vec![dir_url("/a/b/c"), dir_url("/a/b")];
        assert_eq!(paths, expected);
    }
}