npm-utils 0.4.0

Pure-Rust utilities for the npm registry: resolve a package version, download npm tarballs and GitHub archives, and extract files.
Documentation
//! `node_modules()` — resolve a `package.json`'s transitive `dependencies` against the registry
//! and install the flat tree.

use std::path::Path;

use crate::package_json::spec::{version_req, Spec};
use semver::VersionReq;
use serde_json::Value;

use crate::path_safety::safe_join;
use crate::registry::{Registry, Resolved};

/// Install the transitive `dependencies` of `package_json` as a flat tree under
/// `<dest>/node_modules/` and return the resolved set (sorted by name).
///
/// Every tarball is checked against its registry `dist.integrity` (sha512) before extraction; a
/// package whose registry metadata advertises no sha512 is refused rather than installed
/// unverified. When the resolved set is unchanged, all work is skipped.
///
/// # Errors
///
/// Fails when the root requirements can't be read from `package_json`, when the registry can't
/// resolve the tree, when a package name can't be joined safely under `node_modules/`, or when
/// fetching, verifying, or extracting any package fails.
pub fn node_modules(
    package_json: &Path,
    dest: &Path,
) -> Result<Vec<Resolved>, Box<dyn std::error::Error>> {
    let requirements = root_requirements(package_json)?;
    let tree = Registry::npm().resolve_tree(&requirements)?;

    // One `name@version` line per resolved package: the description of the wanted tree that is
    // handed to `run_install`.
    let lines: Vec<String> = tree
        .iter()
        .map(|pkg| format!("{}@{}", pkg.name, pkg.version))
        .collect();
    let wanted = lines.join("\n");

    super::run_install(dest, &wanted, |modules_dir| {
        for pkg in tree.iter() {
            // `safe_join` guards the package name before it becomes a directory path.
            let target = safe_join(modules_dir, &pkg.name)?;
            let integrity = pkg.integrity.as_deref();
            super::fetch_verify_extract(&pkg.name, &pkg.tarball_url, integrity, &target)?;
        }
        Ok(())
    })?;

    Ok(tree)
}

/// Read `package_json` and turn each `dependencies` entry into a `(name, VersionReq)` pair.
///
/// Each spec is classified via [`Spec`]. A non-registry spec (git, remote tarball, local path,
/// alias-to-non-registry) can't be fetched as a registry tarball, so it is reported as a clear
/// error here. Entries whose value is not a JSON string are skipped.
///
/// # Errors
///
/// Fails when the file can't be read or isn't valid JSON, when it has no `dependencies` object,
/// when a spec is not a registry spec, or when a spec can't be converted into a `VersionReq`.
fn root_requirements(
    package_json: &Path,
) -> Result<Vec<(String, VersionReq)>, Box<dyn std::error::Error>> {
    let text = std::fs::read_to_string(package_json)?;
    let manifest: Value = serde_json::from_str(&text)?;
    let Some(dependencies) = manifest.get("dependencies").and_then(Value::as_object) else {
        return Err("no dependencies section in package.json".into());
    };

    let mut requirements = Vec::new();
    // Only string-valued entries are considered; anything else is passed over.
    let string_specs = dependencies
        .iter()
        .filter_map(|(name, raw)| raw.as_str().map(|spec| (name, spec)));
    for (name, spec) in string_specs {
        // Reject non-registry specs before attempting to parse a version requirement.
        if !Spec::parse(spec).is_registry() {
            return Err(format!(
                "dependency `{name}`: {spec:?} is not a registry spec — git/tarball/local specs \
                 aren't installable from the registry"
            )
            .into());
        }
        match version_req(spec) {
            Ok(req) => requirements.push((name.clone(), req)),
            Err(e) => {
                return Err(
                    format!("dependency `{name}`: unsupported version {spec:?}: {e}").into(),
                );
            }
        }
    }
    Ok(requirements)
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Offline check: a git spec is rejected, a registry range becomes one requirement.
    #[test]
    fn root_requirements_classifies_via_spec() {
        let workdir = tempdir().unwrap();
        let manifest_path = workdir.path().join("package.json");

        // A git spec is not a registry install, so it must surface as an error.
        let git_manifest = r#"{"dependencies":{"x":"github:owner/repo#abc"}}"#;
        std::fs::write(&manifest_path, git_manifest).unwrap();
        let rejected = root_requirements(&manifest_path);
        assert!(rejected.is_err());

        // A registry range yields exactly one (name, VersionReq) pair.
        let range_manifest = r#"{"dependencies":{"lit":"^3"}}"#;
        std::fs::write(&manifest_path, range_manifest).unwrap();
        let requirements = root_requirements(&manifest_path).unwrap();
        assert_eq!(requirements.len(), 1);
        assert_eq!(requirements[0].0, "lit");
    }

    /// Real install of the React-showcase deps. react-dom depends on scheduler, so a correct
    /// transitive resolve puts all three under node_modules/. Each tarball's registry sha512
    /// integrity is verified end-to-end along the way, so a mismatch would fail the install.
    /// (Tamper-rejection itself is covered offline by
    /// `crate::integrity::tests::verify_checks_sha512_and_rejects_tampering`.)
    #[test]
    #[ignore = "network: hits the npm registry"]
    fn installs_react_with_transitive_scheduler() {
        let sandbox = tempdir().unwrap();
        let root = sandbox.path();
        let manifest_path = root.join("package.json");
        std::fs::write(
            &manifest_path,
            r#"{ "dependencies": { "react": "^19", "react-dom": "^19" } }"#,
        )
        .unwrap();

        let installed = node_modules(&manifest_path, root).unwrap();
        let names: Vec<&str> = installed.iter().map(|pkg| pkg.name.as_str()).collect();
        for direct in ["react", "react-dom"] {
            assert!(names.contains(&direct), "got {names:?}");
        }
        assert!(
            names.contains(&"scheduler"),
            "transitive dep missing: {names:?}"
        );

        // Every resolved package must have been extracted with its manifest in place.
        let nm = root.join("node_modules");
        for p in ["react", "react-dom", "scheduler"] {
            let extracted_manifest = nm.join(p).join("package.json");
            assert!(
                extracted_manifest.is_file(),
                "node_modules/{p}/package.json missing"
            );
        }
    }

    /// `ms` is a tiny, dependency-free, long-frozen CommonJS package, which makes it a focused
    /// check that a real CJS package is downloaded and extracted *intact*. CommonJS is exactly
    /// the case a buildless ESM tree can't serve directly, which is why node_modules/ exists.
    #[test]
    #[ignore = "network: hits the npm registry"]
    fn downloads_and_extracts_a_commonjs_package() {
        use crate::package_json::{PackageJson, PackageType};

        let sandbox = tempdir().unwrap();
        let root = sandbox.path();
        let manifest_path = root.join("package.json");
        std::fs::write(&manifest_path, r#"{ "dependencies": { "ms": "^2" } }"#).unwrap();

        let installed = node_modules(&manifest_path, root).unwrap();
        let names: Vec<&str> = installed.iter().map(|pkg| pkg.name.as_str()).collect();
        assert_eq!(names, ["ms"], "ms has no runtime dependencies");

        // The extracted manifest identifies the package and its module system.
        let ms = root.join("node_modules/ms");
        let manifest = PackageJson::from_path(&ms.join("package.json")).unwrap();
        assert_eq!(manifest.name(), Some("ms"));
        assert_eq!(
            manifest.package_type(),
            PackageType::CommonJs,
            "ms ships CommonJS"
        );

        // The entry file itself must be CommonJS source.
        let entry = ms.join("index.js");
        let source = std::fs::read_to_string(&entry).unwrap();
        assert!(
            source.contains("module.exports"),
            "extracted entry {entry:?} is CommonJS source"
        );
    }
}