Skip to main content

npm_utils/
registry.rs

1//! npm registry interaction: tarball URLs, package metadata, and version
2//! resolution against a semver range.
3
4use crate::download;
5use semver::{Version, VersionReq};
6use serde_json::Value;
7
/// An npm-compatible registry, identified by the base URL that all request URLs are
/// built from. [`Default`] points at the public registry.
///
/// Derives `Debug` and `Clone` so a registry handle can be logged and passed around like
/// the other public types in this module.
#[derive(Debug, Clone)]
pub struct Registry {
    /// Root URL of the registry, e.g. `https://registry.npmjs.org`.
    pub base_url: String,
}
12
13impl Default for Registry {
14    fn default() -> Self {
15        Self {
16            base_url: "https://registry.npmjs.org".to_string(),
17        }
18    }
19}
20
21/// A resolved package version: the exact version, the tarball to fetch, and the
22/// registry's `dist.integrity` SRI for that tarball (when the packument publishes one).
23///
24/// `#[non_exhaustive]` so further fields can be added without a breaking change — this
25/// type is only ever *constructed* inside the crate; callers receive and read it.
26#[derive(Debug, Clone)]
27#[non_exhaustive]
28pub struct Resolved {
29    pub name: String,
30    pub version: Version,
31    pub tarball_url: String,
32    /// The registry's Subresource-Integrity hash (`sha512-<base64>`), when the packument
33    /// carries one — verified against the downloaded bytes before extraction. `None` for a
34    /// synthesized tarball URL or a packument entry without `dist.integrity`.
35    pub integrity: Option<String>,
36}
37
38impl Registry {
39    /// The public npm registry (`https://registry.npmjs.org`).
40    pub fn npm() -> Self {
41        Self::default()
42    }
43
44    /// A registry at a custom base URL (e.g. a private mirror).
45    pub fn with_base_url(base_url: impl Into<String>) -> Self {
46        Self {
47            base_url: base_url.into(),
48        }
49    }
50
51    /// Conventional tarball URL for an exact `version`. Handles scoped names:
52    /// `@scope/pkg` → `<base>/@scope/pkg/-/pkg-<version>.tgz`.
53    pub fn tarball_url(&self, name: &str, version: &str) -> String {
54        let unscoped = name.rsplit('/').next().unwrap_or(name);
55        format!("{}/{}/-/{}-{}.tgz", self.base_url, name, unscoped, version)
56    }
57
58    /// Fetch the package metadata document ("packument").
59    pub fn packument(&self, name: &str) -> Result<Value, Box<dyn std::error::Error>> {
60        // Scoped names are URL-encoded in the path: `@scope/pkg` → `@scope%2fpkg`.
61        let encoded = match name.strip_prefix('@') {
62            Some(rest) => format!("@{}", rest.replacen('/', "%2f", 1)),
63            None => name.to_string(),
64        };
65        let url = format!("{}/{}", self.base_url, encoded);
66        let bytes = download::fetch(&url)?;
67        Ok(serde_json::from_slice(&bytes)?)
68    }
69
70    /// Resolve the newest published version of `name` matching `req`.
71    pub fn resolve(
72        &self,
73        name: &str,
74        req: &VersionReq,
75    ) -> Result<Resolved, Box<dyn std::error::Error>> {
76        let doc = self.packument(name)?;
77        let (version, tarball, integrity) = select_version(&doc, req)
78            .ok_or_else(|| format!("no published version of {name} matches {req}"))?;
79        let tarball_url = tarball.unwrap_or_else(|| self.tarball_url(name, &version.to_string()));
80        Ok(Resolved {
81            name: name.to_string(),
82            version,
83            tarball_url,
84            integrity,
85        })
86    }
87
88    /// Resolve the transitive dependency graph of `roots` into a **flat** set — one
89    /// version per package name (the npm v3+ `node_modules` layout). Each package's
90    /// `dependencies` are read straight from the registry metadata (no tarball
91    /// extraction), every child resolved to its newest matching version, and the set
92    /// de-duplicated by name. Cyclic graphs terminate (a name is resolved once).
93    /// Returns the packages sorted by name.
94    ///
95    /// MVP limitation: a single version per package name. Two *incompatible*
96    /// requirements on the same package — a genuine conflict npm would resolve by
97    /// nesting — is reported as an error rather than silently mis-resolved.
98    pub fn resolve_tree(
99        &self,
100        roots: &[(String, VersionReq)],
101    ) -> Result<Vec<Resolved>, Box<dyn std::error::Error>> {
102        self.resolve_tree_from(roots, |name| self.packument(name))
103    }
104
105    /// [`resolve_tree`](Self::resolve_tree) with an injectable packument source, so the
106    /// graph walk can be unit-tested without the network.
107    fn resolve_tree_from<F>(
108        &self,
109        roots: &[(String, VersionReq)],
110        mut get_packument: F,
111    ) -> Result<Vec<Resolved>, Box<dyn std::error::Error>>
112    where
113        F: FnMut(&str) -> Result<Value, Box<dyn std::error::Error>>,
114    {
115        use std::collections::{HashMap, VecDeque};
116        let mut packuments: HashMap<String, Value> = HashMap::new();
117        let mut resolved: HashMap<String, Resolved> = HashMap::new();
118        let mut queue: VecDeque<(String, VersionReq)> = roots.iter().cloned().collect();
119
120        while let Some((name, req)) = queue.pop_front() {
121            if let Some(existing) = resolved.get(&name) {
122                if req.matches(&existing.version) {
123                    continue; // already resolved to a satisfying version — dedup
124                }
125                return Err(format!(
126                    "version conflict for `{name}`: resolved {} but also required `{req}` \
127                     (flat node_modules install resolves one version per package)",
128                    existing.version
129                )
130                .into());
131            }
132            if !packuments.contains_key(&name) {
133                let doc = get_packument(&name)?;
134                packuments.insert(name.clone(), doc);
135            }
136            let doc = &packuments[&name];
137            let (version, tarball, integrity) = select_version(doc, &req)
138                .ok_or_else(|| format!("no published version of {name} matches {req}"))?;
139            let deps = dependencies_of(doc, &version);
140            let tarball_url =
141                tarball.unwrap_or_else(|| self.tarball_url(&name, &version.to_string()));
142            for (dep_name, dep_spec) in deps {
143                let dep_req = version_req(&dep_spec).map_err(|e| {
144                    format!(
145                        "{name}@{version} dependency `{dep_name}`: unsupported version \
146                         {dep_spec:?}: {e}"
147                    )
148                })?;
149                queue.push_back((dep_name, dep_req));
150            }
151            resolved.insert(
152                name.clone(),
153                Resolved {
154                    name,
155                    version,
156                    tarball_url,
157                    integrity,
158                },
159            );
160        }
161        let mut out: Vec<Resolved> = resolved.into_values().collect();
162        out.sort_by(|a, b| a.name.cmp(&b.name));
163        Ok(out)
164    }
165}
166
167/// Pick the newest version in a packument's `versions` map that satisfies `req`,
168/// returning it with the `dist.tarball` URL and the `dist.integrity` SRI the registry
169/// advertises (each `None` if absent). Factored out for unit testing without network access.
170fn select_version(
171    doc: &Value,
172    req: &VersionReq,
173) -> Option<(Version, Option<String>, Option<String>)> {
174    let versions = doc.get("versions")?.as_object()?;
175    let mut best: Option<(Version, Option<String>, Option<String>)> = None;
176    for (ver_str, meta) in versions {
177        let Ok(ver) = Version::parse(ver_str) else {
178            continue;
179        };
180        if !req.matches(&ver) {
181            continue;
182        }
183        if best.as_ref().map(|(b, _, _)| ver > *b).unwrap_or(true) {
184            let dist = meta.get("dist");
185            let string_at = |key: &str| {
186                dist.and_then(|d| d.get(key))
187                    .and_then(|v| v.as_str())
188                    .map(str::to_string)
189            };
190            best = Some((ver, string_at("tarball"), string_at("integrity")));
191        }
192    }
193    best
194}
195
196/// The npm dependency-spec → [`VersionReq`] parser lives in the [`crate::package_json`] module
197/// (the package-spec grammar); re-exported here for back-compat as `registry::version_req`.
198pub use crate::package_json::spec::version_req;
199
200/// The `dependencies` of a specific version, read from a packument, as `(name, spec)`
201/// pairs. The full packument carries each version's `dependencies` inline, so the
202/// transitive walk discovers children without extracting any tarball.
203fn dependencies_of(doc: &Value, version: &Version) -> Vec<(String, String)> {
204    doc.get("versions")
205        .and_then(|v| v.get(version.to_string()))
206        .and_then(|meta| meta.get("dependencies"))
207        .and_then(|d| d.as_object())
208        .map(|map| {
209            map.iter()
210                .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string())))
211                .collect()
212        })
213        .unwrap_or_default()
214}
215
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use std::collections::HashMap;

    /// Parse a semver requirement literal used by the tests.
    fn req(text: &str) -> VersionReq {
        text.parse().unwrap()
    }

    /// In-memory stand-in for the registry: look `name` up in `pkgs`, or fail the same
    /// way a missing packument would.
    fn lookup(
        pkgs: &HashMap<String, Value>,
        name: &str,
    ) -> Result<Value, Box<dyn std::error::Error>> {
        match pkgs.get(name) {
            Some(doc) => Ok(doc.clone()),
            None => Err(format!("no packument for {name}").into()),
        }
    }

    #[test]
    fn tarball_url_handles_scoped_and_unscoped() {
        let registry = Registry::npm();
        let cases = [
            (
                "lit",
                "3.3.3",
                "https://registry.npmjs.org/lit/-/lit-3.3.3.tgz",
            ),
            (
                "@lit/context",
                "1.1.6",
                "https://registry.npmjs.org/@lit/context/-/context-1.1.6.tgz",
            ),
        ];
        for (name, version, expected) in cases {
            assert_eq!(registry.tarball_url(name, version), expected);
        }
    }

    #[test]
    fn select_version_picks_newest_matching() {
        let doc = json!({
            "versions": {
                "3.1.0": { "dist": { "tarball": "https://r/lit-3.1.0.tgz" } },
                "3.3.3": { "dist": {
                    "tarball": "https://r/lit-3.3.3.tgz",
                    "integrity": "sha512-deadbeef"
                } },
                "4.0.0": { "dist": { "tarball": "https://r/lit-4.0.0.tgz" } },
                "2.9.9": {}
            }
        });
        let picked = select_version(&doc, &req("^3"));
        let (ver, tarball, integrity) = picked.unwrap();
        assert_eq!(ver, Version::parse("3.3.3").unwrap());
        assert_eq!(tarball.as_deref(), Some("https://r/lit-3.3.3.tgz"));
        // The entry's dist.integrity is returned alongside the tarball URL.
        assert_eq!(integrity.as_deref(), Some("sha512-deadbeef"));
    }

    #[test]
    fn select_version_integrity_is_none_when_absent() {
        // A dist with a tarball but no integrity field yields `integrity == None`.
        let doc = json!({ "versions": {
            "1.0.0": { "dist": { "tarball": "https://r/x-1.0.0.tgz" } }
        }});
        let (_, tarball, integrity) = select_version(&doc, &req("^1")).unwrap();
        assert_eq!(tarball.as_deref(), Some("https://r/x-1.0.0.tgz"));
        assert!(integrity.is_none());
    }

    #[test]
    fn select_version_none_when_no_match() {
        let doc = json!({ "versions": { "1.0.0": {}, "2.0.0": {} } });
        let picked = select_version(&doc, &req("^5"));
        assert!(picked.is_none());
    }

    /// Build a packument that publishes exactly one `version`, with the given
    /// `dependencies` map and a `dist` block, shaped like the registry's documents so
    /// the graph walk can run against it offline.
    fn packument_with(version: &str, deps: &[(&str, &str)]) -> Value {
        let mut dependencies = serde_json::Map::new();
        for &(dep, spec) in deps {
            dependencies.insert(dep.to_string(), json!(spec));
        }
        let entry = json!({
            "dist": {
                "tarball": format!("https://r/{version}.tgz"),
                "integrity": format!("sha512-{version}"),
            },
            "dependencies": Value::Object(dependencies),
        });
        let mut versions = serde_json::Map::new();
        versions.insert(version.to_string(), entry);
        json!({ "versions": Value::Object(versions) })
    }

    #[test]
    fn resolve_tree_walks_transitively_dedups_and_handles_cycles() {
        // a@1 → {b ^1, c ^1}; b@1 → {c ^1} (shared); c@1 → {a ^1} (cycle back to root).
        let pkgs: HashMap<String, Value> = HashMap::from([
            (
                "a".to_string(),
                packument_with("1.0.0", &[("b", "^1"), ("c", "^1")]),
            ),
            ("b".to_string(), packument_with("1.2.0", &[("c", "^1")])),
            ("c".to_string(), packument_with("1.5.0", &[("a", "^1")])),
        ]);

        let roots = vec![("a".to_string(), req("^1"))];
        let resolved = Registry::npm()
            .resolve_tree_from(&roots, |name| lookup(&pkgs, name))
            .unwrap();

        // Each of a, b, c resolved exactly once (cycle + shared dep deduped), sorted by name.
        let names: Vec<&str> = resolved.iter().map(|r| r.name.as_str()).collect();
        assert_eq!(names, ["a", "b", "c"]);

        let by_name: HashMap<&str, &Resolved> =
            resolved.iter().map(|r| (r.name.as_str(), r)).collect();
        assert_eq!(by_name["b"].version.to_string(), "1.2.0");
        assert_eq!(by_name["c"].version.to_string(), "1.5.0");

        // dist.integrity is carried through the transitive walk.
        assert_eq!(by_name["b"].integrity.as_deref(), Some("sha512-1.2.0"));
    }

    #[test]
    fn resolve_tree_errors_on_version_conflict() {
        // root requires x ^1; root also requires y, and y requires x ^2 → incompatible.
        let x_doc = json!({ "versions": {
            "1.0.0": { "dist": { "tarball": "https://r/x1.tgz" } },
            "2.0.0": { "dist": { "tarball": "https://r/x2.tgz" } }
        }});
        let pkgs: HashMap<String, Value> = HashMap::from([
            ("x".to_string(), x_doc),
            ("y".to_string(), packument_with("1.0.0", &[("x", "^2")])),
        ]);

        let roots = vec![("x".to_string(), req("^1")), ("y".to_string(), req("^1"))];
        let err = Registry::npm()
            .resolve_tree_from(&roots, |name| lookup(&pkgs, name))
            .unwrap_err();
        assert!(err.to_string().contains("version conflict"), "got: {err}");
    }
}