Skip to main content

npm_utils/
registry.rs

1//! npm registry interaction: tarball URLs, package metadata, and version
2//! resolution against a semver range.
3
4use crate::download;
5use crate::package_json::spec::Range;
6use semver::Version;
7use serde_json::Value;
8
/// An npm-compatible registry. Defaults to the public registry.
///
/// Holds only the base URL, so it is cheap to clone and safe to print in logs
/// and assertion messages.
#[derive(Debug, Clone)]
pub struct Registry {
    /// Root URL that package paths are appended to, e.g. `https://registry.npmjs.org`.
    pub base_url: String,
}
13
14impl Default for Registry {
15    fn default() -> Self {
16        Self {
17            base_url: "https://registry.npmjs.org".to_string(),
18        }
19    }
20}
21
/// A resolved package version: the exact version, the tarball to fetch, and the
/// registry's `dist.integrity` SRI for that tarball (when the packument publishes one).
///
/// `#[non_exhaustive]` so further fields can be added without a breaking change — this
/// type is only ever *constructed* inside the crate; callers receive and read it.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct Resolved {
    /// The package name exactly as it was requested (a scoped name keeps its `@scope/`).
    pub name: String,
    /// The exact version picked from the packument's `versions` map.
    pub version: Version,
    /// Where to download the tarball: the packument's `dist.tarball` when it has one,
    /// otherwise the conventional URL built by [`Registry::tarball_url`].
    pub tarball_url: String,
    /// The registry's Subresource-Integrity string (e.g. `sha512-<base64>`), copied from
    /// the selected version's `dist.integrity`. It is read independently of
    /// `dist.tarball`, so it is `None` exactly when that entry carries no string
    /// `dist.integrity` — even if `tarball_url` came from the packument.
    /// NOTE(review): the check against the downloaded bytes happens outside this
    /// module — confirm the installer rejects `None` as intended.
    pub integrity: Option<String>,
}
38
39impl Registry {
40    /// The public npm registry (`https://registry.npmjs.org`).
41    pub fn npm() -> Self {
42        Self::default()
43    }
44
45    /// A registry at a custom base URL (e.g. a private mirror).
46    pub fn with_base_url(base_url: impl Into<String>) -> Self {
47        Self {
48            base_url: base_url.into(),
49        }
50    }
51
52    /// Conventional tarball URL for an exact `version`. Handles scoped names:
53    /// `@scope/pkg` → `<base>/@scope/pkg/-/pkg-<version>.tgz`.
54    pub fn tarball_url(&self, name: &str, version: &str) -> String {
55        let unscoped = name.rsplit('/').next().unwrap_or(name);
56        format!("{}/{}/-/{}-{}.tgz", self.base_url, name, unscoped, version)
57    }
58
59    /// Fetch the package metadata document ("packument").
60    pub fn packument(&self, name: &str) -> Result<Value, Box<dyn std::error::Error>> {
61        // Scoped names are URL-encoded in the path: `@scope/pkg` → `@scope%2fpkg`.
62        let encoded = match name.strip_prefix('@') {
63            Some(rest) => format!("@{}", rest.replacen('/', "%2f", 1)),
64            None => name.to_string(),
65        };
66        let url = format!("{}/{}", self.base_url, encoded);
67        let bytes = download::fetch(&url)?;
68        Ok(serde_json::from_slice(&bytes)?)
69    }
70
71    /// Resolve the newest published version of `name` matching the `range`.
72    pub fn resolve(
73        &self,
74        name: &str,
75        range: &Range,
76    ) -> Result<Resolved, Box<dyn std::error::Error>> {
77        let doc = self.packument(name)?;
78        let (version, tarball, integrity) = select_version(&doc, range)
79            .ok_or_else(|| format!("no published version of {name} matches {range}"))?;
80        let tarball_url = tarball.unwrap_or_else(|| self.tarball_url(name, &version.to_string()));
81        Ok(Resolved {
82            name: name.to_string(),
83            version,
84            tarball_url,
85            integrity,
86        })
87    }
88
89    /// Resolve the transitive dependency graph of `roots` into a **flat** set — one
90    /// version per package name (the npm v3+ `node_modules` layout). Each package's
91    /// `dependencies` are read straight from the registry metadata (no tarball
92    /// extraction), every child resolved to its newest matching version, and the set
93    /// de-duplicated by name. Cyclic graphs terminate (a name is resolved once).
94    /// Returns the packages sorted by name.
95    ///
96    /// MVP limitation: a single version per package name. Two *incompatible*
97    /// requirements on the same package — a genuine conflict npm would resolve by
98    /// nesting — is reported as an error rather than silently mis-resolved.
99    pub fn resolve_tree(
100        &self,
101        roots: &[(String, Range)],
102    ) -> Result<Vec<Resolved>, Box<dyn std::error::Error>> {
103        self.resolve_tree_from(roots, |name| self.packument(name))
104    }
105
106    /// [`resolve_tree`](Self::resolve_tree) with an injectable packument source, so the
107    /// graph walk can be unit-tested without the network.
108    fn resolve_tree_from<F>(
109        &self,
110        roots: &[(String, Range)],
111        mut get_packument: F,
112    ) -> Result<Vec<Resolved>, Box<dyn std::error::Error>>
113    where
114        F: FnMut(&str) -> Result<Value, Box<dyn std::error::Error>>,
115    {
116        use std::collections::{HashMap, VecDeque};
117        let mut packuments: HashMap<String, Value> = HashMap::new();
118        let mut resolved: HashMap<String, Resolved> = HashMap::new();
119        let mut queue: VecDeque<(String, Range)> = roots.iter().cloned().collect();
120
121        while let Some((name, range)) = queue.pop_front() {
122            if let Some(existing) = resolved.get(&name) {
123                if range.matches(&existing.version) {
124                    continue; // already resolved to a satisfying version — dedup
125                }
126                return Err(format!(
127                    "version conflict for `{name}`: resolved {} but also required `{range}` \
128                     (flat node_modules install resolves one version per package)",
129                    existing.version
130                )
131                .into());
132            }
133            if !packuments.contains_key(&name) {
134                let doc = get_packument(&name)?;
135                packuments.insert(name.clone(), doc);
136            }
137            let doc = &packuments[&name];
138            let (version, tarball, integrity) = select_version(doc, &range)
139                .ok_or_else(|| format!("no published version of {name} matches {range}"))?;
140            let deps = dependencies_of(doc, &version);
141            let tarball_url =
142                tarball.unwrap_or_else(|| self.tarball_url(&name, &version.to_string()));
143            for (dep_name, dep_spec) in deps {
144                // Transitive deps routinely use npm `||`/space ranges; parse the full grammar.
145                let dep_range = Range::parse(&dep_spec).map_err(|e| {
146                    format!(
147                        "{name}@{version} dependency `{dep_name}`: unsupported version \
148                         {dep_spec:?}: {e}"
149                    )
150                })?;
151                queue.push_back((dep_name, dep_range));
152            }
153            resolved.insert(
154                name.clone(),
155                Resolved {
156                    name,
157                    version,
158                    tarball_url,
159                    integrity,
160                },
161            );
162        }
163        let mut out: Vec<Resolved> = resolved.into_values().collect();
164        out.sort_by(|a, b| a.name.cmp(&b.name));
165        Ok(out)
166    }
167}
168
169/// Pick the newest version in a packument's `versions` map that satisfies the `range`,
170/// returning it with the `dist.tarball` URL and the `dist.integrity` SRI the registry
171/// advertises (each `None` if absent). Factored out for unit testing without network access.
172fn select_version(doc: &Value, range: &Range) -> Option<(Version, Option<String>, Option<String>)> {
173    let versions = doc.get("versions")?.as_object()?;
174    let mut best: Option<(Version, Option<String>, Option<String>)> = None;
175    for (ver_str, meta) in versions {
176        let Ok(ver) = Version::parse(ver_str) else {
177            continue;
178        };
179        if !range.matches(&ver) {
180            continue;
181        }
182        if best.as_ref().map(|(b, _, _)| ver > *b).unwrap_or(true) {
183            let dist = meta.get("dist");
184            let string_at = |key: &str| {
185                dist.and_then(|d| d.get(key))
186                    .and_then(|v| v.as_str())
187                    .map(str::to_string)
188            };
189            best = Some((ver, string_at("tarball"), string_at("integrity")));
190        }
191    }
192    best
193}
194
/// The npm dependency-spec → `VersionReq` parser lives in the [`crate::package_json`] module
/// (the package-spec grammar); re-exported here for back-compat as `registry::version_req`.
197pub use crate::package_json::spec::version_req;
198
199/// The `dependencies` of a specific version, read from a packument, as `(name, spec)`
200/// pairs. The full packument carries each version's `dependencies` inline, so the
201/// transitive walk discovers children without extracting any tarball.
202fn dependencies_of(doc: &Value, version: &Version) -> Vec<(String, String)> {
203    doc.get("versions")
204        .and_then(|v| v.get(version.to_string()))
205        .and_then(|meta| meta.get("dependencies"))
206        .and_then(|d| d.as_object())
207        .map(|map| {
208            map.iter()
209                .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string())))
210                .collect()
211        })
212        .unwrap_or_default()
213}
214
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use std::collections::HashMap;

    /// Parse a range literal used by a fixture; a malformed literal is a test bug.
    fn range(spec: &str) -> Range {
        spec.parse().unwrap()
    }

    /// Build a packument holding a single version with a `dist` block and the given
    /// `dependencies`, shaped like the registry's document, so the graph walk can run
    /// offline.
    fn packument_with(version: &str, deps: &[(&str, &str)]) -> Value {
        let mut dep_map = serde_json::Map::new();
        for (dep, spec) in deps {
            dep_map.insert(dep.to_string(), json!(*spec));
        }
        let entry = json!({
            "dist": {
                "tarball": format!("https://r/{version}.tgz"),
                "integrity": format!("sha512-{version}"),
            },
            "dependencies": Value::Object(dep_map),
        });
        let mut versions = serde_json::Map::new();
        versions.insert(version.to_string(), entry);
        json!({ "versions": Value::Object(versions) })
    }

    /// Run the graph walk against an in-memory packument table instead of the network.
    fn walk(
        pkgs: &HashMap<String, Value>,
        roots: &[(String, Range)],
    ) -> Result<Vec<Resolved>, Box<dyn std::error::Error>> {
        Registry::npm().resolve_tree_from(roots, |name| match pkgs.get(name) {
            Some(doc) => Ok(doc.clone()),
            None => Err(format!("no packument for {name}").into()),
        })
    }

    /// The names of a resolved set, in the order the walk returned them.
    fn names_of(set: &[Resolved]) -> Vec<&str> {
        set.iter().map(|r| r.name.as_str()).collect()
    }

    /// The entry for `name` in a resolved set; panics if the walk did not produce it.
    fn entry<'a>(set: &'a [Resolved], name: &str) -> &'a Resolved {
        set.iter().find(|r| r.name == name).unwrap()
    }

    #[test]
    fn tarball_url_handles_scoped_and_unscoped() {
        let reg = Registry::npm();
        let plain = reg.tarball_url("lit", "3.3.3");
        assert_eq!(plain, "https://registry.npmjs.org/lit/-/lit-3.3.3.tgz");
        let scoped = reg.tarball_url("@lit/context", "1.1.6");
        assert_eq!(
            scoped,
            "https://registry.npmjs.org/@lit/context/-/context-1.1.6.tgz"
        );
    }

    #[test]
    fn select_version_picks_newest_matching() {
        let doc = json!({
            "versions": {
                "3.1.0": { "dist": { "tarball": "https://r/lit-3.1.0.tgz" } },
                "3.3.3": { "dist": {
                    "tarball": "https://r/lit-3.3.3.tgz",
                    "integrity": "sha512-deadbeef"
                } },
                "4.0.0": { "dist": { "tarball": "https://r/lit-4.0.0.tgz" } },
                "2.9.9": {}
            }
        });
        let picked = select_version(&doc, &range("^3"));
        let (ver, tarball, integrity) = picked.unwrap();
        assert_eq!(ver, Version::parse("3.3.3").unwrap());
        assert_eq!(tarball.as_deref(), Some("https://r/lit-3.3.3.tgz"));
        // dist.integrity comes back with the version so the installer can check the tarball.
        assert_eq!(integrity.as_deref(), Some("sha512-deadbeef"));
    }

    #[test]
    fn select_version_integrity_is_none_when_absent() {
        // A dist that has a tarball but no integrity yields integrity == None; what the
        // installer does with an unverifiable tarball is decided outside this module.
        let doc = json!({ "versions": {
            "1.0.0": { "dist": { "tarball": "https://r/x-1.0.0.tgz" } }
        }});
        let (_, tarball, integrity) = select_version(&doc, &range("^1")).unwrap();
        assert_eq!(tarball.as_deref(), Some("https://r/x-1.0.0.tgz"));
        assert!(integrity.is_none());
    }

    #[test]
    fn select_version_none_when_no_match() {
        let doc = json!({ "versions": { "1.0.0": {}, "2.0.0": {} } });
        let picked = select_version(&doc, &range("^5"));
        assert!(picked.is_none());
    }

    #[test]
    fn resolve_tree_walks_transitively_dedups_and_handles_cycles() {
        // a@1 → {b ^1, c ^1}; b@1 → {c ^1} (shared); c@1 → {a ^1} (cycle back to root).
        let pkgs = HashMap::from([
            (
                "a".to_string(),
                packument_with("1.0.0", &[("b", "^1"), ("c", "^1")]),
            ),
            ("b".to_string(), packument_with("1.2.0", &[("c", "^1")])),
            ("c".to_string(), packument_with("1.5.0", &[("a", "^1")])),
        ]);

        let roots = vec![("a".to_string(), range("^1"))];
        let resolved = walk(&pkgs, &roots).unwrap();

        // a, b and c each appear exactly once (cycle and shared dep collapsed), sorted by name.
        assert_eq!(names_of(&resolved), ["a", "b", "c"]);
        assert_eq!(entry(&resolved, "b").version.to_string(), "1.2.0");
        assert_eq!(entry(&resolved, "c").version.to_string(), "1.5.0");

        // dist.integrity is carried through for transitively discovered packages too.
        let b_integrity = entry(&resolved, "b").integrity.clone();
        assert_eq!(b_integrity.as_deref(), Some("sha512-1.2.0"));
    }

    #[test]
    fn resolve_tree_resolves_a_transitive_or_range() {
        // Regression: a transitive dep declared with an npm `||` range (e.g. @lit/context →
        // @lit/reactive-element `^1.6.2 || ^2.1.0`) has to resolve instead of failing to parse.
        let pkgs = HashMap::from([
            (
                "ctx".to_string(),
                packument_with("1.1.6", &[("re", "^1.6.2 || ^2.1.0")]),
            ),
            ("re".to_string(), packument_with("2.1.0", &[])),
        ]);

        let roots = vec![("ctx".to_string(), range("^1"))];
        let resolved = walk(&pkgs, &roots).unwrap();

        assert_eq!(
            names_of(&resolved),
            ["ctx", "re"],
            "the `||`-ranged transitive dep resolved"
        );
        assert_eq!(entry(&resolved, "re").version.to_string(), "2.1.0");
    }

    #[test]
    fn resolve_tree_errors_on_version_conflict() {
        // The root asks for x ^1 and for y, while y asks for x ^2 → no single x fits both.
        let x_doc = json!({ "versions": {
            "1.0.0": { "dist": { "tarball": "https://r/x1.tgz" } },
            "2.0.0": { "dist": { "tarball": "https://r/x2.tgz" } }
        }});
        let pkgs = HashMap::from([
            ("x".to_string(), x_doc),
            ("y".to_string(), packument_with("1.0.0", &[("x", "^2")])),
        ]);

        let roots = vec![
            ("x".to_string(), range("^1")),
            ("y".to_string(), range("^1")),
        ];
        let err = walk(&pkgs, &roots).unwrap_err();
        assert!(err.to_string().contains("version conflict"), "got: {err}");
    }
}