// aube_lockfile/source.rs

1use std::path::{Path, PathBuf};
2
3/// Non-registry source for a locked package.
4///
5/// When a package comes from a local path (via `file:` or `link:` in
6/// `package.json`) it doesn't have a tarball URL or integrity hash, so we
7/// record the source separately and let the linker materialize it
8/// on-the-fly.
9#[derive(Debug, Clone, PartialEq, Eq)]
10pub enum LocalSource {
11    /// `file:<dir>` — a directory on disk whose contents should be
12    /// hardlink-copied into the virtual store like a normal package.
13    /// Path is stored relative to the project root.
14    Directory(PathBuf),
15    /// `file:<tarball>` — a `.tgz` on disk, extracted into the virtual
16    /// store the same way we extract registry tarballs.
17    Tarball(PathBuf),
18    /// `link:<dir>` — a plain symlink into `node_modules/<name>`, never
19    /// materialized into the virtual store. Transitive deps are the
20    /// target's responsibility.
21    Link(PathBuf),
22    /// `portal:<dir>` — a Yarn Berry package portal. The target is a
23    /// package on disk, but unlike `link:` its dependencies are still
24    /// modeled in the lockfile graph.
25    Portal(PathBuf),
26    /// `exec:<script>` — a Yarn Berry generator script. The script is
27    /// executed at fetch time and writes the package files into a
28    /// generated build directory.
29    Exec(PathBuf),
30    /// `git+https://`, `git+ssh://`, `github:user/repo`, etc. — a
31    /// remote git repo. Cloned at fetch time and imported like a
32    /// `file:` directory. `url` is the normalized clone URL (what
33    /// gets passed to `git clone`). `committish` is the user-written
34    /// ref after `#` (branch, tag, or commit; `None` means HEAD).
35    /// `resolved` is the 40-char commit SHA that `git ls-remote`
36    /// pinned the ref to — the lockfile records this so repeat
37    /// installs reproduce bit-for-bit.
38    Git(GitSource),
39    /// `https://example.com/pkg.tgz` — a remote tarball URL. Fetched
40    /// once at resolve time so the resolver can read the enclosed
41    /// `package.json` for version + transitive deps and pin the
42    /// sha512 integrity. `integrity` stays empty on freshly-parsed
43    /// specifiers and is filled in by the resolver after download.
44    RemoteTarball(RemoteTarballSource),
45}
46
/// Payload of [`LocalSource::RemoteTarball`]: a dependency written as a
/// direct tarball URL.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RemoteTarballSource {
    /// The tarball URL.
    pub url: String,
    /// Integrity string for the tarball. Empty on a freshly-parsed
    /// specifier; filled in by the resolver after download.
    pub integrity: String,
    /// NOTE(review): presumably marks a tarball that is served by a git
    /// hosting provider — confirm against the code that sets it.
    /// `LocalSource::parse` always initializes it to `false`.
    pub git_hosted: bool,
}
54
/// Payload of [`LocalSource::Git`]: a dependency that lives in a git
/// repository.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GitSource {
    /// Normalized clone URL.
    pub url: String,
    /// The ref the user wrote after `#` (branch, tag, or commit), if
    /// any.
    pub committish: Option<String>,
    /// The commit the ref was pinned to. Empty until the resolver has
    /// filled it in.
    pub resolved: String,
    /// SHA-512 SRI of the hosted tarball bytes, set when the git source
    /// was fetched through a codeload-style archive. Plain git-clone
    /// sources leave it unset because git object IDs already verify the
    /// checkout.
    pub integrity: Option<String>,
    /// pnpm `&path:/sub/dir` selector. When set, only this
    /// subdirectory of the cloned repo is treated as the package root.
    /// Stored without a leading slash so `dep_path` hashes are the same
    /// whether the user wrote `path:/x` or `path:x`.
    pub subpath: Option<String>,
}
72
/// Whether two git commit identifiers name the same commit.
///
/// Surrounding whitespace on either side is ignored. The identifiers
/// match when they are:
///
/// * equal ignoring ASCII case (this also covers non-hex refs that are
///   simply written the same way on both sides), or
/// * a full 40-character hex SHA on one side and, on the other, an
///   abbreviated hex prefix of it that is at least 7 characters long.
///
/// Two *different-length abbreviations* never match each other: without
/// a full SHA on one side there is nothing to anchor the comparison.
pub fn git_commits_match(left: &str, right: &str) -> bool {
    // Trim *before* the equality check so `"<sha>\n"` (e.g. raw command
    // output) still matches `"<sha>"`.
    let left = left.trim();
    let right = right.trim();
    if left.eq_ignore_ascii_case(right) {
        return true;
    }

    let is_hex = |s: &str| s.bytes().all(|b| b.is_ascii_hexdigit());
    let (short, full) = if left.len() < right.len() {
        (left, right)
    } else {
        (right, left)
    };

    // The hex checks run before the slice below, so `full` is pure
    // ASCII there and `short.len()` is always a valid char boundary.
    short.len() >= 7
        && short.len() < 40
        && full.len() == 40
        && is_hex(short)
        && is_hex(full)
        && full[..short.len()].eq_ignore_ascii_case(short)
}
90
91impl LocalSource {
92    /// The original path (relative to the project root) the user wrote
93    /// in `package.json`. `None` for non-path sources like git.
94    pub fn path(&self) -> Option<&Path> {
95        match self {
96            LocalSource::Directory(p)
97            | LocalSource::Tarball(p)
98            | LocalSource::Link(p)
99            | LocalSource::Portal(p)
100            | LocalSource::Exec(p) => Some(p),
101            LocalSource::Git(_) | LocalSource::RemoteTarball(_) => None,
102        }
103    }
104
105    /// The protocol kind (`"file"` / `"link"` / `"git"` / `"url"`).
106    pub fn kind_str(&self) -> &'static str {
107        match self {
108            LocalSource::Directory(_) | LocalSource::Tarball(_) => "file",
109            LocalSource::Link(_) => "link",
110            LocalSource::Portal(_) => "portal",
111            LocalSource::Exec(_) => "exec",
112            LocalSource::Git(_) => "git",
113            LocalSource::RemoteTarball(_) => "url",
114        }
115    }
116
117    /// Whether this source is pinned to immutable, globally
118    /// reproducible content and can therefore be shared across
119    /// projects inside aube's global virtual store, exactly like a
120    /// registry package.
121    ///
122    /// `Git` is pinned to a 40-char commit SHA and `RemoteTarball` to
123    /// a fetched URL (and, once resolved, an integrity hash), so two
124    /// projects that depend on the same one resolve to the same files.
125    /// `file:` / `link:` / `portal:` / `exec:` all resolve against a
126    /// path inside the depending project, so they stay per-project and
127    /// are never promoted into the shared store.
128    ///
129    /// Load-bearing for global-virtual-store correctness: a registry
130    /// package materialized into the shared store points its
131    /// dependency siblings at the hashed global path
132    /// (`virtual_store_subdir(dep_path)`). If one of those deps were a
133    /// git/tarball source that only ever landed in the per-project
134    /// `.aube/`, the sibling symlink would dangle and Node's module
135    /// walk would silently fall back to some unrelated `<name>` found
136    /// higher up the tree.
137    pub fn is_globally_shareable(&self) -> bool {
138        matches!(self, LocalSource::Git(_) | LocalSource::RemoteTarball(_))
139    }
140
141    /// The path as a POSIX-style string with forward-slash separators.
142    /// `Path::display()` and `to_string_lossy()` honor the host's
143    /// separator (backslash on Windows), which would make `dep_path`
144    /// hashes and lockfile `specifier:` strings non-portable: the
145    /// same `file:./some/dir` would render as `some\dir` on Windows
146    /// and `some/dir` on Unix, producing two different hashes for
147    /// the same logical target. Always rendering with `/` keeps
148    /// lockfiles cross-platform identical.
149    pub fn path_posix(&self) -> String {
150        self.path()
151            .map(|p| p.to_string_lossy().replace('\\', "/"))
152            .unwrap_or_default()
153    }
154
155    /// Canonical specifier string as pnpm writes it in the `packages:`
156    /// and `snapshots:` keys (post-`<name>@` part). For `file:` /
157    /// `link:` this is `file:./vendor/foo` / `link:../sibling`. For
158    /// `git`, pnpm uses the resolved form `<url>#<commit>` (no
159    /// `git+` prefix) because the lockfile pins to the exact commit
160    /// regardless of what the user wrote. Always emits POSIX
161    /// separators so the resulting lockfile is portable.
162    pub fn specifier(&self) -> String {
163        match self {
164            LocalSource::Git(g) => match &g.subpath {
165                Some(sub) => format!("{}#{}&path:/{}", g.url, g.resolved, sub),
166                None => format!("{}#{}", g.url, g.resolved),
167            },
168            LocalSource::RemoteTarball(t) => t.url.clone(),
169            _ => format!("{}:{}", self.kind_str(), self.path_posix()),
170        }
171    }
172
173    /// Internal FS-safe dep_path used as the key in
174    /// `LockfileGraph.packages` and as the `.aube/` subdir name.
175    ///
176    /// Distinct paths must map to distinct keys (otherwise the
177    /// linker would silently mix files between two local packages),
178    /// and the result must be a single filesystem component — no
179    /// `/`, `\`, `:`, or `..`. Ad-hoc character substitution trips
180    /// over cases like `../vendor` vs `__/vendor` or `a.b` vs `a_b`
181    /// collapsing to the same string, so we hash the raw path bytes
182    /// and suffix the first 16 hex chars (64 bits — more than enough
183    /// to avoid collisions inside a single project).
184    ///
185    /// The hash input is the POSIX-form path string so a checked-in
186    /// lockfile resolves to the same key regardless of which
187    /// platform ran `aube install`.
188    pub fn dep_path(&self, name: &str) -> String {
189        use sha2::{Digest, Sha256};
190        let mut hasher = Sha256::new();
191        match self {
192            LocalSource::Git(g) => {
193                hasher.update(g.url.as_bytes());
194                hasher.update(b"#");
195                hasher.update(g.resolved.as_bytes());
196                if let Some(sub) = &g.subpath {
197                    hasher.update(b"&path:/");
198                    hasher.update(sub.as_bytes());
199                }
200            }
201            LocalSource::RemoteTarball(t) => {
202                hasher.update(t.url.as_bytes());
203            }
204            _ => hasher.update(self.path_posix().as_bytes()),
205        }
206        let digest = hasher.finalize();
207        let short: String = digest.iter().take(8).map(|b| format!("{b:02x}")).collect();
208        format!("{name}@{}+{short}", self.kind_str())
209    }
210
211    /// Classify a user-written `file:` / `link:` specifier against the
212    /// project root. Returns `None` if `spec` isn't a local specifier.
213    /// Resolves the target path relative to `project_root`; a `file:`
214    /// target that resolves to a `.tgz` / `.tar.gz` on disk is treated
215    /// as a tarball, anything else as a directory.
216    pub fn parse(spec: &str, project_root: &Path) -> Option<Self> {
217        // Check git first so URLs like `https://host/user/repo.git`
218        // aren't swallowed by the broader bare-http tarball check
219        // below.
220        if let Some((url, committish, subpath)) = parse_git_spec(spec) {
221            // `resolved` is filled in by the resolver after running
222            // `git ls-remote`. A lockfile round-trip that never
223            // re-resolves will leave this empty, which is the sentinel
224            // the resolver checks for before calling ls-remote.
225            return Some(LocalSource::Git(GitSource {
226                url,
227                committish,
228                resolved: String::new(),
229                integrity: None,
230                subpath,
231            }));
232        }
233        // Any remaining bare `http(s)://` URL is a remote tarball.
234        // npm semantics treat *all* non-git HTTP URLs in a dependency
235        // value as tarball URLs, so services that serve tarballs from
236        // URLs without a `.tgz` extension (pkg.pr.new, GitHub
237        // codeload, etc.) classify correctly here.
238        if Self::looks_like_remote_tarball_url(spec) {
239            return Some(LocalSource::RemoteTarball(RemoteTarballSource {
240                url: spec.to_string(),
241                integrity: String::new(),
242                git_hosted: false,
243            }));
244        }
245        let (kind, rest) = if let Some(r) = spec.strip_prefix("file:") {
246            ("file", r)
247        } else if let Some(r) = spec.strip_prefix("link:") {
248            ("link", r)
249        } else if let Some(r) = spec.strip_prefix("portal:") {
250            ("portal", r)
251        } else if let Some(r) = spec.strip_prefix("exec:") {
252            return Some(LocalSource::Exec(PathBuf::from(r)));
253        } else {
254            return None;
255        };
256        let rel = PathBuf::from(rest);
257        let abs = project_root.join(&rel);
258        if kind == "link" {
259            return Some(LocalSource::Link(rel));
260        }
261        if kind == "portal" {
262            return Some(LocalSource::Portal(rel));
263        }
264        if abs.is_file() && Self::path_looks_like_tarball(&rel) {
265            return Some(LocalSource::Tarball(rel));
266        }
267        Some(LocalSource::Directory(rel))
268    }
269
270    /// Whether a specifier looks like a direct HTTP(S) URL that should
271    /// be fetched as a tarball. Per npm semantics, *any* `http://` or
272    /// `https://` URL in a dependency value is a tarball URL — services
273    /// like pkg.pr.new, GitHub codeload, and private registries with
274    /// auth-token query strings serve tarballs from URLs that don't
275    /// carry a `.tgz` extension. Git URLs must already have been
276    /// ruled out by the caller (see [`parse_git_spec`]) so a
277    /// `.git`-suffixed URL doesn't get misclassified here.
278    pub fn looks_like_remote_tarball_url(spec: &str) -> bool {
279        spec.starts_with("https://") || spec.starts_with("http://")
280    }
281
282    pub fn path_looks_like_tarball(path: &Path) -> bool {
283        let name = match path.file_name().and_then(|n| n.to_str()) {
284            Some(n) => n,
285            None => return false,
286        };
287        let lower = name.to_ascii_lowercase();
288        lower.ends_with(".tgz") || lower.ends_with(".tar.gz")
289    }
290}
291
292/// Resolve a transitive dependency's recorded spec *value* to the same
293/// `dep_path` key the lockfile parser assigns the target package, for
294/// the two content-pinned source kinds that get shared globally (git
295/// and remote tarball).
296///
297/// pnpm records a git / remote-tarball dependency inside a snapshot's
298/// `dependencies:` map by its *resolved spec* — `<url>#<sha>` for git,
299/// the tarball URL for remote tarballs (e.g. request-promise-core lists
300/// `request: https://github.com/request/request.git#<sha>`). The parser,
301/// however, keys the package itself under [`LocalSource::dep_path`] — the
302/// short `name@git+<hash>` / `name@url+<hash>` form. A naive
303/// `format!("{name}@{value}")` lookup therefore points at a key that was
304/// never inserted into the graph, so:
305///
306/// * the linker's sibling symlink dangles (Node resolves the wrong
307///   `<name>` or none — the request-promise-core crash), and
308/// * the graph hasher skips the child entirely, so neither its content
309///   fingerprint nor its build/engine taint cascades into the parent's
310///   global-virtual-store hash.
311///
312/// Mirror `pnpm::read::push_direct`'s keying so the resolved value lands
313/// on the exact `dep_path` the package was materialized under. Returns
314/// `None` for every other value (plain semver, `file:`, `link:`, npm
315/// aliases, …) so callers keep the verbatim `name@value` key those
316/// already resolve correctly with.
317pub fn shared_local_dep_path(dep_name: &str, dep_value: &str) -> Option<String> {
318    // pnpm appends a `(peer@ver)` suffix to some spec values; the parser
319    // strips it before classifying the source, so strip it here too.
320    //
321    // This MUST stay byte-for-byte identical to `pnpm::read::push_direct`'s
322    // `classify_version` (`info.version.split('(').next()`), which is what
323    // produced the `dep_path` keys in `graph.packages` we're matching
324    // against. A "smarter" strip (e.g. only a trailing `(peer@…)` via
325    // rfind) would *desync* the two: any value with a non-peer `(` would
326    // hash differently here than the key the parser inserted, silently
327    // re-skipping that child in the linker and graph hasher. If the
328    // first-`(` truncation is ever wrong for a real spec, fix it in
329    // `push_direct` and here together — never in isolation.
330    let classify = dep_value.split('(').next().unwrap_or(dep_value);
331    match LocalSource::parse(classify, Path::new("")) {
332        Some(LocalSource::Git(mut git)) => {
333            // Snapshot specs carry the pinned commit after `#`, which
334            // `parse` records as `committish` rather than `resolved`. The
335            // package was keyed with that commit promoted to `resolved`
336            // (see `push_direct`), so promote it here too — otherwise the
337            // `url#resolved` hash diverges from the package's dep_path.
338            if git.resolved.is_empty() {
339                git.resolved = git.committish.take()?;
340            }
341            Some(LocalSource::Git(git).dep_path(dep_name))
342        }
343        Some(tarball @ LocalSource::RemoteTarball(_)) => Some(tarball.dep_path(dep_name)),
344        _ => None,
345    }
346}
347
348/// Resolve a dependency edge `(name, tail)` to the graph key of the child
349/// package node, honoring every reader's storage convention. Returns the
350/// first candidate that satisfies `contains` (the caller's "is this a real
351/// package key?" predicate), or `None` when the edge points outside the
352/// graph (a pruned optional, an unresolved peer, a `link:` target, …).
353///
354/// Three conventions coexist because the readers disagree on what a
355/// dependency *value* holds, and a graph walker that only knows one of
356/// them silently drops the others:
357///   1. `tail` verbatim — npm/yarn/bun store the full dep_path as the
358///      value (`"foo@1.2.3"`).
359///   2. `name@tail` — the pnpm reader stores only the tail (`"1.2.3"`),
360///      so the key is the name re-joined to it.
361///   3. [`shared_local_dep_path`] — git / remote-tarball deps store the
362///      resolved URL as the tail, but the node is keyed under the short
363///      `name@git+<hash>` / `name@url+<hash>` form. The linker's
364///      `materialize` already bridges the edge this way; reachability /
365///      marking walkers that skip it prune the entire git/tarball subtree
366///      (a content-pinned git/tarball child and everything under it
367///      vanishes from the walk once the node is keyed canonically).
368pub fn resolve_dep_edge(name: &str, tail: &str, contains: impl Fn(&str) -> bool) -> Option<String> {
369    if contains(tail) {
370        return Some(tail.to_string());
371    }
372    let rejoined = format!("{name}@{tail}");
373    if contains(&rejoined) {
374        return Some(rejoined);
375    }
376    shared_local_dep_path(name, tail).filter(|key| contains(key))
377}
378
379/// Parse a git dependency specifier into `(clone_url, committish)`.
380///
381/// Recognized forms:
382/// - `git+https://host/user/repo.git[#ref]`
383/// - `git+ssh://git@host/user/repo.git[#ref]`
384/// - `git://host/user/repo.git[#ref]`
385/// - `https://host/user/repo.git[#ref]` (only when ending in `.git`)
386/// - `user@host:path[.git][#ref]` (scp-form, only for github.com / gitlab.com /
387///   bitbucket.org — matches pnpm 11 behavior, where unknown SCP hosts are
388///   treated as local paths) → `ssh://user@host/path[.git]`
389/// - `github:user/repo[#ref]` → `https://github.com/user/repo.git`
390/// - `gitlab:user/repo[#ref]` → `https://gitlab.com/user/repo.git`
391/// - `bitbucket:user/repo[#ref]` → `https://bitbucket.org/user/repo.git`
392/// - `user/repo[#ref]` (bare GitHub shorthand, npm/pnpm compat)
393///   → `https://github.com/user/repo.git`
394///
395/// Returns `None` for any specifier that doesn't look like a git URL,
396/// so the caller can fall through to other protocol parsers.
397pub fn parse_git_spec(spec: &str) -> Option<(String, Option<String>, Option<String>)> {
398    let (body, committish, subpath) = match spec.find('#') {
399        Some(idx) => {
400            let (c, s) = parse_git_fragment(&spec[idx + 1..]);
401            (&spec[..idx], c, s)
402        }
403        None => (spec, None, None),
404    };
405    let is_bare_transport = body.starts_with("https://")
406        || body.starts_with("http://")
407        || body.starts_with("ssh://")
408        || body.starts_with("file://");
409    let url = if let Some(rest) = body.strip_prefix("git+") {
410        // `git+` explicitly tags the URL as git, so the `.git`
411        // suffix is optional (GitHub/GitLab accept both forms).
412        rest.to_string()
413    } else if body.starts_with("git://") {
414        body.to_string()
415    } else if let Some(scp) = parse_scp_url(body) {
416        scp
417    } else if let Some(path) = body.strip_prefix("github:") {
418        format!("https://github.com/{path}.git")
419    } else if let Some(path) = body.strip_prefix("gitlab:") {
420        format!("https://gitlab.com/{path}.git")
421    } else if let Some(path) = body.strip_prefix("bitbucket:") {
422        format!("https://bitbucket.org/{path}.git")
423    } else if is_bare_transport && body.ends_with(".git") {
424        body.to_string()
425    } else if is_bare_transport
426        && committish
427            .as_deref()
428            .is_some_and(|c| c.len() == 40 && c.chars().all(|ch| ch.is_ascii_hexdigit()))
429    {
430        // Lockfile round-trip form: `specifier()` writes the stored
431        // URL verbatim plus `#<sha>`. URLs that dropped the `git+`
432        // prefix (and happen to lack `.git`) are disambiguated from
433        // plain tarball URLs by the 40-hex committish suffix.
434        body.to_string()
435    } else if is_bare_github_shorthand(body) {
436        // npm/pnpm bare GitHub shorthand: `user/repo` expands to
437        // `github:user/repo`. Placed last so all explicit URL/scheme
438        // forms above shadow it.
439        format!("https://github.com/{body}.git")
440    } else {
441        return None;
442    };
443    Some((url, committish, subpath))
444}
445
/// Whether `body` is a bare `user/repo` GitHub shorthand.
///
/// Requires exactly one `/` with a non-empty segment on each side, both
/// made only of ASCII alphanumerics and `_`, `.`, `-`. The owner must
/// not start with `.`, which rejects single-component relative paths
/// (`./repo`, `../repo`). The character set also excludes scoped npm
/// names (`@scope/pkg`) and file paths. Other URL/SCP forms are ruled
/// out by the order of checks in `parse_git_spec`.
fn is_bare_github_shorthand(body: &str) -> bool {
    // Non-empty and restricted to the allowed characters. `/` is not in
    // the set, so a repo segment with a further `/` fails here too.
    fn is_name_segment(segment: &str) -> bool {
        !segment.is_empty()
            && segment
                .bytes()
                .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'_' | b'.' | b'-'))
    }

    match body.split_once('/') {
        Some((owner, repo)) => {
            !owner.starts_with('.') && is_name_segment(owner) && is_name_segment(repo)
        }
        None => false,
    }
}
466
467/// A git URL that maps to one of the three "hosted" providers npm /
468/// pnpm both special-case (github / gitlab / bitbucket). For these
469/// hosts a public read can be served as a flat HTTPS tarball over
470/// `codeload.github.com` (or each host's equivalent), bypassing `git`
471/// entirely. The lockfile's stored URL is canonical-identity only —
472/// pnpm and npm both re-derive the fetch URL from `(host, owner,
473/// repo)` on every install rather than dialing whatever scheme
474/// happens to be in `resolved:`.
475#[derive(Debug, Clone, PartialEq, Eq)]
476pub struct HostedGit {
477    pub host: HostedGitHost,
478    pub owner: String,
479    pub repo: String,
480}
481
/// The hosted git providers that [`HostedGit`] can describe.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HostedGitHost {
    /// `github.com`
    GitHub,
    /// `gitlab.com`
    GitLab,
    /// `bitbucket.org`
    Bitbucket,
}
488
489impl HostedGit {
490    /// `https://github.com/<owner>/<repo>.git` — the form `git fetch`
491    /// can dial without an SSH key. Used as the runtime fetch URL when
492    /// the lockfile's stored URL is `git+ssh://git@…` (npm canonical
493    /// identity) but the actual install host has no SSH configured.
494    pub fn https_url(&self) -> String {
495        let host = self.host.host_domain();
496        format!("https://{host}/{}/{}.git", self.owner, self.repo)
497    }
498
499    /// `https://codeload.github.com/<owner>/<repo>/tar.gz/<sha>` (or
500    /// each host's equivalent) — a flat HTTPS tarball at the given
501    /// commit. Returns `None` unless `committish` is a 40-char hex
502    /// SHA, since the codeload path can't be verified after extraction
503    /// without `.git/` metadata. Branch / tag names round-trip through
504    /// `git ls-remote` to get pinned to a SHA first.
505    pub fn tarball_url(&self, committish: &str) -> Option<String> {
506        if committish.len() != 40 || !committish.chars().all(|c| c.is_ascii_hexdigit()) {
507            return None;
508        }
509        let sha = committish.to_ascii_lowercase();
510        Some(match self.host {
511            HostedGitHost::GitHub => format!(
512                "https://codeload.github.com/{}/{}/tar.gz/{sha}",
513                self.owner, self.repo
514            ),
515            HostedGitHost::GitLab => format!(
516                "https://gitlab.com/{}/{}/-/archive/{sha}/{}-{sha}.tar.gz",
517                self.owner, self.repo, self.repo
518            ),
519            HostedGitHost::Bitbucket => format!(
520                "https://bitbucket.org/{}/{}/get/{sha}.tar.gz",
521                self.owner, self.repo
522            ),
523        })
524    }
525}
526
527impl HostedGitHost {
528    fn from_domain(domain: &str) -> Option<Self> {
529        match domain {
530            "github.com" => Some(HostedGitHost::GitHub),
531            "gitlab.com" => Some(HostedGitHost::GitLab),
532            "bitbucket.org" => Some(HostedGitHost::Bitbucket),
533            _ => None,
534        }
535    }
536
537    pub fn host_domain(self) -> &'static str {
538        match self {
539            HostedGitHost::GitHub => "github.com",
540            HostedGitHost::GitLab => "gitlab.com",
541            HostedGitHost::Bitbucket => "bitbucket.org",
542        }
543    }
544}
545
546/// Parse a clone URL — in any form `parse_git_spec` accepts as input
547/// or produces as output — into its `(host, owner, repo)` components,
548/// when the host is one of the three providers npm / pnpm route
549/// through HTTPS tarballs. Returns `None` for any other host (including
550/// self-hosted GitLab / Gitea / Bitbucket Data Center): those still
551/// need a real `git clone` because no codeload-style HTTP archive is
552/// available.
553///
554/// Accepts:
555/// - `https://github.com/owner/repo[.git]`
556/// - `git+https://github.com/owner/repo[.git]`
557/// - `git://github.com/owner/repo[.git]`
558/// - `ssh://git@github.com/owner/repo[.git]`
559/// - `git+ssh://git@github.com/owner/repo[.git]` (npm canonical lockfile form)
560/// - `git@github.com:owner/repo[.git]` (scp shorthand, in case a caller
561///   parses raw lockfile fields without going through `parse_git_spec`)
562pub fn parse_hosted_git(url: &str) -> Option<HostedGit> {
563    let body = url.strip_prefix("git+").unwrap_or(url);
564    let after_scheme = if let Some(rest) = body.strip_prefix("https://") {
565        rest
566    } else if let Some(rest) = body.strip_prefix("http://") {
567        rest
568    } else if let Some(rest) = body.strip_prefix("ssh://") {
569        rest
570    } else if let Some(rest) = body.strip_prefix("git://") {
571        rest
572    } else {
573        // scp shorthand `user@host:path` — not produced by parse_git_spec
574        // but accepted defensively in case a raw lockfile string ever
575        // bypasses it.
576        let scp_path = parse_scp_url(body)?;
577        return parse_hosted_git(&scp_path);
578    };
579    // Strip optional `user@` (always `git@` for hosted forms).
580    let host_and_path = match after_scheme.split_once('@') {
581        Some((_, rest)) => rest,
582        None => after_scheme,
583    };
584    let (host, path) = host_and_path.split_once('/')?;
585    let host = HostedGitHost::from_domain(host)?;
586    // Take exactly two path segments: owner and repo. Anything beyond
587    // (subgroup-style GitLab paths) doesn't have a stable HTTPS tarball
588    // form on the three providers we care about, so refuse and let the
589    // caller fall back to clone.
590    let mut segs = path.splitn(3, '/');
591    let owner = segs.next()?;
592    let repo = segs.next()?;
593    if owner.is_empty() || repo.is_empty() || segs.next().is_some() {
594        return None;
595    }
596    let repo = repo
597        .strip_suffix(".git")
598        .unwrap_or(repo)
599        .trim_end_matches('/');
600    if repo.is_empty() {
601        return None;
602    }
603    Some(HostedGit {
604        host,
605        owner: owner.to_string(),
606        repo: repo.to_string(),
607    })
608}
609
/// Rewrite an scp-style git address `user@host:path` as
/// `ssh://user@host/path`.
///
/// Returns `None` when `body`:
/// - contains `://` (it is already a URL),
/// - has no `:`, or nothing on one side of the first `:`,
/// - has a path starting with `/`,
/// - has no `user@` part, or an empty user,
/// - names a host other than github.com, gitlab.com or bitbucket.org.
fn parse_scp_url(body: &str) -> Option<String> {
    if body.contains("://") {
        return None;
    }
    let (before, path) = body.split_once(':')?;
    if before.is_empty() || path.is_empty() || path.starts_with('/') {
        return None;
    }
    let (user, host) = before.split_once('@')?;
    // pnpm 11 only resolves SCP-form as hosted Git for the three known
    // providers; other hosts (e.g. `git@example.com:foo/bar.git`) are
    // treated as local paths, and `host:path` without a user errors.
    // Matching the exact host names also rules out an empty host and
    // any host containing `/` or a second `@`.
    let known_host = matches!(host, "github.com" | "gitlab.com" | "bitbucket.org");
    if user.is_empty() || !known_host {
        return None;
    }
    Some(format!("ssh://{user}@{host}/{path}"))
}
637
638/// Normalize git URL fragments used by npm-compatible lockfiles.
639///
640/// Plain git accepts `#<ref>`, while npm and Yarn Berry also write
641/// key/value fragments such as `#commit=<sha>` for pinned git deps.
642/// Downstream code passes this value directly to `git ls-remote` and
643/// `git checkout`, so strip the selector key here and keep only the
644/// actual ref name or SHA.
645pub(crate) fn normalize_git_fragment(fragment: &str) -> Option<String> {
646    parse_git_fragment(fragment).0
647}
648
/// Parse a git URL fragment into `(committish, subpath)`.
///
/// The fragment is a `&`-separated list of selectors. Each is either a
/// bare ref, a `key=value` pair (the form npm / Yarn Berry write), or,
/// for the handful of keys handled here, a `key:value` pair — the
/// pnpm / hosted-git-info form `<ref>&path:/sub/dir` uses a colon for
/// `path` by historical convention.
///
/// * `commit` takes precedence over everything else for the committish.
/// * `tag`, `head`, `branch` and a bare ref are equal-priority
///   fallbacks; the first one seen wins.
/// * `path` sets the subpath; the first *valid* one wins.
/// * Unknown keys, empty parts and empty values are ignored.
///
/// The subpath is returned without a leading slash so the caller can
/// join it onto a clone dir without tripping the absolute-path branch
/// of `Path::join`. A subpath is discarded (as if it had not been
/// written) when any component is empty, `.` or `..`, or contains `\`
/// or `:` — see `sanitize_subpath` below.
pub(crate) fn parse_git_fragment(fragment: &str) -> (Option<String>, Option<String>) {
    /// Validate a `path` selector value and strip its leading slashes
    /// (pnpm writes `path:/sub`).
    ///
    /// Without this, a crafted spec like `&path:/../../etc` would let
    /// the resolver and installer escape the clone dir and import an
    /// arbitrary host directory into the store. `\` and `:` are refused
    /// as well: on Windows `\` is a path separator (so `..\..\etc`
    /// traverses just like `../../etc`) and `C:` is a drive prefix that
    /// makes `Path::join` discard the base directory entirely.
    fn sanitize_subpath(value: &str) -> Option<String> {
        let trimmed = value.trim_start_matches('/');
        let is_safe_component = |component: &str| {
            !(component.is_empty()
                || component == "."
                || component == ".."
                || component.contains('\\')
                || component.contains(':'))
        };
        // An empty `trimmed` splits into one empty component and is
        // rejected by the same check.
        trimmed
            .split('/')
            .all(is_safe_component)
            .then(|| trimmed.to_string())
    }

    if fragment.is_empty() {
        return (None, None);
    }

    let mut fallback: Option<&str> = None;
    let mut preferred: Option<&str> = None;
    let mut subpath: Option<String> = None;
    for part in fragment.split('&') {
        if part.is_empty() {
            continue;
        }
        // Try `key=value` first; fall back to `key:value` only for
        // the small set of selectors we actually handle below. A tag
        // name with a colon (e.g. `release:2026-01`) is left alone —
        // and `semver:^1.0.0` stays as a literal ref so `ls-remote`
        // surfaces an explicit error rather than silently HEAD-ing.
        let split = part.split_once('=').or_else(|| {
            part.split_once(':')
                .filter(|(k, _)| matches!(*k, "commit" | "tag" | "head" | "branch" | "path"))
        });
        let (key, value) = split.unwrap_or(("", part));
        if value.is_empty() {
            continue;
        }
        match key {
            "commit" => {
                preferred.get_or_insert(value);
            }
            "tag" | "head" | "branch" | "" => {
                fallback.get_or_insert(value);
            }
            "path" => {
                // First-wins, matching the other selectors above. An
                // invalid value leaves the slot open for a later one.
                if subpath.is_none() {
                    subpath = sanitize_subpath(value);
                }
            }
            _ => {}
        }
    }

    (preferred.or(fallback).map(ToString::to_string), subpath)
}
719
#[cfg(test)]
mod tests {
    use super::*;

    /// Full 40-hex commit id shared by the git-spec tests.
    const FULL_SHA: &str = "abcdef0123456789abcdef0123456789abcdef01";

    /// Commit id used by the `shared_local_dep_path` tests.
    const SHARED_SHA: &str = "0123456789abcdef0123456789abcdef01234567";

    /// Build a pinned git source with no committish and no integrity,
    /// which is the shape every fixture in this module needs.
    fn pinned_git(url: &str, resolved: &str, subpath: Option<&str>) -> LocalSource {
        LocalSource::Git(GitSource {
            url: url.to_string(),
            committish: None,
            resolved: resolved.to_string(),
            integrity: None,
            subpath: subpath.map(str::to_string),
        })
    }

    /// The dep_path the lockfile parser keys a git package under, given
    /// its normalized clone URL and pinned commit.
    fn git_key(url: &str, resolved: &str) -> String {
        pinned_git(url, resolved, None).dep_path("request")
    }

    /// The dep_path the lockfile parser keys a remote-tarball package
    /// under, given its fetch URL.
    fn tarball_key(url: &str) -> String {
        let source = RemoteTarballSource {
            url: url.to_string(),
            integrity: String::new(),
            git_hosted: false,
        };
        LocalSource::RemoteTarball(source).dep_path("request")
    }

    #[test]
    fn matches_https_tgz() {
        let url = "https://example.com/pkg-1.0.0.tgz";
        assert!(LocalSource::looks_like_remote_tarball_url(url));
    }

    #[test]
    fn matches_http_tar_gz() {
        let url = "http://example.com/pkg-1.0.0.tar.gz";
        assert!(LocalSource::looks_like_remote_tarball_url(url));
    }

    #[test]
    fn strips_fragment_before_suffix_check() {
        let url = "https://example.com/pkg-1.0.0.tgz#sha512-abc";
        assert!(LocalSource::looks_like_remote_tarball_url(url));
    }

    #[test]
    fn strips_query_string_before_suffix_check() {
        // Private registries (JFrog, Nexus, CodeArtifact, …) commonly
        // append `?token=…` after the filename; such URLs must still be
        // classified as tarball URLs.
        for url in [
            "https://registry.example.com/pkg/-/pkg-1.0.0.tgz?token=abc",
            "https://example.com/pkg-1.0.0.tar.gz?v=2&signed=1",
        ] {
            assert!(LocalSource::looks_like_remote_tarball_url(url));
        }
    }

    #[test]
    fn matches_bare_http_url_without_tarball_suffix() {
        // pkg.pr.new serves tarballs without a `.tgz` extension, and npm
        // treats every non-git http(s) URL as a tarball URL, so these
        // have to classify as remote tarballs too.
        for url in [
            "https://pkg.pr.new/lunariajs/lunaria/@lunariajs/core@904b935",
            "https://codeload.github.com/user/repo/tar.gz/main",
        ] {
            assert!(LocalSource::looks_like_remote_tarball_url(url));
        }
    }

    #[test]
    fn git_commits_match_only_allows_full_sha_prefix_pairs() {
        // (left, right, expected verdict)
        let table = [
            (FULL_SHA, "abcdef0", true),
            ("abcdef0", FULL_SHA, true),
            (FULL_SHA, FULL_SHA, true),
            ("abcdef0", "abcdef012", false),
            (FULL_SHA, "abcdef1", false),
            ("main", FULL_SHA, false),
        ];
        for (left, right, expected) in table {
            assert_eq!(git_commits_match(left, right), expected);
        }
    }

    #[test]
    fn rejects_non_http_schemes() {
        for url in ["ftp://example.com/pkg.tgz", "git://example.com/repo.git"] {
            assert!(!LocalSource::looks_like_remote_tarball_url(url));
        }
    }

    #[test]
    fn parse_classifies_bare_http_url_as_remote_tarball() {
        let root = Path::new("");
        let classified = LocalSource::parse(
            "https://pkg.pr.new/lunariajs/lunaria/@lunariajs/core@904b935",
            root,
        );
        assert!(matches!(classified, Some(LocalSource::RemoteTarball(_))));
    }

    #[test]
    fn parse_prefers_git_over_tarball_for_dot_git_url() {
        let root = Path::new("");
        let classified = LocalSource::parse("https://github.com/user/repo.git", root);
        assert!(matches!(classified, Some(LocalSource::Git(_))));
    }

    #[test]
    fn parse_classifies_exec_as_local_source() {
        let expected = LocalSource::Exec(PathBuf::from("./scripts/generate.js"));
        let classified = LocalSource::parse("exec:./scripts/generate.js", Path::new(""));
        assert_eq!(classified, Some(expected));
    }

    #[test]
    fn git_plus_https_without_dot_git_roundtrips_via_lockfile_form() {
        // Step 1: the user-written spec, `git+https://…/repo` with no
        // `.git` suffix.
        let (url, committish, subpath) = parse_git_spec("git+https://host/user/repo").unwrap();
        assert_eq!(url, "https://host/user/repo");
        assert_eq!(committish, None);
        assert_eq!(subpath, None);

        // Step 2: once resolved, the serializer writes `<url>#<sha>`
        // into the lockfile importer's `version:` field.
        let lockfile_version = pinned_git(&url, FULL_SHA, None).specifier();
        assert_eq!(
            lockfile_version,
            format!("https://host/user/repo#{FULL_SHA}")
        );

        // Step 3: re-parsing has to accept the bare URL, since the
        // 40-hex committish suffix unambiguously marks it as git.
        let (url_again, committish_again, subpath_again) =
            parse_git_spec(&lockfile_version).unwrap();
        assert_eq!(url_again, "https://host/user/repo");
        assert_eq!(committish_again.as_deref(), Some(FULL_SHA));
        assert_eq!(subpath_again, None);
    }

    #[test]
    fn bare_https_without_dot_git_and_no_committish_is_not_git() {
        // With neither `.git` nor a SHA, a plain `https://…` URL could
        // be anything (a tarball included), so it must not be claimed.
        let parsed = parse_git_spec("https://example.com/pkg");
        assert!(parsed.is_none());
    }

    #[test]
    fn github_shorthand_expands_and_roundtrips() {
        let (clone_url, ..) = parse_git_spec("github:user/repo").unwrap();
        assert_eq!(clone_url, "https://github.com/user/repo.git");
    }

    #[test]
    fn bare_user_repo_expands_to_github() {
        let (clone_url, git_ref, subdir) = parse_git_spec("kevva/is-negative").unwrap();
        assert_eq!(clone_url, "https://github.com/kevva/is-negative.git");
        assert!(git_ref.is_none());
        assert!(subdir.is_none());
    }

    #[test]
    fn bare_user_repo_with_committish_preserved() {
        let (clone_url, git_ref, _) = parse_git_spec("kevva/is-negative#v1.0.0").unwrap();
        assert_eq!(clone_url, "https://github.com/kevva/is-negative.git");
        assert_eq!(git_ref.as_deref(), Some("v1.0.0"));
    }

    #[test]
    fn bare_scope_pkg_is_not_git_shorthand() {
        // `@scope/pkg` is an npm registry name, never a GitHub shorthand.
        let parsed = parse_git_spec("@types/node");
        assert!(parsed.is_none());
    }

    #[test]
    fn bare_relative_path_is_not_git_shorthand() {
        // The first two split as owner="." / owner="..", so the
        // owner-starts-with-`.` guard is what rejects them. The last
        // two additionally fail the single-`/`-only guard.
        for spec in ["./repo", "../repo", "./local/path", "../local/path"] {
            assert!(parse_git_spec(spec).is_none());
        }
    }

    #[test]
    fn bare_path_with_extra_slashes_is_not_git_shorthand() {
        // GitHub shorthand is exactly `user/repo`; a second `/` makes it
        // a path instead.
        let parsed = parse_git_spec("path/with/slashes/extra");
        assert!(parsed.is_none());
    }

    #[test]
    fn bare_scp_form_unknown_host_is_not_github_shorthand() {
        // `user@host:repo.git` is scp form (handled or rejected
        // earlier); the bare-shorthand branch must not claim it.
        let parsed = parse_git_spec("user@host:repo.git");
        assert!(parsed.is_none());
    }

    #[test]
    fn scp_form_recognized() {
        let parsed = parse_git_spec("git@github.com:EthanHenrickson/math-mcp.git");
        let (clone_url, git_ref, _) = parsed.unwrap();
        assert_eq!(clone_url, "ssh://git@github.com/EthanHenrickson/math-mcp.git");
        assert!(git_ref.is_none());
    }

    #[test]
    fn scp_form_with_ref_recognized() {
        let parsed = parse_git_spec("git@github.com:EthanHenrickson/math-mcp.git#0.1.5");
        let (clone_url, git_ref, _) = parsed.unwrap();
        assert_eq!(clone_url, "ssh://git@github.com/EthanHenrickson/math-mcp.git");
        assert_eq!(git_ref.as_deref(), Some("0.1.5"));
    }

    #[test]
    fn scp_form_bitbucket_recognized() {
        let (clone_url, ..) = parse_git_spec("git@bitbucket.org:pnpmjs/git-resolver.git").unwrap();
        assert_eq!(clone_url, "ssh://git@bitbucket.org/pnpmjs/git-resolver.git");
    }

    #[test]
    fn scp_form_unknown_host_rejected() {
        // pnpm 11 reads `user@unknown-host:path` as a local path, not Git.
        for spec in [
            "git@example.com:org/repo.git",
            "alice@host.example.com:org/repo.git",
        ] {
            assert!(parse_git_spec(spec).is_none());
        }
    }

    #[test]
    fn scp_form_without_user_rejected() {
        // pnpm 11 reports bare `host:path` as unsupported.
        let parsed = parse_git_spec("github.com:user/repo.git");
        assert!(parsed.is_none());
    }

    #[test]
    fn commit_selector_fragment_normalizes_to_sha() {
        let spec = format!("https://host/user/repo.git#commit={FULL_SHA}");
        let (clone_url, git_ref, _) = parse_git_spec(&spec).unwrap();
        assert_eq!(clone_url, "https://host/user/repo.git");
        assert_eq!(git_ref.as_deref(), Some(FULL_SHA));
    }

    #[test]
    fn named_selector_fragment_normalizes_to_ref() {
        let parsed = parse_git_spec("git+https://host/user/repo#tag=v1.2.3");
        let (clone_url, git_ref, _) = parsed.unwrap();
        assert_eq!(clone_url, "https://host/user/repo");
        assert_eq!(git_ref.as_deref(), Some("v1.2.3"));
    }

    #[test]
    fn pnpm_path_subpath_extracted_from_fragment() {
        // In pnpm syntax, `<url>#<ref>&path:/<subdir>` picks a
        // subdirectory of the clone as the package root.
        let parsed = parse_git_spec("github:org/dep#v0.1.4&path:/packages/special");
        let (clone_url, git_ref, subdir) = parsed.unwrap();
        assert_eq!(clone_url, "https://github.com/org/dep.git");
        assert_eq!(git_ref.as_deref(), Some("v0.1.4"));
        assert_eq!(subdir.as_deref(), Some("packages/special"));
    }

    #[test]
    fn path_subpath_roundtrips_via_specifier() {
        let source = pinned_git(
            "https://github.com/org/dep.git",
            FULL_SHA,
            Some("packages/special"),
        );
        let spec = source.specifier();
        assert_eq!(
            spec,
            format!("https://github.com/org/dep.git#{FULL_SHA}&path:/packages/special")
        );

        let (clone_url, git_ref, subdir) = parse_git_spec(&spec).unwrap();
        assert_eq!(clone_url, "https://github.com/org/dep.git");
        assert_eq!(git_ref.as_deref(), Some(FULL_SHA));
        assert_eq!(subdir.as_deref(), Some("packages/special"));
    }

    #[test]
    fn parse_hosted_git_recognizes_canonical_forms() {
        // Every spelling below names the same (github.com, owner, repo)
        // triple, so each must yield the same HostedGit; otherwise the
        // runtime fetch URL would depend on which scheme the lockfile
        // happened to record.
        let canonical = HostedGit {
            host: HostedGitHost::GitHub,
            owner: "owner".to_string(),
            repo: "repo".to_string(),
        };
        let expected = Some(&canonical);
        let spellings = [
            "https://github.com/owner/repo.git",
            "https://github.com/owner/repo",
            "http://github.com/owner/repo.git",
            "git+https://github.com/owner/repo.git",
            "git+https://github.com/owner/repo",
            "git://github.com/owner/repo.git",
            "ssh://git@github.com/owner/repo.git",
            "git+ssh://git@github.com/owner/repo.git",
            "git@github.com:owner/repo.git",
        ];
        for spec in spellings {
            assert_eq!(
                parse_hosted_git(spec).as_ref(),
                expected,
                "spec {spec} should map to canonical HostedGit",
            );
        }
    }

    #[test]
    fn parse_hosted_git_returns_none_for_non_hosted() {
        // Self-hosted GitLab / Gitea / arbitrary hosts have no codeload
        // template, so the codeload fast path does not apply to them.
        let non_hosted = [
            "https://example.com/owner/repo.git",
            "ssh://git@gitea.internal/owner/repo.git",
            "git+ssh://git@gitlab.example.com/group/sub/repo.git",
            "https://github.com/owner/repo/sub",
            "https://github.com/owner",
        ];
        for spec in non_hosted {
            assert!(
                parse_hosted_git(spec).is_none(),
                "spec {spec} must not match a hosted provider",
            );
        }
    }

    #[test]
    fn hosted_tarball_url_only_for_full_sha() {
        let hosted = HostedGit {
            host: HostedGitHost::GitHub,
            owner: "o".to_string(),
            repo: "r".to_string(),
        };
        assert_eq!(
            hosted.tarball_url(FULL_SHA).as_deref(),
            Some("https://codeload.github.com/o/r/tar.gz/abcdef0123456789abcdef0123456789abcdef01"),
        );
        // A branch, tag or abbreviated SHA skips the fast path:
        // codeload would accept them, but the wrapper-dir name varies
        // and a non-SHA committish can't be verified after extraction.
        for committish in ["main", "v1.2.3", "abcdef0"] {
            assert!(hosted.tarball_url(committish).is_none());
        }
    }

    #[test]
    fn hosted_tarball_url_per_provider() {
        // Archive URL for repo `g/r` at FULL_SHA on the given provider.
        let archive_url = |host: HostedGitHost| {
            let hosted = HostedGit {
                host,
                owner: "g".to_string(),
                repo: "r".to_string(),
            };
            hosted.tarball_url(FULL_SHA).unwrap()
        };

        let gitlab = archive_url(HostedGitHost::GitLab);
        assert!(gitlab.starts_with("https://gitlab.com/g/r/-/archive/"));
        assert!(gitlab.ends_with("/r-abcdef0123456789abcdef0123456789abcdef01.tar.gz"));

        let bitbucket = archive_url(HostedGitHost::Bitbucket);
        assert_eq!(
            bitbucket,
            "https://bitbucket.org/g/r/get/abcdef0123456789abcdef0123456789abcdef01.tar.gz",
        );
    }

    #[test]
    fn hosted_https_url_normalizes() {
        let hosted = parse_hosted_git("git+ssh://git@github.com/owner/repo.git").unwrap();
        assert_eq!(hosted.https_url(), "https://github.com/owner/repo.git");
    }

    #[test]
    fn path_traversal_components_in_subpath_are_rejected() {
        // A `..` or `.` component would let a crafted spec leave the
        // clone dir at install time; the parser discards such subpaths
        // so the resolver/installer never receives one.
        let hostile = [
            "github:org/dep#main&path:/../../etc",
            "github:org/dep#main&path:/packages/../../../etc",
            "github:org/dep#main&path:/./packages/foo",
            "github:org/dep#main&path:/packages//foo",
        ];
        for spec in hostile {
            let (_, _, subpath) = parse_git_spec(spec).unwrap();
            assert_eq!(subpath, None, "spec should drop subpath: {spec}");
        }
    }

    #[test]
    fn dep_path_distinguishes_subpaths_under_same_commit() {
        // Same repo and commit, different subdirectories: the two
        // dep_paths must differ or the linker would merge the packages.
        let repo = "https://example.com/r.git";
        let first = pinned_git(repo, FULL_SHA, Some("packages/a"));
        let second = pinned_git(repo, FULL_SHA, Some("packages/b"));
        assert_ne!(first.dep_path("dep"), second.dep_path("dep"));
    }

    #[test]
    fn shared_github_shorthand_maps_to_git_dep_path() {
        // The dependent records its git `request` as a `github:` spec,
        // while the package itself is keyed under the hashed `git+`
        // dep_path. The sibling symlink / hasher lookup has to land on
        // that same key, or it dangles / silently skips the child.
        let spec = format!("github:request/request#{SHARED_SHA}");
        let got = shared_local_dep_path("request", &spec)
            .expect("github: spec is a shareable local source");
        assert_eq!(
            got,
            git_key("https://github.com/request/request.git", SHARED_SHA)
        );
        assert!(got.starts_with("request@git+"), "unexpected key: {got}");
    }

    #[test]
    fn shared_git_url_and_shorthand_converge() {
        // The shorthand and the resolved `<url>.git#<sha>` form must
        // canonicalize to one and the same key.
        let shorthand = format!("github:request/request#{SHARED_SHA}");
        let resolved_url = format!("https://github.com/request/request.git#{SHARED_SHA}");
        let from_shorthand = shared_local_dep_path("request", &shorthand).unwrap();
        let from_url = shared_local_dep_path("request", &resolved_url).unwrap();
        assert_eq!(from_shorthand, from_url);
    }

    #[test]
    fn shared_missing_resolved_is_promoted_from_committish() {
        // After a lockfile round-trip without re-resolution, `resolved`
        // is empty and only `#<committish>` is present; the helper must
        // promote it so the hash agrees with the package's
        // `<url>#<sha>` key.
        let spec = format!("https://github.com/request/request.git#{SHARED_SHA}");
        let got = shared_local_dep_path("request", &spec).unwrap();
        assert_eq!(
            got,
            git_key("https://github.com/request/request.git", SHARED_SHA)
        );
    }

    #[test]
    fn shared_codeload_tarball_maps_to_url_dep_path() {
        // This is the exact form pnpm records for a `github:` dep that
        // resolves to a codeload archive — the case that crashed
        // request-promise-core under the global virtual store.
        let url = format!("https://codeload.github.com/request/request/tar.gz/{SHARED_SHA}");
        let got = shared_local_dep_path("request", &url).unwrap();
        assert_eq!(got, tarball_key(&url));
        assert!(got.starts_with("request@url+"), "unexpected key: {got}");
    }

    #[test]
    fn shared_strips_peer_suffix_before_classifying() {
        let url = format!("https://codeload.github.com/request/request/tar.gz/{SHARED_SHA}");
        let with_peer = format!("{url}(typescript@5.8.3)");
        let suffixed = shared_local_dep_path("request", &with_peer);
        let plain = shared_local_dep_path("request", &url);
        assert_eq!(suffixed, plain);
    }

    #[test]
    fn shared_returns_none_for_non_shareable_specs() {
        let non_shareable = [
            "4.18.1",
            "^1.2.3",
            "link:../sibling",
            "file:./vendor/x",
            "npm:lodash@4.18.1",
        ];
        for value in non_shareable {
            assert!(
                shared_local_dep_path("dep", value).is_none(),
                "{value:?} must not be treated as a shareable local source",
            );
        }
    }
}