aube_lockfile/source.rs
1use std::path::{Path, PathBuf};
2
3/// Non-registry source for a locked package.
4///
5/// When a package comes from a local path (via `file:` or `link:` in
6/// `package.json`) it doesn't have a tarball URL or integrity hash, so we
7/// record the source separately and let the linker materialize it
8/// on-the-fly.
9#[derive(Debug, Clone, PartialEq, Eq)]
10pub enum LocalSource {
11 /// `file:<dir>` — a directory on disk whose contents should be
12 /// hardlink-copied into the virtual store like a normal package.
13 /// Path is stored relative to the project root.
14 Directory(PathBuf),
15 /// `file:<tarball>` — a `.tgz` on disk, extracted into the virtual
16 /// store the same way we extract registry tarballs.
17 Tarball(PathBuf),
18 /// `link:<dir>` — a plain symlink into `node_modules/<name>`, never
19 /// materialized into the virtual store. Transitive deps are the
20 /// target's responsibility.
21 Link(PathBuf),
22 /// `portal:<dir>` — a Yarn Berry package portal. The target is a
23 /// package on disk, but unlike `link:` its dependencies are still
24 /// modeled in the lockfile graph.
25 Portal(PathBuf),
26 /// `exec:<script>` — a Yarn Berry generator script. The script is
27 /// executed at fetch time and writes the package files into a
28 /// generated build directory.
29 Exec(PathBuf),
30 /// `git+https://`, `git+ssh://`, `github:user/repo`, etc. — a
31 /// remote git repo. Cloned at fetch time and imported like a
32 /// `file:` directory. `url` is the normalized clone URL (what
33 /// gets passed to `git clone`). `committish` is the user-written
34 /// ref after `#` (branch, tag, or commit; `None` means HEAD).
35 /// `resolved` is the 40-char commit SHA that `git ls-remote`
36 /// pinned the ref to — the lockfile records this so repeat
37 /// installs reproduce bit-for-bit.
38 Git(GitSource),
39 /// `https://example.com/pkg.tgz` — a remote tarball URL. Fetched
40 /// once at resolve time so the resolver can read the enclosed
41 /// `package.json` for version + transitive deps and pin the
42 /// sha512 integrity. `integrity` stays empty on freshly-parsed
43 /// specifiers and is filled in by the resolver after download.
44 RemoteTarball(RemoteTarballSource),
45}
46
/// Payload of [`LocalSource::RemoteTarball`]: a dependency fetched
/// from a plain `http(s)://` tarball URL.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RemoteTarballSource {
    /// The tarball URL; `LocalSource::parse` stores the specifier
    /// verbatim here.
    pub url: String,
    /// Integrity hash of the tarball. Empty on a freshly parsed
    /// specifier; the resolver fills it in after download.
    pub integrity: String,
    /// NOTE(review): not explained in this file. `LocalSource::parse`
    /// always sets it to `false`; presumably other code sets it for
    /// tarballs derived from a hosted git provider — confirm.
    pub git_hosted: bool,
}
54
/// Payload of [`LocalSource::Git`]: a dependency that lives in a git
/// repository.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GitSource {
    /// Normalized clone URL.
    pub url: String,
    /// The ref the user wrote after `#` (branch, tag, or commit).
    /// `None` means no ref was given.
    pub committish: Option<String>,
    /// The commit the ref was pinned to. `LocalSource::parse` leaves
    /// this empty; it is filled in later by the resolver.
    pub resolved: String,
    /// SHA-512 SRI of the hosted tarball bytes, set when the git
    /// source was fetched through a codeload-style archive. Plain
    /// git-clone sources leave it unset because git object IDs
    /// already verify the checkout.
    pub integrity: Option<String>,
    /// pnpm `&path:/sub/dir` selector. When set, only this
    /// subdirectory of the cloned repo is treated as the package
    /// root. Kept without a leading slash so dep_path hashes do not
    /// depend on whether the user wrote `path:/x` or `path:x`.
    pub subpath: Option<String>,
}
72
/// Whether two git commit identifiers name the same commit.
///
/// Surrounding whitespace is ignored on both sides. The inputs match
/// when either:
/// - they are equal ignoring ASCII case, or
/// - one is a full 40-char hex SHA and the other is an abbreviated
///   hex SHA of at least 7 chars that is a case-insensitive prefix of
///   the full one.
///
/// Two *different* abbreviations never match each other, even when one
/// is a prefix of the other: without a full SHA on one side there is
/// no way to know they refer to the same object.
pub fn git_commits_match(left: &str, right: &str) -> bool {
    const FULL_SHA_LEN: usize = 40;
    const MIN_ABBREV_LEN: usize = 7;

    // Trim before *every* comparison. Previously the equality shortcut
    // ran on the raw input, so a full SHA with a trailing newline never
    // matched the same SHA without one, even though abbreviated SHAs
    // were trimmed before the prefix test.
    let left = left.trim();
    let right = right.trim();
    if left.eq_ignore_ascii_case(right) {
        return true;
    }

    let (short, full) = if left.len() < right.len() {
        (left, right)
    } else {
        (right, left)
    };
    if full.len() != FULL_SHA_LEN || short.len() < MIN_ABBREV_LEN || short.len() >= FULL_SHA_LEN {
        return false;
    }

    let is_hex = |s: &str| s.bytes().all(|b| b.is_ascii_hexdigit());
    // Both sides are verified to be pure ASCII before slicing, so the
    // byte-index slice below cannot land inside a multi-byte char.
    is_hex(short) && is_hex(full) && full[..short.len()].eq_ignore_ascii_case(short)
}
90
91impl LocalSource {
92 /// The original path (relative to the project root) the user wrote
93 /// in `package.json`. `None` for non-path sources like git.
94 pub fn path(&self) -> Option<&Path> {
95 match self {
96 LocalSource::Directory(p)
97 | LocalSource::Tarball(p)
98 | LocalSource::Link(p)
99 | LocalSource::Portal(p)
100 | LocalSource::Exec(p) => Some(p),
101 LocalSource::Git(_) | LocalSource::RemoteTarball(_) => None,
102 }
103 }
104
105 /// The protocol kind (`"file"` / `"link"` / `"git"` / `"url"`).
106 pub fn kind_str(&self) -> &'static str {
107 match self {
108 LocalSource::Directory(_) | LocalSource::Tarball(_) => "file",
109 LocalSource::Link(_) => "link",
110 LocalSource::Portal(_) => "portal",
111 LocalSource::Exec(_) => "exec",
112 LocalSource::Git(_) => "git",
113 LocalSource::RemoteTarball(_) => "url",
114 }
115 }
116
117 /// Whether this source is pinned to immutable, globally
118 /// reproducible content and can therefore be shared across
119 /// projects inside aube's global virtual store, exactly like a
120 /// registry package.
121 ///
122 /// `Git` is pinned to a 40-char commit SHA and `RemoteTarball` to
123 /// a fetched URL (and, once resolved, an integrity hash), so two
124 /// projects that depend on the same one resolve to the same files.
125 /// `file:` / `link:` / `portal:` / `exec:` all resolve against a
126 /// path inside the depending project, so they stay per-project and
127 /// are never promoted into the shared store.
128 ///
129 /// Load-bearing for global-virtual-store correctness: a registry
130 /// package materialized into the shared store points its
131 /// dependency siblings at the hashed global path
132 /// (`virtual_store_subdir(dep_path)`). If one of those deps were a
133 /// git/tarball source that only ever landed in the per-project
134 /// `.aube/`, the sibling symlink would dangle and Node's module
135 /// walk would silently fall back to some unrelated `<name>` found
136 /// higher up the tree.
137 pub fn is_globally_shareable(&self) -> bool {
138 matches!(self, LocalSource::Git(_) | LocalSource::RemoteTarball(_))
139 }
140
141 /// The path as a POSIX-style string with forward-slash separators.
142 /// `Path::display()` and `to_string_lossy()` honor the host's
143 /// separator (backslash on Windows), which would make `dep_path`
144 /// hashes and lockfile `specifier:` strings non-portable: the
145 /// same `file:./some/dir` would render as `some\dir` on Windows
146 /// and `some/dir` on Unix, producing two different hashes for
147 /// the same logical target. Always rendering with `/` keeps
148 /// lockfiles cross-platform identical.
149 pub fn path_posix(&self) -> String {
150 self.path()
151 .map(|p| p.to_string_lossy().replace('\\', "/"))
152 .unwrap_or_default()
153 }
154
155 /// Canonical specifier string as pnpm writes it in the `packages:`
156 /// and `snapshots:` keys (post-`<name>@` part). For `file:` /
157 /// `link:` this is `file:./vendor/foo` / `link:../sibling`. For
158 /// `git`, pnpm uses the resolved form `<url>#<commit>` (no
159 /// `git+` prefix) because the lockfile pins to the exact commit
160 /// regardless of what the user wrote. Always emits POSIX
161 /// separators so the resulting lockfile is portable.
162 pub fn specifier(&self) -> String {
163 match self {
164 LocalSource::Git(g) => match &g.subpath {
165 Some(sub) => format!("{}#{}&path:/{}", g.url, g.resolved, sub),
166 None => format!("{}#{}", g.url, g.resolved),
167 },
168 LocalSource::RemoteTarball(t) => t.url.clone(),
169 _ => format!("{}:{}", self.kind_str(), self.path_posix()),
170 }
171 }
172
173 /// Internal FS-safe dep_path used as the key in
174 /// `LockfileGraph.packages` and as the `.aube/` subdir name.
175 ///
176 /// Distinct paths must map to distinct keys (otherwise the
177 /// linker would silently mix files between two local packages),
178 /// and the result must be a single filesystem component — no
179 /// `/`, `\`, `:`, or `..`. Ad-hoc character substitution trips
180 /// over cases like `../vendor` vs `__/vendor` or `a.b` vs `a_b`
181 /// collapsing to the same string, so we hash the raw path bytes
182 /// and suffix the first 16 hex chars (64 bits — more than enough
183 /// to avoid collisions inside a single project).
184 ///
185 /// The hash input is the POSIX-form path string so a checked-in
186 /// lockfile resolves to the same key regardless of which
187 /// platform ran `aube install`.
188 pub fn dep_path(&self, name: &str) -> String {
189 use sha2::{Digest, Sha256};
190 let mut hasher = Sha256::new();
191 match self {
192 LocalSource::Git(g) => {
193 hasher.update(g.url.as_bytes());
194 hasher.update(b"#");
195 hasher.update(g.resolved.as_bytes());
196 if let Some(sub) = &g.subpath {
197 hasher.update(b"&path:/");
198 hasher.update(sub.as_bytes());
199 }
200 }
201 LocalSource::RemoteTarball(t) => {
202 hasher.update(t.url.as_bytes());
203 }
204 _ => hasher.update(self.path_posix().as_bytes()),
205 }
206 let digest = hasher.finalize();
207 let short: String = digest.iter().take(8).map(|b| format!("{b:02x}")).collect();
208 format!("{name}@{}+{short}", self.kind_str())
209 }
210
211 /// Classify a user-written `file:` / `link:` specifier against the
212 /// project root. Returns `None` if `spec` isn't a local specifier.
213 /// Resolves the target path relative to `project_root`; a `file:`
214 /// target that resolves to a `.tgz` / `.tar.gz` on disk is treated
215 /// as a tarball, anything else as a directory.
216 pub fn parse(spec: &str, project_root: &Path) -> Option<Self> {
217 // Check git first so URLs like `https://host/user/repo.git`
218 // aren't swallowed by the broader bare-http tarball check
219 // below.
220 if let Some((url, committish, subpath)) = parse_git_spec(spec) {
221 // `resolved` is filled in by the resolver after running
222 // `git ls-remote`. A lockfile round-trip that never
223 // re-resolves will leave this empty, which is the sentinel
224 // the resolver checks for before calling ls-remote.
225 return Some(LocalSource::Git(GitSource {
226 url,
227 committish,
228 resolved: String::new(),
229 integrity: None,
230 subpath,
231 }));
232 }
233 // Any remaining bare `http(s)://` URL is a remote tarball.
234 // npm semantics treat *all* non-git HTTP URLs in a dependency
235 // value as tarball URLs, so services that serve tarballs from
236 // URLs without a `.tgz` extension (pkg.pr.new, GitHub
237 // codeload, etc.) classify correctly here.
238 if Self::looks_like_remote_tarball_url(spec) {
239 return Some(LocalSource::RemoteTarball(RemoteTarballSource {
240 url: spec.to_string(),
241 integrity: String::new(),
242 git_hosted: false,
243 }));
244 }
245 let (kind, rest) = if let Some(r) = spec.strip_prefix("file:") {
246 ("file", r)
247 } else if let Some(r) = spec.strip_prefix("link:") {
248 ("link", r)
249 } else if let Some(r) = spec.strip_prefix("portal:") {
250 ("portal", r)
251 } else if let Some(r) = spec.strip_prefix("exec:") {
252 return Some(LocalSource::Exec(PathBuf::from(r)));
253 } else {
254 return None;
255 };
256 let rel = PathBuf::from(rest);
257 let abs = project_root.join(&rel);
258 if kind == "link" {
259 return Some(LocalSource::Link(rel));
260 }
261 if kind == "portal" {
262 return Some(LocalSource::Portal(rel));
263 }
264 if abs.is_file() && Self::path_looks_like_tarball(&rel) {
265 return Some(LocalSource::Tarball(rel));
266 }
267 Some(LocalSource::Directory(rel))
268 }
269
270 /// Whether a specifier looks like a direct HTTP(S) URL that should
271 /// be fetched as a tarball. Per npm semantics, *any* `http://` or
272 /// `https://` URL in a dependency value is a tarball URL — services
273 /// like pkg.pr.new, GitHub codeload, and private registries with
274 /// auth-token query strings serve tarballs from URLs that don't
275 /// carry a `.tgz` extension. Git URLs must already have been
276 /// ruled out by the caller (see [`parse_git_spec`]) so a
277 /// `.git`-suffixed URL doesn't get misclassified here.
278 pub fn looks_like_remote_tarball_url(spec: &str) -> bool {
279 spec.starts_with("https://") || spec.starts_with("http://")
280 }
281
282 pub fn path_looks_like_tarball(path: &Path) -> bool {
283 let name = match path.file_name().and_then(|n| n.to_str()) {
284 Some(n) => n,
285 None => return false,
286 };
287 let lower = name.to_ascii_lowercase();
288 lower.ends_with(".tgz") || lower.ends_with(".tar.gz")
289 }
290}
291
292/// Resolve a transitive dependency's recorded spec *value* to the same
293/// `dep_path` key the lockfile parser assigns the target package, for
294/// the two content-pinned source kinds that get shared globally (git
295/// and remote tarball).
296///
297/// pnpm records a git / remote-tarball dependency inside a snapshot's
298/// `dependencies:` map by its *resolved spec* — `<url>#<sha>` for git,
299/// the tarball URL for remote tarballs (e.g. request-promise-core lists
300/// `request: https://github.com/request/request.git#<sha>`). The parser,
301/// however, keys the package itself under [`LocalSource::dep_path`] — the
302/// short `name@git+<hash>` / `name@url+<hash>` form. A naive
303/// `format!("{name}@{value}")` lookup therefore points at a key that was
304/// never inserted into the graph, so:
305///
306/// * the linker's sibling symlink dangles (Node resolves the wrong
307/// `<name>` or none — the request-promise-core crash), and
308/// * the graph hasher skips the child entirely, so neither its content
309/// fingerprint nor its build/engine taint cascades into the parent's
310/// global-virtual-store hash.
311///
312/// Mirror `pnpm::read::push_direct`'s keying so the resolved value lands
313/// on the exact `dep_path` the package was materialized under. Returns
314/// `None` for every other value (plain semver, `file:`, `link:`, npm
315/// aliases, …) so callers keep the verbatim `name@value` key those
316/// already resolve correctly with.
317pub fn shared_local_dep_path(dep_name: &str, dep_value: &str) -> Option<String> {
318 // pnpm appends a `(peer@ver)` suffix to some spec values; the parser
319 // strips it before classifying the source, so strip it here too.
320 //
321 // This MUST stay byte-for-byte identical to `pnpm::read::push_direct`'s
322 // `classify_version` (`info.version.split('(').next()`), which is what
323 // produced the `dep_path` keys in `graph.packages` we're matching
324 // against. A "smarter" strip (e.g. only a trailing `(peer@…)` via
325 // rfind) would *desync* the two: any value with a non-peer `(` would
326 // hash differently here than the key the parser inserted, silently
327 // re-skipping that child in the linker and graph hasher. If the
328 // first-`(` truncation is ever wrong for a real spec, fix it in
329 // `push_direct` and here together — never in isolation.
330 let classify = dep_value.split('(').next().unwrap_or(dep_value);
331 match LocalSource::parse(classify, Path::new("")) {
332 Some(LocalSource::Git(mut git)) => {
333 // Snapshot specs carry the pinned commit after `#`, which
334 // `parse` records as `committish` rather than `resolved`. The
335 // package was keyed with that commit promoted to `resolved`
336 // (see `push_direct`), so promote it here too — otherwise the
337 // `url#resolved` hash diverges from the package's dep_path.
338 if git.resolved.is_empty() {
339 git.resolved = git.committish.take()?;
340 }
341 Some(LocalSource::Git(git).dep_path(dep_name))
342 }
343 Some(tarball @ LocalSource::RemoteTarball(_)) => Some(tarball.dep_path(dep_name)),
344 _ => None,
345 }
346}
347
348/// Resolve a dependency edge `(name, tail)` to the graph key of the child
349/// package node, honoring every reader's storage convention. Returns the
350/// first candidate that satisfies `contains` (the caller's "is this a real
351/// package key?" predicate), or `None` when the edge points outside the
352/// graph (a pruned optional, an unresolved peer, a `link:` target, …).
353///
354/// Three conventions coexist because the readers disagree on what a
355/// dependency *value* holds, and a graph walker that only knows one of
356/// them silently drops the others:
357/// 1. `tail` verbatim — npm/yarn/bun store the full dep_path as the
358/// value (`"foo@1.2.3"`).
359/// 2. `name@tail` — the pnpm reader stores only the tail (`"1.2.3"`),
360/// so the key is the name re-joined to it.
361/// 3. [`shared_local_dep_path`] — git / remote-tarball deps store the
362/// resolved URL as the tail, but the node is keyed under the short
363/// `name@git+<hash>` / `name@url+<hash>` form. The linker's
364/// `materialize` already bridges the edge this way; reachability /
365/// marking walkers that skip it prune the entire git/tarball subtree
366/// (a content-pinned git/tarball child and everything under it
367/// vanishes from the walk once the node is keyed canonically).
368pub fn resolve_dep_edge(name: &str, tail: &str, contains: impl Fn(&str) -> bool) -> Option<String> {
369 if contains(tail) {
370 return Some(tail.to_string());
371 }
372 let rejoined = format!("{name}@{tail}");
373 if contains(&rejoined) {
374 return Some(rejoined);
375 }
376 shared_local_dep_path(name, tail).filter(|key| contains(key))
377}
378
379/// Parse a git dependency specifier into `(clone_url, committish)`.
380///
381/// Recognized forms:
382/// - `git+https://host/user/repo.git[#ref]`
383/// - `git+ssh://git@host/user/repo.git[#ref]`
384/// - `git://host/user/repo.git[#ref]`
385/// - `https://host/user/repo.git[#ref]` (only when ending in `.git`)
386/// - `user@host:path[.git][#ref]` (scp-form, only for github.com / gitlab.com /
387/// bitbucket.org — matches pnpm 11 behavior, where unknown SCP hosts are
388/// treated as local paths) → `ssh://user@host/path[.git]`
389/// - `github:user/repo[#ref]` → `https://github.com/user/repo.git`
390/// - `gitlab:user/repo[#ref]` → `https://gitlab.com/user/repo.git`
391/// - `bitbucket:user/repo[#ref]` → `https://bitbucket.org/user/repo.git`
392/// - `user/repo[#ref]` (bare GitHub shorthand, npm/pnpm compat)
393/// → `https://github.com/user/repo.git`
394///
395/// Returns `None` for any specifier that doesn't look like a git URL,
396/// so the caller can fall through to other protocol parsers.
397pub fn parse_git_spec(spec: &str) -> Option<(String, Option<String>, Option<String>)> {
398 let (body, committish, subpath) = match spec.find('#') {
399 Some(idx) => {
400 let (c, s) = parse_git_fragment(&spec[idx + 1..]);
401 (&spec[..idx], c, s)
402 }
403 None => (spec, None, None),
404 };
405 let is_bare_transport = body.starts_with("https://")
406 || body.starts_with("http://")
407 || body.starts_with("ssh://")
408 || body.starts_with("file://");
409 let url = if let Some(rest) = body.strip_prefix("git+") {
410 // `git+` explicitly tags the URL as git, so the `.git`
411 // suffix is optional (GitHub/GitLab accept both forms).
412 rest.to_string()
413 } else if body.starts_with("git://") {
414 body.to_string()
415 } else if let Some(scp) = parse_scp_url(body) {
416 scp
417 } else if let Some(path) = body.strip_prefix("github:") {
418 format!("https://github.com/{path}.git")
419 } else if let Some(path) = body.strip_prefix("gitlab:") {
420 format!("https://gitlab.com/{path}.git")
421 } else if let Some(path) = body.strip_prefix("bitbucket:") {
422 format!("https://bitbucket.org/{path}.git")
423 } else if is_bare_transport && body.ends_with(".git") {
424 body.to_string()
425 } else if is_bare_transport
426 && committish
427 .as_deref()
428 .is_some_and(|c| c.len() == 40 && c.chars().all(|ch| ch.is_ascii_hexdigit()))
429 {
430 // Lockfile round-trip form: `specifier()` writes the stored
431 // URL verbatim plus `#<sha>`. URLs that dropped the `git+`
432 // prefix (and happen to lack `.git`) are disambiguated from
433 // plain tarball URLs by the 40-hex committish suffix.
434 body.to_string()
435 } else if is_bare_github_shorthand(body) {
436 // npm/pnpm bare GitHub shorthand: `user/repo` expands to
437 // `github:user/repo`. Placed last so all explicit URL/scheme
438 // forms above shadow it.
439 format!("https://github.com/{body}.git")
440 } else {
441 return None;
442 };
443 Some((url, committish, subpath))
444}
445
/// Whether `body` has the bare `user/repo` GitHub shorthand shape.
///
/// Requires exactly one `/` with a non-empty segment on each side,
/// both made only of ASCII alphanumerics and `_`, `.`, `-`, and an
/// owner that does not start with `.` (so `./repo` and `../repo` are
/// rejected). That excludes scoped npm names (`@scope/pkg`) and
/// multi-segment file paths. Other URL/SCP forms are ruled out by the
/// order of checks in `parse_git_spec`, not here.
fn is_bare_github_shorthand(body: &str) -> bool {
    // A second `/` would land in `repo` and fail the charset test, so
    // no separate "single slash" check is needed.
    fn is_name_segment(segment: &str) -> bool {
        !segment.is_empty()
            && segment
                .bytes()
                .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'_' | b'.' | b'-'))
    }

    match body.split_once('/') {
        Some((owner, repo)) => {
            !owner.starts_with('.') && is_name_segment(owner) && is_name_segment(repo)
        }
        None => false,
    }
}
466
467/// A git URL that maps to one of the three "hosted" providers npm /
468/// pnpm both special-case (github / gitlab / bitbucket). For these
469/// hosts a public read can be served as a flat HTTPS tarball over
470/// `codeload.github.com` (or each host's equivalent), bypassing `git`
471/// entirely. The lockfile's stored URL is canonical-identity only —
472/// pnpm and npm both re-derive the fetch URL from `(host, owner,
473/// repo)` on every install rather than dialing whatever scheme
474/// happens to be in `resolved:`.
475#[derive(Debug, Clone, PartialEq, Eq)]
476pub struct HostedGit {
477 pub host: HostedGitHost,
478 pub owner: String,
479 pub repo: String,
480}
481
/// The hosted git providers recognized by [`HostedGit`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HostedGitHost {
    /// `github.com`
    GitHub,
    /// `gitlab.com`
    GitLab,
    /// `bitbucket.org`
    Bitbucket,
}
488
489impl HostedGit {
490 /// `https://github.com/<owner>/<repo>.git` — the form `git fetch`
491 /// can dial without an SSH key. Used as the runtime fetch URL when
492 /// the lockfile's stored URL is `git+ssh://git@…` (npm canonical
493 /// identity) but the actual install host has no SSH configured.
494 pub fn https_url(&self) -> String {
495 let host = self.host.host_domain();
496 format!("https://{host}/{}/{}.git", self.owner, self.repo)
497 }
498
499 /// `https://codeload.github.com/<owner>/<repo>/tar.gz/<sha>` (or
500 /// each host's equivalent) — a flat HTTPS tarball at the given
501 /// commit. Returns `None` unless `committish` is a 40-char hex
502 /// SHA, since the codeload path can't be verified after extraction
503 /// without `.git/` metadata. Branch / tag names round-trip through
504 /// `git ls-remote` to get pinned to a SHA first.
505 pub fn tarball_url(&self, committish: &str) -> Option<String> {
506 if committish.len() != 40 || !committish.chars().all(|c| c.is_ascii_hexdigit()) {
507 return None;
508 }
509 let sha = committish.to_ascii_lowercase();
510 Some(match self.host {
511 HostedGitHost::GitHub => format!(
512 "https://codeload.github.com/{}/{}/tar.gz/{sha}",
513 self.owner, self.repo
514 ),
515 HostedGitHost::GitLab => format!(
516 "https://gitlab.com/{}/{}/-/archive/{sha}/{}-{sha}.tar.gz",
517 self.owner, self.repo, self.repo
518 ),
519 HostedGitHost::Bitbucket => format!(
520 "https://bitbucket.org/{}/{}/get/{sha}.tar.gz",
521 self.owner, self.repo
522 ),
523 })
524 }
525}
526
527impl HostedGitHost {
528 fn from_domain(domain: &str) -> Option<Self> {
529 match domain {
530 "github.com" => Some(HostedGitHost::GitHub),
531 "gitlab.com" => Some(HostedGitHost::GitLab),
532 "bitbucket.org" => Some(HostedGitHost::Bitbucket),
533 _ => None,
534 }
535 }
536
537 pub fn host_domain(self) -> &'static str {
538 match self {
539 HostedGitHost::GitHub => "github.com",
540 HostedGitHost::GitLab => "gitlab.com",
541 HostedGitHost::Bitbucket => "bitbucket.org",
542 }
543 }
544}
545
546/// Parse a clone URL — in any form `parse_git_spec` accepts as input
547/// or produces as output — into its `(host, owner, repo)` components,
548/// when the host is one of the three providers npm / pnpm route
549/// through HTTPS tarballs. Returns `None` for any other host (including
550/// self-hosted GitLab / Gitea / Bitbucket Data Center): those still
551/// need a real `git clone` because no codeload-style HTTP archive is
552/// available.
553///
554/// Accepts:
555/// - `https://github.com/owner/repo[.git]`
556/// - `git+https://github.com/owner/repo[.git]`
557/// - `git://github.com/owner/repo[.git]`
558/// - `ssh://git@github.com/owner/repo[.git]`
559/// - `git+ssh://git@github.com/owner/repo[.git]` (npm canonical lockfile form)
560/// - `git@github.com:owner/repo[.git]` (scp shorthand, in case a caller
561/// parses raw lockfile fields without going through `parse_git_spec`)
562pub fn parse_hosted_git(url: &str) -> Option<HostedGit> {
563 let body = url.strip_prefix("git+").unwrap_or(url);
564 let after_scheme = if let Some(rest) = body.strip_prefix("https://") {
565 rest
566 } else if let Some(rest) = body.strip_prefix("http://") {
567 rest
568 } else if let Some(rest) = body.strip_prefix("ssh://") {
569 rest
570 } else if let Some(rest) = body.strip_prefix("git://") {
571 rest
572 } else {
573 // scp shorthand `user@host:path` — not produced by parse_git_spec
574 // but accepted defensively in case a raw lockfile string ever
575 // bypasses it.
576 let scp_path = parse_scp_url(body)?;
577 return parse_hosted_git(&scp_path);
578 };
579 // Strip optional `user@` (always `git@` for hosted forms).
580 let host_and_path = match after_scheme.split_once('@') {
581 Some((_, rest)) => rest,
582 None => after_scheme,
583 };
584 let (host, path) = host_and_path.split_once('/')?;
585 let host = HostedGitHost::from_domain(host)?;
586 // Take exactly two path segments: owner and repo. Anything beyond
587 // (subgroup-style GitLab paths) doesn't have a stable HTTPS tarball
588 // form on the three providers we care about, so refuse and let the
589 // caller fall back to clone.
590 let mut segs = path.splitn(3, '/');
591 let owner = segs.next()?;
592 let repo = segs.next()?;
593 if owner.is_empty() || repo.is_empty() || segs.next().is_some() {
594 return None;
595 }
596 let repo = repo
597 .strip_suffix(".git")
598 .unwrap_or(repo)
599 .trim_end_matches('/');
600 if repo.is_empty() {
601 return None;
602 }
603 Some(HostedGit {
604 host,
605 owner: owner.to_string(),
606 repo: repo.to_string(),
607 })
608}
609
/// Convert an scp-style git address `user@host:path` into
/// `ssh://user@host/path`.
///
/// Returns `None` when `body`:
/// - contains `://` (it is a real URL, not scp shorthand),
/// - has no `:`, or nothing on one side of the first `:`,
/// - has a path starting with `/`,
/// - has no `user@` part or an empty user, or
/// - names a host other than github.com, gitlab.com or bitbucket.org.
fn parse_scp_url(body: &str) -> Option<String> {
    if body.contains("://") {
        return None;
    }
    let (before, path) = body.split_once(':')?;
    if before.is_empty() || path.is_empty() || path.starts_with('/') {
        return None;
    }
    let (user, host) = before.split_once('@')?;
    // pnpm 11 only resolves SCP-form as hosted git for the three known
    // providers; other hosts (e.g. `git@example.com:foo/bar.git`) are
    // treated as local paths, and `host:path` without a user errors.
    // Matching the exact domain also rules out an empty host or one
    // containing `/` or `@`.
    let known_host = matches!(host, "github.com" | "gitlab.com" | "bitbucket.org");
    (!user.is_empty() && known_host).then(|| format!("ssh://{user}@{host}/{path}"))
}
637
638/// Normalize git URL fragments used by npm-compatible lockfiles.
639///
640/// Plain git accepts `#<ref>`, while npm and Yarn Berry also write
641/// key/value fragments such as `#commit=<sha>` for pinned git deps.
642/// Downstream code passes this value directly to `git ls-remote` and
643/// `git checkout`, so strip the selector key here and keep only the
644/// actual ref name or SHA.
645pub(crate) fn normalize_git_fragment(fragment: &str) -> Option<String> {
646 parse_git_fragment(fragment).0
647}
648
/// Parse a git URL fragment into `(committish, subpath)`.
///
/// The fragment is a `&`-separated list of selectors. Each one is
/// either a bare ref, a `key=value` pair (the form npm / Yarn Berry
/// write), or — for the keys `commit`, `tag`, `head`, `branch` and
/// `path` only — a `key:value` pair (the pnpm / hosted-git-info form,
/// e.g. `<ref>&path:/sub/dir`).
///
/// - Committish: the first `commit` selector wins; otherwise the first
///   `tag` / `head` / `branch` / bare ref.
/// - Subpath: the first acceptable `path` selector, returned without
///   leading slashes so the caller can join it onto a clone dir
///   without tripping the absolute-path branch of `Path::join`.
/// - Selectors with an unknown key or an empty value are ignored.
///
/// A `path` value is rejected (treated as absent) when any component
/// is empty, `.` or `..`, or contains `\` or `:`.
pub(crate) fn parse_git_fragment(fragment: &str) -> (Option<String>, Option<String>) {
    // Keys that may be written `key:value` as well as `key=value`.
    const COLON_KEYS: [&str; 5] = ["commit", "tag", "head", "branch", "path"];

    // Validate a `path` selector value and strip its leading slashes
    // (pnpm writes `path:/sub`).
    //
    // Without the `.` / `..` rejection a crafted spec such as
    // `&path:/../../etc` would let the resolver and installer escape
    // the clone dir and import an arbitrary host directory into the
    // store. `\` and `:` are rejected for the same reason: on Windows
    // a backslash is a path separator (`sub\..\..\x`) and `C:` is a
    // drive prefix that makes `Path::join` discard the base entirely.
    fn sanitize_subpath(value: &str) -> Option<String> {
        let trimmed = value.trim_start_matches('/');
        let is_safe_component = |component: &str| {
            !matches!(component, "" | "." | "..")
                && !component.contains(|ch: char| matches!(ch, '\\' | ':'))
        };
        (!trimmed.is_empty() && trimmed.split('/').all(is_safe_component))
            .then(|| trimmed.to_owned())
    }

    let mut preferred: Option<&str> = None;
    let mut fallback: Option<&str> = None;
    let mut subpath: Option<String> = None;

    for part in fragment.split('&').filter(|part| !part.is_empty()) {
        // Try `key=value` first; fall back to `key:value` only for the
        // selectors handled below. A tag name containing a colon
        // (e.g. `release:2026-01`) is left whole, and `semver:^1.0.0`
        // stays a literal ref so `ls-remote` surfaces an explicit
        // error rather than silently resolving HEAD.
        let (key, value) = part
            .split_once('=')
            .or_else(|| {
                part.split_once(':')
                    .filter(|(key, _)| COLON_KEYS.contains(key))
            })
            .unwrap_or(("", part));
        if value.is_empty() {
            continue;
        }
        match key {
            "commit" => {
                preferred.get_or_insert(value);
            }
            "tag" | "head" | "branch" | "" => {
                fallback.get_or_insert(value);
            }
            // First accepted path wins, matching the selectors above;
            // a rejected value leaves the slot open for a later one.
            "path" if subpath.is_none() => {
                subpath = sanitize_subpath(value);
            }
            _ => {}
        }
    }

    (preferred.or(fallback).map(str::to_owned), subpath)
}
719
720#[cfg(test)]
721mod tests {
722 use super::*;
723
724 #[test]
725 fn matches_https_tgz() {
726 assert!(LocalSource::looks_like_remote_tarball_url(
727 "https://example.com/pkg-1.0.0.tgz"
728 ));
729 }
730
731 #[test]
732 fn matches_http_tar_gz() {
733 assert!(LocalSource::looks_like_remote_tarball_url(
734 "http://example.com/pkg-1.0.0.tar.gz"
735 ));
736 }
737
738 #[test]
739 fn strips_fragment_before_suffix_check() {
740 assert!(LocalSource::looks_like_remote_tarball_url(
741 "https://example.com/pkg-1.0.0.tgz#sha512-abc"
742 ));
743 }
744
745 #[test]
746 fn strips_query_string_before_suffix_check() {
747 // Auth-token URLs from private registries (JFrog, Nexus,
748 // CodeArtifact, …) routinely trail `?token=…` after the
749 // filename. Must still classify as a tarball URL.
750 assert!(LocalSource::looks_like_remote_tarball_url(
751 "https://registry.example.com/pkg/-/pkg-1.0.0.tgz?token=abc"
752 ));
753 assert!(LocalSource::looks_like_remote_tarball_url(
754 "https://example.com/pkg-1.0.0.tar.gz?v=2&signed=1"
755 ));
756 }
757
758 #[test]
759 fn matches_bare_http_url_without_tarball_suffix() {
760 // pkg.pr.new serves tarballs from URLs without a `.tgz`
761 // extension; npm treats all non-git http(s) URLs as tarball
762 // URLs, so these must classify as remote tarballs.
763 assert!(LocalSource::looks_like_remote_tarball_url(
764 "https://pkg.pr.new/lunariajs/lunaria/@lunariajs/core@904b935"
765 ));
766 assert!(LocalSource::looks_like_remote_tarball_url(
767 "https://codeload.github.com/user/repo/tar.gz/main"
768 ));
769 }
770
771 #[test]
772 fn git_commits_match_only_allows_full_sha_prefix_pairs() {
773 let full = "abcdef0123456789abcdef0123456789abcdef01";
774 assert!(git_commits_match(full, "abcdef0"));
775 assert!(git_commits_match("abcdef0", full));
776 assert!(git_commits_match(full, full));
777 assert!(!git_commits_match("abcdef0", "abcdef012"));
778 assert!(!git_commits_match(full, "abcdef1"));
779 assert!(!git_commits_match("main", full));
780 }
781
#[test]
fn rejects_non_http_schemes() {
    // Even with a tarball-looking suffix, a non-http(s) scheme is out.
    let ftp_tarball = "ftp://example.com/pkg.tgz";
    let git_repo = "git://example.com/repo.git";

    assert!(!LocalSource::looks_like_remote_tarball_url(ftp_tarball));
    assert!(!LocalSource::looks_like_remote_tarball_url(git_repo));
}
791
#[test]
fn parse_classifies_bare_http_url_as_remote_tarball() {
    // Extension-less https URL: `parse` must still produce the
    // remote-tarball variant.
    let spec = "https://pkg.pr.new/lunariajs/lunaria/@lunariajs/core@904b935";
    let project_root = std::path::Path::new("");

    let parsed = LocalSource::parse(spec, project_root);
    assert!(matches!(parsed, Some(LocalSource::RemoteTarball(_))));
}
801
#[test]
fn parse_prefers_git_over_tarball_for_dot_git_url() {
    // An https URL ending in `.git` must come back as the git
    // variant rather than a remote tarball.
    let project_root = std::path::Path::new("");
    let parsed = LocalSource::parse("https://github.com/user/repo.git", project_root);
    assert!(matches!(parsed, Some(LocalSource::Git(_))));
}
808
#[test]
fn parse_classifies_exec_as_local_source() {
    // `exec:` specs keep the script path exactly as written.
    let expected = LocalSource::Exec(PathBuf::from("./scripts/generate.js"));
    let parsed = LocalSource::parse("exec:./scripts/generate.js", Path::new(""));
    assert_eq!(parsed, Some(expected));
}
817
#[test]
fn git_plus_https_without_dot_git_roundtrips_via_lockfile_form() {
    let sha = "abcdef0123456789abcdef0123456789abcdef01";

    // Step 1 — initial parse of `git+https://…/repo` (no `.git`):
    // the `git+` prefix is dropped and nothing else is inferred.
    let (url, committish, subpath) = parse_git_spec("git+https://host/user/repo").unwrap();
    assert_eq!(url, "https://host/user/repo");
    assert!(committish.is_none());
    assert!(subpath.is_none());

    // Step 2 — once resolved, the serializer writes `<url>#<sha>`
    // into the lockfile's importer `version:` field.
    let resolved_source = GitSource {
        url,
        committish: None,
        resolved: sha.to_string(),
        integrity: None,
        subpath: None,
    };
    let lockfile_version = LocalSource::Git(resolved_source).specifier();
    assert_eq!(lockfile_version, format!("https://host/user/repo#{sha}"));

    // Step 3 — re-parsing has to accept the bare URL, because the
    // 40-hex committish suffix unambiguously tags it as git.
    let reparsed = parse_git_spec(&lockfile_version);
    let (round_url, round_committish, round_subpath) = reparsed.unwrap();
    assert_eq!(round_url, "https://host/user/repo");
    assert_eq!(round_committish.as_deref(), Some(sha));
    assert!(round_subpath.is_none());
}
847
#[test]
fn bare_https_without_dot_git_and_no_committish_is_not_git() {
    // With neither a `.git` suffix nor a SHA, a plain `https://…`
    // URL could be anything (a tarball included), so the git parser
    // must leave it alone.
    let parsed = parse_git_spec("https://example.com/pkg");
    assert!(parsed.is_none());
}
854
#[test]
fn github_shorthand_expands_and_roundtrips() {
    // Expansion: `github:user/repo` becomes the canonical HTTPS clone
    // URL, with no ref and no subdirectory implied.
    let (url, committish, subpath) = parse_git_spec("github:user/repo").unwrap();
    assert_eq!(url, "https://github.com/user/repo.git");
    assert_eq!(committish, None);
    assert_eq!(subpath, None);

    // Round-trip: the resolved source serializes to `<url>#<sha>`,
    // and re-parsing that lockfile form must give back the same
    // clone URL pinned to the same commit.
    let sha = "abcdef0123456789abcdef0123456789abcdef01";
    let lockfile_version = LocalSource::Git(GitSource {
        url,
        committish: None,
        resolved: sha.to_string(),
        integrity: None,
        subpath: None,
    })
    .specifier();
    assert_eq!(
        lockfile_version,
        format!("https://github.com/user/repo.git#{sha}")
    );

    let (round_url, round_committish, round_subpath) =
        parse_git_spec(&lockfile_version).unwrap();
    assert_eq!(round_url, "https://github.com/user/repo.git");
    assert_eq!(round_committish.as_deref(), Some(sha));
    assert_eq!(round_subpath, None);
}
860
#[test]
fn bare_user_repo_expands_to_github() {
    // A bare `user/repo` is GitHub shorthand with no ref or subdir.
    let parsed = parse_git_spec("kevva/is-negative");
    let (url, committish, subpath) = parsed.unwrap();

    assert_eq!(url, "https://github.com/kevva/is-negative.git");
    assert_eq!(committish, None);
    assert_eq!(subpath, None);
}
868
#[test]
fn bare_user_repo_with_committish_preserved() {
    // The `#<ref>` suffix of a bare shorthand survives expansion.
    let parsed = parse_git_spec("kevva/is-negative#v1.0.0");
    let (url, committish, _subpath) = parsed.unwrap();

    assert_eq!(url, "https://github.com/kevva/is-negative.git");
    assert_eq!(committish.as_deref(), Some("v1.0.0"));
}
875
#[test]
fn bare_scope_pkg_is_not_git_shorthand() {
    // `@scope/pkg` is an npm registry name; despite the single `/`
    // it must not be read as GitHub shorthand.
    let parsed = parse_git_spec("@types/node");
    assert!(parsed.is_none());
}
881
#[test]
fn bare_relative_path_is_not_git_shorthand() {
    // `./repo` and `../repo` contain exactly one `/`, so they split
    // into an owner of `.` or `..`; the owner-starts-with-`.` guard
    // is the load-bearing check that rejects them.
    let current_dir_single = parse_git_spec("./repo");
    let parent_dir_single = parse_git_spec("../repo");
    assert!(current_dir_single.is_none());
    assert!(parent_dir_single.is_none());

    // Longer relative paths additionally trip the single-`/`-only
    // guard.
    let current_dir_nested = parse_git_spec("./local/path");
    let parent_dir_nested = parse_git_spec("../local/path");
    assert!(current_dir_nested.is_none());
    assert!(parent_dir_nested.is_none());
}
893
#[test]
fn bare_path_with_extra_slashes_is_not_git_shorthand() {
    // GitHub shorthand is exactly `user/repo`; once a second `/`
    // shows up the spec is a path, not a shorthand.
    let parsed = parse_git_spec("path/with/slashes/extra");
    assert!(parsed.is_none());
}
900
#[test]
fn bare_scp_form_unknown_host_is_not_github_shorthand() {
    // `user@host:repo.git` is scp form, which is handled (or
    // rejected) before the bare-shorthand branch; that branch must
    // not pick it up afterwards.
    let parsed = parse_git_spec("user@host:repo.git");
    assert!(parsed.is_none());
}
907
#[test]
fn scp_form_recognized() {
    // scp-style `git@github.com:owner/repo.git` is rewritten to an
    // explicit `ssh://` URL, with no ref attached.
    let spec = "git@github.com:EthanHenrickson/math-mcp.git";
    let (url, committish, _subpath) = parse_git_spec(spec).unwrap();

    assert_eq!(url, "ssh://git@github.com/EthanHenrickson/math-mcp.git");
    assert_eq!(committish, None);
}
915
#[test]
fn scp_form_with_ref_recognized() {
    // Same rewrite as the plain scp form, with the `#<ref>` carried
    // over as the committish.
    let spec = "git@github.com:EthanHenrickson/math-mcp.git#0.1.5";
    let (url, committish, _subpath) = parse_git_spec(spec).unwrap();

    assert_eq!(url, "ssh://git@github.com/EthanHenrickson/math-mcp.git");
    assert_eq!(committish.as_deref(), Some("0.1.5"));
}
923
#[test]
fn scp_form_bitbucket_recognized() {
    // Bitbucket's scp form gets the same `ssh://` rewrite.
    let spec = "git@bitbucket.org:pnpmjs/git-resolver.git";
    let (url, _committish, _subpath) = parse_git_spec(spec).unwrap();
    assert_eq!(url, "ssh://git@bitbucket.org/pnpmjs/git-resolver.git");
}
929
#[test]
fn scp_form_unknown_host_rejected() {
    // pnpm 11 treats `user@unknown-host:path` as a local path, not
    // Git, so neither spec may parse as a git source.
    let git_user = parse_git_spec("git@example.com:org/repo.git");
    let other_user = parse_git_spec("alice@host.example.com:org/repo.git");

    assert!(git_user.is_none());
    assert!(other_user.is_none());
}
936
#[test]
fn scp_form_without_user_rejected() {
    // pnpm 11 errors on a bare `host:path` as unsupported; the
    // parser declines it as well.
    let parsed = parse_git_spec("github.com:user/repo.git");
    assert!(parsed.is_none());
}
942
#[test]
fn commit_selector_fragment_normalizes_to_sha() {
    // A `#commit=<sha>` fragment collapses to just the SHA.
    let sha = "abcdef0123456789abcdef0123456789abcdef01";
    let spec = format!("https://host/user/repo.git#commit={sha}");

    let (url, committish, _subpath) = parse_git_spec(&spec).unwrap();
    assert_eq!(url, "https://host/user/repo.git");
    assert_eq!(committish.as_deref(), Some(sha));
}
951
#[test]
fn named_selector_fragment_normalizes_to_ref() {
    // A `#tag=<name>` fragment collapses to the bare ref name.
    let spec = "git+https://host/user/repo#tag=v1.2.3";

    let (url, committish, _subpath) = parse_git_spec(spec).unwrap();
    assert_eq!(url, "https://host/user/repo");
    assert_eq!(committish.as_deref(), Some("v1.2.3"));
}
958
#[test]
fn pnpm_path_subpath_extracted_from_fragment() {
    // pnpm's `<url>#<ref>&path:/<subdir>` syntax picks a
    // subdirectory of the cloned repo as the package root; the
    // parser returns it without the leading `/`.
    let spec = "github:org/dep#v0.1.4&path:/packages/special";
    let parsed = parse_git_spec(spec);
    let (url, committish, subpath) = parsed.unwrap();

    assert_eq!(url, "https://github.com/org/dep.git");
    assert_eq!(committish.as_deref(), Some("v0.1.4"));
    assert_eq!(subpath.as_deref(), Some("packages/special"));
}
969
#[test]
fn path_subpath_roundtrips_via_specifier() {
    let sha = "abcdef0123456789abcdef0123456789abcdef01";

    // Serialize a resolved git source that points at a subdirectory…
    let git = GitSource {
        url: "https://github.com/org/dep.git".to_string(),
        committish: None,
        resolved: sha.to_string(),
        integrity: None,
        subpath: Some("packages/special".to_string()),
    };
    let spec = LocalSource::Git(git).specifier();
    let expected_spec = format!("https://github.com/org/dep.git#{sha}&path:/packages/special");
    assert_eq!(spec, expected_spec);

    // …and check that parsing the serialized form recovers every
    // component.
    let reparsed = parse_git_spec(&spec);
    let (url, committish, subpath) = reparsed.unwrap();
    assert_eq!(url, "https://github.com/org/dep.git");
    assert_eq!(committish.as_deref(), Some(sha));
    assert_eq!(subpath.as_deref(), Some("packages/special"));
}
990
#[test]
fn parse_hosted_git_recognizes_canonical_forms() {
    // Every spelling below names the same (github.com, owner, repo)
    // tuple. They all have to collapse to one HostedGit so the
    // runtime fetch URL doesn't depend on which scheme the lockfile
    // happens to record.
    let specs = [
        "https://github.com/owner/repo.git",
        "https://github.com/owner/repo",
        "http://github.com/owner/repo.git",
        "git+https://github.com/owner/repo.git",
        "git+https://github.com/owner/repo",
        "git://github.com/owner/repo.git",
        "ssh://git@github.com/owner/repo.git",
        "git+ssh://git@github.com/owner/repo.git",
        "git@github.com:owner/repo.git",
    ];
    let canonical = HostedGit {
        host: HostedGitHost::GitHub,
        owner: "owner".to_string(),
        repo: "repo".to_string(),
    };

    for spec in specs {
        let parsed = parse_hosted_git(spec);
        assert_eq!(
            parsed.as_ref(),
            Some(&canonical),
            "spec {spec} should map to canonical HostedGit",
        );
    }
}
1019
#[test]
fn parse_hosted_git_returns_none_for_non_hosted() {
    // Self-hosted GitLab / Gitea / arbitrary hosts have no codeload
    // template, so the codeload fast path doesn't apply to them. The
    // last two entries are github.com URLs that don't have the
    // two-segment `owner/repo` path.
    let specs = [
        "https://example.com/owner/repo.git",
        "ssh://git@gitea.internal/owner/repo.git",
        "git+ssh://git@gitlab.example.com/group/sub/repo.git",
        "https://github.com/owner/repo/sub",
        "https://github.com/owner",
    ];

    for spec in specs {
        let parsed = parse_hosted_git(spec);
        assert!(
            parsed.is_none(),
            "spec {spec} must not match a hosted provider",
        );
    }
}
1037
#[test]
fn hosted_tarball_url_only_for_full_sha() {
    let sha = "abcdef0123456789abcdef0123456789abcdef01";
    let hosted = HostedGit {
        host: HostedGitHost::GitHub,
        owner: "o".to_string(),
        repo: "r".to_string(),
    };

    // A full 40-char SHA yields the codeload archive URL.
    let expected = format!("https://codeload.github.com/o/r/tar.gz/{sha}");
    let archive = hosted.tarball_url(sha);
    assert_eq!(archive.as_deref(), Some(expected.as_str()));

    // Branches, tags and abbreviated SHAs stay off the fast path:
    // codeload accepts them, but the wrapper-dir name varies and a
    // non-SHA committish can't be verified post-extraction.
    let branch = hosted.tarball_url("main");
    assert!(branch.is_none());
    let tag = hosted.tarball_url("v1.2.3");
    assert!(tag.is_none());
    let short_sha = hosted.tarball_url("abcdef0");
    assert!(short_sha.is_none());
}
1057
#[test]
fn hosted_tarball_url_per_provider() {
    let sha = "abcdef0123456789abcdef0123456789abcdef01";
    // Both providers are exercised with the same owner/repo pair.
    let hosted_on = |host: HostedGitHost| HostedGit {
        host,
        owner: "g".to_string(),
        repo: "r".to_string(),
    };

    // GitLab: only the fixed prefix and the `<repo>-<sha>.tar.gz`
    // tail are pinned here.
    let gitlab = hosted_on(HostedGitHost::GitLab).tarball_url(sha).unwrap();
    assert!(gitlab.starts_with("https://gitlab.com/g/r/-/archive/"));
    assert!(gitlab.ends_with("/r-abcdef0123456789abcdef0123456789abcdef01.tar.gz"));

    // Bitbucket: the whole URL is pinned.
    let bitbucket = hosted_on(HostedGitHost::Bitbucket)
        .tarball_url(sha)
        .unwrap();
    let expected_bitbucket =
        "https://bitbucket.org/g/r/get/abcdef0123456789abcdef0123456789abcdef01.tar.gz";
    assert_eq!(bitbucket, expected_bitbucket);
}
1082
#[test]
fn hosted_https_url_normalizes() {
    // A `git+ssh://` spec still reports a plain https clone URL.
    let hosted = parse_hosted_git("git+ssh://git@github.com/owner/repo.git").unwrap();
    let https = hosted.https_url();
    assert_eq!(https, "https://github.com/owner/repo.git");
}
1088
#[test]
fn path_traversal_components_in_subpath_are_rejected() {
    // A crafted spec with `..` or `.` components could escape the
    // clone dir at install time. The parser drops such subpaths so
    // the resolver/installer never see a traversal-laden one; the
    // spec itself still parses.
    let cases = [
        "github:org/dep#main&path:/../../etc",
        "github:org/dep#main&path:/packages/../../../etc",
        "github:org/dep#main&path:/./packages/foo",
        "github:org/dep#main&path:/packages//foo",
    ];

    for spec in cases {
        let parsed = parse_git_spec(spec).unwrap();
        let subpath = parsed.2;
        assert_eq!(subpath, None, "spec should drop subpath: {spec}");
    }
}
1105
#[test]
fn dep_path_distinguishes_subpaths_under_same_commit() {
    // Packages that share a repo and commit but live in different
    // subdirs need distinct dep_paths, or the linker would collapse
    // them into one.
    let sha = "abcdef0123456789abcdef0123456789abcdef01";
    let source_at = |subdir: &str| {
        LocalSource::Git(GitSource {
            url: "https://example.com/r.git".to_string(),
            committish: None,
            resolved: sha.to_string(),
            integrity: None,
            subpath: Some(subdir.to_string()),
        })
    };

    let a = source_at("packages/a");
    let b = source_at("packages/b");
    assert_ne!(a.dep_path("dep"), b.dep_path("dep"));
}
1128
1129 const SHARED_SHA: &str = "0123456789abcdef0123456789abcdef01234567";
1130
1131 /// The dep_path the lockfile parser keys a git package under, given
1132 /// its normalized clone URL and pinned commit.
1133 fn git_key(url: &str, resolved: &str) -> String {
1134 LocalSource::Git(GitSource {
1135 url: url.to_string(),
1136 committish: None,
1137 resolved: resolved.to_string(),
1138 integrity: None,
1139 subpath: None,
1140 })
1141 .dep_path("request")
1142 }
1143
1144 /// The dep_path the lockfile parser keys a remote-tarball package
1145 /// under, given its fetch URL.
1146 fn tarball_key(url: &str) -> String {
1147 LocalSource::RemoteTarball(RemoteTarballSource {
1148 url: url.to_string(),
1149 integrity: String::new(),
1150 git_hosted: false,
1151 })
1152 .dep_path("request")
1153 }
1154
#[test]
fn shared_github_shorthand_maps_to_git_dep_path() {
    // The dependent records its git `request` through the `github:`
    // spec, while the package itself is keyed under the hashed
    // `git+` dep_path. The sibling symlink / hasher lookup has to
    // land on that same key, otherwise it dangles or silently skips
    // the child.
    let spec = format!("github:request/request#{SHARED_SHA}");
    let got = shared_local_dep_path("request", &spec)
        .expect("github: spec is a shareable local source");

    let expected = git_key("https://github.com/request/request.git", SHARED_SHA);
    assert_eq!(got, expected);
    assert!(got.starts_with("request@git+"), "unexpected key: {got}");
}
1169
#[test]
fn shared_git_url_and_shorthand_converge() {
    // The dependent may have recorded either the shorthand or the
    // resolved `<url>.git#<sha>` form; both have to canonicalize to
    // the same key.
    let shorthand_spec = format!("github:request/request#{SHARED_SHA}");
    let url_spec = format!("https://github.com/request/request.git#{SHARED_SHA}");

    let from_shorthand = shared_local_dep_path("request", &shorthand_spec).unwrap();
    let from_url = shared_local_dep_path("request", &url_spec).unwrap();
    assert_eq!(from_shorthand, from_url);
}
1184
#[test]
fn shared_missing_resolved_is_promoted_from_committish() {
    // After a lockfile round-trip that never re-resolved, `resolved`
    // is empty and only `#<committish>` is carried. The helper has
    // to promote the committish so the hash lines up with the
    // package's `<url>#<sha>` key.
    let spec = format!("https://github.com/request/request.git#{SHARED_SHA}");
    let got = shared_local_dep_path("request", &spec).unwrap();

    let expected = git_key("https://github.com/request/request.git", SHARED_SHA);
    assert_eq!(got, expected);
}
1200
#[test]
fn shared_codeload_tarball_maps_to_url_dep_path() {
    // This is exactly what pnpm records for a `github:` dep that
    // resolves to a codeload archive, and it is the case that
    // crashed request-promise-core under the global virtual store.
    let url = format!("https://codeload.github.com/request/request/tar.gz/{SHARED_SHA}");
    let expected = tarball_key(&url);

    let got = shared_local_dep_path("request", &url).unwrap();
    assert_eq!(got, expected);
    assert!(got.starts_with("request@url+"), "unexpected key: {got}");
}
1211
#[test]
fn shared_strips_peer_suffix_before_classifying() {
    // A trailing `(peer@version)` suffix must not change the result.
    let url = format!("https://codeload.github.com/request/request/tar.gz/{SHARED_SHA}");
    let with_peer = format!("{url}(typescript@5.8.3)");

    let suffixed = shared_local_dep_path("request", &with_peer);
    let bare = shared_local_dep_path("request", &url);
    assert_eq!(suffixed, bare);
}
1221
#[test]
fn shared_returns_none_for_non_shareable_specs() {
    // Plain versions, semver ranges, `link:` / `file:` paths and
    // `npm:` aliases are not shareable local sources.
    let non_shareable = [
        "4.18.1",
        "^1.2.3",
        "link:../sibling",
        "file:./vendor/x",
        "npm:lodash@4.18.1",
    ];

    for value in non_shareable {
        let key = shared_local_dep_path("dep", value);
        assert!(
            key.is_none(),
            "{value:?} must not be treated as a shareable local source",
        );
    }
}
1237}