Skip to main content

aube_lockfile/
source.rs

1use std::path::{Path, PathBuf};
2
/// Non-registry source for a locked package.
///
/// When a package comes from a local path (via `file:` or `link:` in
/// `package.json`) it doesn't have a tarball URL or integrity hash, so we
/// record the source separately and let the linker materialize it
/// on-the-fly. The same enum also models the other non-registry
/// protocols: Yarn Berry `portal:` / `exec:`, git repositories, and
/// remote tarball URLs.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LocalSource {
    /// `file:<dir>` — a directory on disk whose contents should be
    /// hardlink-copied into the virtual store like a normal package.
    /// Path is stored relative to the project root.
    Directory(PathBuf),
    /// `file:<tarball>` — a `.tgz` on disk, extracted into the virtual
    /// store the same way we extract registry tarballs.
    Tarball(PathBuf),
    /// `link:<dir>` — a plain symlink into `node_modules/<name>`, never
    /// materialized into the virtual store. Transitive deps are the
    /// target's responsibility.
    Link(PathBuf),
    /// `portal:<dir>` — a Yarn Berry package portal. The target is a
    /// package on disk, but unlike `link:` its dependencies are still
    /// modeled in the lockfile graph.
    Portal(PathBuf),
    /// `exec:<script>` — a Yarn Berry generator script. The script is
    /// executed at fetch time and writes the package files into a
    /// generated build directory.
    Exec(PathBuf),
    /// `git+https://`, `git+ssh://`, `github:user/repo`, etc. — a
    /// remote git repo. Cloned at fetch time and imported like a
    /// `file:` directory. `url` is the normalized clone URL (what
    /// gets passed to `git clone`). `committish` is the user-written
    /// ref after `#` (branch, tag, or commit; `None` means HEAD).
    /// `resolved` is the 40-char commit SHA that `git ls-remote`
    /// pinned the ref to — the lockfile records this so repeat
    /// installs reproduce bit-for-bit.
    Git(GitSource),
    /// `https://example.com/pkg.tgz` — a remote tarball URL. Fetched
    /// once at resolve time so the resolver can read the enclosed
    /// `package.json` for version + transitive deps and pin the
    /// sha512 integrity. `integrity` stays empty on freshly-parsed
    /// specifiers and is filled in by the resolver after download.
    RemoteTarball(RemoteTarballSource),
}
46
/// A remote tarball dependency spec. See [`LocalSource::RemoteTarball`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RemoteTarballSource {
    /// The `http(s)://` URL the tarball is fetched from. `LocalSource::parse`
    /// stores the specifier verbatim here.
    pub url: String,
    /// Integrity hash of the tarball. Empty on a freshly-parsed
    /// specifier; filled in by the resolver after download.
    pub integrity: String,
}
53
/// A git dependency spec. See [`LocalSource::Git`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GitSource {
    /// Normalized clone URL (what gets passed to `git clone`).
    pub url: String,
    /// The user-written ref after `#` (branch, tag, or commit);
    /// `None` means HEAD.
    pub committish: Option<String>,
    /// The 40-char commit SHA the ref was pinned to. Left empty by
    /// `LocalSource::parse`; the resolver fills it in.
    pub resolved: String,
    /// pnpm `&path:/sub/dir` selector — when set, only this
    /// subdirectory of the cloned repo is treated as the package
    /// root. Stored without leading slash so dep_path hashes are
    /// stable regardless of whether the user wrote `path:/x` or
    /// `path:x`.
    pub subpath: Option<String>,
}
67
68impl LocalSource {
69    /// The original path (relative to the project root) the user wrote
70    /// in `package.json`. `None` for non-path sources like git.
71    pub fn path(&self) -> Option<&Path> {
72        match self {
73            LocalSource::Directory(p)
74            | LocalSource::Tarball(p)
75            | LocalSource::Link(p)
76            | LocalSource::Portal(p)
77            | LocalSource::Exec(p) => Some(p),
78            LocalSource::Git(_) | LocalSource::RemoteTarball(_) => None,
79        }
80    }
81
82    /// The protocol kind (`"file"` / `"link"` / `"git"` / `"url"`).
83    pub fn kind_str(&self) -> &'static str {
84        match self {
85            LocalSource::Directory(_) | LocalSource::Tarball(_) => "file",
86            LocalSource::Link(_) => "link",
87            LocalSource::Portal(_) => "portal",
88            LocalSource::Exec(_) => "exec",
89            LocalSource::Git(_) => "git",
90            LocalSource::RemoteTarball(_) => "url",
91        }
92    }
93
94    /// The path as a POSIX-style string with forward-slash separators.
95    /// `Path::display()` and `to_string_lossy()` honor the host's
96    /// separator (backslash on Windows), which would make `dep_path`
97    /// hashes and lockfile `specifier:` strings non-portable: the
98    /// same `file:./some/dir` would render as `some\dir` on Windows
99    /// and `some/dir` on Unix, producing two different hashes for
100    /// the same logical target. Always rendering with `/` keeps
101    /// lockfiles cross-platform identical.
102    pub fn path_posix(&self) -> String {
103        self.path()
104            .map(|p| p.to_string_lossy().replace('\\', "/"))
105            .unwrap_or_default()
106    }
107
108    /// Canonical specifier string as pnpm writes it in the `packages:`
109    /// and `snapshots:` keys (post-`<name>@` part). For `file:` /
110    /// `link:` this is `file:./vendor/foo` / `link:../sibling`. For
111    /// `git`, pnpm uses the resolved form `<url>#<commit>` (no
112    /// `git+` prefix) because the lockfile pins to the exact commit
113    /// regardless of what the user wrote. Always emits POSIX
114    /// separators so the resulting lockfile is portable.
115    pub fn specifier(&self) -> String {
116        match self {
117            LocalSource::Git(g) => match &g.subpath {
118                Some(sub) => format!("{}#{}&path:/{}", g.url, g.resolved, sub),
119                None => format!("{}#{}", g.url, g.resolved),
120            },
121            LocalSource::RemoteTarball(t) => t.url.clone(),
122            _ => format!("{}:{}", self.kind_str(), self.path_posix()),
123        }
124    }
125
126    /// Internal FS-safe dep_path used as the key in
127    /// `LockfileGraph.packages` and as the `.aube/` subdir name.
128    ///
129    /// Distinct paths must map to distinct keys (otherwise the
130    /// linker would silently mix files between two local packages),
131    /// and the result must be a single filesystem component — no
132    /// `/`, `\`, `:`, or `..`. Ad-hoc character substitution trips
133    /// over cases like `../vendor` vs `__/vendor` or `a.b` vs `a_b`
134    /// collapsing to the same string, so we hash the raw path bytes
135    /// and suffix the first 16 hex chars (64 bits — more than enough
136    /// to avoid collisions inside a single project).
137    ///
138    /// The hash input is the POSIX-form path string so a checked-in
139    /// lockfile resolves to the same key regardless of which
140    /// platform ran `aube install`.
141    pub fn dep_path(&self, name: &str) -> String {
142        use sha2::{Digest, Sha256};
143        let mut hasher = Sha256::new();
144        match self {
145            LocalSource::Git(g) => {
146                hasher.update(g.url.as_bytes());
147                hasher.update(b"#");
148                hasher.update(g.resolved.as_bytes());
149                if let Some(sub) = &g.subpath {
150                    hasher.update(b"&path:/");
151                    hasher.update(sub.as_bytes());
152                }
153            }
154            LocalSource::RemoteTarball(t) => {
155                hasher.update(t.url.as_bytes());
156            }
157            _ => hasher.update(self.path_posix().as_bytes()),
158        }
159        let digest = hasher.finalize();
160        let short: String = digest.iter().take(8).map(|b| format!("{b:02x}")).collect();
161        format!("{name}@{}+{short}", self.kind_str())
162    }
163
164    /// Classify a user-written `file:` / `link:` specifier against the
165    /// project root. Returns `None` if `spec` isn't a local specifier.
166    /// Resolves the target path relative to `project_root`; a `file:`
167    /// target that resolves to a `.tgz` / `.tar.gz` on disk is treated
168    /// as a tarball, anything else as a directory.
169    pub fn parse(spec: &str, project_root: &Path) -> Option<Self> {
170        // Check git first so URLs like `https://host/user/repo.git`
171        // aren't swallowed by the broader bare-http tarball check
172        // below.
173        if let Some((url, committish, subpath)) = parse_git_spec(spec) {
174            // `resolved` is filled in by the resolver after running
175            // `git ls-remote`. A lockfile round-trip that never
176            // re-resolves will leave this empty, which is the sentinel
177            // the resolver checks for before calling ls-remote.
178            return Some(LocalSource::Git(GitSource {
179                url,
180                committish,
181                resolved: String::new(),
182                subpath,
183            }));
184        }
185        // Any remaining bare `http(s)://` URL is a remote tarball.
186        // npm semantics treat *all* non-git HTTP URLs in a dependency
187        // value as tarball URLs, so services that serve tarballs from
188        // URLs without a `.tgz` extension (pkg.pr.new, GitHub
189        // codeload, etc.) classify correctly here.
190        if Self::looks_like_remote_tarball_url(spec) {
191            return Some(LocalSource::RemoteTarball(RemoteTarballSource {
192                url: spec.to_string(),
193                integrity: String::new(),
194            }));
195        }
196        let (kind, rest) = if let Some(r) = spec.strip_prefix("file:") {
197            ("file", r)
198        } else if let Some(r) = spec.strip_prefix("link:") {
199            ("link", r)
200        } else if let Some(r) = spec.strip_prefix("portal:") {
201            ("portal", r)
202        } else if let Some(r) = spec.strip_prefix("exec:") {
203            return Some(LocalSource::Exec(PathBuf::from(r)));
204        } else {
205            return None;
206        };
207        let rel = PathBuf::from(rest);
208        let abs = project_root.join(&rel);
209        if kind == "link" {
210            return Some(LocalSource::Link(rel));
211        }
212        if kind == "portal" {
213            return Some(LocalSource::Portal(rel));
214        }
215        if abs.is_file() && Self::path_looks_like_tarball(&rel) {
216            return Some(LocalSource::Tarball(rel));
217        }
218        Some(LocalSource::Directory(rel))
219    }
220
221    /// Whether a specifier looks like a direct HTTP(S) URL that should
222    /// be fetched as a tarball. Per npm semantics, *any* `http://` or
223    /// `https://` URL in a dependency value is a tarball URL — services
224    /// like pkg.pr.new, GitHub codeload, and private registries with
225    /// auth-token query strings serve tarballs from URLs that don't
226    /// carry a `.tgz` extension. Git URLs must already have been
227    /// ruled out by the caller (see [`parse_git_spec`]) so a
228    /// `.git`-suffixed URL doesn't get misclassified here.
229    pub fn looks_like_remote_tarball_url(spec: &str) -> bool {
230        spec.starts_with("https://") || spec.starts_with("http://")
231    }
232
233    pub fn path_looks_like_tarball(path: &Path) -> bool {
234        let name = match path.file_name().and_then(|n| n.to_str()) {
235            Some(n) => n,
236            None => return false,
237        };
238        let lower = name.to_ascii_lowercase();
239        lower.ends_with(".tgz") || lower.ends_with(".tar.gz")
240    }
241}
242
243/// Parse a git dependency specifier into `(clone_url, committish)`.
244///
245/// Recognized forms:
246/// - `git+https://host/user/repo.git[#ref]`
247/// - `git+ssh://git@host/user/repo.git[#ref]`
248/// - `git://host/user/repo.git[#ref]`
249/// - `https://host/user/repo.git[#ref]` (only when ending in `.git`)
250/// - `user@host:path[.git][#ref]` (scp-form, only for github.com / gitlab.com /
251///   bitbucket.org — matches pnpm 11 behavior, where unknown SCP hosts are
252///   treated as local paths) → `ssh://user@host/path[.git]`
253/// - `github:user/repo[#ref]` → `https://github.com/user/repo.git`
254/// - `gitlab:user/repo[#ref]` → `https://gitlab.com/user/repo.git`
255/// - `bitbucket:user/repo[#ref]` → `https://bitbucket.org/user/repo.git`
256/// - `user/repo[#ref]` (bare GitHub shorthand, npm/pnpm compat)
257///   → `https://github.com/user/repo.git`
258///
259/// Returns `None` for any specifier that doesn't look like a git URL,
260/// so the caller can fall through to other protocol parsers.
261pub fn parse_git_spec(spec: &str) -> Option<(String, Option<String>, Option<String>)> {
262    let (body, committish, subpath) = match spec.find('#') {
263        Some(idx) => {
264            let (c, s) = parse_git_fragment(&spec[idx + 1..]);
265            (&spec[..idx], c, s)
266        }
267        None => (spec, None, None),
268    };
269    let is_bare_transport = body.starts_with("https://")
270        || body.starts_with("http://")
271        || body.starts_with("ssh://")
272        || body.starts_with("file://");
273    let url = if let Some(rest) = body.strip_prefix("git+") {
274        // `git+` explicitly tags the URL as git, so the `.git`
275        // suffix is optional (GitHub/GitLab accept both forms).
276        rest.to_string()
277    } else if body.starts_with("git://") {
278        body.to_string()
279    } else if let Some(scp) = parse_scp_url(body) {
280        scp
281    } else if let Some(path) = body.strip_prefix("github:") {
282        format!("https://github.com/{path}.git")
283    } else if let Some(path) = body.strip_prefix("gitlab:") {
284        format!("https://gitlab.com/{path}.git")
285    } else if let Some(path) = body.strip_prefix("bitbucket:") {
286        format!("https://bitbucket.org/{path}.git")
287    } else if is_bare_transport && body.ends_with(".git") {
288        body.to_string()
289    } else if is_bare_transport
290        && committish
291            .as_deref()
292            .is_some_and(|c| c.len() == 40 && c.chars().all(|ch| ch.is_ascii_hexdigit()))
293    {
294        // Lockfile round-trip form: `specifier()` writes the stored
295        // URL verbatim plus `#<sha>`. URLs that dropped the `git+`
296        // prefix (and happen to lack `.git`) are disambiguated from
297        // plain tarball URLs by the 40-hex committish suffix.
298        body.to_string()
299    } else if is_bare_github_shorthand(body) {
300        // npm/pnpm bare GitHub shorthand: `user/repo` expands to
301        // `github:user/repo`. Placed last so all explicit URL/scheme
302        // forms above shadow it.
303        format!("https://github.com/{body}.git")
304    } else {
305        return None;
306    };
307    Some((url, committish, subpath))
308}
309
/// Whether `body` has the bare GitHub shorthand shape `user/repo`.
///
/// Both halves around the first `/` must be non-empty and consist
/// solely of ASCII alphanumerics plus `_`, `.` and `-`. Because `/`
/// is not in that set, a second slash in the repo half is rejected,
/// and so are scoped npm names (`@scope/pkg`). The owner additionally
/// must not start with `.`, which rules out single-component relative
/// paths such as `./repo` and `../repo`. Other URL/SCP forms are
/// excluded by where `parse_git_spec` calls this, not by this check.
fn is_bare_github_shorthand(body: &str) -> bool {
    fn is_slug(segment: &str) -> bool {
        !segment.is_empty()
            && segment.bytes().all(|byte| {
                matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'.' | b'-')
            })
    }

    match body.split_once('/') {
        Some((owner, repo)) => !owner.starts_with('.') && is_slug(owner) && is_slug(repo),
        None => false,
    }
}
330
/// A git URL that maps to one of the three "hosted" providers npm /
/// pnpm both special-case (github / gitlab / bitbucket). For these
/// hosts a public read can be served as a flat HTTPS tarball over
/// `codeload.github.com` (or each host's equivalent), bypassing `git`
/// entirely. The lockfile's stored URL is canonical-identity only —
/// pnpm and npm both re-derive the fetch URL from `(host, owner,
/// repo)` on every install rather than dialing whatever scheme
/// happens to be in `resolved:`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HostedGit {
    /// Which of the three hosted providers the URL points at.
    pub host: HostedGitHost,
    /// First path segment of the URL (the account or organization).
    pub owner: String,
    /// Second path segment of the URL. `parse_hosted_git` stores it
    /// with any trailing `.git` removed.
    pub repo: String,
}
345
/// The hosted git providers recognized by [`HostedGit`]. Each variant
/// corresponds to exactly one domain; see `host_domain`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HostedGitHost {
    /// `github.com`
    GitHub,
    /// `gitlab.com`
    GitLab,
    /// `bitbucket.org`
    Bitbucket,
}
352
353impl HostedGit {
354    /// `https://github.com/<owner>/<repo>.git` — the form `git fetch`
355    /// can dial without an SSH key. Used as the runtime fetch URL when
356    /// the lockfile's stored URL is `git+ssh://git@…` (npm canonical
357    /// identity) but the actual install host has no SSH configured.
358    pub fn https_url(&self) -> String {
359        let host = self.host.host_domain();
360        format!("https://{host}/{}/{}.git", self.owner, self.repo)
361    }
362
363    /// `https://codeload.github.com/<owner>/<repo>/tar.gz/<sha>` (or
364    /// each host's equivalent) — a flat HTTPS tarball at the given
365    /// commit. Returns `None` unless `committish` is a 40-char hex
366    /// SHA, since the codeload path can't be verified after extraction
367    /// without `.git/` metadata. Branch / tag names round-trip through
368    /// `git ls-remote` to get pinned to a SHA first.
369    pub fn tarball_url(&self, committish: &str) -> Option<String> {
370        if committish.len() != 40 || !committish.chars().all(|c| c.is_ascii_hexdigit()) {
371            return None;
372        }
373        let sha = committish.to_ascii_lowercase();
374        Some(match self.host {
375            HostedGitHost::GitHub => format!(
376                "https://codeload.github.com/{}/{}/tar.gz/{sha}",
377                self.owner, self.repo
378            ),
379            HostedGitHost::GitLab => format!(
380                "https://gitlab.com/{}/{}/-/archive/{sha}/{}-{sha}.tar.gz",
381                self.owner, self.repo, self.repo
382            ),
383            HostedGitHost::Bitbucket => format!(
384                "https://bitbucket.org/{}/{}/get/{sha}.tar.gz",
385                self.owner, self.repo
386            ),
387        })
388    }
389}
390
391impl HostedGitHost {
392    fn from_domain(domain: &str) -> Option<Self> {
393        match domain {
394            "github.com" => Some(HostedGitHost::GitHub),
395            "gitlab.com" => Some(HostedGitHost::GitLab),
396            "bitbucket.org" => Some(HostedGitHost::Bitbucket),
397            _ => None,
398        }
399    }
400
401    pub fn host_domain(self) -> &'static str {
402        match self {
403            HostedGitHost::GitHub => "github.com",
404            HostedGitHost::GitLab => "gitlab.com",
405            HostedGitHost::Bitbucket => "bitbucket.org",
406        }
407    }
408}
409
410/// Parse a clone URL — in any form `parse_git_spec` accepts as input
411/// or produces as output — into its `(host, owner, repo)` components,
412/// when the host is one of the three providers npm / pnpm route
413/// through HTTPS tarballs. Returns `None` for any other host (including
414/// self-hosted GitLab / Gitea / Bitbucket Data Center): those still
415/// need a real `git clone` because no codeload-style HTTP archive is
416/// available.
417///
418/// Accepts:
419/// - `https://github.com/owner/repo[.git]`
420/// - `git+https://github.com/owner/repo[.git]`
421/// - `git://github.com/owner/repo[.git]`
422/// - `ssh://git@github.com/owner/repo[.git]`
423/// - `git+ssh://git@github.com/owner/repo[.git]` (npm canonical lockfile form)
424/// - `git@github.com:owner/repo[.git]` (scp shorthand, in case a caller
425///   parses raw lockfile fields without going through `parse_git_spec`)
426pub fn parse_hosted_git(url: &str) -> Option<HostedGit> {
427    let body = url.strip_prefix("git+").unwrap_or(url);
428    let after_scheme = if let Some(rest) = body.strip_prefix("https://") {
429        rest
430    } else if let Some(rest) = body.strip_prefix("http://") {
431        rest
432    } else if let Some(rest) = body.strip_prefix("ssh://") {
433        rest
434    } else if let Some(rest) = body.strip_prefix("git://") {
435        rest
436    } else {
437        // scp shorthand `user@host:path` — not produced by parse_git_spec
438        // but accepted defensively in case a raw lockfile string ever
439        // bypasses it.
440        let scp_path = parse_scp_url(body)?;
441        return parse_hosted_git(&scp_path);
442    };
443    // Strip optional `user@` (always `git@` for hosted forms).
444    let host_and_path = match after_scheme.split_once('@') {
445        Some((_, rest)) => rest,
446        None => after_scheme,
447    };
448    let (host, path) = host_and_path.split_once('/')?;
449    let host = HostedGitHost::from_domain(host)?;
450    // Take exactly two path segments: owner and repo. Anything beyond
451    // (subgroup-style GitLab paths) doesn't have a stable HTTPS tarball
452    // form on the three providers we care about, so refuse and let the
453    // caller fall back to clone.
454    let mut segs = path.splitn(3, '/');
455    let owner = segs.next()?;
456    let repo = segs.next()?;
457    if owner.is_empty() || repo.is_empty() || segs.next().is_some() {
458        return None;
459    }
460    let repo = repo
461        .strip_suffix(".git")
462        .unwrap_or(repo)
463        .trim_end_matches('/');
464    if repo.is_empty() {
465        return None;
466    }
467    Some(HostedGit {
468        host,
469        owner: owner.to_string(),
470        repo: repo.to_string(),
471    })
472}
473
/// Rewrite an scp-style git address `user@host:path` as
/// `ssh://user@host/path`.
///
/// Returns `None` when `body`:
/// - contains `://` (it is already a URL),
/// - has no `:`, or nothing / a leading `/` after the first `:`,
/// - has no `user@` part (or an empty user) before the first `:`,
/// - names a host other than `github.com`, `gitlab.com` or
///   `bitbucket.org`.
///
/// The host restriction mirrors pnpm 11, which only resolves SCP-form
/// as hosted Git for those three providers; other hosts (e.g.
/// `git@example.com:foo/bar.git`) are treated as local paths.
fn parse_scp_url(body: &str) -> Option<String> {
    if body.contains("://") {
        return None;
    }
    let (login, path) = body.split_once(':')?;
    if path.is_empty() || path.starts_with('/') {
        return None;
    }
    let (user, host) = login.split_once('@')?;
    let is_known_host = matches!(host, "github.com" | "gitlab.com" | "bitbucket.org");
    if user.is_empty() || !is_known_host {
        return None;
    }
    Some(format!("ssh://{user}@{host}/{path}"))
}
501
502/// Normalize git URL fragments used by npm-compatible lockfiles.
503///
504/// Plain git accepts `#<ref>`, while npm and Yarn Berry also write
505/// key/value fragments such as `#commit=<sha>` for pinned git deps.
506/// Downstream code passes this value directly to `git ls-remote` and
507/// `git checkout`, so strip the selector key here and keep only the
508/// actual ref name or SHA.
509pub(crate) fn normalize_git_fragment(fragment: &str) -> Option<String> {
510    parse_git_fragment(fragment).0
511}
512
/// Parse a git URL fragment into `(committish, subpath)`.
///
/// The fragment is a `&`-separated list of selectors. Each selector is
/// `key=value` (the form npm / Yarn Berry write), `key:value` for the
/// known keys only (pnpm / hosted-git-info write `path:/sub/dir` with
/// a colon by historical convention), or a bare value:
///
/// - `commit` — preferred committish; first occurrence wins.
/// - `tag`, `head`, `branch`, or a bare value — fallback committish,
///   used only when no `commit` selector is present; first wins.
/// - `path` — subdirectory selector; first occurrence wins, even if
///   that first occurrence is then rejected as unsafe.
/// - any other `key=value` — ignored.
///
/// The subpath is returned without leading slashes so the caller can
/// join it with a clone dir without tripping the absolute-path branch
/// of `Path::join`. A `path` value is dropped (leaving the subpath
/// `None`) when any `/`-separated component is empty, `.` or `..`, or
/// contains `\` or `:` — see the comment on the `path` arm.
pub(crate) fn parse_git_fragment(fragment: &str) -> (Option<String>, Option<String>) {
    /// A component that cannot move the joined path outside the clone
    /// dir on any platform.
    fn is_safe_component(component: &str) -> bool {
        !component.is_empty()
            && component != "."
            && component != ".."
            && !component.contains(|ch: char| matches!(ch, '\\' | ':'))
    }

    if fragment.is_empty() {
        return (None, None);
    }

    let mut fallback: Option<&str> = None;
    let mut preferred: Option<&str> = None;
    let mut subpath: Option<String> = None;
    for part in fragment.split('&') {
        if part.is_empty() {
            continue;
        }
        // Try `key=value` first; fall back to `key:value` only for
        // the small set of selectors we actually handle below. A tag
        // name with a colon (e.g. `release:2026-01`) is left alone —
        // and `semver:^1.0.0` stays as a literal ref so `ls-remote`
        // surfaces an explicit error rather than silently HEAD-ing.
        let split = part.split_once('=').or_else(|| {
            part.split_once(':')
                .filter(|(k, _)| matches!(*k, "commit" | "tag" | "head" | "branch" | "path"))
        });
        let (key, value) = split.unwrap_or(("", part));
        if value.is_empty() {
            continue;
        }
        match key {
            "commit" => {
                preferred.get_or_insert(value);
            }
            "tag" | "head" | "branch" | "" => {
                fallback.get_or_insert(value);
            }
            "path" => {
                // Strip leading slashes (pnpm writes `path:/sub`) and
                // reject anything that could leave the clone dir once
                // joined. Without this, a crafted spec like
                // `&path:/../../etc` would let the resolver and
                // installer import an arbitrary host directory into
                // the store. On Windows `\` is also a separator and
                // `C:` a drive prefix, so `..\..\etc` and `C:/Windows`
                // are the same attack spelled differently; rejecting
                // `\` and `:` everywhere keeps the accepted set
                // identical on every platform.
                if subpath.is_some() {
                    // First-wins, matching the other selectors above.
                    continue;
                }
                let trimmed = value.trim_start_matches('/');
                if !trimmed.is_empty() && trimmed.split('/').all(is_safe_component) {
                    subpath = Some(trimmed.to_string());
                }
            }
            _ => {}
        }
    }

    (preferred.or(fallback).map(ToString::to_string), subpath)
}
583
584#[cfg(test)]
585mod tests {
586    use super::*;
587
588    #[test]
589    fn matches_https_tgz() {
590        assert!(LocalSource::looks_like_remote_tarball_url(
591            "https://example.com/pkg-1.0.0.tgz"
592        ));
593    }
594
595    #[test]
596    fn matches_http_tar_gz() {
597        assert!(LocalSource::looks_like_remote_tarball_url(
598            "http://example.com/pkg-1.0.0.tar.gz"
599        ));
600    }
601
602    #[test]
603    fn strips_fragment_before_suffix_check() {
604        assert!(LocalSource::looks_like_remote_tarball_url(
605            "https://example.com/pkg-1.0.0.tgz#sha512-abc"
606        ));
607    }
608
609    #[test]
610    fn strips_query_string_before_suffix_check() {
611        // Auth-token URLs from private registries (JFrog, Nexus,
612        // CodeArtifact, …) routinely trail `?token=…` after the
613        // filename. Must still classify as a tarball URL.
614        assert!(LocalSource::looks_like_remote_tarball_url(
615            "https://registry.example.com/pkg/-/pkg-1.0.0.tgz?token=abc"
616        ));
617        assert!(LocalSource::looks_like_remote_tarball_url(
618            "https://example.com/pkg-1.0.0.tar.gz?v=2&signed=1"
619        ));
620    }
621
622    #[test]
623    fn matches_bare_http_url_without_tarball_suffix() {
624        // pkg.pr.new serves tarballs from URLs without a `.tgz`
625        // extension; npm treats all non-git http(s) URLs as tarball
626        // URLs, so these must classify as remote tarballs.
627        assert!(LocalSource::looks_like_remote_tarball_url(
628            "https://pkg.pr.new/lunariajs/lunaria/@lunariajs/core@904b935"
629        ));
630        assert!(LocalSource::looks_like_remote_tarball_url(
631            "https://codeload.github.com/user/repo/tar.gz/main"
632        ));
633    }
634
635    #[test]
636    fn rejects_non_http_schemes() {
637        assert!(!LocalSource::looks_like_remote_tarball_url(
638            "ftp://example.com/pkg.tgz"
639        ));
640        assert!(!LocalSource::looks_like_remote_tarball_url(
641            "git://example.com/repo.git"
642        ));
643    }
644
645    #[test]
646    fn parse_classifies_bare_http_url_as_remote_tarball() {
647        use std::path::Path;
648        let parsed = LocalSource::parse(
649            "https://pkg.pr.new/lunariajs/lunaria/@lunariajs/core@904b935",
650            Path::new(""),
651        );
652        assert!(matches!(parsed, Some(LocalSource::RemoteTarball(_))));
653    }
654
655    #[test]
656    fn parse_prefers_git_over_tarball_for_dot_git_url() {
657        use std::path::Path;
658        let parsed = LocalSource::parse("https://github.com/user/repo.git", Path::new(""));
659        assert!(matches!(parsed, Some(LocalSource::Git(_))));
660    }
661
662    #[test]
663    fn parse_classifies_exec_as_local_source() {
664        let parsed = LocalSource::parse("exec:./scripts/generate.js", Path::new(""));
665        assert_eq!(
666            parsed,
667            Some(LocalSource::Exec(PathBuf::from("./scripts/generate.js")))
668        );
669    }
670
671    #[test]
672    fn git_plus_https_without_dot_git_roundtrips_via_lockfile_form() {
673        // Initial parse: `git+https://…/repo` (no `.git`).
674        let (url, committish, subpath) = parse_git_spec("git+https://host/user/repo").unwrap();
675        assert_eq!(url, "https://host/user/repo");
676        assert_eq!(committish, None);
677        assert_eq!(subpath, None);
678
679        // After resolving, the serializer writes `<url>#<sha>` into
680        // the lockfile's importer `version:` field.
681        let sha = "abcdef0123456789abcdef0123456789abcdef01";
682        let source = LocalSource::Git(GitSource {
683            url: url.clone(),
684            committish: None,
685            resolved: sha.to_string(),
686            subpath: None,
687        });
688        let lockfile_version = source.specifier();
689        assert_eq!(lockfile_version, format!("https://host/user/repo#{sha}"));
690
691        // Re-parse must recognize the bare URL because the 40-hex
692        // committish suffix unambiguously tags it as git.
693        let (round_url, round_committish, round_subpath) =
694            parse_git_spec(&lockfile_version).unwrap();
695        assert_eq!(round_url, "https://host/user/repo");
696        assert_eq!(round_committish.as_deref(), Some(sha));
697        assert_eq!(round_subpath, None);
698    }
699
700    #[test]
701    fn bare_https_without_dot_git_and_no_committish_is_not_git() {
702        // A plain `https://…` URL with no `.git` and no SHA could be
703        // anything (including a tarball); don't claim it.
704        assert!(parse_git_spec("https://example.com/pkg").is_none());
705    }
706
707    #[test]
708    fn github_shorthand_expands_and_roundtrips() {
709        let (url, _, _) = parse_git_spec("github:user/repo").unwrap();
710        assert_eq!(url, "https://github.com/user/repo.git");
711    }
712
713    #[test]
714    fn bare_user_repo_expands_to_github() {
715        let (url, committish, subpath) = parse_git_spec("kevva/is-negative").unwrap();
716        assert_eq!(url, "https://github.com/kevva/is-negative.git");
717        assert!(committish.is_none());
718        assert!(subpath.is_none());
719    }
720
721    #[test]
722    fn bare_user_repo_with_committish_preserved() {
723        let (url, committish, _) = parse_git_spec("kevva/is-negative#v1.0.0").unwrap();
724        assert_eq!(url, "https://github.com/kevva/is-negative.git");
725        assert_eq!(committish.as_deref(), Some("v1.0.0"));
726    }
727
728    #[test]
729    fn bare_scope_pkg_is_not_git_shorthand() {
730        // npm-style `@scope/pkg` is a registry name, not a GitHub shorthand.
731        assert!(parse_git_spec("@types/node").is_none());
732    }
733
734    #[test]
735    fn bare_relative_path_is_not_git_shorthand() {
736        // Single-component relative paths split as owner=".", owner="..",
737        // so owner-starts-with-`.` is the load-bearing guard here.
738        assert!(parse_git_spec("./repo").is_none());
739        assert!(parse_git_spec("../repo").is_none());
740        // Multi-component relative paths additionally fail the
741        // single-`/`-only guard.
742        assert!(parse_git_spec("./local/path").is_none());
743        assert!(parse_git_spec("../local/path").is_none());
744    }
745
746    #[test]
747    fn bare_path_with_extra_slashes_is_not_git_shorthand() {
748        // Real GitHub shorthand is exactly `user/repo` — anything with a
749        // second `/` is a path, not a shorthand.
750        assert!(parse_git_spec("path/with/slashes/extra").is_none());
751    }
752
753    #[test]
754    fn bare_scp_form_unknown_host_is_not_github_shorthand() {
755        // `user@host:repo.git` is scp form (handled or rejected above);
756        // the bare-shorthand branch must not pick it up.
757        assert!(parse_git_spec("user@host:repo.git").is_none());
758    }
759
760    #[test]
761    fn scp_form_recognized() {
762        let (url, committish, _) =
763            parse_git_spec("git@github.com:EthanHenrickson/math-mcp.git").unwrap();
764        assert_eq!(url, "ssh://git@github.com/EthanHenrickson/math-mcp.git");
765        assert!(committish.is_none());
766    }
767
768    #[test]
769    fn scp_form_with_ref_recognized() {
770        let (url, committish, _) =
771            parse_git_spec("git@github.com:EthanHenrickson/math-mcp.git#0.1.5").unwrap();
772        assert_eq!(url, "ssh://git@github.com/EthanHenrickson/math-mcp.git");
773        assert_eq!(committish.as_deref(), Some("0.1.5"));
774    }
775
776    #[test]
777    fn scp_form_bitbucket_recognized() {
778        let (url, _, _) = parse_git_spec("git@bitbucket.org:pnpmjs/git-resolver.git").unwrap();
779        assert_eq!(url, "ssh://git@bitbucket.org/pnpmjs/git-resolver.git");
780    }
781
782    #[test]
783    fn scp_form_unknown_host_rejected() {
784        // pnpm 11 treats `user@unknown-host:path` as a local path, not Git.
785        assert!(parse_git_spec("git@example.com:org/repo.git").is_none());
786        assert!(parse_git_spec("alice@host.example.com:org/repo.git").is_none());
787    }
788
789    #[test]
790    fn scp_form_without_user_rejected() {
791        // pnpm 11 errors on bare `host:path` as unsupported.
792        assert!(parse_git_spec("github.com:user/repo.git").is_none());
793    }
794
795    #[test]
796    fn commit_selector_fragment_normalizes_to_sha() {
797        let sha = "abcdef0123456789abcdef0123456789abcdef01";
798        let (url, committish, _) =
799            parse_git_spec(&format!("https://host/user/repo.git#commit={sha}")).unwrap();
800        assert_eq!(url, "https://host/user/repo.git");
801        assert_eq!(committish.as_deref(), Some(sha));
802    }
803
804    #[test]
805    fn named_selector_fragment_normalizes_to_ref() {
806        let (url, committish, _) = parse_git_spec("git+https://host/user/repo#tag=v1.2.3").unwrap();
807        assert_eq!(url, "https://host/user/repo");
808        assert_eq!(committish.as_deref(), Some("v1.2.3"));
809    }
810
811    #[test]
812    fn pnpm_path_subpath_extracted_from_fragment() {
813        // pnpm syntax: `<url>#<ref>&path:/<subdir>` selects a
814        // subdirectory of the cloned repo as the package root.
815        let (url, committish, subpath) =
816            parse_git_spec("github:org/dep#v0.1.4&path:/packages/special").unwrap();
817        assert_eq!(url, "https://github.com/org/dep.git");
818        assert_eq!(committish.as_deref(), Some("v0.1.4"));
819        assert_eq!(subpath.as_deref(), Some("packages/special"));
820    }
821
822    #[test]
823    fn path_subpath_roundtrips_via_specifier() {
824        let sha = "abcdef0123456789abcdef0123456789abcdef01";
825        let source = LocalSource::Git(GitSource {
826            url: "https://github.com/org/dep.git".to_string(),
827            committish: None,
828            resolved: sha.to_string(),
829            subpath: Some("packages/special".to_string()),
830        });
831        let spec = source.specifier();
832        assert_eq!(
833            spec,
834            format!("https://github.com/org/dep.git#{sha}&path:/packages/special")
835        );
836        let (url, committish, subpath) = parse_git_spec(&spec).unwrap();
837        assert_eq!(url, "https://github.com/org/dep.git");
838        assert_eq!(committish.as_deref(), Some(sha));
839        assert_eq!(subpath.as_deref(), Some("packages/special"));
840    }
841
842    #[test]
843    fn parse_hosted_git_recognizes_canonical_forms() {
844        // All these point at the same (github.com, owner, repo) tuple
845        // and must map to the same HostedGit so the runtime fetch URL
846        // doesn't depend on which scheme the lockfile happens to record.
847        let canonical = HostedGit {
848            host: HostedGitHost::GitHub,
849            owner: "owner".to_string(),
850            repo: "repo".to_string(),
851        };
852        for spec in [
853            "https://github.com/owner/repo.git",
854            "https://github.com/owner/repo",
855            "http://github.com/owner/repo.git",
856            "git+https://github.com/owner/repo.git",
857            "git+https://github.com/owner/repo",
858            "git://github.com/owner/repo.git",
859            "ssh://git@github.com/owner/repo.git",
860            "git+ssh://git@github.com/owner/repo.git",
861            "git@github.com:owner/repo.git",
862        ] {
863            assert_eq!(
864                parse_hosted_git(spec).as_ref(),
865                Some(&canonical),
866                "spec {spec} should map to canonical HostedGit",
867            );
868        }
869    }
870
871    #[test]
872    fn parse_hosted_git_returns_none_for_non_hosted() {
873        // Self-hosted GitLab / Gitea / arbitrary hosts: no codeload
874        // template, so the codeload fast path doesn't apply.
875        for spec in [
876            "https://example.com/owner/repo.git",
877            "ssh://git@gitea.internal/owner/repo.git",
878            "git+ssh://git@gitlab.example.com/group/sub/repo.git",
879            "https://github.com/owner/repo/sub",
880            "https://github.com/owner",
881        ] {
882            assert!(
883                parse_hosted_git(spec).is_none(),
884                "spec {spec} must not match a hosted provider",
885            );
886        }
887    }
888
889    #[test]
890    fn hosted_tarball_url_only_for_full_sha() {
891        let g = HostedGit {
892            host: HostedGitHost::GitHub,
893            owner: "o".to_string(),
894            repo: "r".to_string(),
895        };
896        let sha = "abcdef0123456789abcdef0123456789abcdef01";
897        assert_eq!(
898            g.tarball_url(sha).as_deref(),
899            Some("https://codeload.github.com/o/r/tar.gz/abcdef0123456789abcdef0123456789abcdef01"),
900        );
901        // Branch / tag / abbreviated SHA don't take the fast path —
902        // codeload accepts them but the wrapper-dir name varies and
903        // we can't verify a non-SHA committish post-extraction.
904        assert!(g.tarball_url("main").is_none());
905        assert!(g.tarball_url("v1.2.3").is_none());
906        assert!(g.tarball_url("abcdef0").is_none());
907    }
908
909    #[test]
910    fn hosted_tarball_url_per_provider() {
911        let sha = "abcdef0123456789abcdef0123456789abcdef01";
912        let gitlab = HostedGit {
913            host: HostedGitHost::GitLab,
914            owner: "g".to_string(),
915            repo: "r".to_string(),
916        }
917        .tarball_url(sha)
918        .unwrap();
919        assert!(gitlab.starts_with("https://gitlab.com/g/r/-/archive/"));
920        assert!(gitlab.ends_with("/r-abcdef0123456789abcdef0123456789abcdef01.tar.gz"));
921        let bitbucket = HostedGit {
922            host: HostedGitHost::Bitbucket,
923            owner: "g".to_string(),
924            repo: "r".to_string(),
925        }
926        .tarball_url(sha)
927        .unwrap();
928        assert_eq!(
929            bitbucket,
930            "https://bitbucket.org/g/r/get/abcdef0123456789abcdef0123456789abcdef01.tar.gz",
931        );
932    }
933
934    #[test]
935    fn hosted_https_url_normalizes() {
936        let g = parse_hosted_git("git+ssh://git@github.com/owner/repo.git").unwrap();
937        assert_eq!(g.https_url(), "https://github.com/owner/repo.git");
938    }
939
940    #[test]
941    fn path_traversal_components_in_subpath_are_rejected() {
942        // `..` and `.` components would let a crafted spec escape the
943        // clone dir at install time. The parser drops them so the
944        // resolver/installer never see a traversal-laden subpath.
945        let cases = [
946            "github:org/dep#main&path:/../../etc",
947            "github:org/dep#main&path:/packages/../../../etc",
948            "github:org/dep#main&path:/./packages/foo",
949            "github:org/dep#main&path:/packages//foo",
950        ];
951        for spec in cases {
952            let (_, _, subpath) = parse_git_spec(spec).unwrap();
953            assert_eq!(subpath, None, "spec should drop subpath: {spec}");
954        }
955    }
956
957    #[test]
958    fn dep_path_distinguishes_subpaths_under_same_commit() {
959        // Two packages from the same repo+commit but different
960        // subdirs must hash to distinct dep_paths so the linker
961        // doesn't collapse them.
962        let sha = "abcdef0123456789abcdef0123456789abcdef01";
963        let a = LocalSource::Git(GitSource {
964            url: "https://example.com/r.git".to_string(),
965            committish: None,
966            resolved: sha.to_string(),
967            subpath: Some("packages/a".to_string()),
968        });
969        let b = LocalSource::Git(GitSource {
970            url: "https://example.com/r.git".to_string(),
971            committish: None,
972            resolved: sha.to_string(),
973            subpath: Some("packages/b".to_string()),
974        });
975        assert_ne!(a.dep_path("dep"), b.dep_path("dep"));
976    }
977}