use std::path::{Path, PathBuf};
/// Kinds of dependency source that [`LocalSource::parse`] can recognize in a
/// spec string.
///
/// Path-carrying variants store the path exactly as it was written after the
/// spec prefix; `parse` does not join it with the project root before storing.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LocalSource {
/// `file:` spec that was not detected as an existing tarball file.
Directory(PathBuf),
/// `file:` spec that names an existing file with a `.tgz` / `.tar.gz` name.
Tarball(PathBuf),
/// `link:` spec.
Link(PathBuf),
/// `portal:` spec.
Portal(PathBuf),
/// `exec:` spec.
Exec(PathBuf),
/// Spec accepted by `parse_git_spec`.
Git(GitSource),
/// Any other `http://` / `https://` URL.
RemoteTarball(RemoteTarballSource),
}
/// Payload of [`LocalSource::RemoteTarball`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RemoteTarballSource {
/// The URL exactly as given in the spec; also used verbatim as the specifier.
pub url: String,
/// Left empty by `LocalSource::parse`.
/// NOTE(review): presumably filled with an integrity hash by a later
/// resolution step — not shown in this file, confirm against callers.
pub integrity: String,
}
/// Payload of [`LocalSource::Git`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GitSource {
/// Clone URL as normalized by `parse_git_spec` (shorthands expanded, `git+`
/// prefix removed, scp form rewritten to `ssh://`).
pub url: String,
/// Ref or commit requested in the spec's `#fragment`, if any.
pub committish: Option<String>,
/// Value written after `#` in `specifier()` and hashed by `dep_path()`.
/// `LocalSource::parse` leaves it empty.
/// NOTE(review): presumably the resolved commit SHA, set by a later
/// resolution step — confirm against callers.
pub resolved: String,
/// Sub-directory inside the repository taken from a `path:` fragment
/// selector, stored without a leading `/`.
pub subpath: Option<String>,
}
impl LocalSource {
    /// Returns the path stored in a path-backed variant, or `None` for
    /// `Git` and `RemoteTarball`, which carry no path.
    pub fn path(&self) -> Option<&Path> {
        let stored = match self {
            Self::Git(_) | Self::RemoteTarball(_) => return None,
            Self::Directory(p)
            | Self::Tarball(p)
            | Self::Link(p)
            | Self::Portal(p)
            | Self::Exec(p) => p,
        };
        Some(stored.as_path())
    }

    /// Short tag naming the kind of source. Directories and tarballs share
    /// the `"file"` tag because both come from `file:` specs.
    pub fn kind_str(&self) -> &'static str {
        match self {
            Self::Git(_) => "git",
            Self::RemoteTarball(_) => "url",
            Self::Exec(_) => "exec",
            Self::Portal(_) => "portal",
            Self::Link(_) => "link",
            Self::Tarball(_) | Self::Directory(_) => "file",
        }
    }

    /// The stored path with every backslash replaced by `/`.
    ///
    /// Returns an empty string for variants without a path. Non-UTF-8 path
    /// data is converted lossily.
    pub fn path_posix(&self) -> String {
        match self.path() {
            Some(p) => p.to_string_lossy().replace('\\', "/"),
            None => String::new(),
        }
    }

    /// Renders the source back into spec form.
    ///
    /// * git: `<url>#<resolved>` with `&path:/<subpath>` appended when a
    ///   subpath is set,
    /// * remote tarball: the URL unchanged,
    /// * everything else: `<kind>:<posix path>`.
    pub fn specifier(&self) -> String {
        match self {
            Self::RemoteTarball(tarball) => tarball.url.clone(),
            Self::Git(git) => {
                let mut spec = format!("{}#{}", git.url, git.resolved);
                if let Some(sub) = &git.subpath {
                    spec.push_str("&path:/");
                    spec.push_str(sub);
                }
                spec
            }
            Self::Directory(_)
            | Self::Tarball(_)
            | Self::Link(_)
            | Self::Portal(_)
            | Self::Exec(_) => format!("{}:{}", self.kind_str(), self.path_posix()),
        }
    }

    /// Builds `<name>@<kind>+<hash>`, where `<hash>` is the first 8 bytes
    /// (16 lowercase hex digits) of a SHA-256 over the source's identity.
    ///
    /// The hashed identity is the url/resolved/subpath string for git, the
    /// URL for remote tarballs, and the posix path for path-backed variants.
    pub fn dep_path(&self, name: &str) -> String {
        use sha2::{Digest, Sha256};

        let mut sha = Sha256::new();
        match self {
            Self::RemoteTarball(tarball) => sha.update(tarball.url.as_bytes()),
            Self::Git(git) => {
                sha.update(git.url.as_bytes());
                sha.update(b"#");
                sha.update(git.resolved.as_bytes());
                // The subpath is part of the identity so two packages taken
                // from the same commit do not collide.
                if let Some(sub) = git.subpath.as_deref() {
                    sha.update(b"&path:/");
                    sha.update(sub.as_bytes());
                }
            }
            Self::Directory(_)
            | Self::Tarball(_)
            | Self::Link(_)
            | Self::Portal(_)
            | Self::Exec(_) => sha.update(self.path_posix().as_bytes()),
        }

        let digest = sha.finalize();
        let mut short = String::with_capacity(16);
        for byte in digest.iter().take(8) {
            short.push_str(&format!("{byte:02x}"));
        }
        format!("{name}@{}+{short}", self.kind_str())
    }

    /// Classifies a dependency spec string.
    ///
    /// Git specs are tried first, then plain `http(s)://` URLs, then the
    /// `file:` / `link:` / `portal:` / `exec:` prefixes. Anything else
    /// yields `None`.
    ///
    /// `project_root` is only used to check whether a `file:` target is an
    /// existing file; the stored path is always the one written in the spec.
    pub fn parse(spec: &str, project_root: &Path) -> Option<Self> {
        if let Some((url, committish, subpath)) = parse_git_spec(spec) {
            let git = GitSource {
                url,
                committish,
                resolved: String::new(),
                subpath,
            };
            return Some(Self::Git(git));
        }

        if Self::looks_like_remote_tarball_url(spec) {
            let tarball = RemoteTarballSource {
                url: spec.to_string(),
                integrity: String::new(),
            };
            return Some(Self::RemoteTarball(tarball));
        }

        let (scheme, target) = spec.split_once(':')?;
        let target = PathBuf::from(target);
        let source = match scheme {
            "exec" => Self::Exec(target),
            "link" => Self::Link(target),
            "portal" => Self::Portal(target),
            "file" => {
                // A `file:` spec is a tarball only when the target exists as
                // a regular file and has a tarball-looking name.
                let on_disk = project_root.join(&target);
                if on_disk.is_file() && Self::path_looks_like_tarball(&target) {
                    Self::Tarball(target)
                } else {
                    Self::Directory(target)
                }
            }
            _ => return None,
        };
        Some(source)
    }

    /// True when `spec` starts with `https://` or `http://`. Nothing after
    /// the scheme is inspected.
    pub fn looks_like_remote_tarball_url(spec: &str) -> bool {
        ["https://", "http://"]
            .iter()
            .any(|scheme| spec.starts_with(*scheme))
    }

    /// True when the final path component ends in `.tgz` or `.tar.gz`,
    /// compared ASCII-case-insensitively. Paths without a UTF-8 file name
    /// yield `false`.
    pub fn path_looks_like_tarball(path: &Path) -> bool {
        path.file_name()
            .and_then(|name| name.to_str())
            .map(str::to_ascii_lowercase)
            .is_some_and(|lower| lower.ends_with(".tgz") || lower.ends_with(".tar.gz"))
    }
}
/// Splits a dependency spec into `(clone url, committish, subpath)` when it
/// denotes a git repository, or returns `None` otherwise.
///
/// Everything after the first `#` is handed to `parse_git_fragment`. The part
/// before it is accepted, in this order, as:
///
/// 1. `git+<url>` — the prefix is removed,
/// 2. `git://…` — kept unchanged,
/// 3. scp form for a known host — rewritten by `parse_scp_url`,
/// 4. `github:` / `gitlab:` / `bitbucket:` shorthand — expanded to https,
/// 5. a bare `https://`, `http://`, `ssh://` or `file://` URL that either
///    ends in `.git` or is pinned to a 40-hex-digit committish,
/// 6. bare `owner/repo` — expanded to a github.com https URL.
pub fn parse_git_spec(spec: &str) -> Option<(String, Option<String>, Option<String>)> {
    const HOSTED_SHORTHANDS: [(&str, &str); 3] = [
        ("github:", "github.com"),
        ("gitlab:", "gitlab.com"),
        ("bitbucket:", "bitbucket.org"),
    ];
    const BARE_TRANSPORTS: [&str; 4] = ["https://", "http://", "ssh://", "file://"];

    let (body, committish, subpath) = match spec.split_once('#') {
        Some((head, fragment)) => {
            let (committish, subpath) = parse_git_fragment(fragment);
            (head, committish, subpath)
        }
        None => (spec, None, None),
    };

    let bare_transport = BARE_TRANSPORTS
        .iter()
        .any(|scheme| body.starts_with(*scheme));
    // A plain URL without `.git` is still treated as git when the fragment
    // pins a full 40-hex-digit commit.
    let pinned_to_full_sha = committish
        .as_deref()
        .is_some_and(|c| c.len() == 40 && c.bytes().all(|b| b.is_ascii_hexdigit()));

    let url = if let Some(inner) = body.strip_prefix("git+") {
        inner.to_owned()
    } else if body.starts_with("git://") {
        body.to_owned()
    } else if let Some(ssh_url) = parse_scp_url(body) {
        ssh_url
    } else if let Some(expanded) = HOSTED_SHORTHANDS.iter().find_map(|(prefix, domain)| {
        body.strip_prefix(*prefix)
            .map(|path| format!("https://{domain}/{path}.git"))
    }) {
        expanded
    } else if bare_transport && (body.ends_with(".git") || pinned_to_full_sha) {
        body.to_owned()
    } else if is_bare_github_shorthand(body) {
        format!("https://github.com/{body}.git")
    } else {
        return None;
    };

    Some((url, committish, subpath))
}
/// True when `body` has the shape `owner/repo`: exactly one `/`, both sides
/// non-empty and made only of ASCII letters, digits, `_`, `.` or `-`, and an
/// owner that does not start with `.` (which rules out `./x` and `../x`).
fn is_bare_github_shorthand(body: &str) -> bool {
    fn is_slug(segment: &str) -> bool {
        !segment.is_empty()
            && segment
                .bytes()
                .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'_' | b'.' | b'-'))
    }

    match body.split_once('/') {
        // A second `/` would land in `repo` and fail the slug check.
        Some((owner, repo)) => !owner.starts_with('.') && is_slug(owner) && is_slug(repo),
        None => false,
    }
}
/// A repository on one of the providers in [`HostedGitHost`], identified by
/// owner and repository name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HostedGit {
/// Provider the repository lives on.
pub host: HostedGitHost,
/// First path segment of the repository URL.
pub owner: String,
/// Second path segment of the repository URL; `parse_hosted_git` stores it
/// without a trailing `.git`.
pub repo: String,
}
/// Git hosting providers recognized by domain name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HostedGitHost {
/// `github.com`
GitHub,
/// `gitlab.com`
GitLab,
/// `bitbucket.org`
Bitbucket,
}
impl HostedGit {
    /// Canonical clone URL: `https://<domain>/<owner>/<repo>.git`.
    pub fn https_url(&self) -> String {
        format!(
            "https://{}/{}/{}.git",
            self.host.host_domain(),
            self.owner,
            self.repo
        )
    }

    /// Archive download URL for a commit on this provider.
    ///
    /// Returns `None` unless `committish` is exactly 40 hexadecimal digits.
    /// The SHA is lowercased in the resulting URL.
    pub fn tarball_url(&self, committish: &str) -> Option<String> {
        let is_full_sha =
            committish.len() == 40 && committish.bytes().all(|b| b.is_ascii_hexdigit());
        if !is_full_sha {
            return None;
        }

        let sha = committish.to_ascii_lowercase();
        let Self { host, owner, repo } = self;
        let url = match host {
            HostedGitHost::GitHub => {
                format!("https://codeload.github.com/{owner}/{repo}/tar.gz/{sha}")
            }
            HostedGitHost::GitLab => {
                format!("https://gitlab.com/{owner}/{repo}/-/archive/{sha}/{repo}-{sha}.tar.gz")
            }
            HostedGitHost::Bitbucket => {
                format!("https://bitbucket.org/{owner}/{repo}/get/{sha}.tar.gz")
            }
        };
        Some(url)
    }
}
impl HostedGitHost {
fn from_domain(domain: &str) -> Option<Self> {
match domain {
"github.com" => Some(HostedGitHost::GitHub),
"gitlab.com" => Some(HostedGitHost::GitLab),
"bitbucket.org" => Some(HostedGitHost::Bitbucket),
_ => None,
}
}
pub fn host_domain(self) -> &'static str {
match self {
HostedGitHost::GitHub => "github.com",
HostedGitHost::GitLab => "gitlab.com",
HostedGitHost::Bitbucket => "bitbucket.org",
}
}
}
/// Recognizes a git URL that points at GitHub, GitLab or Bitbucket and splits
/// it into provider, owner and repository.
///
/// Accepts `https://`, `http://`, `ssh://` and `git://` URLs, optionally
/// prefixed with `git+`, plus scp-style `user@host:owner/repo` specs, which
/// are first rewritten to `ssh://` form by `parse_scp_url`. A trailing `.git`
/// on the repository name is dropped.
///
/// Returns `None` when:
/// * the host is not one of the three known provider domains,
/// * the path is not exactly `owner/repo` (extra or missing segments, or an
///   empty owner/repo),
/// * the path contains `@`.
pub fn parse_hosted_git(url: &str) -> Option<HostedGit> {
    const SCHEMES: [&str; 4] = ["https://", "http://", "ssh://", "git://"];

    let body = url.strip_prefix("git+").unwrap_or(url);
    let Some(after_scheme) = SCHEMES
        .iter()
        .find_map(|scheme| body.strip_prefix(*scheme))
    else {
        // No URL scheme: the only other accepted shape is scp form, which
        // normalizes to an `ssh://` URL handled by the branch above.
        let ssh_url = parse_scp_url(body)?;
        return parse_hosted_git(&ssh_url);
    };

    let (authority, path) = after_scheme.split_once('/')?;

    // Userinfo (`user@`) is only meaningful inside the authority, i.e. before
    // the first `/`. Searching the whole URL for `@` would let a path such as
    // `example.com/x@github.com/owner/repo` pass as a github.com URL.
    let host = match authority.split_once('@') {
        Some((_userinfo, host)) => host,
        None => authority,
    };
    let host = HostedGitHost::from_domain(host)?;

    // `@` never appears in an owner or repository name on these providers, so
    // such a path cannot be a plain `owner/repo` reference.
    if path.contains('@') {
        return None;
    }

    let (owner, repo) = path.split_once('/')?;
    if owner.is_empty() || repo.contains('/') {
        return None;
    }
    let repo = repo.strip_suffix(".git").unwrap_or(repo);
    if repo.is_empty() {
        return None;
    }

    Some(HostedGit {
        host,
        owner: owner.to_string(),
        repo: repo.to_string(),
    })
}
/// Rewrites an scp-style git address (`user@host:path`) into
/// `ssh://user@host/path`.
///
/// Only `github.com`, `gitlab.com` and `bitbucket.org` are accepted as host.
/// Returns `None` when the input contains `://`, has no `:` or no `user@`
/// part, has an empty user or path, or has a path that starts with `/`.
fn parse_scp_url(body: &str) -> Option<String> {
    if body.contains("://") {
        return None;
    }

    let (login, path) = body.split_once(':')?;
    if path.is_empty() || path.starts_with('/') {
        return None;
    }

    let (user, host) = login.split_once('@')?;
    // The allow-list also rejects empty hosts and hosts containing `/` or `@`.
    let known_host = matches!(host, "github.com" | "gitlab.com" | "bitbucket.org");
    (!user.is_empty() && known_host).then(|| format!("ssh://{user}@{host}/{path}"))
}
/// Returns only the committish that `parse_git_fragment` selects from
/// `fragment`; any subpath is discarded.
pub(crate) fn normalize_git_fragment(fragment: &str) -> Option<String> {
    let (committish, _subpath) = parse_git_fragment(fragment);
    committish
}
/// Parses the part of a git spec after `#` into `(committish, subpath)`.
///
/// The fragment is a `&`-separated list. Each entry is `key=value`, or
/// `key:value` for the keys `commit`, `tag`, `head`, `branch` and `path`, or
/// a bare value with no key. Entries with an empty value or an unknown
/// `key=` are ignored.
///
/// * committish: the first `commit` entry wins; otherwise the first
///   `tag` / `head` / `branch` / bare entry.
/// * subpath: the first `path` entry that survives validation. Leading `/`
///   are stripped, and the entry is dropped if the remainder is empty or
///   contains an empty, `.` or `..` component.
pub(crate) fn parse_git_fragment(fragment: &str) -> (Option<String>, Option<String>) {
    const COLON_KEYS: [&str; 5] = ["commit", "tag", "head", "branch", "path"];

    /// Strips leading slashes and rejects anything that is not a plain
    /// forward-slash relative path.
    fn clean_subpath(raw: &str) -> Option<&str> {
        let relative = raw.trim_start_matches('/');
        let is_plain = !relative.is_empty()
            && relative
                .split('/')
                .all(|component| !matches!(component, "" | "." | ".."));
        if is_plain {
            Some(relative)
        } else {
            None
        }
    }

    let mut pinned_commit: Option<&str> = None;
    let mut named_ref: Option<&str> = None;
    let mut subpath: Option<&str> = None;

    for entry in fragment.split('&') {
        // `=` always acts as a separator; `:` only for the known keys, so
        // values that merely contain a colon stay bare refs.
        let (key, value) = match entry.split_once('=') {
            Some(pair) => pair,
            None => match entry.split_once(':') {
                Some((k, v)) if COLON_KEYS.contains(&k) => (k, v),
                _ => ("", entry),
            },
        };
        if value.is_empty() {
            continue;
        }

        let slot = match key {
            "commit" => &mut pinned_commit,
            "tag" | "head" | "branch" | "" => &mut named_ref,
            "path" => {
                if subpath.is_none() {
                    subpath = clean_subpath(value);
                }
                continue;
            }
            _ => continue,
        };
        // First occurrence wins; later duplicates are ignored.
        if slot.is_none() {
            *slot = Some(value);
        }
    }

    (
        pinned_commit.or(named_ref).map(str::to_owned),
        subpath.map(str::to_owned),
    )
}
// Unit tests for spec classification (`LocalSource::parse`), git spec and
// fragment parsing, hosted-provider detection, and the specifier / dep-path
// strings produced from a parsed source.
#[cfg(test)]
mod tests {
use super::*;
// --- looks_like_remote_tarball_url ---------------------------------------
// NOTE(review): the function under test only checks for an `http://` or
// `https://` prefix, so the suffix, fragment and query string in the URLs
// below do not influence the result. Test names that mention "suffix check"
// describe behavior that is not present in the function as written here.
#[test]
fn matches_https_tgz() {
assert!(LocalSource::looks_like_remote_tarball_url(
"https://example.com/pkg-1.0.0.tgz"
));
}
#[test]
fn matches_http_tar_gz() {
assert!(LocalSource::looks_like_remote_tarball_url(
"http://example.com/pkg-1.0.0.tar.gz"
));
}
#[test]
fn strips_fragment_before_suffix_check() {
assert!(LocalSource::looks_like_remote_tarball_url(
"https://example.com/pkg-1.0.0.tgz#sha512-abc"
));
}
#[test]
fn strips_query_string_before_suffix_check() {
assert!(LocalSource::looks_like_remote_tarball_url(
"https://registry.example.com/pkg/-/pkg-1.0.0.tgz?token=abc"
));
assert!(LocalSource::looks_like_remote_tarball_url(
"https://example.com/pkg-1.0.0.tar.gz?v=2&signed=1"
));
}
// URLs with no tarball extension at all are still accepted.
#[test]
fn matches_bare_http_url_without_tarball_suffix() {
assert!(LocalSource::looks_like_remote_tarball_url(
"https://pkg.pr.new/lunariajs/lunaria/@lunariajs/core@904b935"
));
assert!(LocalSource::looks_like_remote_tarball_url(
"https://codeload.github.com/user/repo/tar.gz/main"
));
}
#[test]
fn rejects_non_http_schemes() {
assert!(!LocalSource::looks_like_remote_tarball_url(
"ftp://example.com/pkg.tgz"
));
assert!(!LocalSource::looks_like_remote_tarball_url(
"git://example.com/repo.git"
));
}
// --- LocalSource::parse --------------------------------------------------
#[test]
fn parse_classifies_bare_http_url_as_remote_tarball() {
use std::path::Path;
let parsed = LocalSource::parse(
"https://pkg.pr.new/lunariajs/lunaria/@lunariajs/core@904b935",
Path::new(""),
);
assert!(matches!(parsed, Some(LocalSource::RemoteTarball(_))));
}
// Git detection runs before the http(s) check, so a `.git` URL is never
// classified as a remote tarball.
#[test]
fn parse_prefers_git_over_tarball_for_dot_git_url() {
use std::path::Path;
let parsed = LocalSource::parse("https://github.com/user/repo.git", Path::new(""));
assert!(matches!(parsed, Some(LocalSource::Git(_))));
}
#[test]
fn parse_classifies_exec_as_local_source() {
let parsed = LocalSource::parse("exec:./scripts/generate.js", Path::new(""));
assert_eq!(
parsed,
Some(LocalSource::Exec(PathBuf::from("./scripts/generate.js")))
);
}
// --- parse_git_spec ------------------------------------------------------
// A `git+https` URL without `.git` loses its `git+` prefix when rendered by
// `specifier()`. The rendered `<url>#<40-hex sha>` form must still parse as
// git; the full-SHA rule in `parse_git_spec` is what makes that work.
#[test]
fn git_plus_https_without_dot_git_roundtrips_via_lockfile_form() {
let (url, committish, subpath) = parse_git_spec("git+https://host/user/repo").unwrap();
assert_eq!(url, "https://host/user/repo");
assert_eq!(committish, None);
assert_eq!(subpath, None);
let sha = "abcdef0123456789abcdef0123456789abcdef01";
let source = LocalSource::Git(GitSource {
url: url.clone(),
committish: None,
resolved: sha.to_string(),
subpath: None,
});
let lockfile_version = source.specifier();
assert_eq!(lockfile_version, format!("https://host/user/repo#{sha}"));
let (round_url, round_committish, round_subpath) =
parse_git_spec(&lockfile_version).unwrap();
assert_eq!(round_url, "https://host/user/repo");
assert_eq!(round_committish.as_deref(), Some(sha));
assert_eq!(round_subpath, None);
}
#[test]
fn bare_https_without_dot_git_and_no_committish_is_not_git() {
assert!(parse_git_spec("https://example.com/pkg").is_none());
}
#[test]
fn github_shorthand_expands_and_roundtrips() {
let (url, _, _) = parse_git_spec("github:user/repo").unwrap();
assert_eq!(url, "https://github.com/user/repo.git");
}
// Bare `owner/repo` shorthand and the inputs that must not be mistaken for it.
#[test]
fn bare_user_repo_expands_to_github() {
let (url, committish, subpath) = parse_git_spec("kevva/is-negative").unwrap();
assert_eq!(url, "https://github.com/kevva/is-negative.git");
assert!(committish.is_none());
assert!(subpath.is_none());
}
#[test]
fn bare_user_repo_with_committish_preserved() {
let (url, committish, _) = parse_git_spec("kevva/is-negative#v1.0.0").unwrap();
assert_eq!(url, "https://github.com/kevva/is-negative.git");
assert_eq!(committish.as_deref(), Some("v1.0.0"));
}
#[test]
fn bare_scope_pkg_is_not_git_shorthand() {
assert!(parse_git_spec("@types/node").is_none());
}
#[test]
fn bare_relative_path_is_not_git_shorthand() {
assert!(parse_git_spec("./repo").is_none());
assert!(parse_git_spec("../repo").is_none());
assert!(parse_git_spec("./local/path").is_none());
assert!(parse_git_spec("../local/path").is_none());
}
#[test]
fn bare_path_with_extra_slashes_is_not_git_shorthand() {
assert!(parse_git_spec("path/with/slashes/extra").is_none());
}
#[test]
fn bare_scp_form_unknown_host_is_not_github_shorthand() {
assert!(parse_git_spec("user@host:repo.git").is_none());
}
// scp-style `user@host:path` specs: accepted only for the known providers
// and rewritten to `ssh://` form.
#[test]
fn scp_form_recognized() {
let (url, committish, _) =
parse_git_spec("git@github.com:EthanHenrickson/math-mcp.git").unwrap();
assert_eq!(url, "ssh://git@github.com/EthanHenrickson/math-mcp.git");
assert!(committish.is_none());
}
#[test]
fn scp_form_with_ref_recognized() {
let (url, committish, _) =
parse_git_spec("git@github.com:EthanHenrickson/math-mcp.git#0.1.5").unwrap();
assert_eq!(url, "ssh://git@github.com/EthanHenrickson/math-mcp.git");
assert_eq!(committish.as_deref(), Some("0.1.5"));
}
#[test]
fn scp_form_bitbucket_recognized() {
let (url, _, _) = parse_git_spec("git@bitbucket.org:pnpmjs/git-resolver.git").unwrap();
assert_eq!(url, "ssh://git@bitbucket.org/pnpmjs/git-resolver.git");
}
#[test]
fn scp_form_unknown_host_rejected() {
assert!(parse_git_spec("git@example.com:org/repo.git").is_none());
assert!(parse_git_spec("alice@host.example.com:org/repo.git").is_none());
}
#[test]
fn scp_form_without_user_rejected() {
assert!(parse_git_spec("github.com:user/repo.git").is_none());
}
// --- fragment selectors --------------------------------------------------
#[test]
fn commit_selector_fragment_normalizes_to_sha() {
let sha = "abcdef0123456789abcdef0123456789abcdef01";
let (url, committish, _) =
parse_git_spec(&format!("https://host/user/repo.git#commit={sha}")).unwrap();
assert_eq!(url, "https://host/user/repo.git");
assert_eq!(committish.as_deref(), Some(sha));
}
#[test]
fn named_selector_fragment_normalizes_to_ref() {
let (url, committish, _) = parse_git_spec("git+https://host/user/repo#tag=v1.2.3").unwrap();
assert_eq!(url, "https://host/user/repo");
assert_eq!(committish.as_deref(), Some("v1.2.3"));
}
// `&path:/<dir>` selects a sub-directory; the leading `/` is not stored.
#[test]
fn pnpm_path_subpath_extracted_from_fragment() {
let (url, committish, subpath) =
parse_git_spec("github:org/dep#v0.1.4&path:/packages/special").unwrap();
assert_eq!(url, "https://github.com/org/dep.git");
assert_eq!(committish.as_deref(), Some("v0.1.4"));
assert_eq!(subpath.as_deref(), Some("packages/special"));
}
#[test]
fn path_subpath_roundtrips_via_specifier() {
let sha = "abcdef0123456789abcdef0123456789abcdef01";
let source = LocalSource::Git(GitSource {
url: "https://github.com/org/dep.git".to_string(),
committish: None,
resolved: sha.to_string(),
subpath: Some("packages/special".to_string()),
});
let spec = source.specifier();
assert_eq!(
spec,
format!("https://github.com/org/dep.git#{sha}&path:/packages/special")
);
let (url, committish, subpath) = parse_git_spec(&spec).unwrap();
assert_eq!(url, "https://github.com/org/dep.git");
assert_eq!(committish.as_deref(), Some(sha));
assert_eq!(subpath.as_deref(), Some("packages/special"));
}
// --- parse_hosted_git / HostedGit ----------------------------------------
// Every accepted spelling of the same repository must map to one value.
#[test]
fn parse_hosted_git_recognizes_canonical_forms() {
let canonical = HostedGit {
host: HostedGitHost::GitHub,
owner: "owner".to_string(),
repo: "repo".to_string(),
};
for spec in [
"https://github.com/owner/repo.git",
"https://github.com/owner/repo",
"http://github.com/owner/repo.git",
"git+https://github.com/owner/repo.git",
"git+https://github.com/owner/repo",
"git://github.com/owner/repo.git",
"ssh://git@github.com/owner/repo.git",
"git+ssh://git@github.com/owner/repo.git",
"git@github.com:owner/repo.git",
] {
assert_eq!(
parse_hosted_git(spec).as_ref(),
Some(&canonical),
"spec {spec} should map to canonical HostedGit",
);
}
}
// Unknown hosts, and paths that are not exactly `owner/repo`, are rejected.
#[test]
fn parse_hosted_git_returns_none_for_non_hosted() {
for spec in [
"https://example.com/owner/repo.git",
"ssh://git@gitea.internal/owner/repo.git",
"git+ssh://git@gitlab.example.com/group/sub/repo.git",
"https://github.com/owner/repo/sub",
"https://github.com/owner",
] {
assert!(
parse_hosted_git(spec).is_none(),
"spec {spec} must not match a hosted provider",
);
}
}
// Tarball URLs are only produced for a full 40-hex-digit commit SHA.
#[test]
fn hosted_tarball_url_only_for_full_sha() {
let g = HostedGit {
host: HostedGitHost::GitHub,
owner: "o".to_string(),
repo: "r".to_string(),
};
let sha = "abcdef0123456789abcdef0123456789abcdef01";
assert_eq!(
g.tarball_url(sha).as_deref(),
Some("https://codeload.github.com/o/r/tar.gz/abcdef0123456789abcdef0123456789abcdef01"),
);
assert!(g.tarball_url("main").is_none());
assert!(g.tarball_url("v1.2.3").is_none());
assert!(g.tarball_url("abcdef0").is_none());
}
#[test]
fn hosted_tarball_url_per_provider() {
let sha = "abcdef0123456789abcdef0123456789abcdef01";
let gitlab = HostedGit {
host: HostedGitHost::GitLab,
owner: "g".to_string(),
repo: "r".to_string(),
}
.tarball_url(sha)
.unwrap();
assert!(gitlab.starts_with("https://gitlab.com/g/r/-/archive/"));
assert!(gitlab.ends_with("/r-abcdef0123456789abcdef0123456789abcdef01.tar.gz"));
let bitbucket = HostedGit {
host: HostedGitHost::Bitbucket,
owner: "g".to_string(),
repo: "r".to_string(),
}
.tarball_url(sha)
.unwrap();
assert_eq!(
bitbucket,
"https://bitbucket.org/g/r/get/abcdef0123456789abcdef0123456789abcdef01.tar.gz",
);
}
#[test]
fn hosted_https_url_normalizes() {
let g = parse_hosted_git("git+ssh://git@github.com/owner/repo.git").unwrap();
assert_eq!(g.https_url(), "https://github.com/owner/repo.git");
}
// --- subpath safety and dep_path identity --------------------------------
// A `path:` selector with `.`, `..` or empty components is dropped entirely
// rather than sanitized; the spec itself still parses as git.
#[test]
fn path_traversal_components_in_subpath_are_rejected() {
let cases = [
"github:org/dep#main&path:/../../etc",
"github:org/dep#main&path:/packages/../../../etc",
"github:org/dep#main&path:/./packages/foo",
"github:org/dep#main&path:/packages//foo",
];
for spec in cases {
let (_, _, subpath) = parse_git_spec(spec).unwrap();
assert_eq!(subpath, None, "spec should drop subpath: {spec}");
}
}
// Two packages from the same repository and commit but different
// sub-directories must not share a dep path.
#[test]
fn dep_path_distinguishes_subpaths_under_same_commit() {
let sha = "abcdef0123456789abcdef0123456789abcdef01";
let a = LocalSource::Git(GitSource {
url: "https://example.com/r.git".to_string(),
committish: None,
resolved: sha.to_string(),
subpath: Some("packages/a".to_string()),
});
let b = LocalSource::Git(GitSource {
url: "https://example.com/r.git".to_string(),
committish: None,
resolved: sha.to_string(),
subpath: Some("packages/b".to_string()),
});
assert_ne!(a.dep_path("dep"), b.dep_path("dep"));
}
}