use std::path::Path;
use std::process::Command;
#[cfg(feature = "https-fetcher")]
use std::io::Read;
use super::uri::ParsedUri;
/// A strategy for materialising the content behind a parsed URI on disk.
///
/// Implementations must be `Send + Sync`. The implementations in this file
/// (`HttpsFetcher`, `GitFetcher`) each place the fetched files under `dest`.
pub trait Fetcher: Send + Sync {
/// Fetches the resource described by `uri` and writes its contents under
/// `dest`.
///
/// # Errors
///
/// Returns a [`FetchError`] describing why the resource could not be
/// obtained or unpacked.
fn fetch(&self, uri: &ParsedUri, dest: &Path) -> Result<(), FetchError>;
/// Returns the URI scheme names this fetcher handles (for example
/// `"https"` or `"git"`).
// NOTE(review): presumably consulted by a registry to pick a fetcher per
// scheme; the caller is not visible from this file.
fn schemes(&self) -> &'static [&'static str];
/// Reports whether `_rev` names a revision that can never change upstream.
///
/// The default implementation answers `false` for every input; fetchers
/// with a notion of pinned revisions override it.
// NOTE(review): likely used to decide whether a fetched copy may be cached
// indefinitely; confirm against the caller.
fn is_immutable_rev(&self, _rev: Option<&str>) -> bool {
false
}
}
/// Failure modes shared by every [`Fetcher`] implementation.
///
/// Marked `#[non_exhaustive]`, so downstream `match`es need a wildcard arm.
#[derive(Debug)]
#[non_exhaustive]
pub enum FetchError {
    /// No working resolver exists for `scheme` in this build.
    Unimplemented { scheme: String, message: String },
    /// The remote could not be reached (transport-level failure).
    Network { message: String },
    /// The remote answered, but with a failure; `status` is a short label
    /// such as an HTTP status code or `"auth"`.
    UpstreamStatus { status: String, message: String },
    /// The downloaded payload could not be accepted or unpacked.
    Extract { message: String },
    /// A local I/O operation failed; the original error is kept as `source`.
    Io(std::io::Error),
    /// Any other failure; `message` is shown verbatim.
    Other { message: String },
}

/// Human-readable rendering; each variant gets its own fixed prefix.
impl std::fmt::Display for FetchError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Unimplemented { scheme, message } => {
                write!(f, "`{scheme}:` resolver not implemented: {message}")
            }
            Self::Network { message } => {
                write!(f, "network error: {message}")
            }
            Self::UpstreamStatus { status, message } => {
                write!(f, "upstream returned {status}: {message}")
            }
            Self::Extract { message } => {
                write!(f, "archive extraction failed: {message}")
            }
            Self::Io(e) => {
                write!(f, "fetcher io error: {e}")
            }
            Self::Other { message } => {
                write!(f, "{message}")
            }
        }
    }
}

/// Only the `Io` variant wraps another error, so only it reports a source.
impl std::error::Error for FetchError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        if let Self::Io(inner) = self {
            Some(inner)
        } else {
            None
        }
    }
}

/// Lets `?` lift `std::io::Error` straight into [`FetchError::Io`].
impl From<std::io::Error> for FetchError {
    fn from(e: std::io::Error) -> Self {
        Self::Io(e)
    }
}
/// Stateless fetcher registered for the `https` scheme.
///
/// Its `Fetcher` impl is selected at build time by the `https-fetcher`
/// feature: with the feature it downloads and unpacks an archive, without it
/// every fetch returns `FetchError::Unimplemented`.
#[derive(Debug, Default, Clone, Copy)]
pub struct HttpsFetcher;
// Largest response body (256 MiB) the HTTPS fetcher will spool to disk;
// anything longer is rejected after the download.
#[cfg(feature = "https-fetcher")]
const HTTPS_RESPONSE_CAP_BYTES: u64 = 256 * 1024 * 1024;
// Upper bound (64 KiB) on how much of a non-success response body is read
// into the error message.
#[cfg(feature = "https-fetcher")]
const HTTPS_ERROR_BODY_CAP_BYTES: u64 = 64 * 1024;
// Passed to the agent's `timeout_connect`.
#[cfg(feature = "https-fetcher")]
const HTTPS_CONNECT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30);
// Passed to the agent's `timeout_read`.
#[cfg(feature = "https-fetcher")]
const HTTPS_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(120);
/// Real HTTPS implementation: download an archive and unpack it into `dest`.
#[cfg(feature = "https-fetcher")]
impl Fetcher for HttpsFetcher {
    /// Downloads `https:<uri.body>`, spools the body to an anonymous temp
    /// file (bounded by `HTTPS_RESPONSE_CAP_BYTES`) and extracts it into
    /// `dest`, honouring `uri.subdir`.
    ///
    /// # Errors
    ///
    /// * request failures are translated by `map_ureq_error`;
    /// * an over-long body yields `FetchError::Extract`;
    /// * local I/O failures yield `FetchError::Io`;
    /// * extraction failures are translated by `map_extract_error`.
    fn fetch(&self, uri: &ParsedUri, dest: &Path) -> Result<(), FetchError> {
        use std::io::Seek;

        let url = format!("https:{}", uri.body);
        let agent = ureq::AgentBuilder::new()
            .timeout_connect(HTTPS_CONNECT_TIMEOUT)
            .timeout_read(HTTPS_READ_TIMEOUT)
            .build();
        let request = agent.get(&url).set(
            "User-Agent",
            "lex-extension-host (https://github.com/lex-fmt/lex)",
        );
        let response = match request.call() {
            Ok(ok) => ok,
            Err(failure) => return Err(map_ureq_error(failure)),
        };

        // The header must be copied out before `into_reader` consumes the
        // response.
        let content_type = response.header("Content-Type").map(str::to_owned);
        let format = super::extract::detect_format(content_type.as_deref(), &uri.body);

        // Reading one byte past the cap lets an exactly-at-cap body through
        // while still detecting anything longer.
        let mut body = response.into_reader().take(HTTPS_RESPONSE_CAP_BYTES + 1);
        let mut spool = tempfile::tempfile()?;
        let received = std::io::copy(&mut body, &mut spool)?;
        if received > HTTPS_RESPONSE_CAP_BYTES {
            return Err(FetchError::Extract {
                message: format!("response exceeded {HTTPS_RESPONSE_CAP_BYTES}-byte cap"),
            });
        }

        // Extraction reads from the start of the spooled file.
        spool.rewind()?;
        match super::extract::extract_archive_into(spool, format, dest, uri.subdir.as_deref()) {
            Ok(_) => Ok(()),
            Err(failure) => Err(map_extract_error(failure)),
        }
    }

    /// This fetcher serves the `https` scheme only.
    fn schemes(&self) -> &'static [&'static str] {
        &["https"]
    }
}
/// Placeholder used when the crate is built without `https-fetcher`: the
/// scheme stays registered, but every fetch reports that support is missing.
#[cfg(not(feature = "https-fetcher"))]
impl Fetcher for HttpsFetcher {
    /// Always fails with `FetchError::Unimplemented` for the `https` scheme.
    fn fetch(&self, _uri: &ParsedUri, _dest: &Path) -> Result<(), FetchError> {
        let scheme = String::from("https");
        let message = String::from(
            "https: fetcher disabled at build time (the `https-fetcher` feature on lex-extension-host wasn't enabled — common for wasm targets where the underlying TLS chain doesn't compile)",
        );
        Err(FetchError::Unimplemented { scheme, message })
    }

    /// This fetcher serves the `https` scheme only.
    fn schemes(&self) -> &'static [&'static str] {
        &["https"]
    }
}
/// Translates a `ureq` failure into the fetcher's own error type.
///
/// * `Status` (the server answered with a failure code) becomes
///   `FetchError::UpstreamStatus`, carrying the numeric code and up to
///   `HTTPS_ERROR_BODY_CAP_BYTES` of the response body.
/// * `Transport` becomes `FetchError::Network` with the transport error's
///   text.
#[cfg(feature = "https-fetcher")]
fn map_ureq_error(e: ureq::Error) -> FetchError {
    match e {
        ureq::Error::Status(code, response) => {
            use std::io::Read as _;

            // Read raw bytes and decode lossily. `read_to_string` would fail
            // and leave the buffer untouched if the body is not valid UTF-8,
            // or if the byte cap cuts a multi-byte character in half, and
            // the body would then be misreported as empty.
            let mut reader = response.into_reader().take(HTTPS_ERROR_BODY_CAP_BYTES);
            let mut raw = Vec::new();
            // Best effort: whatever was read before an I/O error is still
            // useful context for the message.
            let _ = reader.read_to_end(&mut raw);
            let message = if raw.is_empty() {
                "<empty body>".to_owned()
            } else {
                String::from_utf8_lossy(&raw).into_owned()
            };
            FetchError::UpstreamStatus {
                status: code.to_string(),
                message,
            }
        }
        ureq::Error::Transport(t) => FetchError::Network {
            message: t.to_string(),
        },
    }
}
/// Converts an archive-extraction failure into a [`FetchError`].
///
/// I/O failures keep their original `std::io::Error`; every other kind is
/// flattened to its display text inside `FetchError::Extract`.
#[cfg(feature = "https-fetcher")]
fn map_extract_error(e: super::extract::ExtractError) -> FetchError {
    use super::extract::ExtractError;

    match e {
        ExtractError::Io(cause) => FetchError::Io(cause),
        non_io => {
            let message = non_io.to_string();
            FetchError::Extract { message }
        }
    }
}
/// Stateless fetcher for the `git` and `git+ssh` schemes; it shells out to
/// the `git` binary to make a shallow clone.
#[derive(Debug, Default, Clone, Copy)]
pub struct GitFetcher;
/// Fetches repository contents by running `git clone --depth=1` into a
/// scratch directory inside `dest` and copying the wanted files out of it.
impl Fetcher for GitFetcher {
    /// Clones the repository named by `uri` and copies its files (or only
    /// those under `uri.subdir`) into `dest`.
    ///
    /// The clone lands in `dest/.lex-git-clone`, which is removed again on
    /// every exit path after `git` has run. `.git` directories and symlinks
    /// are not copied (see `copy_dir_contents`).
    ///
    /// # Errors
    ///
    /// * `FetchError::Other` if the subdir is unsafe or missing, or if the
    ///   `git` binary is not on `PATH`;
    /// * the result of `classify_git_clone_error` if `git clone` fails;
    /// * `FetchError::Io` for local filesystem failures.
    fn fetch(&self, uri: &ParsedUri, dest: &Path) -> Result<(), FetchError> {
        let url = reconstruct_git_url(&uri.scheme, &uri.body);
        // Surrounding slashes are noise; an all-slash subdir means "no subdir".
        let subdir = uri
            .subdir
            .as_deref()
            .map(|s| s.trim_matches('/').to_string())
            .filter(|s| !s.is_empty());
        // Reject traversal attempts before doing any network work.
        if let Some(sub) = subdir.as_deref() {
            validate_subdir(sub)?;
        }
        let clone_dir = dest.join(".lex-git-clone");

        let mut cmd = Command::new("git");
        cmd.arg("clone").arg("--depth=1");
        // NOTE(review): `git clone --branch` resolves branch and tag names; a
        // raw commit SHA is likely rejected here even though
        // `is_immutable_rev` treats SHAs as immutable. Confirm the intent.
        if let Some(rev) = uri.rev.as_deref().filter(|s| !s.is_empty()) {
            cmd.arg("--branch").arg(rev);
        }
        // `--` stops option parsing so the URL can never be read as a flag.
        cmd.arg("--").arg(&url).arg(&clone_dir);
        // Fail instead of blocking on an interactive credential prompt.
        cmd.env("GIT_TERMINAL_PROMPT", "0");

        let output = cmd.output().map_err(|e| {
            if e.kind() == std::io::ErrorKind::NotFound {
                FetchError::Other {
                    message: "git binary not in PATH; install git, or use a `path:` URI / `--ext-schema` flag for a local schema".into(),
                }
            } else {
                FetchError::Io(e)
            }
        })?;
        if !output.status.success() {
            let _ = std::fs::remove_dir_all(&clone_dir);
            let stderr = String::from_utf8_lossy(&output.stderr).into_owned();
            return Err(classify_git_clone_error(&stderr));
        }

        // Copy first, then always remove the scratch clone. Previously a
        // failed copy returned early and left `.lex-git-clone` behind.
        let copied = copy_clone_into_dest(&clone_dir, subdir.as_deref(), dest);
        let cleanup = std::fs::remove_dir_all(&clone_dir);
        // A copy/subdir failure is the more informative error, so it wins
        // over a cleanup failure.
        copied?;
        cleanup.map_err(FetchError::Io)
    }

    /// Schemes served by the git fetcher.
    fn schemes(&self) -> &'static [&'static str] {
        &["git", "git+ssh"]
    }

    /// Delegates to `is_immutable_git_rev`.
    fn is_immutable_rev(&self, rev: Option<&str>) -> bool {
        is_immutable_git_rev(rev)
    }
}

/// Resolves the directory to publish inside a finished clone (the clone root
/// or a symlink-checked `subdir`) and copies its contents into `dest`.
fn copy_clone_into_dest(
    clone_dir: &Path,
    subdir: Option<&str>,
    dest: &Path,
) -> Result<(), FetchError> {
    let source = match subdir {
        Some(sub) => {
            let candidate = safe_subdir_join(clone_dir, sub)?;
            if !candidate.is_dir() {
                return Err(FetchError::Other {
                    message: format!(
                        "subdir `{sub}` not found in cloned repo (clone succeeded but the path doesn't exist)"
                    ),
                });
            }
            candidate
        }
        None => clone_dir.to_path_buf(),
    };
    copy_dir_contents(&source, dest, clone_dir).map_err(FetchError::Io)
}
/// Rebuilds the URL handed to `git clone` from a parsed scheme and body.
///
/// For `git+ssh` the scheme is glued back in front of the body; for every
/// other scheme the body is already a complete git URL and is returned as-is.
fn reconstruct_git_url(scheme: &str, body: &str) -> String {
    if scheme == "git+ssh" {
        format!("git+ssh:{body}")
    } else {
        body.to_owned()
    }
}
/// Checks, purely lexically, that `subdir` stays inside the clone root.
///
/// Only plain names and `.` components are accepted.
///
/// # Errors
///
/// Returns `FetchError::Other` when the path is absolute, contains `..`, or
/// carries a root or platform-prefix component. The first offending
/// component decides which message is produced.
fn validate_subdir(subdir: &str) -> Result<(), FetchError> {
    use std::path::Component;

    let refusal = |message: String| FetchError::Other { message };
    let candidate = Path::new(subdir);

    if candidate.is_absolute() {
        return Err(refusal(format!(
            "subdir `{subdir}` is absolute; subdir must be a relative path within the cloned repo"
        )));
    }

    // Look for the first component that is neither a plain name nor `.`.
    let offending = candidate
        .components()
        .find(|c| !matches!(c, Component::Normal(_) | Component::CurDir));

    match offending {
        None => Ok(()),
        Some(Component::ParentDir) => Err(refusal(format!(
            "subdir `{subdir}` contains `..`; refusing to escape the clone root"
        ))),
        Some(_) => Err(refusal(format!(
            "subdir `{subdir}` is rooted (absolute path or platform prefix); refusing"
        ))),
    }
}
/// Joins `subdir` onto `clone_dir` one component at a time, refusing to pass
/// through any symlink found on disk.
///
/// The path is first validated lexically with `validate_subdir`. Components
/// that do not exist (or cannot be inspected) are accepted as-is; checking
/// that the final path exists is left to the caller.
///
/// # Errors
///
/// Returns `FetchError::Other` if validation fails or any existing component
/// is a symlink.
fn safe_subdir_join(clone_dir: &Path, subdir: &str) -> Result<std::path::PathBuf, FetchError> {
    use std::path::Component;

    validate_subdir(subdir)?;

    // After validation only `Normal` and `.` components remain; `.` adds
    // nothing to the joined path.
    let names = Path::new(subdir).components().filter_map(|part| match part {
        Component::Normal(name) => Some(name),
        _ => None,
    });

    let mut resolved = clone_dir.to_path_buf();
    for name in names {
        resolved.push(name);
        // `symlink_metadata` inspects the entry itself, not its target.
        // Lookup failures are treated as "not a symlink".
        let is_link = std::fs::symlink_metadata(&resolved)
            .map(|meta| meta.file_type().is_symlink())
            .unwrap_or(false);
        if is_link {
            let shown = resolved.strip_prefix(clone_dir).unwrap_or(&resolved);
            return Err(FetchError::Other {
                message: format!(
                    "subdir component `{}` is a symlink in the cloned repo; refusing to follow",
                    shown.display()
                ),
            });
        }
    }
    Ok(resolved)
}
/// Sorts a failed `git clone`'s stderr into a [`FetchError`] category by
/// case-insensitive substring matching.
///
/// Connectivity phrases map to `Network`, credential/access phrases map to
/// `UpstreamStatus` with status `"auth"`, and anything else to `Other`. The
/// trimmed stderr is always carried as the message. Connectivity phrases are
/// checked first.
fn classify_git_clone_error(stderr: &str) -> FetchError {
    const NETWORK_MARKERS: [&str; 6] = [
        "could not resolve host",
        "could not connect",
        "connection refused",
        "connection timed out",
        "network is unreachable",
        "no route to host",
    ];
    const AUTH_MARKERS: [&str; 5] = [
        "permission denied",
        "authentication failed",
        "could not read username",
        "access denied",
        "repository not found",
    ];

    let haystack = stderr.to_ascii_lowercase();
    let mentions_any = |markers: &[&str]| markers.iter().any(|&m| haystack.contains(m));
    let message = stderr.trim().to_string();

    if mentions_any(&NETWORK_MARKERS) {
        return FetchError::Network { message };
    }
    if mentions_any(&AUTH_MARKERS) {
        return FetchError::UpstreamStatus {
            status: "auth".into(),
            message,
        };
    }
    FetchError::Other { message }
}
/// Heuristically decides whether `rev` names something that will not move.
///
/// Returns `true` for:
/// * 7 to 40 lowercase hexadecimal characters (a full or abbreviated SHA);
/// * a version-like tag: optional leading `v`, an all-digit first field, a
///   `.`, and a second field that starts with a digit (`1.2`, `v1.2.3-rc4`).
///
/// Everything else, including `None` and the empty string, is `false`.
fn is_immutable_git_rev(rev: Option<&str>) -> bool {
    let rev = match rev {
        Some(text) => text,
        None => return false,
    };

    let sha_like = (7..=40).contains(&rev.len())
        && rev
            .bytes()
            .all(|b| b.is_ascii_digit() || (b'a'..=b'f').contains(&b));
    if sha_like {
        return true;
    }

    let version = rev.strip_prefix('v').unwrap_or(rev);
    let mut fields = version.split('.');
    match (fields.next(), fields.next()) {
        (Some(major), Some(minor)) => {
            let major_is_number = !major.is_empty() && major.bytes().all(|b| b.is_ascii_digit());
            // Only the start of the second field must be numeric, so suffixes
            // such as `2-pre` are allowed.
            let minor_starts_numeric = minor.starts_with(|c: char| c.is_ascii_digit());
            major_is_number && minor_starts_numeric
        }
        _ => false,
    }
}
/// Copies the top level of `src` into `dest`, recursing into directories.
///
/// At this level, an entry whose path equals `skip_path` and any entry named
/// `.git` are left out. Symlinks are never copied, and entries that are
/// neither regular files nor directories are ignored.
///
/// # Errors
///
/// Propagates the first I/O error from listing, creating or copying.
fn copy_dir_contents(src: &Path, dest: &Path, skip_path: &Path) -> std::io::Result<()> {
    std::fs::read_dir(src)?.try_for_each(|item| {
        let item = item?;
        let from = item.path();
        if from == skip_path {
            return Ok(());
        }
        let name = item.file_name();
        if name == ".git" {
            return Ok(());
        }
        let to = dest.join(&name);
        // `DirEntry::file_type` does not follow symlinks, so a link to a
        // directory is seen as a link and skipped below.
        let kind = item.file_type()?;
        if kind.is_symlink() {
            return Ok(());
        }
        if kind.is_dir() {
            std::fs::create_dir_all(&to)?;
            copy_dir_contents_no_skip(&from, &to)?;
        } else if kind.is_file() {
            std::fs::copy(&from, &to)?;
        }
        Ok(())
    })
}
/// Recursively copies every regular file and directory from `src` into
/// `dest`.
///
/// Symlinks are skipped, and entries that are neither regular files nor
/// directories are ignored. `dest` itself must already exist; nested
/// directories are created as needed.
///
/// # Errors
///
/// Propagates the first I/O error from listing, creating or copying.
fn copy_dir_contents_no_skip(src: &Path, dest: &Path) -> std::io::Result<()> {
    std::fs::read_dir(src)?.try_for_each(|item| {
        let item = item?;
        let from = item.path();
        let to = dest.join(item.file_name());
        // `DirEntry::file_type` does not follow symlinks.
        let kind = item.file_type()?;
        if kind.is_symlink() {
            return Ok(());
        }
        if kind.is_dir() {
            std::fs::create_dir_all(&to)?;
            copy_dir_contents_no_skip(&from, &to)?;
        } else if kind.is_file() {
            std::fs::copy(&from, &to)?;
        }
        Ok(())
    })
}
// Unit tests for the pure helpers behind `GitFetcher`: revision
// classification, URL reconstruction, stderr classification, and subdir
// validation/joining. None of them runs `git` or touches the network.
#[cfg(test)]
mod git_helper_tests {
use super::*;
// A full 40-character lowercase SHA counts as immutable.
#[test]
fn immutable_rev_full_sha() {
assert!(is_immutable_git_rev(Some(
"a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0"
)));
}
// Seven characters is the shortest accepted abbreviated SHA.
#[test]
fn immutable_rev_short_sha_seven_chars() {
assert!(is_immutable_git_rev(Some("a1b2c3d")));
}
// Six hex characters is below the minimum length.
#[test]
fn immutable_rev_rejects_sha_below_seven_chars() {
assert!(!is_immutable_git_rev(Some("abc123")));
}
// Only lowercase hex digits are recognised as a SHA.
#[test]
fn immutable_rev_rejects_uppercase_hex() {
assert!(!is_immutable_git_rev(Some("ABC1234")));
}
// `v`-prefixed version tags are immutable.
#[test]
fn immutable_rev_semver_tag_with_v_prefix() {
assert!(is_immutable_git_rev(Some("v1.2.0")));
assert!(is_immutable_git_rev(Some("v0.14.0")));
}
// The `v` prefix is optional, and two fields are enough.
#[test]
fn immutable_rev_semver_tag_without_v_prefix() {
assert!(is_immutable_git_rev(Some("1.2")));
assert!(is_immutable_git_rev(Some("1.2.3")));
}
// Suffixes after the numeric part of the second field are tolerated.
#[test]
fn immutable_rev_semver_tag_with_decoration() {
assert!(is_immutable_git_rev(Some("v1.2.3-rc4")));
assert!(is_immutable_git_rev(Some("1.2-pre")));
}
// A lone number (no dot) is not treated as a version tag.
#[test]
fn immutable_rev_rejects_single_digit_branch_lookalike() {
assert!(!is_immutable_git_rev(Some("1")));
assert!(!is_immutable_git_rev(Some("v1")));
}
// Ordinary branch names are mutable.
#[test]
fn immutable_rev_rejects_branch_names() {
assert!(!is_immutable_git_rev(Some("main")));
assert!(!is_immutable_git_rev(Some("master")));
assert!(!is_immutable_git_rev(Some("feature/foo")));
assert!(!is_immutable_git_rev(Some("release-2026-05")));
}
// No revision at all is mutable.
#[test]
fn immutable_rev_rejects_none() {
assert!(!is_immutable_git_rev(None));
}
// An empty revision string is mutable.
#[test]
fn immutable_rev_rejects_empty_string() {
assert!(!is_immutable_git_rev(Some("")));
}
// For the `git` scheme the body already is the clone URL.
#[test]
fn reconstruct_url_git_scheme_passes_body_verbatim() {
assert_eq!(
reconstruct_git_url("git", "https://host/path/repo.git"),
"https://host/path/repo.git"
);
assert_eq!(
reconstruct_git_url("git", "git@host:owner/repo.git"),
"git@host:owner/repo.git"
);
assert_eq!(
reconstruct_git_url("git", "file:///tmp/bare"),
"file:///tmp/bare"
);
}
// For `git+ssh` the scheme is put back in front of the body.
#[test]
fn reconstruct_url_git_ssh_scheme_rebuilds_full_url() {
assert_eq!(
reconstruct_git_url("git+ssh", "//git@host/path.git"),
"git+ssh://git@host/path.git"
);
}
// DNS resolution failures are network errors (matching is case-insensitive).
#[test]
fn classify_dns_failure_is_network() {
let err = classify_git_clone_error(
"fatal: unable to access 'https://nonexistent.example/r.git/': Could not resolve host: nonexistent.example",
);
assert!(matches!(err, FetchError::Network { .. }), "got: {err:?}");
}
// Refused connections are network errors.
#[test]
fn classify_connection_refused_is_network() {
let err = classify_git_clone_error(
"fatal: unable to access 'https://localhost:1/r.git/': Failed to connect to localhost port 1: Connection refused",
);
assert!(matches!(err, FetchError::Network { .. }), "got: {err:?}");
}
// SSH public-key rejection is reported as an upstream status.
#[test]
fn classify_auth_failure_is_upstream_status() {
let err = classify_git_clone_error(
"git@github.com: Permission denied (publickey).\nfatal: Could not read from remote repository.",
);
assert!(
matches!(err, FetchError::UpstreamStatus { .. }),
"got: {err:?}"
);
}
// "Repository not found" is grouped with auth failures.
#[test]
fn classify_repository_not_found_is_upstream_status() {
let err = classify_git_clone_error(
"remote: Repository not found.\nfatal: repository 'https://github.com/private/secret.git/' not found",
);
assert!(
matches!(err, FetchError::UpstreamStatus { .. }),
"got: {err:?}"
);
}
// A leading `..` is refused.
#[test]
fn validate_subdir_rejects_parent_dir() {
let err = validate_subdir("../escape").unwrap_err();
assert!(matches!(err, FetchError::Other { .. }), "got: {err:?}");
}
// A `..` anywhere in the path is refused, not just at the start.
#[test]
fn validate_subdir_rejects_parent_dir_in_middle() {
let err = validate_subdir("safe/../escape").unwrap_err();
assert!(matches!(err, FetchError::Other { .. }), "got: {err:?}");
}
// Rooted paths are refused.
#[test]
fn validate_subdir_rejects_absolute_path() {
let err = validate_subdir("/etc/passwd").unwrap_err();
assert!(matches!(err, FetchError::Other { .. }), "got: {err:?}");
}
// Plain relative paths, including a leading `./`, are accepted.
#[test]
fn validate_subdir_accepts_normal_relative_path() {
validate_subdir("labels").unwrap();
validate_subdir("src/labels").unwrap();
validate_subdir("./labels").unwrap();
validate_subdir("a/b/c").unwrap();
}
// Traversal is rejected by the lexical check even though the base
// directory does not exist.
#[test]
fn safe_subdir_join_rejects_traversal_lexically_before_disk_lookup() {
let err = safe_subdir_join(Path::new("/nonexistent"), "../escape").unwrap_err();
assert!(matches!(err, FetchError::Other { .. }), "got: {err:?}");
}
// A subdir that is itself a symlink to a directory outside the base is
// refused, and the message says why.
#[test]
fn safe_subdir_join_refuses_symlink_at_subdir_root() {
let base = tempfile::tempdir().unwrap();
let outside = tempfile::tempdir().unwrap();
std::fs::create_dir(outside.path().join("real")).unwrap();
#[cfg(unix)]
std::os::unix::fs::symlink(outside.path(), base.path().join("labels")).unwrap();
#[cfg(windows)]
std::os::windows::fs::symlink_dir(outside.path(), base.path().join("labels")).unwrap();
let err = safe_subdir_join(base.path(), "labels").unwrap_err();
match err {
FetchError::Other { message } => assert!(
message.contains("symlink"),
"error should mention symlink, got: {message}"
),
other => panic!("expected Other(symlink), got: {other:?}"),
}
}
// A symlink in a non-final component is refused as well.
#[test]
fn safe_subdir_join_refuses_symlink_at_intermediate_component() {
let base = tempfile::tempdir().unwrap();
let outside = tempfile::tempdir().unwrap();
std::fs::create_dir(outside.path().join("labels")).unwrap();
#[cfg(unix)]
std::os::unix::fs::symlink(outside.path(), base.path().join("src")).unwrap();
#[cfg(windows)]
std::os::windows::fs::symlink_dir(outside.path(), base.path().join("src")).unwrap();
let err = safe_subdir_join(base.path(), "src/labels").unwrap_err();
assert!(matches!(err, FetchError::Other { .. }), "got: {err:?}");
}
// Real directories join to the plain concatenated path.
#[test]
fn safe_subdir_join_accepts_normal_path_with_real_directories() {
let base = tempfile::tempdir().unwrap();
std::fs::create_dir_all(base.path().join("src/labels")).unwrap();
let joined = safe_subdir_join(base.path(), "src/labels").unwrap();
assert_eq!(joined, base.path().join("src/labels"));
}
// Missing components are not an error at this stage; the join still
// returns the concatenated path.
#[test]
fn safe_subdir_join_accepts_path_with_missing_tail() {
let base = tempfile::tempdir().unwrap();
let joined = safe_subdir_join(base.path(), "does/not/exist").unwrap();
assert_eq!(joined, base.path().join("does/not/exist"));
}
// An unknown branch matches neither the network nor the auth phrases.
#[test]
fn classify_unknown_ref_falls_through_to_other() {
let err = classify_git_clone_error(
"warning: Could not find remote branch nonexistent to clone.\nfatal: Remote branch nonexistent not found in upstream origin",
);
assert!(matches!(err, FetchError::Other { .. }), "got: {err:?}");
}
}