use anyhow::{anyhow, Result};
use once_cell::sync::Lazy;
use regex::Regex;
/// The pieces of a GitHub folder URL needed to fetch it with git.
///
/// Produced by [`parse_github_folder_url`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedGitUrl {
    /// Clone URL of the repository, in the form `https://github.com/<user>/<repo>.git`.
    pub clone_url: String,
    /// Branch named in the URL, or `"HEAD"` when the URL does not name one.
    pub branch: String,
    /// Path inside the repository, without a trailing slash; may be empty.
    pub subdirectory: String,
}
/// Returns `true` when `path_str` begins with one of the URL-style prefixes
/// `https://`, `http://`, `git@` or `file://`.
///
/// The comparison is a plain, case-sensitive prefix check; nothing after the
/// prefix is validated.
pub fn is_git_url(path_str: &str) -> bool {
    const URL_PREFIXES: [&str; 4] = ["https://", "http://", "git@", "file://"];

    URL_PREFIXES
        .iter()
        .any(|prefix| path_str.starts_with(*prefix))
}
/// Matches canonical GitHub browser URLs of the form
/// `https://github.com/<user>/<repo>/(tree|blob)/<branch>[/<path>]`.
///
/// Capture groups:
/// 1. user / organisation
/// 2. repository
/// 3. branch (a single path segment, so branch names containing `/` are not
///    recognised as a whole)
/// 4. optional remaining path (may be absent or empty)
///
/// The pattern is anchored at both ends so the GitHub URL must be the whole
/// input; a GitHub URL embedded inside some other URL or string is not a match.
static GITHUB_TREE_URL_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"^https://github\.com/([^/]+)/([^/]+)/(?:tree|blob)/([^/]+)(?:/(.*))?$")
        .expect("GITHUB_TREE_URL_RE must be a valid regular expression")
});
/// Parses a GitHub browser URL pointing inside a repository into the
/// information needed to clone it and locate the folder.
///
/// Any query string (`?...`) or fragment (`#...`) is discarded first. Two URL
/// shapes are then recognised:
///
/// * **Canonical** — anything matched by `GITHUB_TREE_URL_RE`, i.e.
///   `https://github.com/<user>/<repo>/(tree|blob)/<branch>[/<path>]`. The
///   branch is the segment after `tree`/`blob`; the rest, with trailing
///   slashes trimmed, is the subdirectory.
/// * **Sloppy** — `https://github.com/<user>/<repo>/<path...>`. No branch can
///   be determined, so `branch` is `"HEAD"` and every non-empty segment after
///   the repository becomes the subdirectory.
///
/// In both shapes a single trailing `.git` on the repository segment is
/// dropped before the clone URL is built.
///
/// Returns `None` when neither shape applies: the canonical pattern does not
/// match and the URL does not start with `https://github.com/`, has fewer than
/// three non-empty path segments, or its first segment after the repository is
/// a reserved name (such as `issues` or `pull`). `None` is also returned when
/// the repository name is empty once `.git` is removed.
pub fn parse_github_folder_url(url: &str) -> Option<ParsedGitUrl> {
    // Segments directly after `<user>/<repo>` that are treated as reserved and
    // never as a directory name. `blob` and `tree` only reach this check when
    // the canonical pattern failed, e.g. `.../tree` with no branch.
    const RESERVED_NAMES: &[&str] = &[
        "releases", "tags", "pull", "issues", "actions", "projects", "wiki", "security", "pulse",
        "graphs", "settings", "blob", "tree", "commit", "blame", "find",
    ];

    // Removes at most one trailing `.git`; yields `None` if nothing is left.
    fn repo_name(segment: &str) -> Option<&str> {
        let name = segment.strip_suffix(".git").unwrap_or(segment);
        if name.is_empty() {
            None
        } else {
            Some(name)
        }
    }

    // Query strings and fragments (`?tab=...`, `#L10`) are not part of the
    // repository path, so cut the URL at the first `?` or `#`.
    let url = url
        .split(|c: char| c == '?' || c == '#')
        .next()
        .unwrap_or(url);

    if let Some(caps) = GITHUB_TREE_URL_RE.captures(url) {
        // Groups 1-3 are mandatory in the pattern, so indexing cannot panic
        // once the pattern has matched; group 4 is optional.
        let repo = repo_name(&caps[2])?;
        let subdirectory = caps
            .get(4)
            .map_or("", |m| m.as_str())
            .trim_end_matches('/');
        return Some(ParsedGitUrl {
            clone_url: format!("https://github.com/{}/{}.git", &caps[1], repo),
            branch: caps[3].to_string(),
            subdirectory: subdirectory.to_string(),
        });
    }

    // Sloppy form: <user>/<repo>/<at least one more segment>.
    let path_part = url.strip_prefix("https://github.com/")?;
    let mut segments = path_part.split('/').filter(|s| !s.is_empty());
    let user = segments.next()?;
    let repo = repo_name(segments.next()?)?;
    let rest: Vec<&str> = segments.collect();

    // A bare repository URL has nothing after the repo and is rejected here.
    let first_segment = rest.first()?;
    if RESERVED_NAMES.contains(first_segment) {
        return None;
    }

    Some(ParsedGitUrl {
        clone_url: format!("https://github.com/{}/{}.git", user, repo),
        // The URL names no branch; "HEAD" is used in its place.
        branch: "HEAD".to_string(),
        subdirectory: rest.join("/"),
    })
}
/// Extracts the `(owner, repo)` pair from a GitHub clone URL.
///
/// Both the HTTPS form (`https://github.com/owner/repo[.git]`) and the
/// SCP-style SSH form (`git@github.com:owner/repo[.git]`) are accepted. A
/// trailing `.git` is not included in the returned repository name.
///
/// The host must be exactly `github.com` or end in `.github.com`: it has to
/// be preceded by the start of the string, `/`, `@` or `.`, so look-alike
/// hosts such as `notgithub.com` are rejected.
///
/// # Errors
///
/// Returns an error when `clone_url` does not end in a `github.com` host
/// followed by `owner/repo`.
pub fn parse_clone_url(clone_url: &str) -> Result<(String, String)> {
    static RE: Lazy<Regex> = Lazy::new(|| {
        Regex::new(r"(?:^|[/@.])github\.com[/:]([^/]+)/([^/]+?)(?:\.git)?$")
            .expect("clone URL pattern must be a valid regular expression")
    });

    let (owner, repo) = RE
        .captures(clone_url)
        .and_then(|caps| Some((caps.get(1)?.as_str(), caps.get(2)?.as_str())))
        .ok_or_else(|| anyhow!("Could not parse owner/repo from clone URL: {}", clone_url))?;

    Ok((owner.to_string(), repo.to_string()))
}
#[cfg(test)]
mod tests {
    use super::*;

    const RIPGREP_CLONE_URL: &str = "https://github.com/BurntSushi/ripgrep.git";

    /// Builds the `Some(ParsedGitUrl { .. })` value a successful parse is
    /// expected to return.
    fn parsed(clone_url: &str, branch: &str, subdirectory: &str) -> Option<ParsedGitUrl> {
        Some(ParsedGitUrl {
            clone_url: clone_url.to_owned(),
            branch: branch.to_owned(),
            subdirectory: subdirectory.to_owned(),
        })
    }

    // Canonical `/tree/<branch>/<path>` URL: branch and path are split apart.
    #[test]
    fn test_parse_github_folder_url_valid() {
        let url = "https://github.com/BurntSushi/ripgrep/tree/master/crates/ignore";
        assert_eq!(
            parse_github_folder_url(url),
            parsed(RIPGREP_CLONE_URL, "master", "crates/ignore")
        );
    }

    // Without `/tree/`, "master" cannot be told apart from a directory name,
    // so it stays in the subdirectory and the branch falls back to "HEAD".
    #[test]
    fn test_parse_github_sloppy_url_no_tree_assumes_default_branch() {
        let url = "https://github.com/BurntSushi/ripgrep/master/crates/ignore";
        assert_eq!(
            parse_github_folder_url(url),
            parsed(RIPGREP_CLONE_URL, "HEAD", "master/crates/ignore")
        );
    }

    #[test]
    fn test_parse_github_sloppy_url_no_branch() {
        let url = "https://github.com/BurntSushi/ripgrep/crates/ignore";
        assert_eq!(
            parse_github_folder_url(url),
            parsed(RIPGREP_CLONE_URL, "HEAD", "crates/ignore")
        );
    }

    // A `.git` suffix on the repository segment must not be doubled in the
    // resulting clone URL.
    #[test]
    fn test_parse_github_sloppy_url_with_git_suffix() {
        let url = "https://github.com/BurntSushi/ripgrep.git/master/crates/ignore";
        assert_eq!(
            parse_github_folder_url(url),
            parsed(RIPGREP_CLONE_URL, "HEAD", "master/crates/ignore")
        );
    }

    // Bare repository URLs carry no folder and are not folder URLs.
    #[test]
    fn test_parse_github_url_rejects_root() {
        let plain_root = parse_github_folder_url("https://github.com/rust-lang/rust");
        assert_eq!(plain_root, None);

        let dot_git_root = parse_github_folder_url("https://github.com/rust-lang/rust.git");
        assert_eq!(dot_git_root, None);
    }

    #[test]
    fn test_parse_github_url_rejects_reserved_paths() {
        // `/blob/<branch>/<path>` is a canonical URL and is accepted.
        assert_eq!(
            parse_github_folder_url("https://github.com/user/repo/blob/master/file.txt"),
            parsed("https://github.com/user/repo.git", "master", "file.txt")
        );

        // Reserved GitHub pages and non-GitHub hosts are rejected.
        let rejected_urls = [
            "https://github.com/user/repo/issues/1",
            "https://github.com/user/repo/pull/2",
            "https://gitlab.com/user/repo/tree/master",
        ];
        for rejected in rejected_urls.iter() {
            assert_eq!(parse_github_folder_url(rejected), None);
        }
    }
}