use anyhow::{anyhow, Result};
use once_cell::sync::Lazy;
use regex::Regex;
/// Components extracted from a GitHub URL by the parsers in this file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedGitUrl {
/// Clone URL; the parsers in this file build it as `https://github.com/<user>/<repo>.git`.
pub clone_url: String,
/// Branch taken from the URL, or `"HEAD"` when the URL has no `tree`/`blob` segment.
pub branch: String,
/// Path inside the repository; empty when the URL names only a branch.
pub subdirectory: String,
}
/// Reports whether `path_str` begins with one of the prefixes this module treats as a
/// Git remote: `https://`, `http://`, `git@` or `file://`.
///
/// Only the prefix is inspected; the remainder of the string is not validated.
pub fn is_git_url(path_str: &str) -> bool {
    const REMOTE_PREFIXES: [&str; 4] = ["https://", "http://", "git@", "file://"];

    REMOTE_PREFIXES
        .iter()
        .any(|&prefix| path_str.starts_with(prefix))
}
/// Lazily compiled pattern for GitHub `tree`/`blob` URLs.
///
/// Capture 1 is the owner segment, capture 2 the repository segment, and capture 3 is
/// everything after `tree/` or `blob/` (branch plus an optional path).
///
/// NOTE(review): the pattern has no `^` anchor, so it also matches when the GitHub URL
/// appears later inside a longer string — confirm that this is intended.
static GITHUB_TREE_URL_RE: Lazy<Regex> = Lazy::new(|| {
// `unwrap` can only fail if the literal pattern below is not a valid regex.
Regex::new(r"https://github\.com/([^/]+)/([^/]+)/(?:tree|blob)/(.+)").unwrap()
});
/// Parses a GitHub folder/file URL without a branch hint.
///
/// Delegates to [`parse_github_folder_url_with_hint`] with `branch_hint` set to `None`,
/// and returns whatever that function returns.
pub fn parse_github_folder_url(url: &str) -> Option<ParsedGitUrl> {
parse_github_folder_url_with_hint(url, None)
}
/// Parses a GitHub URL that points inside a repository into a clone URL, a branch and a
/// subdirectory.
///
/// Two URL shapes are handled:
///
/// * URLs matched by `GITHUB_TREE_URL_RE`
///   (`https://github.com/<user>/<repo>/tree/<rest>` or `.../blob/<rest>`). `<rest>` holds
///   the branch followed by an optional path. Branch names may contain `/`, so the split is
///   ambiguous from the URL alone: when `branch_hint` is `Some` and `<rest>` either equals
///   the hint or starts with the hint followed by `/`, the hint is used as the branch;
///   otherwise the text before the first `/` is the branch.
/// * URLs starting with `https://github.com/<user>/<repo>/<path...>` that lack a
///   `tree`/`blob` marker. The branch is reported as `"HEAD"` and everything after the
///   repository segment becomes the subdirectory. URLs whose first path segment is one of
///   GitHub's reserved page names (`issues`, `pull`, ...) are rejected.
///
/// In both shapes a trailing `.git` on the repository segment is removed before the clone
/// URL `https://github.com/<user>/<repo>.git` is built.
///
/// Returns `None` when the URL fits neither shape, when the second shape has fewer than
/// three non-empty path segments or a reserved first segment, or when a `tree`/`blob` URL
/// has nothing left after its trailing slashes are removed (no branch to report).
pub fn parse_github_folder_url_with_hint(
    url: &str,
    branch_hint: Option<&str>,
) -> Option<ParsedGitUrl> {
    // First path segments that denote GitHub pages rather than repository content.
    const RESERVED_NAMES: &[&str] = &[
        "releases", "tags", "pull", "issues", "actions", "projects", "wiki", "security", "pulse",
        "graphs", "settings", "blob", "tree", "commit", "blame", "find",
    ];

    if let Some(caps) = GITHUB_TREE_URL_RE.captures(url) {
        let user = caps.get(1)?.as_str();
        // Strip `.git` here as well so `.../repo.git/tree/...` does not yield `repo.git.git`,
        // matching what the fallback branch below does.
        let repo = caps.get(2)?.as_str().trim_end_matches(".git");
        let rest = caps.get(3)?.as_str().trim_end_matches('/');
        if rest.is_empty() {
            // e.g. `.../tree//`: there is no branch name to return.
            return None;
        }

        // The hint only applies when it is the whole of `rest` or is followed by `/`;
        // a bare prefix match such as hint `feature` vs. `feature-new/...` must not count.
        let hinted = branch_hint.and_then(|hint| {
            let tail = rest.strip_prefix(hint)?;
            if tail.is_empty() {
                Some((hint.to_string(), String::new()))
            } else {
                tail.strip_prefix('/')
                    .map(|subdir| (hint.to_string(), subdir.to_string()))
            }
        });
        let (branch, subdirectory) = hinted.unwrap_or_else(|| split_at_first_slash(rest));

        return Some(ParsedGitUrl {
            clone_url: format!("https://github.com/{}/{}.git", user, repo),
            branch,
            subdirectory,
        });
    }

    // Fallback: "sloppy" URLs without a `tree`/`blob` marker.
    let path_part = url.strip_prefix("https://github.com/")?;
    let parts: Vec<&str> = path_part.split('/').filter(|s| !s.is_empty()).collect();
    let (user, repo, first_segment) = match parts.as_slice() {
        [user, repo, first_segment, ..] => (*user, repo.trim_end_matches(".git"), *first_segment),
        // Fewer than three segments means the URL names at most the repository root.
        _ => return None,
    };
    if RESERVED_NAMES.contains(&first_segment) {
        return None;
    }

    Some(ParsedGitUrl {
        clone_url: format!("https://github.com/{}/{}.git", user, repo),
        // No branch information is present in this URL shape.
        branch: "HEAD".to_string(),
        subdirectory: parts[2..].join("/"),
    })
}
/// Splits `s` at its first `/` into owned `(before, after)` strings.
///
/// The slash itself is dropped. When `s` contains no `/`, the whole input is returned as
/// the first element and the second element is empty.
fn split_at_first_slash(s: &str) -> (String, String) {
    let (head, tail) = s.split_once('/').unwrap_or((s, ""));
    (head.to_owned(), tail.to_owned())
}
/// Extracts `(owner, repo)` from a GitHub clone URL.
///
/// The URL must contain `github.com` followed by `/` or `:`, then `<owner>/<repo>`, with an
/// optional `.git` suffix at the end — for example `https://github.com/owner/repo.git` or
/// `git@github.com:owner/repo.git`. Trailing slashes are ignored, so
/// `https://github.com/owner/repo/` is accepted too.
///
/// # Errors
///
/// Returns an error naming the original `clone_url` when it does not have that shape.
pub fn parse_clone_url(clone_url: &str) -> Result<(String, String)> {
    static RE: Lazy<Regex> =
        Lazy::new(|| Regex::new(r"github\.com[/:]([^/]+)/([^/]+?)(?:\.git)?$").unwrap());

    // The pattern is anchored with `$`, so a trailing `/` would otherwise defeat the match.
    let normalized = clone_url.trim_end_matches('/');
    let caps = RE
        .captures(normalized)
        .ok_or_else(|| anyhow!("Could not parse owner/repo from clone URL: {}", clone_url))?;

    // Groups 1 and 2 are mandatory in the pattern, so indexing cannot fail after a match.
    Ok((caps[1].to_string(), caps[2].to_string()))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the expected parse result for a `github.com` repository.
    fn parsed(clone_url: &str, branch: &str, subdirectory: &str) -> Option<ParsedGitUrl> {
        Some(ParsedGitUrl {
            clone_url: clone_url.to_string(),
            branch: branch.to_string(),
            subdirectory: subdirectory.to_string(),
        })
    }

    // --- is_git_url ---------------------------------------------------------------------

    #[test]
    fn test_is_git_url_recognizes_remote_prefixes() {
        assert!(is_git_url("https://github.com/user/repo"));
        assert!(is_git_url("http://example.com/repo.git"));
        assert!(is_git_url("git@github.com:user/repo.git"));
        assert!(is_git_url("file:///tmp/repo"));
    }

    #[test]
    fn test_is_git_url_rejects_local_paths() {
        assert!(!is_git_url("/usr/local/src"));
        assert!(!is_git_url("./relative/path"));
        assert!(!is_git_url(""));
    }

    // --- parse_github_folder_url --------------------------------------------------------

    #[test]
    fn test_parse_github_folder_url_valid() {
        let url = "https://github.com/BurntSushi/ripgrep/tree/master/crates/ignore";
        let expected = parsed(
            "https://github.com/BurntSushi/ripgrep.git",
            "master",
            "crates/ignore",
        );
        assert_eq!(parse_github_folder_url(url), expected);
    }

    #[test]
    fn test_parse_github_sloppy_url_no_tree_assumes_default_branch() {
        let url = "https://github.com/BurntSushi/ripgrep/master/crates/ignore";
        let expected = parsed(
            "https://github.com/BurntSushi/ripgrep.git",
            "HEAD",
            "master/crates/ignore",
        );
        assert_eq!(parse_github_folder_url(url), expected);
    }

    #[test]
    fn test_parse_github_sloppy_url_no_branch() {
        let url = "https://github.com/BurntSushi/ripgrep/crates/ignore";
        let expected = parsed(
            "https://github.com/BurntSushi/ripgrep.git",
            "HEAD",
            "crates/ignore",
        );
        assert_eq!(parse_github_folder_url(url), expected);
    }

    #[test]
    fn test_parse_github_sloppy_url_with_git_suffix() {
        let url = "https://github.com/BurntSushi/ripgrep.git/master/crates/ignore";
        let expected = parsed(
            "https://github.com/BurntSushi/ripgrep.git",
            "HEAD",
            "master/crates/ignore",
        );
        assert_eq!(parse_github_folder_url(url), expected);
    }

    #[test]
    fn test_parse_github_url_rejects_root() {
        assert_eq!(
            parse_github_folder_url("https://github.com/rust-lang/rust"),
            None
        );
        assert_eq!(
            parse_github_folder_url("https://github.com/rust-lang/rust.git"),
            None
        );
    }

    /// `blob` appears in the reserved list, but `blob` URLs are handled by the tree/blob
    /// pattern before the reserved-name check is reached.
    #[test]
    fn test_parse_github_blob_url_is_accepted() {
        assert_eq!(
            parse_github_folder_url("https://github.com/user/repo/blob/master/file.txt"),
            parsed("https://github.com/user/repo.git", "master", "file.txt")
        );
    }

    #[test]
    fn test_parse_github_url_rejects_reserved_paths() {
        assert_eq!(
            parse_github_folder_url("https://github.com/user/repo/issues/1"),
            None
        );
        assert_eq!(
            parse_github_folder_url("https://github.com/user/repo/pull/2"),
            None
        );
        assert_eq!(
            parse_github_folder_url("https://gitlab.com/user/repo/tree/master"),
            None
        );
    }

    // --- parse_github_folder_url_with_hint ----------------------------------------------

    #[test]
    fn test_parse_with_hint_simple_branch() {
        let url = "https://github.com/user/repo/tree/main/src";
        let expected = parsed("https://github.com/user/repo.git", "main", "src");
        assert_eq!(parse_github_folder_url_with_hint(url, Some("main")), expected);
    }

    #[test]
    fn test_parse_with_hint_branch_with_slashes() {
        let url = "https://github.com/user/repo/tree/feature/new-ui/src/components";
        let expected = parsed(
            "https://github.com/user/repo.git",
            "feature/new-ui",
            "src/components",
        );
        assert_eq!(
            parse_github_folder_url_with_hint(url, Some("feature/new-ui")),
            expected
        );
    }

    #[test]
    fn test_parse_without_hint_defaults_to_first_slash() {
        let url = "https://github.com/user/repo/tree/feature/new-ui/src";
        let expected = parsed("https://github.com/user/repo.git", "feature", "new-ui/src");
        assert_eq!(parse_github_folder_url_with_hint(url, None), expected);
    }

    #[test]
    fn test_parse_with_hint_exact_match_no_path() {
        let url = "https://github.com/user/repo/tree/release/v1.0";
        let expected = parsed("https://github.com/user/repo.git", "release/v1.0", "");
        assert_eq!(
            parse_github_folder_url_with_hint(url, Some("release/v1.0")),
            expected
        );
    }

    /// A hint that is only a textual prefix of the branch (`feature` vs. `feature-new`)
    /// must be ignored in favour of the first-slash split.
    #[test]
    fn test_parse_with_hint_mismatch_prefix_fallback() {
        let url = "https://github.com/user/repo/tree/feature-new/src";
        let expected = parsed("https://github.com/user/repo.git", "feature-new", "src");
        assert_eq!(
            parse_github_folder_url_with_hint(url, Some("feature")),
            expected
        );
    }

    #[test]
    fn test_parse_with_hint_blob_url() {
        let url = "https://github.com/user/repo/blob/group/feature/file.rs";
        let expected = parsed("https://github.com/user/repo.git", "group/feature", "file.rs");
        assert_eq!(
            parse_github_folder_url_with_hint(url, Some("group/feature")),
            expected
        );
    }

    // --- parse_clone_url ----------------------------------------------------------------

    #[test]
    fn test_parse_clone_url_https_and_ssh() {
        let expected = ("BurntSushi".to_string(), "ripgrep".to_string());
        assert_eq!(
            parse_clone_url("https://github.com/BurntSushi/ripgrep.git").unwrap(),
            expected
        );
        assert_eq!(
            parse_clone_url("git@github.com:BurntSushi/ripgrep.git").unwrap(),
            expected
        );
        assert_eq!(
            parse_clone_url("https://github.com/BurntSushi/ripgrep").unwrap(),
            expected
        );
    }

    #[test]
    fn test_parse_clone_url_rejects_other_hosts() {
        assert!(parse_clone_url("https://gitlab.com/user/repo.git").is_err());
    }
}