use regex::Regex;
use std::collections::BTreeSet;
/// Returns the regex matching a file reference wrapped in square brackets,
/// e.g. `[src/main.rs]` or `[src/main.rs:42]`.
///
/// Capture group 1 holds the text between the brackets: any run of
/// non-`]` characters, a dot, a lowercase ASCII extension, and an optional
/// `:<digits>` line suffix.
///
/// # Panics
///
/// Panics if the hard-coded pattern fails to compile, which would be a
/// programming error.
fn bracket_pattern() -> Regex {
    // Compile the pattern only once per process; callers get a clone of the
    // cached value, which is much cheaper than recompiling on every call.
    static PATTERN: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
    PATTERN
        .get_or_init(|| {
            Regex::new(r"\[([^\]]+?\.[a-z]+(?::\d+)?)\]")
                .expect("BRACKET_PATTERN: invalid regex - this is a developer error")
        })
        .clone()
}
/// Returns the regex matching a file reference wrapped in parentheses,
/// e.g. `(src/main.rs)`.
///
/// Capture group 1 holds the text between the parentheses: any run of
/// non-`)` characters, a dot, and a lowercase ASCII extension. Unlike the
/// bracket and backtick variants, this pattern does not accept a `:<digits>`
/// line suffix.
///
/// # Panics
///
/// Panics if the hard-coded pattern fails to compile, which would be a
/// programming error.
fn paren_pattern() -> Regex {
    // Compile the pattern only once per process; callers get a clone of the
    // cached value, which is much cheaper than recompiling on every call.
    static PATTERN: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
    PATTERN
        .get_or_init(|| {
            Regex::new(r"\(([^\)]+?\.[a-z]+)\)")
                .expect("PAREN_PATTERN: invalid regex - this is a developer error")
        })
        .clone()
}
/// Returns the regex matching a file reference wrapped in backticks,
/// e.g. `` `src/main.rs` `` or `` `src/main.rs:42` ``.
///
/// Capture group 1 holds the text between the backticks: any run of
/// non-backtick characters, a dot, a lowercase ASCII extension, and an
/// optional `:<digits>` line suffix.
///
/// # Panics
///
/// Panics if the hard-coded pattern fails to compile, which would be a
/// programming error.
fn backtick_pattern() -> Regex {
    // Compile the pattern only once per process; callers get a clone of the
    // cached value, which is much cheaper than recompiling on every call.
    static PATTERN: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
    PATTERN
        .get_or_init(|| {
            Regex::new(r"`([^`]+?\.[a-z]+(?::\d+)?)`")
                .expect("BACKTICK_PATTERN: invalid regex - this is a developer error")
        })
        .clone()
}
/// Returns the regex matching an undelimited `path.ext:line` reference,
/// e.g. `src/main.rs:42`.
///
/// Capture group 1 holds the whole reference: a run of word characters,
/// `/` or `-`, a dot, a lowercase ASCII extension, and a mandatory
/// `:<digits>` line suffix, bounded by word boundaries on both sides.
///
/// # Panics
///
/// Panics if the hard-coded pattern fails to compile, which would be a
/// programming error.
fn bare_pattern() -> Regex {
    // Compile the pattern only once per process; callers get a clone of the
    // cached value, which is much cheaper than recompiling on every call.
    static PATTERN: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
    PATTERN
        .get_or_init(|| {
            Regex::new(r"\b([\w/-]+?\.[a-z]+:\d+)\b")
                .expect("BARE_PATTERN: invalid regex - this is a developer error")
        })
        .clone()
}
/// Collects every file path mentioned in `content`.
///
/// Four notations are scanned: `[path]`, `(path)`, `` `path` `` and bare
/// `path:line`. For the bracket, backtick and bare forms, anything from the
/// first `:` onwards (the line suffix) is dropped before the candidate is
/// checked; the parenthesised form is checked as captured. Only candidates
/// accepted by `looks_like_file_path` are kept.
///
/// The result contains each path once, in ascending string order.
pub fn extract_file_paths_from_issues(content: &str) -> Vec<String> {
    // Each entry pairs a pattern with whether its capture may carry a
    // `:line` suffix that has to be cut off.
    let matchers = [
        (bracket_pattern(), true),
        (paren_pattern(), false),
        (backtick_pattern(), true),
        (bare_pattern(), true),
    ];

    // The ordered set both de-duplicates and sorts the matches.
    let mut unique = BTreeSet::new();
    for (pattern, has_line_suffix) in matchers.iter() {
        for caps in pattern.captures_iter(content) {
            let raw = match caps.get(1) {
                Some(found) => found.as_str().trim(),
                None => continue,
            };
            let candidate = if *has_line_suffix {
                raw.split(':').next().unwrap_or(raw)
            } else {
                raw
            };
            if looks_like_file_path(candidate) {
                unique.insert(candidate.to_string());
            }
        }
    }

    unique.into_iter().collect()
}
/// Heuristically decides whether `s` names a file with a recognised
/// source, config or documentation extension.
///
/// Returns `false` when `s`:
/// - is shorter than four bytes,
/// - looks like a URL (contains `://` or `www.`, or starts with the
///   `http:` / `https:` scheme), or
/// - has no `.`, or its final extension is not in the known list.
///
/// A trailing `:line` suffix after the extension (e.g. `main.rs:42`) is
/// ignored when comparing the extension.
fn looks_like_file_path(s: &str) -> bool {
    const KNOWN_EXTENSIONS: &[&str] = &[
        "rs", "toml", "md", "txt", "json", "yaml", "yml", "xml", "html", "css", "js", "ts", "py",
        "go", "java", "c", "cpp", "h", "hpp", "cs", "rb", "php", "sh", "bash", "zsh",
    ];

    if s.len() < 4 {
        return false;
    }

    // Only reject the URL scheme itself: a plain `starts_with("http")` would
    // also discard real files such as `http_client.rs`.
    let looks_like_url = s.contains("://")
        || s.contains("www.")
        || s.starts_with("http:")
        || s.starts_with("https:");
    if looks_like_url {
        return false;
    }

    // Inspect only the text after the last dot. Matching any dot-separated
    // segment would accept names like `java.lang` or `main.rs.bak`, where a
    // known extension merely appears somewhere before the real one.
    s.rsplit_once('.').map_or(false, |(_, tail)| {
        let ext = tail.split(':').next().unwrap_or(tail);
        KNOWN_EXTENSIONS.contains(&ext)
    })
}