/// File-name suffixes (each including the leading dot) that mark a token as a
/// surfaced file reference.
///
/// `looks_like_surfaced_reference` accepts any token that ends with one of
/// these strings.
pub(crate) const SURFACED_REFERENCE_EXTENSIONS: &[&str] = &[
    ".rs", ".md", ".toml", ".json", ".sh", ".yml", ".yaml", ".py", ".ts", ".tsx", ".go", ".txt",
    ".sql", ".rb", ".java", ".c", ".cpp", ".h", ".hpp", ".html", ".css", ".lock",
];
/// Well-known file names that carry no extension but still count as surfaced
/// file references.
///
/// `looks_like_surfaced_reference` accepts a token only when it equals one of
/// these names exactly (the comparison is case-sensitive).
pub(crate) const SURFACED_EXTENSIONLESS_FILENAMES: &[&str] = &[
    "Dockerfile",
    "Makefile",
    "LICENSE",
    "COPYING",
    "NOTICE",
    "README",
    "CHANGELOG",
    "CONTRIBUTING",
    "AUTHORS",
    "MAINTAINERS",
    "CODEOWNERS",
    "Gemfile",
    "Rakefile",
    "Procfile",
];
/// Heuristically decides whether `token` looks like a file or path reference.
///
/// Returns `false` for tokens shorter than two bytes and for tokens that start
/// with `http://` or `https://`. Otherwise returns `true` when the token
/// contains a `/`, ends with one of [`SURFACED_REFERENCE_EXTENSIONS`], or is
/// exactly one of [`SURFACED_EXTENSIONLESS_FILENAMES`].
fn looks_like_surfaced_reference(token: &str) -> bool {
    let too_short = token.len() < 2;
    let is_web_url = token.starts_with("http://") || token.starts_with("https://");
    if too_short || is_web_url {
        return false;
    }

    // Any one of the three signals is enough to accept the token.
    token.contains('/')
        || SURFACED_REFERENCE_EXTENSIONS
            .iter()
            .any(|suffix| token.ends_with(suffix))
        || SURFACED_EXTENSIONLESS_FILENAMES.contains(&token)
}
/// Removes a single trailing `.` when it appears to end a sentence.
///
/// The period is dropped only if the character immediately before it is
/// alphanumeric or one of the closers `` ` ``, `"`, `'`, `)`, `]`, `}`.
/// In every other case (no trailing period, a lone `"."`, or a period that
/// follows some other character) `text` is returned unchanged.
fn strip_sentence_period(text: &str) -> &str {
    let Some(body) = text.strip_suffix('.') else {
        return text;
    };

    // An empty `body` (input was just ".") never satisfies `ends_with`, so the
    // lone period is kept.
    let period_ends_sentence = body.ends_with(|prev: char| {
        prev.is_alphanumeric() || matches!(prev, '`' | '"' | '\'' | ')' | ']' | '}')
    });

    if period_ends_sentence {
        body
    } else {
        text
    }
}
/// Peels surrounding prose punctuation off `token` until nothing more changes.
///
/// Each pass trims backticks, quotes and `, ; : ! ?` from both ends and then
/// applies `strip_sentence_period`. Passes repeat because removing one layer
/// can expose another (for example a period hidden behind a closing quote).
/// The result is a sub-slice of the input.
fn normalize_prose_token(token: &str) -> &str {
    const EDGE_PUNCTUATION: &[char] = &['`', '"', '\'', ',', ';', ':', '!', '?'];

    let mut current = token;
    loop {
        let peeled = strip_sentence_period(current.trim_matches(EDGE_PUNCTUATION));
        // Fixed point reached: this pass removed nothing.
        if peeled == current {
            break current;
        }
        current = peeled;
    }
}
pub(crate) fn collect_prose_path_candidates(text: &str, out: &mut Vec<String>) {
for raw in text.split(|c: char| {
c.is_whitespace() || matches!(c, ',' | ';' | '(' | ')' | '[' | ']' | '{' | '}')
}) {
let trimmed = normalize_prose_token(raw);
if looks_like_surfaced_reference(trimmed) {
out.push(trimmed.to_owned());
}
}
}
/// Derives candidate path strings from one "key files" entry.
///
/// Resolution order:
///
/// 1. If the trimmed entry starts with a backtick and has a matching closing
///    backtick with non-empty content in between, that content is the only
///    candidate returned.
/// 2. Otherwise the entry, with surrounding backticks and quotes trimmed, is
///    the first candidate (if non-empty).
/// 3. If that text contains `" - "`, the part before the first occurrence is
///    added as a second candidate. It is trimmed of whitespace and of
///    surrounding backticks/quotes, so `"src/lib.rs" - core` yields
///    `src/lib.rs` rather than a string with a dangling closing quote.
///
/// Returns an empty vector when nothing usable remains.
pub(crate) fn extract_key_files_candidates(text: &str) -> Vec<String> {
    fn is_quote(c: char) -> bool {
        matches!(c, '`' | '"' | '\'')
    }

    let trimmed = text.trim();

    // A leading `code span` is taken verbatim and short-circuits everything else.
    let code_span = trimmed
        .strip_prefix('`')
        .and_then(|rest| rest.split_once('`'))
        .map(|(path, _)| path)
        .filter(|path| !path.is_empty());
    if let Some(path) = code_span {
        return vec![path.to_owned()];
    }

    let mut candidates = Vec::new();
    let stripped = trimmed.trim_matches(is_quote);
    if !stripped.is_empty() {
        candidates.push(stripped.to_owned());
    }

    if let Some((prefix, _)) = stripped.split_once(" - ") {
        // The outer trim only removed quotes at the very ends of the entry; a
        // quoted path followed by a description still has its closing quote
        // attached to the prefix, so trim the prefix on its own.
        let prefix = prefix.trim().trim_matches(is_quote);
        if !prefix.is_empty() {
            candidates.push(prefix.to_owned());
        }
    }
    candidates
}