/// File-name suffixes, each including its leading dot, that
/// `looks_like_surfaced_reference` checks a token against with `ends_with`.
///
/// Matching is case-sensitive and purely textual.
pub(crate) const SURFACED_REFERENCE_EXTENSIONS: &[&str] = &[
    ".rs",
    ".md",
    ".toml",
    ".json",
    ".sh",
    ".yml",
    ".yaml",
    ".py",
    ".ts",
    ".tsx",
    ".go",
    ".txt",
    ".sql",
    ".rb",
    ".java",
    ".c",
    ".cpp",
    ".h",
    ".hpp",
    ".html",
    ".css",
    ".lock",
];
/// Bare file names without an extension that `looks_like_surfaced_reference`
/// accepts for tokens containing no `/`.
///
/// The token must equal one of these entries exactly (case-sensitive).
pub(crate) const SURFACED_EXTENSIONLESS_FILENAMES: &[&str] = &[
    // Build descriptions.
    "Dockerfile",
    "Makefile",
    // Upper-case project documents.
    "LICENSE",
    "COPYING",
    "NOTICE",
    "README",
    "CHANGELOG",
    "CONTRIBUTING",
    "AUTHORS",
    "MAINTAINERS",
    "CODEOWNERS",
    // Capitalised tool manifests.
    "Gemfile",
    "Rakefile",
    "Procfile",
];
/// Leading path fragments that `starts_with_known_root` tests a token against
/// with `starts_with`.
///
/// Every entry ends in `/`, so only a complete leading path segment matches
/// (`src/x` does, `srcx/y` does not).
const SURFACED_REFERENCE_ROOT_PREFIXES: &[&str] = &[
    // Plain top-level directory names.
    "src/",
    "docs/",
    "tests/",
    "test/",
    "scripts/",
    "examples/",
    "crates/",
    "benches/",
    "assets/",
    "templates/",
    "migrations/",
    "lib/",
    "bin/",
    "cmd/",
    "pkg/",
    "internal/",
    "app/",
    "config/",
    // Dot-directories.
    ".claude/",
    ".ccd/",
    ".github/",
    // Explicit relative and absolute anchors.
    "./",
    "../",
    "/",
];
/// Reports whether `token` begins with any entry of
/// `SURFACED_REFERENCE_ROOT_PREFIXES`.
fn starts_with_known_root(token: &str) -> bool {
    for root in SURFACED_REFERENCE_ROOT_PREFIXES {
        if token.starts_with(*root) {
            return true;
        }
    }
    false
}
/// Detects template-style placeholders that were never filled in.
///
/// Returns `true` when `token`:
/// - contains the literal `YYYY`, `MM-DD`, or `XXXX` anywhere, or
/// - contains at least one ASCII digit and an `XX` pair that is immediately
///   preceded by `-` or `_` and immediately followed by `-`, `_`, `.`, or `/`
///   (for example the day slot in `2026-04-XX-notes.md`).
fn contains_unresolved_placeholder(token: &str) -> bool {
    const LITERAL_MARKERS: [&str; 3] = ["YYYY", "MM-DD", "XXXX"];
    if LITERAL_MARKERS.iter().any(|marker| token.contains(marker)) {
        return true;
    }

    let raw = token.as_bytes();
    // A lone `XX` only counts as a placeholder next to real digits; without
    // any digit in the token it is treated as ordinary text.
    if !raw.iter().any(u8::is_ascii_digit) {
        return false;
    }

    raw.windows(2).enumerate().any(|(pos, pair)| {
        let separator_before = pos > 0 && matches!(raw[pos - 1], b'-' | b'_');
        let separator_after = matches!(
            raw.get(pos + 2).copied(),
            Some(b'-' | b'_' | b'.' | b'/')
        );
        pair == b"XX" && separator_before && separator_after
    })
}
/// Decides whether an already-normalized prose token should be surfaced as a
/// file or path reference.
///
/// Rejected outright:
/// - tokens shorter than two bytes,
/// - `http://` and `https://` URLs,
/// - tokens for which `contains_unresolved_placeholder` returns `true`.
///
/// Otherwise a token containing `/` is accepted when it ends with a known
/// extension or starts with a known root prefix. A token without `/` is
/// accepted when it ends with a known extension or is exactly one of
/// `SURFACED_EXTENSIONLESS_FILENAMES`.
///
/// A token that is nothing but an extension (`.rs`, `.md`, ...) does not count
/// as having a known extension: prose such as "all .md files" names a file
/// type, not a file.
fn looks_like_surfaced_reference(token: &str) -> bool {
    if token.len() < 2 {
        return false;
    }
    if token.starts_with("http://") || token.starts_with("https://") {
        return false;
    }
    if contains_unresolved_placeholder(token) {
        return false;
    }

    // Require at least one byte of stem in front of the extension so that a
    // bare ".rs" is not mistaken for a file name.
    let has_known_extension = SURFACED_REFERENCE_EXTENSIONS
        .iter()
        .any(|ext| token.len() > ext.len() && token.ends_with(ext));

    if token.contains('/') {
        return has_known_extension || starts_with_known_root(token);
    }

    has_known_extension || SURFACED_EXTENSIONLESS_FILENAMES.contains(&token)
}
/// Removes a single trailing `.` when it reads as sentence punctuation.
///
/// The period is dropped only if the character right before it is
/// alphanumeric or one of `` ` ``, `"`, `'`, `)`, `]`, `}`. In every other
/// case — no trailing period, a lone `.`, or a period after some other
/// character such as another `.` — `text` is returned unchanged.
fn strip_sentence_period(text: &str) -> &str {
    let Some(body) = text.strip_suffix('.') else {
        return text;
    };

    let ends_a_word = matches!(
        body.chars().next_back(),
        Some(c) if c.is_alphanumeric() || matches!(c, '`' | '"' | '\'' | ')' | ']' | '}')
    );

    if ends_a_word {
        body
    } else {
        text
    }
}
/// Peels surrounding prose punctuation off `token` until nothing more comes
/// off.
///
/// Each pass trims `` ` ``, `"`, `'`, `,`, `;`, `:`, `!`, and `?` from both
/// ends and then applies `strip_sentence_period`. Passes repeat because
/// removing one layer can expose another (a period hiding behind a closing
/// backtick, for instance).
fn normalize_prose_token(token: &str) -> &str {
    fn is_wrapping_punctuation(c: char) -> bool {
        matches!(c, '`' | '"' | '\'' | ',' | ';' | ':' | '!' | '?')
    }

    let mut current = token;
    loop {
        let next = strip_sentence_period(current.trim_matches(is_wrapping_punctuation));
        // `next` is always a subslice of `current`, so an unchanged length
        // means this pass removed nothing.
        if next.len() == current.len() {
            break current;
        }
        current = next;
    }
}
pub(crate) fn collect_prose_path_candidates(text: &str, out: &mut Vec<String>) {
for raw in text.split(|c: char| {
c.is_whitespace() || matches!(c, ',' | ';' | '(' | ')' | '[' | ']' | '{' | '}')
}) {
let trimmed = normalize_prose_token(raw);
if looks_like_surfaced_reference(trimmed) {
out.push(trimmed.to_owned());
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `collect_prose_path_candidates` on `text` and returns everything
    /// it surfaced, in order.
    fn candidates_in(text: &str) -> Vec<String> {
        let mut found = Vec::new();
        collect_prose_path_candidates(text, &mut found);
        found
    }

    // --- Tokens that must be surfaced -----------------------------------

    #[test]
    fn collects_known_root_with_extension() {
        let found = candidates_in("see src/state/consistency.rs");
        assert_eq!(found, ["src/state/consistency.rs"]);
    }

    #[test]
    fn collects_docs_path_with_extension() {
        let found = candidates_in("mentioned in docs/dev/2026-04-15-review.md");
        assert_eq!(found, ["docs/dev/2026-04-15-review.md"]);
    }

    #[test]
    fn collects_slash_path_with_known_extension() {
        let found = candidates_in("edit foo/bar.rs please");
        assert_eq!(found, ["foo/bar.rs"]);
    }

    #[test]
    fn collects_bare_known_extensionless_filename() {
        let found = candidates_in("update the Dockerfile now");
        assert_eq!(found, ["Dockerfile"]);
    }

    #[test]
    fn collects_bare_extension_filename() {
        let found = candidates_in("check README.md first");
        assert_eq!(found, ["README.md"]);
    }

    #[test]
    fn collects_leading_dot_slash_anchor() {
        let found = candidates_in("run ./scripts/build.sh");
        assert_eq!(found, ["./scripts/build.sh"]);
    }

    // --- Slash-separated prose that is not a path ------------------------

    #[test]
    fn skips_slash_separated_identifier_without_extension_or_root() {
        let found = candidates_in("state, paths, session, start/dispatch, protected_write");
        assert!(found.is_empty(), "unexpected candidates: {found:?}");
    }

    #[test]
    fn skips_natural_language_slash_noun_phrase() {
        let found = candidates_in("Host/vendor names vary");
        assert!(found.is_empty(), "unexpected candidates: {found:?}");
    }

    #[test]
    fn skips_slash_separated_verb_listing() {
        let found = candidates_in("retire/rename/defend the surface");
        assert!(found.is_empty(), "unexpected candidates: {found:?}");
    }

    #[test]
    fn skips_slash_separated_identifier_listing() {
        let found = candidates_in("execution_gates/lease/escalation flow");
        assert!(found.is_empty(), "unexpected candidates: {found:?}");
    }

    // --- Template placeholders that were never filled in -----------------

    #[test]
    fn skips_unresolved_xx_day_placeholder_in_dated_filename() {
        let found = candidates_in("write docs/dev/2026-04-XX-kernel-review-summary.md later");
        assert!(found.is_empty(), "unexpected candidates: {found:?}");
    }

    #[test]
    fn skips_xxxx_xx_xx_placeholder() {
        let found = candidates_in("docs/dev/XXXX-XX-XX-template.md placeholder");
        assert!(found.is_empty(), "unexpected candidates: {found:?}");
    }

    #[test]
    fn skips_yyyy_mm_dd_placeholder() {
        let found = candidates_in("docs/dev/YYYY-MM-DD-template.md");
        assert!(found.is_empty(), "unexpected candidates: {found:?}");
    }
}
/// Derives candidate path strings from one free-form "key files" entry.
///
/// Candidates are returned in the order they should be tried:
///
/// 1. If the trimmed entry starts with a backtick and a closing backtick
///    follows, the non-empty text between them is the only candidate.
/// 2. Otherwise the whole entry, with surrounding `` ` ``, `"`, and `'`
///    removed, is a candidate (when non-empty).
/// 3. If that text contains `" - "`, the part before the first occurrence is
///    an additional candidate (when non-empty). Surrounding whitespace and
///    quote characters are removed from it too, so `"src/a.rs" - notes`
///    yields `src/a.rs` rather than `src/a.rs"`.
///
/// The result is empty when the entry holds nothing but whitespace and quote
/// characters.
pub(crate) fn extract_key_files_candidates(text: &str) -> Vec<String> {
    fn is_quote(c: char) -> bool {
        matches!(c, '`' | '"' | '\'')
    }

    let trimmed = text.trim();

    // A leading backtick span is taken as the path verbatim.
    if let Some((path, _)) = trimmed
        .strip_prefix('`')
        .and_then(|rest| rest.split_once('`'))
    {
        if !path.is_empty() {
            return vec![path.to_owned()];
        }
    }

    let mut candidates = Vec::new();
    let stripped = trimmed.trim_matches(is_quote);
    if !stripped.is_empty() {
        candidates.push(stripped.to_owned());
    }

    if let Some((prefix, _)) = stripped.split_once(" - ") {
        // The closing quote of a quoted path sits just before the separator,
        // out of reach of the outer trim above, so it is removed here.
        let prefix = prefix.trim_matches(|c: char| c.is_whitespace() || is_quote(c));
        if !prefix.is_empty() {
            candidates.push(prefix.to_owned());
        }
    }

    candidates
}