/// File-name suffixes, each including its leading dot, that mark a token as
/// ending in a recognised extension.
///
/// `ends_with_known_extension` and `has_nonempty_stem_before_extension` compare
/// tokens against these entries with case-sensitive suffix matching.
pub(crate) const SURFACED_REFERENCE_EXTENSIONS: &[&str] = &[
".rs", ".md", ".toml", ".json", ".sh", ".yml", ".yaml", ".py", ".ts", ".tsx", ".go", ".txt",
".sql", ".rb", ".java", ".c", ".cpp", ".h", ".hpp", ".html", ".css", ".lock",
];
/// Extensionless file names that are accepted as references on their own.
///
/// `looks_like_surfaced_reference` and `looks_like_unquoted_prose_path` fall
/// back to an exact, case-sensitive membership test against this list for
/// tokens that contain no `/`.
pub(crate) const SURFACED_EXTENSIONLESS_FILENAMES: &[&str] = &[
"Dockerfile",
"Makefile",
"LICENSE",
"COPYING",
"NOTICE",
"README",
"CONTRIBUTING",
"AUTHORS",
"MAINTAINERS",
"CODEOWNERS",
"Gemfile",
"Rakefile",
"Procfile",
];
/// Directory prefixes that `has_anchor_or_repo_prefix` accepts as the start of
/// a path-like token.
///
/// Entries are compared with `starts_with`, so each one keeps its trailing
/// slash: `src/` matches `src/lib.rs` but not `srcfoo`.
const REPO_PATH_PREFIXES: &[&str] = &[
"src/",
"docs/",
"tests/",
"test/",
"scripts/",
"skills/",
"specs/",
"templates/",
"examples/",
"crates/",
"benches/",
"assets/",
"migrations/",
"lib/",
"bin/",
"cmd/",
"pkg/",
"internal/",
"app/",
"config/",
".github/",
".claude/",
".gemini/",
".agents/",
".ccd/",
".ccd-hosts/",
];
/// Reports whether `token` starts with a path anchor or a known repository directory.
///
/// Anchors are a leading `/`, `./`, or `../`; the repository directories are the
/// entries of `REPO_PATH_PREFIXES`.
fn has_anchor_or_repo_prefix(token: &str) -> bool {
    const ANCHORS: [&str; 3] = ["/", "./", "../"];
    ANCHORS
        .iter()
        .chain(REPO_PATH_PREFIXES)
        .any(|prefix| token.starts_with(*prefix))
}
/// Returns `true` when `token` ends with any entry of `SURFACED_REFERENCE_EXTENSIONS`.
fn ends_with_known_extension(token: &str) -> bool {
    for extension in SURFACED_REFERENCE_EXTENSIONS {
        if token.ends_with(*extension) {
            return true;
        }
    }
    false
}
/// Checks that something other than `/` precedes the token's known extension.
///
/// The first entry of `SURFACED_REFERENCE_EXTENSIONS` that is a suffix of `token`
/// decides the result: the text before it must contain at least one character
/// that is not `/`. Tokens without any known extension pass trivially.
fn has_nonempty_stem_before_extension(token: &str) -> bool {
    SURFACED_REFERENCE_EXTENSIONS
        .iter()
        .find_map(|ext| token.strip_suffix(*ext))
        // An empty stem has no characters at all, so `any` already rejects it.
        .map_or(true, |stem| stem.chars().any(|c| c != '/'))
}
/// Detects template placeholders that were never filled in.
///
/// A token counts as unresolved when it contains `YYYY`, `MM-DD`, or `XXXX`
/// anywhere, when it has a separator-delimited run of exactly three `N`s, or
/// when it contains at least one ASCII digit together with an `XX` that is
/// preceded by `-`/`_` and followed by `-`, `_`, `.`, or `/`.
fn contains_unresolved_placeholder(token: &str) -> bool {
    const LITERAL_MARKERS: [&str; 3] = ["YYYY", "MM-DD", "XXXX"];
    if LITERAL_MARKERS.iter().any(|marker| token.contains(*marker)) {
        return true;
    }
    if contains_repeat_run_in_filename_context(token, b'N', 3) {
        return true;
    }

    let bytes = token.as_bytes();
    // A two-letter `XX` is only treated as a placeholder next to real digits
    // (e.g. a partially filled date).
    if !bytes.iter().any(u8::is_ascii_digit) {
        return false;
    }

    bytes.windows(2).enumerate().any(|(pos, pair)| {
        pair == b"XX"
            && pos > 0
            && matches!(bytes[pos - 1], b'-' | b'_')
            && matches!(
                bytes.get(pos + 2).copied(),
                Some(b'-') | Some(b'_') | Some(b'.') | Some(b'/')
            )
    })
}
/// Looks for a run of exactly `target` consecutive `ch` bytes that stands alone
/// as a file-name component.
///
/// The run must start at the beginning of `token` or directly after `-`, `_`,
/// or `/`, and must end at the end of `token` or directly before `-`, `_`, `.`,
/// or `/`. Longer or shorter runs never match.
fn contains_repeat_run_in_filename_context(token: &str, ch: u8, target: usize) -> bool {
    let bytes = token.as_bytes();
    let mut start = 0;
    while start < bytes.len() {
        if bytes[start] != ch {
            start += 1;
            continue;
        }

        // Measure the whole run so an over-long run is skipped in one step.
        let run_len = bytes[start..].iter().take_while(|&&b| b == ch).count();
        let end = start + run_len;

        if run_len == target {
            let opens_component = start == 0 || matches!(bytes[start - 1], b'-' | b'_' | b'/');
            let closes_component = match bytes.get(end) {
                None => true,
                Some(next) => matches!(*next, b'-' | b'_' | b'.' | b'/'),
            };
            if opens_component && closes_component {
                return true;
            }
        }
        start = end;
    }
    false
}
/// Recognises namespaced slash commands such as `/codex:review`.
///
/// The token must start with `/`, and its first `/`-delimited segment must be
/// `namespace:command` where both halves are non-empty and consist only of
/// ASCII alphanumerics, `-`, or `_`.
fn looks_like_slash_command(token: &str) -> bool {
    fn is_identifier(part: &str) -> bool {
        !part.is_empty()
            && part
                .bytes()
                .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'-' | b'_'))
    }

    let head = token
        .strip_prefix('/')
        .and_then(|rest| rest.split('/').next());
    match head.and_then(|segment| segment.split_once(':')) {
        Some((namespace, command)) => is_identifier(namespace) && is_identifier(command),
        None => false,
    }
}
fn looks_like_surfaced_reference(token: &str) -> bool {
if token.len() < 2 {
return false;
}
if token.starts_with("http://") || token.starts_with("https://") {
return false;
}
if contains_unresolved_placeholder(token) {
return false;
}
let has_known_extension = ends_with_known_extension(token);
if has_known_extension && !has_nonempty_stem_before_extension(token) {
return false;
}
if token.contains('/') {
return has_known_extension || has_anchor_or_repo_prefix(token);
}
if has_known_extension {
return true;
}
SURFACED_EXTENSIONLESS_FILENAMES.contains(&token)
}
fn looks_like_unquoted_prose_path(token: &str) -> bool {
if token.len() < 2 {
return false;
}
if token.starts_with("http://") || token.starts_with("https://") {
return false;
}
if contains_unresolved_placeholder(token) {
return false;
}
if looks_like_slash_command(token) {
return false;
}
if ends_with_known_extension(token) && !has_nonempty_stem_before_extension(token) {
return false;
}
if has_anchor_or_repo_prefix(token) {
return true;
}
if token.contains('/') {
return false;
}
SURFACED_EXTENSIONLESS_FILENAMES.contains(&token)
}
/// Drops one trailing `.` when it reads as sentence punctuation.
///
/// The period is removed only if the character before it is alphanumeric or one
/// of the closers `` ` ``, `"`, `'`, `)`, `]`, `}`. Anything else (a lone `.`,
/// `..`, `dir/.`) is returned unchanged.
fn strip_sentence_period(text: &str) -> &str {
    let Some(without_period) = text.strip_suffix('.') else {
        return text;
    };
    let ends_a_sentence = without_period.chars().next_back().map_or(false, |prev| {
        prev.is_alphanumeric() || matches!(prev, '`' | '"' | '\'' | ')' | ']' | '}')
    });
    if ends_a_sentence {
        without_period
    } else {
        text
    }
}
/// Peels surrounding quotes and punctuation off a prose token.
///
/// Repeatedly trims `` ` ``, `"`, `'`, `,`, `;`, `:`, `!`, `?` from both ends
/// and then removes a sentence-final period, until a pass changes nothing.
fn normalize_prose_token(token: &str) -> &str {
    const WRAPPING: &[char] = &['`', '"', '\'', ',', ';', ':', '!', '?'];

    let mut current = token;
    loop {
        let next = strip_sentence_period(current.trim_matches(WRAPPING));
        if next == current {
            break current;
        }
        current = next;
    }
}
pub(crate) fn collect_prose_path_candidates(text: &str, out: &mut Vec<String>) {
let mut cursor = 0usize;
while let Some(open_rel) = text[cursor..].find('`') {
let start = cursor + open_rel + 1;
let Some(close_rel) = text[start..].find('`') else {
break;
};
let end = start + close_rel;
let quoted = &text[start..end];
let normalized = normalize_prose_token(quoted);
if looks_like_surfaced_reference(normalized) {
out.push(normalized.to_owned());
}
cursor = end + 1;
}
for raw in text.split(|c: char| {
c.is_whitespace() || matches!(c, ',' | ';' | '(' | ')' | '[' | ']' | '{' | '}')
}) {
if raw.contains('`') {
continue;
}
if raw.contains("::") {
continue;
}
let trimmed = normalize_prose_token(raw);
if looks_like_unquoted_prose_path(trimmed) {
out.push(trimmed.to_owned());
}
}
}
/// Unit tests for `collect_prose_path_candidates`, covering both the
/// backtick-quoted pass and the stricter unquoted-prose pass.
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the collector on `text` and returns the candidates it produced.
    fn collect(text: &str) -> Vec<String> {
        let mut out: Vec<String> = Vec::new();
        collect_prose_path_candidates(text, &mut out);
        out
    }

    #[test]
    fn collects_known_root_with_extension() {
        assert_eq!(
            collect("see src/state/consistency.rs"),
            vec!["src/state/consistency.rs"]
        );
    }

    #[test]
    fn collects_docs_path_with_extension() {
        assert_eq!(
            collect("mentioned in docs/dev/2026-04-15-review.md"),
            vec!["docs/dev/2026-04-15-review.md"]
        );
    }

    #[test]
    fn collects_leading_dot_slash_anchor() {
        assert_eq!(
            collect("run ./scripts/build.sh"),
            vec!["./scripts/build.sh"]
        );
    }

    #[test]
    fn collects_bare_known_extensionless_filename() {
        assert_eq!(collect("update the Dockerfile now"), vec!["Dockerfile"]);
    }

    #[test]
    fn collects_backtick_quoted_bare_extensioned_filename() {
        assert_eq!(collect("check `backlog.md` first"), vec!["backlog.md"]);
    }

    #[test]
    fn collects_backtick_quoted_unprefixed_slashed_path() {
        assert_eq!(
            collect("see `radar/context_check.rs` in review"),
            vec!["radar/context_check.rs"]
        );
    }

    #[test]
    fn skips_slash_separated_identifier_without_extension_or_root() {
        assert!(collect("state, paths, session, start/dispatch, protected_write").is_empty());
    }

    #[test]
    fn skips_natural_language_slash_noun_phrase() {
        assert!(collect("Host/vendor names vary").is_empty());
    }

    #[test]
    fn skips_slash_separated_verb_listing() {
        assert!(collect("retire/rename/defend the surface").is_empty());
    }

    #[test]
    fn skips_slash_separated_identifier_listing() {
        assert!(collect("execution_gates/lease/escalation flow").is_empty());
    }

    #[test]
    fn skips_bare_extensioned_filename_in_prose() {
        assert!(collect("write 2026-04-16-my-note.md later").is_empty());
    }

    #[test]
    fn skips_unresolved_xx_day_placeholder_in_dated_filename() {
        assert!(collect("write docs/dev/2026-04-XX-kernel-review-summary.md later").is_empty());
    }

    #[test]
    fn skips_xxxx_xx_xx_placeholder() {
        assert!(collect("docs/dev/XXXX-XX-XX-template.md placeholder").is_empty());
    }

    #[test]
    fn skips_yyyy_mm_dd_placeholder() {
        assert!(collect("docs/dev/YYYY-MM-DD-template.md").is_empty());
    }

    #[test]
    fn skips_bare_extension_token() {
        assert!(collect("touch .rs files in the tree").is_empty());
        assert!(collect(".md").is_empty());
    }

    #[test]
    fn skips_bare_extension_token_even_when_backticked() {
        assert!(collect("we added `.rs` files").is_empty());
    }

    #[test]
    fn skips_bare_changelog_reference() {
        assert!(collect("update the CHANGELOG now").is_empty());
    }

    #[test]
    fn still_collects_backticked_changelog_md() {
        assert_eq!(
            collect("update `CHANGELOG.md` for the release"),
            vec!["CHANGELOG.md"]
        );
    }

    #[test]
    fn skips_nnn_placeholder_in_dated_filename() {
        assert!(collect("write docs/dev/NNN-short-slug.md later").is_empty());
    }

    #[test]
    fn skips_nnn_placeholder_bare() {
        assert!(collect("template NNN-slug.md is a placeholder").is_empty());
    }

    #[test]
    fn does_not_treat_nn_or_nnnn_as_nnn_placeholder() {
        // An `N` run glued to another letter is not in component position.
        assert_eq!(
            collect("docs/dev/CNN-report.md exists"),
            vec!["docs/dev/CNN-report.md"]
        );
        // Separator-delimited runs of two or four `N`s pin the exact-length
        // rule: only a run of exactly three is a placeholder.
        assert_eq!(
            collect("docs/dev/NN-report.md exists"),
            vec!["docs/dev/NN-report.md"]
        );
        assert_eq!(
            collect("docs/dev/NNNN-report.md exists"),
            vec!["docs/dev/NNNN-report.md"]
        );
    }

    #[test]
    fn skips_rust_module_notation() {
        assert!(
            collect("trace radar/context_check.rs::build_context_check_decision for detail")
                .is_empty()
        );
    }

    #[test]
    fn skips_unprefixed_slashed_rust_path_in_unquoted_prose() {
        assert!(collect("see radar/context_check.rs for detail").is_empty());
    }

    #[test]
    fn skips_slash_command_identifier() {
        assert!(collect("run /codex:review before merging").is_empty());
        assert!(collect("run /codex:rescue if blocked").is_empty());
    }

    #[test]
    fn does_not_confuse_slash_command_with_absolute_path() {
        assert_eq!(
            collect("ship /usr/local/bin/ccd with the package"),
            vec!["/usr/local/bin/ccd"]
        );
    }
}
/// Extracts candidate file paths from a single "key files" entry.
///
/// Resolution order:
/// 1. If the trimmed entry starts with a backtick span that is closed and
///    non-empty, that span is the only candidate returned.
/// 2. Otherwise the entry with surrounding `` ` ``, `"`, `'` removed is a
///    candidate (when non-empty).
/// 3. If that text contains `" - "`, the part before the first occurrence is
///    added as a second candidate, with whitespace and quote characters
///    trimmed. Trimming the quotes matters for entries such as
///    `"src/main.rs" - entry point`, where the closing quote would otherwise
///    stay attached to the path.
///
/// Returns the candidates in the order above; the list may be empty.
pub(crate) fn extract_key_files_candidates(text: &str) -> Vec<String> {
    const QUOTES: &[char] = &['`', '"', '\''];

    let trimmed = text.trim();

    let leading_span = trimmed
        .strip_prefix('`')
        .and_then(|rest| rest.split_once('`'))
        .map(|(path, _)| path)
        .filter(|path| !path.is_empty());
    if let Some(path) = leading_span {
        return vec![path.to_owned()];
    }

    let mut candidates: Vec<String> = Vec::new();
    let stripped = trimmed.trim_matches(QUOTES);
    if !stripped.is_empty() {
        candidates.push(stripped.to_owned());
    }
    if let Some((prefix, _)) = stripped.split_once(" - ") {
        // The prefix is always strictly shorter than `stripped`, so it can
        // never duplicate the first candidate.
        let prefix = prefix.trim().trim_matches(QUOTES).trim();
        if !prefix.is_empty() {
            candidates.push(prefix.to_owned());
        }
    }
    candidates
}