use crate::finding::{Finding, FindingKind, Location, Tier};
use anyhow::Result;
use std::collections::BTreeSet;
use std::path::{Path, PathBuf};
use walkdir::{DirEntry, WalkDir};
/// Minimum length (in bytes, as reported by `str::len`) a token must have
/// before it is considered as a plain-keyword mention of a changed symbol.
/// Shorter tokens are never reported as keyword findings.
const KEYWORD_MIN_LEN: usize = 6;
/// Scans the tree under `root` for documentation that mentions any of
/// `changed_symbols`.
///
/// Markdown files are scanned line by line first; afterwards the doc-comment
/// lines (`///` / `//!`) of every Rust source file are scanned. Findings are
/// reported with paths relative to `root` whenever the prefix can be stripped.
///
/// Files that cannot be read as UTF-8 text are skipped silently, so this
/// function currently always returns `Ok`. An empty `changed_symbols` set
/// short-circuits to an empty result without touching the filesystem.
pub fn find_doc_drift(root: &Path, changed_symbols: &BTreeSet<String>) -> Result<Vec<Finding>> {
    let mut findings = Vec::new();
    if changed_symbols.is_empty() {
        return Ok(findings);
    }

    // Report locations relative to the scan root; fall back to the full path
    // if the entry is somehow not underneath `root`.
    let relative_to_root = |path: &Path| path.strip_prefix(root).unwrap_or(path).to_path_buf();

    // Pass 1: markdown documents, every line is documentation.
    for entry in walk_docs(root) {
        if let Ok(text) = std::fs::read_to_string(entry.path()) {
            let rel = relative_to_root(entry.path());
            scan_lines(&text, &rel, changed_symbols, &mut findings);
        }
    }

    // Pass 2: Rust sources, only doc-comment lines are documentation.
    for entry in walk_rust(root) {
        if let Ok(text) = std::fs::read_to_string(entry.path()) {
            let rel = relative_to_root(entry.path());
            let doc_lines = extract_doc_lines(&text);
            scan_lines_with_source_map(&doc_lines, &rel, changed_symbols, &mut findings);
        }
    }

    Ok(findings)
}
/// Yields every entry under `root` whose file extension is exactly `md`.
///
/// Entries rejected by `is_skippable` are pruned from the walk, and entries
/// the walker reports as errors are dropped.
fn walk_docs(root: &Path) -> impl Iterator<Item = DirEntry> {
    let walker = WalkDir::new(root)
        .into_iter()
        .filter_entry(|entry| !is_skippable(entry));
    walker
        .filter_map(|entry| entry.ok())
        .filter(|entry| entry.path().extension().is_some_and(|ext| ext == "md"))
}
/// Yields every entry under `root` whose file extension is exactly `rs`.
///
/// Entries rejected by `is_skippable` are pruned from the walk, and entries
/// the walker reports as errors are dropped.
fn walk_rust(root: &Path) -> impl Iterator<Item = DirEntry> {
    let has_rs_extension =
        |entry: &DirEntry| entry.path().extension().is_some_and(|ext| ext == "rs");
    WalkDir::new(root)
        .into_iter()
        .filter_entry(|entry| !is_skippable(entry))
        .filter_map(|entry| entry.ok())
        .filter(has_rs_extension)
}
/// Decides whether a walked entry should be excluded from the scan.
///
/// The walk root itself (depth 0) is never skipped, whatever its name.
/// Below the root, an entry is skipped when it is a directory whose name
/// starts with `.`, or when its name is exactly `target`.
fn is_skippable(entry: &DirEntry) -> bool {
    let is_root = entry.depth() == 0;
    if is_root {
        return false;
    }

    let name = entry.file_name().to_string_lossy();
    if name == "target" {
        return true;
    }
    // Dot-prefixed *files* are still scanned; only dot-directories are pruned.
    entry.file_type().is_dir() && name.starts_with('.')
}
/// Extracts the text of every line doc comment in `src`.
///
/// A line counts as a doc comment when, after leading whitespace, it starts
/// with `///` (outer doc) or `//!` (inner doc). Lines starting with four or
/// more slashes (`////…`) are ordinary comments in Rust and are ignored.
///
/// Returns `(line_number, text)` pairs in source order. `line_number` is
/// 1-based and saturates at `u32::MAX`; `text` is the comment body with the
/// marker and at most one following space removed.
fn extract_doc_lines(src: &str) -> Vec<(u32, String)> {
    src.lines()
        .enumerate()
        .filter_map(|(idx, line)| {
            let trimmed = line.trim_start();
            let body = match trimmed.strip_prefix("///") {
                // `////` and longer runs are plain comments, not doc comments.
                Some(rest) if rest.starts_with('/') => return None,
                Some(rest) => rest,
                None => trimmed.strip_prefix("//!")?,
            };
            // Drop only the single conventional space after the marker so
            // that deeper indentation inside the comment is preserved.
            let cleaned = body.strip_prefix(' ').unwrap_or(body);
            let lineno = u32::try_from(idx + 1).unwrap_or(u32::MAX);
            Some((lineno, cleaned.to_string()))
        })
        .collect()
}
fn scan_lines(src: &str, rel: &Path, changed: &BTreeSet<String>, out: &mut Vec<Finding>) {
let mut seen_link: BTreeSet<(String, PathBuf)> = BTreeSet::new();
let mut seen_keyword: BTreeSet<(String, PathBuf)> = BTreeSet::new();
for (idx, line) in src.lines().enumerate() {
let lineno = u32::try_from(idx + 1).unwrap_or(u32::MAX);
emit_for_line(
line,
lineno,
rel,
changed,
&mut seen_link,
&mut seen_keyword,
out,
);
}
}
fn scan_lines_with_source_map(
doc_lines: &[(u32, String)],
rel: &Path,
changed: &BTreeSet<String>,
out: &mut Vec<Finding>,
) {
let mut seen_link: BTreeSet<(String, PathBuf)> = BTreeSet::new();
let mut seen_keyword: BTreeSet<(String, PathBuf)> = BTreeSet::new();
for (lineno, line) in doc_lines {
emit_for_line(
line,
*lineno,
rel,
changed,
&mut seen_link,
&mut seen_keyword,
out,
);
}
}
/// Emits findings for a single documentation line.
///
/// Two kinds of mention are detected, in this order:
///
/// 1. Bracketed references (`[Symbol]`, ``[`Symbol::item`]``) whose leading
///    identifier is in `changed` produce a `DocDriftLink` finding
///    (`Tier::Likely`, confidence 0.90).
/// 2. Plain word tokens of at least `KEYWORD_MIN_LEN` bytes that are in
///    `changed` produce a `DocDriftKeyword` finding (`Tier::Possible`,
///    confidence 0.40).
///
/// `seen_link` / `seen_keyword` hold the `(symbol, file)` pairs already
/// handled, so each symbol is reported at most once per kind for the state
/// shared by the caller. A keyword finding is suppressed when the same pair is
/// already in `seen_link`; because links are processed first, this covers a
/// link on the same line or on an earlier line, but not one that only appears
/// on a later line.
// The arguments are the per-line inputs plus the caller-owned accumulation
// state; they are passed individually rather than bundled into a struct.
#[allow(clippy::too_many_arguments)]
fn emit_for_line(
    line: &str,
    lineno: u32,
    rel: &Path,
    changed: &BTreeSet<String>,
    seen_link: &mut BTreeSet<(String, PathBuf)>,
    seen_keyword: &mut BTreeSet<(String, PathBuf)>,
    out: &mut Vec<Finding>,
) {
    // Bracketed (intra-doc link style) references.
    for bracketed in extract_bracketed(line) {
        if !changed.contains(&bracketed) {
            continue;
        }
        let first_time = seen_link.insert((bracketed.clone(), rel.to_path_buf()));
        if !first_time {
            continue;
        }
        let evidence = format!(
            "`{bracketed}` referenced via intra-doc link in {}:{lineno}",
            rel.display()
        );
        let kind = FindingKind::DocDriftLink {
            symbol: bracketed.clone(),
            doc: Location {
                file: rel.to_path_buf(),
                symbol: bracketed,
            },
            line: lineno,
        };
        out.push(Finding::new("", Tier::Likely, 0.90, kind, evidence));
    }

    // Plain keyword mentions: split on anything that cannot be part of an
    // identifier-like word.
    let is_word_char = |c: char| c.is_alphanumeric() || c == '_';
    for tok in line.split(|c: char| !is_word_char(c)) {
        if tok.len() < KEYWORD_MIN_LEN || !changed.contains(tok) {
            continue;
        }
        let key = (tok.to_string(), rel.to_path_buf());
        let already_linked = seen_link.contains(&key);
        // Always record the keyword, even when suppressed by a link.
        let first_time = seen_keyword.insert(key);
        if !first_time || already_linked {
            continue;
        }
        let evidence = format!(
            "`{tok}` mentioned in {}:{lineno} (plain keyword, not an intra-doc link)",
            rel.display()
        );
        let kind = FindingKind::DocDriftKeyword {
            symbol: tok.to_string(),
            doc: Location {
                file: rel.to_path_buf(),
                symbol: tok.to_string(),
            },
            line: lineno,
        };
        out.push(Finding::new("", Tier::Possible, 0.40, kind, evidence));
    }
}
fn extract_bracketed(line: &str) -> Vec<String> {
let bytes = line.as_bytes();
let mut out = Vec::new();
let mut i = 0;
while i < bytes.len() {
if bytes[i] == b'['
&& let Some(close) = line[i + 1..].find(']')
{
let inner = &line[i + 1..i + 1 + close];
let after = i + 2 + close;
if after < bytes.len() && bytes[after] == b'(' {
i = after;
continue;
}
let stripped = inner.trim().trim_matches('`');
if let Some(ident) = leading_ident(stripped) {
out.push(ident);
}
i = after;
continue;
}
i += 1;
}
out
}
/// Returns the first `::`-separated segment of `s` (trimmed) when that
/// segment is a plain identifier, e.g. `Greeter::hi` yields `Greeter`.
/// Returns `None` when the first segment is empty or not a plain identifier.
fn leading_ident(s: &str) -> Option<String> {
    let first_segment = match s.split_once("::") {
        Some((head, _)) => head,
        None => s,
    }
    .trim();

    is_plain_ident(first_segment).then(|| first_segment.to_string())
}
/// Returns `true` when `s` is non-empty, starts with an alphabetic character
/// or `_`, and consists solely of alphanumeric characters and `_`.
fn is_plain_ident(s: &str) -> bool {
    let mut chars = s.chars();
    match chars.next() {
        Some(first) if first.is_alphabetic() || first == '_' => {
            chars.all(|c| c.is_alphanumeric() || c == '_')
        }
        _ => false,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Builds an owned symbol set from string literals.
    fn symbols(names: &[&str]) -> BTreeSet<String> {
        names.iter().copied().map(String::from).collect()
    }

    /// Creates a temporary directory holding a single file at `rel_path`
    /// (parent directories are created as needed) with the given contents.
    /// The returned guard must be kept alive for the duration of the test.
    fn fixture(rel_path: &str, contents: &str) -> TempDir {
        let dir = TempDir::new().unwrap();
        let full = dir.path().join(rel_path);
        if let Some(parent) = full.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(full, contents).unwrap();
        dir
    }

    /// Returns the kind tag of every finding, in order.
    fn tags(hits: &[Finding]) -> Vec<&str> {
        hits.iter().map(|h| h.kind.tag()).collect()
    }

    #[test]
    fn extracts_bracketed_intra_doc_link_with_backticks() {
        let expected = vec!["PaymentGateway".to_string()];
        assert_eq!(
            extract_bracketed("See [`PaymentGateway`] for details."),
            expected
        );
    }

    #[test]
    fn extracts_bracketed_plain_form() {
        let expected = vec!["Greeter".to_string()];
        assert_eq!(extract_bracketed("See [Greeter] for the trait."), expected);
    }

    #[test]
    fn ignores_markdown_url_link() {
        assert!(extract_bracketed("[docs](https://example.com)").is_empty());
    }

    #[test]
    fn flags_intra_doc_link_in_markdown_file() {
        let dir = fixture("docs.md", "See [`Greeter`] for the greeting trait.\n");
        let hits = find_doc_drift(dir.path(), &symbols(&["Greeter"])).unwrap();

        assert_eq!(hits.len(), 1);
        let hit = &hits[0];
        assert_eq!(hit.tier, Tier::Likely);
        assert_eq!(hit.confidence, 0.90);
        match &hit.kind {
            FindingKind::DocDriftLink { symbol, line, .. } => {
                assert_eq!(symbol, "Greeter");
                assert_eq!(*line, 1);
            }
            other => panic!("expected DocDriftLink, got {other:?}"),
        }
    }

    #[test]
    fn flags_intra_doc_link_inside_rust_doc_comment() {
        let dir = fixture(
            "lib.rs",
            "/// Call [`Greeter::hi`] to greet.\npub fn go() {}\n",
        );
        let hits = find_doc_drift(dir.path(), &symbols(&["Greeter"])).unwrap();

        assert_eq!(hits.len(), 1);
        let hit = &hits[0];
        assert_eq!(hit.kind.tag(), "doc_drift_link");
        match &hit.kind {
            FindingKind::DocDriftLink { symbol, line, .. } => {
                assert_eq!(symbol, "Greeter");
                assert_eq!(*line, 1);
            }
            other => panic!("expected DocDriftLink, got {other:?}"),
        }
    }

    #[test]
    fn flags_keyword_mention_only_for_long_identifiers() {
        // `Foo` is a changed symbol too, but is shorter than the keyword
        // minimum length and must not be reported.
        let dir = fixture(
            "doc.md",
            "The Greeter trait is important.\nAlso see Foo.\n",
        );
        let hits = find_doc_drift(dir.path(), &symbols(&["Greeter", "Foo"])).unwrap();

        assert_eq!(tags(&hits), vec!["doc_drift_keyword"]);
        match &hits[0].kind {
            FindingKind::DocDriftKeyword { symbol, .. } => assert_eq!(symbol, "Greeter"),
            other => panic!("expected DocDriftKeyword, got {other:?}"),
        }
    }

    #[test]
    fn link_finding_suppresses_duplicate_keyword_finding() {
        let dir = fixture(
            "doc.md",
            "See [`PaymentGateway`] — the PaymentGateway struct.\n",
        );
        let hits = find_doc_drift(dir.path(), &symbols(&["PaymentGateway"])).unwrap();

        assert_eq!(tags(&hits), vec!["doc_drift_link"]);
    }

    #[test]
    fn skips_target_directory() {
        let dir = fixture("target/doc/generated.md", "See [`Greeter`] in the docs.\n");
        let hits = find_doc_drift(dir.path(), &symbols(&["Greeter"])).unwrap();

        assert!(hits.is_empty());
    }
}