Skip to main content

alint_rules/
markdown_paths_resolve.rs

//! `markdown_paths_resolve` — backticked workspace paths in
//! markdown files must resolve to real files or directories.
//!
//! Targets the AGENTS.md / CLAUDE.md staleness problem:
//! agent-context files reference workspace paths in inline
//! backticks (`` `src/api/users.ts` ``), and those paths drift
//! as the codebase evolves. The v0.6 `agent-context-no-stale-paths`
//! rule surfaces *candidate* drift via a regex; this rule does
//! the precise check.
//!
//! Design doc: `docs/design/v0.7/markdown_paths_resolve.md`.
12
13use std::path::Path;
14
15use alint_core::{
16    Context, Error, Level, PerFileRule, Result, Rule, RuleSpec, Scope, ScopeFilter, Violation,
17};
18use serde::Deserialize;
19
20#[derive(Debug, Deserialize)]
21struct Options {
22    /// Whitelist of path-shape prefixes to validate. A backticked
23    /// token must start with one of these to be considered a path
24    /// candidate. No defaults — every project's layout differs and
25    /// the user must declare which prefixes mark a path.
26    prefixes: Vec<String>,
27
28    /// Skip backticked tokens containing template-variable
29    /// markers (`{{ }}`, `${ }`, `<…>`). Default true.
30    #[serde(default = "default_ignore_template_vars")]
31    ignore_template_vars: bool,
32}
33
/// Serde default for `Options::ignore_template_vars`: tokens with
/// template-variable markers are skipped unless the user opts out.
fn default_ignore_template_vars() -> bool {
    const SKIP_TEMPLATE_TOKENS_BY_DEFAULT: bool = true;
    SKIP_TEMPLATE_TOKENS_BY_DEFAULT
}
37
38#[derive(Debug)]
39pub struct MarkdownPathsResolveRule {
40    id: String,
41    level: Level,
42    policy_url: Option<String>,
43    message: Option<String>,
44    scope: Scope,
45    scope_filter: Option<ScopeFilter>,
46    prefixes: Vec<String>,
47    ignore_template_vars: bool,
48}
49
50impl Rule for MarkdownPathsResolveRule {
51    fn id(&self) -> &str {
52        &self.id
53    }
54    fn level(&self) -> Level {
55        self.level
56    }
57    fn policy_url(&self) -> Option<&str> {
58        self.policy_url.as_deref()
59    }
60    fn path_scope(&self) -> Option<&Scope> {
61        Some(&self.scope)
62    }
63
64    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
65        let mut violations = Vec::new();
66        for entry in ctx.index.files() {
67            if !self.scope.matches(&entry.path) {
68                continue;
69            }
70            if let Some(filter) = &self.scope_filter
71                && !filter.matches(&entry.path, ctx.index)
72            {
73                continue;
74            }
75            let full = ctx.root.join(&entry.path);
76            // Unreadable file: silently skip; a sibling rule can flag it.
77            let Ok(bytes) = std::fs::read(&full) else {
78                continue;
79            };
80            violations.extend(self.evaluate_file(ctx, &entry.path, &bytes)?);
81        }
82        Ok(violations)
83    }
84
85    fn as_per_file(&self) -> Option<&dyn PerFileRule> {
86        Some(self)
87    }
88
89    fn scope_filter(&self) -> Option<&ScopeFilter> {
90        self.scope_filter.as_ref()
91    }
92}
93
94impl PerFileRule for MarkdownPathsResolveRule {
95    fn path_scope(&self) -> &Scope {
96        &self.scope
97    }
98
99    fn evaluate_file(
100        &self,
101        ctx: &Context<'_>,
102        path: &Path,
103        bytes: &[u8],
104    ) -> Result<Vec<Violation>> {
105        let Ok(text) = std::str::from_utf8(bytes) else {
106            return Ok(Vec::new()); // non-UTF-8 markdown is degenerate; skip
107        };
108        let mut violations = Vec::new();
109        for cand in scan_markdown_paths(text, &self.prefixes) {
110            if self.ignore_template_vars && has_template_vars(&cand.token) {
111                continue;
112            }
113            let lookup = strip_path_decoration(&cand.token);
114            if path_resolves(ctx, lookup) {
115                continue;
116            }
117            let msg = self.message.clone().unwrap_or_else(|| {
118                format!(
119                    "backticked path `{}` doesn't resolve to a file or directory",
120                    cand.token
121                )
122            });
123            violations.push(
124                Violation::new(msg)
125                    .with_path(std::sync::Arc::<Path>::from(path))
126                    .with_location(cand.line, cand.column),
127            );
128        }
129        Ok(violations)
130    }
131}
132
133pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
134    let Some(paths) = &spec.paths else {
135        return Err(Error::rule_config(
136            &spec.id,
137            "markdown_paths_resolve requires a `paths` field",
138        ));
139    };
140    let opts: Options = spec
141        .deserialize_options()
142        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
143    if opts.prefixes.is_empty() {
144        return Err(Error::rule_config(
145            &spec.id,
146            "markdown_paths_resolve requires a non-empty `prefixes` list — \
147             declare which path shapes (e.g. [\"src/\", \"crates/\", \"docs/\"]) \
148             count as path candidates in your codebase",
149        ));
150    }
151    Ok(Box::new(MarkdownPathsResolveRule {
152        id: spec.id.clone(),
153        level: spec.level,
154        policy_url: spec.policy_url.clone(),
155        message: spec.message.clone(),
156        scope: Scope::from_paths_spec(paths)?,
157        scope_filter: spec.parse_scope_filter()?,
158        prefixes: opts.prefixes,
159        ignore_template_vars: opts.ignore_template_vars,
160    }))
161}
162
163// ─── markdown scanner ──────────────────────────────────────────
164
/// A backticked token from a markdown source that matched one of
/// the configured path prefixes.
#[derive(Debug, PartialEq, Eq)]
struct Candidate {
    /// Span content with surrounding whitespace trimmed.
    token: String,
    /// 1-indexed line of the span.
    line: usize,
    /// 1-indexed byte position of the opening backtick in that line.
    column: usize,
}
172
173/// Walk a markdown string, returning every backticked token that
174/// starts with one of `prefixes`. Skips fenced code blocks
175/// (```` ``` ```` / `~~~`) and 4-space-indented code blocks; those
176/// contain code samples, not factual claims about the tree.
177fn scan_markdown_paths(text: &str, prefixes: &[String]) -> Vec<Candidate> {
178    let mut out = Vec::new();
179    let mut in_fenced = false;
180    let mut fence_marker: Option<char> = None;
181    let mut fence_len: usize = 0;
182
183    for (line_idx, line) in text.lines().enumerate() {
184        let line_no = line_idx + 1;
185
186        // Detect fenced-code-block boundaries. CommonMark allows
187        // ``` and ~~~ with at least 3 markers; the closing fence
188        // must use the same character and at least as many
189        // markers. `info string` (e.g. ```yaml) follows the
190        // opening fence; we don't care about its content.
191        let trimmed = line.trim_start();
192        if let Some((ch, n)) = detect_fence(trimmed) {
193            if !in_fenced {
194                in_fenced = true;
195                fence_marker = Some(ch);
196                fence_len = n;
197            } else if fence_marker == Some(ch) && n >= fence_len && only_fence(trimmed, ch) {
198                in_fenced = false;
199                fence_marker = None;
200                fence_len = 0;
201            }
202            continue;
203        }
204        if in_fenced {
205            continue;
206        }
207
208        // Skip 4-space indented code blocks. Per CommonMark, only
209        // applies when the indented line is NOT inside a list.
210        // We're conservative — any 4-space-prefixed line is treated
211        // as code unless it's a continuation of a list item, which
212        // we don't track here. Acceptable: false-skip rate >
213        // false-flag rate for our use.
214        if line.starts_with("    ") || line.starts_with('\t') {
215            continue;
216        }
217
218        // Find inline backticks. A run of N backticks opens an
219        // inline span that closes at the next run of EXACTLY N
220        // backticks. Per CommonMark, longer runs nest the span so
221        // it can contain shorter backtick sequences. Most paths
222        // use single backticks, which is what we optimise for.
223        let bytes = line.as_bytes();
224        let mut i = 0;
225        while i < bytes.len() {
226            if bytes[i] != b'`' {
227                i += 1;
228                continue;
229            }
230            let run_start = i;
231            while i < bytes.len() && bytes[i] == b'`' {
232                i += 1;
233            }
234            let run_len = i - run_start;
235            // Find the matching closing run.
236            let close_start = find_closing_run(&bytes[i..], run_len).map(|p| i + p);
237            let Some(close) = close_start else {
238                // Unmatched backticks → not a span; bail this line.
239                break;
240            };
241            let token_bytes = &bytes[i..close];
242            // Inline-code spans wrap their content with one space
243            // padding when the content starts/ends with a backtick;
244            // CommonMark trims one leading + one trailing space.
245            let token = std::str::from_utf8(token_bytes).unwrap_or("").trim();
246            if !token.is_empty() && starts_with_any_prefix(token, prefixes) {
247                out.push(Candidate {
248                    token: token.to_string(),
249                    line: line_no,
250                    column: run_start + 1, // 1-indexed; points at opening backtick
251                });
252            }
253            i = close + run_len;
254        }
255    }
256    out
257}
258
/// Recognise a code-fence marker at the very start of `s`.
///
/// Returns the marker character (`` ` `` or `~`) and the length of
/// the leading run when that run is at least three long; `None`
/// otherwise. Anything following the run is not inspected.
fn detect_fence(s: &str) -> Option<(char, usize)> {
    let marker = s.chars().next().filter(|c| matches!(c, '`' | '~'))?;
    let run = s.chars().take_while(|&c| c == marker).count();
    (run >= 3).then_some((marker, run))
}
270
/// True when `s`, ignoring trailing whitespace, is made up of
/// nothing but `ch`. A closing fence may not carry an info string
/// after its markers, so this tells a closer apart from a line
/// that merely starts with fence markers.
fn only_fence(s: &str, ch: char) -> bool {
    let body = s.trim_end();
    body.trim_start_matches(ch).is_empty()
}
278
/// Locate the first run of exactly `len` consecutive backticks in
/// `bytes` and return the offset of its first byte (relative to the
/// start of `bytes`). Longer or shorter runs are stepped over.
/// Returns `None` when no such run exists.
fn find_closing_run(bytes: &[u8], len: usize) -> Option<usize> {
    let mut pos = 0;
    while let Some(offset) = bytes[pos..].iter().position(|&b| b == b'`') {
        let start = pos + offset;
        let run = bytes[start..].iter().take_while(|&&b| b == b'`').count();
        if run == len {
            return Some(start);
        }
        // Wrong length: resume the search right after this run.
        pos = start + run;
    }
    None
}
299
/// True when `s` begins with at least one entry of `prefixes`.
fn starts_with_any_prefix(s: &str, prefixes: &[String]) -> bool {
    for prefix in prefixes {
        if s.starts_with(prefix.as_str()) {
            return true;
        }
    }
    false
}
303
/// Detect template-variable markers in `s`: a `{{` or `${`
/// opener, or both a `<` and a `>` anywhere in the string.
fn has_template_vars(s: &str) -> bool {
    if s.contains("{{") || s.contains("${") {
        return true;
    }
    s.find('<').is_some() && s.find('>').is_some()
}
309
/// Strip decoration that is not part of the path-on-disk we want
/// to look up.
///
/// Applied in this order:
/// 1. a `#…` anchor such as GitHub's `#L<n>` (everything from the
///    first `#` to the end);
/// 2. a numeric location suffix, either `:line` or `:line:col`;
/// 3. trailing sentence punctuation (`. , : ; ? !`);
/// 4. trailing slashes.
///
/// The result is a sub-slice of `s`; nothing is allocated.
fn strip_path_decoration(s: &str) -> &str {
    /// Characters treated as sentence punctuation when they trail the path.
    const TRAILING_PUNCTUATION: &[char] = &['.', ',', ':', ';', '?', '!'];
    /// `path:line:col` carries at most two numeric suffixes.
    const MAX_LOCATION_SUFFIXES: usize = 2;

    let mut path = s.find('#').map_or(s, |hash| &s[..hash]);

    // Peel `:N` suffixes from the right. A colon followed by nothing
    // or by non-digits is not a location and is left for step 3.
    for _ in 0..MAX_LOCATION_SUFFIXES {
        match path.rsplit_once(':') {
            Some((head, digits))
                if !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_digit()) =>
            {
                path = head;
            }
            _ => break,
        }
    }

    path.trim_end_matches(TRAILING_PUNCTUATION)
        .trim_end_matches('/')
}
329
330/// Does `lookup` resolve to a real file or directory in the
331/// scanned tree? Glob characters in the lookup are matched
332/// against the file index (any-of); plain paths use exact
333/// lookup of either file or directory.
334fn path_resolves(ctx: &Context<'_>, lookup: &str) -> bool {
335    if lookup.is_empty() {
336        return false;
337    }
338    if lookup.contains('*') || lookup.contains('?') || lookup.contains('[') {
339        // Glob — match against the index. Build a globset on the
340        // fly; cheap for one pattern.
341        let Ok(glob) = globset::Glob::new(lookup) else {
342            return false;
343        };
344        let matcher = glob.compile_matcher();
345        return ctx.index.entries.iter().any(|e| matcher.is_match(&e.path));
346    }
347    let p = Path::new(lookup);
348    ctx.index.entries.iter().any(|e| &*e.path == p)
349}
350
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an owned prefix list from string literals.
    fn prefixes(list: &[&str]) -> Vec<String> {
        list.iter().copied().map(String::from).collect()
    }

    /// Scan `md` with the given prefixes and keep only the tokens.
    fn tokens(md: &str, list: &[&str]) -> Vec<String> {
        scan_markdown_paths(md, &prefixes(list))
            .into_iter()
            .map(|c| c.token)
            .collect()
    }

    #[test]
    fn finds_inline_backtick_with_matching_prefix() {
        let found = tokens(
            "see `src/foo.ts` and `npm` and `docs/x.md`",
            &["src/", "docs/"],
        );
        assert_eq!(found, ["src/foo.ts", "docs/x.md"]);
    }

    #[test]
    fn skips_fenced_code_blocks() {
        let md = "before\n```yaml\nexample: `src/should-not-fire.ts`\n```\nafter `src/should-fire.ts`";
        assert_eq!(tokens(md, &["src/"]), ["src/should-fire.ts"]);
    }

    #[test]
    fn skips_indented_code_blocks() {
        let md = "normal `src/a.ts` line\n\n    indented `src/should-not-fire.ts`\n";
        assert_eq!(tokens(md, &["src/"]), ["src/a.ts"]);
    }

    #[test]
    fn handles_tilde_fences() {
        let md = "before `src/yes.ts`\n~~~\nin code: `src/no.ts`\n~~~\nafter `src/yes2.ts`";
        assert_eq!(tokens(md, &["src/"]), ["src/yes.ts", "src/yes2.ts"]);
    }

    #[test]
    fn line_and_column_are_correct() {
        let cands = scan_markdown_paths("first line\nsecond `src/foo.ts` here", &prefixes(&["src/"]));
        assert_eq!(cands.len(), 1);
        let only = &cands[0];
        assert_eq!(only.line, 2);
        // "second " occupies columns 1-7, so the opening backtick is at 8.
        assert_eq!(only.column, 8);
    }

    #[test]
    fn template_vars_detected() {
        let cases = [
            ("src/{{user_id}}.json", true),
            ("src/${name}.ts", true),
            ("src/<placeholder>.ts", true),
            ("src/concrete.ts", false),
            // Square brackets alone are not a template marker.
            ("src/foo[0].ts", false),
        ];
        for (input, expected) in cases {
            assert_eq!(has_template_vars(input), expected, "input: {input}");
        }
    }

    #[test]
    fn path_decoration_stripped() {
        let cases = [
            ("src/foo.ts", "src/foo.ts"),
            ("src/foo.ts.", "src/foo.ts"),
            ("src/foo.ts,", "src/foo.ts"),
            ("src/foo.ts:42", "src/foo.ts"),
            ("src/foo.ts#L42", "src/foo.ts"),
            ("src/foo.ts:42#L1", "src/foo.ts"),
            ("src/foo/", "src/foo"),
        ];
        for (input, expected) in cases {
            assert_eq!(strip_path_decoration(input), expected, "input: {input}");
        }
    }

    #[test]
    fn prefix_matching() {
        let pf = prefixes(&["src/", "crates/"]);
        for hit in ["src/foo.ts", "crates/alint"] {
            assert!(starts_with_any_prefix(hit, &pf), "should match: {hit}");
        }
        for miss in ["docs/x.md", "README.md"] {
            assert!(!starts_with_any_prefix(miss, &pf), "should not match: {miss}");
        }
    }

    #[test]
    fn unmatched_backticks_do_not_explode() {
        assert!(tokens("`src/foo.ts unmatched", &["src/"]).is_empty());
    }

    #[test]
    fn double_backticks_can_contain_single() {
        let md = "double `` ` `` then `src/foo.ts`";
        assert_eq!(tokens(md, &["src/"]), ["src/foo.ts"]);
    }
}