Skip to main content

alint_rules/
markdown_paths_resolve.rs

1//! `markdown_paths_resolve` — backticked workspace paths in
2//! markdown files must resolve to real files or directories.
3//!
4//! Targets the AGENTS.md / CLAUDE.md staleness problem:
5//! agent-context files reference workspace paths in inline
6//! backticks (`` `src/api/users.ts` ``), and those paths drift
7//! as the codebase evolves. The v0.6 `agent-context-no-stale-paths`
8//! rule surfaces *candidate* drift via a regex; this rule does
9//! the precise check.
10//!
11//! Design doc: `docs/design/v0.7/markdown_paths_resolve.md`.
12
13use std::path::Path;
14
15use alint_core::{
16    Context, Error, Level, PerFileRule, Result, Rule, RuleSpec, Scope, Violation, eval_per_file,
17};
18use serde::Deserialize;
19
20#[derive(Debug, Deserialize)]
21#[serde(deny_unknown_fields)]
22struct Options {
23    /// Whitelist of path-shape prefixes to validate. A backticked
24    /// token must start with one of these to be considered a path
25    /// candidate. No defaults — every project's layout differs and
26    /// the user must declare which prefixes mark a path.
27    prefixes: Vec<String>,
28
29    /// Skip backticked tokens containing template-variable
30    /// markers (`{{ }}`, `${ }`, `<…>`). Default true.
31    #[serde(default = "default_ignore_template_vars")]
32    ignore_template_vars: bool,
33}
34
/// Serde default for `Options::ignore_template_vars`: template-looking
/// tokens are skipped unless the user opts out.
fn default_ignore_template_vars() -> bool {
    true
}
38
39#[derive(Debug)]
40pub struct MarkdownPathsResolveRule {
41    id: String,
42    level: Level,
43    policy_url: Option<String>,
44    message: Option<String>,
45    scope: Scope,
46    prefixes: Vec<String>,
47    ignore_template_vars: bool,
48}
49
50impl Rule for MarkdownPathsResolveRule {
51    alint_core::rule_common_impl!();
52    fn path_scope(&self) -> Option<&Scope> {
53        Some(&self.scope)
54    }
55
56    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
57        eval_per_file(self, ctx)
58    }
59
60    fn as_per_file(&self) -> Option<&dyn PerFileRule> {
61        Some(self)
62    }
63}
64
65impl PerFileRule for MarkdownPathsResolveRule {
66    fn path_scope(&self) -> &Scope {
67        &self.scope
68    }
69
70    fn evaluate_file(
71        &self,
72        ctx: &Context<'_>,
73        path: &Path,
74        bytes: &[u8],
75    ) -> Result<Vec<Violation>> {
76        let Ok(text) = std::str::from_utf8(bytes) else {
77            return Ok(Vec::new()); // non-UTF-8 markdown is degenerate; skip
78        };
79        let mut violations = Vec::new();
80        for cand in scan_markdown_paths(text, &self.prefixes) {
81            if self.ignore_template_vars && has_template_vars(&cand.token) {
82                continue;
83            }
84            let lookup = strip_path_decoration(&cand.token);
85            if path_resolves(ctx, lookup) {
86                continue;
87            }
88            let msg = self.message.clone().unwrap_or_else(|| {
89                format!(
90                    "backticked path `{}` doesn't resolve to a file or directory",
91                    cand.token
92                )
93            });
94            violations.push(
95                Violation::new(msg)
96                    .with_path(std::sync::Arc::<Path>::from(path))
97                    .with_location(cand.line, cand.column),
98            );
99        }
100        Ok(violations)
101    }
102}
103
104pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
105    let Some(_paths) = &spec.paths else {
106        return Err(Error::rule_config(
107            &spec.id,
108            "markdown_paths_resolve requires a `paths` field",
109        ));
110    };
111    let opts: Options = spec
112        .deserialize_options()
113        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
114    if opts.prefixes.is_empty() {
115        return Err(Error::rule_config(
116            &spec.id,
117            "markdown_paths_resolve requires a non-empty `prefixes` list — \
118             declare which path shapes (e.g. [\"src/\", \"crates/\", \"docs/\"]) \
119             count as path candidates in your codebase",
120        ));
121    }
122    Ok(Box::new(MarkdownPathsResolveRule {
123        id: spec.id.clone(),
124        level: spec.level,
125        policy_url: spec.policy_url.clone(),
126        message: spec.message.clone(),
127        scope: Scope::from_spec(spec)?,
128        prefixes: opts.prefixes,
129        ignore_template_vars: opts.ignore_template_vars,
130    }))
131}
132
133// ─── markdown scanner ──────────────────────────────────────────
134
/// A single backticked, prefix-matching token located in a markdown
/// source, together with where it was found.
#[derive(Debug, PartialEq, Eq)]
struct Candidate {
    /// The span's content with surrounding whitespace trimmed.
    token: String,
    /// 1-indexed line number of the span.
    line: usize,
    /// 1-indexed column of the opening backtick run (the scanner
    /// derives it from a byte offset within the line).
    column: usize,
}
142
143/// Walk a markdown string, returning every backticked token that
144/// starts with one of `prefixes`. Skips fenced code blocks
145/// (```` ``` ```` / `~~~`) and 4-space-indented code blocks; those
146/// contain code samples, not factual claims about the tree.
147fn scan_markdown_paths(text: &str, prefixes: &[String]) -> Vec<Candidate> {
148    let mut out = Vec::new();
149    let mut in_fenced = false;
150    let mut fence_marker: Option<char> = None;
151    let mut fence_len: usize = 0;
152
153    for (line_idx, line) in text.lines().enumerate() {
154        let line_no = line_idx + 1;
155
156        // Detect fenced-code-block boundaries. CommonMark allows
157        // ``` and ~~~ with at least 3 markers; the closing fence
158        // must use the same character and at least as many
159        // markers. `info string` (e.g. ```yaml) follows the
160        // opening fence; we don't care about its content.
161        let trimmed = line.trim_start();
162        if let Some((ch, n)) = detect_fence(trimmed) {
163            if !in_fenced {
164                in_fenced = true;
165                fence_marker = Some(ch);
166                fence_len = n;
167            } else if fence_marker == Some(ch) && n >= fence_len && only_fence(trimmed, ch) {
168                in_fenced = false;
169                fence_marker = None;
170                fence_len = 0;
171            }
172            continue;
173        }
174        if in_fenced {
175            continue;
176        }
177
178        // Skip 4-space indented code blocks. Per CommonMark, only
179        // applies when the indented line is NOT inside a list.
180        // We're conservative — any 4-space-prefixed line is treated
181        // as code unless it's a continuation of a list item, which
182        // we don't track here. Acceptable: false-skip rate >
183        // false-flag rate for our use.
184        if line.starts_with("    ") || line.starts_with('\t') {
185            continue;
186        }
187
188        // Find inline backticks. A run of N backticks opens an
189        // inline span that closes at the next run of EXACTLY N
190        // backticks. Per CommonMark, longer runs nest the span so
191        // it can contain shorter backtick sequences. Most paths
192        // use single backticks, which is what we optimise for.
193        let bytes = line.as_bytes();
194        let mut i = 0;
195        while i < bytes.len() {
196            if bytes[i] != b'`' {
197                i += 1;
198                continue;
199            }
200            let run_start = i;
201            while i < bytes.len() && bytes[i] == b'`' {
202                i += 1;
203            }
204            let run_len = i - run_start;
205            // Find the matching closing run.
206            let close_start = find_closing_run(&bytes[i..], run_len).map(|p| i + p);
207            let Some(close) = close_start else {
208                // Unmatched backticks → not a span; bail this line.
209                break;
210            };
211            let token_bytes = &bytes[i..close];
212            // Inline-code spans wrap their content with one space
213            // padding when the content starts/ends with a backtick;
214            // CommonMark trims one leading + one trailing space.
215            let token = std::str::from_utf8(token_bytes).unwrap_or("").trim();
216            if !token.is_empty() && starts_with_any_prefix(token, prefixes) {
217                out.push(Candidate {
218                    token: token.to_string(),
219                    line: line_no,
220                    column: run_start + 1, // 1-indexed; points at opening backtick
221                });
222            }
223            i = close + run_len;
224        }
225    }
226    out
227}
228
/// If `s` opens with a run of three or more backticks or tildes that
/// can act as a code-fence marker, return the fence character and the
/// run length. Otherwise `None`.
///
/// Anything after the run (the info string, e.g. ```` ```yaml ````)
/// is ignored, with one exception: for a backtick run the remainder
/// must not contain another backtick. CommonMark forbids backticks in
/// a backtick fence's info string, so a line such as
/// ```` ```foo``` bar ```` is an inline code span, not a fence.
/// Treating it as a fence would swallow everything after it. Tilde
/// fences carry no such restriction.
fn detect_fence(s: &str) -> Option<(char, usize)> {
    let ch = s.chars().next().filter(|c| matches!(c, '`' | '~'))?;
    let n = s.chars().take_while(|&c| c == ch).count();
    if n < 3 {
        return None;
    }
    // Both fence characters are ASCII, so `n` is also the byte length
    // of the run and `s[n..]` is the text following it.
    if ch == '`' && s[n..].contains('`') {
        return None;
    }
    Some((ch, n))
}
240
/// True when `s`, ignoring trailing whitespace, is made up solely of
/// `ch` characters. This is how a fence-marker line is judged able to
/// close a fence: `CommonMark` does not allow an info string after
/// the markers of a closing fence.
fn only_fence(s: &str, ch: char) -> bool {
    let body = s.trim_end();
    // Removing every leading `ch` leaves nothing exactly when the
    // whole body consists of `ch`.
    body.trim_start_matches(ch).is_empty()
}
248
/// Locate the next run of exactly `len` backticks in `bytes`.
///
/// Returns the offset of the run's first backtick relative to the
/// start of `bytes`, or `None` when no run of that exact length
/// exists. Longer and shorter runs are skipped whole.
fn find_closing_run(bytes: &[u8], len: usize) -> Option<usize> {
    let mut pos = 0;
    while let Some(offset) = bytes[pos..].iter().position(|&b| b == b'`') {
        let start = pos + offset;
        let run = bytes[start..].iter().take_while(|&&b| b == b'`').count();
        if run == len {
            return Some(start);
        }
        // Resume right after this run so it is never split up.
        pos = start + run;
    }
    None
}
269
/// True when `s` begins with at least one entry of `prefixes`.
/// An empty prefix list never matches.
fn starts_with_any_prefix(s: &str, prefixes: &[String]) -> bool {
    for prefix in prefixes {
        if s.starts_with(prefix.as_str()) {
            return true;
        }
    }
    false
}
273
/// Reports whether `s` looks like it carries a template-variable
/// marker: a `{{`, a `${`, or both a `<` and a `>` somewhere in the
/// string (`{{ … }}` / `${ … }` / `<…>`).
fn has_template_vars(s: &str) -> bool {
    if ["{{", "${"].iter().any(|marker| s.contains(*marker)) {
        return true;
    }
    let has_open_angle = s.contains('<');
    let has_close_angle = s.contains('>');
    has_open_angle && has_close_angle
}
279
/// Reduce a backticked token to the path-on-disk to look up.
///
/// Removed, in order:
/// 1. a `#…` anchor such as GitHub's `#L<n>` (everything from the
///    first `#` to the end);
/// 2. trailing sentence punctuation (`. , : ; ? !`);
/// 3. trailing `:<digits>` location groups, so both `path:line` and
///    `path:line:col` collapse to `path`;
/// 4. trailing punctuation again, in case stripping a location group
///    exposed some;
/// 5. trailing slashes.
///
/// Punctuation is trimmed before the location suffix so that a token
/// like `src/foo.ts:42.` still loses its `:42`. The result borrows
/// from `s` and may be empty.
fn strip_path_decoration(s: &str) -> &str {
    const TRAILING_PUNCT: &[char] = &['.', ',', ':', ';', '?', '!'];

    // Split off one trailing `:<digits>` group; `None` when the text
    // after the last colon is empty or not purely ASCII digits.
    fn strip_numeric_suffix(s: &str) -> Option<&str> {
        let (head, tail) = s.rsplit_once(':')?;
        (!tail.is_empty() && tail.bytes().all(|b| b.is_ascii_digit())).then_some(head)
    }

    let s = s.find('#').map_or(s, |hash| &s[..hash]);
    let mut s = s.trim_end_matches(TRAILING_PUNCT);
    while let Some(head) = strip_numeric_suffix(s) {
        s = head;
    }
    s.trim_end_matches(TRAILING_PUNCT).trim_end_matches('/')
}
299
300/// Does `lookup` resolve to a real file or directory in the
301/// scanned tree? Glob characters in the lookup are matched
302/// against the file index (any-of); plain paths use exact
303/// lookup of either file or directory.
304fn path_resolves(ctx: &Context<'_>, lookup: &str) -> bool {
305    if lookup.is_empty() {
306        return false;
307    }
308    if lookup.contains('*') || lookup.contains('?') || lookup.contains('[') {
309        // Glob — match against the index. Build a globset on the
310        // fly; cheap for one pattern.
311        let Ok(glob) = globset::Glob::new(lookup) else {
312            return false;
313        };
314        let matcher = glob.compile_matcher();
315        return ctx.index.entries.iter().any(|e| matcher.is_match(&e.path));
316    }
317    let p = Path::new(lookup);
318    ctx.index.entries.iter().any(|e| &*e.path == p)
319}
320
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an owned prefix list from string literals.
    fn prefixes(list: &[&str]) -> Vec<String> {
        list.iter().copied().map(String::from).collect()
    }

    /// Scan `md` and keep only the tokens, in document order.
    fn tokens_of(md: &str, pf: &[String]) -> Vec<String> {
        scan_markdown_paths(md, pf)
            .into_iter()
            .map(|c| c.token)
            .collect()
    }

    #[test]
    fn finds_inline_backtick_with_matching_prefix() {
        let pf = prefixes(&["src/", "docs/"]);
        let found = tokens_of("see `src/foo.ts` and `npm` and `docs/x.md`", &pf);
        // `npm` has no configured prefix and must be ignored.
        assert_eq!(found, ["src/foo.ts", "docs/x.md"]);
    }

    #[test]
    fn skips_fenced_code_blocks() {
        let pf = prefixes(&["src/"]);
        let md = "before\n\
                  ```yaml\n\
                  example: `src/should-not-fire.ts`\n\
                  ```\n\
                  after `src/should-fire.ts`";
        assert_eq!(tokens_of(md, &pf), ["src/should-fire.ts"]);
    }

    #[test]
    fn skips_indented_code_blocks() {
        let pf = prefixes(&["src/"]);
        let md = "normal `src/a.ts` line\n\
                  \n\
                  \x20\x20\x20\x20indented `src/should-not-fire.ts`\n";
        assert_eq!(tokens_of(md, &pf), ["src/a.ts"]);
    }

    #[test]
    fn handles_tilde_fences() {
        let pf = prefixes(&["src/"]);
        let md = "before `src/yes.ts`\n~~~\nin code: `src/no.ts`\n~~~\nafter `src/yes2.ts`";
        assert_eq!(tokens_of(md, &pf), ["src/yes.ts", "src/yes2.ts"]);
    }

    #[test]
    fn line_and_column_are_correct() {
        let pf = prefixes(&["src/"]);
        let cands = scan_markdown_paths("first line\nsecond `src/foo.ts` here", &pf);
        assert_eq!(cands.len(), 1);
        let only = &cands[0];
        assert_eq!(only.line, 2);
        // "second " is 7 chars, so the opening backtick sits at col 8.
        assert_eq!(only.column, 8);
    }

    #[test]
    fn template_vars_detected() {
        for templated in [
            "src/{{user_id}}.json",
            "src/${name}.ts",
            "src/<placeholder>.ts",
        ] {
            assert!(has_template_vars(templated));
        }
        // Plain paths, and square brackets without angle brackets,
        // are not template markers.
        for concrete in ["src/concrete.ts", "src/foo[0].ts"] {
            assert!(!has_template_vars(concrete));
        }
    }

    #[test]
    fn path_decoration_stripped() {
        let cases = [
            ("src/foo.ts", "src/foo.ts"),
            ("src/foo.ts.", "src/foo.ts"),
            ("src/foo.ts,", "src/foo.ts"),
            ("src/foo.ts:42", "src/foo.ts"),
            ("src/foo.ts#L42", "src/foo.ts"),
            ("src/foo.ts:42#L1", "src/foo.ts"),
            ("src/foo/", "src/foo"),
        ];
        for (input, expected) in cases {
            assert_eq!(strip_path_decoration(input), expected);
        }
    }

    #[test]
    fn prefix_matching() {
        let pf = prefixes(&["src/", "crates/"]);
        for hit in ["src/foo.ts", "crates/alint"] {
            assert!(starts_with_any_prefix(hit, &pf));
        }
        for miss in ["docs/x.md", "README.md"] {
            assert!(!starts_with_any_prefix(miss, &pf));
        }
    }

    #[test]
    fn unmatched_backticks_do_not_explode() {
        let pf = prefixes(&["src/"]);
        assert!(scan_markdown_paths("`src/foo.ts unmatched", &pf).is_empty());
    }

    #[test]
    fn double_backticks_can_contain_single() {
        let pf = prefixes(&["src/"]);
        let md = "double `` ` `` then `src/foo.ts`";
        assert_eq!(tokens_of(md, &pf), ["src/foo.ts"]);
    }
}