Skip to main content

alint_rules/
markdown_paths_resolve.rs

1//! `markdown_paths_resolve` — backticked workspace paths in
2//! markdown files must resolve to real files or directories.
3//!
4//! Targets the AGENTS.md / CLAUDE.md staleness problem:
5//! agent-context files reference workspace paths in inline
6//! backticks (`` `src/api/users.ts` ``), and those paths drift
7//! as the codebase evolves. The v0.6 `agent-context-no-stale-paths`
8//! rule surfaces *candidate* drift via a regex; this rule does
9//! the precise check.
10//!
11//! Design doc: `docs/design/v0.7/markdown_paths_resolve.md`.
12
13use std::path::Path;
14
15use alint_core::{Context, Error, Level, PerFileRule, Result, Rule, RuleSpec, Scope, Violation};
16use serde::Deserialize;
17
/// User-supplied options for the `markdown_paths_resolve` rule,
/// deserialized from the rule spec. Unknown keys are rejected
/// (`deny_unknown_fields`) so typos surface as config errors.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct Options {
    /// Whitelist of path-shape prefixes to validate. A backticked
    /// token must start with one of these to be considered a path
    /// candidate. No defaults — every project's layout differs and
    /// the user must declare which prefixes mark a path.
    prefixes: Vec<String>,

    /// Skip backticked tokens containing template-variable
    /// markers (`{{ }}`, `${ }`, `<…>`). Default true (supplied by
    /// `default_ignore_template_vars` when the key is absent).
    #[serde(default = "default_ignore_template_vars")]
    ignore_template_vars: bool,
}
32
/// Serde default for `Options::ignore_template_vars`: when the key
/// is omitted, template-looking tokens are skipped.
fn default_ignore_template_vars() -> bool {
    true
}
36
/// Rule that checks backticked workspace paths in markdown files
/// against the file index and reports those that do not resolve.
#[derive(Debug)]
pub struct MarkdownPathsResolveRule {
    /// Rule identifier, copied from the spec.
    id: String,
    /// Level reported for this rule, copied from the spec.
    level: Level,
    /// Optional policy link, copied from the spec.
    policy_url: Option<String>,
    /// Optional custom violation message; when `None`, a default
    /// message naming the unresolved token is generated.
    message: Option<String>,
    /// Selects which indexed files this rule scans.
    scope: Scope,
    /// Prefixes a backticked token must start with to be checked.
    prefixes: Vec<String>,
    /// When true, tokens containing template-variable markers are skipped.
    ignore_template_vars: bool,
}
47
48impl Rule for MarkdownPathsResolveRule {
49    fn id(&self) -> &str {
50        &self.id
51    }
52    fn level(&self) -> Level {
53        self.level
54    }
55    fn policy_url(&self) -> Option<&str> {
56        self.policy_url.as_deref()
57    }
58    fn path_scope(&self) -> Option<&Scope> {
59        Some(&self.scope)
60    }
61
62    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
63        let mut violations = Vec::new();
64        for entry in ctx.index.files() {
65            if !self.scope.matches(&entry.path, ctx.index) {
66                continue;
67            }
68            let full = ctx.root.join(&entry.path);
69            // Unreadable file: silently skip; a sibling rule can flag it.
70            let Ok(bytes) = std::fs::read(&full) else {
71                continue;
72            };
73            violations.extend(self.evaluate_file(ctx, &entry.path, &bytes)?);
74        }
75        Ok(violations)
76    }
77
78    fn as_per_file(&self) -> Option<&dyn PerFileRule> {
79        Some(self)
80    }
81}
82
83impl PerFileRule for MarkdownPathsResolveRule {
84    fn path_scope(&self) -> &Scope {
85        &self.scope
86    }
87
88    fn evaluate_file(
89        &self,
90        ctx: &Context<'_>,
91        path: &Path,
92        bytes: &[u8],
93    ) -> Result<Vec<Violation>> {
94        let Ok(text) = std::str::from_utf8(bytes) else {
95            return Ok(Vec::new()); // non-UTF-8 markdown is degenerate; skip
96        };
97        let mut violations = Vec::new();
98        for cand in scan_markdown_paths(text, &self.prefixes) {
99            if self.ignore_template_vars && has_template_vars(&cand.token) {
100                continue;
101            }
102            let lookup = strip_path_decoration(&cand.token);
103            if path_resolves(ctx, lookup) {
104                continue;
105            }
106            let msg = self.message.clone().unwrap_or_else(|| {
107                format!(
108                    "backticked path `{}` doesn't resolve to a file or directory",
109                    cand.token
110                )
111            });
112            violations.push(
113                Violation::new(msg)
114                    .with_path(std::sync::Arc::<Path>::from(path))
115                    .with_location(cand.line, cand.column),
116            );
117        }
118        Ok(violations)
119    }
120}
121
122pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
123    let Some(_paths) = &spec.paths else {
124        return Err(Error::rule_config(
125            &spec.id,
126            "markdown_paths_resolve requires a `paths` field",
127        ));
128    };
129    let opts: Options = spec
130        .deserialize_options()
131        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
132    if opts.prefixes.is_empty() {
133        return Err(Error::rule_config(
134            &spec.id,
135            "markdown_paths_resolve requires a non-empty `prefixes` list — \
136             declare which path shapes (e.g. [\"src/\", \"crates/\", \"docs/\"]) \
137             count as path candidates in your codebase",
138        ));
139    }
140    Ok(Box::new(MarkdownPathsResolveRule {
141        id: spec.id.clone(),
142        level: spec.level,
143        policy_url: spec.policy_url.clone(),
144        message: spec.message.clone(),
145        scope: Scope::from_spec(spec)?,
146        prefixes: opts.prefixes,
147        ignore_template_vars: opts.ignore_template_vars,
148    }))
149}
150
151// ─── markdown scanner ──────────────────────────────────────────
152
/// One backticked path candidate found in a markdown source.
#[derive(Debug, PartialEq, Eq)]
struct Candidate {
    /// Contents of the inline-code span with surrounding whitespace
    /// trimmed.
    token: String,
    /// 1-indexed line number of the span within the scanned text.
    line: usize,
    /// 1-indexed position of the opening backtick within its line,
    /// counted in bytes.
    column: usize,
}
160
161/// Walk a markdown string, returning every backticked token that
162/// starts with one of `prefixes`. Skips fenced code blocks
163/// (```` ``` ```` / `~~~`) and 4-space-indented code blocks; those
164/// contain code samples, not factual claims about the tree.
165fn scan_markdown_paths(text: &str, prefixes: &[String]) -> Vec<Candidate> {
166    let mut out = Vec::new();
167    let mut in_fenced = false;
168    let mut fence_marker: Option<char> = None;
169    let mut fence_len: usize = 0;
170
171    for (line_idx, line) in text.lines().enumerate() {
172        let line_no = line_idx + 1;
173
174        // Detect fenced-code-block boundaries. CommonMark allows
175        // ``` and ~~~ with at least 3 markers; the closing fence
176        // must use the same character and at least as many
177        // markers. `info string` (e.g. ```yaml) follows the
178        // opening fence; we don't care about its content.
179        let trimmed = line.trim_start();
180        if let Some((ch, n)) = detect_fence(trimmed) {
181            if !in_fenced {
182                in_fenced = true;
183                fence_marker = Some(ch);
184                fence_len = n;
185            } else if fence_marker == Some(ch) && n >= fence_len && only_fence(trimmed, ch) {
186                in_fenced = false;
187                fence_marker = None;
188                fence_len = 0;
189            }
190            continue;
191        }
192        if in_fenced {
193            continue;
194        }
195
196        // Skip 4-space indented code blocks. Per CommonMark, only
197        // applies when the indented line is NOT inside a list.
198        // We're conservative — any 4-space-prefixed line is treated
199        // as code unless it's a continuation of a list item, which
200        // we don't track here. Acceptable: false-skip rate >
201        // false-flag rate for our use.
202        if line.starts_with("    ") || line.starts_with('\t') {
203            continue;
204        }
205
206        // Find inline backticks. A run of N backticks opens an
207        // inline span that closes at the next run of EXACTLY N
208        // backticks. Per CommonMark, longer runs nest the span so
209        // it can contain shorter backtick sequences. Most paths
210        // use single backticks, which is what we optimise for.
211        let bytes = line.as_bytes();
212        let mut i = 0;
213        while i < bytes.len() {
214            if bytes[i] != b'`' {
215                i += 1;
216                continue;
217            }
218            let run_start = i;
219            while i < bytes.len() && bytes[i] == b'`' {
220                i += 1;
221            }
222            let run_len = i - run_start;
223            // Find the matching closing run.
224            let close_start = find_closing_run(&bytes[i..], run_len).map(|p| i + p);
225            let Some(close) = close_start else {
226                // Unmatched backticks → not a span; bail this line.
227                break;
228            };
229            let token_bytes = &bytes[i..close];
230            // Inline-code spans wrap their content with one space
231            // padding when the content starts/ends with a backtick;
232            // CommonMark trims one leading + one trailing space.
233            let token = std::str::from_utf8(token_bytes).unwrap_or("").trim();
234            if !token.is_empty() && starts_with_any_prefix(token, prefixes) {
235                out.push(Candidate {
236                    token: token.to_string(),
237                    line: line_no,
238                    column: run_start + 1, // 1-indexed; points at opening backtick
239                });
240            }
241            i = close + run_len;
242        }
243    }
244    out
245}
246
/// If `s` starts with N+ backticks or tildes (N ≥ 3) and can be a
/// code-fence line, return the fence character and the run length.
/// Otherwise `None`.
///
/// Per CommonMark, the info string following a *backtick* fence may
/// not contain a backtick. A line that starts with three or more
/// backticks and has another backtick later on is therefore inline
/// code, not a fence; treating it as an opener would hide the rest
/// of the document from the scanner. Tilde fences have no such
/// restriction.
fn detect_fence(s: &str) -> Option<(char, usize)> {
    let ch = s.chars().next().filter(|c| matches!(c, '`' | '~'))?;
    let n = s.chars().take_while(|&c| c == ch).count();
    if n < 3 {
        return None;
    }
    // Both fence characters are one byte in UTF-8, so `n` is also
    // the byte length of the run and a valid slice boundary.
    if ch == '`' && s[n..].contains('`') {
        return None;
    }
    Some((ch, n))
}
258
/// Report whether `s`, ignoring trailing whitespace, is made up of
/// nothing but `ch`. A closing fence may not carry an info string,
/// so this is how a candidate closing line is validated.
fn only_fence(s: &str, ch: char) -> bool {
    let body = s.trim_end();
    !body.contains(|c: char| c != ch)
}
266
/// Locate the first maximal run of backticks in `bytes` whose length
/// is exactly `len`, returning its start offset relative to `bytes`.
/// Runs of any other length are stepped over whole. `None` when no
/// such run exists.
fn find_closing_run(bytes: &[u8], len: usize) -> Option<usize> {
    let mut cursor = 0;
    while let Some(offset) = bytes[cursor..].iter().position(|&b| b == b'`') {
        let start = cursor + offset;
        let run = bytes[start..].iter().take_while(|&&b| b == b'`').count();
        if run == len {
            return Some(start);
        }
        cursor = start + run;
    }
    None
}
287
/// True when `s` begins with at least one entry of `prefixes`.
fn starts_with_any_prefix(s: &str, prefixes: &[String]) -> bool {
    for prefix in prefixes {
        if s.starts_with(prefix.as_str()) {
            return true;
        }
    }
    false
}
291
/// Report whether `s` looks like it contains a template placeholder:
/// a `{{` or `${` opener anywhere, or both a `<` and a `>` somewhere
/// in the string.
fn has_template_vars(s: &str) -> bool {
    if ["{{", "${"].iter().any(|opener| s.contains(*opener)) {
        return true;
    }
    let has_open_angle = s.contains('<');
    has_open_angle && s.contains('>')
}
297
/// Strip trailing punctuation, trailing slashes, and
/// `:line` / `:line:col` / `#L<n>` location suffixes that aren't
/// part of the path-on-disk we want to look up.
///
/// Order of operations:
/// 1. drop everything from the first `#` (GitHub-style anchor);
/// 2. drop trailing `:<digits>` groups, repeatedly, so both
///    `file:42` and `file:42:7` reduce to `file`;
/// 3. drop trailing sentence punctuation (`.,:;?!`);
/// 4. drop trailing `/`.
///
/// The result is a sub-slice of `s`.
fn strip_path_decoration(s: &str) -> &str {
    let mut path = s.find('#').map_or(s, |hash| &s[..hash]);

    // A bare trailing `:` (empty tail) is punctuation, not a
    // location, and is left for the punctuation trim below.
    while let Some((head, tail)) = path.rsplit_once(':') {
        if tail.is_empty() || !tail.bytes().all(|b| b.is_ascii_digit()) {
            break;
        }
        path = head;
    }

    path.trim_end_matches(|c: char| ".,:;?!".contains(c))
        .trim_end_matches('/')
}
317
318/// Does `lookup` resolve to a real file or directory in the
319/// scanned tree? Glob characters in the lookup are matched
320/// against the file index (any-of); plain paths use exact
321/// lookup of either file or directory.
322fn path_resolves(ctx: &Context<'_>, lookup: &str) -> bool {
323    if lookup.is_empty() {
324        return false;
325    }
326    if lookup.contains('*') || lookup.contains('?') || lookup.contains('[') {
327        // Glob — match against the index. Build a globset on the
328        // fly; cheap for one pattern.
329        let Ok(glob) = globset::Glob::new(lookup) else {
330            return false;
331        };
332        let matcher = glob.compile_matcher();
333        return ctx.index.entries.iter().any(|e| matcher.is_match(&e.path));
334    }
335    let p = Path::new(lookup);
336    ctx.index.entries.iter().any(|e| &*e.path == p)
337}
338
// Unit tests for the markdown scanner and the string helpers. The
// index-backed `path_resolves` lookup is not exercised here.
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an owned prefix list from string literals.
    fn prefixes(list: &[&str]) -> Vec<String> {
        list.iter().map(|s| (*s).to_string()).collect()
    }

    // Only spans whose contents start with a configured prefix are
    // returned; `npm` is ignored.
    #[test]
    fn finds_inline_backtick_with_matching_prefix() {
        let pf = prefixes(&["src/", "docs/"]);
        let cands = scan_markdown_paths("see `src/foo.ts` and `npm` and `docs/x.md`", &pf);
        assert_eq!(cands.len(), 2);
        assert_eq!(cands[0].token, "src/foo.ts");
        assert_eq!(cands[1].token, "docs/x.md");
    }

    // Content between a backtick fence (with info string) and its
    // closing fence is not scanned.
    #[test]
    fn skips_fenced_code_blocks() {
        let pf = prefixes(&["src/"]);
        let md = "before\n\
                  ```yaml\n\
                  example: `src/should-not-fire.ts`\n\
                  ```\n\
                  after `src/should-fire.ts`";
        let cands = scan_markdown_paths(md, &pf);
        assert_eq!(cands.len(), 1);
        assert_eq!(cands[0].token, "src/should-fire.ts");
    }

    // A line starting with four spaces is treated as an indented
    // code block. `\x20` spells the spaces explicitly because the
    // string-continuation `\` would otherwise swallow them.
    #[test]
    fn skips_indented_code_blocks() {
        let pf = prefixes(&["src/"]);
        let md = "normal `src/a.ts` line\n\
                  \n\
                  \x20\x20\x20\x20indented `src/should-not-fire.ts`\n";
        let cands = scan_markdown_paths(md, &pf);
        assert_eq!(cands.len(), 1);
        assert_eq!(cands[0].token, "src/a.ts");
    }

    // `~~~` fences behave like backtick fences.
    #[test]
    fn handles_tilde_fences() {
        let pf = prefixes(&["src/"]);
        let md = "before `src/yes.ts`\n~~~\nin code: `src/no.ts`\n~~~\nafter `src/yes2.ts`";
        let tokens: Vec<_> = scan_markdown_paths(md, &pf)
            .into_iter()
            .map(|c| c.token)
            .collect();
        assert_eq!(tokens, vec!["src/yes.ts", "src/yes2.ts"]);
    }

    // Line and column are both 1-indexed; the column points at the
    // opening backtick.
    #[test]
    fn line_and_column_are_correct() {
        let pf = prefixes(&["src/"]);
        let md = "first line\nsecond `src/foo.ts` here";
        let cands = scan_markdown_paths(md, &pf);
        assert_eq!(cands.len(), 1);
        assert_eq!(cands[0].line, 2);
        // "second " is 7 chars + 1 for the opening backtick at col 8
        assert_eq!(cands[0].column, 8);
    }

    // Each supported template marker is detected; plain paths and
    // square brackets are not.
    #[test]
    fn template_vars_detected() {
        assert!(has_template_vars("src/{{user_id}}.json"));
        assert!(has_template_vars("src/${name}.ts"));
        assert!(has_template_vars("src/<placeholder>.ts"));
        assert!(!has_template_vars("src/concrete.ts"));
        assert!(!has_template_vars("src/foo[0].ts")); // brackets without angle
    }

    // Trailing punctuation, `:N`, `#L<n>` and trailing `/` are all
    // removed before lookup.
    #[test]
    fn path_decoration_stripped() {
        assert_eq!(strip_path_decoration("src/foo.ts"), "src/foo.ts");
        assert_eq!(strip_path_decoration("src/foo.ts."), "src/foo.ts");
        assert_eq!(strip_path_decoration("src/foo.ts,"), "src/foo.ts");
        assert_eq!(strip_path_decoration("src/foo.ts:42"), "src/foo.ts");
        assert_eq!(strip_path_decoration("src/foo.ts#L42"), "src/foo.ts");
        assert_eq!(strip_path_decoration("src/foo.ts:42#L1"), "src/foo.ts");
        assert_eq!(strip_path_decoration("src/foo/"), "src/foo");
    }

    // Prefix matching is a plain `starts_with` against each entry.
    #[test]
    fn prefix_matching() {
        let pf = prefixes(&["src/", "crates/"]);
        assert!(starts_with_any_prefix("src/foo.ts", &pf));
        assert!(starts_with_any_prefix("crates/alint", &pf));
        assert!(!starts_with_any_prefix("docs/x.md", &pf));
        assert!(!starts_with_any_prefix("README.md", &pf));
    }

    // A backtick with no closing partner yields no candidate and
    // does not panic.
    #[test]
    fn unmatched_backticks_do_not_explode() {
        let pf = prefixes(&["src/"]);
        let cands = scan_markdown_paths("`src/foo.ts unmatched", &pf);
        assert!(cands.is_empty());
    }

    // A double-backtick span wrapping a lone backtick is consumed as
    // one span, so the following single-backtick path is still found.
    #[test]
    fn double_backticks_can_contain_single() {
        let pf = prefixes(&["src/"]);
        let md = "double `` ` `` then `src/foo.ts`";
        let cands = scan_markdown_paths(md, &pf);
        assert_eq!(cands.len(), 1);
        assert_eq!(cands[0].token, "src/foo.ts");
    }
}
446}