Skip to main content

alint_rules/
markdown_paths_resolve.rs

1//! `markdown_paths_resolve` — backticked workspace paths in
2//! markdown files must resolve to real files or directories.
3//!
4//! Targets the AGENTS.md / CLAUDE.md staleness problem:
5//! agent-context files reference workspace paths in inline
6//! backticks (`` `src/api/users.ts` ``), and those paths drift
7//! as the codebase evolves. The v0.6 `agent-context-no-stale-paths`
8//! rule surfaces *candidate* drift via a regex; this rule does
9//! the precise check.
10//!
11//! Design doc: `docs/design/v0.7/markdown_paths_resolve.md`.
12
13use std::path::Path;
14
15use alint_core::{Context, Error, Level, PerFileRule, Result, Rule, RuleSpec, Scope, Violation};
16use serde::Deserialize;
17
/// User-facing options for the `markdown_paths_resolve` rule, deserialized
/// from the rule spec in [`build`].
#[derive(Debug, Deserialize)]
struct Options {
    /// Whitelist of path-shape prefixes to validate. A backticked
    /// token must start with one of these to be considered a path
    /// candidate. No defaults — every project's layout differs and
    /// the user must declare which prefixes mark a path.
    ///
    /// The field has no serde default, so it must be present in the
    /// options; `build` additionally rejects an empty list.
    prefixes: Vec<String>,

    /// Skip backticked tokens containing template-variable
    /// markers (`{{ }}`, `${ }`, `<…>`). Default true.
    // The default comes from `default_ignore_template_vars` because serde's
    // bare `#[serde(default)]` would yield `false` for a `bool`.
    #[serde(default = "default_ignore_template_vars")]
    ignore_template_vars: bool,
}
31
/// Serde default for `Options::ignore_template_vars`: template-looking
/// tokens are skipped unless the user explicitly opts out.
fn default_ignore_template_vars() -> bool {
    true
}
35
/// Rule instance that checks backticked workspace paths in markdown files
/// against the scanned file index. Constructed by [`build`].
#[derive(Debug)]
pub struct MarkdownPathsResolveRule {
    /// Rule identifier, copied from the spec.
    id: String,
    /// Severity level, copied from the spec.
    level: Level,
    /// Optional policy link, copied from the spec.
    policy_url: Option<String>,
    /// Optional custom violation message; when `None`, a default message
    /// naming the unresolved token is generated per violation.
    message: Option<String>,
    /// Which files this rule applies to (built via `Scope::from_spec`).
    scope: Scope,
    /// Prefixes a backticked token must start with to count as a path.
    prefixes: Vec<String>,
    /// When true, tokens containing template-variable markers are skipped.
    ignore_template_vars: bool,
}
46
47impl Rule for MarkdownPathsResolveRule {
48    fn id(&self) -> &str {
49        &self.id
50    }
51    fn level(&self) -> Level {
52        self.level
53    }
54    fn policy_url(&self) -> Option<&str> {
55        self.policy_url.as_deref()
56    }
57    fn path_scope(&self) -> Option<&Scope> {
58        Some(&self.scope)
59    }
60
61    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
62        let mut violations = Vec::new();
63        for entry in ctx.index.files() {
64            if !self.scope.matches(&entry.path, ctx.index) {
65                continue;
66            }
67            let full = ctx.root.join(&entry.path);
68            // Unreadable file: silently skip; a sibling rule can flag it.
69            let Ok(bytes) = std::fs::read(&full) else {
70                continue;
71            };
72            violations.extend(self.evaluate_file(ctx, &entry.path, &bytes)?);
73        }
74        Ok(violations)
75    }
76
77    fn as_per_file(&self) -> Option<&dyn PerFileRule> {
78        Some(self)
79    }
80}
81
82impl PerFileRule for MarkdownPathsResolveRule {
83    fn path_scope(&self) -> &Scope {
84        &self.scope
85    }
86
87    fn evaluate_file(
88        &self,
89        ctx: &Context<'_>,
90        path: &Path,
91        bytes: &[u8],
92    ) -> Result<Vec<Violation>> {
93        let Ok(text) = std::str::from_utf8(bytes) else {
94            return Ok(Vec::new()); // non-UTF-8 markdown is degenerate; skip
95        };
96        let mut violations = Vec::new();
97        for cand in scan_markdown_paths(text, &self.prefixes) {
98            if self.ignore_template_vars && has_template_vars(&cand.token) {
99                continue;
100            }
101            let lookup = strip_path_decoration(&cand.token);
102            if path_resolves(ctx, lookup) {
103                continue;
104            }
105            let msg = self.message.clone().unwrap_or_else(|| {
106                format!(
107                    "backticked path `{}` doesn't resolve to a file or directory",
108                    cand.token
109                )
110            });
111            violations.push(
112                Violation::new(msg)
113                    .with_path(std::sync::Arc::<Path>::from(path))
114                    .with_location(cand.line, cand.column),
115            );
116        }
117        Ok(violations)
118    }
119}
120
121pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
122    let Some(_paths) = &spec.paths else {
123        return Err(Error::rule_config(
124            &spec.id,
125            "markdown_paths_resolve requires a `paths` field",
126        ));
127    };
128    let opts: Options = spec
129        .deserialize_options()
130        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
131    if opts.prefixes.is_empty() {
132        return Err(Error::rule_config(
133            &spec.id,
134            "markdown_paths_resolve requires a non-empty `prefixes` list — \
135             declare which path shapes (e.g. [\"src/\", \"crates/\", \"docs/\"]) \
136             count as path candidates in your codebase",
137        ));
138    }
139    Ok(Box::new(MarkdownPathsResolveRule {
140        id: spec.id.clone(),
141        level: spec.level,
142        policy_url: spec.policy_url.clone(),
143        message: spec.message.clone(),
144        scope: Scope::from_spec(spec)?,
145        prefixes: opts.prefixes,
146        ignore_template_vars: opts.ignore_template_vars,
147    }))
148}
149
150// ─── markdown scanner ──────────────────────────────────────────
151
/// One backticked path candidate found in a markdown source.
#[derive(Debug, PartialEq, Eq)]
struct Candidate {
    /// Content of the inline code span with surrounding whitespace trimmed.
    token: String,
    /// 1-indexed line number of the span within the scanned text.
    line: usize,
    /// 1-indexed position of the span's opening backtick within its line.
    /// The scanner computes this from a byte offset, so it equals the
    /// character column only when the preceding text is ASCII.
    column: usize,
}
159
160/// Walk a markdown string, returning every backticked token that
161/// starts with one of `prefixes`. Skips fenced code blocks
162/// (```` ``` ```` / `~~~`) and 4-space-indented code blocks; those
163/// contain code samples, not factual claims about the tree.
164fn scan_markdown_paths(text: &str, prefixes: &[String]) -> Vec<Candidate> {
165    let mut out = Vec::new();
166    let mut in_fenced = false;
167    let mut fence_marker: Option<char> = None;
168    let mut fence_len: usize = 0;
169
170    for (line_idx, line) in text.lines().enumerate() {
171        let line_no = line_idx + 1;
172
173        // Detect fenced-code-block boundaries. CommonMark allows
174        // ``` and ~~~ with at least 3 markers; the closing fence
175        // must use the same character and at least as many
176        // markers. `info string` (e.g. ```yaml) follows the
177        // opening fence; we don't care about its content.
178        let trimmed = line.trim_start();
179        if let Some((ch, n)) = detect_fence(trimmed) {
180            if !in_fenced {
181                in_fenced = true;
182                fence_marker = Some(ch);
183                fence_len = n;
184            } else if fence_marker == Some(ch) && n >= fence_len && only_fence(trimmed, ch) {
185                in_fenced = false;
186                fence_marker = None;
187                fence_len = 0;
188            }
189            continue;
190        }
191        if in_fenced {
192            continue;
193        }
194
195        // Skip 4-space indented code blocks. Per CommonMark, only
196        // applies when the indented line is NOT inside a list.
197        // We're conservative — any 4-space-prefixed line is treated
198        // as code unless it's a continuation of a list item, which
199        // we don't track here. Acceptable: false-skip rate >
200        // false-flag rate for our use.
201        if line.starts_with("    ") || line.starts_with('\t') {
202            continue;
203        }
204
205        // Find inline backticks. A run of N backticks opens an
206        // inline span that closes at the next run of EXACTLY N
207        // backticks. Per CommonMark, longer runs nest the span so
208        // it can contain shorter backtick sequences. Most paths
209        // use single backticks, which is what we optimise for.
210        let bytes = line.as_bytes();
211        let mut i = 0;
212        while i < bytes.len() {
213            if bytes[i] != b'`' {
214                i += 1;
215                continue;
216            }
217            let run_start = i;
218            while i < bytes.len() && bytes[i] == b'`' {
219                i += 1;
220            }
221            let run_len = i - run_start;
222            // Find the matching closing run.
223            let close_start = find_closing_run(&bytes[i..], run_len).map(|p| i + p);
224            let Some(close) = close_start else {
225                // Unmatched backticks → not a span; bail this line.
226                break;
227            };
228            let token_bytes = &bytes[i..close];
229            // Inline-code spans wrap their content with one space
230            // padding when the content starts/ends with a backtick;
231            // CommonMark trims one leading + one trailing space.
232            let token = std::str::from_utf8(token_bytes).unwrap_or("").trim();
233            if !token.is_empty() && starts_with_any_prefix(token, prefixes) {
234                out.push(Candidate {
235                    token: token.to_string(),
236                    line: line_no,
237                    column: run_start + 1, // 1-indexed; points at opening backtick
238                });
239            }
240            i = close + run_len;
241        }
242    }
243    out
244}
245
/// If `s` starts with N+ backticks or tildes (N ≥ 3) and could be a
/// code-fence line, return the fence character and the run length.
/// Otherwise None.
///
/// For backtick fences the text after the run must not contain another
/// backtick: CommonMark forbids backticks in the info string precisely so
/// that inline code such as `` ```foo``` `` at the start of a line is not
/// mistaken for a fence opener. Tilde fences have no such restriction.
fn detect_fence(s: &str) -> Option<(char, usize)> {
    let ch = s.chars().next().filter(|c| matches!(c, '`' | '~'))?;
    let rest = s.trim_start_matches(ch);
    // Both fence characters are single-byte, so the byte difference is
    // also the number of fence characters consumed.
    let n = s.len() - rest.len();
    if n < 3 {
        return None;
    }
    if ch == '`' && rest.contains('`') {
        return None;
    }
    Some((ch, n))
}
257
/// Reports whether `s`, ignoring trailing whitespace, is made up of
/// nothing but the character `ch` (an empty string qualifies).
///
/// A closing fence may not carry an info string, so a fence-looking line
/// that fails this check cannot terminate an open fenced block.
fn only_fence(s: &str, ch: char) -> bool {
    let body = s.trim_end();
    // Peeling every leading `ch` leaves nothing exactly when all
    // characters were `ch`.
    body.trim_start_matches(ch).is_empty()
}
265
/// Locate the first maximal run of backticks in `bytes` whose length is
/// exactly `len`, returning the offset of its first byte relative to the
/// start of `bytes`. Runs of any other length are skipped whole. Returns
/// None when no such run exists.
fn find_closing_run(bytes: &[u8], len: usize) -> Option<usize> {
    let mut cursor = 0;
    while let Some(offset) = bytes[cursor..].iter().position(|&b| b == b'`') {
        let start = cursor + offset;
        let run = bytes[start..].iter().take_while(|&&b| b == b'`').count();
        if run == len {
            return Some(start);
        }
        // Wrong length: resume the search after the entire run.
        cursor = start + run;
    }
    None
}
286
/// True when `s` begins with at least one entry of `prefixes`.
/// An empty prefix list never matches.
fn starts_with_any_prefix(s: &str, prefixes: &[String]) -> bool {
    prefixes
        .iter()
        .map(String::as_str)
        .any(|prefix| s.strip_prefix(prefix).is_some())
}
290
/// Heuristic test for template-variable markers in `s`
/// (`{{ … }}` / `${ … }` / `<…>`).
///
/// Only the opening `{{` or `${` is required. For angle brackets the
/// check is that both `<` and `>` occur somewhere in `s`; their relative
/// order is not examined.
fn has_template_vars(s: &str) -> bool {
    let has_brace_marker = ["{{", "${"].iter().any(|&marker| s.contains(marker));
    let has_angle_pair = ['<', '>'].iter().all(|&bracket| s.contains(bracket));
    has_brace_marker || has_angle_pair
}
296
/// Strip trailing punctuation, trailing slashes, and
/// `:line` / `:line:col` / `#L<n>` location suffixes that aren't part of
/// the path-on-disk we want to look up.
///
/// Steps, in order:
/// 1. drop everything from the first `#` (GitHub-style anchors);
/// 2. drop trailing sentence punctuation (`.,:;?!`) so it cannot hide a
///    location suffix, as in `src/foo.ts:42.`;
/// 3. drop every trailing `:<digits>` group, so both `file:12` and
///    `file:12:7` reduce to `file`;
/// 4. drop trailing punctuation again, then trailing slashes.
///
/// The result is always a sub-slice of `s` and may be empty.
fn strip_path_decoration(s: &str) -> &str {
    let is_trailing_punct = |c: char| ".,:;?!".contains(c);

    let s = s.find('#').map_or(s, |hash| &s[..hash]);
    let mut s = s.trim_end_matches(is_trailing_punct);

    // Peel `:N` groups from the right until the tail is no longer a
    // non-empty run of ASCII digits.
    while let Some((head, tail)) = s.rsplit_once(':') {
        if tail.is_empty() || !tail.bytes().all(|b| b.is_ascii_digit()) {
            break;
        }
        s = head;
    }

    s.trim_end_matches(is_trailing_punct).trim_end_matches('/')
}
316
317/// Does `lookup` resolve to a real file or directory in the
318/// scanned tree? Glob characters in the lookup are matched
319/// against the file index (any-of); plain paths use exact
320/// lookup of either file or directory.
321fn path_resolves(ctx: &Context<'_>, lookup: &str) -> bool {
322    if lookup.is_empty() {
323        return false;
324    }
325    if lookup.contains('*') || lookup.contains('?') || lookup.contains('[') {
326        // Glob — match against the index. Build a globset on the
327        // fly; cheap for one pattern.
328        let Ok(glob) = globset::Glob::new(lookup) else {
329            return false;
330        };
331        let matcher = glob.compile_matcher();
332        return ctx.index.entries.iter().any(|e| matcher.is_match(&e.path));
333    }
334    let p = Path::new(lookup);
335    ctx.index.entries.iter().any(|e| &*e.path == p)
336}
337
// Unit tests for the markdown scanner and the pure string helpers.
// `path_resolves` and the rule impls need a real `Context` and are not
// exercised here.
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an owned prefix list from string literals.
    fn prefixes(list: &[&str]) -> Vec<String> {
        list.iter().map(|s| (*s).to_string()).collect()
    }

    /// Only spans starting with a configured prefix become candidates;
    /// `npm` is backticked but matches no prefix.
    #[test]
    fn finds_inline_backtick_with_matching_prefix() {
        let pf = prefixes(&["src/", "docs/"]);
        let cands = scan_markdown_paths("see `src/foo.ts` and `npm` and `docs/x.md`", &pf);
        assert_eq!(cands.len(), 2);
        assert_eq!(cands[0].token, "src/foo.ts");
        assert_eq!(cands[1].token, "docs/x.md");
    }

    /// Content between backtick fences is ignored, including the opening
    /// line that carries an info string.
    #[test]
    fn skips_fenced_code_blocks() {
        let pf = prefixes(&["src/"]);
        // The `\` line continuations strip the leading indentation, so
        // every markdown line below starts at column 0.
        let md = "before\n\
                  ```yaml\n\
                  example: `src/should-not-fire.ts`\n\
                  ```\n\
                  after `src/should-fire.ts`";
        let cands = scan_markdown_paths(md, &pf);
        assert_eq!(cands.len(), 1);
        assert_eq!(cands[0].token, "src/should-fire.ts");
    }

    /// Lines indented by four spaces are treated as code and skipped.
    #[test]
    fn skips_indented_code_blocks() {
        let pf = prefixes(&["src/"]);
        // `\x20` escapes keep the four leading spaces that a `\` line
        // continuation would otherwise swallow.
        let md = "normal `src/a.ts` line\n\
                  \n\
                  \x20\x20\x20\x20indented `src/should-not-fire.ts`\n";
        let cands = scan_markdown_paths(md, &pf);
        assert_eq!(cands.len(), 1);
        assert_eq!(cands[0].token, "src/a.ts");
    }

    /// `~~~` fences behave like backtick fences.
    #[test]
    fn handles_tilde_fences() {
        let pf = prefixes(&["src/"]);
        let md = "before `src/yes.ts`\n~~~\nin code: `src/no.ts`\n~~~\nafter `src/yes2.ts`";
        let tokens: Vec<_> = scan_markdown_paths(md, &pf)
            .into_iter()
            .map(|c| c.token)
            .collect();
        assert_eq!(tokens, vec!["src/yes.ts", "src/yes2.ts"]);
    }

    /// Locations are 1-indexed and point at the opening backtick.
    #[test]
    fn line_and_column_are_correct() {
        let pf = prefixes(&["src/"]);
        let md = "first line\nsecond `src/foo.ts` here";
        let cands = scan_markdown_paths(md, &pf);
        assert_eq!(cands.len(), 1);
        assert_eq!(cands[0].line, 2);
        // "second " is 7 chars + 1 for the opening backtick at col 8
        assert_eq!(cands[0].column, 8);
    }

    /// Each supported template marker is detected; square brackets alone
    /// are not a marker.
    #[test]
    fn template_vars_detected() {
        assert!(has_template_vars("src/{{user_id}}.json"));
        assert!(has_template_vars("src/${name}.ts"));
        assert!(has_template_vars("src/<placeholder>.ts"));
        assert!(!has_template_vars("src/concrete.ts"));
        assert!(!has_template_vars("src/foo[0].ts")); // brackets without angle
    }

    /// Punctuation, `:line`, `#L<n>` anchors and trailing slashes are all
    /// removed before lookup.
    #[test]
    fn path_decoration_stripped() {
        assert_eq!(strip_path_decoration("src/foo.ts"), "src/foo.ts");
        assert_eq!(strip_path_decoration("src/foo.ts."), "src/foo.ts");
        assert_eq!(strip_path_decoration("src/foo.ts,"), "src/foo.ts");
        assert_eq!(strip_path_decoration("src/foo.ts:42"), "src/foo.ts");
        assert_eq!(strip_path_decoration("src/foo.ts#L42"), "src/foo.ts");
        assert_eq!(strip_path_decoration("src/foo.ts:42#L1"), "src/foo.ts");
        assert_eq!(strip_path_decoration("src/foo/"), "src/foo");
    }

    /// Prefix matching is a plain `starts_with` against each entry.
    #[test]
    fn prefix_matching() {
        let pf = prefixes(&["src/", "crates/"]);
        assert!(starts_with_any_prefix("src/foo.ts", &pf));
        assert!(starts_with_any_prefix("crates/alint", &pf));
        assert!(!starts_with_any_prefix("docs/x.md", &pf));
        assert!(!starts_with_any_prefix("README.md", &pf));
    }

    /// A lone opening backtick yields no candidate and does not panic.
    #[test]
    fn unmatched_backticks_do_not_explode() {
        let pf = prefixes(&["src/"]);
        let cands = scan_markdown_paths("`src/foo.ts unmatched", &pf);
        assert!(cands.is_empty());
    }

    /// A double-backtick span may contain a single backtick; the scanner
    /// must pair the two double runs and then still find the later span.
    #[test]
    fn double_backticks_can_contain_single() {
        let pf = prefixes(&["src/"]);
        let md = "double `` ` `` then `src/foo.ts`";
        let cands = scan_markdown_paths(md, &pf);
        assert_eq!(cands.len(), 1);
        assert_eq!(cands[0].token, "src/foo.ts");
    }
}