1use std::path::Path;
14
15use alint_core::{Context, Error, Level, PerFileRule, Result, Rule, RuleSpec, Scope, Violation};
16use serde::Deserialize;
17
18#[derive(Debug, Deserialize)]
19#[serde(deny_unknown_fields)]
20struct Options {
21 prefixes: Vec<String>,
26
27 #[serde(default = "default_ignore_template_vars")]
30 ignore_template_vars: bool,
31}
32
/// Serde default for `Options::ignore_template_vars`: template-looking tokens
/// are ignored unless the configuration says otherwise.
fn default_ignore_template_vars() -> bool {
    const IGNORE_BY_DEFAULT: bool = true;
    IGNORE_BY_DEFAULT
}
36
37#[derive(Debug)]
38pub struct MarkdownPathsResolveRule {
39 id: String,
40 level: Level,
41 policy_url: Option<String>,
42 message: Option<String>,
43 scope: Scope,
44 prefixes: Vec<String>,
45 ignore_template_vars: bool,
46}
47
48impl Rule for MarkdownPathsResolveRule {
49 fn id(&self) -> &str {
50 &self.id
51 }
52 fn level(&self) -> Level {
53 self.level
54 }
55 fn policy_url(&self) -> Option<&str> {
56 self.policy_url.as_deref()
57 }
58 fn path_scope(&self) -> Option<&Scope> {
59 Some(&self.scope)
60 }
61
62 fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
63 let mut violations = Vec::new();
64 for entry in ctx.index.files() {
65 if !self.scope.matches(&entry.path, ctx.index) {
66 continue;
67 }
68 let full = ctx.root.join(&entry.path);
69 let Ok(bytes) = std::fs::read(&full) else {
71 continue;
72 };
73 violations.extend(self.evaluate_file(ctx, &entry.path, &bytes)?);
74 }
75 Ok(violations)
76 }
77
78 fn as_per_file(&self) -> Option<&dyn PerFileRule> {
79 Some(self)
80 }
81}
82
83impl PerFileRule for MarkdownPathsResolveRule {
84 fn path_scope(&self) -> &Scope {
85 &self.scope
86 }
87
88 fn evaluate_file(
89 &self,
90 ctx: &Context<'_>,
91 path: &Path,
92 bytes: &[u8],
93 ) -> Result<Vec<Violation>> {
94 let Ok(text) = std::str::from_utf8(bytes) else {
95 return Ok(Vec::new()); };
97 let mut violations = Vec::new();
98 for cand in scan_markdown_paths(text, &self.prefixes) {
99 if self.ignore_template_vars && has_template_vars(&cand.token) {
100 continue;
101 }
102 let lookup = strip_path_decoration(&cand.token);
103 if path_resolves(ctx, lookup) {
104 continue;
105 }
106 let msg = self.message.clone().unwrap_or_else(|| {
107 format!(
108 "backticked path `{}` doesn't resolve to a file or directory",
109 cand.token
110 )
111 });
112 violations.push(
113 Violation::new(msg)
114 .with_path(std::sync::Arc::<Path>::from(path))
115 .with_location(cand.line, cand.column),
116 );
117 }
118 Ok(violations)
119 }
120}
121
122pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
123 let Some(_paths) = &spec.paths else {
124 return Err(Error::rule_config(
125 &spec.id,
126 "markdown_paths_resolve requires a `paths` field",
127 ));
128 };
129 let opts: Options = spec
130 .deserialize_options()
131 .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
132 if opts.prefixes.is_empty() {
133 return Err(Error::rule_config(
134 &spec.id,
135 "markdown_paths_resolve requires a non-empty `prefixes` list — \
136 declare which path shapes (e.g. [\"src/\", \"crates/\", \"docs/\"]) \
137 count as path candidates in your codebase",
138 ));
139 }
140 Ok(Box::new(MarkdownPathsResolveRule {
141 id: spec.id.clone(),
142 level: spec.level,
143 policy_url: spec.policy_url.clone(),
144 message: spec.message.clone(),
145 scope: Scope::from_spec(spec)?,
146 prefixes: opts.prefixes,
147 ignore_template_vars: opts.ignore_template_vars,
148 }))
149}
150
/// One backticked token found by the scanner, with its position.
#[derive(Debug, PartialEq, Eq)]
struct Candidate {
    /// Token text between the backtick runs, with surrounding whitespace
    /// trimmed.
    token: String,
    /// 1-based line number within the scanned text.
    line: usize,
    /// 1-based byte offset of the opening backtick run within its line.
    column: usize,
}
160
161fn scan_markdown_paths(text: &str, prefixes: &[String]) -> Vec<Candidate> {
166 let mut out = Vec::new();
167 let mut in_fenced = false;
168 let mut fence_marker: Option<char> = None;
169 let mut fence_len: usize = 0;
170
171 for (line_idx, line) in text.lines().enumerate() {
172 let line_no = line_idx + 1;
173
174 let trimmed = line.trim_start();
180 if let Some((ch, n)) = detect_fence(trimmed) {
181 if !in_fenced {
182 in_fenced = true;
183 fence_marker = Some(ch);
184 fence_len = n;
185 } else if fence_marker == Some(ch) && n >= fence_len && only_fence(trimmed, ch) {
186 in_fenced = false;
187 fence_marker = None;
188 fence_len = 0;
189 }
190 continue;
191 }
192 if in_fenced {
193 continue;
194 }
195
196 if line.starts_with(" ") || line.starts_with('\t') {
203 continue;
204 }
205
206 let bytes = line.as_bytes();
212 let mut i = 0;
213 while i < bytes.len() {
214 if bytes[i] != b'`' {
215 i += 1;
216 continue;
217 }
218 let run_start = i;
219 while i < bytes.len() && bytes[i] == b'`' {
220 i += 1;
221 }
222 let run_len = i - run_start;
223 let close_start = find_closing_run(&bytes[i..], run_len).map(|p| i + p);
225 let Some(close) = close_start else {
226 break;
228 };
229 let token_bytes = &bytes[i..close];
230 let token = std::str::from_utf8(token_bytes).unwrap_or("").trim();
234 if !token.is_empty() && starts_with_any_prefix(token, prefixes) {
235 out.push(Candidate {
236 token: token.to_string(),
237 line: line_no,
238 column: run_start + 1, });
240 }
241 i = close + run_len;
242 }
243 }
244 out
245}
246
/// Recognises a code-fence opener at the very start of `s`.
///
/// Returns the marker character (`` ` `` or `~`) and the length of its
/// leading run when that run is at least three characters long; `None`
/// otherwise. The caller is expected to have removed leading whitespace.
fn detect_fence(s: &str) -> Option<(char, usize)> {
    let marker = s.chars().next().filter(|c| matches!(c, '`' | '~'))?;
    let run = s.chars().take_while(|&c| c == marker).count();
    (run >= 3).then_some((marker, run))
}
258
/// Returns `true` when `s`, ignoring trailing whitespace, consists solely of
/// the character `ch` (an empty string qualifies).
fn only_fence(s: &str, ch: char) -> bool {
    let body = s.trim_end();
    !body.chars().any(|c| c != ch)
}
266
/// Finds the first run of backticks in `bytes` that is exactly `len` long.
///
/// Returns the byte offset at which that run starts, or `None` when no run of
/// that exact length exists. Longer and shorter runs are skipped whole.
fn find_closing_run(bytes: &[u8], len: usize) -> Option<usize> {
    let mut offset = 0;
    loop {
        let rest = &bytes[offset..];
        // Jump to the next backtick; no backtick left means no closer.
        let start = offset + rest.iter().position(|&b| b == b'`')?;
        let run = bytes[start..].iter().take_while(|&&b| b == b'`').count();
        if run == len {
            return Some(start);
        }
        offset = start + run;
    }
}
287
/// Returns `true` when `s` begins with at least one entry of `prefixes`.
fn starts_with_any_prefix(s: &str, prefixes: &[String]) -> bool {
    for prefix in prefixes {
        if s.starts_with(prefix.as_str()) {
            return true;
        }
    }
    false
}
291
/// Heuristic for "this token is a template, not a concrete path".
///
/// True when `s` contains `{{`, contains `${`, or contains both a `<` and a
/// `>` somewhere (in any order).
fn has_template_vars(s: &str) -> bool {
    if s.contains("{{") || s.contains("${") {
        return true;
    }
    let has_open_angle = s.contains('<');
    let has_close_angle = s.contains('>');
    has_open_angle && has_close_angle
}
297
/// Reduces a backticked token to the bare path that should be looked up.
///
/// Applied in order:
/// 1. everything from the first `#` on is dropped (`file#L42`);
/// 2. up to two trailing `:<digits>` suffixes are dropped, covering both
///    `file:line` and `file:line:col` references;
/// 3. trailing `.`, `,`, `:`, `;`, `?`, `!` characters are dropped;
/// 4. trailing `/` characters are dropped.
///
/// The result borrows from `s`; it may be empty.
fn strip_path_decoration(s: &str) -> &str {
    let mut path = s.split_once('#').map_or(s, |(before, _)| before);

    // At most two numeric suffixes: a line number and an optional column.
    for _ in 0..2 {
        match path.rsplit_once(':') {
            Some((head, digits))
                if !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_digit()) =>
            {
                path = head;
            }
            _ => break,
        }
    }

    path.trim_end_matches(|c: char| ".,:;?!".contains(c))
        .trim_end_matches('/')
}
317
318fn path_resolves(ctx: &Context<'_>, lookup: &str) -> bool {
323 if lookup.is_empty() {
324 return false;
325 }
326 if lookup.contains('*') || lookup.contains('?') || lookup.contains('[') {
327 let Ok(glob) = globset::Glob::new(lookup) else {
330 return false;
331 };
332 let matcher = glob.compile_matcher();
333 return ctx.index.entries.iter().any(|e| matcher.is_match(&e.path));
334 }
335 let p = Path::new(lookup);
336 ctx.index.entries.iter().any(|e| &*e.path == p)
337}
338
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned prefix list from string literals.
    fn prefixes(list: &[&str]) -> Vec<String> {
        list.iter().copied().map(String::from).collect()
    }

    /// Runs the scanner and keeps only the matched tokens, in document order.
    fn tokens(md: &str, pf: &[String]) -> Vec<String> {
        scan_markdown_paths(md, pf)
            .into_iter()
            .map(|c| c.token)
            .collect()
    }

    #[test]
    fn finds_inline_backtick_with_matching_prefix() {
        let pf = prefixes(&["src/", "docs/"]);
        let found = tokens("see `src/foo.ts` and `npm` and `docs/x.md`", &pf);
        assert_eq!(found, vec!["src/foo.ts", "docs/x.md"]);
    }

    #[test]
    fn skips_fenced_code_blocks() {
        let pf = prefixes(&["src/"]);
        let md = "before\n\
                  ```yaml\n\
                  example: `src/should-not-fire.ts`\n\
                  ```\n\
                  after `src/should-fire.ts`";
        assert_eq!(tokens(md, &pf), vec!["src/should-fire.ts"]);
    }

    #[test]
    fn skips_indented_code_blocks() {
        let pf = prefixes(&["src/"]);
        let md = "normal `src/a.ts` line\n\
                  \n\
                  \x20\x20\x20\x20indented `src/should-not-fire.ts`\n";
        assert_eq!(tokens(md, &pf), vec!["src/a.ts"]);
    }

    #[test]
    fn handles_tilde_fences() {
        let pf = prefixes(&["src/"]);
        let md = "before `src/yes.ts`\n~~~\nin code: `src/no.ts`\n~~~\nafter `src/yes2.ts`";
        assert_eq!(tokens(md, &pf), vec!["src/yes.ts", "src/yes2.ts"]);
    }

    #[test]
    fn line_and_column_are_correct() {
        let pf = prefixes(&["src/"]);
        let cands = scan_markdown_paths("first line\nsecond `src/foo.ts` here", &pf);
        assert_eq!(cands.len(), 1);
        // The opening backtick is the eighth byte of the second line.
        assert_eq!((cands[0].line, cands[0].column), (2, 8));
    }

    #[test]
    fn template_vars_detected() {
        for templated in ["src/{{user_id}}.json", "src/${name}.ts", "src/<placeholder>.ts"] {
            assert!(has_template_vars(templated));
        }
        for concrete in ["src/concrete.ts", "src/foo[0].ts"] {
            assert!(!has_template_vars(concrete));
        }
    }

    #[test]
    fn path_decoration_stripped() {
        let cases = [
            ("src/foo.ts", "src/foo.ts"),
            ("src/foo.ts.", "src/foo.ts"),
            ("src/foo.ts,", "src/foo.ts"),
            ("src/foo.ts:42", "src/foo.ts"),
            ("src/foo.ts#L42", "src/foo.ts"),
            ("src/foo.ts:42#L1", "src/foo.ts"),
            ("src/foo/", "src/foo"),
        ];
        for (input, expected) in cases {
            assert_eq!(strip_path_decoration(input), expected);
        }
    }

    #[test]
    fn prefix_matching() {
        let pf = prefixes(&["src/", "crates/"]);
        for hit in ["src/foo.ts", "crates/alint"] {
            assert!(starts_with_any_prefix(hit, &pf));
        }
        for miss in ["docs/x.md", "README.md"] {
            assert!(!starts_with_any_prefix(miss, &pf));
        }
    }

    #[test]
    fn unmatched_backticks_do_not_explode() {
        let pf = prefixes(&["src/"]);
        assert!(scan_markdown_paths("`src/foo.ts unmatched", &pf).is_empty());
    }

    #[test]
    fn double_backticks_can_contain_single() {
        let pf = prefixes(&["src/"]);
        let found = tokens("double `` ` `` then `src/foo.ts`", &pf);
        assert_eq!(found, vec!["src/foo.ts"]);
    }
}