quickmark_core/rules/
md053.rs

1use once_cell::sync::Lazy;
2use regex::Regex;
3use serde::Deserialize;
4use std::collections::{HashMap, HashSet};
5use std::rc::Rc;
6use tree_sitter::Node;
7
8use crate::{
9    linter::{range_from_tree_sitter, RuleViolation},
10    rules::{Context, Rule, RuleLinter, RuleType},
11};
12
13// MD053-specific configuration types
14#[derive(Debug, PartialEq, Clone, Deserialize)]
15pub struct MD053LinkImageReferenceDefinitionsTable {
16    #[serde(default)]
17    pub ignored_definitions: Vec<String>,
18}
19
20impl Default for MD053LinkImageReferenceDefinitionsTable {
21    fn default() -> Self {
22        Self {
23            ignored_definitions: vec!["//".to_string()],
24        }
25    }
26}
27
// Pre-compiled regex patterns for performance: each pattern is compiled once,
// on first use, and shared by every linter instance.

/// Matches two adjacent bracket pairs, `[text][label]`.
/// Group 1 is the text, group 2 is the label; either may be empty, so the
/// collapsed form `[label][]` also matches (with an empty group 2).
static FULL_REFERENCE_PATTERN: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"\[([^\]]*)\]\[([^\]]*)\]").unwrap());

/// Matches a collapsed reference, `[label][]`. Group 1 is the non-empty label.
static COLLAPSED_REFERENCE_PATTERN: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"\[([^\]]+)\]\[\]").unwrap());

/// Matches any single bracket pair with non-empty content, `[label]`.
/// Group 1 is the content. This also matches the individual bracket pairs of
/// other link forms, so callers must inspect what follows the match.
static SHORTCUT_REFERENCE_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"\[([^\]]+)\]").unwrap());

/// Matches the start of a reference definition, `[label]:`, at the beginning
/// of a line (multi-line mode) after optional whitespace. Group 1 is the label.
static REFERENCE_DEFINITION_PATTERN: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?m)^\s*\[([^\]]+)\]:\s*").unwrap());
39
/// One reference definition (`[label]: destination`) found in the document.
#[derive(Debug, Clone)]
struct ReferenceDefinition {
    /// Label after normalization by `MD053Linter::normalize_reference`.
    label: String,
    /// Source range of the syntax node the definition was extracted from;
    /// used as the location of any violation reported for it.
    range: tree_sitter::Range,
}
45
/// Linter state for MD053: collects reference definitions and reference uses
/// while nodes are fed in, then compares them in `finalize`.
/// Both collections are keyed by normalized label.
pub(crate) struct MD053Linter {
    context: Rc<Context>,
    definitions: HashMap<String, Vec<ReferenceDefinition>>, // Track multiple definitions per label
    references: HashSet<String>,                            // All referenced labels
}
51
52impl MD053Linter {
53    pub fn new(context: Rc<Context>) -> Self {
54        Self {
55            context,
56            definitions: HashMap::new(),
57            references: HashSet::new(),
58        }
59    }
60
61    fn normalize_reference(&self, label: &str) -> String {
62        // Normalize reference labels according to CommonMark spec:
63        // - Convert to lowercase
64        // - Trim whitespace
65        // - Collapse consecutive whitespace to single spaces
66        let mut result = String::with_capacity(label.len());
67        let mut prev_was_space = false;
68
69        for ch in label.chars() {
70            if ch.is_whitespace() {
71                if !prev_was_space && !result.is_empty() {
72                    result.push(' ');
73                    prev_was_space = true;
74                }
75            } else {
76                result.push(ch.to_lowercase().next().unwrap_or(ch));
77                prev_was_space = false;
78            }
79        }
80
81        // Remove trailing space if present
82        if result.ends_with(' ') {
83            result.pop();
84        }
85
86        result
87    }
88
89    fn extract_reference_definition(&self, node: &Node) -> Vec<ReferenceDefinition> {
90        // Extract the label from reference definition nodes
91        // [label]: url "title"
92        let start_byte = node.start_byte();
93        let end_byte = node.end_byte();
94        let document_content = self.context.document_content.borrow();
95        let content = &document_content[start_byte..end_byte];
96
97        REFERENCE_DEFINITION_PATTERN
98            .captures_iter(content)
99            .filter_map(|cap| {
100                cap.get(1).map(|label| {
101                    let normalized_label = self.normalize_reference(label.as_str());
102                    ReferenceDefinition {
103                        label: normalized_label,
104                        range: node.range(),
105                    }
106                })
107            })
108            .collect()
109    }
110
111    fn extract_reference_links(&self, node: &Node) -> Vec<String> {
112        // Extract reference links in different formats:
113        // Full: [text][label]
114        // Collapsed: [label][]
115        // Shortcut: [label]
116        let start_byte = node.start_byte();
117        let end_byte = node.end_byte();
118        let document_content = self.context.document_content.borrow();
119        let content = &document_content[start_byte..end_byte];
120
121        let mut links = Vec::new();
122
123        // Check for inline links first (contain parentheses - not reference links)
124        if content.contains('(') && content.contains(')') {
125            return links; // This is an inline link, not a reference link
126        }
127
128        // Full reference: [text][label]
129        for cap in FULL_REFERENCE_PATTERN.captures_iter(content) {
130            if let Some(label) = cap.get(2) {
131                let label_str = label.as_str();
132                if !label_str.is_empty() {
133                    links.push(self.normalize_reference(label_str));
134                }
135            }
136        }
137
138        // Collapsed reference: [label][]
139        for cap in COLLAPSED_REFERENCE_PATTERN.captures_iter(content) {
140            if let Some(label) = cap.get(1) {
141                links.push(self.normalize_reference(label.as_str()));
142            }
143        }
144
145        // Shortcut reference: [label] - check all potential shortcuts
146        // We need to be careful not to double-count references that were already caught by full/collapsed patterns
147        let mut shortcut_candidates = Vec::new();
148        for cap in SHORTCUT_REFERENCE_PATTERN.captures_iter(content) {
149            if let Some(label) = cap.get(1) {
150                let full_match = cap.get(0).expect("regex match should have group 0");
151                let start = full_match.start();
152                let end = full_match.end();
153                let remaining = &content[end..];
154
155                // Check if this looks like a shortcut (not immediately followed by [] or [label])
156                // We only reject if immediately followed by brackets, not if there's whitespace/newline first
157                let immediately_followed_by_bracket = remaining.starts_with('[');
158                if !immediately_followed_by_bracket {
159                    shortcut_candidates.push((
160                        start,
161                        end,
162                        self.normalize_reference(label.as_str()),
163                    ));
164                }
165            }
166        }
167
168        // Filter out shortcut candidates that overlap with already found full/collapsed references
169        // Use a HashSet for O(1) lookup performance
170        let mut existing_labels: HashSet<String> = links.iter().cloned().collect();
171        for (_start, _end, normalized_label) in shortcut_candidates {
172            // Check if this shortcut overlaps with any full/collapsed reference we already found
173            // For now, we'll use a simple heuristic: if we didn't find this as a full/collapsed reference,
174            // and it's not followed by brackets, treat it as a shortcut
175            if !existing_labels.contains(&normalized_label) {
176                existing_labels.insert(normalized_label.clone());
177                links.push(normalized_label);
178            }
179        }
180
181        links
182    }
183}
184
185impl RuleLinter for MD053Linter {
186    fn feed(&mut self, node: &Node) {
187        match node.kind() {
188            // Handle reference definitions like [label]: url
189            "link_reference_definition" => {
190                let definitions = self.extract_reference_definition(node);
191                for definition in definitions {
192                    self.definitions
193                        .entry(definition.label.clone())
194                        .or_default()
195                        .push(definition);
196                }
197            }
198            // Handle paragraphs for reference links
199            "paragraph" => {
200                let links = self.extract_reference_links(node);
201                for link in links {
202                    self.references.insert(link);
203                }
204            }
205            // Handle reference links [text][label], [label][], [label]
206            "link" | "image" => {
207                let links = self.extract_reference_links(node);
208                for link in links {
209                    self.references.insert(link);
210                }
211            }
212            _ => {
213                // Ignore other node types
214            }
215        }
216    }
217
218    fn finalize(&mut self) -> Vec<RuleViolation> {
219        let mut violations = Vec::new();
220        let config = &self
221            .context
222            .config
223            .linters
224            .settings
225            .link_image_reference_definitions;
226        let ignored_definitions: HashSet<String> = config
227            .ignored_definitions
228            .iter()
229            .map(|label| self.normalize_reference(label))
230            .collect();
231
232        // Check for unused definitions and duplicates
233        for (label, definitions) in &self.definitions {
234            // Skip if label is in ignored list
235            if ignored_definitions.contains(label) {
236                continue;
237            }
238
239            // Check if definition is unused (no references to it)
240            let is_unused = !self.references.contains(label);
241
242            if definitions.len() > 1 {
243                // Handle duplicate definitions
244                if is_unused {
245                    // If unused, report the first definition as unused
246                    let first_def = &definitions[0];
247                    violations.push(RuleViolation::new(
248                        &MD053,
249                        format!(
250                            "Unused link or image reference definition: \"{}\"",
251                            first_def.label
252                        ),
253                        self.context.file_path.clone(),
254                        range_from_tree_sitter(&first_def.range),
255                    ));
256                }
257                // Report all subsequent definitions as duplicates (first definition wins per CommonMark)
258                for definition in &definitions[1..] {
259                    violations.push(RuleViolation::new(
260                        &MD053,
261                        format!(
262                            "Duplicate link or image reference definition: \"{}\"",
263                            definition.label
264                        ),
265                        self.context.file_path.clone(),
266                        range_from_tree_sitter(&definition.range),
267                    ));
268                }
269            } else if is_unused {
270                // Single definition that is unused
271                let def = &definitions[0];
272                violations.push(RuleViolation::new(
273                    &MD053,
274                    format!(
275                        "Unused link or image reference definition: \"{}\"",
276                        def.label
277                    ),
278                    self.context.file_path.clone(),
279                    range_from_tree_sitter(&def.range),
280                ));
281            }
282        }
283
284        violations
285    }
286}
287
/// Rule descriptor for MD053 (`link-image-reference-definitions`).
///
/// `required_nodes` lists the node kinds that `MD053Linter::feed` handles,
/// and `new_linter` builds a fresh `MD053Linter` for the given context.
pub const MD053: Rule = Rule {
    id: "MD053",
    alias: "link-image-reference-definitions",
    tags: &["links", "images"],
    description: "Link and image reference definitions should be needed",
    rule_type: RuleType::Document,
    required_nodes: &["link", "image", "paragraph", "link_reference_definition"],
    new_linter: |context| Box::new(MD053Linter::new(context)),
};
297
// Tests for MD053. Each test lints a small Markdown document with only this
// rule enabled and checks the number and messages of the reported violations.
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::{
        LintersSettingsTable, MD053LinkImageReferenceDefinitionsTable, RuleSeverity,
    };
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_rules;

    // Configuration with only MD053 enabled (as an error) and default settings.
    fn test_config() -> crate::config::QuickmarkConfig {
        test_config_with_rules(vec![(
            "link-image-reference-definitions",
            RuleSeverity::Error,
        )])
    }

    // Configuration with only MD053 enabled and a custom `ignored_definitions` list.
    fn test_config_with_ignored_definitions(
        ignored_definitions: Vec<String>,
    ) -> crate::config::QuickmarkConfig {
        crate::test_utils::test_helpers::test_config_with_settings(
            vec![("link-image-reference-definitions", RuleSeverity::Error)],
            LintersSettingsTable {
                link_image_reference_definitions: MD053LinkImageReferenceDefinitionsTable {
                    ignored_definitions,
                },
                ..Default::default()
            },
        )
    }

    // A definition that nothing references is reported as unused.
    #[test]
    fn test_unused_definition_basic() {
        let input = "[unused]: https://example.com

Some text.
";

        let config = test_config();
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        // Should have 1 violation - unused reference definition
        assert_eq!(1, violations.len());
        assert!(violations[0]
            .message()
            .contains("Unused link or image reference definition: \"unused\""));
    }

    // A definition used by a full reference link is not reported.
    #[test]
    fn test_used_definition_basic() {
        let input = "[label]: https://example.com

[Good link][label]
";

        let config = test_config();
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        // Should have no violations - definition is used
        assert_eq!(0, violations.len());
    }

    // A second definition of a used label is reported as a duplicate.
    #[test]
    fn test_duplicate_definitions() {
        let input = "[label]: https://example.com/1
[label]: https://example.com/2

[Good link][label]
";

        let config = test_config();
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        // Should have 1 violation - duplicate definition (second one)
        assert_eq!(1, violations.len());
        assert!(violations[0]
            .message()
            .contains("Duplicate link or image reference definition: \"label\""));
    }

    // Unused labels and an unused duplicated label in the same document.
    #[test]
    fn test_unused_and_duplicate() {
        let input = "[unused1]: https://example.com/1
[unused2]: https://example.com/2
[duplicate]: https://example.com/3
[duplicate]: https://example.com/4

Some text.
";

        let config = test_config();
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        // Should have 4 violations: 3 unused (unused1, unused2, first `duplicate`)
        // + 1 duplicate (second `duplicate`)
        assert_eq!(4, violations.len());

        // Check violation types
        let messages: Vec<&str> = violations.iter().map(|v| v.message()).collect();
        let unused_count = messages.iter().filter(|m| m.contains("Unused")).count();
        let duplicate_count = messages.iter().filter(|m| m.contains("Duplicate")).count();

        assert_eq!(3, unused_count); // unused1, unused2, and the first `duplicate` definition
        assert_eq!(1, duplicate_count); // second `duplicate` definition
    }

    // The collapsed form `[label][]` counts as a use.
    #[test]
    fn test_collapsed_reference_format() {
        let input = "[label]: https://example.com

[label][]
";

        let config = test_config();
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        // Should have no violations - collapsed reference is used
        assert_eq!(0, violations.len());
    }

    // The shortcut form `[label]` counts as a use.
    #[test]
    fn test_shortcut_reference_format() {
        let input = "[label]: https://example.com

[label]
";

        let config = test_config();
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        // Should have no violations - shortcut reference is used
        assert_eq!(0, violations.len());
    }

    // Image references (`![alt][label]`) count as uses.
    #[test]
    fn test_image_references() {
        let input = "[image]: https://example.com/image.png
[unused-image]: https://example.com/unused.png

![Alt text][image]
";

        let config = test_config();
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        // Should have 1 violation - unused image reference
        assert_eq!(1, violations.len());
        assert!(violations[0]
            .message()
            .contains("Unused link or image reference definition: \"unused-image\""));
    }

    // Labels are compared without regard to case.
    #[test]
    fn test_case_insensitive_matching() {
        let input = "[Label]: https://example.com

[Good link][LABEL]
";

        let config = test_config();
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        // Should have no violations - case insensitive matching per CommonMark
        assert_eq!(0, violations.len());
    }

    // Surrounding and repeated whitespace inside a label is ignored when matching.
    #[test]
    fn test_whitespace_normalization() {
        let input = "[  label   with   spaces  ]: https://example.com

[Good link][label with spaces]
";

        let config = test_config();
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        // Should have no violations - whitespace is normalized per CommonMark
        assert_eq!(0, violations.len());
    }

    // With default settings the `//` label (comment idiom) is never reported.
    #[test]
    fn test_ignored_definitions_default() {
        let input = "[//]: # (This is a comment)
[unused]: https://example.com

Some text.
";

        let config = test_config();
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        // Should have 1 violation - '//' is ignored by default, but 'unused' is not
        assert_eq!(1, violations.len());
        assert!(violations[0]
            .message()
            .contains("Unused link or image reference definition: \"unused\""));
    }

    // Labels listed in `ignored_definitions` are not reported even when unused.
    #[test]
    fn test_custom_ignored_definitions() {
        let input = "[custom]: https://example.com
[another]: https://example.com
[regular]: https://example.com

[Good link][regular]
";

        let config =
            test_config_with_ignored_definitions(vec!["custom".to_string(), "another".to_string()]);
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        // Should have no violations - custom and another are ignored, regular is used
        assert_eq!(0, violations.len());
    }

    // All reference forms, unused, duplicate and ignored definitions together.
    #[test]
    fn test_mixed_scenarios_comprehensive() {
        let input = "[used-full]: https://example.com/1
[used-collapsed]: https://example.com/2
[used-shortcut]: https://example.com/3
[unused]: https://example.com/4
[duplicate-used]: https://example.com/5
[duplicate-used]: https://example.com/6
[duplicate-unused]: https://example.com/7
[duplicate-unused]: https://example.com/8
[//]: # (Ignored comment)

[Link 1][used-full]
[used-collapsed][]
[used-shortcut]
[Link 2][duplicate-used]
";

        let config = test_config();
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        // Expected violations:
        // - unused: unused
        // - duplicate-used (second): duplicate
        // - duplicate-unused (first): unused
        // - duplicate-unused (second): duplicate
        assert_eq!(4, violations.len());

        let messages: Vec<&str> = violations.iter().map(|v| v.message()).collect();
        let unused_count = messages.iter().filter(|m| m.contains("Unused")).count();
        let duplicate_count = messages.iter().filter(|m| m.contains("Duplicate")).count();

        assert_eq!(2, unused_count); // unused + duplicate-unused (first)
        assert_eq!(2, duplicate_count); // duplicate-used (second) + duplicate-unused (second)
    }

    // Inline links (`[text](url)`) do not count as uses of a definition.
    #[test]
    fn test_inline_links_ignored() {
        let input = "[unused]: https://example.com

[Inline link](https://example.com) and [another](https://example.com).
";

        let config = test_config();
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();

        // Should have 1 violation - unused definition, inline links don't count as references
        assert_eq!(1, violations.len());
        assert!(violations[0]
            .message()
            .contains("Unused link or image reference definition: \"unused\""));
    }
}