// quickmark_core/rules/md052.rs

1use once_cell::sync::Lazy;
2use regex::Regex;
3use serde::Deserialize;
4use std::collections::HashSet;
5use std::rc::Rc;
6use tree_sitter::Node;
7
8use crate::{
9    linter::{range_from_tree_sitter, RuleViolation},
10    rules::{Context, Rule, RuleLinter, RuleType},
11};
12
13// MD052-specific configuration types
14#[derive(Debug, PartialEq, Clone, Deserialize)]
15pub struct MD052ReferenceLinksImagesTable {
16    #[serde(default)]
17    pub shortcut_syntax: bool,
18    #[serde(default)]
19    pub ignored_labels: Vec<String>,
20}
21
22impl Default for MD052ReferenceLinksImagesTable {
23    fn default() -> Self {
24        Self {
25            shortcut_syntax: false,
26            ignored_labels: vec!["x".to_string()],
27        }
28    }
29}
30
31// Pre-compiled regex patterns for performance
32static FULL_REFERENCE_PATTERN: Lazy<Regex> =
33    Lazy::new(|| Regex::new(r"\[([^\]]*)\]\[([^\]]*)\]").unwrap());
34
35static COLLAPSED_REFERENCE_PATTERN: Lazy<Regex> =
36    Lazy::new(|| Regex::new(r"\[([^\]]+)\]\[\]").unwrap());
37
38static SHORTCUT_REFERENCE_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"\[([^\]]+)\]").unwrap());
39
40static REFERENCE_DEFINITION_PATTERN: Lazy<Regex> =
41    Lazy::new(|| Regex::new(r"(?m)^\s*\[([^\]]+)\]:\s*").unwrap());
42
43#[derive(Debug, Clone)]
44struct ReferenceLink {
45    label: String,
46    range: tree_sitter::Range,
47    is_shortcut: bool,
48}
49
50pub(crate) struct MD052Linter {
51    context: Rc<Context>,
52    definitions: HashSet<String>,
53    references: Vec<ReferenceLink>,
54}
55
56impl MD052Linter {
57    pub fn new(context: Rc<Context>) -> Self {
58        Self {
59            context,
60            definitions: HashSet::new(),
61            references: Vec::new(),
62        }
63    }
64
65    fn normalize_reference(&self, label: &str) -> String {
66        // Normalize reference labels according to CommonMark spec:
67        // - Convert to lowercase
68        // - Trim whitespace
69        // - Collapse consecutive whitespace to single spaces
70        label
71            .to_lowercase()
72            .split_whitespace()
73            .collect::<Vec<_>>()
74            .join(" ")
75    }
76
77    fn extract_reference_definition(&self, node: &Node) -> Vec<String> {
78        // Extract the label from reference definition nodes
79        // [label]: url "title"
80        let start_byte = node.start_byte();
81        let end_byte = node.end_byte();
82        let document_content = self.context.document_content.borrow();
83        let content = &document_content[start_byte..end_byte];
84
85        let mut definitions = Vec::new();
86        for cap in REFERENCE_DEFINITION_PATTERN.captures_iter(content) {
87            if let Some(label) = cap.get(1) {
88                definitions.push(self.normalize_reference(label.as_str()));
89            }
90        }
91        definitions
92    }
93
94    fn extract_reference_links(&self, node: &Node) -> Vec<(String, bool)> {
95        // Extract reference links in different formats:
96        // Full: [text][label]
97        // Collapsed: [label][]
98        // Shortcut: [label]
99        let start_byte = node.start_byte();
100        let end_byte = node.end_byte();
101        let document_content = self.context.document_content.borrow();
102        let content = &document_content[start_byte..end_byte];
103
104        let mut links = Vec::new();
105
106        // Check for inline links first (contain parentheses - not reference links)
107        if content.contains('(') && content.contains(')') {
108            return links; // This is an inline link, not a reference link
109        }
110
111        // Full reference: [text][label]
112        for cap in FULL_REFERENCE_PATTERN.captures_iter(content) {
113            if let Some(label) = cap.get(2) {
114                let label_str = label.as_str();
115                if !label_str.is_empty() {
116                    links.push((self.normalize_reference(label_str), false));
117                }
118            }
119        }
120
121        // Collapsed reference: [label][]
122        for cap in COLLAPSED_REFERENCE_PATTERN.captures_iter(content) {
123            if let Some(label) = cap.get(1) {
124                links.push((self.normalize_reference(label.as_str()), false));
125            }
126        }
127
128        // Shortcut reference: [label] (only if not caught by other patterns and not inline links)
129        if links.is_empty() {
130            for cap in SHORTCUT_REFERENCE_PATTERN.captures_iter(content) {
131                if let Some(label) = cap.get(1) {
132                    // Only consider it a shortcut if it doesn't look like a full/collapsed reference
133                    // and there's no second bracket pair after this one
134                    let match_end = cap.get(0).unwrap().end();
135                    let remaining = &content[match_end..];
136                    if !remaining.trim_start().starts_with('[') {
137                        links.push((self.normalize_reference(label.as_str()), true));
138                    }
139                }
140            }
141        }
142
143        links
144    }
145}
146
147impl RuleLinter for MD052Linter {
148    fn feed(&mut self, node: &Node) {
149        match node.kind() {
150            // Handle reference definitions like [label]: url
151            "paragraph" => {
152                let definitions = self.extract_reference_definition(node);
153                for definition in definitions {
154                    self.definitions.insert(definition);
155                }
156
157                // Also check for reference links in paragraphs
158                let links = self.extract_reference_links(node);
159                for (label, is_shortcut) in links {
160                    self.references.push(ReferenceLink {
161                        label,
162                        range: node.range(),
163                        is_shortcut,
164                    });
165                }
166            }
167            // Handle reference links [text][label], [label][], [label]
168            "link" | "image" => {
169                let links = self.extract_reference_links(node);
170                for (label, is_shortcut) in links {
171                    self.references.push(ReferenceLink {
172                        label,
173                        range: node.range(),
174                        is_shortcut,
175                    });
176                }
177            }
178            _ => {
179                // Check all other node types for reference definitions
180                let definitions = self.extract_reference_definition(node);
181                for definition in definitions {
182                    self.definitions.insert(definition);
183                }
184            }
185        }
186    }
187
188    fn finalize(&mut self) -> Vec<RuleViolation> {
189        let mut violations = Vec::new();
190        let config = &self.context.config.linters.settings.reference_links_images;
191        let ignored_labels: HashSet<String> = config
192            .ignored_labels
193            .iter()
194            .map(|label| self.normalize_reference(label))
195            .collect();
196
197        for reference in &self.references {
198            // Skip shortcut syntax unless explicitly enabled
199            if reference.is_shortcut && !config.shortcut_syntax {
200                continue;
201            }
202
203            let normalized_label = self.normalize_reference(&reference.label);
204
205            // Skip if label is in ignored list
206            if ignored_labels.contains(&normalized_label) {
207                continue;
208            }
209
210            // Check if definition exists
211            if !self.definitions.contains(&normalized_label) {
212                violations.push(RuleViolation::new(
213                    &MD052,
214                    format!(
215                        "Missing link or image reference definition: \"{}\"",
216                        reference.label
217                    ),
218                    self.context.file_path.clone(),
219                    range_from_tree_sitter(&reference.range),
220                ));
221            }
222        }
223
224        violations
225    }
226}
227
228pub const MD052: Rule = Rule {
229    id: "MD052",
230    alias: "reference-links-images",
231    tags: &["links", "images"],
232    description: "Reference links and images should use a label that is defined",
233    rule_type: RuleType::Document,
234    required_nodes: &["link", "image", "paragraph"],
235    new_linter: |context| Box::new(MD052Linter::new(context)),
236};
237
238#[cfg(test)]
239mod test {
240    use std::path::PathBuf;
241
242    use crate::config::{LintersSettingsTable, MD052ReferenceLinksImagesTable, RuleSeverity};
243    use crate::linter::MultiRuleLinter;
244    use crate::test_utils::test_helpers::test_config_with_rules;
245
246    fn test_config() -> crate::config::QuickmarkConfig {
247        test_config_with_rules(vec![("reference-links-images", RuleSeverity::Error)])
248    }
249
250    fn test_config_with_settings(
251        shortcut_syntax: bool,
252        ignored_labels: Vec<String>,
253    ) -> crate::config::QuickmarkConfig {
254        crate::test_utils::test_helpers::test_config_with_settings(
255            vec![("reference-links-images", RuleSeverity::Error)],
256            LintersSettingsTable {
257                reference_links_images: MD052ReferenceLinksImagesTable {
258                    shortcut_syntax,
259                    ignored_labels,
260                },
261                ..Default::default()
262            },
263        )
264    }
265
266    #[test]
267    fn test_valid_full_reference() {
268        let input = "[Good link][label]
269
270[label]: https://example.com
271";
272
273        let config = test_config();
274        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
275        let violations = linter.analyze();
276
277        // Should have no violations - valid reference
278        assert_eq!(0, violations.len());
279    }
280
281    #[test]
282    fn test_invalid_full_reference() {
283        let input = "[Bad link][missing]
284
285[label]: https://example.com
286";
287
288        let config = test_config();
289        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
290        let violations = linter.analyze();
291
292        // Should have 1 violation - missing reference definition
293        assert_eq!(1, violations.len());
294        assert!(violations[0]
295            .message()
296            .contains("Missing link or image reference definition: \"missing\""));
297    }
298
299    #[test]
300    fn test_valid_collapsed_reference() {
301        let input = "[label][]
302
303[label]: https://example.com
304";
305
306        let config = test_config();
307        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
308        let violations = linter.analyze();
309
310        // Should have no violations - valid collapsed reference
311        assert_eq!(0, violations.len());
312    }
313
314    #[test]
315    fn test_invalid_collapsed_reference() {
316        let input = "[missing][]
317
318[label]: https://example.com
319";
320
321        let config = test_config();
322        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
323        let violations = linter.analyze();
324
325        // Should have 1 violation - missing reference definition
326        assert_eq!(1, violations.len());
327        assert!(violations[0]
328            .message()
329            .contains("Missing link or image reference definition: \"missing\""));
330    }
331
332    #[test]
333    fn test_shortcut_syntax_disabled_by_default() {
334        let input = "[undefined]
335
336[label]: https://example.com
337";
338
339        let config = test_config();
340        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
341        let violations = linter.analyze();
342
343        // Should have no violations - shortcut syntax ignored by default
344        assert_eq!(0, violations.len());
345    }
346
347    #[test]
348    fn test_shortcut_syntax_enabled() {
349        let input = "[undefined]
350
351[label]: https://example.com
352";
353
354        let config = test_config_with_settings(true, vec!["x".to_string()]);
355        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
356        let violations = linter.analyze();
357
358        // Should have 1 violation - shortcut syntax enabled and undefined
359        assert_eq!(1, violations.len());
360        assert!(violations[0]
361            .message()
362            .contains("Missing link or image reference definition: \"undefined\""));
363    }
364
365    #[test]
366    fn test_valid_shortcut_syntax_enabled() {
367        let input = "[label]
368
369[label]: https://example.com
370";
371
372        let config = test_config_with_settings(true, vec!["x".to_string()]);
373        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
374        let violations = linter.analyze();
375
376        // Should have no violations - shortcut syntax enabled and defined
377        assert_eq!(0, violations.len());
378    }
379
380    #[test]
381    fn test_ignored_labels_default_x() {
382        let input = "[x] Task item
383
384[label]: https://example.com
385";
386
387        let config = test_config();
388        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
389        let violations = linter.analyze();
390
391        // Should have no violations - 'x' is ignored by default (GitHub task list)
392        assert_eq!(0, violations.len());
393    }
394
395    #[test]
396    fn test_custom_ignored_labels() {
397        let input = "[custom] Some text
398[another] More text
399
400[label]: https://example.com
401";
402
403        let config =
404            test_config_with_settings(true, vec!["custom".to_string(), "another".to_string()]);
405        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
406        let violations = linter.analyze();
407
408        // Should have no violations - custom labels are ignored
409        assert_eq!(0, violations.len());
410    }
411
412    #[test]
413    fn test_case_insensitive_matching() {
414        let input = "[Good Link][LABEL]
415
416[label]: https://example.com
417";
418
419        let config = test_config();
420        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
421        let violations = linter.analyze();
422
423        // Should have no violations - case insensitive matching per CommonMark
424        assert_eq!(0, violations.len());
425    }
426
427    #[test]
428    fn test_whitespace_normalization() {
429        let input = "[Good Link][  label   with   spaces  ]
430
431[label with spaces]: https://example.com
432";
433
434        let config = test_config();
435        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
436        let violations = linter.analyze();
437
438        // Should have no violations - whitespace is normalized per CommonMark
439        assert_eq!(0, violations.len());
440    }
441
442    #[test]
443    fn test_images_full_reference() {
444        let input = "![Alt text][image]
445
446[image]: https://example.com/image.png
447";
448
449        let config = test_config();
450        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
451        let violations = linter.analyze();
452
453        // Should have no violations - valid image reference
454        assert_eq!(0, violations.len());
455    }
456
457    #[test]
458    fn test_images_invalid_reference() {
459        let input = "![Alt text][missing]
460
461[image]: https://example.com/image.png
462";
463
464        let config = test_config();
465        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
466        let violations = linter.analyze();
467
468        // Should have 1 violation - missing image reference definition
469        assert_eq!(1, violations.len());
470        assert!(violations[0]
471            .message()
472            .contains("Missing link or image reference definition: \"missing\""));
473    }
474
475    #[test]
476    fn test_multiple_violations() {
477        let input = "[Bad link][missing1]
478[Another bad][missing2]
479[Good link][valid]
480
481[valid]: https://example.com
482";
483
484        let config = test_config();
485        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
486        let violations = linter.analyze();
487
488        // Should have 2 violations - two missing reference definitions
489        assert_eq!(2, violations.len());
490    }
491
492    #[test]
493    fn test_mixed_link_types() {
494        let input = "[Full][label1]
495[Collapsed][]
496[Shortcut]
497![Image][image1]
498![Collapsed image][]
499
500[label1]: https://example.com/1
501[collapsed]: https://example.com/2
502[shortcut]: https://example.com/3
503[image1]: https://example.com/image1.png
504[collapsed image]: https://example.com/image2.png
505";
506
507        let config = test_config();
508        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
509        let violations = linter.analyze();
510
511        // Should have no violations - all references defined (shortcut ignored by default)
512        assert_eq!(0, violations.len());
513    }
514
515    #[test]
516    fn test_duplicate_definitions() {
517        let input = "[Good link][label]
518
519[label]: https://example.com/1
520[label]: https://example.com/2
521";
522
523        let config = test_config();
524        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
525        let violations = linter.analyze();
526
527        // Should have no violations - first definition wins per CommonMark spec
528        assert_eq!(0, violations.len());
529    }
530}