quickmark_core/rules/
md059.rs

1use serde::Deserialize;
2use std::collections::HashSet;
3use std::rc::Rc;
4
5use once_cell::sync::Lazy;
6use regex::Regex;
7use tree_sitter::Node;
8
9use crate::{
10    linter::{range_from_tree_sitter, RuleViolation},
11    rules::{Context, Rule, RuleLinter, RuleType},
12};
13
14// MD059-specific configuration types
15#[derive(Debug, PartialEq, Clone, Deserialize)]
16pub struct MD059DescriptiveLinkTextTable {
17    #[serde(default)]
18    pub prohibited_texts: Vec<String>,
19}
20
21impl Default for MD059DescriptiveLinkTextTable {
22    fn default() -> Self {
23        Self {
24            prohibited_texts: vec![
25                "click here".to_string(),
26                "here".to_string(),
27                "link".to_string(),
28                "more".to_string(),
29            ],
30        }
31    }
32}
33
/// Regular inline links: `[text](url)` - but NOT images `![text](url)`.
///
/// Capture group 1 is the link label, group 2 the destination. The leading
/// `(?:^|[^!])` is what excludes images; unless the link sits at the very
/// start of the haystack, the overall match therefore also includes the one
/// character in front of `[`.
static RE_INLINE_LINK: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"(?:^|[^!])\[([^\]]*)\]\(([^)]+)\)").expect("Failed to compile inline link regex")
});

/// Reference links: `[text][ref]` - but NOT images `![text][ref]`.
///
/// Capture group 1 is the link label, group 2 the (non-empty) reference name.
static RE_REF_LINK: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"(?:^|[^!])\[([^\]]*)\]\[([^\]]+)\]")
        .expect("Failed to compile reference link regex")
});

/// Collapsed reference links: `[text][]` - but NOT images `![text][]`.
///
/// Capture group 1 is the (non-empty) link label.
static RE_COLLAPSED_REF_LINK: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"(?:^|[^!])\[([^\]]+)\]\[\]")
        .expect("Failed to compile collapsed reference link regex")
});

/// Runs of non-word characters and underscores; `normalize_text` turns each run into one space.
static RE_NORMALIZE_PUNCTUATION: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"[\W_]+").expect("Failed to compile punctuation regex"));
/// Runs of whitespace; `normalize_text` collapses each run into one space.
static RE_NORMALIZE_WHITESPACE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"\s+").expect("Failed to compile whitespace regex"));
55
/// MD059 - Link text should be descriptive
///
/// This rule checks that link text provides meaningful description instead of generic phrases.
pub(crate) struct MD059Linter {
    /// Shared lint context; the linter reads the document content, the file
    /// path and the rule settings from it.
    context: Rc<Context>,
    /// Violations accumulated while nodes are fed; handed out by `finalize`.
    violations: Vec<RuleViolation>,
    /// Prohibited link texts, stored in the form produced by `normalize_text`.
    prohibited_texts: HashSet<String>,
}
64
65impl MD059Linter {
66    pub fn new(context: Rc<Context>) -> Self {
67        let prohibited_texts = context
68            .config
69            .linters
70            .settings
71            .descriptive_link_text
72            .prohibited_texts
73            .iter()
74            .map(|text| normalize_text(text))
75            .collect();
76
77        Self {
78            context,
79            violations: Vec::new(),
80            prohibited_texts,
81        }
82    }
83}
84
85impl RuleLinter for MD059Linter {
86    fn feed(&mut self, node: &Node) {
87        // Process different possible link node types
88        match node.kind() {
89            "link" => self.check_link_text(node),
90            "inline" => self.check_inline_for_links(node),
91            _ => {}
92        }
93    }
94
95    fn finalize(&mut self) -> Vec<RuleViolation> {
96        std::mem::take(&mut self.violations)
97    }
98}
99
100impl MD059Linter {
101    fn check_inline_for_links(&mut self, inline_node: &Node) {
102        // Look for links within inline content using the text
103        let link_text = {
104            let document_content = self.context.document_content.borrow();
105            inline_node
106                .utf8_text(document_content.as_bytes())
107                .unwrap_or("")
108                .to_string()
109        };
110
111        // Parse the inline content for markdown links
112        if !link_text.is_empty() {
113            self.check_text_for_link_patterns(&link_text, inline_node);
114        }
115    }
116
117    fn check_text_for_link_patterns(&mut self, text: &str, node: &Node) {
118        for caps in RE_INLINE_LINK.captures_iter(text) {
119            if let Some(label_match) = caps.get(1) {
120                let label_text = label_match.as_str();
121                self.check_label_for_prohibited_text(label_text, node);
122            }
123        }
124
125        for caps in RE_REF_LINK.captures_iter(text) {
126            if let Some(label_match) = caps.get(1) {
127                let label_text = label_match.as_str();
128                self.check_label_for_prohibited_text(label_text, node);
129            }
130        }
131
132        for caps in RE_COLLAPSED_REF_LINK.captures_iter(text) {
133            if let Some(label_match) = caps.get(1) {
134                let label_text = label_match.as_str();
135                self.check_label_for_prohibited_text(label_text, node);
136            }
137        }
138    }
139
140    fn check_link_text(&mut self, link_node: &Node) {
141        // Extract the link text content from tree-sitter link nodes
142        if let Some(text) = self.extract_link_text(link_node) {
143            // Check if the link contains code or HTML content - if so, skip validation
144            if self.contains_allowed_elements(link_node) {
145                return;
146            }
147
148            let normalized_text = normalize_text(&text);
149
150            if self.prohibited_texts.contains(&normalized_text) {
151                self.create_violation(link_node, &text);
152            }
153        }
154    }
155
156    fn check_label_for_prohibited_text(&mut self, label_text: &str, node: &Node) {
157        // Check if label text contains code or HTML - if so, skip
158        if label_text.contains('`') || label_text.contains('<') {
159            return;
160        }
161
162        let normalized_text = normalize_text(label_text);
163
164        if self.prohibited_texts.contains(&normalized_text) {
165            self.create_violation(node, label_text);
166        }
167    }
168
169    fn extract_link_text(&self, link_node: &Node) -> Option<String> {
170        // Navigate the tree-sitter AST to find the link text
171        // Links in markdown have structure like: link -> label -> [text content]
172        let document_content = self.context.document_content.borrow();
173        let document_bytes = document_content.as_bytes();
174
175        // Look for label child node
176        for child in link_node.children(&mut link_node.walk()) {
177            if child.kind() == "label" {
178                // Extract text from label, excluding the brackets
179                let label_text = child.utf8_text(document_bytes).unwrap_or("");
180
181                // Remove the surrounding brackets
182                if label_text.starts_with('[') && label_text.ends_with(']') {
183                    let inner_text = &label_text[1..label_text.len() - 1];
184                    return Some(inner_text.to_string());
185                }
186            }
187        }
188
189        // Fallback: try to extract from the full link text
190        let full_text = link_node.utf8_text(document_bytes).unwrap_or("");
191        if let Some(start) = full_text.find('[') {
192            if let Some(end) = full_text[start..].find(']') {
193                let inner_text = &full_text[start + 1..start + end];
194                return Some(inner_text.to_string());
195            }
196        }
197
198        None
199    }
200
201    fn contains_allowed_elements(&self, link_node: &Node) -> bool {
202        // Check if the link contains code or HTML elements, which are allowed.
203        // This is an efficient, allocation-free, iterative pre-order traversal.
204        let allowed_types: &[&str] = &["code_span", "html_tag", "inline_html"];
205        let mut cursor = link_node.walk();
206        loop {
207            if allowed_types.contains(&cursor.node().kind()) {
208                return true;
209            }
210            if !cursor.goto_first_child() {
211                while !cursor.goto_next_sibling() {
212                    if !cursor.goto_parent() {
213                        return false;
214                    }
215                }
216            }
217        }
218    }
219
220    fn create_violation(&mut self, node: &Node, link_text: &str) {
221        let message = format!("Link text should be descriptive: '{link_text}'");
222
223        self.violations.push(RuleViolation::new(
224            &MD059,
225            message,
226            self.context.file_path.clone(),
227            range_from_tree_sitter(&node.range()),
228        ));
229    }
230}
231
232/// Normalizes text using the same algorithm as the original markdownlint
233/// Removes punctuation and extra whitespace, converts to lowercase
234fn normalize_text(text: &str) -> String {
235    // Replace all non-word and underscore characters with spaces
236    let step1 = RE_NORMALIZE_PUNCTUATION.replace_all(text, " ");
237
238    // Replace multiple spaces with single space
239    let step2 = RE_NORMALIZE_WHITESPACE.replace_all(&step1, " ");
240
241    // Convert to lowercase and trim
242    step2.to_lowercase().trim().to_string()
243}
244
/// Rule descriptor for MD059 (`descriptive-link-text`).
///
/// `new_linter` builds an [`MD059Linter`] for a context. `required_nodes`
/// names the `link` and `inline` node kinds, which are the two kinds the
/// linter's `feed` acts on (presumably the engine uses this list to decide
/// which nodes to forward - confirm against the rule runner).
pub const MD059: Rule = Rule {
    id: "MD059",
    alias: "descriptive-link-text",
    tags: &["accessibility", "links"],
    description: "Link text should be descriptive",
    rule_type: RuleType::Token,
    required_nodes: &["link", "inline"],
    new_linter: |context| Box::new(MD059Linter::new(context)),
};
254
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::RuleSeverity;
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_rules;

    use super::normalize_text;

    /// Config with MD059 reported as an error and a few unrelated rules switched off.
    fn test_config() -> crate::config::QuickmarkConfig {
        test_config_with_rules(vec![
            ("descriptive-link-text", RuleSeverity::Error),
            ("heading-style", RuleSeverity::Off),
            ("heading-increment", RuleSeverity::Off),
            ("line-length", RuleSeverity::Off),
        ])
    }

    /// Lints `input` as `test.md` with [`test_config`] and returns the number of violations.
    fn violation_count(input: &str) -> usize {
        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        let violations = linter.analyze();
        violations.len()
    }

    #[test]
    fn test_normalize_text() {
        let variants = [
            "click here",
            "Click Here",
            "click   here",
            "click_here",
            "click-here",
            "  click here  ",
            "click.here!",
        ];

        for raw in variants {
            assert_eq!("click here", normalize_text(raw));
        }
    }

    #[test]
    fn test_descriptive_link_passes() {
        let input = "[Download the budget document](https://example.com/budget.pdf)";

        assert_eq!(0, violation_count(input));
    }

    #[test]
    fn test_generic_link_text_fails() {
        let input = "[click here](https://example.com)";

        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        let violations = linter.analyze();

        assert_eq!(1, violations.len());
        let violation = &violations[0];
        assert_eq!("MD059", violation.rule().id);
        assert!(violation
            .message()
            .contains("Link text should be descriptive"));
        assert!(violation.message().contains("click here"));
    }

    #[test]
    fn test_prohibited_texts() {
        let inputs = [
            "[here](url)",
            "[link](url)",
            "[more](url)",
            "[click here](url)",
        ];

        for input in inputs {
            let mut linter =
                MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
            let violations = linter.analyze();

            assert_eq!(1, violations.len(), "Failed for input: {input}");
            let violation = &violations[0];
            assert_eq!("MD059", violation.rule().id);
        }
    }

    #[test]
    fn test_case_insensitive() {
        let input = "[CLICK HERE](https://example.com)";

        assert_eq!(1, violation_count(input));
    }

    #[test]
    fn test_punctuation_normalized() {
        let input = "[click-here!](https://example.com)";

        assert_eq!(1, violation_count(input));
    }

    #[test]
    fn test_extra_whitespace_normalized() {
        let input = "[  click   here  ](https://example.com)";

        assert_eq!(1, violation_count(input));
    }

    #[test]
    fn test_reference_links() {
        let input = r#"[click here][ref]

[ref]: https://example.com"#;

        assert_eq!(1, violation_count(input));
    }

    #[test]
    fn test_multiple_links() {
        let input = "[good link](url1) and [click here](url2) and [another good](url3)";

        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        let violations = linter.analyze();

        assert_eq!(1, violations.len());
        assert!(violations[0].message().contains("click here"));
    }

    #[test]
    fn test_empty_link_text() {
        let input = "[](https://example.com)";

        // Empty link text should not match prohibited texts
        assert_eq!(0, violation_count(input));
    }

    #[test]
    fn test_links_with_code_allowed() {
        let input = "[`click here`](https://example.com)";

        // Links containing code should be allowed
        assert_eq!(0, violation_count(input));
    }

    #[test]
    fn test_image_links_ignored() {
        let input = "![click here](image.jpg)";

        // Images should be ignored by this rule
        assert_eq!(0, violation_count(input));
    }
}