mdbook_lint_core/rules/standard/
md059.rs

1//! MD059 - Link text should be descriptive
2//!
3//! This rule is triggered when a link has generic text that doesn't describe
4//! the purpose of the link.
5//!
6//! ## Correct
7//!
8//! ```markdown
9//! \[Download the budget document\](document.pdf)
10//! \[CommonMark Specification\](https://spec.commonmark.org/)
11//! ```
12//!
13//! ## Incorrect
14//!
15//! ```markdown
16//! \[click here\](document.pdf)
17//! \[here\](https://example.com)
18//! \[link\](https://example.com)
19//! \[more\](https://example.com)
20//! ```
21
22use crate::error::Result;
23use crate::{
24    Document, Violation,
25    rule::{Rule, RuleCategory, RuleMetadata},
26    violation::Severity,
27};
28use comrak::nodes::{AstNode, NodeValue};
29
/// MD059 - Link text should be descriptive
///
/// Holds the list of link texts that are considered non-descriptive. A link
/// is reported when its visible text equals one of these entries, compared
/// case-insensitively.
#[derive(Debug, Clone)]
pub struct MD059 {
    /// Link texts that are treated as non-descriptive.
    prohibited_texts: Vec<String>,
}
34
35impl Default for MD059 {
36    fn default() -> Self {
37        Self::new()
38    }
39}
40
41impl MD059 {
42    /// Create a new MD059 rule instance
43    pub fn new() -> Self {
44        Self {
45            prohibited_texts: vec![
46                "click here".to_string(),
47                "here".to_string(),
48                "link".to_string(),
49                "more".to_string(),
50            ],
51        }
52    }
53
54    /// Set the list of prohibited link texts
55    #[allow(dead_code)]
56    pub fn prohibited_texts(mut self, texts: Vec<String>) -> Self {
57        self.prohibited_texts = texts;
58        self
59    }
60
61    /// Extract text content from a link node
62    fn extract_link_text<'a>(node: &'a AstNode<'a>) -> String {
63        let mut text = String::new();
64        for child in node.children() {
65            match &child.data.borrow().value {
66                NodeValue::Text(t) => text.push_str(t),
67                NodeValue::Code(code) => text.push_str(&code.literal),
68                NodeValue::Emph | NodeValue::Strong => {
69                    text.push_str(&Self::extract_link_text(child));
70                }
71                _ => {}
72            }
73        }
74        text.trim().to_string()
75    }
76
77    /// Check if link text is prohibited
78    fn is_prohibited_text(&self, text: &str) -> bool {
79        let normalized_text = text.to_lowercase();
80        self.prohibited_texts
81            .iter()
82            .any(|prohibited| prohibited.to_lowercase() == normalized_text)
83    }
84
85    /// Check for non-descriptive link text
86    fn check_link_text<'a>(&self, ast: &'a AstNode<'a>) -> Vec<Violation> {
87        let mut violations = Vec::new();
88        self.traverse_for_links(ast, &mut violations);
89        violations
90    }
91
92    /// Traverse AST to find links
93    fn traverse_for_links<'a>(&self, node: &'a AstNode<'a>, violations: &mut Vec<Violation>) {
94        if let NodeValue::Link(link) = &node.data.borrow().value {
95            // Skip autolinks and reference definitions
96            if !link.url.is_empty() {
97                let link_text = Self::extract_link_text(node);
98
99                // Skip empty link text
100                if !link_text.is_empty() && self.is_prohibited_text(&link_text) {
101                    let pos = node.data.borrow().sourcepos;
102                    let line = pos.start.line;
103                    let column = pos.start.column;
104                    violations.push(self.create_violation(
105                        format!(
106                            "Link text '{link_text}' is not descriptive. Use descriptive text that explains the purpose of the link"
107                        ),
108                        line,
109                        column,
110                        Severity::Warning,
111                    ));
112                }
113            }
114        }
115
116        for child in node.children() {
117            self.traverse_for_links(child, violations);
118        }
119    }
120
121    /// Fallback method using manual parsing when no AST is available
122    fn check_link_text_fallback(&self, document: &Document) -> Vec<Violation> {
123        let mut violations = Vec::new();
124
125        for (line_num, line) in document.content.lines().enumerate() {
126            let line_number = line_num + 1;
127            let mut chars = line.char_indices().peekable();
128            let mut in_backticks = false;
129
130            while let Some((i, ch)) = chars.next() {
131                match ch {
132                    '`' => {
133                        in_backticks = !in_backticks;
134                    }
135                    '[' if !in_backticks => {
136                        // Try to parse any kind of link: [text](url) or [text][ref]
137                        if let Some((link_text, text_start, text_end)) =
138                            self.parse_any_link_at(&line[i..])
139                        {
140                            let cleaned_text = Self::strip_emphasis_markers(link_text);
141                            let trimmed_text = cleaned_text.trim();
142
143                            if !trimmed_text.is_empty() && self.is_prohibited_text(trimmed_text) {
144                                violations.push(self.create_violation(
145                                    format!(
146                                        "Link text '{trimmed_text}' is not descriptive. Use descriptive text that explains the purpose of the link"
147                                    ),
148                                    line_number,
149                                    i + text_start + 2, // +1 for 1-based indexing, +1 for opening bracket
150                                    Severity::Warning,
151                                ));
152                            }
153
154                            // Skip past the entire link
155                            for _ in 0..text_end - 1 {
156                                chars.next();
157                            }
158                        }
159                    }
160                    _ => {}
161                }
162            }
163        }
164
165        violations
166    }
167
168    /// Parse any link (inline or reference) starting at the given position
169    /// Returns (link_text, text_start_offset, total_length) if found
170    fn parse_any_link_at<'a>(&self, text: &'a str) -> Option<(&'a str, usize, usize)> {
171        if !text.starts_with('[') {
172            return None;
173        }
174
175        // Find the closing bracket
176        let mut bracket_count = 0;
177        let mut closing_bracket_pos = None;
178
179        for (i, ch) in text.char_indices() {
180            match ch {
181                '[' => bracket_count += 1,
182                ']' => {
183                    bracket_count -= 1;
184                    if bracket_count == 0 {
185                        closing_bracket_pos = Some(i);
186                        break;
187                    }
188                }
189                _ => {}
190            }
191        }
192
193        let closing_bracket_pos = closing_bracket_pos?;
194        let link_text = &text[1..closing_bracket_pos];
195        let remaining = &text[closing_bracket_pos + 1..];
196
197        // Check if this is followed by (url) - inline link
198        if remaining.starts_with('(') {
199            if let Some(closing_paren) = remaining.find(')') {
200                let total_length = closing_bracket_pos + 1 + closing_paren + 1;
201                return Some((link_text, 0, total_length));
202            }
203        }
204        // Check if this is followed by [ref] - reference link
205        else if remaining.starts_with('[')
206            && let Some(ref_end) = remaining.find(']')
207        {
208            let total_length = closing_bracket_pos + 1 + ref_end + 1;
209            return Some((link_text, 0, total_length));
210        }
211
212        None
213    }
214
215    /// Strip emphasis markers from link text (similar to AST extract_link_text)
216    fn strip_emphasis_markers(text: &str) -> String {
217        let mut result = String::new();
218        let mut chars = text.chars().peekable();
219
220        while let Some(ch) = chars.next() {
221            match ch {
222                '*' => {
223                    // Check for ** (strong) or * (emphasis)
224                    if chars.peek() == Some(&'*') {
225                        chars.next(); // consume second *
226                        // Find closing **
227                        let mut temp = String::new();
228                        let mut found_closing = false;
229                        while let Some(inner_ch) = chars.next() {
230                            if inner_ch == '*' && chars.peek() == Some(&'*') {
231                                chars.next(); // consume second *
232                                found_closing = true;
233                                break;
234                            }
235                            temp.push(inner_ch);
236                        }
237                        if found_closing {
238                            result.push_str(&Self::strip_emphasis_markers(&temp));
239                        } else {
240                            result.push_str("**");
241                            result.push_str(&temp);
242                        }
243                    } else {
244                        // Find closing *
245                        let mut temp = String::new();
246                        let mut found_closing = false;
247                        for inner_ch in chars.by_ref() {
248                            if inner_ch == '*' {
249                                found_closing = true;
250                                break;
251                            }
252                            temp.push(inner_ch);
253                        }
254                        if found_closing {
255                            result.push_str(&Self::strip_emphasis_markers(&temp));
256                        } else {
257                            result.push('*');
258                            result.push_str(&temp);
259                        }
260                    }
261                }
262                '`' => {
263                    // Find closing `
264                    let mut temp = String::new();
265                    let mut found_closing = false;
266                    for inner_ch in chars.by_ref() {
267                        if inner_ch == '`' {
268                            found_closing = true;
269                            break;
270                        }
271                        temp.push(inner_ch);
272                    }
273                    if found_closing {
274                        result.push_str(&temp); // Code content as-is
275                    } else {
276                        result.push('`');
277                        result.push_str(&temp);
278                    }
279                }
280                _ => result.push(ch),
281            }
282        }
283
284        result
285    }
286}
287
288impl Rule for MD059 {
289    fn id(&self) -> &'static str {
290        "MD059"
291    }
292
293    fn name(&self) -> &'static str {
294        "descriptive-link-text"
295    }
296
297    fn description(&self) -> &'static str {
298        "Link text should be descriptive"
299    }
300
301    fn metadata(&self) -> RuleMetadata {
302        RuleMetadata::stable(RuleCategory::Accessibility)
303    }
304
305    fn check_with_ast<'a>(
306        &self,
307        document: &Document,
308        ast: Option<&'a AstNode<'a>>,
309    ) -> Result<Vec<Violation>> {
310        if let Some(ast) = ast {
311            let violations = self.check_link_text(ast);
312            Ok(violations)
313        } else {
314            // Simplified regex-based fallback when no AST is available
315            Ok(self.check_link_text_fallback(document))
316        }
317    }
318}
319
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_helpers::{
        assert_no_violations, assert_single_violation, assert_violation_count,
    };

    /// Links whose text describes the target are not reported.
    #[test]
    fn test_descriptive_link_text() {
        let content = "[Download the budget document](document.pdf)\n\
                       [CommonMark Specification](https://spec.commonmark.org/)\n\
                       [View the installation guide](install.md)\n";

        assert_no_violations(MD059::new(), content);
    }

    /// Each of the default prohibited texts is reported on its own line.
    #[test]
    fn test_prohibited_link_text() {
        let content = "[click here](document.pdf)\n\
                       [here](https://example.com)\n\
                       [link](https://example.com)\n\
                       [more](info.html)\n";

        let violations = assert_violation_count(MD059::new(), content, 4);

        let expected = [(1, "click here"), (2, "here"), (3, "link"), (4, "more")];
        for (violation, (line, text)) in violations.iter().zip(expected) {
            assert_eq!(violation.line, line);
            assert!(violation.message.contains(text));
        }
    }

    /// Matching ignores the case of the link text.
    #[test]
    fn test_case_insensitive_matching() {
        let content = "[CLICK HERE](document.pdf)\n\
                       [Here](https://example.com)\n\
                       [Link](https://example.com)\n\
                       [MORE](info.html)\n";

        let violations = assert_violation_count(MD059::new(), content, 4);
        for (violation, expected_line) in violations.iter().zip(1..) {
            assert_eq!(violation.line, expected_line);
        }
    }

    /// A custom prohibited list replaces the defaults.
    #[test]
    fn test_custom_prohibited_texts() {
        let content = "[read more](document.pdf)\n\
                       [see details](https://example.com)\n";

        let custom = vec!["read more".to_string(), "see details".to_string()];
        let rule = MD059::new().prohibited_texts(custom);

        let violations = assert_violation_count(rule, content, 2);
        for (violation, expected_line) in violations.iter().zip(1..) {
            assert_eq!(violation.line, expected_line);
        }
    }

    /// Autolinks produce no violations.
    #[test]
    fn test_autolinks_ignored() {
        let content = "<https://example.com>\n\
                       <mailto:user@example.com>\n";

        assert_no_violations(MD059::new(), content);
    }

    /// Reference-style links are checked like inline links.
    #[test]
    fn test_reference_links() {
        let content = "[click here][ref]\n\
                       [descriptive text][ref2]\n\
                       \n\
                       [ref]: https://example.com\n\
                       [ref2]: https://example.com\n";

        let violation = assert_single_violation(MD059::new(), content);
        assert_eq!(violation.line, 1);
        assert!(violation.message.contains("click here"));
    }

    /// Emphasis markers around the text do not hide a prohibited phrase.
    #[test]
    fn test_links_with_emphasis() {
        let content = "[**click here**](document.pdf)\n\
                       [*here*](https://example.com)\n\
                       [`code link`](https://example.com)\n";

        let violations = assert_violation_count(MD059::new(), content, 2);

        let expected = [(1, "click here"), (2, "here")];
        for (violation, (line, text)) in violations.iter().zip(expected) {
            assert_eq!(violation.line, line);
            assert!(violation.message.contains(text));
        }
    }

    /// Links with empty text are not reported by this rule.
    #[test]
    fn test_empty_link_text_ignored() {
        let content = "[](https://example.com)\n";

        assert_no_violations(MD059::new(), content);
    }

    /// Only the non-descriptive link in surrounding prose is reported.
    #[test]
    fn test_mixed_content() {
        let content = "[Download guide](guide.pdf) contains useful information.\n\
                       You can [click here](more.html) for additional details.\n\
                       See the [API documentation](api.md) for technical details.\n";

        let violation = assert_single_violation(MD059::new(), content);
        assert_eq!(violation.line, 2);
        assert!(violation.message.contains("click here"));
    }
}