rumdl_lib/rules/
md042_no_empty_links.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3
/// Rule MD042: No empty links
///
/// See [docs/md042.md](../../docs/md042.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a link has no content (text) or no destination (URL).
///
/// The struct has no fields, so every instance is interchangeable. `Debug` is derived
/// so the rule can be printed in diagnostics and embedded in other `Debug` types.
#[derive(Debug, Clone, Default)]
pub struct MD042NoEmptyLinks {}
11
12impl MD042NoEmptyLinks {
13    pub fn new() -> Self {
14        Self {}
15    }
16}
17
18impl Rule for MD042NoEmptyLinks {
19    fn name(&self) -> &'static str {
20        "MD042"
21    }
22
23    fn description(&self) -> &'static str {
24        "No empty links"
25    }
26
27    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
28        let mut warnings = Vec::new();
29
30        // Check if we're in MkDocs mode from the context
31        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
32
33        // Use centralized link parsing from LintContext
34        for link in &ctx.links {
35            // For reference links, resolve the URL
36            let effective_url = if link.is_reference {
37                if let Some(ref_id) = &link.reference_id {
38                    ctx.get_reference_url(ref_id).unwrap_or("").to_string()
39                } else {
40                    String::new()
41                }
42            } else {
43                link.url.clone()
44            };
45
46            // For MkDocs mode, check if this looks like an auto-reference
47            // Note: We check both the reference_id AND the text since shorthand references
48            // like [class.Name][] use the text as the implicit reference
49            if mkdocs_mode && link.is_reference {
50                // Check the reference_id if present
51                if let Some(ref_id) = &link.reference_id
52                    && is_mkdocs_auto_reference(ref_id)
53                {
54                    continue;
55                }
56                // Also check the link text itself for shorthand references
57                if is_mkdocs_auto_reference(&link.text) {
58                    continue;
59                }
60            }
61
62            // Check for empty links
63            if link.text.trim().is_empty() || effective_url.trim().is_empty() {
64                let replacement = if link.text.trim().is_empty() && effective_url.trim().is_empty() {
65                    "[Link text](https://example.com)".to_string()
66                } else if link.text.trim().is_empty() {
67                    if link.is_reference {
68                        format!("[Link text]{}", &ctx.content[link.byte_offset + 1..link.byte_end])
69                    } else {
70                        format!("[Link text]({effective_url})")
71                    }
72                } else if link.is_reference {
73                    // Keep the reference format
74                    let ref_part = &ctx.content[link.byte_offset + link.text.len() + 2..link.byte_end];
75                    format!("[{}]{}", link.text, ref_part)
76                } else {
77                    format!("[{}](https://example.com)", link.text)
78                };
79
80                // Format the link as it appears in the source
81                let link_display = if link.is_reference {
82                    if let Some(ref_id) = &link.reference_id {
83                        if ref_id.is_empty() {
84                            format!("[{}][]", link.text)
85                        } else {
86                            format!("[{}][{}]", link.text, ref_id)
87                        }
88                    } else {
89                        format!("[{}]", link.text)
90                    }
91                } else {
92                    format!("[{}]({})", link.text, link.url)
93                };
94
95                warnings.push(LintWarning {
96                    rule_name: Some(self.name()),
97                    message: format!("Empty link found: {link_display}"),
98                    line: link.line,
99                    column: link.start_col + 1, // Convert to 1-indexed
100                    end_line: link.line,
101                    end_column: link.end_col + 1, // Convert to 1-indexed
102                    severity: Severity::Warning,
103                    fix: Some(Fix {
104                        range: link.byte_offset..link.byte_end,
105                        replacement,
106                    }),
107                });
108            }
109        }
110
111        Ok(warnings)
112    }
113
114    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
115        let content = ctx.content;
116
117        // Get all warnings first - only fix links that are actually flagged
118        let warnings = self.check(ctx)?;
119        if warnings.is_empty() {
120            return Ok(content.to_string());
121        }
122
123        // Collect all fixes with their ranges
124        let mut fixes: Vec<(std::ops::Range<usize>, String)> = warnings
125            .iter()
126            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.clone(), f.replacement.clone())))
127            .collect();
128
129        // Sort fixes by position (descending) to apply from end to start
130        fixes.sort_by(|a, b| b.0.start.cmp(&a.0.start));
131
132        let mut result = content.to_string();
133
134        // Apply fixes from end to start to maintain correct positions
135        for (range, replacement) in fixes {
136            result.replace_range(range, &replacement);
137        }
138
139        Ok(result)
140    }
141
142    /// Get the category of this rule for selective processing
143    fn category(&self) -> RuleCategory {
144        RuleCategory::Link
145    }
146
147    /// Check if this rule should be skipped
148    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
149        let content = ctx.content;
150        content.is_empty() || !content.contains('[')
151    }
152
153    fn as_any(&self) -> &dyn std::any::Any {
154        self
155    }
156
157    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
158    where
159        Self: Sized,
160    {
161        // Flavor is now accessed from LintContext during check
162        Box::new(MD042NoEmptyLinks::new())
163    }
164}
165
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Runs MD042 over `content` using the standard Markdown flavor and returns
    /// the warnings it produced.
    fn lint_standard(content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD042NoEmptyLinks::new().check(&ctx).unwrap()
    }

    #[test]
    fn test_links_with_text_should_pass() {
        let result = lint_standard("[valid link](https://example.com)");
        assert!(result.is_empty(), "Links with text should pass");

        let result = lint_standard("[another valid link](path/to/page.html)");
        assert!(result.is_empty(), "Links with text and relative URLs should pass");
    }

    #[test]
    fn test_links_with_empty_text_should_fail() {
        let result = lint_standard("[](https://example.com)");
        assert_eq!(result.len(), 1);

        let warning = &result[0];
        assert_eq!(warning.message, "Empty link found: [](https://example.com)");
        assert_eq!(warning.line, 1);
        assert_eq!(warning.column, 1);
    }

    #[test]
    fn test_links_with_only_whitespace_should_fail() {
        // Plain spaces between the brackets
        let result = lint_standard("[   ](https://example.com)");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Empty link found: [   ](https://example.com)");

        // Tab and newline between the brackets
        let result = lint_standard("[\t\n](https://example.com)");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Empty link found: [\t\n](https://example.com)");
    }

    #[test]
    fn test_reference_links_with_empty_text() {
        let result = lint_standard("[][ref]\n\n[ref]: https://example.com");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Empty link found: [][ref]");
        assert_eq!(result[0].line, 1);

        // Empty text with empty reference
        let result = lint_standard("[][]\n\n[]: https://example.com");
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_images_should_be_ignored() {
        // Images can have empty alt text, so they should not trigger the rule
        let result = lint_standard("![](image.png)");
        assert!(result.is_empty(), "Images with empty alt text should be ignored");

        let result = lint_standard("![   ](image.png)");
        assert!(result.is_empty(), "Images with whitespace alt text should be ignored");
    }

    #[test]
    fn test_links_with_nested_formatting() {
        // Emphasis markers without content still count as text:
        // "**" and "__" are not empty after trimming.
        let result = lint_standard("[**](https://example.com)");
        assert!(result.is_empty(), "[**] is not considered empty since ** is text");

        let result = lint_standard("[__](https://example.com)");
        assert!(result.is_empty(), "[__] is not considered empty since __ is text");

        // Links with truly empty formatting should fail
        let result = lint_standard("[](https://example.com)");
        assert_eq!(result.len(), 1);

        // Links with nested formatting and actual text should pass
        let result = lint_standard("[**bold text**](https://example.com)");
        assert!(result.is_empty(), "Links with nested formatting and text should pass");

        let result = lint_standard("[*italic* and **bold**](https://example.com)");
        assert!(result.is_empty(), "Links with multiple nested formatting should pass");
    }

    #[test]
    fn test_multiple_empty_links_on_same_line() {
        let result = lint_standard("[](url1) and [](url2) and [valid](url3)");
        assert_eq!(result.len(), 2, "Should detect both empty links");
        assert_eq!(result[0].column, 1);
        assert_eq!(result[1].column, 14);
    }

    #[test]
    fn test_escaped_brackets() {
        // Escaped brackets should not be treated as links
        let result = lint_standard("\\[\\](https://example.com)");
        assert!(result.is_empty(), "Escaped brackets should not be treated as links");

        // But this should still be a link
        let result = lint_standard("[\\[\\]](https://example.com)");
        assert!(result.is_empty(), "Link with escaped brackets in text should pass");
    }

    #[test]
    fn test_links_in_lists_and_blockquotes() {
        // Empty links in lists
        let result = lint_standard("- [](https://example.com)\n- [valid](https://example.com)");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 1);

        // Empty links in blockquotes
        let result = lint_standard("> [](https://example.com)\n> [valid](https://example.com)");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 1);

        // Nested structures
        let result = lint_standard("> - [](url1)\n> - [text](url2)");
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_unicode_whitespace_characters() {
        // Non-breaking space (U+00A0) - IS considered whitespace by Rust's trim()
        let result = lint_standard("[\u{00A0}](https://example.com)");
        assert_eq!(result.len(), 1, "Non-breaking space should be treated as whitespace");

        // Em space (U+2003) - IS considered whitespace by Rust's trim()
        let result = lint_standard("[\u{2003}](https://example.com)");
        assert_eq!(result.len(), 1, "Em space should be treated as whitespace");

        // Zero-width space (U+200B) - NOT considered whitespace by Rust's trim()
        // This is a formatting character, not a whitespace character
        let result = lint_standard("[\u{200B}](https://example.com)");
        assert!(
            result.is_empty(),
            "Zero-width space is not considered whitespace by trim()"
        );

        // Zero-width space surrounded by spaces: trim() strips the spaces but
        // leaves "\u{200B}", so the text is NOT empty.
        let result = lint_standard("[ \u{200B} ](https://example.com)");
        assert!(
            result.is_empty(),
            "Zero-width space remains after trim(), so link is not empty"
        );
    }

    #[test]
    fn test_empty_url_with_text() {
        let result = lint_standard("[some text]()");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Empty link found: [some text]()");
    }

    #[test]
    fn test_both_empty_text_and_url() {
        let result = lint_standard("[]()");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Empty link found: []()");
    }

    #[test]
    fn test_reference_link_with_undefined_reference() {
        let result = lint_standard("[text][undefined]");
        assert_eq!(result.len(), 1, "Undefined reference should be treated as empty URL");
    }

    #[test]
    fn test_shortcut_reference_links() {
        // Valid implicit reference link.
        // Note: [example] by itself is not parsed as a link by the LINK_PATTERN regex;
        // it needs to be followed by [] or () to be recognized as a link.
        let result = lint_standard("[example][]\n\n[example]: https://example.com");
        assert!(result.is_empty(), "Valid implicit reference link should pass");

        // Empty implicit reference link
        let result = lint_standard("[][]\n\n[]: https://example.com");
        assert_eq!(result.len(), 1, "Empty implicit reference link should fail");

        // Actual shortcut-style links are not detected (they don't match the pattern)
        let result = lint_standard("[example]\n\n[example]: https://example.com");
        assert!(
            result.is_empty(),
            "Shortcut links without [] or () are not parsed as links"
        );
    }

    #[test]
    fn test_fix_suggestions() {
        // (input, expected replacement offered by the first warning)
        let cases = [
            ("[](https://example.com)", "[Link text](https://example.com)"),
            ("[text]()", "[text](https://example.com)"),
            ("[]()", "[Link text](https://example.com)"),
        ];

        for (input, expected) in cases {
            let result = lint_standard(input);
            assert!(result[0].fix.is_some());
            let fix = result[0].fix.as_ref().unwrap();
            assert_eq!(fix.replacement, expected);
        }
    }

    #[test]
    fn test_complex_markdown_document() {
        let content = r#"# Document with various links

[Valid link](https://example.com) followed by [](empty.com).

## Lists with links
- [Good link](url1)
- [](url2)
- Item with [inline empty]() link

> Quote with [](quoted-empty.com)
> And [valid quoted](quoted-valid.com)

Code block should be ignored:
```
[](this-is-code)
```

[Reference style][ref1] and [][ref2]

[ref1]: https://ref1.com
[ref2]: https://ref2.com
"#;

        let result = lint_standard(content);

        // Lines (1-indexed) on which an empty link is expected
        let empty_link_lines = [3, 7, 8, 10, 18];
        assert_eq!(result.len(), empty_link_lines.len(), "Should find all empty links");

        // Warnings must come back in document order, one per expected line
        for (i, (warning, &expected_line)) in result.iter().zip(&empty_link_lines).enumerate() {
            assert_eq!(
                warning.line, expected_line,
                "Empty link {i} should be on line {expected_line}"
            );
        }
    }

    #[test]
    fn test_issue_29_code_block_with_tildes() {
        // Test for issue #29 - code blocks with tilde markers should not break reference links
        let content = r#"In addition to the [local scope][] and the [global scope][], Python also has a **built-in scope**.

```pycon
>>> @count_calls
... def greet(name):
...     print("Hi", name)
...
>>> greet("Trey")
Traceback (most recent call last):
  File "<python-input-2>", line 1, in <module>
    greet("Trey")
    ~~~~~^^^^^^^^
  File "<python-input-0>", line 4, in wrapper
    calls += 1
    ^^^^^
UnboundLocalError: cannot access local variable 'calls' where it is not associated with a value
```


[local scope]: https://www.pythonmorsels.com/local-and-global-variables/
[global scope]: https://www.pythonmorsels.com/assigning-global-variables/"#;

        let result = lint_standard(content);

        // These reference links should NOT be flagged as empty
        assert!(
            result.is_empty(),
            "Should not flag reference links as empty when code blocks contain tildes (issue #29). Got: {result:?}"
        );
    }
}