// rumdl_lib/rules/md042_no_empty_links.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::document_structure::{DocumentStructure, DocumentStructureExtensions};
3use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
4use crate::utils::range_utils::LineIndex;
5
/// Rule MD042: No empty links
///
/// See [docs/md042.md](../../docs/md042.md) for full documentation, configuration, and examples.
///
/// A link is reported when either its text or its destination (URL) is missing.
#[derive(Clone, Default)]
pub struct MD042NoEmptyLinks {}

impl MD042NoEmptyLinks {
    /// Creates the rule. The struct has no fields, so this is the same value
    /// as the derived `Default`.
    pub fn new() -> Self {
        Self::default()
    }
}
19
20impl Rule for MD042NoEmptyLinks {
21    fn name(&self) -> &'static str {
22        "MD042"
23    }
24
25    fn description(&self) -> &'static str {
26        "No empty links"
27    }
28
29    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
30        let mut warnings = Vec::new();
31
32        // Check if we're in MkDocs mode from the context
33        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
34
35        // Use centralized link parsing from LintContext
36        for link in &ctx.links {
37            // For reference links, resolve the URL
38            let effective_url = if link.is_reference {
39                if let Some(ref_id) = &link.reference_id {
40                    ctx.get_reference_url(ref_id).unwrap_or("").to_string()
41                } else {
42                    String::new()
43                }
44            } else {
45                link.url.clone()
46            };
47
48            // For MkDocs mode, check if this looks like an auto-reference
49            // Note: We check both the reference_id AND the text since shorthand references
50            // like [class.Name][] use the text as the implicit reference
51            if mkdocs_mode && link.is_reference {
52                // Check the reference_id if present
53                if let Some(ref_id) = &link.reference_id
54                    && is_mkdocs_auto_reference(ref_id)
55                {
56                    continue;
57                }
58                // Also check the link text itself for shorthand references
59                if is_mkdocs_auto_reference(&link.text) {
60                    continue;
61                }
62            }
63
64            // Check for empty links
65            if link.text.trim().is_empty() || effective_url.trim().is_empty() {
66                let replacement = if link.text.trim().is_empty() && effective_url.trim().is_empty() {
67                    "[Link text](https://example.com)".to_string()
68                } else if link.text.trim().is_empty() {
69                    if link.is_reference {
70                        format!("[Link text]{}", &ctx.content[link.byte_offset + 1..link.byte_end])
71                    } else {
72                        format!("[Link text]({effective_url})")
73                    }
74                } else if link.is_reference {
75                    // Keep the reference format
76                    let ref_part = &ctx.content[link.byte_offset + link.text.len() + 2..link.byte_end];
77                    format!("[{}]{}", link.text, ref_part)
78                } else {
79                    format!("[{}](https://example.com)", link.text)
80                };
81
82                // Format the link as it appears in the source
83                let link_display = if link.is_reference {
84                    if let Some(ref_id) = &link.reference_id {
85                        if ref_id.is_empty() {
86                            format!("[{}][]", link.text)
87                        } else {
88                            format!("[{}][{}]", link.text, ref_id)
89                        }
90                    } else {
91                        format!("[{}]", link.text)
92                    }
93                } else {
94                    format!("[{}]({})", link.text, link.url)
95                };
96
97                warnings.push(LintWarning {
98                    rule_name: Some(self.name()),
99                    message: format!("Empty link found: {link_display}"),
100                    line: link.line,
101                    column: link.start_col + 1, // Convert to 1-indexed
102                    end_line: link.line,
103                    end_column: link.end_col + 1, // Convert to 1-indexed
104                    severity: Severity::Warning,
105                    fix: Some(Fix {
106                        range: link.byte_offset..link.byte_end,
107                        replacement,
108                    }),
109                });
110            }
111        }
112
113        Ok(warnings)
114    }
115
116    /// Optimized check using document structure
117    fn check_with_structure(
118        &self,
119        _ctx: &crate::lint_context::LintContext,
120        structure: &DocumentStructure,
121    ) -> LintResult {
122        let content = _ctx.content;
123        // Early return if there are no links
124        if structure.links.is_empty() {
125            return Ok(Vec::new());
126        }
127
128        let line_index = LineIndex::new(content.to_string());
129        let mut warnings = Vec::new();
130
131        // Get pre-computed empty links
132        let empty_links = structure.get_empty_links();
133
134        for link in empty_links {
135            let replacement = if link.text.trim().is_empty() && link.url.trim().is_empty() {
136                "[Link text](https://example.com)".to_string()
137            } else if link.text.trim().is_empty() {
138                format!("[Link text]({})", link.url)
139            } else {
140                format!("[{}](https://example.com)", link.text)
141            };
142
143            warnings.push(LintWarning {
144                rule_name: Some(self.name()),
145                message: format!("Empty link found: [{}]({})", link.text, link.url),
146                line: link.line,
147                column: link.start_col,
148                end_line: link.line,
149                end_column: link.end_col + 1,
150                severity: Severity::Warning,
151                fix: Some(Fix {
152                    range: line_index.line_col_to_byte_range_with_length(
153                        link.line,
154                        link.start_col,
155                        (link.end_col + 1).saturating_sub(link.start_col),
156                    ),
157                    replacement,
158                }),
159            });
160        }
161
162        Ok(warnings)
163    }
164
165    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
166        let content = ctx.content;
167
168        // Get all warnings first - only fix links that are actually flagged
169        let warnings = self.check(ctx)?;
170        if warnings.is_empty() {
171            return Ok(content.to_string());
172        }
173
174        // Collect all fixes with their ranges
175        let mut fixes: Vec<(std::ops::Range<usize>, String)> = warnings
176            .iter()
177            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.clone(), f.replacement.clone())))
178            .collect();
179
180        // Sort fixes by position (descending) to apply from end to start
181        fixes.sort_by(|a, b| b.0.start.cmp(&a.0.start));
182
183        let mut result = content.to_string();
184
185        // Apply fixes from end to start to maintain correct positions
186        for (range, replacement) in fixes {
187            result.replace_range(range, &replacement);
188        }
189
190        Ok(result)
191    }
192
193    /// Get the category of this rule for selective processing
194    fn category(&self) -> RuleCategory {
195        RuleCategory::Link
196    }
197
198    /// Check if this rule should be skipped
199    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
200        let content = ctx.content;
201        content.is_empty() || !content.contains('[')
202    }
203
204    fn as_any(&self) -> &dyn std::any::Any {
205        self
206    }
207
208    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
209    where
210        Self: Sized,
211    {
212        // Flavor is now accessed from LintContext during check
213        Box::new(MD042NoEmptyLinks::new())
214    }
215}
216
217impl DocumentStructureExtensions for MD042NoEmptyLinks {
218    fn has_relevant_elements(
219        &self,
220        _ctx: &crate::lint_context::LintContext,
221        doc_structure: &DocumentStructure,
222    ) -> bool {
223        !doc_structure.links.is_empty()
224    }
225}
226
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Lints `content` as standard-flavor Markdown and returns the MD042 warnings.
    fn lint(content: &str) -> Vec<crate::rule::LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD042NoEmptyLinks::new().check(&ctx).unwrap()
    }

    #[test]
    fn test_links_with_text_should_pass() {
        assert!(
            lint("[valid link](https://example.com)").is_empty(),
            "Links with text should pass"
        );
        assert!(
            lint("[another valid link](path/to/page.html)").is_empty(),
            "Links with text and relative URLs should pass"
        );
    }

    #[test]
    fn test_links_with_empty_text_should_fail() {
        let warnings = lint("[](https://example.com)");
        assert_eq!(warnings.len(), 1);

        let warning = &warnings[0];
        assert_eq!(warning.message, "Empty link found: [](https://example.com)");
        assert_eq!(warning.line, 1);
        assert_eq!(warning.column, 1);
    }

    #[test]
    fn test_links_with_only_whitespace_should_fail() {
        let cases = [
            (
                "[   ](https://example.com)",
                "Empty link found: [   ](https://example.com)",
            ),
            (
                "[\t\n](https://example.com)",
                "Empty link found: [\t\n](https://example.com)",
            ),
        ];

        for (content, expected_message) in cases {
            let warnings = lint(content);
            assert_eq!(warnings.len(), 1);
            assert_eq!(warnings[0].message, expected_message);
        }
    }

    #[test]
    fn test_reference_links_with_empty_text() {
        let warnings = lint("[][ref]\n\n[ref]: https://example.com");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].message, "Empty link found: [][ref]");
        assert_eq!(warnings[0].line, 1);

        // Empty text with empty reference
        let warnings = lint("[][]\n\n[]: https://example.com");
        assert_eq!(warnings.len(), 1);
    }

    #[test]
    fn test_images_should_be_ignored() {
        // Images can have empty alt text, so they should not trigger the rule
        assert!(
            lint("![](image.png)").is_empty(),
            "Images with empty alt text should be ignored"
        );
        assert!(
            lint("![   ](image.png)").is_empty(),
            "Images with whitespace alt text should be ignored"
        );
    }

    #[test]
    fn test_links_with_nested_formatting() {
        // Bare emphasis markers are text: "**" and "__" are not empty after trimming
        assert!(
            lint("[**](https://example.com)").is_empty(),
            "[**] is not considered empty since ** is text"
        );
        assert!(
            lint("[__](https://example.com)").is_empty(),
            "[__] is not considered empty since __ is text"
        );

        // A link with truly empty text should fail
        assert_eq!(lint("[](https://example.com)").len(), 1);

        // Nested formatting around actual text should pass
        assert!(
            lint("[**bold text**](https://example.com)").is_empty(),
            "Links with nested formatting and text should pass"
        );
        assert!(
            lint("[*italic* and **bold**](https://example.com)").is_empty(),
            "Links with multiple nested formatting should pass"
        );
    }

    #[test]
    fn test_multiple_empty_links_on_same_line() {
        let warnings = lint("[](url1) and [](url2) and [valid](url3)");
        assert_eq!(warnings.len(), 2, "Should detect both empty links");
        assert_eq!(warnings[0].column, 1);
        assert_eq!(warnings[1].column, 14);
    }

    #[test]
    fn test_escaped_brackets() {
        // Escaped brackets should not be treated as links
        assert!(
            lint("\\[\\](https://example.com)").is_empty(),
            "Escaped brackets should not be treated as links"
        );

        // But this should still be a link
        assert!(
            lint("[\\[\\]](https://example.com)").is_empty(),
            "Link with escaped brackets in text should pass"
        );
    }

    #[test]
    fn test_links_in_lists_and_blockquotes() {
        // Empty links in lists
        let in_list = lint("- [](https://example.com)\n- [valid](https://example.com)");
        assert_eq!(in_list.len(), 1);
        assert_eq!(in_list[0].line, 1);

        // Empty links in blockquotes
        let in_blockquote = lint("> [](https://example.com)\n> [valid](https://example.com)");
        assert_eq!(in_blockquote.len(), 1);
        assert_eq!(in_blockquote[0].line, 1);

        // Nested structures
        let nested = lint("> - [](url1)\n> - [text](url2)");
        assert_eq!(nested.len(), 1);
    }

    #[test]
    fn test_unicode_whitespace_characters() {
        // Non-breaking space (U+00A0) - IS considered whitespace by Rust's trim()
        assert_eq!(
            lint("[\u{00A0}](https://example.com)").len(),
            1,
            "Non-breaking space should be treated as whitespace"
        );

        // Em space (U+2003) - IS considered whitespace by Rust's trim()
        assert_eq!(
            lint("[\u{2003}](https://example.com)").len(),
            1,
            "Em space should be treated as whitespace"
        );

        // Zero-width space (U+200B) - NOT considered whitespace by Rust's trim();
        // it is a formatting character, not a whitespace character
        assert!(
            lint("[\u{200B}](https://example.com)").is_empty(),
            "Zero-width space is not considered whitespace by trim()"
        );

        // Zero-width space between spaces: " \u{200B} " trims to "\u{200B}",
        // which is NOT empty
        assert!(
            lint("[ \u{200B} ](https://example.com)").is_empty(),
            "Zero-width space remains after trim(), so link is not empty"
        );
    }

    #[test]
    fn test_empty_url_with_text() {
        let warnings = lint("[some text]()");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].message, "Empty link found: [some text]()");
    }

    #[test]
    fn test_both_empty_text_and_url() {
        let warnings = lint("[]()");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].message, "Empty link found: []()");
    }

    #[test]
    fn test_reference_link_with_undefined_reference() {
        assert_eq!(
            lint("[text][undefined]").len(),
            1,
            "Undefined reference should be treated as empty URL"
        );
    }

    #[test]
    fn test_shortcut_reference_links() {
        // Valid implicit reference link.
        // Note: [example] by itself is not parsed as a link by the LINK_PATTERN regex;
        // it needs to be followed by [] or () to be recognized as a link
        assert!(
            lint("[example][]\n\n[example]: https://example.com").is_empty(),
            "Valid implicit reference link should pass"
        );

        // Empty implicit reference link
        assert_eq!(
            lint("[][]\n\n[]: https://example.com").len(),
            1,
            "Empty implicit reference link should fail"
        );

        // Actual shortcut-style links are not detected (they don't match the pattern)
        assert!(
            lint("[example]\n\n[example]: https://example.com").is_empty(),
            "Shortcut links without [] or () are not parsed as links"
        );
    }

    #[test]
    fn test_fix_suggestions() {
        let cases = [
            ("[](https://example.com)", "[Link text](https://example.com)"),
            ("[text]()", "[text](https://example.com)"),
            ("[]()", "[Link text](https://example.com)"),
        ];

        for (content, expected_replacement) in cases {
            let warnings = lint(content);
            assert!(warnings[0].fix.is_some());
            let fix = warnings[0].fix.as_ref().unwrap();
            assert_eq!(fix.replacement, expected_replacement);
        }
    }

    #[test]
    fn test_complex_markdown_document() {
        let content = r#"# Document with various links

[Valid link](https://example.com) followed by [](empty.com).

## Lists with links
- [Good link](url1)
- [](url2)
- Item with [inline empty]() link

> Quote with [](quoted-empty.com)
> And [valid quoted](quoted-valid.com)

Code block should be ignored:
```
[](this-is-code)
```

[Reference style][ref1] and [][ref2]

[ref1]: https://ref1.com
[ref2]: https://ref2.com
"#;

        let warnings = lint(content);

        // Lines of the document that contain an empty link
        let empty_link_lines = [3, 7, 8, 10, 18];
        assert_eq!(warnings.len(), empty_link_lines.len(), "Should find all empty links");

        // Verify line numbers
        for (i, (warning, &expected_line)) in warnings.iter().zip(empty_link_lines.iter()).enumerate() {
            assert_eq!(
                warning.line, expected_line,
                "Empty link {i} should be on line {expected_line}"
            );
        }
    }

    #[test]
    fn test_issue_29_code_block_with_tildes() {
        // Issue #29: a code block containing tilde markers must not break reference links
        let content = r#"In addition to the [local scope][] and the [global scope][], Python also has a **built-in scope**.

```pycon
>>> @count_calls
... def greet(name):
...     print("Hi", name)
...
>>> greet("Trey")
Traceback (most recent call last):
  File "<python-input-2>", line 1, in <module>
    greet("Trey")
    ~~~~~^^^^^^^^
  File "<python-input-0>", line 4, in wrapper
    calls += 1
    ^^^^^
UnboundLocalError: cannot access local variable 'calls' where it is not associated with a value
```


[local scope]: https://www.pythonmorsels.com/local-and-global-variables/
[global scope]: https://www.pythonmorsels.com/assigning-global-variables/"#;

        let result = lint(content);

        // These reference links should NOT be flagged as empty
        assert!(
            result.is_empty(),
            "Should not flag reference links as empty when code blocks contain tildes (issue #29). Got: {result:?}"
        );
    }
}