rumdl_lib/rules/
md042_no_empty_links.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::document_structure::{DocumentStructure, DocumentStructureExtensions};
3use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
4use crate::utils::range_utils::LineIndex;
5
/// Rule MD042: No empty links.
///
/// Reports links that are missing their visible text, their destination
/// (URL), or both.
///
/// The rule carries no state or configuration of its own; the Markdown flavor
/// it needs is read from the lint context at check time.
///
/// See [docs/md042.md](../../docs/md042.md) for full documentation, configuration, and examples.
#[derive(Default, Clone)]
pub struct MD042NoEmptyLinks {}
13
14impl MD042NoEmptyLinks {
15    pub fn new() -> Self {
16        Self {}
17    }
18}
19
20impl Rule for MD042NoEmptyLinks {
21    fn name(&self) -> &'static str {
22        "MD042"
23    }
24
25    fn description(&self) -> &'static str {
26        "No empty links"
27    }
28
29    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
30        let mut warnings = Vec::new();
31
32        // Check if we're in MkDocs mode from the context
33        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
34
35        // Use centralized link parsing from LintContext
36        for link in &ctx.links {
37            // For reference links, resolve the URL
38            let effective_url = if link.is_reference {
39                if let Some(ref_id) = &link.reference_id {
40                    ctx.get_reference_url(ref_id).unwrap_or("").to_string()
41                } else {
42                    String::new()
43                }
44            } else {
45                link.url.clone()
46            };
47
48            // For MkDocs mode, check if this looks like an auto-reference
49            // Note: We check both the reference_id AND the text since shorthand references
50            // like [class.Name][] use the text as the implicit reference
51            if mkdocs_mode && link.is_reference {
52                // Check the reference_id if present
53                if let Some(ref_id) = &link.reference_id
54                    && is_mkdocs_auto_reference(ref_id)
55                {
56                    continue;
57                }
58                // Also check the link text itself for shorthand references
59                if is_mkdocs_auto_reference(&link.text) {
60                    continue;
61                }
62            }
63
64            // Check for empty links
65            if link.text.trim().is_empty() || effective_url.trim().is_empty() {
66                let replacement = if link.text.trim().is_empty() && effective_url.trim().is_empty() {
67                    "[Link text](https://example.com)".to_string()
68                } else if link.text.trim().is_empty() {
69                    if link.is_reference {
70                        format!("[Link text]{}", &ctx.content[link.byte_offset + 1..link.byte_end])
71                    } else {
72                        format!("[Link text]({effective_url})")
73                    }
74                } else if link.is_reference {
75                    // Keep the reference format
76                    let ref_part = &ctx.content[link.byte_offset + link.text.len() + 2..link.byte_end];
77                    format!("[{}]{}", link.text, ref_part)
78                } else {
79                    format!("[{}](https://example.com)", link.text)
80                };
81
82                warnings.push(LintWarning {
83                    rule_name: Some(self.name()),
84                    message: format!("Empty link found: [{}]({})", link.text, effective_url),
85                    line: link.line,
86                    column: link.start_col + 1, // Convert to 1-indexed
87                    end_line: link.line,
88                    end_column: link.end_col + 1, // Convert to 1-indexed
89                    severity: Severity::Warning,
90                    fix: Some(Fix {
91                        range: link.byte_offset..link.byte_end,
92                        replacement,
93                    }),
94                });
95            }
96        }
97
98        Ok(warnings)
99    }
100
101    /// Optimized check using document structure
102    fn check_with_structure(
103        &self,
104        _ctx: &crate::lint_context::LintContext,
105        structure: &DocumentStructure,
106    ) -> LintResult {
107        let content = _ctx.content;
108        // Early return if there are no links
109        if structure.links.is_empty() {
110            return Ok(Vec::new());
111        }
112
113        let line_index = LineIndex::new(content.to_string());
114        let mut warnings = Vec::new();
115
116        // Get pre-computed empty links
117        let empty_links = structure.get_empty_links();
118
119        for link in empty_links {
120            let replacement = if link.text.trim().is_empty() && link.url.trim().is_empty() {
121                "[Link text](https://example.com)".to_string()
122            } else if link.text.trim().is_empty() {
123                format!("[Link text]({})", link.url)
124            } else {
125                format!("[{}](https://example.com)", link.text)
126            };
127
128            warnings.push(LintWarning {
129                rule_name: Some(self.name()),
130                message: format!("Empty link found: [{}]({})", link.text, link.url),
131                line: link.line,
132                column: link.start_col,
133                end_line: link.line,
134                end_column: link.end_col + 1,
135                severity: Severity::Warning,
136                fix: Some(Fix {
137                    range: line_index.line_col_to_byte_range_with_length(
138                        link.line,
139                        link.start_col,
140                        (link.end_col + 1).saturating_sub(link.start_col),
141                    ),
142                    replacement,
143                }),
144            });
145        }
146
147        Ok(warnings)
148    }
149
150    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
151        let content = ctx.content;
152
153        // Get all warnings first - only fix links that are actually flagged
154        let warnings = self.check(ctx)?;
155        if warnings.is_empty() {
156            return Ok(content.to_string());
157        }
158
159        // Collect all fixes with their ranges
160        let mut fixes: Vec<(std::ops::Range<usize>, String)> = warnings
161            .iter()
162            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.clone(), f.replacement.clone())))
163            .collect();
164
165        // Sort fixes by position (descending) to apply from end to start
166        fixes.sort_by(|a, b| b.0.start.cmp(&a.0.start));
167
168        let mut result = content.to_string();
169
170        // Apply fixes from end to start to maintain correct positions
171        for (range, replacement) in fixes {
172            result.replace_range(range, &replacement);
173        }
174
175        Ok(result)
176    }
177
178    /// Get the category of this rule for selective processing
179    fn category(&self) -> RuleCategory {
180        RuleCategory::Link
181    }
182
183    /// Check if this rule should be skipped
184    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
185        let content = ctx.content;
186        content.is_empty() || !content.contains('[')
187    }
188
189    fn as_any(&self) -> &dyn std::any::Any {
190        self
191    }
192
193    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
194    where
195        Self: Sized,
196    {
197        // Flavor is now accessed from LintContext during check
198        Box::new(MD042NoEmptyLinks::new())
199    }
200}
201
202impl DocumentStructureExtensions for MD042NoEmptyLinks {
203    fn has_relevant_elements(
204        &self,
205        _ctx: &crate::lint_context::LintContext,
206        doc_structure: &DocumentStructure,
207    ) -> bool {
208        !doc_structure.links.is_empty()
209    }
210}
211
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// Runs MD042 over `content` parsed as standard-flavor Markdown and
    /// returns the warnings it produced.
    fn lint(content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        MD042NoEmptyLinks::new().check(&ctx).unwrap()
    }

    #[test]
    fn test_links_with_text_should_pass() {
        assert!(
            lint("[valid link](https://example.com)").is_empty(),
            "Links with text should pass"
        );
        assert!(
            lint("[another valid link](path/to/page.html)").is_empty(),
            "Links with text and relative URLs should pass"
        );
    }

    #[test]
    fn test_links_with_empty_text_should_fail() {
        let warnings = lint("[](https://example.com)");
        assert_eq!(warnings.len(), 1);

        let warning = &warnings[0];
        assert_eq!(warning.message, "Empty link found: [](https://example.com)");
        assert_eq!(warning.line, 1);
        assert_eq!(warning.column, 1);
    }

    #[test]
    fn test_links_with_only_whitespace_should_fail() {
        let cases = [
            (
                "[   ](https://example.com)",
                "Empty link found: [   ](https://example.com)",
            ),
            (
                "[\t\n](https://example.com)",
                "Empty link found: [\t\n](https://example.com)",
            ),
        ];

        for (content, expected_message) in cases {
            let warnings = lint(content);
            assert_eq!(warnings.len(), 1);
            assert_eq!(warnings[0].message, expected_message);
        }
    }

    #[test]
    fn test_reference_links_with_empty_text() {
        let warnings = lint("[][ref]\n\n[ref]: https://example.com");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].message, "Empty link found: [](https://example.com)");
        assert_eq!(warnings[0].line, 1);

        // Empty text with empty reference
        let warnings = lint("[][]\n\n[]: https://example.com");
        assert_eq!(warnings.len(), 1);
    }

    #[test]
    fn test_images_should_be_ignored() {
        // Images can have empty alt text, so they should not trigger the rule
        assert!(
            lint("![](image.png)").is_empty(),
            "Images with empty alt text should be ignored"
        );
        assert!(
            lint("![   ](image.png)").is_empty(),
            "Images with whitespace alt text should be ignored"
        );
    }

    #[test]
    fn test_links_with_nested_formatting() {
        // Emphasis markers without content still count as text:
        // `**` and `__` are not empty after trimming
        assert!(
            lint("[**](https://example.com)").is_empty(),
            "[**] is not considered empty since ** is text"
        );
        assert!(
            lint("[__](https://example.com)").is_empty(),
            "[__] is not considered empty since __ is text"
        );

        // Links with truly empty formatting should fail
        assert_eq!(lint("[](https://example.com)").len(), 1);

        // Links with nested formatting and actual text should pass
        assert!(
            lint("[**bold text**](https://example.com)").is_empty(),
            "Links with nested formatting and text should pass"
        );
        assert!(
            lint("[*italic* and **bold**](https://example.com)").is_empty(),
            "Links with multiple nested formatting should pass"
        );
    }

    #[test]
    fn test_multiple_empty_links_on_same_line() {
        let warnings = lint("[](url1) and [](url2) and [valid](url3)");
        assert_eq!(warnings.len(), 2, "Should detect both empty links");
        assert_eq!(warnings[0].column, 1);
        assert_eq!(warnings[1].column, 14);
    }

    #[test]
    fn test_escaped_brackets() {
        // Escaped brackets should not be treated as links
        assert!(
            lint("\\[\\](https://example.com)").is_empty(),
            "Escaped brackets should not be treated as links"
        );

        // But this should still be a link
        assert!(
            lint("[\\[\\]](https://example.com)").is_empty(),
            "Link with escaped brackets in text should pass"
        );
    }

    #[test]
    fn test_links_in_lists_and_blockquotes() {
        // Empty links in lists
        let in_list = lint("- [](https://example.com)\n- [valid](https://example.com)");
        assert_eq!(in_list.len(), 1);
        assert_eq!(in_list[0].line, 1);

        // Empty links in blockquotes
        let in_quote = lint("> [](https://example.com)\n> [valid](https://example.com)");
        assert_eq!(in_quote.len(), 1);
        assert_eq!(in_quote[0].line, 1);

        // Nested structures
        let nested = lint("> - [](url1)\n> - [text](url2)");
        assert_eq!(nested.len(), 1);
    }

    #[test]
    fn test_unicode_whitespace_characters() {
        // Non-breaking space (U+00A0) - IS considered whitespace by Rust's trim()
        assert_eq!(
            lint("[\u{00A0}](https://example.com)").len(),
            1,
            "Non-breaking space should be treated as whitespace"
        );

        // Em space (U+2003) - IS considered whitespace by Rust's trim()
        assert_eq!(
            lint("[\u{2003}](https://example.com)").len(),
            1,
            "Em space should be treated as whitespace"
        );

        // Zero-width space (U+200B) - NOT considered whitespace by Rust's trim()
        // This is a formatting character, not a whitespace character
        assert!(
            lint("[\u{200B}](https://example.com)").is_empty(),
            "Zero-width space is not considered whitespace by trim()"
        );

        // Zero-width space between spaces: trimming " \u{200B} " leaves
        // "\u{200B}", which is NOT empty
        assert!(
            lint("[ \u{200B} ](https://example.com)").is_empty(),
            "Zero-width space remains after trim(), so link is not empty"
        );
    }

    #[test]
    fn test_empty_url_with_text() {
        let warnings = lint("[some text]()");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].message, "Empty link found: [some text]()");
    }

    #[test]
    fn test_both_empty_text_and_url() {
        let warnings = lint("[]()");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].message, "Empty link found: []()");
    }

    #[test]
    fn test_reference_link_with_undefined_reference() {
        assert_eq!(
            lint("[text][undefined]").len(),
            1,
            "Undefined reference should be treated as empty URL"
        );
    }

    #[test]
    fn test_shortcut_reference_links() {
        // Valid shortcut reference link (implicit reference)
        // Note: [example] by itself is not parsed as a link by the LINK_PATTERN regex
        // It needs to be followed by [] or () to be recognized as a link
        assert!(
            lint("[example][]\n\n[example]: https://example.com").is_empty(),
            "Valid implicit reference link should pass"
        );

        // Empty implicit reference link
        assert_eq!(
            lint("[][]\n\n[]: https://example.com").len(),
            1,
            "Empty implicit reference link should fail"
        );

        // Test actual shortcut-style links are not detected (since they don't match the pattern)
        assert!(
            lint("[example]\n\n[example]: https://example.com").is_empty(),
            "Shortcut links without [] or () are not parsed as links"
        );
    }

    #[test]
    fn test_fix_suggestions() {
        let cases = [
            ("[](https://example.com)", "[Link text](https://example.com)"),
            ("[text]()", "[text](https://example.com)"),
            ("[]()", "[Link text](https://example.com)"),
        ];

        for (content, expected_replacement) in cases {
            let warnings = lint(content);
            let suggestion = warnings[0].fix.as_ref();
            assert!(suggestion.is_some());
            assert_eq!(suggestion.unwrap().replacement, expected_replacement);
        }
    }

    #[test]
    fn test_complex_markdown_document() {
        let content = r#"# Document with various links

[Valid link](https://example.com) followed by [](empty.com).

## Lists with links
- [Good link](url1)
- [](url2)
- Item with [inline empty]() link

> Quote with [](quoted-empty.com)
> And [valid quoted](quoted-valid.com)

Code block should be ignored:
```
[](this-is-code)
```

[Reference style][ref1] and [][ref2]

[ref1]: https://ref1.com
[ref2]: https://ref2.com
"#;

        let warnings = lint(content);

        // Lines on which an empty link is expected, in document order
        let empty_link_lines = [3, 7, 8, 10, 18];
        assert_eq!(warnings.len(), empty_link_lines.len(), "Should find all empty links");

        // Verify line numbers
        for (i, (warning, &expected_line)) in warnings.iter().zip(&empty_link_lines).enumerate() {
            assert_eq!(
                warning.line, expected_line,
                "Empty link {i} should be on line {expected_line}"
            );
        }
    }

    #[test]
    fn test_issue_29_code_block_with_tildes() {
        // Test for issue #29 - code blocks with tilde markers should not break reference links
        let content = r#"In addition to the [local scope][] and the [global scope][], Python also has a **built-in scope**.

```pycon
>>> @count_calls
... def greet(name):
...     print("Hi", name)
...
>>> greet("Trey")
Traceback (most recent call last):
  File "<python-input-2>", line 1, in <module>
    greet("Trey")
    ~~~~~^^^^^^^^
  File "<python-input-0>", line 4, in wrapper
    calls += 1
    ^^^^^
UnboundLocalError: cannot access local variable 'calls' where it is not associated with a value
```


[local scope]: https://www.pythonmorsels.com/local-and-global-variables/
[global scope]: https://www.pythonmorsels.com/assigning-global-variables/"#;

        let result = lint(content);

        // These reference links should NOT be flagged as empty
        assert!(
            result.is_empty(),
            "Should not flag reference links as empty when code blocks contain tildes (issue #29). Got: {result:?}"
        );
    }
}