rumdl_lib/rules/
md042_no_empty_links.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::document_structure::{DocumentStructure, DocumentStructureExtensions};
3use crate::utils::range_utils::LineIndex;
4
/// Rule MD042: No empty links
///
/// See [docs/md042.md](../../docs/md042.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a link has no content (text) or no destination (URL).
/// The rule carries no configuration or state, so it is a unit struct.
#[derive(Debug, Clone)]
pub struct MD042NoEmptyLinks;
12
13impl Default for MD042NoEmptyLinks {
14    fn default() -> Self {
15        Self::new()
16    }
17}
18
19impl MD042NoEmptyLinks {
20    pub fn new() -> Self {
21        Self
22    }
23}
24
25impl Rule for MD042NoEmptyLinks {
26    fn name(&self) -> &'static str {
27        "MD042"
28    }
29
30    fn description(&self) -> &'static str {
31        "No empty links"
32    }
33
34    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
35        let mut warnings = Vec::new();
36
37        // Use centralized link parsing from LintContext
38        for link in &ctx.links {
39            // For reference links, resolve the URL
40            let effective_url = if link.is_reference {
41                if let Some(ref_id) = &link.reference_id {
42                    ctx.get_reference_url(ref_id).unwrap_or("").to_string()
43                } else {
44                    String::new()
45                }
46            } else {
47                link.url.clone()
48            };
49
50            // Check for empty links
51            if link.text.trim().is_empty() || effective_url.trim().is_empty() {
52                let replacement = if link.text.trim().is_empty() && effective_url.trim().is_empty() {
53                    "[Link text](https://example.com)".to_string()
54                } else if link.text.trim().is_empty() {
55                    if link.is_reference {
56                        format!("[Link text]{}", &ctx.content[link.byte_offset + 1..link.byte_end])
57                    } else {
58                        format!("[Link text]({effective_url})")
59                    }
60                } else if link.is_reference {
61                    // Keep the reference format
62                    let ref_part = &ctx.content[link.byte_offset + link.text.len() + 2..link.byte_end];
63                    format!("[{}]{}", link.text, ref_part)
64                } else {
65                    format!("[{}](https://example.com)", link.text)
66                };
67
68                warnings.push(LintWarning {
69                    rule_name: Some(self.name()),
70                    message: format!("Empty link found: [{}]({})", link.text, effective_url),
71                    line: link.line,
72                    column: link.start_col + 1, // Convert to 1-indexed
73                    end_line: link.line,
74                    end_column: link.end_col + 1, // Convert to 1-indexed
75                    severity: Severity::Warning,
76                    fix: Some(Fix {
77                        range: link.byte_offset..link.byte_end,
78                        replacement,
79                    }),
80                });
81            }
82        }
83
84        Ok(warnings)
85    }
86
87    /// Optimized check using document structure
88    fn check_with_structure(
89        &self,
90        _ctx: &crate::lint_context::LintContext,
91        structure: &DocumentStructure,
92    ) -> LintResult {
93        let content = _ctx.content;
94        // Early return if there are no links
95        if structure.links.is_empty() {
96            return Ok(Vec::new());
97        }
98
99        let line_index = LineIndex::new(content.to_string());
100        let mut warnings = Vec::new();
101
102        // Get pre-computed empty links
103        let empty_links = structure.get_empty_links();
104
105        for link in empty_links {
106            let replacement = if link.text.trim().is_empty() && link.url.trim().is_empty() {
107                "[Link text](https://example.com)".to_string()
108            } else if link.text.trim().is_empty() {
109                format!("[Link text]({})", link.url)
110            } else {
111                format!("[{}](https://example.com)", link.text)
112            };
113
114            warnings.push(LintWarning {
115                rule_name: Some(self.name()),
116                message: format!("Empty link found: [{}]({})", link.text, link.url),
117                line: link.line,
118                column: link.start_col,
119                end_line: link.line,
120                end_column: link.end_col + 1,
121                severity: Severity::Warning,
122                fix: Some(Fix {
123                    range: line_index.line_col_to_byte_range_with_length(
124                        link.line,
125                        link.start_col,
126                        (link.end_col + 1).saturating_sub(link.start_col),
127                    ),
128                    replacement,
129                }),
130            });
131        }
132
133        Ok(warnings)
134    }
135
136    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
137        let content = ctx.content;
138
139        // Get all warnings first - only fix links that are actually flagged
140        let warnings = self.check(ctx)?;
141        if warnings.is_empty() {
142            return Ok(content.to_string());
143        }
144
145        // Collect all fixes with their ranges
146        let mut fixes: Vec<(std::ops::Range<usize>, String)> = warnings
147            .iter()
148            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.clone(), f.replacement.clone())))
149            .collect();
150
151        // Sort fixes by position (descending) to apply from end to start
152        fixes.sort_by(|a, b| b.0.start.cmp(&a.0.start));
153
154        let mut result = content.to_string();
155
156        // Apply fixes from end to start to maintain correct positions
157        for (range, replacement) in fixes {
158            result.replace_range(range, &replacement);
159        }
160
161        Ok(result)
162    }
163
164    /// Get the category of this rule for selective processing
165    fn category(&self) -> RuleCategory {
166        RuleCategory::Link
167    }
168
169    /// Check if this rule should be skipped
170    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
171        let content = ctx.content;
172        content.is_empty() || !content.contains('[')
173    }
174
175    fn as_any(&self) -> &dyn std::any::Any {
176        self
177    }
178
179    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
180    where
181        Self: Sized,
182    {
183        Box::new(MD042NoEmptyLinks)
184    }
185}
186
187impl DocumentStructureExtensions for MD042NoEmptyLinks {
188    fn has_relevant_elements(
189        &self,
190        _ctx: &crate::lint_context::LintContext,
191        doc_structure: &DocumentStructure,
192    ) -> bool {
193        !doc_structure.links.is_empty()
194    }
195}
196
197#[cfg(test)]
198mod tests {
199    use super::*;
200    use crate::lint_context::LintContext;
201
202    #[test]
203    fn test_links_with_text_should_pass() {
204        let ctx = LintContext::new("[valid link](https://example.com)");
205        let rule = MD042NoEmptyLinks::new();
206        let result = rule.check(&ctx).unwrap();
207        assert!(result.is_empty(), "Links with text should pass");
208
209        let ctx = LintContext::new("[another valid link](path/to/page.html)");
210        let result = rule.check(&ctx).unwrap();
211        assert!(result.is_empty(), "Links with text and relative URLs should pass");
212    }
213
214    #[test]
215    fn test_links_with_empty_text_should_fail() {
216        let ctx = LintContext::new("[](https://example.com)");
217        let rule = MD042NoEmptyLinks::new();
218        let result = rule.check(&ctx).unwrap();
219        assert_eq!(result.len(), 1);
220        assert_eq!(result[0].message, "Empty link found: [](https://example.com)");
221        assert_eq!(result[0].line, 1);
222        assert_eq!(result[0].column, 1);
223    }
224
225    #[test]
226    fn test_links_with_only_whitespace_should_fail() {
227        let ctx = LintContext::new("[   ](https://example.com)");
228        let rule = MD042NoEmptyLinks::new();
229        let result = rule.check(&ctx).unwrap();
230        assert_eq!(result.len(), 1);
231        assert_eq!(result[0].message, "Empty link found: [   ](https://example.com)");
232
233        let ctx = LintContext::new("[\t\n](https://example.com)");
234        let result = rule.check(&ctx).unwrap();
235        assert_eq!(result.len(), 1);
236        assert_eq!(result[0].message, "Empty link found: [\t\n](https://example.com)");
237    }
238
239    #[test]
240    fn test_reference_links_with_empty_text() {
241        let ctx = LintContext::new("[][ref]\n\n[ref]: https://example.com");
242        let rule = MD042NoEmptyLinks::new();
243        let result = rule.check(&ctx).unwrap();
244        assert_eq!(result.len(), 1);
245        assert_eq!(result[0].message, "Empty link found: [](https://example.com)");
246        assert_eq!(result[0].line, 1);
247
248        // Empty text with empty reference
249        let ctx = LintContext::new("[][]\n\n[]: https://example.com");
250        let result = rule.check(&ctx).unwrap();
251        assert_eq!(result.len(), 1);
252    }
253
254    #[test]
255    fn test_images_should_be_ignored() {
256        // Images can have empty alt text, so they should not trigger the rule
257        let ctx = LintContext::new("![](image.png)");
258        let rule = MD042NoEmptyLinks::new();
259        let result = rule.check(&ctx).unwrap();
260        assert!(result.is_empty(), "Images with empty alt text should be ignored");
261
262        let ctx = LintContext::new("![   ](image.png)");
263        let result = rule.check(&ctx).unwrap();
264        assert!(result.is_empty(), "Images with whitespace alt text should be ignored");
265    }
266
267    #[test]
268    fn test_links_with_nested_formatting() {
269        // Links with nested formatting but empty effective text
270        // Note: [**] contains "**" as text, which is not empty after trimming
271        let ctx = LintContext::new("[**](https://example.com)");
272        let rule = MD042NoEmptyLinks::new();
273        let result = rule.check(&ctx).unwrap();
274        assert!(result.is_empty(), "[**] is not considered empty since ** is text");
275
276        let ctx = LintContext::new("[__](https://example.com)");
277        let result = rule.check(&ctx).unwrap();
278        assert!(result.is_empty(), "[__] is not considered empty since __ is text");
279
280        // Links with truly empty formatting should fail
281        let ctx = LintContext::new("[](https://example.com)");
282        let result = rule.check(&ctx).unwrap();
283        assert_eq!(result.len(), 1);
284
285        // Links with nested formatting and actual text should pass
286        let ctx = LintContext::new("[**bold text**](https://example.com)");
287        let result = rule.check(&ctx).unwrap();
288        assert!(result.is_empty(), "Links with nested formatting and text should pass");
289
290        let ctx = LintContext::new("[*italic* and **bold**](https://example.com)");
291        let result = rule.check(&ctx).unwrap();
292        assert!(result.is_empty(), "Links with multiple nested formatting should pass");
293    }
294
295    #[test]
296    fn test_multiple_empty_links_on_same_line() {
297        let ctx = LintContext::new("[](url1) and [](url2) and [valid](url3)");
298        let rule = MD042NoEmptyLinks::new();
299        let result = rule.check(&ctx).unwrap();
300        assert_eq!(result.len(), 2, "Should detect both empty links");
301        assert_eq!(result[0].column, 1);
302        assert_eq!(result[1].column, 14);
303    }
304
305    #[test]
306    fn test_escaped_brackets() {
307        // Escaped brackets should not be treated as links
308        let ctx = LintContext::new("\\[\\](https://example.com)");
309        let rule = MD042NoEmptyLinks::new();
310        let result = rule.check(&ctx).unwrap();
311        assert!(result.is_empty(), "Escaped brackets should not be treated as links");
312
313        // But this should still be a link
314        let ctx = LintContext::new("[\\[\\]](https://example.com)");
315        let result = rule.check(&ctx).unwrap();
316        assert!(result.is_empty(), "Link with escaped brackets in text should pass");
317    }
318
319    #[test]
320    fn test_links_in_lists_and_blockquotes() {
321        // Empty links in lists
322        let ctx = LintContext::new("- [](https://example.com)\n- [valid](https://example.com)");
323        let rule = MD042NoEmptyLinks::new();
324        let result = rule.check(&ctx).unwrap();
325        assert_eq!(result.len(), 1);
326        assert_eq!(result[0].line, 1);
327
328        // Empty links in blockquotes
329        let ctx = LintContext::new("> [](https://example.com)\n> [valid](https://example.com)");
330        let result = rule.check(&ctx).unwrap();
331        assert_eq!(result.len(), 1);
332        assert_eq!(result[0].line, 1);
333
334        // Nested structures
335        let ctx = LintContext::new("> - [](url1)\n> - [text](url2)");
336        let result = rule.check(&ctx).unwrap();
337        assert_eq!(result.len(), 1);
338    }
339
340    #[test]
341    fn test_unicode_whitespace_characters() {
342        // Non-breaking space (U+00A0) - IS considered whitespace by Rust's trim()
343        let ctx = LintContext::new("[\u{00A0}](https://example.com)");
344        let rule = MD042NoEmptyLinks::new();
345        let result = rule.check(&ctx).unwrap();
346        assert_eq!(result.len(), 1, "Non-breaking space should be treated as whitespace");
347
348        // Em space (U+2003) - IS considered whitespace by Rust's trim()
349        let ctx = LintContext::new("[\u{2003}](https://example.com)");
350        let result = rule.check(&ctx).unwrap();
351        assert_eq!(result.len(), 1, "Em space should be treated as whitespace");
352
353        // Zero-width space (U+200B) - NOT considered whitespace by Rust's trim()
354        // This is a formatting character, not a whitespace character
355        let ctx = LintContext::new("[\u{200B}](https://example.com)");
356        let result = rule.check(&ctx).unwrap();
357        assert!(
358            result.is_empty(),
359            "Zero-width space is not considered whitespace by trim()"
360        );
361
362        // Test with zero-width space between spaces
363        // Since trim() doesn't consider zero-width space as whitespace,
364        // " \u{200B} " becomes "\u{200B}" after trimming, which is NOT empty
365        let ctx = LintContext::new("[ \u{200B} ](https://example.com)");
366        let result = rule.check(&ctx).unwrap();
367        assert!(
368            result.is_empty(),
369            "Zero-width space remains after trim(), so link is not empty"
370        );
371    }
372
373    #[test]
374    fn test_empty_url_with_text() {
375        let ctx = LintContext::new("[some text]()");
376        let rule = MD042NoEmptyLinks::new();
377        let result = rule.check(&ctx).unwrap();
378        assert_eq!(result.len(), 1);
379        assert_eq!(result[0].message, "Empty link found: [some text]()");
380    }
381
382    #[test]
383    fn test_both_empty_text_and_url() {
384        let ctx = LintContext::new("[]()");
385        let rule = MD042NoEmptyLinks::new();
386        let result = rule.check(&ctx).unwrap();
387        assert_eq!(result.len(), 1);
388        assert_eq!(result[0].message, "Empty link found: []()");
389    }
390
391    #[test]
392    fn test_reference_link_with_undefined_reference() {
393        let ctx = LintContext::new("[text][undefined]");
394        let rule = MD042NoEmptyLinks::new();
395        let result = rule.check(&ctx).unwrap();
396        assert_eq!(result.len(), 1, "Undefined reference should be treated as empty URL");
397    }
398
399    #[test]
400    fn test_shortcut_reference_links() {
401        // Valid shortcut reference link (implicit reference)
402        // Note: [example] by itself is not parsed as a link by the LINK_PATTERN regex
403        // It needs to be followed by [] or () to be recognized as a link
404        let ctx = LintContext::new("[example][]\n\n[example]: https://example.com");
405        let rule = MD042NoEmptyLinks::new();
406        let result = rule.check(&ctx).unwrap();
407        assert!(result.is_empty(), "Valid implicit reference link should pass");
408
409        // Empty implicit reference link
410        let ctx = LintContext::new("[][]\n\n[]: https://example.com");
411        let result = rule.check(&ctx).unwrap();
412        assert_eq!(result.len(), 1, "Empty implicit reference link should fail");
413
414        // Test actual shortcut-style links are not detected (since they don't match the pattern)
415        let ctx = LintContext::new("[example]\n\n[example]: https://example.com");
416        let result = rule.check(&ctx).unwrap();
417        assert!(
418            result.is_empty(),
419            "Shortcut links without [] or () are not parsed as links"
420        );
421    }
422
423    #[test]
424    fn test_fix_suggestions() {
425        let ctx = LintContext::new("[](https://example.com)");
426        let rule = MD042NoEmptyLinks::new();
427        let result = rule.check(&ctx).unwrap();
428        assert!(result[0].fix.is_some());
429        let fix = result[0].fix.as_ref().unwrap();
430        assert_eq!(fix.replacement, "[Link text](https://example.com)");
431
432        let ctx = LintContext::new("[text]()");
433        let result = rule.check(&ctx).unwrap();
434        assert!(result[0].fix.is_some());
435        let fix = result[0].fix.as_ref().unwrap();
436        assert_eq!(fix.replacement, "[text](https://example.com)");
437
438        let ctx = LintContext::new("[]()");
439        let result = rule.check(&ctx).unwrap();
440        assert!(result[0].fix.is_some());
441        let fix = result[0].fix.as_ref().unwrap();
442        assert_eq!(fix.replacement, "[Link text](https://example.com)");
443    }
444
445    #[test]
446    fn test_complex_markdown_document() {
447        let content = r#"# Document with various links
448
449[Valid link](https://example.com) followed by [](empty.com).
450
451## Lists with links
452- [Good link](url1)
453- [](url2)
454- Item with [inline empty]() link
455
456> Quote with [](quoted-empty.com)
457> And [valid quoted](quoted-valid.com)
458
459Code block should be ignored:
460```
461[](this-is-code)
462```
463
464[Reference style][ref1] and [][ref2]
465
466[ref1]: https://ref1.com
467[ref2]: https://ref2.com
468"#;
469
470        let ctx = LintContext::new(content);
471        let rule = MD042NoEmptyLinks::new();
472        let result = rule.check(&ctx).unwrap();
473
474        // Count the empty links
475        let empty_link_lines = [3, 7, 8, 10, 18];
476        assert_eq!(result.len(), empty_link_lines.len(), "Should find all empty links");
477
478        // Verify line numbers
479        for (i, &expected_line) in empty_link_lines.iter().enumerate() {
480            assert_eq!(
481                result[i].line, expected_line,
482                "Empty link {i} should be on line {expected_line}"
483            );
484        }
485    }
486
487    #[test]
488    fn test_issue_29_code_block_with_tildes() {
489        // Test for issue #29 - code blocks with tilde markers should not break reference links
490        let content = r#"In addition to the [local scope][] and the [global scope][], Python also has a **built-in scope**.
491
492```pycon
493>>> @count_calls
494... def greet(name):
495...     print("Hi", name)
496...
497>>> greet("Trey")
498Traceback (most recent call last):
499  File "<python-input-2>", line 1, in <module>
500    greet("Trey")
501    ~~~~~^^^^^^^^
502  File "<python-input-0>", line 4, in wrapper
503    calls += 1
504    ^^^^^
505UnboundLocalError: cannot access local variable 'calls' where it is not associated with a value
506```
507
508
509[local scope]: https://www.pythonmorsels.com/local-and-global-variables/
510[global scope]: https://www.pythonmorsels.com/assigning-global-variables/"#;
511
512        let ctx = LintContext::new(content);
513        let rule = MD042NoEmptyLinks::new();
514        let result = rule.check(&ctx).unwrap();
515
516        // These reference links should NOT be flagged as empty
517        assert!(
518            result.is_empty(),
519            "Should not flag reference links as empty when code blocks contain tildes (issue #29). Got: {result:?}"
520        );
521    }
522}