rumdl_lib/rules/
md042_no_empty_links.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3
/// Rule MD042: No empty links
///
/// See [docs/md042.md](../../docs/md042.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a link has no content (text) or destination (URL).
#[derive(Clone, Default)]
pub struct MD042NoEmptyLinks {}

impl MD042NoEmptyLinks {
    /// Build the rule. It carries no configuration or state.
    pub fn new() -> Self {
        MD042NoEmptyLinks {}
    }

    /// Remove every leading and trailing backtick from `s`.
    ///
    /// Backticks in the middle of the string are left alone. Used for MkDocs
    /// auto-reference detection, where `module.Class` written as inline code
    /// should be compared as plain `module.Class`.
    fn strip_backticks(s: &str) -> &str {
        s.trim_matches('`')
    }

    /// Report whether `s` is an ASCII Python-style identifier.
    ///
    /// The first character must be an ASCII letter or `_`; every following
    /// character must be an ASCII letter, an ASCII digit or `_`. The empty
    /// string is not an identifier.
    fn is_valid_python_identifier(s: &str) -> bool {
        let mut rest = s.chars();
        match rest.next() {
            Some(head) if head == '_' || head.is_ascii_alphabetic() => {
                rest.all(|c| c == '_' || c.is_ascii_alphanumeric())
            }
            _ => false,
        }
    }
}
38
39impl Rule for MD042NoEmptyLinks {
40    fn name(&self) -> &'static str {
41        "MD042"
42    }
43
44    fn description(&self) -> &'static str {
45        "No empty links"
46    }
47
48    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
49        let mut warnings = Vec::new();
50
51        // Check if we're in MkDocs mode from the context
52        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
53
54        // Use centralized link parsing from LintContext
55        for link in &ctx.links {
56            // For reference links, resolve the URL
57            let effective_url = if link.is_reference {
58                if let Some(ref_id) = &link.reference_id {
59                    ctx.get_reference_url(ref_id).unwrap_or("").to_string()
60                } else {
61                    String::new()
62                }
63            } else {
64                link.url.clone()
65            };
66
67            // For MkDocs mode, check if this looks like an auto-reference
68            // Note: We check both the reference_id AND the text since shorthand references
69            // like [class.Name][] use the text as the implicit reference
70            // Also strip backticks since MkDocs resolves `module.Class` as module.Class
71            if mkdocs_mode && link.is_reference {
72                // Check the reference_id if present (strip backticks first)
73                if let Some(ref_id) = &link.reference_id {
74                    let stripped_ref = Self::strip_backticks(ref_id);
75                    // Accept if it matches MkDocs patterns OR if it's a backtick-wrapped valid identifier
76                    // Backticks indicate code/type reference (like `str`, `int`, `MyClass`)
77                    if is_mkdocs_auto_reference(stripped_ref)
78                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
79                    {
80                        continue;
81                    }
82                }
83                // Also check the link text itself for shorthand references (strip backticks)
84                let stripped_text = Self::strip_backticks(&link.text);
85                // Accept if it matches MkDocs patterns OR if it's a backtick-wrapped valid identifier
86                if is_mkdocs_auto_reference(stripped_text)
87                    || (link.text.as_str() != stripped_text && Self::is_valid_python_identifier(stripped_text))
88                {
89                    continue;
90                }
91            }
92
93            // Check for empty links
94            if link.text.trim().is_empty() || effective_url.trim().is_empty() {
95                let replacement = if link.text.trim().is_empty() && effective_url.trim().is_empty() {
96                    "[Link text](https://example.com)".to_string()
97                } else if link.text.trim().is_empty() {
98                    if link.is_reference {
99                        format!("[Link text]{}", &ctx.content[link.byte_offset + 1..link.byte_end])
100                    } else {
101                        format!("[Link text]({effective_url})")
102                    }
103                } else if link.is_reference {
104                    // Keep the reference format
105                    let ref_part = &ctx.content[link.byte_offset + link.text.len() + 2..link.byte_end];
106                    format!("[{}]{}", link.text, ref_part)
107                } else {
108                    format!("[{}](https://example.com)", link.text)
109                };
110
111                // Extract the exact link text from the source
112                let link_display = &ctx.content[link.byte_offset..link.byte_end];
113
114                warnings.push(LintWarning {
115                    rule_name: Some(self.name()),
116                    message: format!("Empty link found: {link_display}"),
117                    line: link.line,
118                    column: link.start_col + 1, // Convert to 1-indexed
119                    end_line: link.line,
120                    end_column: link.end_col + 1, // Convert to 1-indexed
121                    severity: Severity::Warning,
122                    fix: Some(Fix {
123                        range: link.byte_offset..link.byte_end,
124                        replacement,
125                    }),
126                });
127            }
128        }
129
130        Ok(warnings)
131    }
132
133    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
134        let content = ctx.content;
135
136        // Get all warnings first - only fix links that are actually flagged
137        let warnings = self.check(ctx)?;
138        if warnings.is_empty() {
139            return Ok(content.to_string());
140        }
141
142        // Collect all fixes with their ranges
143        let mut fixes: Vec<(std::ops::Range<usize>, String)> = warnings
144            .iter()
145            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.clone(), f.replacement.clone())))
146            .collect();
147
148        // Sort fixes by position (descending) to apply from end to start
149        fixes.sort_by(|a, b| b.0.start.cmp(&a.0.start));
150
151        let mut result = content.to_string();
152
153        // Apply fixes from end to start to maintain correct positions
154        for (range, replacement) in fixes {
155            result.replace_range(range, &replacement);
156        }
157
158        Ok(result)
159    }
160
161    /// Get the category of this rule for selective processing
162    fn category(&self) -> RuleCategory {
163        RuleCategory::Link
164    }
165
166    /// Check if this rule should be skipped
167    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
168        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
169    }
170
171    fn as_any(&self) -> &dyn std::any::Any {
172        self
173    }
174
175    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
176    where
177        Self: Sized,
178    {
179        // Flavor is now accessed from LintContext during check
180        Box::new(MD042NoEmptyLinks::new())
181    }
182}
183
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// Run the rule over `content` with the given flavor and return its warnings.
    fn lint_with(content: &str, flavor: MarkdownFlavor) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, flavor);
        MD042NoEmptyLinks::new().check(&ctx).unwrap()
    }

    /// Run the rule in Standard flavor.
    fn lint_standard(content: &str) -> Vec<LintWarning> {
        lint_with(content, MarkdownFlavor::Standard)
    }

    /// Run the rule in MkDocs flavor.
    fn lint_mkdocs(content: &str) -> Vec<LintWarning> {
        lint_with(content, MarkdownFlavor::MkDocs)
    }

    #[test]
    fn test_links_with_text_should_pass() {
        let result = lint_standard("[valid link](https://example.com)");
        assert!(result.is_empty(), "Links with text should pass");

        let result = lint_standard("[another valid link](path/to/page.html)");
        assert!(result.is_empty(), "Links with text and relative URLs should pass");
    }

    #[test]
    fn test_links_with_empty_text_should_fail() {
        let result = lint_standard("[](https://example.com)");
        assert_eq!(result.len(), 1);

        let warning = &result[0];
        assert_eq!(warning.message, "Empty link found: [](https://example.com)");
        assert_eq!(warning.line, 1);
        assert_eq!(warning.column, 1);
    }

    #[test]
    fn test_links_with_only_whitespace_should_fail() {
        // Plain spaces between the brackets
        let result = lint_standard("[   ](https://example.com)");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Empty link found: [   ](https://example.com)");

        // Tab and newline between the brackets
        let result = lint_standard("[\t\n](https://example.com)");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Empty link found: [\t\n](https://example.com)");
    }

    #[test]
    fn test_reference_links_with_empty_text() {
        let result = lint_standard("[][ref]\n\n[ref]: https://example.com");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Empty link found: [][ref]");
        assert_eq!(result[0].line, 1);

        // Both the text and the reference label are empty
        let result = lint_standard("[][]\n\n[]: https://example.com");
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_images_should_be_ignored() {
        // Empty alt text on an image is not an empty link
        let result = lint_standard("![](image.png)");
        assert!(result.is_empty(), "Images with empty alt text should be ignored");

        let result = lint_standard("![   ](image.png)");
        assert!(result.is_empty(), "Images with whitespace alt text should be ignored");
    }

    #[test]
    fn test_links_with_nested_formatting() {
        // Bare emphasis markers count as text: they survive trimming
        let result = lint_standard("[**](https://example.com)");
        assert!(result.is_empty(), "[**] is not considered empty since ** is text");

        let result = lint_standard("[__](https://example.com)");
        assert!(result.is_empty(), "[__] is not considered empty since __ is text");

        // Nothing at all between the brackets is still flagged
        let result = lint_standard("[](https://example.com)");
        assert_eq!(result.len(), 1);

        // Formatting wrapped around real text is fine
        let result = lint_standard("[**bold text**](https://example.com)");
        assert!(result.is_empty(), "Links with nested formatting and text should pass");

        let result = lint_standard("[*italic* and **bold**](https://example.com)");
        assert!(result.is_empty(), "Links with multiple nested formatting should pass");
    }

    #[test]
    fn test_multiple_empty_links_on_same_line() {
        let result = lint_standard("[](url1) and [](url2) and [valid](url3)");
        assert_eq!(result.len(), 2, "Should detect both empty links");
        assert_eq!(result[0].column, 1);
        assert_eq!(result[1].column, 14);
    }

    #[test]
    fn test_escaped_brackets() {
        // Backslash-escaped brackets do not open a link
        let result = lint_standard("\\[\\](https://example.com)");
        assert!(result.is_empty(), "Escaped brackets should not be treated as links");

        // Escaped brackets used as the text of a real link
        let result = lint_standard("[\\[\\]](https://example.com)");
        assert!(result.is_empty(), "Link with escaped brackets in text should pass");
    }

    #[test]
    fn test_links_in_lists_and_blockquotes() {
        // List items
        let result = lint_standard("- [](https://example.com)\n- [valid](https://example.com)");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 1);

        // Blockquote lines
        let result = lint_standard("> [](https://example.com)\n> [valid](https://example.com)");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 1);

        // A list nested in a blockquote
        let result = lint_standard("> - [](url1)\n> - [text](url2)");
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_unicode_whitespace_characters() {
        // U+00A0 (non-breaking space) is removed by trim()
        let result = lint_standard("[\u{00A0}](https://example.com)");
        assert_eq!(result.len(), 1, "Non-breaking space should be treated as whitespace");

        // U+2003 (em space) is removed by trim()
        let result = lint_standard("[\u{2003}](https://example.com)");
        assert_eq!(result.len(), 1, "Em space should be treated as whitespace");

        // U+200B (zero-width space) is a format character, so trim() keeps it
        let result = lint_standard("[\u{200B}](https://example.com)");
        assert!(
            result.is_empty(),
            "Zero-width space is not considered whitespace by trim()"
        );

        // Spaces around a zero-width space: trimming leaves "\u{200B}", which is not empty
        let result = lint_standard("[ \u{200B} ](https://example.com)");
        assert!(
            result.is_empty(),
            "Zero-width space remains after trim(), so link is not empty"
        );
    }

    #[test]
    fn test_empty_url_with_text() {
        let result = lint_standard("[some text]()");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Empty link found: [some text]()");
    }

    #[test]
    fn test_both_empty_text_and_url() {
        let result = lint_standard("[]()");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Empty link found: []()");
    }

    #[test]
    fn test_reference_link_with_undefined_reference() {
        let result = lint_standard("[text][undefined]");
        assert_eq!(result.len(), 1, "Undefined reference should be treated as empty URL");
    }

    #[test]
    fn test_shortcut_reference_links() {
        // Collapsed reference ([text][]) whose label is defined
        let result = lint_standard("[example][]\n\n[example]: https://example.com");
        assert!(result.is_empty(), "Valid implicit reference link should pass");

        // Collapsed reference with nothing in either pair of brackets
        let result = lint_standard("[][]\n\n[]: https://example.com");
        assert_eq!(result.len(), 1, "Empty implicit reference link should fail");

        // A bare [example] without a trailing [] or () is expected to produce no warning
        let result = lint_standard("[example]\n\n[example]: https://example.com");
        assert!(
            result.is_empty(),
            "Shortcut links without [] or () are not parsed as links"
        );
    }

    #[test]
    fn test_fix_suggestions() {
        // (input, replacement suggested for its first warning)
        let cases = [
            ("[](https://example.com)", "[Link text](https://example.com)"),
            ("[text]()", "[text](https://example.com)"),
            ("[]()", "[Link text](https://example.com)"),
        ];

        for (input, expected) in cases {
            let result = lint_standard(input);
            assert!(result[0].fix.is_some());
            let fix = result[0].fix.as_ref().unwrap();
            assert_eq!(fix.replacement, expected);
        }
    }

    #[test]
    fn test_complex_markdown_document() {
        let content = r#"# Document with various links

[Valid link](https://example.com) followed by [](empty.com).

## Lists with links
- [Good link](url1)
- [](url2)
- Item with [inline empty]() link

> Quote with [](quoted-empty.com)
> And [valid quoted](quoted-valid.com)

Code block should be ignored:
```
[](this-is-code)
```

[Reference style][ref1] and [][ref2]

[ref1]: https://ref1.com
[ref2]: https://ref2.com
"#;

        let result = lint_standard(content);

        // Lines of the document that hold an empty link, in order
        let empty_link_lines = [3, 7, 8, 10, 18];
        assert_eq!(result.len(), empty_link_lines.len(), "Should find all empty links");

        // Warnings must come back in document order, one per expected line
        for (i, &expected_line) in empty_link_lines.iter().enumerate() {
            assert_eq!(
                result[i].line, expected_line,
                "Empty link {i} should be on line {expected_line}"
            );
        }
    }

    #[test]
    fn test_issue_29_code_block_with_tildes() {
        // Issue #29: tildes inside a fenced code block must not break the
        // reference definitions that follow it
        let content = r#"In addition to the [local scope][] and the [global scope][], Python also has a **built-in scope**.

```pycon
>>> @count_calls
... def greet(name):
...     print("Hi", name)
...
>>> greet("Trey")
Traceback (most recent call last):
  File "<python-input-2>", line 1, in <module>
    greet("Trey")
    ~~~~~^^^^^^^^
  File "<python-input-0>", line 4, in wrapper
    calls += 1
    ^^^^^
UnboundLocalError: cannot access local variable 'calls' where it is not associated with a value
```


[local scope]: https://www.pythonmorsels.com/local-and-global-variables/
[global scope]: https://www.pythonmorsels.com/assigning-global-variables/"#;

        let result = lint_standard(content);

        // Both collapsed references resolve, so nothing may be flagged
        assert!(
            result.is_empty(),
            "Should not flag reference links as empty when code blocks contain tildes (issue #29). Got: {result:?}"
        );
    }

    #[test]
    fn test_mkdocs_backtick_wrapped_references() {
        // Issue #97: backtick-wrapped references count as MkDocs auto-references

        // Dotted module.Class path in backticks
        let result = lint_mkdocs("[`module.Class`][]");
        assert!(
            result.is_empty(),
            "Should not flag [`module.Class`][] as empty in MkDocs mode (issue #97). Got: {result:?}"
        );

        // Same text with an explicit reference id
        let result = lint_mkdocs("[`module.Class`][ref]");
        assert!(
            result.is_empty(),
            "Should not flag [`module.Class`][ref] as empty in MkDocs mode (issue #97). Got: {result:?}"
        );

        // Path-like reference in backticks
        let result = lint_mkdocs("[`api/endpoint`][]");
        assert!(
            result.is_empty(),
            "Should not flag [`api/endpoint`][] as empty in MkDocs mode (issue #97). Got: {result:?}"
        );

        // Standard flavor has no auto-references, so the link is flagged
        let result = lint_standard("[`module.Class`][]");
        assert_eq!(
            result.len(),
            1,
            "Should flag [`module.Class`][] as empty in Standard mode (no auto-refs). Got: {result:?}"
        );

        // A link with nothing in it is flagged in MkDocs flavor too
        let result = lint_mkdocs("[][]");
        assert_eq!(
            result.len(),
            1,
            "Should still flag [][] as empty in MkDocs mode. Got: {result:?}"
        );
    }
}