rumdl_lib/rules/
md042_no_empty_links.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::mkdocs_patterns::is_mkdocs_auto_reference;
3
/// Rule MD042: No empty links
///
/// See [docs/md042.md](../../docs/md042.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a link has no content (text) or destination (URL).
///
/// The struct holds no state: the Markdown flavor that influences the check is read
/// from the lint context when the rule runs.
#[derive(Clone, Default)]
pub struct MD042NoEmptyLinks {}
11
12impl MD042NoEmptyLinks {
13    pub fn new() -> Self {
14        Self {}
15    }
16
17    /// Strip surrounding backticks from a string
18    /// Used for MkDocs auto-reference detection where `module.Class` should be treated as module.Class
19    fn strip_backticks(s: &str) -> &str {
20        s.trim_start_matches('`').trim_end_matches('`')
21    }
22
23    /// Check if a string is a valid Python identifier
24    /// Python identifiers can contain alphanumeric characters and underscores, but cannot start with a digit
25    fn is_valid_python_identifier(s: &str) -> bool {
26        if s.is_empty() {
27            return false;
28        }
29
30        let first_char = s.chars().next().unwrap();
31        if !first_char.is_ascii_alphabetic() && first_char != '_' {
32            return false;
33        }
34
35        s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
36    }
37}
38
39impl Rule for MD042NoEmptyLinks {
    /// Identifier of this rule; also used as the `rule_name` on the warnings it emits.
    fn name(&self) -> &'static str {
        "MD042"
    }
43
    /// Short human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "No empty links"
    }
47
48    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
49        let mut warnings = Vec::new();
50
51        // Check if we're in MkDocs mode from the context
52        let mkdocs_mode = ctx.flavor == crate::config::MarkdownFlavor::MkDocs;
53
54        // Use centralized link parsing from LintContext
55        for link in &ctx.links {
56            // For reference links, resolve the URL
57            let effective_url = if link.is_reference {
58                if let Some(ref_id) = &link.reference_id {
59                    ctx.get_reference_url(ref_id).unwrap_or("").to_string()
60                } else {
61                    String::new()
62                }
63            } else {
64                link.url.clone()
65            };
66
67            // For MkDocs mode, check if this looks like an auto-reference
68            // Note: We check both the reference_id AND the text since shorthand references
69            // like [class.Name][] use the text as the implicit reference
70            // Also strip backticks since MkDocs resolves `module.Class` as module.Class
71            if mkdocs_mode && link.is_reference {
72                // Check the reference_id if present (strip backticks first)
73                if let Some(ref_id) = &link.reference_id {
74                    let stripped_ref = Self::strip_backticks(ref_id);
75                    // Accept if it matches MkDocs patterns OR if it's a backtick-wrapped valid identifier
76                    // Backticks indicate code/type reference (like `str`, `int`, `MyClass`)
77                    if is_mkdocs_auto_reference(stripped_ref)
78                        || (ref_id != stripped_ref && Self::is_valid_python_identifier(stripped_ref))
79                    {
80                        continue;
81                    }
82                }
83                // Also check the link text itself for shorthand references (strip backticks)
84                let stripped_text = Self::strip_backticks(&link.text);
85                // Accept if it matches MkDocs patterns OR if it's a backtick-wrapped valid identifier
86                if is_mkdocs_auto_reference(stripped_text)
87                    || (link.text.as_str() != stripped_text && Self::is_valid_python_identifier(stripped_text))
88                {
89                    continue;
90                }
91            }
92
93            // Check for empty links
94            if link.text.trim().is_empty() || effective_url.trim().is_empty() {
95                let replacement = if link.text.trim().is_empty() && effective_url.trim().is_empty() {
96                    "[Link text](https://example.com)".to_string()
97                } else if link.text.trim().is_empty() {
98                    if link.is_reference {
99                        format!("[Link text]{}", &ctx.content[link.byte_offset + 1..link.byte_end])
100                    } else {
101                        format!("[Link text]({effective_url})")
102                    }
103                } else if link.is_reference {
104                    // Keep the reference format
105                    let ref_part = &ctx.content[link.byte_offset + link.text.len() + 2..link.byte_end];
106                    format!("[{}]{}", link.text, ref_part)
107                } else {
108                    format!("[{}](https://example.com)", link.text)
109                };
110
111                // Extract the exact link text from the source
112                let link_display = &ctx.content[link.byte_offset..link.byte_end];
113
114                warnings.push(LintWarning {
115                    rule_name: Some(self.name()),
116                    message: format!("Empty link found: {link_display}"),
117                    line: link.line,
118                    column: link.start_col + 1, // Convert to 1-indexed
119                    end_line: link.line,
120                    end_column: link.end_col + 1, // Convert to 1-indexed
121                    severity: Severity::Warning,
122                    fix: Some(Fix {
123                        range: link.byte_offset..link.byte_end,
124                        replacement,
125                    }),
126                });
127            }
128        }
129
130        Ok(warnings)
131    }
132
133    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
134        let content = ctx.content;
135
136        // Get all warnings first - only fix links that are actually flagged
137        let warnings = self.check(ctx)?;
138        if warnings.is_empty() {
139            return Ok(content.to_string());
140        }
141
142        // Collect all fixes with their ranges
143        let mut fixes: Vec<(std::ops::Range<usize>, String)> = warnings
144            .iter()
145            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.clone(), f.replacement.clone())))
146            .collect();
147
148        // Sort fixes by position (descending) to apply from end to start
149        fixes.sort_by(|a, b| b.0.start.cmp(&a.0.start));
150
151        let mut result = content.to_string();
152
153        // Apply fixes from end to start to maintain correct positions
154        for (range, replacement) in fixes {
155            result.replace_range(range, &replacement);
156        }
157
158        Ok(result)
159    }
160
    /// Get the category of this rule for selective processing.
    ///
    /// MD042 inspects links, so it reports `RuleCategory::Link`.
    fn category(&self) -> RuleCategory {
        RuleCategory::Link
    }
165
166    /// Check if this rule should be skipped
167    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
168        let content = ctx.content;
169        content.is_empty() || !content.contains('[')
170    }
171
    /// Expose the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
175
176    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
177    where
178        Self: Sized,
179    {
180        // Flavor is now accessed from LintContext during check
181        Box::new(MD042NoEmptyLinks::new())
182    }
183}
184
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// Run MD042 over `content` with the given flavor and return the warnings.
    fn lint(content: &str, flavor: MarkdownFlavor) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, flavor);
        let rule = MD042NoEmptyLinks::new();
        rule.check(&ctx).unwrap()
    }

    /// Run MD042 over `content` using the standard Markdown flavor.
    fn lint_standard(content: &str) -> Vec<LintWarning> {
        lint(content, MarkdownFlavor::Standard)
    }

    /// Run MD042 over `content` using the MkDocs flavor.
    fn lint_mkdocs(content: &str) -> Vec<LintWarning> {
        lint(content, MarkdownFlavor::MkDocs)
    }

    #[test]
    fn test_links_with_text_should_pass() {
        let found = lint_standard("[valid link](https://example.com)");
        assert!(found.is_empty(), "Links with text should pass");

        let found = lint_standard("[another valid link](path/to/page.html)");
        assert!(found.is_empty(), "Links with text and relative URLs should pass");
    }

    #[test]
    fn test_links_with_empty_text_should_fail() {
        let found = lint_standard("[](https://example.com)");
        assert_eq!(found.len(), 1);

        let warning = &found[0];
        assert_eq!(warning.message, "Empty link found: [](https://example.com)");
        assert_eq!(warning.line, 1);
        assert_eq!(warning.column, 1);
    }

    #[test]
    fn test_links_with_only_whitespace_should_fail() {
        // Spaces only, then a tab followed by a newline.
        for source in ["[   ](https://example.com)", "[\t\n](https://example.com)"] {
            let found = lint_standard(source);
            assert_eq!(found.len(), 1);
            assert_eq!(found[0].message, format!("Empty link found: {source}"));
        }
    }

    #[test]
    fn test_reference_links_with_empty_text() {
        let found = lint_standard("[][ref]\n\n[ref]: https://example.com");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].message, "Empty link found: [][ref]");
        assert_eq!(found[0].line, 1);

        // Empty text with empty reference
        let found = lint_standard("[][]\n\n[]: https://example.com");
        assert_eq!(found.len(), 1);
    }

    #[test]
    fn test_images_should_be_ignored() {
        // Images can have empty alt text, so they should not trigger the rule
        let found = lint_standard("![](image.png)");
        assert!(found.is_empty(), "Images with empty alt text should be ignored");

        let found = lint_standard("![   ](image.png)");
        assert!(found.is_empty(), "Images with whitespace alt text should be ignored");
    }

    #[test]
    fn test_links_with_nested_formatting() {
        // Emphasis markers without content still count as text: "**" and "__" are
        // not empty after trimming.
        let found = lint_standard("[**](https://example.com)");
        assert!(found.is_empty(), "[**] is not considered empty since ** is text");

        let found = lint_standard("[__](https://example.com)");
        assert!(found.is_empty(), "[__] is not considered empty since __ is text");

        // Links with truly empty formatting should fail
        let found = lint_standard("[](https://example.com)");
        assert_eq!(found.len(), 1);

        // Links with nested formatting and actual text should pass
        let found = lint_standard("[**bold text**](https://example.com)");
        assert!(found.is_empty(), "Links with nested formatting and text should pass");

        let found = lint_standard("[*italic* and **bold**](https://example.com)");
        assert!(found.is_empty(), "Links with multiple nested formatting should pass");
    }

    #[test]
    fn test_multiple_empty_links_on_same_line() {
        let found = lint_standard("[](url1) and [](url2) and [valid](url3)");
        assert_eq!(found.len(), 2, "Should detect both empty links");
        assert_eq!(found[0].column, 1);
        assert_eq!(found[1].column, 14);
    }

    #[test]
    fn test_escaped_brackets() {
        // Escaped brackets should not be treated as links
        let found = lint_standard("\\[\\](https://example.com)");
        assert!(found.is_empty(), "Escaped brackets should not be treated as links");

        // But this should still be a link
        let found = lint_standard("[\\[\\]](https://example.com)");
        assert!(found.is_empty(), "Link with escaped brackets in text should pass");
    }

    #[test]
    fn test_links_in_lists_and_blockquotes() {
        // Empty links in lists
        let found = lint_standard("- [](https://example.com)\n- [valid](https://example.com)");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].line, 1);

        // Empty links in blockquotes
        let found = lint_standard("> [](https://example.com)\n> [valid](https://example.com)");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].line, 1);

        // Nested structures
        let found = lint_standard("> - [](url1)\n> - [text](url2)");
        assert_eq!(found.len(), 1);
    }

    #[test]
    fn test_unicode_whitespace_characters() {
        // Non-breaking space (U+00A0) - IS considered whitespace by Rust's trim()
        let found = lint_standard("[\u{00A0}](https://example.com)");
        assert_eq!(found.len(), 1, "Non-breaking space should be treated as whitespace");

        // Em space (U+2003) - IS considered whitespace by Rust's trim()
        let found = lint_standard("[\u{2003}](https://example.com)");
        assert_eq!(found.len(), 1, "Em space should be treated as whitespace");

        // Zero-width space (U+200B) - NOT considered whitespace by Rust's trim()
        // This is a formatting character, not a whitespace character
        let found = lint_standard("[\u{200B}](https://example.com)");
        assert!(
            found.is_empty(),
            "Zero-width space is not considered whitespace by trim()"
        );

        // Zero-width space between spaces: trim() removes the spaces but leaves the
        // zero-width space, so the text is NOT empty
        let found = lint_standard("[ \u{200B} ](https://example.com)");
        assert!(
            found.is_empty(),
            "Zero-width space remains after trim(), so link is not empty"
        );
    }

    #[test]
    fn test_empty_url_with_text() {
        let found = lint_standard("[some text]()");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].message, "Empty link found: [some text]()");
    }

    #[test]
    fn test_both_empty_text_and_url() {
        let found = lint_standard("[]()");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].message, "Empty link found: []()");
    }

    #[test]
    fn test_reference_link_with_undefined_reference() {
        let found = lint_standard("[text][undefined]");
        assert_eq!(found.len(), 1, "Undefined reference should be treated as empty URL");
    }

    #[test]
    fn test_shortcut_reference_links() {
        // Valid implicit reference link. A bare [example] is not parsed as a link;
        // it must be followed by [] or () to be recognized.
        let found = lint_standard("[example][]\n\n[example]: https://example.com");
        assert!(found.is_empty(), "Valid implicit reference link should pass");

        // Empty implicit reference link
        let found = lint_standard("[][]\n\n[]: https://example.com");
        assert_eq!(found.len(), 1, "Empty implicit reference link should fail");

        // Actual shortcut-style links are not detected (they don't match the pattern)
        let found = lint_standard("[example]\n\n[example]: https://example.com");
        assert!(
            found.is_empty(),
            "Shortcut links without [] or () are not parsed as links"
        );
    }

    #[test]
    fn test_fix_suggestions() {
        // (source, expected replacement offered by the fix)
        let cases = [
            ("[](https://example.com)", "[Link text](https://example.com)"),
            ("[text]()", "[text](https://example.com)"),
            ("[]()", "[Link text](https://example.com)"),
        ];

        for (source, expected) in cases {
            let found = lint_standard(source);
            assert!(found[0].fix.is_some());
            let fix = found[0].fix.as_ref().unwrap();
            assert_eq!(fix.replacement, expected);
        }
    }

    #[test]
    fn test_complex_markdown_document() {
        let content = r#"# Document with various links

[Valid link](https://example.com) followed by [](empty.com).

## Lists with links
- [Good link](url1)
- [](url2)
- Item with [inline empty]() link

> Quote with [](quoted-empty.com)
> And [valid quoted](quoted-valid.com)

Code block should be ignored:
```
[](this-is-code)
```

[Reference style][ref1] and [][ref2]

[ref1]: https://ref1.com
[ref2]: https://ref2.com
"#;

        let found = lint_standard(content);

        // Lines on which an empty link is expected, in document order
        let empty_link_lines = [3, 7, 8, 10, 18];
        assert_eq!(found.len(), empty_link_lines.len(), "Should find all empty links");

        // Verify line numbers
        for (i, (warning, expected_line)) in found.iter().zip(empty_link_lines).enumerate() {
            assert_eq!(
                warning.line, expected_line,
                "Empty link {i} should be on line {expected_line}"
            );
        }
    }

    #[test]
    fn test_issue_29_code_block_with_tildes() {
        // Test for issue #29 - code blocks with tilde markers should not break reference links
        let content = r#"In addition to the [local scope][] and the [global scope][], Python also has a **built-in scope**.

```pycon
>>> @count_calls
... def greet(name):
...     print("Hi", name)
...
>>> greet("Trey")
Traceback (most recent call last):
  File "<python-input-2>", line 1, in <module>
    greet("Trey")
    ~~~~~^^^^^^^^
  File "<python-input-0>", line 4, in wrapper
    calls += 1
    ^^^^^
UnboundLocalError: cannot access local variable 'calls' where it is not associated with a value
```


[local scope]: https://www.pythonmorsels.com/local-and-global-variables/
[global scope]: https://www.pythonmorsels.com/assigning-global-variables/"#;

        let result = lint_standard(content);

        // These reference links should NOT be flagged as empty
        assert!(
            result.is_empty(),
            "Should not flag reference links as empty when code blocks contain tildes (issue #29). Got: {result:?}"
        );
    }

    #[test]
    fn test_mkdocs_backtick_wrapped_references() {
        // Test for issue #97 - backtick-wrapped references should be recognized as MkDocs auto-references

        // Module.Class pattern with backticks
        let result = lint_mkdocs("[`module.Class`][]");
        assert!(
            result.is_empty(),
            "Should not flag [`module.Class`][] as empty in MkDocs mode (issue #97). Got: {result:?}"
        );

        // Reference with explicit ID
        let result = lint_mkdocs("[`module.Class`][ref]");
        assert!(
            result.is_empty(),
            "Should not flag [`module.Class`][ref] as empty in MkDocs mode (issue #97). Got: {result:?}"
        );

        // Path-like reference with backticks
        let result = lint_mkdocs("[`api/endpoint`][]");
        assert!(
            result.is_empty(),
            "Should not flag [`api/endpoint`][] as empty in MkDocs mode (issue #97). Got: {result:?}"
        );

        // Should still flag in standard mode
        let result = lint_standard("[`module.Class`][]");
        assert_eq!(
            result.len(),
            1,
            "Should flag [`module.Class`][] as empty in Standard mode (no auto-refs). Got: {result:?}"
        );

        // Should still flag truly empty links even in MkDocs mode
        let result = lint_mkdocs("[][]");
        assert_eq!(
            result.len(),
            1,
            "Should still flag [][] as empty in MkDocs mode. Got: {result:?}"
        );
    }
}