Skip to main content

rumdl_lib/rules/
md039_no_space_in_links.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::regex_cache::get_cached_regex;
3
// Regex patterns
// Matches text that is empty or made up solely of whitespace. `check` runs it against the
// backslash-unescaped link text / image alt text to decide whether the fix should leave the
// brackets empty instead of trimming.
const ALL_WHITESPACE_STR: &str = r"^\s*$";
6
/// Rule MD039: No space inside link text
///
/// See [docs/md039.md](../../docs/md039.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when link text has leading or trailing spaces which can cause
/// unexpected rendering in some Markdown parsers.
///
/// The same check is applied to image alt text. Reference-style links and images are not
/// checked, and neither are items located inside Jinja templates, JSX expressions, or MDX
/// comments.
#[derive(Debug, Default, Clone)]
pub struct MD039NoSpaceInLinks;
15
// Static definition for the warning message.
// The same text is attached to every warning this rule emits, for links and images alike.
const WARNING_MESSAGE: &str = "Remove spaces inside link text";
18
19impl MD039NoSpaceInLinks {
20    pub fn new() -> Self {
21        Self
22    }
23
24    #[inline]
25    fn trim_link_text_preserve_escapes(text: &str) -> &str {
26        // Optimized trimming that preserves escapes
27        let start = text
28            .char_indices()
29            .find(|&(_, c)| !c.is_whitespace())
30            .map_or(text.len(), |(i, _)| i);
31        let end = text
32            .char_indices()
33            .rev()
34            .find(|&(_, c)| !c.is_whitespace())
35            .map_or(0, |(i, c)| i + c.len_utf8());
36        if start >= end { "" } else { &text[start..end] }
37    }
38
39    /// Optimized whitespace checking for link text
40    #[inline]
41    fn needs_trimming(&self, text: &str) -> bool {
42        // Simple and fast check: compare with trimmed version
43        text != text.trim_matches(|c: char| c.is_whitespace())
44    }
45
46    /// Optimized unescaping for performance-critical path
47    #[inline]
48    fn unescape_fast(&self, text: &str) -> String {
49        if !text.contains('\\') {
50            return text.to_string();
51        }
52
53        let mut result = String::with_capacity(text.len());
54        let mut chars = text.chars().peekable();
55
56        while let Some(c) = chars.next() {
57            if c == '\\' {
58                if let Some(&next) = chars.peek() {
59                    result.push(next);
60                    chars.next();
61                } else {
62                    result.push(c);
63                }
64            } else {
65                result.push(c);
66            }
67        }
68        result
69    }
70}
71
72impl Rule for MD039NoSpaceInLinks {
73    fn name(&self) -> &'static str {
74        "MD039"
75    }
76
77    fn description(&self) -> &'static str {
78        "Spaces inside link text"
79    }
80
81    fn category(&self) -> RuleCategory {
82        RuleCategory::Link
83    }
84
85    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
86        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
87    }
88
89    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
90        let mut warnings = Vec::new();
91
92        // Use centralized link parsing from LintContext
93        for link in &ctx.links {
94            // Skip reference links (markdownlint doesn't check these)
95            if link.is_reference {
96                continue;
97            }
98
99            // Skip links inside Jinja templates
100            if ctx.is_in_jinja_range(link.byte_offset) {
101                continue;
102            }
103
104            // Skip links inside JSX expressions or MDX comments
105            if ctx.is_in_jsx_expression(link.byte_offset) || ctx.is_in_mdx_comment(link.byte_offset) {
106                continue;
107            }
108
109            // Fast check if trimming is needed
110            if !self.needs_trimming(&link.text) {
111                continue;
112            }
113
114            // Optimized unescaping for whitespace check
115            let unescaped = self.unescape_fast(&link.text);
116
117            let needs_warning = if get_cached_regex(ALL_WHITESPACE_STR)
118                .map(|re| re.is_match(&unescaped))
119                .unwrap_or(false)
120            {
121                true
122            } else {
123                let trimmed = link.text.trim_matches(|c: char| c.is_whitespace());
124                link.text.as_ref() != trimmed
125            };
126
127            if needs_warning {
128                // Extract the destination portion from the original content so that
129                // titles and attributes are preserved. Find `](` for inline links
130                // or `][` for reference links to split text from destination.
131                let original = &ctx.content[link.byte_offset..link.byte_end];
132                let dest_start = original
133                    .find("](")
134                    .or_else(|| original.find("]["))
135                    .map_or(original.len(), |p| p + 1);
136                let dest_portion = &original[dest_start..];
137
138                let fixed = if get_cached_regex(ALL_WHITESPACE_STR)
139                    .map(|re| re.is_match(&unescaped))
140                    .unwrap_or(false)
141                {
142                    format!("[]{dest_portion}")
143                } else {
144                    let trimmed = Self::trim_link_text_preserve_escapes(&link.text);
145                    format!("[{trimmed}]{dest_portion}")
146                };
147
148                warnings.push(LintWarning {
149                    rule_name: Some(self.name().to_string()),
150                    line: link.line,
151                    column: link.start_col + 1, // Convert to 1-indexed
152                    end_line: link.line,
153                    end_column: link.end_col + 1, // Convert to 1-indexed
154                    message: WARNING_MESSAGE.to_string(),
155                    severity: Severity::Warning,
156                    fix: Some(Fix {
157                        range: link.byte_offset..link.byte_end,
158                        replacement: fixed,
159                    }),
160                });
161            }
162        }
163
164        // Also check images
165        for image in &ctx.images {
166            // Skip reference images (markdownlint doesn't check these)
167            if image.is_reference {
168                continue;
169            }
170
171            // Skip images inside JSX expressions or MDX comments
172            if ctx.is_in_jsx_expression(image.byte_offset) || ctx.is_in_mdx_comment(image.byte_offset) {
173                continue;
174            }
175
176            // Skip images inside Jinja templates
177            if ctx.is_in_jinja_range(image.byte_offset) {
178                continue;
179            }
180
181            // Fast check if trimming is needed
182            if !self.needs_trimming(&image.alt_text) {
183                continue;
184            }
185
186            // Optimized unescaping for whitespace check
187            let unescaped = self.unescape_fast(&image.alt_text);
188
189            let needs_warning = if get_cached_regex(ALL_WHITESPACE_STR)
190                .map(|re| re.is_match(&unescaped))
191                .unwrap_or(false)
192            {
193                true
194            } else {
195                let trimmed = image.alt_text.trim_matches(|c: char| c.is_whitespace());
196                image.alt_text.as_ref() != trimmed
197            };
198
199            if needs_warning {
200                let original = &ctx.content[image.byte_offset..image.byte_end];
201                let dest_start = original
202                    .find("](")
203                    .or_else(|| original.find("]["))
204                    .map_or(original.len(), |p| p + 1);
205                let dest_portion = &original[dest_start..];
206
207                let fixed = if get_cached_regex(ALL_WHITESPACE_STR)
208                    .map(|re| re.is_match(&unescaped))
209                    .unwrap_or(false)
210                {
211                    format!("![]{dest_portion}")
212                } else {
213                    let trimmed = Self::trim_link_text_preserve_escapes(&image.alt_text);
214                    format!("![{trimmed}]{dest_portion}")
215                };
216
217                warnings.push(LintWarning {
218                    rule_name: Some(self.name().to_string()),
219                    line: image.line,
220                    column: image.start_col + 1, // Convert to 1-indexed
221                    end_line: image.line,
222                    end_column: image.end_col + 1, // Convert to 1-indexed
223                    message: WARNING_MESSAGE.to_string(),
224                    severity: Severity::Warning,
225                    fix: Some(Fix {
226                        range: image.byte_offset..image.byte_end,
227                        replacement: fixed,
228                    }),
229                });
230            }
231        }
232
233        Ok(warnings)
234    }
235
236    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
237        if self.should_skip(ctx) {
238            return Ok(ctx.content.to_string());
239        }
240        let warnings = self.check(ctx)?;
241        if warnings.is_empty() {
242            return Ok(ctx.content.to_string());
243        }
244        let warnings =
245            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
246        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings).map_err(LintError::InvalidInput)
247    }
248
249    fn as_any(&self) -> &dyn std::any::Any {
250        self
251    }
252
253    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
254    where
255        Self: Sized,
256    {
257        Box::new(Self)
258    }
259}
260
#[cfg(test)]
mod tests {
    use super::*;

    /// Lints `content` as standard-flavor Markdown and returns the number of MD039 warnings.
    fn warning_count(content: &str) -> usize {
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        MD039NoSpaceInLinks::new().check(&ctx).unwrap().len()
    }

    /// Runs the MD039 fixer over `content` (standard flavor) and returns the resulting text.
    fn fixed_output(content: &str) -> String {
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        MD039NoSpaceInLinks::new().fix(&ctx).unwrap()
    }

    #[test]
    fn test_valid_links() {
        // Links without surrounding whitespace must not be reported
        assert_eq!(warning_count("[link](url) and [another link](url) here"), 0);
    }

    #[test]
    fn test_spaces_both_ends() {
        let content = "[ link ](url) and [ another link ](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed_output(content), "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_space_at_start() {
        let content = "[ link](url) and [ another link](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed_output(content), "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_space_at_end() {
        let content = "[link ](url) and [another link ](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed_output(content), "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_link_in_code_block() {
        // Only the link outside the fenced block is reported and rewritten
        let content = "```
[ link ](url)
```
[ link ](url)";
        let expected = "```
[ link ](url)
```
[link](url)";
        assert_eq!(warning_count(content), 1);
        assert_eq!(fixed_output(content), expected);
    }

    #[test]
    fn test_multiple_links() {
        let content = "[ link ](url) and [ another ](url) in one line";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed_output(content), "[link](url) and [another](url) in one line");
    }

    #[test]
    fn test_link_with_internal_spaces() {
        // Spaces between words are fine; only the outer ones count
        let content = "[this is link](url) and [ this is also link ](url)";
        assert_eq!(warning_count(content), 1);
        assert_eq!(fixed_output(content), "[this is link](url) and [this is also link](url)");
    }

    #[test]
    fn test_link_with_punctuation() {
        let content = "[ link! ](url) and [ link? ](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed_output(content), "[link!](url) and [link?](url) here");
    }

    #[test]
    fn test_parity_only_whitespace_and_newlines_minimal() {
        // markdownlint removes all whitespace, resulting in empty link text
        let result = fixed_output("[   \n  ](url) and [\t\n\t](url)");
        assert_eq!(result, "[](url) and [](url)");
    }

    #[test]
    fn test_parity_internal_newlines_minimal() {
        // markdownlint trims only leading/trailing whitespace, preserves internal newlines
        let result = fixed_output("[link\ntext](url) and [ another\nlink ](url)");
        assert_eq!(result, "[link\ntext](url) and [another\nlink](url)");
    }

    #[test]
    fn test_parity_escaped_brackets_minimal() {
        // markdownlint does not trim or remove escapes, so output should be unchanged
        let result = fixed_output("[link\\]](url) and [link\\[]](url)");
        assert_eq!(result, "[link\\]](url) and [link\\[]](url)");
    }

    #[test]
    fn test_performance_md039() {
        use std::time::Instant;

        let rule = MD039NoSpaceInLinks::new();

        // Build a document with many links: the first half needs fixing, the second half is
        // valid and should be cheap to skip.
        let mut content = String::with_capacity(100_000);
        for i in 0..500 {
            content.push_str(&format!("Line {i} with [ spaced link {i} ](url{i}) and text.\n"));
        }
        for i in 0..500 {
            content.push_str(&format!(
                "Line {} with [valid link {}](url{}) and text.\n",
                i + 500,
                i,
                i
            ));
        }

        println!(
            "MD039 Performance Test - Content: {} bytes, {} lines",
            content.len(),
            content.lines().count()
        );

        let ctx = crate::lint_context::LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);

        // Warm up
        let _ = rule.check(&ctx).unwrap();

        // Time several runs of `check` and average them
        let runs = 5;
        let mut elapsed = std::time::Duration::ZERO;
        let mut warnings_count = 0;
        for _ in 0..runs {
            let started = Instant::now();
            let found = rule.check(&ctx).unwrap();
            elapsed += started.elapsed();
            warnings_count = found.len();
        }
        let avg_check_duration = elapsed / runs;

        println!("MD039 Optimized Performance:");
        println!(
            "- Average check time: {:?} ({:.2} ms)",
            avg_check_duration,
            avg_check_duration.as_secs_f64() * 1000.0
        );
        println!("- Found {warnings_count} warnings");
        println!(
            "- Lines per second: {:.0}",
            content.lines().count() as f64 / avg_check_duration.as_secs_f64()
        );
        println!(
            "- Microseconds per line: {:.2}",
            avg_check_duration.as_micros() as f64 / content.lines().count() as f64
        );

        // Performance assertion - should complete reasonably fast
        assert!(
            avg_check_duration.as_millis() < 200,
            "MD039 check should complete in under 200ms, took {}ms",
            avg_check_duration.as_millis()
        );

        // Verify we're finding the expected number of warnings (500 links with spaces)
        assert_eq!(warnings_count, 500, "Should find 500 warnings for links with spaces");
    }
}