rumdl_lib/rules/
md039_no_space_in_links.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::regex_cache::get_cached_regex;
3
// Regex source matching text that is empty or made up solely of whitespace.
// It is handed to `get_cached_regex` wherever the rule needs the check.
const ALL_WHITESPACE_STR: &str = r"^\s*$";
6
/// Rule MD039: No space inside link text
///
/// See [docs/md039.md](../../docs/md039.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when link text has leading or trailing spaces which can cause
/// unexpected rendering in some Markdown parsers.
#[derive(Debug, Default, Clone)]
pub struct MD039NoSpaceInLinks;

// Message attached to every warning this rule reports.
const WARNING_MESSAGE: &str = "Remove spaces inside link text";

impl MD039NoSpaceInLinks {
    /// Creates the rule; it is a unit struct and carries no state.
    pub fn new() -> Self {
        Self
    }

    /// Returns `text` without its leading and trailing whitespace.
    ///
    /// Only the two ends are sliced off: backslash escapes and inner whitespace
    /// are returned exactly as written. Text made up solely of whitespace (or
    /// empty text) yields `""`.
    fn trim_link_text_preserve_escapes(text: &str) -> &str {
        // `str::trim` uses the same Unicode `White_Space` definition as
        // `char::is_whitespace`, so no hand-written scan is needed.
        text.trim()
    }

    /// Reports whether `text` begins or ends with whitespace.
    ///
    /// Empty text never needs trimming.
    fn needs_trimming(&self, text: &str) -> bool {
        text != text.trim()
    }

    /// Returns a copy of `text` in which every backslash is dropped and the
    /// character following it is kept verbatim.
    ///
    /// A backslash that is the very last character has nothing to escape and is
    /// kept. Text without any backslash is copied unchanged.
    fn unescape_fast(&self, text: &str) -> String {
        if !text.contains('\\') {
            return text.to_string();
        }

        let mut unescaped = String::with_capacity(text.len());
        let mut chars = text.chars();

        while let Some(c) = chars.next() {
            if c == '\\' {
                // Consume the escaped character together with its backslash;
                // a dangling backslash at the end falls back to itself.
                unescaped.push(chars.next().unwrap_or('\\'));
            } else {
                unescaped.push(c);
            }
        }
        unescaped
    }
}
73
74impl Rule for MD039NoSpaceInLinks {
75    fn name(&self) -> &'static str {
76        "MD039"
77    }
78
79    fn description(&self) -> &'static str {
80        "Spaces inside link text"
81    }
82
83    fn category(&self) -> RuleCategory {
84        RuleCategory::Link
85    }
86
87    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
88        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
89    }
90
91    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
92        let mut warnings = Vec::new();
93
94        // Use centralized link parsing from LintContext
95        for link in &ctx.links {
96            // Skip reference links (markdownlint doesn't check these)
97            if link.is_reference {
98                continue;
99            }
100
101            // Fast check if trimming is needed
102            if !self.needs_trimming(&link.text) {
103                continue;
104            }
105
106            // Optimized unescaping for whitespace check
107            let unescaped = self.unescape_fast(&link.text);
108
109            let needs_warning = if get_cached_regex(ALL_WHITESPACE_STR)
110                .map(|re| re.is_match(&unescaped))
111                .unwrap_or(false)
112            {
113                true
114            } else {
115                let trimmed = link.text.trim_matches(|c: char| c.is_whitespace());
116                link.text.as_str() != trimmed
117            };
118
119            if needs_warning {
120                let url = if link.is_reference {
121                    if let Some(ref_id) = &link.reference_id {
122                        format!("[{ref_id}]")
123                    } else {
124                        "[]".to_string()
125                    }
126                } else {
127                    format!("({})", link.url)
128                };
129
130                let fixed = if get_cached_regex(ALL_WHITESPACE_STR)
131                    .map(|re| re.is_match(&unescaped))
132                    .unwrap_or(false)
133                {
134                    format!("[]{url}")
135                } else {
136                    let trimmed = Self::trim_link_text_preserve_escapes(&link.text);
137                    format!("[{trimmed}]{url}")
138                };
139
140                warnings.push(LintWarning {
141                    rule_name: Some(self.name().to_string()),
142                    line: link.line,
143                    column: link.start_col + 1, // Convert to 1-indexed
144                    end_line: link.line,
145                    end_column: link.end_col + 1, // Convert to 1-indexed
146                    message: WARNING_MESSAGE.to_string(),
147                    severity: Severity::Warning,
148                    fix: Some(Fix {
149                        range: link.byte_offset..link.byte_end,
150                        replacement: fixed,
151                    }),
152                });
153            }
154        }
155
156        // Also check images
157        for image in &ctx.images {
158            // Skip reference images (markdownlint doesn't check these)
159            if image.is_reference {
160                continue;
161            }
162
163            // Fast check if trimming is needed
164            if !self.needs_trimming(&image.alt_text) {
165                continue;
166            }
167
168            // Optimized unescaping for whitespace check
169            let unescaped = self.unescape_fast(&image.alt_text);
170
171            let needs_warning = if get_cached_regex(ALL_WHITESPACE_STR)
172                .map(|re| re.is_match(&unescaped))
173                .unwrap_or(false)
174            {
175                true
176            } else {
177                let trimmed = image.alt_text.trim_matches(|c: char| c.is_whitespace());
178                image.alt_text.as_str() != trimmed
179            };
180
181            if needs_warning {
182                let url = if image.is_reference {
183                    if let Some(ref_id) = &image.reference_id {
184                        format!("[{ref_id}]")
185                    } else {
186                        "[]".to_string()
187                    }
188                } else {
189                    format!("({})", image.url)
190                };
191
192                let fixed = if get_cached_regex(ALL_WHITESPACE_STR)
193                    .map(|re| re.is_match(&unescaped))
194                    .unwrap_or(false)
195                {
196                    format!("![]{url}")
197                } else {
198                    let trimmed = Self::trim_link_text_preserve_escapes(&image.alt_text);
199                    format!("![{trimmed}]{url}")
200                };
201
202                warnings.push(LintWarning {
203                    rule_name: Some(self.name().to_string()),
204                    line: image.line,
205                    column: image.start_col + 1, // Convert to 1-indexed
206                    end_line: image.line,
207                    end_column: image.end_col + 1, // Convert to 1-indexed
208                    message: WARNING_MESSAGE.to_string(),
209                    severity: Severity::Warning,
210                    fix: Some(Fix {
211                        range: image.byte_offset..image.byte_end,
212                        replacement: fixed,
213                    }),
214                });
215            }
216        }
217
218        Ok(warnings)
219    }
220
221    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
222        let content = ctx.content;
223        let mut fixes = Vec::new();
224
225        // Process links
226        for link in &ctx.links {
227            // Skip reference links (markdownlint doesn't check these)
228            if link.is_reference {
229                continue;
230            }
231
232            if !self.needs_trimming(&link.text) {
233                continue;
234            }
235
236            let unescaped = self.unescape_fast(&link.text);
237
238            let needs_fix = if get_cached_regex(ALL_WHITESPACE_STR)
239                .map(|re| re.is_match(&unescaped))
240                .unwrap_or(false)
241            {
242                true
243            } else {
244                let trimmed = link.text.trim_matches(|c: char| c.is_whitespace());
245                link.text.as_str() != trimmed
246            };
247
248            if needs_fix {
249                let url_part = if link.is_reference {
250                    if let Some(ref_id) = &link.reference_id {
251                        format!("[{ref_id}]")
252                    } else {
253                        "[]".to_string()
254                    }
255                } else {
256                    format!("({})", link.url)
257                };
258
259                let replacement = if get_cached_regex(ALL_WHITESPACE_STR)
260                    .map(|re| re.is_match(&unescaped))
261                    .unwrap_or(false)
262                {
263                    format!("[]{url_part}")
264                } else {
265                    let trimmed = Self::trim_link_text_preserve_escapes(&link.text);
266                    format!("[{trimmed}]{url_part}")
267                };
268
269                fixes.push((link.byte_offset, link.byte_end, replacement));
270            }
271        }
272
273        // Process images
274        for image in &ctx.images {
275            // Skip reference images (markdownlint doesn't check these)
276            if image.is_reference {
277                continue;
278            }
279
280            if !self.needs_trimming(&image.alt_text) {
281                continue;
282            }
283
284            let unescaped = self.unescape_fast(&image.alt_text);
285
286            let needs_fix = if get_cached_regex(ALL_WHITESPACE_STR)
287                .map(|re| re.is_match(&unescaped))
288                .unwrap_or(false)
289            {
290                true
291            } else {
292                let trimmed = image.alt_text.trim_matches(|c: char| c.is_whitespace());
293                image.alt_text.as_str() != trimmed
294            };
295
296            if needs_fix {
297                let url_part = if image.is_reference {
298                    if let Some(ref_id) = &image.reference_id {
299                        format!("[{ref_id}]")
300                    } else {
301                        "[]".to_string()
302                    }
303                } else {
304                    format!("({})", image.url)
305                };
306
307                let replacement = if get_cached_regex(ALL_WHITESPACE_STR)
308                    .map(|re| re.is_match(&unescaped))
309                    .unwrap_or(false)
310                {
311                    format!("![]{url_part}")
312                } else {
313                    let trimmed = Self::trim_link_text_preserve_escapes(&image.alt_text);
314                    format!("![{trimmed}]{url_part}")
315                };
316
317                fixes.push((image.byte_offset, image.byte_end, replacement));
318            }
319        }
320
321        if fixes.is_empty() {
322            return Ok(content.to_string());
323        }
324
325        // Sort fixes by position to apply them in order
326        fixes.sort_by_key(|&(start, _, _)| start);
327
328        // Apply fixes efficiently
329        let mut result = String::with_capacity(content.len());
330        let mut last_pos = 0;
331
332        for (start, end, replacement) in fixes {
333            if start < last_pos {
334                // This should not happen if fixes are properly sorted and non-overlapping
335                return Err(LintError::FixFailed(format!(
336                    "Overlapping fixes detected: last_pos={last_pos}, start={start}"
337                )));
338            }
339            result.push_str(&content[last_pos..start]);
340            result.push_str(&replacement);
341            last_pos = end;
342        }
343        result.push_str(&content[last_pos..]);
344
345        Ok(result)
346    }
347
348    fn as_any(&self) -> &dyn std::any::Any {
349        self
350    }
351
352    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
353    where
354        Self: Sized,
355    {
356        Box::new(Self)
357    }
358}
359
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `check` and then `fix` over `markdown` with a fresh rule and returns
    /// the number of warnings together with the fixed document.
    fn lint_and_fix(markdown: &str) -> (usize, String) {
        let rule = MD039NoSpaceInLinks::new();
        let ctx = crate::lint_context::LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
        let warning_count = rule.check(&ctx).unwrap().len();
        let fixed = rule.fix(&ctx).unwrap();
        (warning_count, fixed)
    }

    /// Runs only the `fix` pass over `markdown` and returns the result.
    fn fix_only(markdown: &str) -> String {
        let rule = MD039NoSpaceInLinks::new();
        let ctx = crate::lint_context::LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
        rule.fix(&ctx).unwrap()
    }

    #[test]
    fn test_valid_links() {
        let rule = MD039NoSpaceInLinks::new();
        let ctx = crate::lint_context::LintContext::new(
            "[link](url) and [another link](url) here",
            crate::config::MarkdownFlavor::Standard,
        );
        let warnings = rule.check(&ctx).unwrap();
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_spaces_both_ends() {
        let (warning_count, fixed) = lint_and_fix("[ link ](url) and [ another link ](url) here");
        assert_eq!(warning_count, 2);
        assert_eq!(fixed, "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_space_at_start() {
        let (warning_count, fixed) = lint_and_fix("[ link](url) and [ another link](url) here");
        assert_eq!(warning_count, 2);
        assert_eq!(fixed, "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_space_at_end() {
        let (warning_count, fixed) = lint_and_fix("[link ](url) and [another link ](url) here");
        assert_eq!(warning_count, 2);
        assert_eq!(fixed, "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_link_in_code_block() {
        // The link inside the fence must be left alone; only the one after it is fixed.
        let markdown = "```
[ link ](url)
```
[ link ](url)";
        let (warning_count, fixed) = lint_and_fix(markdown);
        assert_eq!(warning_count, 1);
        assert_eq!(
            fixed,
            "```
[ link ](url)
```
[link](url)"
        );
    }

    #[test]
    fn test_multiple_links() {
        let (warning_count, fixed) = lint_and_fix("[ link ](url) and [ another ](url) in one line");
        assert_eq!(warning_count, 2);
        assert_eq!(fixed, "[link](url) and [another](url) in one line");
    }

    #[test]
    fn test_link_with_internal_spaces() {
        let (warning_count, fixed) = lint_and_fix("[this is link](url) and [ this is also link ](url)");
        assert_eq!(warning_count, 1);
        assert_eq!(fixed, "[this is link](url) and [this is also link](url)");
    }

    #[test]
    fn test_link_with_punctuation() {
        let (warning_count, fixed) = lint_and_fix("[ link! ](url) and [ link? ](url) here");
        assert_eq!(warning_count, 2);
        assert_eq!(fixed, "[link!](url) and [link?](url) here");
    }

    #[test]
    fn test_parity_only_whitespace_and_newlines_minimal() {
        // markdownlint removes all whitespace, resulting in empty link text
        assert_eq!(fix_only("[   \n  ](url) and [\t\n\t](url)"), "[](url) and [](url)");
    }

    #[test]
    fn test_parity_internal_newlines_minimal() {
        // markdownlint trims only leading/trailing whitespace, preserves internal newlines
        assert_eq!(
            fix_only("[link\ntext](url) and [ another\nlink ](url)"),
            "[link\ntext](url) and [another\nlink](url)"
        );
    }

    #[test]
    fn test_parity_escaped_brackets_minimal() {
        // markdownlint does not trim or remove escapes, so output should be unchanged
        assert_eq!(
            fix_only("[link\\]](url) and [link\\[]](url)"),
            "[link\\]](url) and [link\\[]](url)"
        );
    }

    #[test]
    fn test_performance_md039() {
        use std::time::{Duration, Instant};

        // Number of timed `check` passes that are averaged.
        const RUNS: u32 = 5;

        let rule = MD039NoSpaceInLinks::new();

        // 500 lines with an offending link, then 500 lines with a clean one.
        let mut content = String::with_capacity(100_000);
        for i in 0..500 {
            content.push_str(&format!("Line {i} with [ spaced link {i} ](url{i}) and text.\n"));
        }
        for i in 0..500 {
            content.push_str(&format!(
                "Line {} with [valid link {}](url{}) and text.\n",
                i + 500,
                i,
                i
            ));
        }
        let line_count = content.lines().count();

        println!(
            "MD039 Performance Test - Content: {} bytes, {} lines",
            content.len(),
            line_count
        );

        let ctx = crate::lint_context::LintContext::new(&content, crate::config::MarkdownFlavor::Standard);

        // One untimed pass so the measured passes start warm.
        let _ = rule.check(&ctx).unwrap();

        let mut warnings_count = 0;
        let mut total_duration = Duration::ZERO;
        for _ in 0..RUNS {
            let started = Instant::now();
            let warnings = rule.check(&ctx).unwrap();
            total_duration += started.elapsed();
            warnings_count = warnings.len();
        }
        let avg_check_duration = total_duration / RUNS;

        println!("MD039 Optimized Performance:");
        println!(
            "- Average check time: {:?} ({:.2} ms)",
            avg_check_duration,
            avg_check_duration.as_secs_f64() * 1000.0
        );
        println!("- Found {warnings_count} warnings");
        println!(
            "- Lines per second: {:.0}",
            line_count as f64 / avg_check_duration.as_secs_f64()
        );
        println!(
            "- Microseconds per line: {:.2}",
            avg_check_duration.as_micros() as f64 / line_count as f64
        );

        // Generous upper bound; it only guards against gross regressions.
        assert!(
            avg_check_duration.as_millis() < 200,
            "MD039 check should complete in under 200ms, took {}ms",
            avg_check_duration.as_millis()
        );

        // Only the first 500 links carry surrounding spaces.
        assert_eq!(warnings_count, 500, "Should find 500 warnings for links with spaces");
    }
}