// rumdl_lib/rules/md039_no_space_in_links.rs

use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
use crate::utils::regex_cache::get_cached_regex;

// Regex patterns

/// Matches an inline link or image: an optional `!`, bracketed text containing no `]`,
/// then a parenthesised target containing no `)`. The negated character classes also
/// match newlines, so the text or target may span several lines.
const LINK_PATTERN_STR: &str = r"(?s)!?\[([^\]]*)\]\(([^)]*)\)";
/// Matches a string that is empty or consists solely of whitespace.
const ALL_WHITESPACE_STR: &str = r"^\s*$";

/// Rule MD039: No space inside link text
///
/// See [docs/md039.md](../../docs/md039.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when link text has leading or trailing spaces which can cause
/// unexpected rendering in some Markdown parsers.
///
/// The type is a unit struct: it holds no state, so every instance behaves the same.
#[derive(Debug, Default, Clone)]
pub struct MD039NoSpaceInLinks;

/// Message attached to every warning this rule emits (used for links and images alike).
const WARNING_MESSAGE: &str = "Remove spaces inside link text";

20impl MD039NoSpaceInLinks {
21    pub fn new() -> Self {
22        Self
23    }
24
25    /// Optimized fast check to see if content has any potential links or images
26    #[inline]
27    fn has_links_or_images(&self, content: &str) -> bool {
28        get_cached_regex(LINK_PATTERN_STR)
29            .map(|re| re.is_match(content))
30            .unwrap_or(false)
31    }
32
33    #[inline]
34    fn trim_link_text_preserve_escapes(text: &str) -> &str {
35        // Optimized trimming that preserves escapes
36        let start = text
37            .char_indices()
38            .find(|&(_, c)| !c.is_whitespace())
39            .map(|(i, _)| i)
40            .unwrap_or(text.len());
41        let end = text
42            .char_indices()
43            .rev()
44            .find(|&(_, c)| !c.is_whitespace())
45            .map(|(i, c)| i + c.len_utf8())
46            .unwrap_or(0);
47        if start >= end { "" } else { &text[start..end] }
48    }
49
50    /// Optimized whitespace checking for link text
51    #[inline]
52    fn needs_trimming(&self, text: &str) -> bool {
53        // Simple and fast check: compare with trimmed version
54        text != text.trim_matches(|c: char| c.is_whitespace())
55    }
56
57    /// Optimized unescaping for performance-critical path
58    #[inline]
59    fn unescape_fast(&self, text: &str) -> String {
60        if !text.contains('\\') {
61            return text.to_string();
62        }
63
64        let mut result = String::with_capacity(text.len());
65        let mut chars = text.chars().peekable();
66
67        while let Some(c) = chars.next() {
68            if c == '\\' {
69                if let Some(&next) = chars.peek() {
70                    result.push(next);
71                    chars.next();
72                } else {
73                    result.push(c);
74                }
75            } else {
76                result.push(c);
77            }
78        }
79        result
80    }
81}
82
83impl Rule for MD039NoSpaceInLinks {
84    fn name(&self) -> &'static str {
85        "MD039"
86    }
87
88    fn description(&self) -> &'static str {
89        "Spaces inside link text"
90    }
91
92    fn category(&self) -> RuleCategory {
93        RuleCategory::Link
94    }
95
96    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
97        let content = ctx.content;
98        content.is_empty() || !self.has_links_or_images(content)
99    }
100
101    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
102        let mut warnings = Vec::new();
103
104        // Use centralized link parsing from LintContext
105        for link in &ctx.links {
106            // Skip reference links (markdownlint doesn't check these)
107            if link.is_reference {
108                continue;
109            }
110
111            // Fast check if trimming is needed
112            if !self.needs_trimming(&link.text) {
113                continue;
114            }
115
116            // Optimized unescaping for whitespace check
117            let unescaped = self.unescape_fast(&link.text);
118
119            let needs_warning = if get_cached_regex(ALL_WHITESPACE_STR)
120                .map(|re| re.is_match(&unescaped))
121                .unwrap_or(false)
122            {
123                true
124            } else {
125                let trimmed = link.text.trim_matches(|c: char| c.is_whitespace());
126                link.text.as_str() != trimmed
127            };
128
129            if needs_warning {
130                let url = if link.is_reference {
131                    if let Some(ref_id) = &link.reference_id {
132                        format!("[{ref_id}]")
133                    } else {
134                        "[]".to_string()
135                    }
136                } else {
137                    format!("({})", link.url)
138                };
139
140                let fixed = if get_cached_regex(ALL_WHITESPACE_STR)
141                    .map(|re| re.is_match(&unescaped))
142                    .unwrap_or(false)
143                {
144                    format!("[]{url}")
145                } else {
146                    let trimmed = Self::trim_link_text_preserve_escapes(&link.text);
147                    format!("[{trimmed}]{url}")
148                };
149
150                warnings.push(LintWarning {
151                    rule_name: Some(self.name()),
152                    line: link.line,
153                    column: link.start_col + 1, // Convert to 1-indexed
154                    end_line: link.line,
155                    end_column: link.end_col + 1, // Convert to 1-indexed
156                    message: WARNING_MESSAGE.to_string(),
157                    severity: Severity::Warning,
158                    fix: Some(Fix {
159                        range: link.byte_offset..link.byte_end,
160                        replacement: fixed,
161                    }),
162                });
163            }
164        }
165
166        // Also check images
167        for image in &ctx.images {
168            // Skip reference images (markdownlint doesn't check these)
169            if image.is_reference {
170                continue;
171            }
172
173            // Fast check if trimming is needed
174            if !self.needs_trimming(&image.alt_text) {
175                continue;
176            }
177
178            // Optimized unescaping for whitespace check
179            let unescaped = self.unescape_fast(&image.alt_text);
180
181            let needs_warning = if get_cached_regex(ALL_WHITESPACE_STR)
182                .map(|re| re.is_match(&unescaped))
183                .unwrap_or(false)
184            {
185                true
186            } else {
187                let trimmed = image.alt_text.trim_matches(|c: char| c.is_whitespace());
188                image.alt_text.as_str() != trimmed
189            };
190
191            if needs_warning {
192                let url = if image.is_reference {
193                    if let Some(ref_id) = &image.reference_id {
194                        format!("[{ref_id}]")
195                    } else {
196                        "[]".to_string()
197                    }
198                } else {
199                    format!("({})", image.url)
200                };
201
202                let fixed = if get_cached_regex(ALL_WHITESPACE_STR)
203                    .map(|re| re.is_match(&unescaped))
204                    .unwrap_or(false)
205                {
206                    format!("![]{url}")
207                } else {
208                    let trimmed = Self::trim_link_text_preserve_escapes(&image.alt_text);
209                    format!("![{trimmed}]{url}")
210                };
211
212                warnings.push(LintWarning {
213                    rule_name: Some(self.name()),
214                    line: image.line,
215                    column: image.start_col + 1, // Convert to 1-indexed
216                    end_line: image.line,
217                    end_column: image.end_col + 1, // Convert to 1-indexed
218                    message: WARNING_MESSAGE.to_string(),
219                    severity: Severity::Warning,
220                    fix: Some(Fix {
221                        range: image.byte_offset..image.byte_end,
222                        replacement: fixed,
223                    }),
224                });
225            }
226        }
227
228        Ok(warnings)
229    }
230
231    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
232        let content = ctx.content;
233        let mut fixes = Vec::new();
234
235        // Process links
236        for link in &ctx.links {
237            // Skip reference links (markdownlint doesn't check these)
238            if link.is_reference {
239                continue;
240            }
241
242            if !self.needs_trimming(&link.text) {
243                continue;
244            }
245
246            let unescaped = self.unescape_fast(&link.text);
247
248            let needs_fix = if get_cached_regex(ALL_WHITESPACE_STR)
249                .map(|re| re.is_match(&unescaped))
250                .unwrap_or(false)
251            {
252                true
253            } else {
254                let trimmed = link.text.trim_matches(|c: char| c.is_whitespace());
255                link.text.as_str() != trimmed
256            };
257
258            if needs_fix {
259                let url_part = if link.is_reference {
260                    if let Some(ref_id) = &link.reference_id {
261                        format!("[{ref_id}]")
262                    } else {
263                        "[]".to_string()
264                    }
265                } else {
266                    format!("({})", link.url)
267                };
268
269                let replacement = if get_cached_regex(ALL_WHITESPACE_STR)
270                    .map(|re| re.is_match(&unescaped))
271                    .unwrap_or(false)
272                {
273                    format!("[]{url_part}")
274                } else {
275                    let trimmed = Self::trim_link_text_preserve_escapes(&link.text);
276                    format!("[{trimmed}]{url_part}")
277                };
278
279                fixes.push((link.byte_offset, link.byte_end, replacement));
280            }
281        }
282
283        // Process images
284        for image in &ctx.images {
285            // Skip reference images (markdownlint doesn't check these)
286            if image.is_reference {
287                continue;
288            }
289
290            if !self.needs_trimming(&image.alt_text) {
291                continue;
292            }
293
294            let unescaped = self.unescape_fast(&image.alt_text);
295
296            let needs_fix = if get_cached_regex(ALL_WHITESPACE_STR)
297                .map(|re| re.is_match(&unescaped))
298                .unwrap_or(false)
299            {
300                true
301            } else {
302                let trimmed = image.alt_text.trim_matches(|c: char| c.is_whitespace());
303                image.alt_text.as_str() != trimmed
304            };
305
306            if needs_fix {
307                let url_part = if image.is_reference {
308                    if let Some(ref_id) = &image.reference_id {
309                        format!("[{ref_id}]")
310                    } else {
311                        "[]".to_string()
312                    }
313                } else {
314                    format!("({})", image.url)
315                };
316
317                let replacement = if get_cached_regex(ALL_WHITESPACE_STR)
318                    .map(|re| re.is_match(&unescaped))
319                    .unwrap_or(false)
320                {
321                    format!("![]{url_part}")
322                } else {
323                    let trimmed = Self::trim_link_text_preserve_escapes(&image.alt_text);
324                    format!("![{trimmed}]{url_part}")
325                };
326
327                fixes.push((image.byte_offset, image.byte_end, replacement));
328            }
329        }
330
331        if fixes.is_empty() {
332            return Ok(content.to_string());
333        }
334
335        // Sort fixes by position to apply them in order
336        fixes.sort_by_key(|&(start, _, _)| start);
337
338        // Apply fixes efficiently
339        let mut result = String::with_capacity(content.len());
340        let mut last_pos = 0;
341
342        for (start, end, replacement) in fixes {
343            if start < last_pos {
344                // This should not happen if fixes are properly sorted and non-overlapping
345                return Err(LintError::FixFailed(format!(
346                    "Overlapping fixes detected: last_pos={last_pos}, start={start}"
347                )));
348            }
349            result.push_str(&content[last_pos..start]);
350            result.push_str(&replacement);
351            last_pos = end;
352        }
353        result.push_str(&content[last_pos..]);
354
355        Ok(result)
356    }
357
358    fn as_any(&self) -> &dyn std::any::Any {
359        self
360    }
361
362    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
363    where
364        Self: Sized,
365    {
366        Box::new(Self)
367    }
368}
369
#[cfg(test)]
mod tests {
    use super::*;

    /// Lints `content` with the standard flavor and returns the number of MD039 warnings.
    fn warning_count(content: &str) -> usize {
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD039NoSpaceInLinks::new().check(&ctx).unwrap().len()
    }

    /// Runs the MD039 fixer over `content` with the standard flavor.
    fn fixed(content: &str) -> String {
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD039NoSpaceInLinks::new().fix(&ctx).unwrap()
    }

    #[test]
    fn test_valid_links() {
        assert_eq!(warning_count("[link](url) and [another link](url) here"), 0);
    }

    #[test]
    fn test_spaces_both_ends() {
        let content = "[ link ](url) and [ another link ](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed(content), "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_space_at_start() {
        let content = "[ link](url) and [ another link](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed(content), "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_space_at_end() {
        let content = "[link ](url) and [another link ](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed(content), "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_link_in_code_block() {
        // Only the link outside the fenced block is reported and rewritten.
        let content = "```\n[ link ](url)\n```\n[ link ](url)";
        assert_eq!(warning_count(content), 1);
        assert_eq!(fixed(content), "```\n[ link ](url)\n```\n[link](url)");
    }

    #[test]
    fn test_multiple_links() {
        let content = "[ link ](url) and [ another ](url) in one line";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed(content), "[link](url) and [another](url) in one line");
    }

    #[test]
    fn test_link_with_internal_spaces() {
        let content = "[this is link](url) and [ this is also link ](url)";
        assert_eq!(warning_count(content), 1);
        assert_eq!(fixed(content), "[this is link](url) and [this is also link](url)");
    }

    #[test]
    fn test_link_with_punctuation() {
        let content = "[ link! ](url) and [ link? ](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed(content), "[link!](url) and [link?](url) here");
    }

    #[test]
    fn test_parity_only_whitespace_and_newlines_minimal() {
        // markdownlint removes all whitespace, resulting in empty link text
        assert_eq!(fixed("[   \n  ](url) and [\t\n\t](url)"), "[](url) and [](url)");
    }

    #[test]
    fn test_parity_internal_newlines_minimal() {
        // markdownlint trims only leading/trailing whitespace, preserves internal newlines
        assert_eq!(
            fixed("[link\ntext](url) and [ another\nlink ](url)"),
            "[link\ntext](url) and [another\nlink](url)"
        );
    }

    #[test]
    fn test_parity_escaped_brackets_minimal() {
        // markdownlint does not trim or remove escapes, so output should be unchanged
        let content = "[link\\]](url) and [link\\[]](url)";
        assert_eq!(fixed(content), content);
    }

    #[test]
    fn test_performance_md039() {
        use std::time::Instant;

        let rule = MD039NoSpaceInLinks::new();

        // Generate test content with many links
        let mut content = String::with_capacity(100_000);

        // Add links with spaces (should be detected and fixed)
        for i in 0..500 {
            content.push_str(&format!("Line {i} with [ spaced link {i} ](url{i}) and text.\n"));
        }

        // Add valid links (should be fast to skip)
        for i in 0..500 {
            content.push_str(&format!(
                "Line {} with [valid link {}](url{}) and text.\n",
                i + 500,
                i,
                i
            ));
        }

        let line_count = content.lines().count();
        println!(
            "MD039 Performance Test - Content: {} bytes, {} lines",
            content.len(),
            line_count
        );

        let ctx = crate::lint_context::LintContext::new(&content, crate::config::MarkdownFlavor::Standard);

        // Warm up
        let _ = rule.check(&ctx).unwrap();

        // Measure check performance, averaged over a handful of runs
        let runs = 5;
        let mut total_duration = std::time::Duration::ZERO;
        let mut warnings_count = 0;

        for _ in 0..runs {
            let started = Instant::now();
            let warnings = rule.check(&ctx).unwrap();
            total_duration += started.elapsed();
            warnings_count = warnings.len();
        }

        let avg_check_duration = total_duration / runs;

        println!("MD039 Optimized Performance:");
        println!(
            "- Average check time: {:?} ({:.2} ms)",
            avg_check_duration,
            avg_check_duration.as_secs_f64() * 1000.0
        );
        println!("- Found {warnings_count} warnings");
        println!(
            "- Lines per second: {:.0}",
            line_count as f64 / avg_check_duration.as_secs_f64()
        );
        println!(
            "- Microseconds per line: {:.2}",
            avg_check_duration.as_micros() as f64 / line_count as f64
        );

        // Performance assertion - should complete reasonably fast
        assert!(
            avg_check_duration.as_millis() < 200,
            "MD039 check should complete in under 200ms, took {}ms",
            avg_check_duration.as_millis()
        );

        // Verify we're finding the expected number of warnings (500 links with spaces)
        assert_eq!(warnings_count, 500, "Should find 500 warnings for links with spaces");
    }
}