rumdl_lib/rules/
md039_no_space_in_links.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use lazy_static::lazy_static;
3use regex::Regex;
4
5lazy_static! {
6    // Pre-compiled regex patterns for performance - using DOTALL flag to match newlines
7    static ref LINK_PATTERN: Regex = Regex::new(r"(?s)!?\[([^\]]*)\]\(([^)]*)\)").unwrap();
8
9    // Fast check patterns - simple string-based checks are faster than complex regex
10    static ref WHITESPACE_CHECK: Regex = Regex::new(r"^\s+|\s+$").unwrap();
11    static ref ALL_WHITESPACE: Regex = Regex::new(r"^\s*$").unwrap();
12}
13
14/// Rule MD039: No space inside link text
15///
16/// See [docs/md039.md](../../docs/md039.md) for full documentation, configuration, and examples.
17///
18/// This rule is triggered when link text has leading or trailing spaces which can cause
19/// unexpected rendering in some Markdown parsers.
20#[derive(Debug, Default, Clone)]
21pub struct MD039NoSpaceInLinks;
22
23// Static definition for the warning message
24const WARNING_MESSAGE: &str = "Remove spaces inside link text";
25
26impl MD039NoSpaceInLinks {
27    pub fn new() -> Self {
28        Self
29    }
30
31    /// Optimized fast check to see if content has any potential links or images
32    #[inline]
33    fn has_links_or_images(&self, content: &str) -> bool {
34        LINK_PATTERN.is_match(content)
35    }
36
37    #[inline]
38    fn trim_link_text_preserve_escapes(text: &str) -> &str {
39        // Optimized trimming that preserves escapes
40        let start = text
41            .char_indices()
42            .find(|&(_, c)| !c.is_whitespace())
43            .map(|(i, _)| i)
44            .unwrap_or(text.len());
45        let end = text
46            .char_indices()
47            .rev()
48            .find(|&(_, c)| !c.is_whitespace())
49            .map(|(i, c)| i + c.len_utf8())
50            .unwrap_or(0);
51        if start >= end { "" } else { &text[start..end] }
52    }
53
54    /// Optimized whitespace checking for link text
55    #[inline]
56    fn needs_trimming(&self, text: &str) -> bool {
57        // Simple and fast check: compare with trimmed version
58        text != text.trim_matches(|c: char| c.is_whitespace())
59    }
60
61    /// Optimized unescaping for performance-critical path
62    #[inline]
63    fn unescape_fast(&self, text: &str) -> String {
64        if !text.contains('\\') {
65            return text.to_string();
66        }
67
68        let mut result = String::with_capacity(text.len());
69        let mut chars = text.chars().peekable();
70
71        while let Some(c) = chars.next() {
72            if c == '\\' {
73                if let Some(&next) = chars.peek() {
74                    result.push(next);
75                    chars.next();
76                } else {
77                    result.push(c);
78                }
79            } else {
80                result.push(c);
81            }
82        }
83        result
84    }
85}
86
87impl Rule for MD039NoSpaceInLinks {
88    fn name(&self) -> &'static str {
89        "MD039"
90    }
91
92    fn description(&self) -> &'static str {
93        "Spaces inside link text"
94    }
95
96    fn category(&self) -> RuleCategory {
97        RuleCategory::Link
98    }
99
100    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
101        let content = ctx.content;
102        content.is_empty() || !self.has_links_or_images(content)
103    }
104
105    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
106        let mut warnings = Vec::new();
107
108        // Use centralized link parsing from LintContext
109        for link in &ctx.links {
110            // Skip reference links (markdownlint doesn't check these)
111            if link.is_reference {
112                continue;
113            }
114
115            // Fast check if trimming is needed
116            if !self.needs_trimming(&link.text) {
117                continue;
118            }
119
120            // Optimized unescaping for whitespace check
121            let unescaped = self.unescape_fast(&link.text);
122
123            let needs_warning = if ALL_WHITESPACE.is_match(&unescaped) {
124                true
125            } else {
126                let trimmed = link.text.trim_matches(|c: char| c.is_whitespace());
127                link.text.as_str() != trimmed
128            };
129
130            if needs_warning {
131                let url = if link.is_reference {
132                    if let Some(ref_id) = &link.reference_id {
133                        format!("[{ref_id}]")
134                    } else {
135                        "[]".to_string()
136                    }
137                } else {
138                    format!("({})", link.url)
139                };
140
141                let fixed = if ALL_WHITESPACE.is_match(&unescaped) {
142                    format!("[]{url}")
143                } else {
144                    let trimmed = Self::trim_link_text_preserve_escapes(&link.text);
145                    format!("[{trimmed}]{url}")
146                };
147
148                warnings.push(LintWarning {
149                    rule_name: Some(self.name()),
150                    line: link.line,
151                    column: link.start_col + 1, // Convert to 1-indexed
152                    end_line: link.line,
153                    end_column: link.end_col + 1, // Convert to 1-indexed
154                    message: WARNING_MESSAGE.to_string(),
155                    severity: Severity::Warning,
156                    fix: Some(Fix {
157                        range: link.byte_offset..link.byte_end,
158                        replacement: fixed,
159                    }),
160                });
161            }
162        }
163
164        // Also check images
165        for image in &ctx.images {
166            // Skip reference images (markdownlint doesn't check these)
167            if image.is_reference {
168                continue;
169            }
170
171            // Fast check if trimming is needed
172            if !self.needs_trimming(&image.alt_text) {
173                continue;
174            }
175
176            // Optimized unescaping for whitespace check
177            let unescaped = self.unescape_fast(&image.alt_text);
178
179            let needs_warning = if ALL_WHITESPACE.is_match(&unescaped) {
180                true
181            } else {
182                let trimmed = image.alt_text.trim_matches(|c: char| c.is_whitespace());
183                image.alt_text.as_str() != trimmed
184            };
185
186            if needs_warning {
187                let url = if image.is_reference {
188                    if let Some(ref_id) = &image.reference_id {
189                        format!("[{ref_id}]")
190                    } else {
191                        "[]".to_string()
192                    }
193                } else {
194                    format!("({})", image.url)
195                };
196
197                let fixed = if ALL_WHITESPACE.is_match(&unescaped) {
198                    format!("![]{url}")
199                } else {
200                    let trimmed = Self::trim_link_text_preserve_escapes(&image.alt_text);
201                    format!("![{trimmed}]{url}")
202                };
203
204                warnings.push(LintWarning {
205                    rule_name: Some(self.name()),
206                    line: image.line,
207                    column: image.start_col + 1, // Convert to 1-indexed
208                    end_line: image.line,
209                    end_column: image.end_col + 1, // Convert to 1-indexed
210                    message: WARNING_MESSAGE.to_string(),
211                    severity: Severity::Warning,
212                    fix: Some(Fix {
213                        range: image.byte_offset..image.byte_end,
214                        replacement: fixed,
215                    }),
216                });
217            }
218        }
219
220        Ok(warnings)
221    }
222
223    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
224        let content = ctx.content;
225        let mut fixes = Vec::new();
226
227        // Process links
228        for link in &ctx.links {
229            // Skip reference links (markdownlint doesn't check these)
230            if link.is_reference {
231                continue;
232            }
233
234            if !self.needs_trimming(&link.text) {
235                continue;
236            }
237
238            let unescaped = self.unescape_fast(&link.text);
239
240            let needs_fix = if ALL_WHITESPACE.is_match(&unescaped) {
241                true
242            } else {
243                let trimmed = link.text.trim_matches(|c: char| c.is_whitespace());
244                link.text.as_str() != trimmed
245            };
246
247            if needs_fix {
248                let url_part = if link.is_reference {
249                    if let Some(ref_id) = &link.reference_id {
250                        format!("[{ref_id}]")
251                    } else {
252                        "[]".to_string()
253                    }
254                } else {
255                    format!("({})", link.url)
256                };
257
258                let replacement = if ALL_WHITESPACE.is_match(&unescaped) {
259                    format!("[]{url_part}")
260                } else {
261                    let trimmed = Self::trim_link_text_preserve_escapes(&link.text);
262                    format!("[{trimmed}]{url_part}")
263                };
264
265                fixes.push((link.byte_offset, link.byte_end, replacement));
266            }
267        }
268
269        // Process images
270        for image in &ctx.images {
271            // Skip reference images (markdownlint doesn't check these)
272            if image.is_reference {
273                continue;
274            }
275
276            if !self.needs_trimming(&image.alt_text) {
277                continue;
278            }
279
280            let unescaped = self.unescape_fast(&image.alt_text);
281
282            let needs_fix = if ALL_WHITESPACE.is_match(&unescaped) {
283                true
284            } else {
285                let trimmed = image.alt_text.trim_matches(|c: char| c.is_whitespace());
286                image.alt_text.as_str() != trimmed
287            };
288
289            if needs_fix {
290                let url_part = if image.is_reference {
291                    if let Some(ref_id) = &image.reference_id {
292                        format!("[{ref_id}]")
293                    } else {
294                        "[]".to_string()
295                    }
296                } else {
297                    format!("({})", image.url)
298                };
299
300                let replacement = if ALL_WHITESPACE.is_match(&unescaped) {
301                    format!("![]{url_part}")
302                } else {
303                    let trimmed = Self::trim_link_text_preserve_escapes(&image.alt_text);
304                    format!("![{trimmed}]{url_part}")
305                };
306
307                fixes.push((image.byte_offset, image.byte_end, replacement));
308            }
309        }
310
311        if fixes.is_empty() {
312            return Ok(content.to_string());
313        }
314
315        // Sort fixes by position to apply them in order
316        fixes.sort_by_key(|&(start, _, _)| start);
317
318        // Apply fixes efficiently
319        let mut result = String::with_capacity(content.len());
320        let mut last_pos = 0;
321
322        for (start, end, replacement) in fixes {
323            if start < last_pos {
324                // This should not happen if fixes are properly sorted and non-overlapping
325                return Err(LintError::FixFailed(format!(
326                    "Overlapping fixes detected: last_pos={last_pos}, start={start}"
327                )));
328            }
329            result.push_str(&content[last_pos..start]);
330            result.push_str(&replacement);
331            last_pos = end;
332        }
333        result.push_str(&content[last_pos..]);
334
335        Ok(result)
336    }
337
338    fn as_any(&self) -> &dyn std::any::Any {
339        self
340    }
341
342    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
343    where
344        Self: Sized,
345    {
346        Box::new(Self)
347    }
348}
349
#[cfg(test)]
mod tests {
    use super::*;

    /// Lints `content` with the standard flavor and returns how many warnings MD039 raised.
    fn warning_count(content: &str) -> usize {
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD039NoSpaceInLinks::new().check(&ctx).unwrap().len()
    }

    /// Runs the MD039 fixer over `content` with the standard flavor and returns the result.
    fn fixed(content: &str) -> String {
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD039NoSpaceInLinks::new().fix(&ctx).unwrap()
    }

    #[test]
    fn test_valid_links() {
        assert_eq!(warning_count("[link](url) and [another link](url) here"), 0);
    }

    #[test]
    fn test_spaces_both_ends() {
        let content = "[ link ](url) and [ another link ](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed(content), "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_space_at_start() {
        let content = "[ link](url) and [ another link](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed(content), "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_space_at_end() {
        let content = "[link ](url) and [another link ](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed(content), "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_link_in_code_block() {
        // The link inside the fenced block must be left alone; only the last line is linted.
        let content = "```\n[ link ](url)\n```\n[ link ](url)";
        assert_eq!(warning_count(content), 1);
        assert_eq!(fixed(content), "```\n[ link ](url)\n```\n[link](url)");
    }

    #[test]
    fn test_multiple_links() {
        let content = "[ link ](url) and [ another ](url) in one line";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed(content), "[link](url) and [another](url) in one line");
    }

    #[test]
    fn test_link_with_internal_spaces() {
        let content = "[this is link](url) and [ this is also link ](url)";
        assert_eq!(warning_count(content), 1);
        assert_eq!(fixed(content), "[this is link](url) and [this is also link](url)");
    }

    #[test]
    fn test_link_with_punctuation() {
        let content = "[ link! ](url) and [ link? ](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed(content), "[link!](url) and [link?](url) here");
    }

    #[test]
    fn test_parity_only_whitespace_and_newlines_minimal() {
        // markdownlint removes all whitespace, resulting in empty link text
        assert_eq!(fixed("[   \n  ](url) and [\t\n\t](url)"), "[](url) and [](url)");
    }

    #[test]
    fn test_parity_internal_newlines_minimal() {
        // markdownlint trims only leading/trailing whitespace, preserves internal newlines
        assert_eq!(
            fixed("[link\ntext](url) and [ another\nlink ](url)"),
            "[link\ntext](url) and [another\nlink](url)"
        );
    }

    #[test]
    fn test_parity_escaped_brackets_minimal() {
        // markdownlint does not trim or remove escapes, so output should be unchanged
        let content = "[link\\]](url) and [link\\[]](url)";
        assert_eq!(fixed(content), content);
    }

    #[test]
    fn test_performance_md039() {
        use std::time::{Duration, Instant};

        let rule = MD039NoSpaceInLinks::new();

        // 500 lines with a padded link (each must warn) followed by 500 clean lines
        // (each should be skipped quickly).
        let mut content = String::with_capacity(100_000);
        for i in 0..500 {
            content.push_str(&format!("Line {i} with [ spaced link {i} ](url{i}) and text.\n"));
        }
        for i in 0..500 {
            content.push_str(&format!(
                "Line {} with [valid link {}](url{}) and text.\n",
                i + 500,
                i,
                i
            ));
        }

        let line_count = content.lines().count();
        println!(
            "MD039 Performance Test - Content: {} bytes, {} lines",
            content.len(),
            line_count
        );

        let ctx = crate::lint_context::LintContext::new(&content, crate::config::MarkdownFlavor::Standard);

        // Warm up
        let _ = rule.check(&ctx).unwrap();

        // Time only the `check` call itself, averaged over a few runs.
        let runs = 5;
        let mut total_duration = Duration::ZERO;
        let mut warnings_count = 0;
        for _ in 0..runs {
            let started = Instant::now();
            let warnings = rule.check(&ctx).unwrap();
            total_duration += started.elapsed();
            warnings_count = warnings.len();
        }
        let avg_check_duration = total_duration / runs;

        println!("MD039 Optimized Performance:");
        println!(
            "- Average check time: {:?} ({:.2} ms)",
            avg_check_duration,
            avg_check_duration.as_secs_f64() * 1000.0
        );
        println!("- Found {warnings_count} warnings");
        println!(
            "- Lines per second: {:.0}",
            line_count as f64 / avg_check_duration.as_secs_f64()
        );
        println!(
            "- Microseconds per line: {:.2}",
            avg_check_duration.as_micros() as f64 / line_count as f64
        );

        // Performance assertion - should complete reasonably fast
        assert!(
            avg_check_duration.as_millis() < 200,
            "MD039 check should complete in under 200ms, took {}ms",
            avg_check_duration.as_millis()
        );

        // Verify we're finding the expected number of warnings (500 links with spaces)
        assert_eq!(warnings_count, 500, "Should find 500 warnings for links with spaces");
    }
}