rumdl_lib/rules/
md039_no_space_in_links.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::regex_cache::get_cached_regex;
3
/// Regex source that matches a string which is empty or consists solely of
/// whitespace. It is compiled through `get_cached_regex` and applied to the
/// unescaped link text / image alt text.
const ALL_WHITESPACE_STR: &str = r"^\s*$";
6
/// Rule MD039: No space inside link text
///
/// See [docs/md039.md](../../docs/md039.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when link text (or image alt text) has leading or trailing
/// whitespace, which can cause unexpected rendering in some Markdown parsers.
///
/// The rule is a stateless unit struct: it carries no configuration of its own.
/// Reference-style links and images are not checked.
#[derive(Debug, Default, Clone)]
pub struct MD039NoSpaceInLinks;
15
/// Message attached to every warning this rule emits, for links and images alike.
const WARNING_MESSAGE: &str = "Remove spaces inside link text";
18
19impl MD039NoSpaceInLinks {
20    pub fn new() -> Self {
21        Self
22    }
23
24    #[inline]
25    fn trim_link_text_preserve_escapes(text: &str) -> &str {
26        // Optimized trimming that preserves escapes
27        let start = text
28            .char_indices()
29            .find(|&(_, c)| !c.is_whitespace())
30            .map(|(i, _)| i)
31            .unwrap_or(text.len());
32        let end = text
33            .char_indices()
34            .rev()
35            .find(|&(_, c)| !c.is_whitespace())
36            .map(|(i, c)| i + c.len_utf8())
37            .unwrap_or(0);
38        if start >= end { "" } else { &text[start..end] }
39    }
40
41    /// Optimized whitespace checking for link text
42    #[inline]
43    fn needs_trimming(&self, text: &str) -> bool {
44        // Simple and fast check: compare with trimmed version
45        text != text.trim_matches(|c: char| c.is_whitespace())
46    }
47
48    /// Optimized unescaping for performance-critical path
49    #[inline]
50    fn unescape_fast(&self, text: &str) -> String {
51        if !text.contains('\\') {
52            return text.to_string();
53        }
54
55        let mut result = String::with_capacity(text.len());
56        let mut chars = text.chars().peekable();
57
58        while let Some(c) = chars.next() {
59            if c == '\\' {
60                if let Some(&next) = chars.peek() {
61                    result.push(next);
62                    chars.next();
63                } else {
64                    result.push(c);
65                }
66            } else {
67                result.push(c);
68            }
69        }
70        result
71    }
72}
73
74impl Rule for MD039NoSpaceInLinks {
75    fn name(&self) -> &'static str {
76        "MD039"
77    }
78
79    fn description(&self) -> &'static str {
80        "Spaces inside link text"
81    }
82
83    fn category(&self) -> RuleCategory {
84        RuleCategory::Link
85    }
86
87    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
88        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
89    }
90
91    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
92        let mut warnings = Vec::new();
93
94        // Use centralized link parsing from LintContext
95        for link in &ctx.links {
96            // Skip reference links (markdownlint doesn't check these)
97            if link.is_reference {
98                continue;
99            }
100
101            // Skip links inside Jinja templates
102            if ctx.is_in_jinja_range(link.byte_offset) {
103                continue;
104            }
105
106            // Skip links inside JSX expressions or MDX comments
107            if ctx.is_in_jsx_expression(link.byte_offset) || ctx.is_in_mdx_comment(link.byte_offset) {
108                continue;
109            }
110
111            // Fast check if trimming is needed
112            if !self.needs_trimming(&link.text) {
113                continue;
114            }
115
116            // Optimized unescaping for whitespace check
117            let unescaped = self.unescape_fast(&link.text);
118
119            let needs_warning = if get_cached_regex(ALL_WHITESPACE_STR)
120                .map(|re| re.is_match(&unescaped))
121                .unwrap_or(false)
122            {
123                true
124            } else {
125                let trimmed = link.text.trim_matches(|c: char| c.is_whitespace());
126                link.text.as_ref() != trimmed
127            };
128
129            if needs_warning {
130                let url = if link.is_reference {
131                    if let Some(ref_id) = &link.reference_id {
132                        format!("[{ref_id}]")
133                    } else {
134                        "[]".to_string()
135                    }
136                } else {
137                    format!("({})", link.url)
138                };
139
140                let fixed = if get_cached_regex(ALL_WHITESPACE_STR)
141                    .map(|re| re.is_match(&unescaped))
142                    .unwrap_or(false)
143                {
144                    format!("[]{url}")
145                } else {
146                    let trimmed = Self::trim_link_text_preserve_escapes(&link.text);
147                    format!("[{trimmed}]{url}")
148                };
149
150                warnings.push(LintWarning {
151                    rule_name: Some(self.name().to_string()),
152                    line: link.line,
153                    column: link.start_col + 1, // Convert to 1-indexed
154                    end_line: link.line,
155                    end_column: link.end_col + 1, // Convert to 1-indexed
156                    message: WARNING_MESSAGE.to_string(),
157                    severity: Severity::Warning,
158                    fix: Some(Fix {
159                        range: link.byte_offset..link.byte_end,
160                        replacement: fixed,
161                    }),
162                });
163            }
164        }
165
166        // Also check images
167        for image in &ctx.images {
168            // Skip reference images (markdownlint doesn't check these)
169            if image.is_reference {
170                continue;
171            }
172
173            // Skip images inside JSX expressions or MDX comments
174            if ctx.is_in_jsx_expression(image.byte_offset) || ctx.is_in_mdx_comment(image.byte_offset) {
175                continue;
176            }
177
178            // Skip images inside Jinja templates
179            if ctx.is_in_jinja_range(image.byte_offset) {
180                continue;
181            }
182
183            // Fast check if trimming is needed
184            if !self.needs_trimming(&image.alt_text) {
185                continue;
186            }
187
188            // Optimized unescaping for whitespace check
189            let unescaped = self.unescape_fast(&image.alt_text);
190
191            let needs_warning = if get_cached_regex(ALL_WHITESPACE_STR)
192                .map(|re| re.is_match(&unescaped))
193                .unwrap_or(false)
194            {
195                true
196            } else {
197                let trimmed = image.alt_text.trim_matches(|c: char| c.is_whitespace());
198                image.alt_text.as_ref() != trimmed
199            };
200
201            if needs_warning {
202                let url = if image.is_reference {
203                    if let Some(ref_id) = &image.reference_id {
204                        format!("[{ref_id}]")
205                    } else {
206                        "[]".to_string()
207                    }
208                } else {
209                    format!("({})", image.url)
210                };
211
212                let fixed = if get_cached_regex(ALL_WHITESPACE_STR)
213                    .map(|re| re.is_match(&unescaped))
214                    .unwrap_or(false)
215                {
216                    format!("![]{url}")
217                } else {
218                    let trimmed = Self::trim_link_text_preserve_escapes(&image.alt_text);
219                    format!("![{trimmed}]{url}")
220                };
221
222                warnings.push(LintWarning {
223                    rule_name: Some(self.name().to_string()),
224                    line: image.line,
225                    column: image.start_col + 1, // Convert to 1-indexed
226                    end_line: image.line,
227                    end_column: image.end_col + 1, // Convert to 1-indexed
228                    message: WARNING_MESSAGE.to_string(),
229                    severity: Severity::Warning,
230                    fix: Some(Fix {
231                        range: image.byte_offset..image.byte_end,
232                        replacement: fixed,
233                    }),
234                });
235            }
236        }
237
238        Ok(warnings)
239    }
240
241    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
242        let content = ctx.content;
243        let mut fixes = Vec::new();
244
245        // Process links
246        for link in &ctx.links {
247            // Skip reference links (markdownlint doesn't check these)
248            if link.is_reference {
249                continue;
250            }
251
252            // Skip links inside Jinja templates
253            if ctx.is_in_jinja_range(link.byte_offset) {
254                continue;
255            }
256
257            if !self.needs_trimming(&link.text) {
258                continue;
259            }
260
261            let unescaped = self.unescape_fast(&link.text);
262
263            let needs_fix = if get_cached_regex(ALL_WHITESPACE_STR)
264                .map(|re| re.is_match(&unescaped))
265                .unwrap_or(false)
266            {
267                true
268            } else {
269                let trimmed = link.text.trim_matches(|c: char| c.is_whitespace());
270                link.text.as_ref() != trimmed
271            };
272
273            if needs_fix {
274                let url_part = if link.is_reference {
275                    if let Some(ref_id) = &link.reference_id {
276                        format!("[{ref_id}]")
277                    } else {
278                        "[]".to_string()
279                    }
280                } else {
281                    format!("({})", link.url)
282                };
283
284                let replacement = if get_cached_regex(ALL_WHITESPACE_STR)
285                    .map(|re| re.is_match(&unescaped))
286                    .unwrap_or(false)
287                {
288                    format!("[]{url_part}")
289                } else {
290                    let trimmed = Self::trim_link_text_preserve_escapes(&link.text);
291                    format!("[{trimmed}]{url_part}")
292                };
293
294                fixes.push((link.byte_offset, link.byte_end, replacement));
295            }
296        }
297
298        // Process images
299        for image in &ctx.images {
300            // Skip reference images (markdownlint doesn't check these)
301            if image.is_reference {
302                continue;
303            }
304
305            // Skip images inside Jinja templates
306            if ctx.is_in_jinja_range(image.byte_offset) {
307                continue;
308            }
309
310            if !self.needs_trimming(&image.alt_text) {
311                continue;
312            }
313
314            let unescaped = self.unescape_fast(&image.alt_text);
315
316            let needs_fix = if get_cached_regex(ALL_WHITESPACE_STR)
317                .map(|re| re.is_match(&unescaped))
318                .unwrap_or(false)
319            {
320                true
321            } else {
322                let trimmed = image.alt_text.trim_matches(|c: char| c.is_whitespace());
323                image.alt_text.as_ref() != trimmed
324            };
325
326            if needs_fix {
327                let url_part = if image.is_reference {
328                    if let Some(ref_id) = &image.reference_id {
329                        format!("[{ref_id}]")
330                    } else {
331                        "[]".to_string()
332                    }
333                } else {
334                    format!("({})", image.url)
335                };
336
337                let replacement = if get_cached_regex(ALL_WHITESPACE_STR)
338                    .map(|re| re.is_match(&unescaped))
339                    .unwrap_or(false)
340                {
341                    format!("![]{url_part}")
342                } else {
343                    let trimmed = Self::trim_link_text_preserve_escapes(&image.alt_text);
344                    format!("![{trimmed}]{url_part}")
345                };
346
347                fixes.push((image.byte_offset, image.byte_end, replacement));
348            }
349        }
350
351        if fixes.is_empty() {
352            return Ok(content.to_string());
353        }
354
355        // Sort fixes by position to apply them in order
356        fixes.sort_by_key(|&(start, _, _)| start);
357
358        // Apply fixes efficiently
359        let mut result = String::with_capacity(content.len());
360        let mut last_pos = 0;
361
362        for (start, end, replacement) in fixes {
363            if start < last_pos {
364                // This should not happen if fixes are properly sorted and non-overlapping
365                return Err(LintError::FixFailed(format!(
366                    "Overlapping fixes detected: last_pos={last_pos}, start={start}"
367                )));
368            }
369            result.push_str(&content[last_pos..start]);
370            result.push_str(&replacement);
371            last_pos = end;
372        }
373        result.push_str(&content[last_pos..]);
374
375        Ok(result)
376    }
377
378    fn as_any(&self) -> &dyn std::any::Any {
379        self
380    }
381
382    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
383    where
384        Self: Sized,
385    {
386        Box::new(Self)
387    }
388}
389
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// Lints `content` with the standard flavor, then asserts both the number
    /// of warnings from `check` and the exact output of `fix`.
    fn assert_warns_and_fixes(content: &str, expected_warnings: usize, expected_fix: &str) {
        let rule = MD039NoSpaceInLinks::new();
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), expected_warnings);
        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, expected_fix);
    }

    /// Runs only `fix` on `content` and asserts its exact output.
    fn assert_fix_output(content: &str, expected_fix: &str) {
        let rule = MD039NoSpaceInLinks::new();
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, expected_fix);
    }

    #[test]
    fn test_valid_links() {
        let rule = MD039NoSpaceInLinks::new();
        let ctx = LintContext::new(
            "[link](url) and [another link](url) here",
            MarkdownFlavor::Standard,
            None,
        );
        let warnings = rule.check(&ctx).unwrap();
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_spaces_both_ends() {
        assert_warns_and_fixes(
            "[ link ](url) and [ another link ](url) here",
            2,
            "[link](url) and [another link](url) here",
        );
    }

    #[test]
    fn test_space_at_start() {
        assert_warns_and_fixes(
            "[ link](url) and [ another link](url) here",
            2,
            "[link](url) and [another link](url) here",
        );
    }

    #[test]
    fn test_space_at_end() {
        assert_warns_and_fixes(
            "[link ](url) and [another link ](url) here",
            2,
            "[link](url) and [another link](url) here",
        );
    }

    #[test]
    fn test_link_in_code_block() {
        // Only the link outside the fenced block is reported and fixed.
        assert_warns_and_fixes(
            "```\n[ link ](url)\n```\n[ link ](url)",
            1,
            "```\n[ link ](url)\n```\n[link](url)",
        );
    }

    #[test]
    fn test_multiple_links() {
        assert_warns_and_fixes(
            "[ link ](url) and [ another ](url) in one line",
            2,
            "[link](url) and [another](url) in one line",
        );
    }

    #[test]
    fn test_link_with_internal_spaces() {
        // Interior spaces are fine; only the padded link is flagged.
        assert_warns_and_fixes(
            "[this is link](url) and [ this is also link ](url)",
            1,
            "[this is link](url) and [this is also link](url)",
        );
    }

    #[test]
    fn test_link_with_punctuation() {
        assert_warns_and_fixes(
            "[ link! ](url) and [ link? ](url) here",
            2,
            "[link!](url) and [link?](url) here",
        );
    }

    #[test]
    fn test_parity_only_whitespace_and_newlines_minimal() {
        // markdownlint removes all whitespace, resulting in empty link text
        assert_fix_output("[   \n  ](url) and [\t\n\t](url)", "[](url) and [](url)");
    }

    #[test]
    fn test_parity_internal_newlines_minimal() {
        // markdownlint trims only leading/trailing whitespace, preserves internal newlines
        assert_fix_output(
            "[link\ntext](url) and [ another\nlink ](url)",
            "[link\ntext](url) and [another\nlink](url)",
        );
    }

    #[test]
    fn test_parity_escaped_brackets_minimal() {
        // markdownlint does not trim or remove escapes, so output should be unchanged
        assert_fix_output(
            "[link\\]](url) and [link\\[]](url)",
            "[link\\]](url) and [link\\[]](url)",
        );
    }

    #[test]
    fn test_performance_md039() {
        use std::time::{Duration, Instant};

        let rule = MD039NoSpaceInLinks::new();

        // Build a document with many links: the first 500 lines contain a
        // padded link (must be reported), the next 500 a valid one (must be
        // skipped quickly).
        let mut content = String::with_capacity(100_000);
        for i in 0..500 {
            content.push_str(&format!("Line {i} with [ spaced link {i} ](url{i}) and text.\n"));
        }
        for i in 0..500 {
            content.push_str(&format!(
                "Line {} with [valid link {}](url{}) and text.\n",
                i + 500,
                i,
                i
            ));
        }

        let line_count = content.lines().count();
        println!(
            "MD039 Performance Test - Content: {} bytes, {} lines",
            content.len(),
            line_count
        );

        let ctx = LintContext::new(&content, MarkdownFlavor::Standard, None);

        // Warm up
        let _ = rule.check(&ctx).unwrap();

        // Time several runs of `check` and average them.
        let runs = 5;
        let mut elapsed_total = Duration::ZERO;
        let mut warnings_count = 0;

        for _ in 0..runs {
            let started = Instant::now();
            let warnings = rule.check(&ctx).unwrap();
            elapsed_total += started.elapsed();
            warnings_count = warnings.len();
        }

        let avg_check_duration = elapsed_total / runs;

        println!("MD039 Optimized Performance:");
        println!(
            "- Average check time: {:?} ({:.2} ms)",
            avg_check_duration,
            avg_check_duration.as_secs_f64() * 1000.0
        );
        println!("- Found {warnings_count} warnings");
        println!(
            "- Lines per second: {:.0}",
            line_count as f64 / avg_check_duration.as_secs_f64()
        );
        println!(
            "- Microseconds per line: {:.2}",
            avg_check_duration.as_micros() as f64 / line_count as f64
        );

        // Performance assertion - should complete reasonably fast
        assert!(
            avg_check_duration.as_millis() < 200,
            "MD039 check should complete in under 200ms, took {}ms",
            avg_check_duration.as_millis()
        );

        // Verify we're finding the expected number of warnings (500 links with spaces)
        assert_eq!(warnings_count, 500, "Should find 500 warnings for links with spaces");
    }
}