// rumdl_lib/rules/md039_no_space_in_links.rs

use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
use crate::utils::regex_cache::get_cached_regex;
3
/// Regex source matching text that is empty or made up solely of whitespace.
const ALL_WHITESPACE_STR: &str = r"^\s*$";
6
/// Rule MD039: No space inside link text
///
/// See [docs/md039.md](../../docs/md039.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when link text (or image alt text) has leading or trailing
/// whitespace, which can cause unexpected rendering in some Markdown parsers.
///
/// The rule is a unit struct: it holds no configuration or state.
#[derive(Debug, Default, Clone)]
pub struct MD039NoSpaceInLinks;
15
/// Message attached to every warning this rule reports.
const WARNING_MESSAGE: &str = "Remove spaces inside link text";
18
19impl MD039NoSpaceInLinks {
20    pub fn new() -> Self {
21        Self
22    }
23
24    #[inline]
25    fn trim_link_text_preserve_escapes(text: &str) -> &str {
26        // Optimized trimming that preserves escapes
27        let start = text
28            .char_indices()
29            .find(|&(_, c)| !c.is_whitespace())
30            .map(|(i, _)| i)
31            .unwrap_or(text.len());
32        let end = text
33            .char_indices()
34            .rev()
35            .find(|&(_, c)| !c.is_whitespace())
36            .map(|(i, c)| i + c.len_utf8())
37            .unwrap_or(0);
38        if start >= end { "" } else { &text[start..end] }
39    }
40
41    /// Optimized whitespace checking for link text
42    #[inline]
43    fn needs_trimming(&self, text: &str) -> bool {
44        // Simple and fast check: compare with trimmed version
45        text != text.trim_matches(|c: char| c.is_whitespace())
46    }
47
48    /// Optimized unescaping for performance-critical path
49    #[inline]
50    fn unescape_fast(&self, text: &str) -> String {
51        if !text.contains('\\') {
52            return text.to_string();
53        }
54
55        let mut result = String::with_capacity(text.len());
56        let mut chars = text.chars().peekable();
57
58        while let Some(c) = chars.next() {
59            if c == '\\' {
60                if let Some(&next) = chars.peek() {
61                    result.push(next);
62                    chars.next();
63                } else {
64                    result.push(c);
65                }
66            } else {
67                result.push(c);
68            }
69        }
70        result
71    }
72}
73
74impl Rule for MD039NoSpaceInLinks {
75    fn name(&self) -> &'static str {
76        "MD039"
77    }
78
79    fn description(&self) -> &'static str {
80        "Spaces inside link text"
81    }
82
83    fn category(&self) -> RuleCategory {
84        RuleCategory::Link
85    }
86
87    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
88        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
89    }
90
91    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
92        let mut warnings = Vec::new();
93
94        // Use centralized link parsing from LintContext
95        for link in &ctx.links {
96            // Skip reference links (markdownlint doesn't check these)
97            if link.is_reference {
98                continue;
99            }
100
101            // Skip links inside Jinja templates
102            if ctx.is_in_jinja_range(link.byte_offset) {
103                continue;
104            }
105
106            // Fast check if trimming is needed
107            if !self.needs_trimming(&link.text) {
108                continue;
109            }
110
111            // Optimized unescaping for whitespace check
112            let unescaped = self.unescape_fast(&link.text);
113
114            let needs_warning = if get_cached_regex(ALL_WHITESPACE_STR)
115                .map(|re| re.is_match(&unescaped))
116                .unwrap_or(false)
117            {
118                true
119            } else {
120                let trimmed = link.text.trim_matches(|c: char| c.is_whitespace());
121                link.text.as_ref() != trimmed
122            };
123
124            if needs_warning {
125                let url = if link.is_reference {
126                    if let Some(ref_id) = &link.reference_id {
127                        format!("[{ref_id}]")
128                    } else {
129                        "[]".to_string()
130                    }
131                } else {
132                    format!("({})", link.url)
133                };
134
135                let fixed = if get_cached_regex(ALL_WHITESPACE_STR)
136                    .map(|re| re.is_match(&unescaped))
137                    .unwrap_or(false)
138                {
139                    format!("[]{url}")
140                } else {
141                    let trimmed = Self::trim_link_text_preserve_escapes(&link.text);
142                    format!("[{trimmed}]{url}")
143                };
144
145                warnings.push(LintWarning {
146                    rule_name: Some(self.name().to_string()),
147                    line: link.line,
148                    column: link.start_col + 1, // Convert to 1-indexed
149                    end_line: link.line,
150                    end_column: link.end_col + 1, // Convert to 1-indexed
151                    message: WARNING_MESSAGE.to_string(),
152                    severity: Severity::Warning,
153                    fix: Some(Fix {
154                        range: link.byte_offset..link.byte_end,
155                        replacement: fixed,
156                    }),
157                });
158            }
159        }
160
161        // Also check images
162        for image in &ctx.images {
163            // Skip reference images (markdownlint doesn't check these)
164            if image.is_reference {
165                continue;
166            }
167
168            // Skip images inside Jinja templates
169            if ctx.is_in_jinja_range(image.byte_offset) {
170                continue;
171            }
172
173            // Fast check if trimming is needed
174            if !self.needs_trimming(&image.alt_text) {
175                continue;
176            }
177
178            // Optimized unescaping for whitespace check
179            let unescaped = self.unescape_fast(&image.alt_text);
180
181            let needs_warning = if get_cached_regex(ALL_WHITESPACE_STR)
182                .map(|re| re.is_match(&unescaped))
183                .unwrap_or(false)
184            {
185                true
186            } else {
187                let trimmed = image.alt_text.trim_matches(|c: char| c.is_whitespace());
188                image.alt_text.as_ref() != trimmed
189            };
190
191            if needs_warning {
192                let url = if image.is_reference {
193                    if let Some(ref_id) = &image.reference_id {
194                        format!("[{ref_id}]")
195                    } else {
196                        "[]".to_string()
197                    }
198                } else {
199                    format!("({})", image.url)
200                };
201
202                let fixed = if get_cached_regex(ALL_WHITESPACE_STR)
203                    .map(|re| re.is_match(&unescaped))
204                    .unwrap_or(false)
205                {
206                    format!("![]{url}")
207                } else {
208                    let trimmed = Self::trim_link_text_preserve_escapes(&image.alt_text);
209                    format!("![{trimmed}]{url}")
210                };
211
212                warnings.push(LintWarning {
213                    rule_name: Some(self.name().to_string()),
214                    line: image.line,
215                    column: image.start_col + 1, // Convert to 1-indexed
216                    end_line: image.line,
217                    end_column: image.end_col + 1, // Convert to 1-indexed
218                    message: WARNING_MESSAGE.to_string(),
219                    severity: Severity::Warning,
220                    fix: Some(Fix {
221                        range: image.byte_offset..image.byte_end,
222                        replacement: fixed,
223                    }),
224                });
225            }
226        }
227
228        Ok(warnings)
229    }
230
231    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
232        let content = ctx.content;
233        let mut fixes = Vec::new();
234
235        // Process links
236        for link in &ctx.links {
237            // Skip reference links (markdownlint doesn't check these)
238            if link.is_reference {
239                continue;
240            }
241
242            // Skip links inside Jinja templates
243            if ctx.is_in_jinja_range(link.byte_offset) {
244                continue;
245            }
246
247            if !self.needs_trimming(&link.text) {
248                continue;
249            }
250
251            let unescaped = self.unescape_fast(&link.text);
252
253            let needs_fix = if get_cached_regex(ALL_WHITESPACE_STR)
254                .map(|re| re.is_match(&unescaped))
255                .unwrap_or(false)
256            {
257                true
258            } else {
259                let trimmed = link.text.trim_matches(|c: char| c.is_whitespace());
260                link.text.as_ref() != trimmed
261            };
262
263            if needs_fix {
264                let url_part = if link.is_reference {
265                    if let Some(ref_id) = &link.reference_id {
266                        format!("[{ref_id}]")
267                    } else {
268                        "[]".to_string()
269                    }
270                } else {
271                    format!("({})", link.url)
272                };
273
274                let replacement = if get_cached_regex(ALL_WHITESPACE_STR)
275                    .map(|re| re.is_match(&unescaped))
276                    .unwrap_or(false)
277                {
278                    format!("[]{url_part}")
279                } else {
280                    let trimmed = Self::trim_link_text_preserve_escapes(&link.text);
281                    format!("[{trimmed}]{url_part}")
282                };
283
284                fixes.push((link.byte_offset, link.byte_end, replacement));
285            }
286        }
287
288        // Process images
289        for image in &ctx.images {
290            // Skip reference images (markdownlint doesn't check these)
291            if image.is_reference {
292                continue;
293            }
294
295            // Skip images inside Jinja templates
296            if ctx.is_in_jinja_range(image.byte_offset) {
297                continue;
298            }
299
300            if !self.needs_trimming(&image.alt_text) {
301                continue;
302            }
303
304            let unescaped = self.unescape_fast(&image.alt_text);
305
306            let needs_fix = if get_cached_regex(ALL_WHITESPACE_STR)
307                .map(|re| re.is_match(&unescaped))
308                .unwrap_or(false)
309            {
310                true
311            } else {
312                let trimmed = image.alt_text.trim_matches(|c: char| c.is_whitespace());
313                image.alt_text.as_ref() != trimmed
314            };
315
316            if needs_fix {
317                let url_part = if image.is_reference {
318                    if let Some(ref_id) = &image.reference_id {
319                        format!("[{ref_id}]")
320                    } else {
321                        "[]".to_string()
322                    }
323                } else {
324                    format!("({})", image.url)
325                };
326
327                let replacement = if get_cached_regex(ALL_WHITESPACE_STR)
328                    .map(|re| re.is_match(&unescaped))
329                    .unwrap_or(false)
330                {
331                    format!("![]{url_part}")
332                } else {
333                    let trimmed = Self::trim_link_text_preserve_escapes(&image.alt_text);
334                    format!("![{trimmed}]{url_part}")
335                };
336
337                fixes.push((image.byte_offset, image.byte_end, replacement));
338            }
339        }
340
341        if fixes.is_empty() {
342            return Ok(content.to_string());
343        }
344
345        // Sort fixes by position to apply them in order
346        fixes.sort_by_key(|&(start, _, _)| start);
347
348        // Apply fixes efficiently
349        let mut result = String::with_capacity(content.len());
350        let mut last_pos = 0;
351
352        for (start, end, replacement) in fixes {
353            if start < last_pos {
354                // This should not happen if fixes are properly sorted and non-overlapping
355                return Err(LintError::FixFailed(format!(
356                    "Overlapping fixes detected: last_pos={last_pos}, start={start}"
357                )));
358            }
359            result.push_str(&content[last_pos..start]);
360            result.push_str(&replacement);
361            last_pos = end;
362        }
363        result.push_str(&content[last_pos..]);
364
365        Ok(result)
366    }
367
368    fn as_any(&self) -> &dyn std::any::Any {
369        self
370    }
371
372    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
373    where
374        Self: Sized,
375    {
376        Box::new(Self)
377    }
378}
379
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `check` on `content` (standard flavor) and returns the number of warnings.
    fn warning_count(content: &str) -> usize {
        let rule = MD039NoSpaceInLinks::new();
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.check(&ctx).unwrap().len()
    }

    /// Runs `fix` on `content` (standard flavor) and returns the rewritten text.
    fn fixed(content: &str) -> String {
        let rule = MD039NoSpaceInLinks::new();
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.fix(&ctx).unwrap()
    }

    #[test]
    fn test_valid_links() {
        let content = "[link](url) and [another link](url) here";
        assert_eq!(warning_count(content), 0);
    }

    #[test]
    fn test_spaces_both_ends() {
        let content = "[ link ](url) and [ another link ](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed(content), "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_space_at_start() {
        let content = "[ link](url) and [ another link](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed(content), "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_space_at_end() {
        let content = "[link ](url) and [another link ](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed(content), "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_link_in_code_block() {
        // The link inside the fenced block must be left alone; only the last line is fixed.
        let content = "```\n[ link ](url)\n```\n[ link ](url)";
        assert_eq!(warning_count(content), 1);
        assert_eq!(fixed(content), "```\n[ link ](url)\n```\n[link](url)");
    }

    #[test]
    fn test_multiple_links() {
        let content = "[ link ](url) and [ another ](url) in one line";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed(content), "[link](url) and [another](url) in one line");
    }

    #[test]
    fn test_link_with_internal_spaces() {
        let content = "[this is link](url) and [ this is also link ](url)";
        assert_eq!(warning_count(content), 1);
        assert_eq!(fixed(content), "[this is link](url) and [this is also link](url)");
    }

    #[test]
    fn test_link_with_punctuation() {
        let content = "[ link! ](url) and [ link? ](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed(content), "[link!](url) and [link?](url) here");
    }

    #[test]
    fn test_parity_only_whitespace_and_newlines_minimal() {
        let content = "[   \n  ](url) and [\t\n\t](url)";
        // markdownlint removes all whitespace, resulting in empty link text
        assert_eq!(fixed(content), "[](url) and [](url)");
    }

    #[test]
    fn test_parity_internal_newlines_minimal() {
        let content = "[link\ntext](url) and [ another\nlink ](url)";
        // markdownlint trims only leading/trailing whitespace, preserves internal newlines
        assert_eq!(fixed(content), "[link\ntext](url) and [another\nlink](url)");
    }

    #[test]
    fn test_parity_escaped_brackets_minimal() {
        let content = "[link\\]](url) and [link\\[]](url)";
        // markdownlint does not trim or remove escapes, so output should be unchanged
        assert_eq!(fixed(content), "[link\\]](url) and [link\\[]](url)");
    }

    #[test]
    fn test_performance_md039() {
        use std::time::Instant;

        let rule = MD039NoSpaceInLinks::new();

        // Generate test content with many links
        let mut content = String::with_capacity(100_000);

        // Add links with spaces (should be detected and fixed)
        for i in 0..500 {
            content.push_str(&format!("Line {i} with [ spaced link {i} ](url{i}) and text.\n"));
        }

        // Add valid links (should be fast to skip)
        for i in 0..500 {
            content.push_str(&format!(
                "Line {} with [valid link {}](url{}) and text.\n",
                i + 500,
                i,
                i
            ));
        }

        println!(
            "MD039 Performance Test - Content: {} bytes, {} lines",
            content.len(),
            content.lines().count()
        );

        let ctx = crate::lint_context::LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);

        // Warm up
        let _ = rule.check(&ctx).unwrap();

        // Measure check performance, averaged over several runs
        let mut total_duration = std::time::Duration::ZERO;
        let runs = 5;
        let mut warnings_count = 0;

        for _ in 0..runs {
            let start = Instant::now();
            let warnings = rule.check(&ctx).unwrap();
            total_duration += start.elapsed();
            warnings_count = warnings.len();
        }

        let avg_check_duration = total_duration / runs;

        println!("MD039 Optimized Performance:");
        println!(
            "- Average check time: {:?} ({:.2} ms)",
            avg_check_duration,
            avg_check_duration.as_secs_f64() * 1000.0
        );
        println!("- Found {warnings_count} warnings");
        println!(
            "- Lines per second: {:.0}",
            content.lines().count() as f64 / avg_check_duration.as_secs_f64()
        );
        println!(
            "- Microseconds per line: {:.2}",
            avg_check_duration.as_micros() as f64 / content.lines().count() as f64
        );

        // Performance assertion - should complete reasonably fast
        assert!(
            avg_check_duration.as_millis() < 200,
            "MD039 check should complete in under 200ms, took {}ms",
            avg_check_duration.as_millis()
        );

        // Verify we're finding the expected number of warnings (500 links with spaces)
        assert_eq!(warnings_count, 500, "Should find 500 warnings for links with spaces");
    }
}