Skip to main content

rumdl_lib/rules/
md039_no_space_in_links.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::regex_cache::get_cached_regex;
3
// Regex patterns
/// Matches text that is empty or consists solely of whitespace.
const ALL_WHITESPACE_STR: &str = r"^\s*$";

/// Rule MD039: No space inside link text
///
/// See [docs/md039.md](../../docs/md039.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when link text has leading or trailing spaces which can cause
/// unexpected rendering in some Markdown parsers.
#[derive(Debug, Default, Clone)]
pub struct MD039NoSpaceInLinks;

// Static definition for the warning message
const WARNING_MESSAGE: &str = "Remove spaces inside link text";

impl MD039NoSpaceInLinks {
    /// Creates the rule. It carries no configuration or state.
    pub fn new() -> Self {
        Self
    }

    /// Trims leading and trailing whitespace from link text without breaking escapes.
    ///
    /// Backslash sequences inside the text are returned untouched. If the trimmed
    /// text would end in an *unescaped* backslash (an odd-length run of trailing
    /// backslashes), the single whitespace character that followed it is kept.
    /// Dropping it would turn `[foo\ ](url)` into `[foo\](url)`, where `\]`
    /// escapes the closing bracket and the link stops being a link.
    ///
    /// Returns a sub-slice of `text`; an all-whitespace or empty input yields `""`.
    #[inline]
    fn trim_link_text_preserve_escapes(text: &str) -> &str {
        let without_leading = text.trim_start();
        let start = text.len() - without_leading.len();
        let trimmed = without_leading.trim_end();
        let mut end = start + trimmed.len();

        // An odd number of trailing backslashes means the last one is not itself
        // escaped, so it would escape whatever character ends up right after it.
        let trailing_backslashes = trimmed.bytes().rev().take_while(|&b| b == b'\\').count();
        if trailing_backslashes % 2 == 1 {
            // Everything past `end` is whitespace; keep exactly one character of it.
            if let Some(kept) = text[end..].chars().next() {
                end += kept.len_utf8();
            }
        }

        &text[start..end]
    }

    /// Returns `true` when `text` starts or ends with whitespace.
    #[inline]
    fn needs_trimming(&self, text: &str) -> bool {
        // Cheap gate: any difference from the trimmed slice means there is
        // whitespace at one of the ends.
        text.trim() != text
    }

    /// Removes backslash escapes from `text`.
    ///
    /// Every backslash is dropped and the character after it is kept verbatim,
    /// whatever that character is. A backslash that is the very last character
    /// has nothing to escape and is kept as-is.
    #[inline]
    fn unescape_fast(&self, text: &str) -> String {
        // Nothing to unescape: skip the character-by-character walk.
        if !text.contains('\\') {
            return text.to_string();
        }

        let mut result = String::with_capacity(text.len());
        let mut chars = text.chars();

        while let Some(c) = chars.next() {
            match c {
                // Take the escaped character; a lone trailing backslash stays.
                '\\' => result.push(chars.next().unwrap_or('\\')),
                other => result.push(other),
            }
        }
        result
    }
}
73
74impl Rule for MD039NoSpaceInLinks {
75    fn name(&self) -> &'static str {
76        "MD039"
77    }
78
79    fn description(&self) -> &'static str {
80        "Spaces inside link text"
81    }
82
83    fn category(&self) -> RuleCategory {
84        RuleCategory::Link
85    }
86
87    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
88        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
89    }
90
91    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
92        let mut warnings = Vec::new();
93
94        // Use centralized link parsing from LintContext
95        for link in &ctx.links {
96            // Skip reference links (markdownlint doesn't check these)
97            if link.is_reference {
98                continue;
99            }
100
101            // Skip links inside Jinja templates
102            if ctx.is_in_jinja_range(link.byte_offset) {
103                continue;
104            }
105
106            // Skip links inside JSX expressions or MDX comments
107            if ctx.is_in_jsx_expression(link.byte_offset) || ctx.is_in_mdx_comment(link.byte_offset) {
108                continue;
109            }
110
111            // Fast check if trimming is needed
112            if !self.needs_trimming(&link.text) {
113                continue;
114            }
115
116            // Optimized unescaping for whitespace check
117            let unescaped = self.unescape_fast(&link.text);
118
119            let needs_warning = if get_cached_regex(ALL_WHITESPACE_STR)
120                .map(|re| re.is_match(&unescaped))
121                .unwrap_or(false)
122            {
123                true
124            } else {
125                let trimmed = link.text.trim_matches(|c: char| c.is_whitespace());
126                link.text.as_ref() != trimmed
127            };
128
129            if needs_warning {
130                // Extract the destination portion from the original content so that
131                // titles and attributes are preserved. Find `](` for inline links
132                // or `][` for reference links to split text from destination.
133                let original = &ctx.content[link.byte_offset..link.byte_end];
134                let dest_start = original
135                    .find("](")
136                    .or_else(|| original.find("]["))
137                    .map(|p| p + 1)
138                    .unwrap_or(original.len());
139                let dest_portion = &original[dest_start..];
140
141                let fixed = if get_cached_regex(ALL_WHITESPACE_STR)
142                    .map(|re| re.is_match(&unescaped))
143                    .unwrap_or(false)
144                {
145                    format!("[]{dest_portion}")
146                } else {
147                    let trimmed = Self::trim_link_text_preserve_escapes(&link.text);
148                    format!("[{trimmed}]{dest_portion}")
149                };
150
151                warnings.push(LintWarning {
152                    rule_name: Some(self.name().to_string()),
153                    line: link.line,
154                    column: link.start_col + 1, // Convert to 1-indexed
155                    end_line: link.line,
156                    end_column: link.end_col + 1, // Convert to 1-indexed
157                    message: WARNING_MESSAGE.to_string(),
158                    severity: Severity::Warning,
159                    fix: Some(Fix {
160                        range: link.byte_offset..link.byte_end,
161                        replacement: fixed,
162                    }),
163                });
164            }
165        }
166
167        // Also check images
168        for image in &ctx.images {
169            // Skip reference images (markdownlint doesn't check these)
170            if image.is_reference {
171                continue;
172            }
173
174            // Skip images inside JSX expressions or MDX comments
175            if ctx.is_in_jsx_expression(image.byte_offset) || ctx.is_in_mdx_comment(image.byte_offset) {
176                continue;
177            }
178
179            // Skip images inside Jinja templates
180            if ctx.is_in_jinja_range(image.byte_offset) {
181                continue;
182            }
183
184            // Fast check if trimming is needed
185            if !self.needs_trimming(&image.alt_text) {
186                continue;
187            }
188
189            // Optimized unescaping for whitespace check
190            let unescaped = self.unescape_fast(&image.alt_text);
191
192            let needs_warning = if get_cached_regex(ALL_WHITESPACE_STR)
193                .map(|re| re.is_match(&unescaped))
194                .unwrap_or(false)
195            {
196                true
197            } else {
198                let trimmed = image.alt_text.trim_matches(|c: char| c.is_whitespace());
199                image.alt_text.as_ref() != trimmed
200            };
201
202            if needs_warning {
203                let original = &ctx.content[image.byte_offset..image.byte_end];
204                let dest_start = original
205                    .find("](")
206                    .or_else(|| original.find("]["))
207                    .map(|p| p + 1)
208                    .unwrap_or(original.len());
209                let dest_portion = &original[dest_start..];
210
211                let fixed = if get_cached_regex(ALL_WHITESPACE_STR)
212                    .map(|re| re.is_match(&unescaped))
213                    .unwrap_or(false)
214                {
215                    format!("![]{dest_portion}")
216                } else {
217                    let trimmed = Self::trim_link_text_preserve_escapes(&image.alt_text);
218                    format!("![{trimmed}]{dest_portion}")
219                };
220
221                warnings.push(LintWarning {
222                    rule_name: Some(self.name().to_string()),
223                    line: image.line,
224                    column: image.start_col + 1, // Convert to 1-indexed
225                    end_line: image.line,
226                    end_column: image.end_col + 1, // Convert to 1-indexed
227                    message: WARNING_MESSAGE.to_string(),
228                    severity: Severity::Warning,
229                    fix: Some(Fix {
230                        range: image.byte_offset..image.byte_end,
231                        replacement: fixed,
232                    }),
233                });
234            }
235        }
236
237        Ok(warnings)
238    }
239
240    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
241        if self.should_skip(ctx) {
242            return Ok(ctx.content.to_string());
243        }
244        let warnings = self.check(ctx)?;
245        if warnings.is_empty() {
246            return Ok(ctx.content.to_string());
247        }
248        let warnings =
249            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
250        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings).map_err(LintError::InvalidInput)
251    }
252
253    fn as_any(&self) -> &dyn std::any::Any {
254        self
255    }
256
257    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
258    where
259        Self: Sized,
260    {
261        Box::new(Self)
262    }
263}
264
#[cfg(test)]
mod tests {
    use super::*;

    /// Lints `content` with the Standard flavor and returns the number of MD039 warnings.
    fn warning_count(content: &str) -> usize {
        let rule = MD039NoSpaceInLinks::new();
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        warnings.len()
    }

    /// Runs the MD039 fixer over `content` with the Standard flavor.
    fn fixed(content: &str) -> String {
        let rule = MD039NoSpaceInLinks::new();
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.fix(&ctx).unwrap()
    }

    #[test]
    fn test_valid_links() {
        assert_eq!(warning_count("[link](url) and [another link](url) here"), 0);
    }

    #[test]
    fn test_spaces_both_ends() {
        let content = "[ link ](url) and [ another link ](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed(content), "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_space_at_start() {
        let content = "[ link](url) and [ another link](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed(content), "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_space_at_end() {
        let content = "[link ](url) and [another link ](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed(content), "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_link_in_code_block() {
        // Only the link after the fenced block is linted; the fenced one stays as written.
        let content = "```\n[ link ](url)\n```\n[ link ](url)";
        assert_eq!(warning_count(content), 1);
        assert_eq!(fixed(content), "```\n[ link ](url)\n```\n[link](url)");
    }

    #[test]
    fn test_multiple_links() {
        let content = "[ link ](url) and [ another ](url) in one line";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed(content), "[link](url) and [another](url) in one line");
    }

    #[test]
    fn test_link_with_internal_spaces() {
        let content = "[this is link](url) and [ this is also link ](url)";
        assert_eq!(warning_count(content), 1);
        assert_eq!(fixed(content), "[this is link](url) and [this is also link](url)");
    }

    #[test]
    fn test_link_with_punctuation() {
        let content = "[ link! ](url) and [ link? ](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(fixed(content), "[link!](url) and [link?](url) here");
    }

    #[test]
    fn test_parity_only_whitespace_and_newlines_minimal() {
        // markdownlint removes all whitespace, resulting in empty link text
        assert_eq!(
            fixed("[   \n  ](url) and [\t\n\t](url)"),
            "[](url) and [](url)"
        );
    }

    #[test]
    fn test_parity_internal_newlines_minimal() {
        // markdownlint trims only leading/trailing whitespace, preserves internal newlines
        assert_eq!(
            fixed("[link\ntext](url) and [ another\nlink ](url)"),
            "[link\ntext](url) and [another\nlink](url)"
        );
    }

    #[test]
    fn test_parity_escaped_brackets_minimal() {
        // markdownlint does not trim or remove escapes, so output should be unchanged
        let content = "[link\\]](url) and [link\\[]](url)";
        assert_eq!(fixed(content), "[link\\]](url) and [link\\[]](url)");
    }

    #[test]
    fn test_performance_md039() {
        use std::time::{Duration, Instant};

        let rule = MD039NoSpaceInLinks::new();

        // 500 lines whose link text needs trimming, then 500 lines that are already clean.
        let mut content = String::with_capacity(100_000);
        for i in 0..500 {
            content.push_str(&format!("Line {i} with [ spaced link {i} ](url{i}) and text.\n"));
        }
        for i in 0..500 {
            let line_no = i + 500;
            content.push_str(&format!("Line {line_no} with [valid link {i}](url{i}) and text.\n"));
        }
        let line_count = content.lines().count();

        println!(
            "MD039 Performance Test - Content: {} bytes, {} lines",
            content.len(),
            line_count
        );

        let ctx = crate::lint_context::LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);

        // Warm up
        let _ = rule.check(&ctx).unwrap();

        // Time only the `check` call of each run and remember the last warning count.
        let runs = 5;
        let mut total_duration = Duration::ZERO;
        let mut warnings_count = 0;
        for _ in 0..runs {
            let started = Instant::now();
            let warnings = rule.check(&ctx).unwrap();
            total_duration += started.elapsed();
            warnings_count = warnings.len();
        }
        let avg_check_duration = total_duration / runs;

        println!("MD039 Optimized Performance:");
        println!(
            "- Average check time: {:?} ({:.2} ms)",
            avg_check_duration,
            avg_check_duration.as_secs_f64() * 1000.0
        );
        println!("- Found {warnings_count} warnings");
        println!(
            "- Lines per second: {:.0}",
            line_count as f64 / avg_check_duration.as_secs_f64()
        );
        println!(
            "- Microseconds per line: {:.2}",
            avg_check_duration.as_micros() as f64 / line_count as f64
        );

        // Performance assertion - should complete reasonably fast
        assert!(
            avg_check_duration.as_millis() < 200,
            "MD039 check should complete in under 200ms, took {}ms",
            avg_check_duration.as_millis()
        );

        // Verify we're finding the expected number of warnings (500 links with spaces)
        assert_eq!(warnings_count, 500, "Should find 500 warnings for links with spaces");
    }
}