Skip to main content

rumdl_lib/rules/
md039_no_space_in_links.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::utils::regex_cache::get_cached_regex;
3
// Regex patterns
/// Matches text that is empty or consists solely of whitespace.
const ALL_WHITESPACE_STR: &str = r"^\s*$";

/// Rule MD039: No space inside link text
///
/// See [docs/md039.md](../../docs/md039.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when link text has leading or trailing spaces which can cause
/// unexpected rendering in some Markdown parsers.
#[derive(Debug, Default, Clone)]
pub struct MD039NoSpaceInLinks;

/// Message attached to every warning produced by this rule.
const WARNING_MESSAGE: &str = "Remove spaces inside link text";

impl MD039NoSpaceInLinks {
    /// Creates the rule. The struct is a unit type, so there is nothing to configure.
    pub fn new() -> Self {
        Self
    }

    /// Returns `text` with leading and trailing whitespace removed.
    ///
    /// The result is a sub-slice of the input, so everything between the first and
    /// last non-whitespace characters (backslash escapes included) is returned
    /// verbatim. Text made up only of whitespace yields `""`.
    #[inline]
    fn trim_link_text_preserve_escapes(text: &str) -> &str {
        // `str::trim` uses the same Unicode `White_Space` definition as
        // `char::is_whitespace`, which is what the rule has always trimmed on.
        text.trim()
    }

    /// Reports whether `text` starts or ends with whitespace.
    #[inline]
    fn needs_trimming(&self, text: &str) -> bool {
        // Trimming only ever shortens the slice, so a length difference is
        // equivalent to (and cheaper than) a full string comparison.
        Self::trim_link_text_preserve_escapes(text).len() != text.len()
    }

    /// Removes one level of backslash escaping from `text`.
    ///
    /// Every backslash is dropped and the character that follows it is kept
    /// verbatim (so `\\` becomes a single backslash). A backslash that is the very
    /// last character has nothing to escape and is kept as-is.
    #[inline]
    fn unescape_fast(&self, text: &str) -> String {
        // Common case: nothing to unescape, just copy.
        if !text.contains('\\') {
            return text.to_string();
        }

        let mut result = String::with_capacity(text.len());
        let mut chars = text.chars();

        while let Some(c) = chars.next() {
            if c == '\\' {
                // Emit the escaped character, or the backslash itself when it
                // is the final character of the input.
                result.push(chars.next().unwrap_or(c));
            } else {
                result.push(c);
            }
        }
        result
    }
}
71
72impl Rule for MD039NoSpaceInLinks {
73    fn name(&self) -> &'static str {
74        "MD039"
75    }
76
77    fn description(&self) -> &'static str {
78        "Spaces inside link text"
79    }
80
81    fn category(&self) -> RuleCategory {
82        RuleCategory::Link
83    }
84
85    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
86        ctx.content.is_empty() || !ctx.likely_has_links_or_images()
87    }
88
89    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
90        let mut warnings = Vec::new();
91
92        // Use centralized link parsing from LintContext
93        for link in &ctx.links {
94            // Skip reference links (markdownlint doesn't check these)
95            if link.is_reference {
96                continue;
97            }
98
99            // Skip links inside Jinja templates
100            if ctx.is_in_jinja_range(link.byte_offset) {
101                continue;
102            }
103
104            // Skip links inside JSX expressions or MDX comments
105            if ctx.is_in_jsx_expression(link.byte_offset) || ctx.is_in_mdx_comment(link.byte_offset) {
106                continue;
107            }
108
109            // Fast check if trimming is needed
110            if !self.needs_trimming(&link.text) {
111                continue;
112            }
113
114            // Optimized unescaping for whitespace check
115            let unescaped = self.unescape_fast(&link.text);
116
117            let needs_warning = if get_cached_regex(ALL_WHITESPACE_STR)
118                .map(|re| re.is_match(&unescaped))
119                .unwrap_or(false)
120            {
121                true
122            } else {
123                let trimmed = link.text.trim_matches(|c: char| c.is_whitespace());
124                link.text.as_ref() != trimmed
125            };
126
127            if needs_warning {
128                // Extract the destination portion from the original content so that
129                // titles and attributes are preserved. Find `](` for inline links
130                // or `][` for reference links to split text from destination.
131                let original = &ctx.content[link.byte_offset..link.byte_end];
132                let dest_start = original
133                    .find("](")
134                    .or_else(|| original.find("]["))
135                    .map_or(original.len(), |p| p + 1);
136                let dest_portion = &original[dest_start..];
137
138                let fixed = if get_cached_regex(ALL_WHITESPACE_STR)
139                    .map(|re| re.is_match(&unescaped))
140                    .unwrap_or(false)
141                {
142                    format!("[]{dest_portion}")
143                } else {
144                    let trimmed = Self::trim_link_text_preserve_escapes(&link.text);
145                    format!("[{trimmed}]{dest_portion}")
146                };
147
148                warnings.push(LintWarning {
149                    rule_name: Some(self.name().to_string()),
150                    line: link.line,
151                    column: link.start_col + 1, // Convert to 1-indexed
152                    end_line: link.line,
153                    end_column: link.end_col + 1, // Convert to 1-indexed
154                    message: WARNING_MESSAGE.to_string(),
155                    severity: Severity::Warning,
156                    fix: Some(Fix::new(link.byte_offset..link.byte_end, fixed)),
157                });
158            }
159        }
160
161        // Also check images
162        for image in &ctx.images {
163            // Skip reference images (markdownlint doesn't check these)
164            if image.is_reference {
165                continue;
166            }
167
168            // Skip images inside JSX expressions or MDX comments
169            if ctx.is_in_jsx_expression(image.byte_offset) || ctx.is_in_mdx_comment(image.byte_offset) {
170                continue;
171            }
172
173            // Skip images inside Jinja templates
174            if ctx.is_in_jinja_range(image.byte_offset) {
175                continue;
176            }
177
178            // Fast check if trimming is needed
179            if !self.needs_trimming(&image.alt_text) {
180                continue;
181            }
182
183            // Optimized unescaping for whitespace check
184            let unescaped = self.unescape_fast(&image.alt_text);
185
186            let needs_warning = if get_cached_regex(ALL_WHITESPACE_STR)
187                .map(|re| re.is_match(&unescaped))
188                .unwrap_or(false)
189            {
190                true
191            } else {
192                let trimmed = image.alt_text.trim_matches(|c: char| c.is_whitespace());
193                image.alt_text.as_ref() != trimmed
194            };
195
196            if needs_warning {
197                let original = &ctx.content[image.byte_offset..image.byte_end];
198                let dest_start = original
199                    .find("](")
200                    .or_else(|| original.find("]["))
201                    .map_or(original.len(), |p| p + 1);
202                let dest_portion = &original[dest_start..];
203
204                let fixed = if get_cached_regex(ALL_WHITESPACE_STR)
205                    .map(|re| re.is_match(&unescaped))
206                    .unwrap_or(false)
207                {
208                    format!("![]{dest_portion}")
209                } else {
210                    let trimmed = Self::trim_link_text_preserve_escapes(&image.alt_text);
211                    format!("![{trimmed}]{dest_portion}")
212                };
213
214                warnings.push(LintWarning {
215                    rule_name: Some(self.name().to_string()),
216                    line: image.line,
217                    column: image.start_col + 1, // Convert to 1-indexed
218                    end_line: image.line,
219                    end_column: image.end_col + 1, // Convert to 1-indexed
220                    message: WARNING_MESSAGE.to_string(),
221                    severity: Severity::Warning,
222                    fix: Some(Fix::new(image.byte_offset..image.byte_end, fixed)),
223                });
224            }
225        }
226
227        Ok(warnings)
228    }
229
230    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
231        if self.should_skip(ctx) {
232            return Ok(ctx.content.to_string());
233        }
234        let warnings = self.check(ctx)?;
235        if warnings.is_empty() {
236            return Ok(ctx.content.to_string());
237        }
238        let warnings =
239            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
240        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings).map_err(LintError::InvalidInput)
241    }
242
243    fn as_any(&self) -> &dyn std::any::Any {
244        self
245    }
246
247    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
248    where
249        Self: Sized,
250    {
251        Box::new(Self)
252    }
253}
254
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a standard-flavor lint context over `content`.
    fn context_for(content: &str) -> crate::lint_context::LintContext {
        crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None)
    }

    /// Checks `content`, asserts the number of warnings, then asserts the fixed output.
    fn assert_warnings_and_fix(content: &str, expected_warnings: usize, expected_fix: &str) {
        let rule = MD039NoSpaceInLinks::new();
        let ctx = context_for(content);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), expected_warnings);
        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, expected_fix);
    }

    /// Asserts only the fixed output for `content`.
    fn assert_fix(content: &str, expected_fix: &str) {
        let rule = MD039NoSpaceInLinks::new();
        let ctx = context_for(content);
        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, expected_fix);
    }

    #[test]
    fn test_valid_links() {
        let ctx = context_for("[link](url) and [another link](url) here");
        let result = MD039NoSpaceInLinks::new().check(&ctx).unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn test_spaces_both_ends() {
        assert_warnings_and_fix(
            "[ link ](url) and [ another link ](url) here",
            2,
            "[link](url) and [another link](url) here",
        );
    }

    #[test]
    fn test_space_at_start() {
        assert_warnings_and_fix(
            "[ link](url) and [ another link](url) here",
            2,
            "[link](url) and [another link](url) here",
        );
    }

    #[test]
    fn test_space_at_end() {
        assert_warnings_and_fix(
            "[link ](url) and [another link ](url) here",
            2,
            "[link](url) and [another link](url) here",
        );
    }

    #[test]
    fn test_link_in_code_block() {
        // The link inside the fenced block must be left alone; only the last line is fixed.
        assert_warnings_and_fix(
            "```
[ link ](url)
```
[ link ](url)",
            1,
            "```
[ link ](url)
```
[link](url)",
        );
    }

    #[test]
    fn test_multiple_links() {
        assert_warnings_and_fix(
            "[ link ](url) and [ another ](url) in one line",
            2,
            "[link](url) and [another](url) in one line",
        );
    }

    #[test]
    fn test_link_with_internal_spaces() {
        assert_warnings_and_fix(
            "[this is link](url) and [ this is also link ](url)",
            1,
            "[this is link](url) and [this is also link](url)",
        );
    }

    #[test]
    fn test_link_with_punctuation() {
        assert_warnings_and_fix(
            "[ link! ](url) and [ link? ](url) here",
            2,
            "[link!](url) and [link?](url) here",
        );
    }

    #[test]
    fn test_parity_only_whitespace_and_newlines_minimal() {
        // markdownlint removes all whitespace, resulting in empty link text
        assert_fix("[   \n  ](url) and [\t\n\t](url)", "[](url) and [](url)");
    }

    #[test]
    fn test_parity_internal_newlines_minimal() {
        // markdownlint trims only leading/trailing whitespace, preserves internal newlines
        assert_fix(
            "[link\ntext](url) and [ another\nlink ](url)",
            "[link\ntext](url) and [another\nlink](url)",
        );
    }

    #[test]
    fn test_parity_escaped_brackets_minimal() {
        // markdownlint does not trim or remove escapes, so output should be unchanged
        assert_fix(
            "[link\\]](url) and [link\\[]](url)",
            "[link\\]](url) and [link\\[]](url)",
        );
    }

    #[test]
    fn test_performance_md039() {
        use std::time::{Duration, Instant};

        let rule = MD039NoSpaceInLinks::new();

        // 500 lines whose links need fixing, followed by 500 lines with valid links.
        let mut content = String::with_capacity(100_000);
        for i in 0..500 {
            content.push_str(&format!("Line {i} with [ spaced link {i} ](url{i}) and text.\n"));
        }
        for i in 0..500 {
            content.push_str(&format!(
                "Line {} with [valid link {}](url{}) and text.\n",
                i + 500,
                i,
                i
            ));
        }

        let line_count = content.lines().count();
        println!(
            "MD039 Performance Test - Content: {} bytes, {} lines",
            content.len(),
            line_count
        );

        let ctx = context_for(&content);

        // Warm up
        let _ = rule.check(&ctx).unwrap();

        // Time several runs and keep the warning count of the last one.
        let runs: u32 = 5;
        let mut elapsed_total = Duration::ZERO;
        let mut warnings_count = 0;
        for _ in 0..runs {
            let started = Instant::now();
            let warnings = rule.check(&ctx).unwrap();
            elapsed_total += started.elapsed();
            warnings_count = warnings.len();
        }
        let avg_check_duration = elapsed_total / runs;

        println!("MD039 Optimized Performance:");
        println!(
            "- Average check time: {:?} ({:.2} ms)",
            avg_check_duration,
            avg_check_duration.as_secs_f64() * 1000.0
        );
        println!("- Found {warnings_count} warnings");
        println!(
            "- Lines per second: {:.0}",
            line_count as f64 / avg_check_duration.as_secs_f64()
        );
        println!(
            "- Microseconds per line: {:.2}",
            avg_check_duration.as_micros() as f64 / line_count as f64
        );

        // Performance assertion - should complete reasonably fast
        assert!(
            avg_check_duration.as_millis() < 200,
            "MD039 check should complete in under 200ms, took {}ms",
            avg_check_duration.as_millis()
        );

        // Verify we're finding the expected number of warnings (500 links with spaces)
        assert_eq!(warnings_count, 500, "Should find 500 warnings for links with spaces");
    }
}