rumdl_lib/rules/
md039_no_space_in_links.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use lazy_static::lazy_static;
3use regex::Regex;
4
5lazy_static! {
6    // Pre-compiled regex patterns for performance - using DOTALL flag to match newlines
7    static ref LINK_PATTERN: Regex = Regex::new(r"(?s)!?\[([^\]]*)\]\(([^)]*)\)").unwrap();
8
9    // Fast check patterns - simple string-based checks are faster than complex regex
10    static ref WHITESPACE_CHECK: Regex = Regex::new(r"^\s+|\s+$").unwrap();
11    static ref ALL_WHITESPACE: Regex = Regex::new(r"^\s*$").unwrap();
12}
13
/// Rule MD039: link text must not begin or end with whitespace.
///
/// Full documentation, configuration, and examples live in
/// [docs/md039.md](../../docs/md039.md).
///
/// Leading or trailing spaces inside the brackets of a link can cause unexpected
/// rendering in some Markdown parsers, so this rule reports them.
#[derive(Clone, Debug, Default)]
pub struct MD039NoSpaceInLinks;
22
/// Message text attached to every warning this rule emits.
const WARNING_MESSAGE: &str = "Remove spaces inside link text";
25
26impl MD039NoSpaceInLinks {
27    pub fn new() -> Self {
28        Self
29    }
30
31    /// Optimized fast check to see if content has any potential links or images
32    #[inline]
33    fn has_links_or_images(&self, content: &str) -> bool {
34        LINK_PATTERN.is_match(content)
35    }
36
37    #[inline]
38    fn trim_link_text_preserve_escapes(text: &str) -> &str {
39        // Optimized trimming that preserves escapes
40        let start = text
41            .char_indices()
42            .find(|&(_, c)| !c.is_whitespace())
43            .map(|(i, _)| i)
44            .unwrap_or(text.len());
45        let end = text
46            .char_indices()
47            .rev()
48            .find(|&(_, c)| !c.is_whitespace())
49            .map(|(i, c)| i + c.len_utf8())
50            .unwrap_or(0);
51        if start >= end { "" } else { &text[start..end] }
52    }
53
54    /// Optimized whitespace checking for link text
55    #[inline]
56    fn needs_trimming(&self, text: &str) -> bool {
57        // Simple and fast check: compare with trimmed version
58        text != text.trim_matches(|c: char| c.is_whitespace())
59    }
60
61    /// Optimized unescaping for performance-critical path
62    #[inline]
63    fn unescape_fast(&self, text: &str) -> String {
64        if !text.contains('\\') {
65            return text.to_string();
66        }
67
68        let mut result = String::with_capacity(text.len());
69        let mut chars = text.chars().peekable();
70
71        while let Some(c) = chars.next() {
72            if c == '\\' {
73                if let Some(&next) = chars.peek() {
74                    result.push(next);
75                    chars.next();
76                } else {
77                    result.push(c);
78                }
79            } else {
80                result.push(c);
81            }
82        }
83        result
84    }
85}
86
87impl Rule for MD039NoSpaceInLinks {
88    fn name(&self) -> &'static str {
89        "MD039"
90    }
91
92    fn description(&self) -> &'static str {
93        "Spaces inside link text"
94    }
95
96    fn category(&self) -> RuleCategory {
97        RuleCategory::Link
98    }
99
100    fn as_maybe_document_structure(&self) -> Option<&dyn crate::rule::MaybeDocumentStructure> {
101        Some(self)
102    }
103
104    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
105        let content = ctx.content;
106        content.is_empty() || !self.has_links_or_images(content)
107    }
108
109    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
110        let mut warnings = Vec::new();
111
112        // Use centralized link parsing from LintContext
113        for link in &ctx.links {
114            // Skip reference links (markdownlint doesn't check these)
115            if link.is_reference {
116                continue;
117            }
118
119            // Fast check if trimming is needed
120            if !self.needs_trimming(&link.text) {
121                continue;
122            }
123
124            // Optimized unescaping for whitespace check
125            let unescaped = self.unescape_fast(&link.text);
126
127            let needs_warning = if ALL_WHITESPACE.is_match(&unescaped) {
128                true
129            } else {
130                let trimmed = link.text.trim_matches(|c: char| c.is_whitespace());
131                link.text.as_str() != trimmed
132            };
133
134            if needs_warning {
135                let url = if link.is_reference {
136                    if let Some(ref_id) = &link.reference_id {
137                        format!("[{ref_id}]")
138                    } else {
139                        "[]".to_string()
140                    }
141                } else {
142                    format!("({})", link.url)
143                };
144
145                let fixed = if ALL_WHITESPACE.is_match(&unescaped) {
146                    format!("[]{url}")
147                } else {
148                    let trimmed = Self::trim_link_text_preserve_escapes(&link.text);
149                    format!("[{trimmed}]{url}")
150                };
151
152                warnings.push(LintWarning {
153                    rule_name: Some(self.name()),
154                    line: link.line,
155                    column: link.start_col + 1, // Convert to 1-indexed
156                    end_line: link.line,
157                    end_column: link.end_col + 1, // Convert to 1-indexed
158                    message: WARNING_MESSAGE.to_string(),
159                    severity: Severity::Warning,
160                    fix: Some(Fix {
161                        range: link.byte_offset..link.byte_end,
162                        replacement: fixed,
163                    }),
164                });
165            }
166        }
167
168        // Also check images
169        for image in &ctx.images {
170            // Skip reference images (markdownlint doesn't check these)
171            if image.is_reference {
172                continue;
173            }
174
175            // Fast check if trimming is needed
176            if !self.needs_trimming(&image.alt_text) {
177                continue;
178            }
179
180            // Optimized unescaping for whitespace check
181            let unescaped = self.unescape_fast(&image.alt_text);
182
183            let needs_warning = if ALL_WHITESPACE.is_match(&unescaped) {
184                true
185            } else {
186                let trimmed = image.alt_text.trim_matches(|c: char| c.is_whitespace());
187                image.alt_text.as_str() != trimmed
188            };
189
190            if needs_warning {
191                let url = if image.is_reference {
192                    if let Some(ref_id) = &image.reference_id {
193                        format!("[{ref_id}]")
194                    } else {
195                        "[]".to_string()
196                    }
197                } else {
198                    format!("({})", image.url)
199                };
200
201                let fixed = if ALL_WHITESPACE.is_match(&unescaped) {
202                    format!("![]{url}")
203                } else {
204                    let trimmed = Self::trim_link_text_preserve_escapes(&image.alt_text);
205                    format!("![{trimmed}]{url}")
206                };
207
208                warnings.push(LintWarning {
209                    rule_name: Some(self.name()),
210                    line: image.line,
211                    column: image.start_col + 1, // Convert to 1-indexed
212                    end_line: image.line,
213                    end_column: image.end_col + 1, // Convert to 1-indexed
214                    message: WARNING_MESSAGE.to_string(),
215                    severity: Severity::Warning,
216                    fix: Some(Fix {
217                        range: image.byte_offset..image.byte_end,
218                        replacement: fixed,
219                    }),
220                });
221            }
222        }
223
224        Ok(warnings)
225    }
226
227    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
228        let content = ctx.content;
229        let mut fixes = Vec::new();
230
231        // Process links
232        for link in &ctx.links {
233            // Skip reference links (markdownlint doesn't check these)
234            if link.is_reference {
235                continue;
236            }
237
238            if !self.needs_trimming(&link.text) {
239                continue;
240            }
241
242            let unescaped = self.unescape_fast(&link.text);
243
244            let needs_fix = if ALL_WHITESPACE.is_match(&unescaped) {
245                true
246            } else {
247                let trimmed = link.text.trim_matches(|c: char| c.is_whitespace());
248                link.text.as_str() != trimmed
249            };
250
251            if needs_fix {
252                let url_part = if link.is_reference {
253                    if let Some(ref_id) = &link.reference_id {
254                        format!("[{ref_id}]")
255                    } else {
256                        "[]".to_string()
257                    }
258                } else {
259                    format!("({})", link.url)
260                };
261
262                let replacement = if ALL_WHITESPACE.is_match(&unescaped) {
263                    format!("[]{url_part}")
264                } else {
265                    let trimmed = Self::trim_link_text_preserve_escapes(&link.text);
266                    format!("[{trimmed}]{url_part}")
267                };
268
269                fixes.push((link.byte_offset, link.byte_end, replacement));
270            }
271        }
272
273        // Process images
274        for image in &ctx.images {
275            // Skip reference images (markdownlint doesn't check these)
276            if image.is_reference {
277                continue;
278            }
279
280            if !self.needs_trimming(&image.alt_text) {
281                continue;
282            }
283
284            let unescaped = self.unescape_fast(&image.alt_text);
285
286            let needs_fix = if ALL_WHITESPACE.is_match(&unescaped) {
287                true
288            } else {
289                let trimmed = image.alt_text.trim_matches(|c: char| c.is_whitespace());
290                image.alt_text.as_str() != trimmed
291            };
292
293            if needs_fix {
294                let url_part = if image.is_reference {
295                    if let Some(ref_id) = &image.reference_id {
296                        format!("[{ref_id}]")
297                    } else {
298                        "[]".to_string()
299                    }
300                } else {
301                    format!("({})", image.url)
302                };
303
304                let replacement = if ALL_WHITESPACE.is_match(&unescaped) {
305                    format!("![]{url_part}")
306                } else {
307                    let trimmed = Self::trim_link_text_preserve_escapes(&image.alt_text);
308                    format!("![{trimmed}]{url_part}")
309                };
310
311                fixes.push((image.byte_offset, image.byte_end, replacement));
312            }
313        }
314
315        if fixes.is_empty() {
316            return Ok(content.to_string());
317        }
318
319        // Sort fixes by position to apply them in order
320        fixes.sort_by_key(|&(start, _, _)| start);
321
322        // Apply fixes efficiently
323        let mut result = String::with_capacity(content.len());
324        let mut last_pos = 0;
325
326        for (start, end, replacement) in fixes {
327            if start < last_pos {
328                // This should not happen if fixes are properly sorted and non-overlapping
329                return Err(LintError::FixFailed(format!(
330                    "Overlapping fixes detected: last_pos={last_pos}, start={start}"
331                )));
332            }
333            result.push_str(&content[last_pos..start]);
334            result.push_str(&replacement);
335            last_pos = end;
336        }
337        result.push_str(&content[last_pos..]);
338
339        Ok(result)
340    }
341
342    fn as_any(&self) -> &dyn std::any::Any {
343        self
344    }
345
346    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
347    where
348        Self: Sized,
349    {
350        Box::new(Self)
351    }
352}
353
354impl crate::utils::document_structure::DocumentStructureExtensions for MD039NoSpaceInLinks {
355    fn has_relevant_elements(
356        &self,
357        _ctx: &crate::lint_context::LintContext,
358        doc_structure: &crate::utils::document_structure::DocumentStructure,
359    ) -> bool {
360        !doc_structure.links.is_empty() || !doc_structure.images.is_empty()
361    }
362}
363
#[cfg(test)]
mod tests {
    use super::*;

    /// Number of warnings the rule reports for `markdown` (standard flavor).
    fn warning_count(markdown: &str) -> usize {
        let ctx = crate::lint_context::LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
        MD039NoSpaceInLinks::new().check(&ctx).unwrap().len()
    }

    /// `markdown` after the rule's automatic fix has been applied (standard flavor).
    fn autofix(markdown: &str) -> String {
        let ctx = crate::lint_context::LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
        MD039NoSpaceInLinks::new().fix(&ctx).unwrap()
    }

    #[test]
    fn test_valid_links() {
        assert_eq!(warning_count("[link](url) and [another link](url) here"), 0);
    }

    #[test]
    fn test_spaces_both_ends() {
        let content = "[ link ](url) and [ another link ](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(autofix(content), "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_space_at_start() {
        let content = "[ link](url) and [ another link](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(autofix(content), "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_space_at_end() {
        let content = "[link ](url) and [another link ](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(autofix(content), "[link](url) and [another link](url) here");
    }

    #[test]
    fn test_link_in_code_block() {
        // Only the link outside the fenced block is reported and fixed.
        let content = "```\n[ link ](url)\n```\n[ link ](url)";
        assert_eq!(warning_count(content), 1);
        assert_eq!(autofix(content), "```\n[ link ](url)\n```\n[link](url)");
    }

    #[test]
    fn test_multiple_links() {
        let content = "[ link ](url) and [ another ](url) in one line";
        assert_eq!(warning_count(content), 2);
        assert_eq!(autofix(content), "[link](url) and [another](url) in one line");
    }

    #[test]
    fn test_link_with_internal_spaces() {
        let content = "[this is link](url) and [ this is also link ](url)";
        assert_eq!(warning_count(content), 1);
        assert_eq!(autofix(content), "[this is link](url) and [this is also link](url)");
    }

    #[test]
    fn test_link_with_punctuation() {
        let content = "[ link! ](url) and [ link? ](url) here";
        assert_eq!(warning_count(content), 2);
        assert_eq!(autofix(content), "[link!](url) and [link?](url) here");
    }

    #[test]
    fn test_parity_only_whitespace_and_newlines_minimal() {
        // markdownlint removes all whitespace, resulting in empty link text
        assert_eq!(autofix("[   \n  ](url) and [\t\n\t](url)"), "[](url) and [](url)");
    }

    #[test]
    fn test_parity_internal_newlines_minimal() {
        // markdownlint trims only leading/trailing whitespace, preserves internal newlines
        assert_eq!(
            autofix("[link\ntext](url) and [ another\nlink ](url)"),
            "[link\ntext](url) and [another\nlink](url)"
        );
    }

    #[test]
    fn test_parity_escaped_brackets_minimal() {
        // markdownlint does not trim or remove escapes, so output should be unchanged
        let content = "[link\\]](url) and [link\\[]](url)";
        assert_eq!(autofix(content), content);
    }

    #[test]
    fn test_performance_md039() {
        use std::time::{Duration, Instant};

        let rule = MD039NoSpaceInLinks::new();

        // 500 lines that must be flagged, followed by 500 lines that must be skipped.
        let mut content = String::with_capacity(100_000);
        for i in 0..500 {
            content.push_str(&format!("Line {i} with [ spaced link {i} ](url{i}) and text.\n"));
        }
        for i in 0..500 {
            content.push_str(&format!(
                "Line {} with [valid link {}](url{}) and text.\n",
                i + 500,
                i,
                i
            ));
        }
        let line_count = content.lines().count();

        println!(
            "MD039 Performance Test - Content: {} bytes, {} lines",
            content.len(),
            line_count
        );

        let ctx = crate::lint_context::LintContext::new(&content, crate::config::MarkdownFlavor::Standard);

        // One untimed pass to warm up before measuring.
        let _ = rule.check(&ctx).unwrap();

        let runs = 5;
        let mut elapsed_total = Duration::ZERO;
        let mut warnings_count = 0;

        for _ in 0..runs {
            let started = Instant::now();
            let warnings = rule.check(&ctx).unwrap();
            elapsed_total += started.elapsed();
            warnings_count = warnings.len();
        }

        let avg_check_duration = elapsed_total / runs;

        println!("MD039 Optimized Performance:");
        println!(
            "- Average check time: {:?} ({:.2} ms)",
            avg_check_duration,
            avg_check_duration.as_secs_f64() * 1000.0
        );
        println!("- Found {warnings_count} warnings");
        println!(
            "- Lines per second: {:.0}",
            line_count as f64 / avg_check_duration.as_secs_f64()
        );
        println!(
            "- Microseconds per line: {:.2}",
            avg_check_duration.as_micros() as f64 / line_count as f64
        );

        // Generous upper bound so the check stays reasonably fast.
        assert!(
            avg_check_duration.as_millis() < 200,
            "MD039 check should complete in under 200ms, took {}ms",
            avg_check_duration.as_millis()
        );

        // Only the 500 spaced links may be reported.
        assert_eq!(warnings_count, 500, "Should find 500 warnings for links with spaces");
    }
}