rumdl_lib/rules/
md038_no_space_in_code.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2
/// Rule MD038: Spaces inside code span elements
///
/// See [docs/md038.md](../../docs/md038.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when a code span has leading or trailing whitespace
/// inside its backtick markers.
///
/// For example:
///
/// ``` markdown
/// ` some text`
/// `some text `
/// `  some text  `
/// ```
///
/// To fix this issue, remove the leading and trailing spaces within the code span markers:
///
/// ``` markdown
/// `some text`
/// ```
///
/// Note: A span padded with exactly one space on each side (`` ` some text ` ``) is not
/// flagged, because CommonMark strips that pair of spaces when rendering.
///
/// Note: Code spans containing backticks (e.g., `` `backticks` inside ``) are not flagged
/// to avoid breaking nested backtick structures used to display backticks in documentation.
#[derive(Debug, Clone)]
pub struct MD038NoSpaceInCode {
    /// Whether the rule is active. When `false`, `check` reports nothing and
    /// `fix` returns the content unchanged.
    pub enabled: bool,
}

impl Default for MD038NoSpaceInCode {
    /// Returns an enabled rule.
    ///
    /// A derived `Default` would set `enabled` to `false` and produce a rule that
    /// silently does nothing, so the default is spelled out explicitly.
    fn default() -> Self {
        Self { enabled: true }
    }
}
31
32impl MD038NoSpaceInCode {
33    pub fn new() -> Self {
34        Self { enabled: true }
35    }
36
37    /// Check if a code span is likely part of a nested backtick structure
38    fn is_likely_nested_backticks(&self, ctx: &crate::lint_context::LintContext, span_index: usize) -> bool {
39        // If there are multiple code spans on the same line, and there's text
40        // between them that contains "code" or other indicators, it's likely nested
41        let code_spans = ctx.code_spans();
42        let current_span = &code_spans[span_index];
43        let current_line = current_span.line;
44
45        // Look for other code spans on the same line
46        let same_line_spans: Vec<_> = code_spans
47            .iter()
48            .enumerate()
49            .filter(|(i, s)| s.line == current_line && *i != span_index)
50            .collect();
51
52        if same_line_spans.is_empty() {
53            return false;
54        }
55
56        // Check if there's content between spans that might indicate nesting
57        // Get the line content
58        let line_idx = current_line - 1; // Convert to 0-based
59        if line_idx >= ctx.lines.len() {
60            return false;
61        }
62
63        let line_content = &ctx.lines[line_idx].content(ctx.content);
64
65        // For each pair of adjacent code spans, check what's between them
66        for (_, other_span) in &same_line_spans {
67            let start = current_span.end_col.min(other_span.end_col);
68            let end = current_span.start_col.max(other_span.start_col);
69
70            if start < end && end <= line_content.len() {
71                // Use .get() to safely handle multi-byte UTF-8 characters
72                if let Some(between) = line_content.get(start..end) {
73                    // If there's text containing "code" or similar patterns between spans,
74                    // it's likely they're showing nested backticks
75                    if between.contains("code") || between.contains("backtick") {
76                        return true;
77                    }
78                }
79            }
80        }
81
82        false
83    }
84}
85
86impl Rule for MD038NoSpaceInCode {
    /// Returns the rule identifier, `"MD038"`.
    fn name(&self) -> &'static str {
        "MD038"
    }
90
    /// Returns the one-line, human-readable summary of what this rule reports.
    fn description(&self) -> &'static str {
        "Spaces inside code span elements"
    }
94
    /// Returns the category this rule is filed under (`RuleCategory::Other`).
    fn category(&self) -> RuleCategory {
        RuleCategory::Other
    }
98
99    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
100        if !self.enabled {
101            return Ok(vec![]);
102        }
103
104        let mut warnings = Vec::new();
105
106        // Use centralized code spans from LintContext
107        let code_spans = ctx.code_spans();
108        for (i, code_span) in code_spans.iter().enumerate() {
109            let code_content = &code_span.content;
110
111            // Skip empty code spans
112            if code_content.is_empty() {
113                continue;
114            }
115
116            // Early check: if no leading/trailing whitespace, skip
117            let has_leading_space = code_content.chars().next().is_some_and(|c| c.is_whitespace());
118            let has_trailing_space = code_content.chars().last().is_some_and(|c| c.is_whitespace());
119
120            if !has_leading_space && !has_trailing_space {
121                continue;
122            }
123
124            let trimmed = code_content.trim();
125
126            // Check if there are leading or trailing spaces
127            if code_content != trimmed {
128                // CommonMark behavior: if there is exactly ONE space at start AND ONE at end,
129                // and the content after trimming is non-empty, those spaces are stripped.
130                // We should NOT flag this case since the spaces are intentionally stripped.
131                // See: https://spec.commonmark.org/0.31.2/#code-spans
132                //
133                // Examples:
134                // ` text ` → "text" (spaces stripped, NOT flagged)
135                // `  text ` → " text" (extra leading space remains, FLAGGED)
136                // ` text  ` → "text " (extra trailing space remains, FLAGGED)
137                // ` text` → " text" (no trailing space to balance, FLAGGED)
138                // `text ` → "text " (no leading space to balance, FLAGGED)
139                if has_leading_space && has_trailing_space && !trimmed.is_empty() {
140                    let leading_spaces = code_content.len() - code_content.trim_start().len();
141                    let trailing_spaces = code_content.len() - code_content.trim_end().len();
142
143                    // Exactly one space on each side - CommonMark strips them
144                    if leading_spaces == 1 && trailing_spaces == 1 {
145                        continue;
146                    }
147                }
148                // Check if the content itself contains backticks - if so, skip to avoid
149                // breaking nested backtick structures
150                if trimmed.contains('`') {
151                    continue;
152                }
153
154                // Skip inline R code in Quarto/RMarkdown: `r expression`
155                // This is a legitimate pattern where space is required after 'r'
156                if ctx.flavor == crate::config::MarkdownFlavor::Quarto
157                    && trimmed.starts_with('r')
158                    && trimmed.len() > 1
159                    && trimmed.chars().nth(1).is_some_and(|c| c.is_whitespace())
160                {
161                    continue;
162                }
163
164                // Check if this might be part of a nested backtick structure
165                // by looking for other code spans nearby that might indicate nesting
166                if self.is_likely_nested_backticks(ctx, i) {
167                    continue;
168                }
169
170                warnings.push(LintWarning {
171                    rule_name: Some(self.name().to_string()),
172                    line: code_span.line,
173                    column: code_span.start_col + 1, // Convert to 1-indexed
174                    end_line: code_span.line,
175                    end_column: code_span.end_col, // Don't add 1 to match test expectation
176                    message: "Spaces inside code span elements".to_string(),
177                    severity: Severity::Warning,
178                    fix: Some(Fix {
179                        range: code_span.byte_offset..code_span.byte_end,
180                        replacement: format!(
181                            "{}{}{}",
182                            "`".repeat(code_span.backtick_count),
183                            trimmed,
184                            "`".repeat(code_span.backtick_count)
185                        ),
186                    }),
187                });
188            }
189        }
190
191        Ok(warnings)
192    }
193
194    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
195        let content = ctx.content;
196        if !self.enabled {
197            return Ok(content.to_string());
198        }
199
200        // Early return if no backticks in content
201        if !content.contains('`') {
202            return Ok(content.to_string());
203        }
204
205        // Get warnings to identify what needs to be fixed
206        let warnings = self.check(ctx)?;
207        if warnings.is_empty() {
208            return Ok(content.to_string());
209        }
210
211        // Collect all fixes and sort by position (reverse order to avoid position shifts)
212        let mut fixes: Vec<(std::ops::Range<usize>, String)> = warnings
213            .into_iter()
214            .filter_map(|w| w.fix.map(|f| (f.range, f.replacement)))
215            .collect();
216
217        fixes.sort_by_key(|(range, _)| std::cmp::Reverse(range.start));
218
219        // Apply fixes - only allocate string when we have fixes to apply
220        let mut result = content.to_string();
221        for (range, replacement) in fixes {
222            result.replace_range(range, &replacement);
223        }
224
225        Ok(result)
226    }
227
    /// Returns `true` (skip this rule) when `ctx.likely_has_code()` reports `false`.
    ///
    /// NOTE(review): the heuristic itself lives in `LintContext::likely_has_code`;
    /// presumably it is a cheap pre-check for code content — confirm there.
    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
        !ctx.likely_has_code()
    }
232
    /// Exposes the rule as `&dyn Any`, which lets callers downcast it back to
    /// the concrete `MD038NoSpaceInCode` type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
236
237    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
238    where
239        Self: Sized,
240    {
241        Box::new(MD038NoSpaceInCode { enabled: true })
242    }
243}
244
245#[cfg(test)]
246mod tests {
247    use super::*;
248
249    #[test]
250    fn test_md038_readme_false_positives() {
251        // These are the exact cases from README.md that are incorrectly flagged
252        let rule = MD038NoSpaceInCode::new();
253        let valid_cases = vec![
254            "3. `pyproject.toml` (must contain `[tool.rumdl]` section)",
255            "#### Effective Configuration (`rumdl config`)",
256            "- Blue: `.rumdl.toml`",
257            "### Defaults Only (`rumdl config --defaults`)",
258        ];
259
260        for case in valid_cases {
261            let ctx = crate::lint_context::LintContext::new(case, crate::config::MarkdownFlavor::Standard, None);
262            let result = rule.check(&ctx).unwrap();
263            assert!(
264                result.is_empty(),
265                "Should not flag code spans without leading/trailing spaces: '{}'. Got {} warnings",
266                case,
267                result.len()
268            );
269        }
270    }
271
272    #[test]
273    fn test_md038_valid() {
274        let rule = MD038NoSpaceInCode::new();
275        let valid_cases = vec![
276            "This is `code` in a sentence.",
277            "This is a `longer code span` in a sentence.",
278            "This is `code with internal spaces` which is fine.",
279            "Code span at `end of line`",
280            "`Start of line` code span",
281            "Multiple `code spans` in `one line` are fine",
282            "Code span with `symbols: !@#$%^&*()`",
283            "Empty code span `` is technically valid",
284        ];
285        for case in valid_cases {
286            let ctx = crate::lint_context::LintContext::new(case, crate::config::MarkdownFlavor::Standard, None);
287            let result = rule.check(&ctx).unwrap();
288            assert!(result.is_empty(), "Valid case should not have warnings: {case}");
289        }
290    }
291
292    #[test]
293    fn test_md038_invalid() {
294        let rule = MD038NoSpaceInCode::new();
295        // Flag cases that violate CommonMark:
296        // - Space only at start (no matching end space)
297        // - Space only at end (no matching start space)
298        // - Multiple spaces at start or end (extra space will remain after CommonMark stripping)
299        let invalid_cases = vec![
300            // Unbalanced: only leading space
301            "This is ` code` with leading space.",
302            // Unbalanced: only trailing space
303            "This is `code ` with trailing space.",
304            // Multiple leading spaces (one will remain after CommonMark strips one)
305            "This is `  code ` with double leading space.",
306            // Multiple trailing spaces (one will remain after CommonMark strips one)
307            "This is ` code  ` with double trailing space.",
308            // Multiple spaces both sides
309            "This is `  code  ` with double spaces both sides.",
310        ];
311        for case in invalid_cases {
312            let ctx = crate::lint_context::LintContext::new(case, crate::config::MarkdownFlavor::Standard, None);
313            let result = rule.check(&ctx).unwrap();
314            assert!(!result.is_empty(), "Invalid case should have warnings: {case}");
315        }
316    }
317
318    #[test]
319    fn test_md038_valid_commonmark_stripping() {
320        let rule = MD038NoSpaceInCode::new();
321        // These cases have exactly ONE space at start AND ONE at end.
322        // CommonMark strips both, so these should NOT be flagged.
323        // See: https://spec.commonmark.org/0.31.2/#code-spans
324        let valid_cases = vec![
325            "Type ` y ` to confirm.",
326            "Use ` git commit -m \"message\" ` to commit.",
327            "The variable ` $HOME ` contains home path.",
328            "The pattern ` *.txt ` matches text files.",
329            "This is ` random word ` with unnecessary spaces.",
330            "Text with ` plain text ` is valid.",
331            "Code with ` just code ` here.",
332            "Multiple ` word ` spans with ` text ` in one line.",
333            "This is ` code ` with both leading and trailing single space.",
334            "Use ` - ` as separator.",
335        ];
336        for case in valid_cases {
337            let ctx = crate::lint_context::LintContext::new(case, crate::config::MarkdownFlavor::Standard, None);
338            let result = rule.check(&ctx).unwrap();
339            assert!(
340                result.is_empty(),
341                "Single space on each side should not be flagged (CommonMark strips them): {case}"
342            );
343        }
344    }
345
346    #[test]
347    fn test_md038_fix() {
348        let rule = MD038NoSpaceInCode::new();
349        // Only cases that violate CommonMark should be fixed
350        let test_cases = vec![
351            // Unbalanced: only leading space - should be fixed
352            (
353                "This is ` code` with leading space.",
354                "This is `code` with leading space.",
355            ),
356            // Unbalanced: only trailing space - should be fixed
357            (
358                "This is `code ` with trailing space.",
359                "This is `code` with trailing space.",
360            ),
361            // Single space on both sides - NOT fixed (valid per CommonMark)
362            (
363                "This is ` code ` with both spaces.",
364                "This is ` code ` with both spaces.", // unchanged
365            ),
366            // Double leading space - should be fixed
367            (
368                "This is `  code ` with double leading space.",
369                "This is `code` with double leading space.",
370            ),
371            // Mixed: one valid (single space both), one invalid (trailing only)
372            (
373                "Multiple ` code ` and `spans ` to fix.",
374                "Multiple ` code ` and `spans` to fix.", // only spans is fixed
375            ),
376        ];
377        for (input, expected) in test_cases {
378            let ctx = crate::lint_context::LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);
379            let result = rule.fix(&ctx).unwrap();
380            assert_eq!(result, expected, "Fix did not produce expected output for: {input}");
381        }
382    }
383
384    #[test]
385    fn test_check_invalid_leading_space() {
386        let rule = MD038NoSpaceInCode::new();
387        let input = "This has a ` leading space` in code";
388        let ctx = crate::lint_context::LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);
389        let result = rule.check(&ctx).unwrap();
390        assert_eq!(result.len(), 1);
391        assert_eq!(result[0].line, 1);
392        assert!(result[0].fix.is_some());
393    }
394
395    #[test]
396    fn test_code_span_parsing_nested_backticks() {
397        let content = "Code with ` nested `code` example ` should preserve backticks";
398        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
399
400        println!("Content: {content}");
401        println!("Code spans found:");
402        let code_spans = ctx.code_spans();
403        for (i, span) in code_spans.iter().enumerate() {
404            println!(
405                "  Span {}: line={}, col={}-{}, backticks={}, content='{}'",
406                i, span.line, span.start_col, span.end_col, span.backtick_count, span.content
407            );
408        }
409
410        // This test reveals the issue - we're getting multiple separate code spans instead of one
411        assert_eq!(code_spans.len(), 2, "Should parse as 2 code spans");
412    }
413
414    #[test]
415    fn test_nested_backtick_detection() {
416        let rule = MD038NoSpaceInCode::new();
417
418        // Test that code spans with backticks are skipped
419        let content = "Code with `` `backticks` inside `` should not be flagged";
420        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
421        let result = rule.check(&ctx).unwrap();
422        assert!(result.is_empty(), "Code spans with backticks should be skipped");
423    }
424
425    #[test]
426    fn test_quarto_inline_r_code() {
427        // Test that Quarto-specific R code exception works
428        let rule = MD038NoSpaceInCode::new();
429
430        // Test inline R code - should NOT trigger warning in Quarto flavor
431        // The key pattern is "r " followed by code
432        let content = r#"The result is `r nchar("test")` which equals 4."#;
433
434        // Quarto flavor should allow R code
435        let ctx_quarto = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Quarto, None);
436        let result_quarto = rule.check(&ctx_quarto).unwrap();
437        assert!(
438            result_quarto.is_empty(),
439            "Quarto inline R code should not trigger warnings. Got {} warnings",
440            result_quarto.len()
441        );
442
443        // Test that invalid code spans (not matching CommonMark stripping) still get flagged in Quarto
444        // Use only trailing space - this violates CommonMark (no balanced stripping)
445        let content_other = "This has `plain text ` with trailing space.";
446        let ctx_other =
447            crate::lint_context::LintContext::new(content_other, crate::config::MarkdownFlavor::Quarto, None);
448        let result_other = rule.check(&ctx_other).unwrap();
449        assert_eq!(
450            result_other.len(),
451            1,
452            "Quarto should still flag non-R code spans with improper spaces"
453        );
454    }
455
456    #[test]
457    fn test_multibyte_utf8_no_panic() {
458        // Regression test: ensure multi-byte UTF-8 characters don't cause panics
459        // when checking for nested backticks between code spans.
460        // These are real examples from the-art-of-command-line translations.
461        let rule = MD038NoSpaceInCode::new();
462
463        // Greek text with code spans
464        let greek = "- Χρήσιμα εργαλεία της γραμμής εντολών είναι τα `ping`,` ipconfig`, `traceroute` και `netstat`.";
465        let ctx = crate::lint_context::LintContext::new(greek, crate::config::MarkdownFlavor::Standard, None);
466        let result = rule.check(&ctx);
467        assert!(result.is_ok(), "Greek text should not panic");
468
469        // Chinese text with code spans
470        let chinese = "- 當你需要對文字檔案做集合交、並、差運算時,`sort`/`uniq` 很有幫助。";
471        let ctx = crate::lint_context::LintContext::new(chinese, crate::config::MarkdownFlavor::Standard, None);
472        let result = rule.check(&ctx);
473        assert!(result.is_ok(), "Chinese text should not panic");
474
475        // Cyrillic/Ukrainian text with code spans
476        let cyrillic = "- Основи роботи з файлами: `ls` і `ls -l`, `less`, `head`,` tail` і `tail -f`.";
477        let ctx = crate::lint_context::LintContext::new(cyrillic, crate::config::MarkdownFlavor::Standard, None);
478        let result = rule.check(&ctx);
479        assert!(result.is_ok(), "Cyrillic text should not panic");
480
481        // Mixed multi-byte with multiple code spans on same line
482        let mixed = "使用 `git` 命令和 `npm` 工具来管理项目,可以用 `docker` 容器化。";
483        let ctx = crate::lint_context::LintContext::new(mixed, crate::config::MarkdownFlavor::Standard, None);
484        let result = rule.check(&ctx);
485        assert!(
486            result.is_ok(),
487            "Mixed Chinese text with multiple code spans should not panic"
488        );
489    }
490}