// rumdl_lib/rules/md038_no_space_in_code.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2
/// Rule MD038: code spans must not be padded with whitespace.
///
/// See [docs/md038.md](../../docs/md038.md) for full documentation, configuration, and examples.
///
/// A warning is raised for code spans whose content begins or ends with
/// whitespace, for example:
///
/// ``` markdown
/// ` some text`
/// `some text `
/// ` some text `
/// ```
///
/// The automatic fix strips the padding and keeps the backtick delimiters:
///
/// ``` markdown
/// `some text`
/// ```
///
/// Note: code spans whose content itself contains backticks
/// (e.g., `` `backticks` inside ``) are not flagged, so that nested backtick
/// structures used to display backticks in documentation are left intact.
///
/// The derived `Default` leaves `enabled` as `false`; use
/// [`MD038NoSpaceInCode::new`] to obtain an enabled rule.
#[derive(Clone, Debug, Default)]
pub struct MD038NoSpaceInCode {
    /// When `false`, `check` reports nothing and `fix` returns the input unchanged.
    pub enabled: bool,
}
31
32impl MD038NoSpaceInCode {
33    pub fn new() -> Self {
34        Self { enabled: true }
35    }
36
37    /// Check if a code span is likely part of a nested backtick structure
38    fn is_likely_nested_backticks(&self, ctx: &crate::lint_context::LintContext, span_index: usize) -> bool {
39        // If there are multiple code spans on the same line, and there's text
40        // between them that contains "code" or other indicators, it's likely nested
41        let code_spans = ctx.code_spans();
42        let current_span = &code_spans[span_index];
43        let current_line = current_span.line;
44
45        // Look for other code spans on the same line
46        let same_line_spans: Vec<_> = code_spans
47            .iter()
48            .enumerate()
49            .filter(|(i, s)| s.line == current_line && *i != span_index)
50            .collect();
51
52        if same_line_spans.is_empty() {
53            return false;
54        }
55
56        // Check if there's content between spans that might indicate nesting
57        // Get the line content
58        let line_idx = current_line - 1; // Convert to 0-based
59        if line_idx >= ctx.lines.len() {
60            return false;
61        }
62
63        let line_content = &ctx.lines[line_idx].content(ctx.content);
64
65        // For each pair of adjacent code spans, check what's between them
66        for (_, other_span) in &same_line_spans {
67            let start = current_span.end_col.min(other_span.end_col);
68            let end = current_span.start_col.max(other_span.start_col);
69
70            if start < end && end <= line_content.len() {
71                // Use .get() to safely handle multi-byte UTF-8 characters
72                if let Some(between) = line_content.get(start..end) {
73                    // If there's text containing "code" or similar patterns between spans,
74                    // it's likely they're showing nested backticks
75                    if between.contains("code") || between.contains("backtick") {
76                        return true;
77                    }
78                }
79            }
80        }
81
82        false
83    }
84}
85
86impl Rule for MD038NoSpaceInCode {
87    fn name(&self) -> &'static str {
88        "MD038"
89    }
90
91    fn description(&self) -> &'static str {
92        "Spaces inside code span elements"
93    }
94
95    fn category(&self) -> RuleCategory {
96        RuleCategory::Other
97    }
98
99    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
100        if !self.enabled {
101            return Ok(vec![]);
102        }
103
104        let mut warnings = Vec::new();
105
106        // Use centralized code spans from LintContext
107        let code_spans = ctx.code_spans();
108        for (i, code_span) in code_spans.iter().enumerate() {
109            let code_content = &code_span.content;
110
111            // Skip empty code spans
112            if code_content.is_empty() {
113                continue;
114            }
115
116            // Early check: if no leading/trailing whitespace, skip
117            let has_leading_space = code_content.chars().next().is_some_and(|c| c.is_whitespace());
118            let has_trailing_space = code_content.chars().last().is_some_and(|c| c.is_whitespace());
119
120            if !has_leading_space && !has_trailing_space {
121                continue;
122            }
123
124            let trimmed = code_content.trim();
125
126            // Check if there are leading or trailing spaces
127            if code_content != trimmed {
128                // Check if the content itself contains backticks - if so, skip to avoid
129                // breaking nested backtick structures
130                if trimmed.contains('`') {
131                    continue;
132                }
133
134                // Skip inline R code in Quarto/RMarkdown: `r expression`
135                // This is a legitimate pattern where space is required after 'r'
136                if ctx.flavor == crate::config::MarkdownFlavor::Quarto
137                    && trimmed.starts_with('r')
138                    && trimmed.len() > 1
139                    && trimmed.chars().nth(1).is_some_and(|c| c.is_whitespace())
140                {
141                    continue;
142                }
143
144                // Check if this might be part of a nested backtick structure
145                // by looking for other code spans nearby that might indicate nesting
146                if self.is_likely_nested_backticks(ctx, i) {
147                    continue;
148                }
149
150                warnings.push(LintWarning {
151                    rule_name: Some(self.name().to_string()),
152                    line: code_span.line,
153                    column: code_span.start_col + 1, // Convert to 1-indexed
154                    end_line: code_span.line,
155                    end_column: code_span.end_col, // Don't add 1 to match test expectation
156                    message: "Spaces inside code span elements".to_string(),
157                    severity: Severity::Warning,
158                    fix: Some(Fix {
159                        range: code_span.byte_offset..code_span.byte_end,
160                        replacement: format!(
161                            "{}{}{}",
162                            "`".repeat(code_span.backtick_count),
163                            trimmed,
164                            "`".repeat(code_span.backtick_count)
165                        ),
166                    }),
167                });
168            }
169        }
170
171        Ok(warnings)
172    }
173
174    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
175        let content = ctx.content;
176        if !self.enabled {
177            return Ok(content.to_string());
178        }
179
180        // Early return if no backticks in content
181        if !content.contains('`') {
182            return Ok(content.to_string());
183        }
184
185        // Get warnings to identify what needs to be fixed
186        let warnings = self.check(ctx)?;
187        if warnings.is_empty() {
188            return Ok(content.to_string());
189        }
190
191        // Collect all fixes and sort by position (reverse order to avoid position shifts)
192        let mut fixes: Vec<(std::ops::Range<usize>, String)> = warnings
193            .into_iter()
194            .filter_map(|w| w.fix.map(|f| (f.range, f.replacement)))
195            .collect();
196
197        fixes.sort_by_key(|(range, _)| std::cmp::Reverse(range.start));
198
199        // Apply fixes - only allocate string when we have fixes to apply
200        let mut result = content.to_string();
201        for (range, replacement) in fixes {
202            result.replace_range(range, &replacement);
203        }
204
205        Ok(result)
206    }
207
208    /// Check if content is likely to have code spans
209    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
210        !ctx.likely_has_code()
211    }
212
213    fn as_any(&self) -> &dyn std::any::Any {
214        self
215    }
216
217    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
218    where
219        Self: Sized,
220    {
221        Box::new(MD038NoSpaceInCode { enabled: true })
222    }
223}
224
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// Runs MD038's `check` over `text` under the given flavor.
    fn check_with(text: &str, flavor: MarkdownFlavor) -> LintResult {
        let ctx = LintContext::new(text, flavor);
        MD038NoSpaceInCode::new().check(&ctx)
    }

    /// Runs MD038's `check` over `text` under the standard flavor.
    fn check_standard(text: &str) -> LintResult {
        check_with(text, MarkdownFlavor::Standard)
    }

    /// Runs MD038's `fix` over `text` under the standard flavor.
    fn fix_standard(text: &str) -> String {
        let ctx = LintContext::new(text, MarkdownFlavor::Standard);
        MD038NoSpaceInCode::new().fix(&ctx).unwrap()
    }

    #[test]
    fn test_md038_readme_false_positives() {
        // Lines taken from README.md; none of their code spans is padded,
        // so none of them may be reported.
        let valid_cases = [
            "3. `pyproject.toml` (must contain `[tool.rumdl]` section)",
            "#### Effective Configuration (`rumdl config`)",
            "- Blue: `.rumdl.toml`",
            "### Defaults Only (`rumdl config --defaults`)",
        ];

        for case in valid_cases {
            let result = check_standard(case).unwrap();
            assert!(
                result.is_empty(),
                "Should not flag code spans without leading/trailing spaces: '{}'. Got {} warnings",
                case,
                result.len()
            );
        }
    }

    #[test]
    fn test_md038_valid() {
        let valid_cases = [
            "This is `code` in a sentence.",
            "This is a `longer code span` in a sentence.",
            "This is `code with internal spaces` which is fine.",
            "Code span at `end of line`",
            "`Start of line` code span",
            "Multiple `code spans` in `one line` are fine",
            "Code span with `symbols: !@#$%^&*()`",
            "Empty code span `` is technically valid",
        ];

        for case in valid_cases {
            let result = check_standard(case).unwrap();
            assert!(result.is_empty(), "Valid case should not have warnings: {case}");
        }
    }

    #[test]
    fn test_md038_invalid() {
        // Any padding is reported, in line with markdownlint.
        let invalid_cases = [
            "Type ` y ` to confirm.",
            "Use ` git commit -m \"message\" ` to commit.",
            "The variable ` $HOME ` contains home path.",
            "The pattern ` *.txt ` matches text files.",
            "This is ` random word ` with unnecessary spaces.",
            "Text with ` plain text ` should be flagged.",
            "Code with ` just code ` here.",
            "Multiple ` word ` spans with ` text ` in one line.",
            "This is ` code` with leading space.",
            "This is `code ` with trailing space.",
            "This is ` code ` with both leading and trailing space.",
        ];

        for case in invalid_cases {
            let result = check_standard(case).unwrap();
            assert!(!result.is_empty(), "Invalid case should have warnings: {case}");
        }
    }

    #[test]
    fn test_md038_fix() {
        // (padded input, expected output after fixing)
        let test_cases = [
            (
                "This is ` code` with leading space.",
                "This is `code` with leading space.",
            ),
            (
                "This is `code ` with trailing space.",
                "This is `code` with trailing space.",
            ),
            ("This is ` code ` with both spaces.", "This is `code` with both spaces."),
            (
                "Multiple ` code ` and `spans ` to fix.",
                "Multiple `code` and `spans` to fix.",
            ),
        ];

        for (input, expected) in test_cases {
            let result = fix_standard(input);
            assert_eq!(result, expected, "Fix did not produce expected output for: {input}");
        }
    }

    #[test]
    fn test_check_invalid_leading_space() {
        let result = check_standard("This has a ` leading space` in code").unwrap();

        assert_eq!(result.len(), 1);
        let warning = &result[0];
        assert_eq!(warning.line, 1);
        assert!(warning.fix.is_some());
    }

    #[test]
    fn test_code_span_parsing_nested_backticks() {
        let content = "Code with ` nested `code` example ` should preserve backticks";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        let code_spans = ctx.code_spans();

        // Dump what the parser produced to ease debugging when the count is off.
        println!("Content: {content}");
        println!("Code spans found:");
        code_spans.iter().enumerate().for_each(|(i, span)| {
            println!(
                "  Span {}: line={}, col={}-{}, backticks={}, content='{}'",
                i, span.line, span.start_col, span.end_col, span.backtick_count, span.content
            );
        });

        // The outer "nested" span is not kept whole: the parser yields two
        // separate code spans for this line.
        assert_eq!(code_spans.len(), 2, "Should parse as 2 code spans");
    }

    #[test]
    fn test_nested_backtick_detection() {
        // A span whose content contains backticks must be left alone.
        let content = "Code with `` `backticks` inside `` should not be flagged";
        let result = check_standard(content).unwrap();
        assert!(result.is_empty(), "Code spans with backticks should be skipped");
    }

    #[test]
    fn test_quarto_inline_r_code() {
        // Inline R code ("r " followed by an expression) must not be
        // reported under the Quarto flavor.
        let content = r#"The result is `r nchar("test")` which equals 4."#;
        let result_quarto = check_with(content, MarkdownFlavor::Quarto).unwrap();
        assert!(
            result_quarto.is_empty(),
            "Quarto inline R code should not trigger warnings. Got {} warnings",
            result_quarto.len()
        );

        // Ordinary padded spans are still reported under Quarto.
        let content_other = "This has ` plain text ` with spaces.";
        let result_other = check_with(content_other, MarkdownFlavor::Quarto).unwrap();
        assert_eq!(
            result_other.len(),
            1,
            "Quarto should still flag non-R code spans with improper spaces"
        );
    }

    #[test]
    fn test_multibyte_utf8_no_panic() {
        // Regression test: multi-byte UTF-8 text around code spans must not
        // panic while the rule inspects the text between spans.
        // The first three lines are real examples from the-art-of-command-line
        // translations; the last mixes multi-byte text with several spans.
        let samples = [
            (
                "- Χρήσιμα εργαλεία της γραμμής εντολών είναι τα `ping`,` ipconfig`, `traceroute` και `netstat`.",
                "Greek text should not panic",
            ),
            (
                "- 當你需要對文字檔案做集合交、並、差運算時,`sort`/`uniq` 很有幫助。",
                "Chinese text should not panic",
            ),
            (
                "- Основи роботи з файлами: `ls` і `ls -l`, `less`, `head`,` tail` і `tail -f`.",
                "Cyrillic text should not panic",
            ),
            (
                "使用 `git` 命令和 `npm` 工具来管理项目,可以用 `docker` 容器化。",
                "Mixed Chinese text with multiple code spans should not panic",
            ),
        ];

        for (text, failure_message) in samples {
            let result = check_standard(text);
            assert!(result.is_ok(), "{failure_message}");
        }
    }
}