rumdl_lib/rules/
md038_no_space_in_code.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2
/// Rule MD038: spaces inside code span elements.
///
/// See [docs/md038.md](../../docs/md038.md) for full documentation, configuration, and examples.
///
/// The rule reports code spans whose content begins or ends with whitespace,
/// for example:
///
/// ``` markdown
/// ` some text`
/// `some text `
/// ` some text `
/// ```
///
/// The suggested fix removes the padding next to the code span markers:
///
/// ``` markdown
/// `some text`
/// ```
///
/// Note: code spans whose content contains backticks (e.g., `` `backticks` inside ``)
/// are not flagged, so nested backtick structures used to display backticks in
/// documentation are left intact.
#[derive(Clone, Debug)]
pub struct MD038NoSpaceInCode {
    /// Master switch: when `false`, `check` reports nothing and `fix` returns
    /// the content unchanged.
    pub enabled: bool,
    /// Allow leading/trailing spaces in code spans when they improve readability.
    /// When `false`, none of the allowance heuristics are consulted.
    pub allow_intentional_spaces: bool,
    /// Allow spaces around single characters (e.g., ` y ` for visibility)
    pub allow_single_char_spaces: bool,
    /// Allow spaces in command examples (heuristic: contains common shell indicators)
    pub allow_command_spaces: bool,
}
37
38impl Default for MD038NoSpaceInCode {
39    fn default() -> Self {
40        Self::new()
41    }
42}
43
44impl MD038NoSpaceInCode {
45    pub fn new() -> Self {
46        Self {
47            enabled: true,
48            allow_intentional_spaces: true, // More lenient by default
49            allow_single_char_spaces: true,
50            allow_command_spaces: true,
51        }
52    }
53
54    pub fn strict() -> Self {
55        Self {
56            enabled: true,
57            allow_intentional_spaces: false,
58            allow_single_char_spaces: false,
59            allow_command_spaces: false,
60        }
61    }
62
63    /// Determine if spaces in a code span should be allowed based on content heuristics
64    fn should_allow_spaces(&self, code_content: &str, trimmed: &str) -> bool {
65        // If intentional spaces are globally allowed, apply heuristics
66        if self.allow_intentional_spaces {
67            // Allow single character with spaces for visibility (e.g., ` y `, ` * `)
68            if self.allow_single_char_spaces && trimmed.len() == 1 {
69                return true;
70            }
71
72            // Allow command examples with spaces
73            if self.allow_command_spaces && self.looks_like_command(trimmed) {
74                return true;
75            }
76
77            // Allow spaces around variable references or file patterns
78            if self.looks_like_variable_or_pattern(trimmed) {
79                return true;
80            }
81
82            // Allow if spaces improve readability for complex content
83            if self.spaces_improve_readability(code_content, trimmed) {
84                return true;
85            }
86        }
87
88        false
89    }
90
91    /// Check if content looks like a shell command that benefits from spaces
92    fn looks_like_command(&self, content: &str) -> bool {
93        // Common command patterns - check case-insensitive prefixes
94        const COMMAND_PREFIXES: &[&str] = &[
95            "git ", "npm ", "cargo ", "docker ", "kubectl ", "pip ", "yarn ", "sudo ", "chmod ", "chown ", "ls ",
96            "cd ", "mkdir ", "rm ", "cp ", "mv ", "cat ", "grep ", "find ", "awk ", "sed ", "rumdl ",
97        ];
98
99        // Check if content starts with any command (case-insensitive)
100        // Use iterator with early return to avoid allocating lowercase string unless needed
101        let needs_lowercase_check = COMMAND_PREFIXES.iter().any(|&cmd| {
102            content.len() >= cmd.len() && content.as_bytes()[..cmd.len()].eq_ignore_ascii_case(cmd.as_bytes())
103        });
104
105        needs_lowercase_check
106            || content.contains(" -") // Commands with flags
107            || content.contains(" --") // Commands with long flags
108    }
109
110    /// Check if content looks like a variable reference or file pattern
111    fn looks_like_variable_or_pattern(&self, content: &str) -> bool {
112        // Variable patterns: $VAR, ${VAR}, %VAR%, etc.
113        content.starts_with('$')
114            || content.starts_with('%') && content.ends_with('%')
115            || (content.contains("*") && content.len() > 3) // File patterns like *.txt (must be substantial)
116            || (content.contains("?") && content.len() > 3 && content.contains("."))
117        // File patterns like file?.txt
118    }
119
120    /// Check if spaces improve readability for complex content
121    fn spaces_improve_readability(&self, _code_content: &str, trimmed: &str) -> bool {
122        // Complex content that benefits from spacing - be more conservative
123        trimmed.len() >= 20 // Only longer content might benefit from spacing
124            || trimmed.contains("://") // URLs
125            || trimmed.contains("->") // Arrows or operators
126            || trimmed.contains("=>") // Lambda arrows
127            || trimmed.contains("&&") || trimmed.contains("||") // Boolean operators
128            || (trimmed.chars().filter(|c| c.is_ascii_punctuation()).count() as f64 / trimmed.len() as f64) > 0.4
129        // Higher punctuation density threshold
130    }
131
132    /// Check if a code span is likely part of a nested backtick structure
133    fn is_likely_nested_backticks(&self, ctx: &crate::lint_context::LintContext, span_index: usize) -> bool {
134        // If there are multiple code spans on the same line, and there's text
135        // between them that contains "code" or other indicators, it's likely nested
136        let code_spans = ctx.code_spans();
137        let current_span = &code_spans[span_index];
138        let current_line = current_span.line;
139
140        // Look for other code spans on the same line
141        let same_line_spans: Vec<_> = code_spans
142            .iter()
143            .enumerate()
144            .filter(|(i, s)| s.line == current_line && *i != span_index)
145            .collect();
146
147        if same_line_spans.is_empty() {
148            return false;
149        }
150
151        // Check if there's content between spans that might indicate nesting
152        // Get the line content
153        let line_idx = current_line - 1; // Convert to 0-based
154        if line_idx >= ctx.lines.len() {
155            return false;
156        }
157
158        let line_content = &ctx.lines[line_idx].content;
159
160        // For each pair of adjacent code spans, check what's between them
161        for (_, other_span) in &same_line_spans {
162            let start = current_span.end_col.min(other_span.end_col);
163            let end = current_span.start_col.max(other_span.start_col);
164
165            if start < end && end <= line_content.len() {
166                let between = &line_content[start..end];
167                // If there's text containing "code" or similar patterns between spans,
168                // it's likely they're showing nested backticks
169                if between.contains("code") || between.contains("backtick") {
170                    return true;
171                }
172            }
173        }
174
175        false
176    }
177}
178
179impl Rule for MD038NoSpaceInCode {
180    fn name(&self) -> &'static str {
181        "MD038"
182    }
183
184    fn description(&self) -> &'static str {
185        "Spaces inside code span elements"
186    }
187
188    fn category(&self) -> RuleCategory {
189        RuleCategory::Other
190    }
191
192    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
193        if !self.enabled {
194            return Ok(vec![]);
195        }
196
197        let mut warnings = Vec::new();
198
199        // Use centralized code spans from LintContext
200        let code_spans = ctx.code_spans();
201        for (i, code_span) in code_spans.iter().enumerate() {
202            let code_content = &code_span.content;
203
204            // Skip empty code spans
205            if code_content.is_empty() {
206                continue;
207            }
208
209            // Early check: if no leading/trailing whitespace, skip
210            let has_leading_space = code_content.chars().next().is_some_and(|c| c.is_whitespace());
211            let has_trailing_space = code_content.chars().last().is_some_and(|c| c.is_whitespace());
212
213            if !has_leading_space && !has_trailing_space {
214                continue;
215            }
216
217            let trimmed = code_content.trim();
218
219            // Check if there are leading or trailing spaces
220            if code_content != trimmed {
221                // Check if the content itself contains backticks - if so, skip to avoid
222                // breaking nested backtick structures
223                if trimmed.contains('`') {
224                    continue;
225                }
226
227                // Skip inline R code in Quarto/RMarkdown: `r expression`
228                // This is a legitimate pattern where space is required after 'r'
229                if ctx.flavor == crate::config::MarkdownFlavor::Quarto
230                    && trimmed.starts_with('r')
231                    && trimmed.len() > 1
232                    && trimmed.chars().nth(1).is_some_and(|c| c.is_whitespace())
233                {
234                    continue;
235                }
236
237                // Check if this might be part of a nested backtick structure
238                // by looking for other code spans nearby that might indicate nesting
239                if self.is_likely_nested_backticks(ctx, i) {
240                    continue;
241                }
242
243                // Check if spaces are allowed in this context
244                if self.should_allow_spaces(code_content, trimmed) {
245                    continue;
246                }
247
248                warnings.push(LintWarning {
249                    rule_name: Some(self.name().to_string()),
250                    line: code_span.line,
251                    column: code_span.start_col + 1, // Convert to 1-indexed
252                    end_line: code_span.line,
253                    end_column: code_span.end_col, // Don't add 1 to match test expectation
254                    message: "Spaces inside code span elements".to_string(),
255                    severity: Severity::Warning,
256                    fix: Some(Fix {
257                        range: code_span.byte_offset..code_span.byte_end,
258                        replacement: format!(
259                            "{}{}{}",
260                            "`".repeat(code_span.backtick_count),
261                            trimmed,
262                            "`".repeat(code_span.backtick_count)
263                        ),
264                    }),
265                });
266            }
267        }
268
269        Ok(warnings)
270    }
271
272    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
273        let content = ctx.content;
274        if !self.enabled {
275            return Ok(content.to_string());
276        }
277
278        // Early return if no backticks in content
279        if !content.contains('`') {
280            return Ok(content.to_string());
281        }
282
283        // Get warnings to identify what needs to be fixed
284        let warnings = self.check(ctx)?;
285        if warnings.is_empty() {
286            return Ok(content.to_string());
287        }
288
289        // Collect all fixes and sort by position (reverse order to avoid position shifts)
290        let mut fixes: Vec<(std::ops::Range<usize>, String)> = warnings
291            .into_iter()
292            .filter_map(|w| w.fix.map(|f| (f.range, f.replacement)))
293            .collect();
294
295        fixes.sort_by_key(|(range, _)| std::cmp::Reverse(range.start));
296
297        // Apply fixes - only allocate string when we have fixes to apply
298        let mut result = content.to_string();
299        for (range, replacement) in fixes {
300            result.replace_range(range, &replacement);
301        }
302
303        Ok(result)
304    }
305
306    /// Check if content is likely to have code spans
307    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
308        !ctx.likely_has_code()
309    }
310
311    fn as_any(&self) -> &dyn std::any::Any {
312        self
313    }
314
315    fn default_config_section(&self) -> Option<(String, toml::Value)> {
316        let mut map = toml::map::Map::new();
317        map.insert(
318            "allow_intentional_spaces".to_string(),
319            toml::Value::Boolean(self.allow_intentional_spaces),
320        );
321        map.insert(
322            "allow_single_char_spaces".to_string(),
323            toml::Value::Boolean(self.allow_single_char_spaces),
324        );
325        map.insert(
326            "allow_command_spaces".to_string(),
327            toml::Value::Boolean(self.allow_command_spaces),
328        );
329        Some((self.name().to_string(), toml::Value::Table(map)))
330    }
331
332    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
333    where
334        Self: Sized,
335    {
336        let allow_intentional_spaces =
337            crate::config::get_rule_config_value::<bool>(config, "MD038", "allow_intentional_spaces").unwrap_or(true); // Default to true for better UX
338
339        let allow_single_char_spaces =
340            crate::config::get_rule_config_value::<bool>(config, "MD038", "allow_single_char_spaces").unwrap_or(true);
341
342        let allow_command_spaces =
343            crate::config::get_rule_config_value::<bool>(config, "MD038", "allow_command_spaces").unwrap_or(true);
344
345        Box::new(MD038NoSpaceInCode {
346            enabled: true,
347            allow_intentional_spaces,
348            allow_single_char_spaces,
349            allow_command_spaces,
350        })
351    }
352}
353
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `rule` over `text` using the standard Markdown flavor and returns its warnings.
    fn lint_standard(rule: &MD038NoSpaceInCode, text: &str) -> Vec<LintWarning> {
        let ctx = crate::lint_context::LintContext::new(text, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    #[test]
    fn test_md038_readme_false_positives() {
        // Lines taken from README.md whose code spans have no padding at all
        // and therefore must not be reported.
        let rule = MD038NoSpaceInCode::new();
        let valid_cases = [
            "3. `pyproject.toml` (must contain `[tool.rumdl]` section)",
            "#### Effective Configuration (`rumdl config`)",
            "- Blue: `.rumdl.toml`",
            "### Defaults Only (`rumdl config --defaults`)",
        ];

        for case in valid_cases {
            let result = lint_standard(&rule, case);
            assert!(
                result.is_empty(),
                "Should not flag code spans without leading/trailing spaces: '{}'. Got {} warnings",
                case,
                result.len()
            );
        }
    }

    #[test]
    fn test_md038_valid() {
        let rule = MD038NoSpaceInCode::new();
        let valid_cases = [
            // Code spans without padding
            "This is `code` in a sentence.",
            "This is a `longer code span` in a sentence.",
            "This is `code with internal spaces` which is fine.",
            "This is`` code with double backticks`` which is also fine.",
            "Code span at `end of line`",
            "`Start of line` code span",
            "Multiple `code spans` in `one line` are fine",
            "Code span with `symbols: !@#$%^&*()`",
            "Empty code span `` is technically valid",
            // Padded spans that the lenient settings tolerate
            "Type ` y ` to confirm.",                       // Single character with spaces
            "Use ` git commit -m \"message\" ` to commit.", // Command with spaces
            "The variable ` $HOME ` contains home path.",   // Variable reference
            "The pattern ` *.txt ` matches text files.",    // File pattern
            "URL example ` https://example.com/very/long/path?query=value&more=params ` here.", // Complex long URL
        ];

        for case in valid_cases {
            let result = lint_standard(&rule, case);
            assert!(result.is_empty(), "Valid case should not have warnings: {case}");
        }
    }

    #[test]
    fn test_md038_invalid() {
        // Padding around ordinary words is reported even with lenient settings.
        let rule = MD038NoSpaceInCode::new();
        let invalid_cases = [
            "This is ` random word ` with unnecessary spaces.", // Not a command/variable/single char
            "Text with ` plain text ` should be flagged.",      // Just plain text with spaces
            "Code with ` just code ` here.",                    // Simple code with spaces
            "Multiple ` word ` spans with ` text ` in one line.", // Multiple simple cases
        ];

        for case in invalid_cases {
            let result = lint_standard(&rule, case);
            assert!(!result.is_empty(), "Invalid case should have warnings: {case}");
        }
    }

    #[test]
    fn test_md038_strict_mode() {
        // With every allowance off, each of these padded spans is reported.
        let rule = MD038NoSpaceInCode::strict();
        let invalid_cases = [
            "Type ` y ` to confirm.",                       // Single character with spaces
            "Use ` git commit -m \"message\" ` to commit.", // Command with spaces
            "The variable ` $HOME ` contains home path.",   // Variable reference
            "The pattern ` *.txt ` matches text files.",    // File pattern
            "This is ` code` with leading space.",
            "This is `code ` with trailing space.",
            "This is ` code ` with both leading and trailing space.",
        ];

        for case in invalid_cases {
            let result = lint_standard(&rule, case);
            assert!(!result.is_empty(), "Strict mode should flag all spaces: {case}");
        }
    }

    #[test]
    fn test_md038_fix() {
        let rule = MD038NoSpaceInCode::new();
        // (input, expected output after fixing)
        let test_cases = [
            (
                "This is ` code` with leading space.",
                "This is `code` with leading space.",
            ),
            (
                "This is `code ` with trailing space.",
                "This is `code` with trailing space.",
            ),
            ("This is ` code ` with both spaces.", "This is `code` with both spaces."),
            (
                "Multiple ` code ` and `spans ` to fix.",
                "Multiple `code` and `spans` to fix.",
            ),
        ];

        for (input, expected) in test_cases {
            let ctx = crate::lint_context::LintContext::new(input, crate::config::MarkdownFlavor::Standard);
            let result = rule.fix(&ctx).unwrap();
            assert_eq!(result, expected, "Fix did not produce expected output for: {input}");
        }
    }

    #[test]
    fn test_check_invalid_leading_space() {
        let rule = MD038NoSpaceInCode::new();
        let result = lint_standard(&rule, "This has a ` leading space` in code");

        assert_eq!(result.len(), 1);
        let warning = &result[0];
        assert_eq!(warning.line, 1);
        assert!(warning.fix.is_some());
    }

    #[test]
    fn test_code_span_parsing_nested_backticks() {
        let content = "Code with ` nested `code` example ` should preserve backticks";
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        println!("Content: {content}");
        println!("Code spans found:");
        let code_spans = ctx.code_spans();
        for (i, span) in code_spans.iter().enumerate() {
            println!(
                "  Span {}: line={}, col={}-{}, backticks={}, content='{}'",
                i, span.line, span.start_col, span.end_col, span.backtick_count, span.content
            );
        }

        // The single-backtick "nesting" is parsed as two separate code spans, not one.
        assert_eq!(code_spans.len(), 2, "Should parse as 2 code spans");
    }

    #[test]
    fn test_nested_backtick_detection() {
        // Strict mode: the heuristics never allow padding.
        let strict_rule = MD038NoSpaceInCode::strict();
        assert!(!strict_rule.should_allow_spaces(" plain text ", "plain text"));

        // Lenient mode: a single character is allowed, plain words are not.
        let lenient_rule = MD038NoSpaceInCode::new();
        assert!(lenient_rule.should_allow_spaces(" y ", "y")); // Single char
        assert!(!lenient_rule.should_allow_spaces(" plain text ", "plain text"));
    }

    #[test]
    fn test_quarto_inline_r_code() {
        // Strict mode is used so that only the Quarto-specific handling could
        // exempt anything.
        let rule_strict = MD038NoSpaceInCode::strict();

        // Inline R code: "r " followed by an expression.
        let content = r#"The result is `r nchar("test")` which equals 4."#;
        let ctx_quarto_strict = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Quarto);
        let result_quarto_strict = rule_strict.check(&ctx_quarto_strict).unwrap();
        assert!(
            result_quarto_strict.is_empty(),
            "Quarto inline R code should not trigger warnings even in strict mode. Got {} warnings",
            result_quarto_strict.len()
        );

        // Ordinary padded spans are still reported under the Quarto flavor.
        let content_other = "This has ` plain text ` with spaces.";
        let ctx_other = crate::lint_context::LintContext::new(content_other, crate::config::MarkdownFlavor::Quarto);
        let result_other = rule_strict.check(&ctx_other).unwrap();
        assert_eq!(
            result_other.len(),
            1,
            "Quarto strict mode should still flag non-R code spans with improper spaces"
        );
    }
}