rumdl_lib/rules/
md038_no_space_in_code.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2
/// Rule MD038: Spaces inside code span elements
///
/// See [docs/md038.md](../../docs/md038.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when there are spaces directly inside the backticks
/// of a code span.
///
/// For example:
///
/// ``` markdown
/// ` some text`
/// `some text `
/// ` some text `
/// ```
///
/// To fix this issue, remove the leading and trailing spaces within the code span markers:
///
/// ``` markdown
/// `some text`
/// ```
///
/// Note: Code spans containing backticks (e.g., `` `backticks` inside ``) are not flagged
/// to avoid breaking nested backtick structures used to display backticks in documentation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MD038NoSpaceInCode {
    /// When `false`, the rule reports nothing and its fixer returns the input unchanged.
    pub enabled: bool,
    /// Master switch for the leniency heuristics: when `false`, every code span
    /// with leading/trailing spaces is reported and the two flags below are
    /// not consulted.
    pub allow_intentional_spaces: bool,
    /// Allow spaces around single characters (e.g., ` y ` for visibility).
    /// Only consulted when `allow_intentional_spaces` is `true`.
    pub allow_single_char_spaces: bool,
    /// Allow spaces in command examples (heuristic: contains common shell indicators).
    /// Only consulted when `allow_intentional_spaces` is `true`.
    pub allow_command_spaces: bool,
}
37
38impl Default for MD038NoSpaceInCode {
39    fn default() -> Self {
40        Self::new()
41    }
42}
43
44impl MD038NoSpaceInCode {
45    pub fn new() -> Self {
46        Self {
47            enabled: true,
48            allow_intentional_spaces: true, // More lenient by default
49            allow_single_char_spaces: true,
50            allow_command_spaces: true,
51        }
52    }
53
54    pub fn strict() -> Self {
55        Self {
56            enabled: true,
57            allow_intentional_spaces: false,
58            allow_single_char_spaces: false,
59            allow_command_spaces: false,
60        }
61    }
62
63    /// Determine if spaces in a code span should be allowed based on content heuristics
64    fn should_allow_spaces(&self, code_content: &str, trimmed: &str) -> bool {
65        // If intentional spaces are globally allowed, apply heuristics
66        if self.allow_intentional_spaces {
67            // Allow single character with spaces for visibility (e.g., ` y `, ` * `)
68            if self.allow_single_char_spaces && trimmed.len() == 1 {
69                return true;
70            }
71
72            // Allow command examples with spaces
73            if self.allow_command_spaces && self.looks_like_command(trimmed) {
74                return true;
75            }
76
77            // Allow spaces around variable references or file patterns
78            if self.looks_like_variable_or_pattern(trimmed) {
79                return true;
80            }
81
82            // Allow if spaces improve readability for complex content
83            if self.spaces_improve_readability(code_content, trimmed) {
84                return true;
85            }
86        }
87
88        false
89    }
90
91    /// Check if content looks like a shell command that benefits from spaces
92    fn looks_like_command(&self, content: &str) -> bool {
93        // Common command patterns - check case-insensitive prefixes
94        const COMMAND_PREFIXES: &[&str] = &[
95            "git ", "npm ", "cargo ", "docker ", "kubectl ", "pip ", "yarn ", "sudo ", "chmod ", "chown ", "ls ",
96            "cd ", "mkdir ", "rm ", "cp ", "mv ", "cat ", "grep ", "find ", "awk ", "sed ", "rumdl ",
97        ];
98
99        // Check if content starts with any command (case-insensitive)
100        // Use iterator with early return to avoid allocating lowercase string unless needed
101        let needs_lowercase_check = COMMAND_PREFIXES.iter().any(|&cmd| {
102            content.len() >= cmd.len() && content.as_bytes()[..cmd.len()].eq_ignore_ascii_case(cmd.as_bytes())
103        });
104
105        needs_lowercase_check
106            || content.contains(" -") // Commands with flags
107            || content.contains(" --") // Commands with long flags
108    }
109
110    /// Check if content looks like a variable reference or file pattern
111    fn looks_like_variable_or_pattern(&self, content: &str) -> bool {
112        // Variable patterns: $VAR, ${VAR}, %VAR%, etc.
113        content.starts_with('$')
114            || content.starts_with('%') && content.ends_with('%')
115            || (content.contains("*") && content.len() > 3) // File patterns like *.txt (must be substantial)
116            || (content.contains("?") && content.len() > 3 && content.contains("."))
117        // File patterns like file?.txt
118    }
119
120    /// Check if spaces improve readability for complex content
121    fn spaces_improve_readability(&self, _code_content: &str, trimmed: &str) -> bool {
122        // Complex content that benefits from spacing - be more conservative
123        trimmed.len() >= 20 // Only longer content might benefit from spacing
124            || trimmed.contains("://") // URLs
125            || trimmed.contains("->") // Arrows or operators
126            || trimmed.contains("=>") // Lambda arrows
127            || trimmed.contains("&&") || trimmed.contains("||") // Boolean operators
128            || (trimmed.chars().filter(|c| c.is_ascii_punctuation()).count() as f64 / trimmed.len() as f64) > 0.4
129        // Higher punctuation density threshold
130    }
131
132    /// Check if a code span is likely part of a nested backtick structure
133    fn is_likely_nested_backticks(&self, ctx: &crate::lint_context::LintContext, span_index: usize) -> bool {
134        // If there are multiple code spans on the same line, and there's text
135        // between them that contains "code" or other indicators, it's likely nested
136        let code_spans = ctx.code_spans();
137        let current_span = &code_spans[span_index];
138        let current_line = current_span.line;
139
140        // Look for other code spans on the same line
141        let same_line_spans: Vec<_> = code_spans
142            .iter()
143            .enumerate()
144            .filter(|(i, s)| s.line == current_line && *i != span_index)
145            .collect();
146
147        if same_line_spans.is_empty() {
148            return false;
149        }
150
151        // Check if there's content between spans that might indicate nesting
152        // Get the line content
153        let line_idx = current_line - 1; // Convert to 0-based
154        if line_idx >= ctx.lines.len() {
155            return false;
156        }
157
158        let line_content = &ctx.lines[line_idx].content;
159
160        // For each pair of adjacent code spans, check what's between them
161        for (_, other_span) in &same_line_spans {
162            let start = current_span.end_col.min(other_span.end_col);
163            let end = current_span.start_col.max(other_span.start_col);
164
165            if start < end && end <= line_content.len() {
166                let between = &line_content[start..end];
167                // If there's text containing "code" or similar patterns between spans,
168                // it's likely they're showing nested backticks
169                if between.contains("code") || between.contains("backtick") {
170                    return true;
171                }
172            }
173        }
174
175        false
176    }
177}
178
179impl Rule for MD038NoSpaceInCode {
180    fn name(&self) -> &'static str {
181        "MD038"
182    }
183
184    fn description(&self) -> &'static str {
185        "Spaces inside code span elements"
186    }
187
188    fn category(&self) -> RuleCategory {
189        RuleCategory::Other
190    }
191
192    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
193        if !self.enabled {
194            return Ok(vec![]);
195        }
196
197        let mut warnings = Vec::new();
198
199        // Use centralized code spans from LintContext
200        let code_spans = ctx.code_spans();
201        for (i, code_span) in code_spans.iter().enumerate() {
202            let code_content = &code_span.content;
203
204            // Skip empty code spans
205            if code_content.is_empty() {
206                continue;
207            }
208
209            // Early check: if no leading/trailing whitespace, skip
210            let has_leading_space = code_content.chars().next().is_some_and(|c| c.is_whitespace());
211            let has_trailing_space = code_content.chars().last().is_some_and(|c| c.is_whitespace());
212
213            if !has_leading_space && !has_trailing_space {
214                continue;
215            }
216
217            let trimmed = code_content.trim();
218
219            // Check if there are leading or trailing spaces
220            if code_content != trimmed {
221                // Check if the content itself contains backticks - if so, skip to avoid
222                // breaking nested backtick structures
223                if trimmed.contains('`') {
224                    continue;
225                }
226
227                // Check if this might be part of a nested backtick structure
228                // by looking for other code spans nearby that might indicate nesting
229                if self.is_likely_nested_backticks(ctx, i) {
230                    continue;
231                }
232
233                // Check if spaces are allowed in this context
234                if self.should_allow_spaces(code_content, trimmed) {
235                    continue;
236                }
237
238                warnings.push(LintWarning {
239                    rule_name: Some(self.name()),
240                    line: code_span.line,
241                    column: code_span.start_col + 1, // Convert to 1-indexed
242                    end_line: code_span.line,
243                    end_column: code_span.end_col, // Don't add 1 to match test expectation
244                    message: "Spaces inside code span elements".to_string(),
245                    severity: Severity::Warning,
246                    fix: Some(Fix {
247                        range: code_span.byte_offset..code_span.byte_end,
248                        replacement: format!(
249                            "{}{}{}",
250                            "`".repeat(code_span.backtick_count),
251                            trimmed,
252                            "`".repeat(code_span.backtick_count)
253                        ),
254                    }),
255                });
256            }
257        }
258
259        Ok(warnings)
260    }
261
262    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
263        let content = ctx.content;
264        if !self.enabled {
265            return Ok(content.to_string());
266        }
267
268        // Early return if no backticks in content
269        if !content.contains('`') {
270            return Ok(content.to_string());
271        }
272
273        // Get warnings to identify what needs to be fixed
274        let warnings = self.check(ctx)?;
275        if warnings.is_empty() {
276            return Ok(content.to_string());
277        }
278
279        // Collect all fixes and sort by position (reverse order to avoid position shifts)
280        let mut fixes: Vec<(std::ops::Range<usize>, String)> = warnings
281            .into_iter()
282            .filter_map(|w| w.fix.map(|f| (f.range, f.replacement)))
283            .collect();
284
285        fixes.sort_by_key(|(range, _)| std::cmp::Reverse(range.start));
286
287        // Apply fixes - only allocate string when we have fixes to apply
288        let mut result = content.to_string();
289        for (range, replacement) in fixes {
290            result.replace_range(range, &replacement);
291        }
292
293        Ok(result)
294    }
295
296    /// Check if content is likely to have code spans
297    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
298        !ctx.likely_has_code()
299    }
300
301    fn as_any(&self) -> &dyn std::any::Any {
302        self
303    }
304
305    fn default_config_section(&self) -> Option<(String, toml::Value)> {
306        let mut map = toml::map::Map::new();
307        map.insert(
308            "allow_intentional_spaces".to_string(),
309            toml::Value::Boolean(self.allow_intentional_spaces),
310        );
311        map.insert(
312            "allow_single_char_spaces".to_string(),
313            toml::Value::Boolean(self.allow_single_char_spaces),
314        );
315        map.insert(
316            "allow_command_spaces".to_string(),
317            toml::Value::Boolean(self.allow_command_spaces),
318        );
319        Some((self.name().to_string(), toml::Value::Table(map)))
320    }
321
322    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
323    where
324        Self: Sized,
325    {
326        let allow_intentional_spaces =
327            crate::config::get_rule_config_value::<bool>(config, "MD038", "allow_intentional_spaces").unwrap_or(true); // Default to true for better UX
328
329        let allow_single_char_spaces =
330            crate::config::get_rule_config_value::<bool>(config, "MD038", "allow_single_char_spaces").unwrap_or(true);
331
332        let allow_command_spaces =
333            crate::config::get_rule_config_value::<bool>(config, "MD038", "allow_command_spaces").unwrap_or(true);
334
335        Box::new(MD038NoSpaceInCode {
336            enabled: true,
337            allow_intentional_spaces,
338            allow_single_char_spaces,
339            allow_command_spaces,
340        })
341    }
342}
343
#[cfg(test)]
mod tests {
    use super::*;

    /// Lints `markdown` with `rule` (standard flavor) and returns how many
    /// warnings were reported.
    fn warning_count(rule: &MD038NoSpaceInCode, markdown: &str) -> usize {
        let ctx = crate::lint_context::LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap().len()
    }

    #[test]
    fn test_md038_readme_false_positives() {
        // Lines taken from README.md whose code spans have no padding at all
        // and therefore must never be flagged.
        let rule = MD038NoSpaceInCode::new();
        let valid_cases = [
            "3. `pyproject.toml` (must contain `[tool.rumdl]` section)",
            "#### Effective Configuration (`rumdl config`)",
            "- Blue: `.rumdl.toml`",
            "### Defaults Only (`rumdl config --defaults`)",
        ];

        for case in valid_cases {
            let count = warning_count(&rule, case);
            assert_eq!(
                count, 0,
                "Should not flag code spans without leading/trailing spaces: '{case}'. Got {count} warnings"
            );
        }
    }

    #[test]
    fn test_md038_valid() {
        let rule = MD038NoSpaceInCode::new();
        let valid_cases = [
            "This is `code` in a sentence.",
            "This is a `longer code span` in a sentence.",
            "This is `code with internal spaces` which is fine.",
            "This is`` code with double backticks`` which is also fine.",
            "Code span at `end of line`",
            "`Start of line` code span",
            "Multiple `code spans` in `one line` are fine",
            "Code span with `symbols: !@#$%^&*()`",
            "Empty code span `` is technically valid",
            // Cases that are allowed by the lenient settings
            "Type ` y ` to confirm.",                       // Single character with spaces
            "Use ` git commit -m \"message\" ` to commit.", // Command with spaces
            "The variable ` $HOME ` contains home path.",   // Variable reference
            "The pattern ` *.txt ` matches text files.",    // File pattern
            "URL example ` https://example.com/very/long/path?query=value&more=params ` here.", // Complex long URL
        ];

        for case in valid_cases {
            assert_eq!(
                warning_count(&rule, case),
                0,
                "Valid case should not have warnings: {case}"
            );
        }
    }

    #[test]
    fn test_md038_invalid() {
        let rule = MD038NoSpaceInCode::new();
        // Cases that should still be flagged even with lenient settings
        let invalid_cases = [
            "This is ` random word ` with unnecessary spaces.", // Not a command/variable/single char
            "Text with ` plain text ` should be flagged.",      // Just plain text with spaces
            "Code with ` just code ` here.",                    // Simple code with spaces
            "Multiple ` word ` spans with ` text ` in one line.", // Multiple simple cases
        ];

        for case in invalid_cases {
            assert_ne!(
                warning_count(&rule, case),
                0,
                "Invalid case should have warnings: {case}"
            );
        }
    }

    #[test]
    fn test_md038_strict_mode() {
        let rule = MD038NoSpaceInCode::strict();
        // In strict mode, ALL spaces should be flagged
        let invalid_cases = [
            "Type ` y ` to confirm.",                       // Single character with spaces
            "Use ` git commit -m \"message\" ` to commit.", // Command with spaces
            "The variable ` $HOME ` contains home path.",   // Variable reference
            "The pattern ` *.txt ` matches text files.",    // File pattern
            "This is ` code` with leading space.",
            "This is `code ` with trailing space.",
            "This is ` code ` with both leading and trailing space.",
        ];

        for case in invalid_cases {
            assert_ne!(
                warning_count(&rule, case),
                0,
                "Strict mode should flag all spaces: {case}"
            );
        }
    }

    #[test]
    fn test_md038_fix() {
        let rule = MD038NoSpaceInCode::new();
        let test_cases = [
            (
                "This is ` code` with leading space.",
                "This is `code` with leading space.",
            ),
            (
                "This is `code ` with trailing space.",
                "This is `code` with trailing space.",
            ),
            ("This is ` code ` with both spaces.", "This is `code` with both spaces."),
            (
                "Multiple ` code ` and `spans ` to fix.",
                "Multiple `code` and `spans` to fix.",
            ),
        ];

        for (input, expected) in test_cases {
            let ctx = crate::lint_context::LintContext::new(input, crate::config::MarkdownFlavor::Standard);
            let fixed = rule.fix(&ctx).unwrap();
            assert_eq!(fixed, expected, "Fix did not produce expected output for: {input}");
        }
    }

    #[test]
    fn test_check_invalid_leading_space() {
        let rule = MD038NoSpaceInCode::new();
        let input = "This has a ` leading space` in code";
        let ctx = crate::lint_context::LintContext::new(input, crate::config::MarkdownFlavor::Standard);
        let warnings = rule.check(&ctx).unwrap();

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(warnings[0].fix.is_some());
    }

    #[test]
    fn test_code_span_parsing_nested_backticks() {
        let content = "Code with ` nested `code` example ` should preserve backticks";
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let code_spans = ctx.code_spans();

        // Describe every parsed span so a failure shows what the parser produced.
        let described: Vec<String> = code_spans
            .iter()
            .enumerate()
            .map(|(i, span)| {
                format!(
                    "  Span {}: line={}, col={}-{}, backticks={}, content='{}'",
                    i, span.line, span.start_col, span.end_col, span.backtick_count, span.content
                )
            })
            .collect();

        // The parser reports two separate code spans here rather than a single
        // span with nested backticks.
        assert_eq!(
            code_spans.len(),
            2,
            "Should parse as 2 code spans. Content: {content}\nCode spans found:\n{}",
            described.join("\n")
        );
    }

    #[test]
    fn test_should_allow_spaces_heuristics() {
        // Strict mode never excuses padding.
        let strict_rule = MD038NoSpaceInCode::strict();
        assert!(!strict_rule.should_allow_spaces(" plain text ", "plain text"));

        // Lenient mode excuses a padded single character but not plain text.
        let lenient_rule = MD038NoSpaceInCode::new();
        assert!(lenient_rule.should_allow_spaces(" y ", "y")); // Single char
        assert!(!lenient_rule.should_allow_spaces(" plain text ", "plain text"));
    }
}