rumdl_lib/rules/
md038_no_space_in_code.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2
/// MD038 - flags whitespace padding just inside code span delimiters.
///
/// See [docs/md038.md](../../docs/md038.md) for full documentation, configuration, and examples.
///
/// The rule is triggered when the text of a code span begins or ends with
/// whitespace, as in each of the following:
///
/// ``` markdown
/// ` some text`
/// `some text `
/// ` some text `
/// ```
///
/// The suggested fix drops the padding and keeps the surrounding backticks:
///
/// ``` markdown
/// `some text`
/// ```
///
/// Note: code spans whose content itself contains backticks (e.g., `` `backticks` inside ``)
/// are never flagged, so nested backtick structures used to display backticks in
/// documentation are left intact.
#[derive(Clone, Debug)]
pub struct MD038NoSpaceInCode {
    /// When `false`, `check` reports nothing and `fix` returns the input unchanged.
    pub enabled: bool,
    /// Master switch for the leniency heuristics: when `false`, every padded code
    /// span is reported and the two `allow_*` flags below are not consulted.
    pub allow_intentional_spaces: bool,
    /// Tolerate padding around a single character (e.g., ` y ` for visibility).
    /// Only consulted when `allow_intentional_spaces` is `true`.
    pub allow_single_char_spaces: bool,
    /// Tolerate padding in command examples (heuristic: common shell command
    /// prefixes or flag-like ` -` sequences).
    /// Only consulted when `allow_intentional_spaces` is `true`.
    pub allow_command_spaces: bool,
}
37
38impl Default for MD038NoSpaceInCode {
39    fn default() -> Self {
40        Self::new()
41    }
42}
43
44impl MD038NoSpaceInCode {
45    pub fn new() -> Self {
46        Self {
47            enabled: true,
48            allow_intentional_spaces: true, // More lenient by default
49            allow_single_char_spaces: true,
50            allow_command_spaces: true,
51        }
52    }
53
54    pub fn strict() -> Self {
55        Self {
56            enabled: true,
57            allow_intentional_spaces: false,
58            allow_single_char_spaces: false,
59            allow_command_spaces: false,
60        }
61    }
62
63    /// Determine if spaces in a code span should be allowed based on content heuristics
64    fn should_allow_spaces(&self, code_content: &str, trimmed: &str) -> bool {
65        // If intentional spaces are globally allowed, apply heuristics
66        if self.allow_intentional_spaces {
67            // Allow single character with spaces for visibility (e.g., ` y `, ` * `)
68            if self.allow_single_char_spaces && trimmed.len() == 1 {
69                return true;
70            }
71
72            // Allow command examples with spaces
73            if self.allow_command_spaces && self.looks_like_command(trimmed) {
74                return true;
75            }
76
77            // Allow spaces around variable references or file patterns
78            if self.looks_like_variable_or_pattern(trimmed) {
79                return true;
80            }
81
82            // Allow if spaces improve readability for complex content
83            if self.spaces_improve_readability(code_content, trimmed) {
84                return true;
85            }
86        }
87
88        false
89    }
90
91    /// Check if content looks like a shell command that benefits from spaces
92    fn looks_like_command(&self, content: &str) -> bool {
93        // Common command patterns - check case-insensitive prefixes
94        const COMMAND_PREFIXES: &[&str] = &[
95            "git ", "npm ", "cargo ", "docker ", "kubectl ", "pip ", "yarn ", "sudo ", "chmod ", "chown ", "ls ",
96            "cd ", "mkdir ", "rm ", "cp ", "mv ", "cat ", "grep ", "find ", "awk ", "sed ",
97        ];
98
99        // Check if content starts with any command (case-insensitive)
100        // Use iterator with early return to avoid allocating lowercase string unless needed
101        let needs_lowercase_check = COMMAND_PREFIXES.iter().any(|&cmd| {
102            content.len() >= cmd.len() && content.as_bytes()[..cmd.len()].eq_ignore_ascii_case(cmd.as_bytes())
103        });
104
105        needs_lowercase_check
106            || content.contains(" -") // Commands with flags
107            || content.contains(" --") // Commands with long flags
108    }
109
110    /// Check if content looks like a variable reference or file pattern
111    fn looks_like_variable_or_pattern(&self, content: &str) -> bool {
112        // Variable patterns: $VAR, ${VAR}, %VAR%, etc.
113        content.starts_with('$')
114            || content.starts_with('%') && content.ends_with('%')
115            || (content.contains("*") && content.len() > 3) // File patterns like *.txt (must be substantial)
116            || (content.contains("?") && content.len() > 3 && content.contains("."))
117        // File patterns like file?.txt
118    }
119
120    /// Check if spaces improve readability for complex content
121    fn spaces_improve_readability(&self, _code_content: &str, trimmed: &str) -> bool {
122        // Complex content that benefits from spacing - be more conservative
123        trimmed.len() >= 20 // Only longer content might benefit from spacing
124            || trimmed.contains("://") // URLs
125            || trimmed.contains("->") // Arrows or operators
126            || trimmed.contains("=>") // Lambda arrows
127            || trimmed.contains("&&") || trimmed.contains("||") // Boolean operators
128            || (trimmed.chars().filter(|c| c.is_ascii_punctuation()).count() as f64 / trimmed.len() as f64) > 0.4
129        // Higher punctuation density threshold
130    }
131
132    /// Check if a code span is likely part of a nested backtick structure
133    fn is_likely_nested_backticks(&self, ctx: &crate::lint_context::LintContext, span_index: usize) -> bool {
134        // If there are multiple code spans on the same line, and there's text
135        // between them that contains "code" or other indicators, it's likely nested
136        let code_spans = ctx.code_spans();
137        let current_span = &code_spans[span_index];
138        let current_line = current_span.line;
139
140        // Look for other code spans on the same line
141        let same_line_spans: Vec<_> = code_spans
142            .iter()
143            .enumerate()
144            .filter(|(i, s)| s.line == current_line && *i != span_index)
145            .collect();
146
147        if same_line_spans.is_empty() {
148            return false;
149        }
150
151        // Check if there's content between spans that might indicate nesting
152        // Get the line content
153        let line_idx = current_line - 1; // Convert to 0-based
154        if line_idx >= ctx.lines.len() {
155            return false;
156        }
157
158        let line_content = &ctx.lines[line_idx].content;
159
160        // For each pair of adjacent code spans, check what's between them
161        for (_, other_span) in &same_line_spans {
162            let start = current_span.end_col.min(other_span.end_col);
163            let end = current_span.start_col.max(other_span.start_col);
164
165            if start < end && end <= line_content.len() {
166                let between = &line_content[start..end];
167                // If there's text containing "code" or similar patterns between spans,
168                // it's likely they're showing nested backticks
169                if between.contains("code") || between.contains("backtick") {
170                    return true;
171                }
172            }
173        }
174
175        false
176    }
177}
178
179impl Rule for MD038NoSpaceInCode {
180    fn name(&self) -> &'static str {
181        "MD038"
182    }
183
184    fn description(&self) -> &'static str {
185        "Spaces inside code span elements"
186    }
187
188    fn category(&self) -> RuleCategory {
189        RuleCategory::Other
190    }
191
192    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
193        if !self.enabled {
194            return Ok(vec![]);
195        }
196
197        let mut warnings = Vec::new();
198
199        // Use centralized code spans from LintContext
200        let code_spans = ctx.code_spans();
201        for (i, code_span) in code_spans.iter().enumerate() {
202            let code_content = &code_span.content;
203
204            // Skip empty code spans
205            if code_content.is_empty() {
206                continue;
207            }
208
209            // Early check: if no leading/trailing whitespace, skip trimming
210            if !code_content.chars().next().is_some_and(|c| c.is_whitespace())
211                && !code_content.chars().last().is_some_and(|c| c.is_whitespace())
212            {
213                continue;
214            }
215
216            let trimmed = code_content.trim();
217
218            // Check if there are leading or trailing spaces
219            if code_content != trimmed {
220                // Check if the content itself contains backticks - if so, skip to avoid
221                // breaking nested backtick structures
222                if trimmed.contains('`') {
223                    continue;
224                }
225
226                // Check if this might be part of a nested backtick structure
227                // by looking for other code spans nearby that might indicate nesting
228                if self.is_likely_nested_backticks(ctx, i) {
229                    continue;
230                }
231
232                // Check if spaces are allowed in this context
233                if self.should_allow_spaces(code_content, trimmed) {
234                    continue;
235                }
236
237                warnings.push(LintWarning {
238                    rule_name: Some(self.name()),
239                    line: code_span.line,
240                    column: code_span.start_col + 1, // Convert to 1-indexed
241                    end_line: code_span.line,
242                    end_column: code_span.end_col, // Don't add 1 to match test expectation
243                    message: "Spaces inside code span elements".to_string(),
244                    severity: Severity::Warning,
245                    fix: Some(Fix {
246                        range: code_span.byte_offset..code_span.byte_end,
247                        replacement: format!(
248                            "{}{}{}",
249                            "`".repeat(code_span.backtick_count),
250                            trimmed,
251                            "`".repeat(code_span.backtick_count)
252                        ),
253                    }),
254                });
255            }
256        }
257
258        Ok(warnings)
259    }
260
261    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
262        let content = ctx.content;
263        if !self.enabled {
264            return Ok(content.to_string());
265        }
266
267        // Early return if no backticks in content
268        if !content.contains('`') {
269            return Ok(content.to_string());
270        }
271
272        // Get warnings to identify what needs to be fixed
273        let warnings = self.check(ctx)?;
274        if warnings.is_empty() {
275            return Ok(content.to_string());
276        }
277
278        // Collect all fixes and sort by position (reverse order to avoid position shifts)
279        let mut fixes: Vec<(std::ops::Range<usize>, String)> = warnings
280            .into_iter()
281            .filter_map(|w| w.fix.map(|f| (f.range, f.replacement)))
282            .collect();
283
284        fixes.sort_by_key(|(range, _)| std::cmp::Reverse(range.start));
285
286        // Apply fixes - only allocate string when we have fixes to apply
287        let mut result = content.to_string();
288        for (range, replacement) in fixes {
289            result.replace_range(range, &replacement);
290        }
291
292        Ok(result)
293    }
294
295    /// Check if content is likely to have code spans
296    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
297        !ctx.content.contains('`')
298    }
299
300    fn as_any(&self) -> &dyn std::any::Any {
301        self
302    }
303
304    fn as_maybe_document_structure(&self) -> Option<&dyn crate::rule::MaybeDocumentStructure> {
305        Some(self)
306    }
307
308    fn default_config_section(&self) -> Option<(String, toml::Value)> {
309        let mut map = toml::map::Map::new();
310        map.insert(
311            "allow_intentional_spaces".to_string(),
312            toml::Value::Boolean(self.allow_intentional_spaces),
313        );
314        map.insert(
315            "allow_single_char_spaces".to_string(),
316            toml::Value::Boolean(self.allow_single_char_spaces),
317        );
318        map.insert(
319            "allow_command_spaces".to_string(),
320            toml::Value::Boolean(self.allow_command_spaces),
321        );
322        Some((self.name().to_string(), toml::Value::Table(map)))
323    }
324
325    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
326    where
327        Self: Sized,
328    {
329        let allow_intentional_spaces =
330            crate::config::get_rule_config_value::<bool>(config, "MD038", "allow_intentional_spaces").unwrap_or(true); // Default to true for better UX
331
332        let allow_single_char_spaces =
333            crate::config::get_rule_config_value::<bool>(config, "MD038", "allow_single_char_spaces").unwrap_or(true);
334
335        let allow_command_spaces =
336            crate::config::get_rule_config_value::<bool>(config, "MD038", "allow_command_spaces").unwrap_or(true);
337
338        Box::new(MD038NoSpaceInCode {
339            enabled: true,
340            allow_intentional_spaces,
341            allow_single_char_spaces,
342            allow_command_spaces,
343        })
344    }
345}
346
347impl crate::utils::document_structure::DocumentStructureExtensions for MD038NoSpaceInCode {
348    fn has_relevant_elements(
349        &self,
350        ctx: &crate::lint_context::LintContext,
351        _doc_structure: &crate::utils::document_structure::DocumentStructure,
352    ) -> bool {
353        // We now use centralized code spans from LintContext
354        // Quick check without parsing
355        ctx.content.contains('`')
356    }
357}
358
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// Inputs the lenient default configuration must accept without warnings.
    #[test]
    fn test_md038_valid() {
        let rule = MD038NoSpaceInCode::new();
        let accepted = [
            "This is `code` in a sentence.",
            "This is a `longer code span` in a sentence.",
            "This is `code with internal spaces` which is fine.",
            "This is`` code with double backticks`` which is also fine.",
            "Code span at `end of line`",
            "`Start of line` code span",
            "Multiple `code spans` in `one line` are fine",
            "Code span with `symbols: !@#$%^&*()`",
            "Empty code span `` is technically valid",
            // Padded spans excused by the lenient heuristics
            "Type ` y ` to confirm.",                       // Single character with spaces
            "Use ` git commit -m \"message\" ` to commit.", // Command with spaces
            "The variable ` $HOME ` contains home path.",   // Variable reference
            "The pattern ` *.txt ` matches text files.",    // File pattern
            "URL example ` https://example.com/very/long/path?query=value&more=params ` here.", // Complex long URL
        ];
        for case in accepted {
            let ctx = LintContext::new(case, MarkdownFlavor::Standard);
            let warnings = rule.check(&ctx).unwrap();
            assert!(warnings.is_empty(), "Valid case should not have warnings: {case}");
        }
    }

    /// Inputs that are reported even with the lenient default configuration.
    #[test]
    fn test_md038_invalid() {
        let rule = MD038NoSpaceInCode::new();
        let rejected = [
            "This is ` random word ` with unnecessary spaces.", // Not a command/variable/single char
            "Text with ` plain text ` should be flagged.",      // Just plain text with spaces
            "Code with ` just code ` here.",                    // Simple code with spaces
            "Multiple ` word ` spans with ` text ` in one line.", // Multiple simple cases
        ];
        for case in rejected {
            let ctx = LintContext::new(case, MarkdownFlavor::Standard);
            let warnings = rule.check(&ctx).unwrap();
            assert!(!warnings.is_empty(), "Invalid case should have warnings: {case}");
        }
    }

    /// With the strict configuration, padding excused in lenient mode is reported too.
    #[test]
    fn test_md038_strict_mode() {
        let rule = MD038NoSpaceInCode::strict();
        let rejected = [
            "Type ` y ` to confirm.",                       // Single character with spaces
            "Use ` git commit -m \"message\" ` to commit.", // Command with spaces
            "The variable ` $HOME ` contains home path.",   // Variable reference
            "The pattern ` *.txt ` matches text files.",    // File pattern
            "This is ` code` with leading space.",
            "This is `code ` with trailing space.",
            "This is ` code ` with both leading and trailing space.",
        ];
        for case in rejected {
            let ctx = LintContext::new(case, MarkdownFlavor::Standard);
            let warnings = rule.check(&ctx).unwrap();
            assert!(!warnings.is_empty(), "Strict mode should flag all spaces: {case}");
        }
    }

    /// `fix` strips the padding from every reported span and leaves the rest untouched.
    #[test]
    fn test_md038_fix() {
        let rule = MD038NoSpaceInCode::new();
        let scenarios = [
            (
                "This is ` code` with leading space.",
                "This is `code` with leading space.",
            ),
            (
                "This is `code ` with trailing space.",
                "This is `code` with trailing space.",
            ),
            ("This is ` code ` with both spaces.", "This is `code` with both spaces."),
            (
                "Multiple ` code ` and `spans ` to fix.",
                "Multiple `code` and `spans` to fix.",
            ),
        ];
        for (input, expected) in scenarios {
            let ctx = LintContext::new(input, MarkdownFlavor::Standard);
            let fixed = rule.fix(&ctx).unwrap();
            assert_eq!(fixed, expected, "Fix did not produce expected output for: {input}");
        }
    }

    /// A single leading space yields exactly one warning, on line 1, with a fix attached.
    #[test]
    fn test_check_invalid_leading_space() {
        let rule = MD038NoSpaceInCode::new();
        let input = "This has a ` leading space` in code";
        let ctx = LintContext::new(input, MarkdownFlavor::Standard);
        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(warnings[0].fix.is_some());
    }

    /// Pins how the context splits a line that embeds backticks inside a padded span.
    #[test]
    fn test_code_span_parsing_nested_backticks() {
        let content = "Code with ` nested `code` example ` should preserve backticks";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        println!("Content: {content}");
        println!("Code spans found:");
        let code_spans = ctx.code_spans();
        for (idx, span) in code_spans.iter().enumerate() {
            println!(
                "  Span {}: line={}, col={}-{}, backticks={}, content='{}'",
                idx, span.line, span.start_col, span.end_col, span.backtick_count, span.content
            );
        }

        // The line is parsed as two separate code spans rather than a single one
        assert_eq!(code_spans.len(), 2, "Should parse as 2 code spans");
    }

    /// Exercises `should_allow_spaces` directly in strict and lenient mode.
    #[test]
    fn test_nested_backtick_detection() {
        let strict_rule = MD038NoSpaceInCode::strict();

        // Strict mode never excuses padding
        assert!(!strict_rule.should_allow_spaces(" plain text ", "plain text"));

        // Lenient mode excuses a single character but not plain words
        let lenient_rule = MD038NoSpaceInCode::new();
        assert!(lenient_rule.should_allow_spaces(" y ", "y")); // Single char
        assert!(!lenient_rule.should_allow_spaces(" plain text ", "plain text"));
    }
}