rumdl_lib/rules/
md037_spaces_around_emphasis.rs

1/// Rule MD037: No spaces around emphasis markers
2///
3/// See [docs/md037.md](../../docs/md037.md) for full documentation, configuration, and examples.
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::utils::document_structure::{DocumentStructure, DocumentStructureExtensions};
6use crate::utils::emphasis_utils::{
7    EmphasisSpan, find_emphasis_markers, find_emphasis_spans, has_doc_patterns, replace_inline_code,
8};
9use crate::utils::kramdown_utils::has_span_ial;
10use crate::utils::regex_cache::UNORDERED_LIST_MARKER_REGEX;
11use crate::utils::skip_context::{is_in_html_comment, is_in_math_context, is_in_table_cell};
12use lazy_static::lazy_static;
13use regex::Regex;
14
lazy_static! {
    // Link reference definition, e.g. `[ref]: url "title"`.
    //
    // Pattern pieces, in order:
    //   (?m)^[ ]{0,3}   - start of any line (multi-line mode), at most three leading spaces
    //   \[([^\]]+)\]:   - a non-empty bracketed label followed by a colon (capture 1)
    //   \s*([^\s]+)     - optional whitespace, then a destination without whitespace (capture 2)
    //   (?:\s+(...))?   - optional title in double quotes (capture 3) or single quotes (capture 4)
    //   $               - end of the line
    static ref REF_DEF_REGEX: Regex = Regex::new(
        r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#
    ).unwrap();
}
21
22/// Check if an emphasis span has spacing issues that should be flagged
23#[inline]
24fn has_spacing_issues(span: &EmphasisSpan) -> bool {
25    span.has_leading_space || span.has_trailing_space
26}
27
/// Rule MD037: Spaces inside emphasis markers
///
/// The rule is a stateless unit struct, so `Clone` and `Default` are derived
/// rather than written by hand.
#[derive(Clone, Default)]
pub struct MD037NoSpaceInEmphasis;
37
38impl MD037NoSpaceInEmphasis {
39    /// Check if a byte position is within a link (inline links, reference links, or reference definitions)
40    fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
41        // Check inline and reference links
42        for link in &ctx.links {
43            if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
44                return true;
45            }
46        }
47
48        // Check images (which use similar syntax)
49        for image in &ctx.images {
50            if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
51                return true;
52            }
53        }
54
55        // Check reference definitions [ref]: url "title" using regex pattern
56        for m in REF_DEF_REGEX.find_iter(ctx.content) {
57            if m.start() <= byte_pos && byte_pos < m.end() {
58                return true;
59            }
60        }
61
62        false
63    }
64}
65
66impl Rule for MD037NoSpaceInEmphasis {
    /// Returns the rule identifier, `"MD037"`.
    fn name(&self) -> &'static str {
        "MD037"
    }
70
    /// Returns the short human-readable summary of what the rule reports.
    fn description(&self) -> &'static str {
        "Spaces inside emphasis markers"
    }
74
75    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
76        let content = ctx.content;
77        let _timer = crate::profiling::ScopedTimer::new("MD037_check");
78
79        // Early return: if no emphasis markers at all, skip processing
80        if !content.contains('*') && !content.contains('_') {
81            return Ok(vec![]);
82        }
83
84        // Fallback path: create structure manually (should rarely be used)
85        let structure = DocumentStructure::new(content);
86        self.check_with_structure(ctx, &structure)
87    }
88
89    /// Enhanced function to check for spaces inside emphasis markers
90    fn check_with_structure(
91        &self,
92        ctx: &crate::lint_context::LintContext,
93        structure: &DocumentStructure,
94    ) -> LintResult {
95        let _timer = crate::profiling::ScopedTimer::new("MD037_check_with_structure");
96
97        let content = ctx.content;
98
99        // Early return if the content is empty or has no emphasis characters
100        if content.is_empty() || (!content.contains('*') && !content.contains('_')) {
101            return Ok(vec![]);
102        }
103
104        let mut warnings = Vec::new();
105
106        // Process the content line by line using the document structure
107        for (line_num, line) in content.lines().enumerate() {
108            // Skip if in code block or front matter
109            if structure.is_in_code_block(line_num + 1) || structure.is_in_front_matter(line_num + 1) {
110                continue;
111            }
112
113            // Skip if the line doesn't contain any emphasis markers
114            if !line.contains('*') && !line.contains('_') {
115                continue;
116            }
117
118            // Check for emphasis issues on the original line
119            self.check_line_for_emphasis_issues_fast(line, line_num + 1, &mut warnings);
120        }
121
122        // Filter out warnings for emphasis markers that are inside links, HTML comments, or math
123        let mut filtered_warnings = Vec::new();
124        let mut line_start_pos = 0;
125
126        for (line_idx, line) in content.lines().enumerate() {
127            let line_num = line_idx + 1;
128
129            // Find warnings for this line
130            for warning in &warnings {
131                if warning.line == line_num {
132                    // Calculate byte position of the warning
133                    let byte_pos = line_start_pos + (warning.column - 1);
134
135                    // Skip if inside links, HTML comments, math contexts, or tables
136                    if !self.is_in_link(ctx, byte_pos)
137                        && !is_in_html_comment(content, byte_pos)
138                        && !is_in_math_context(ctx, byte_pos)
139                        && !is_in_table_cell(ctx, line_num, warning.column)
140                    {
141                        filtered_warnings.push(warning.clone());
142                    }
143                }
144            }
145
146            line_start_pos += line.len() + 1; // +1 for newline
147        }
148
149        Ok(filtered_warnings)
150    }
151
152    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
153        let content = ctx.content;
154        let _timer = crate::profiling::ScopedTimer::new("MD037_fix");
155
156        // Fast path: if no emphasis markers, return unchanged
157        if !content.contains('*') && !content.contains('_') {
158            return Ok(content.to_string());
159        }
160
161        // First check for issues and get all warnings with fixes
162        let warnings = self.check(ctx)?;
163
164        // If no warnings, return original content
165        if warnings.is_empty() {
166            return Ok(content.to_string());
167        }
168
169        // Get all line positions to make it easier to apply fixes by warning
170        let mut line_positions = Vec::new();
171        let mut pos = 0;
172        for line in content.lines() {
173            line_positions.push(pos);
174            pos += line.len() + 1; // +1 for the newline
175        }
176
177        // Apply fixes
178        let mut result = content.to_string();
179        let mut offset: isize = 0;
180
181        // Sort warnings by position to apply fixes in the correct order
182        let mut sorted_warnings: Vec<_> = warnings.iter().filter(|w| w.fix.is_some()).collect();
183        sorted_warnings.sort_by_key(|w| (w.line, w.column));
184
185        for warning in sorted_warnings {
186            if let Some(fix) = &warning.fix {
187                // Calculate the absolute position in the file
188                let line_start = line_positions.get(warning.line - 1).copied().unwrap_or(0);
189                let abs_start = line_start + warning.column - 1;
190                let abs_end = abs_start + (fix.range.end - fix.range.start);
191
192                // Apply fix with offset adjustment
193                let actual_start = (abs_start as isize + offset) as usize;
194                let actual_end = (abs_end as isize + offset) as usize;
195
196                // Make sure we're not out of bounds
197                if actual_start < result.len() && actual_end <= result.len() {
198                    // Replace the text
199                    result.replace_range(actual_start..actual_end, &fix.replacement);
200                    // Update offset for future replacements
201                    offset += fix.replacement.len() as isize - (fix.range.end - fix.range.start) as isize;
202                }
203            }
204        }
205
206        Ok(result)
207    }
208
    /// Returns the category used for selective processing: `RuleCategory::Emphasis`.
    fn category(&self) -> RuleCategory {
        RuleCategory::Emphasis
    }
213
214    /// Check if this rule should be skipped
215    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
216        let content = ctx.content;
217        content.is_empty() || (!content.contains('*') && !content.contains('_'))
218    }
219
    /// Exposes the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
223
    /// Returns `Some(self)`, exposing this rule as a `MaybeDocumentStructure` trait object.
    fn as_maybe_document_structure(&self) -> Option<&dyn crate::rule::MaybeDocumentStructure> {
        Some(self)
    }
227
    /// Builds a boxed instance of the rule.
    ///
    /// The `_config` argument is not read.
    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
    where
        Self: Sized,
    {
        Box::new(MD037NoSpaceInEmphasis)
    }
234}
235
236impl DocumentStructureExtensions for MD037NoSpaceInEmphasis {
237    fn has_relevant_elements(
238        &self,
239        ctx: &crate::lint_context::LintContext,
240        _doc_structure: &DocumentStructure,
241    ) -> bool {
242        let content = ctx.content;
243        content.contains('*') || content.contains('_')
244    }
245}
246
247impl MD037NoSpaceInEmphasis {
248    /// Optimized line checking for emphasis spacing issues
249    #[inline]
250    fn check_line_for_emphasis_issues_fast(&self, line: &str, line_num: usize, warnings: &mut Vec<LintWarning>) {
251        // Quick documentation pattern checks
252        if has_doc_patterns(line) {
253            return;
254        }
255
256        // Optimized list detection with fast path
257        if (line.starts_with(' ') || line.starts_with('*') || line.starts_with('+') || line.starts_with('-'))
258            && UNORDERED_LIST_MARKER_REGEX.is_match(line)
259        {
260            if let Some(caps) = UNORDERED_LIST_MARKER_REGEX.captures(line)
261                && let Some(full_match) = caps.get(0)
262            {
263                let list_marker_end = full_match.end();
264                if list_marker_end < line.len() {
265                    let remaining_content = &line[list_marker_end..];
266
267                    if self.is_likely_list_item_fast(remaining_content) {
268                        self.check_line_content_for_emphasis_fast(
269                            remaining_content,
270                            line_num,
271                            list_marker_end,
272                            warnings,
273                        );
274                    } else {
275                        self.check_line_content_for_emphasis_fast(line, line_num, 0, warnings);
276                    }
277                }
278            }
279            return;
280        }
281
282        // Check the entire line
283        self.check_line_content_for_emphasis_fast(line, line_num, 0, warnings);
284    }
285
286    /// Fast list item detection with optimized logic
287    #[inline]
288    fn is_likely_list_item_fast(&self, content: &str) -> bool {
289        let trimmed = content.trim();
290
291        // Early returns for obvious cases
292        if trimmed.is_empty() || trimmed.len() < 3 {
293            return false;
294        }
295
296        // Quick word count using bytes
297        let word_count = trimmed.split_whitespace().count();
298
299        // Short content ending with * is likely emphasis
300        if word_count <= 2 && trimmed.ends_with('*') && !trimmed.ends_with("**") {
301            return false;
302        }
303
304        // Long content (4+ words) without emphasis is likely a list
305        if word_count >= 4 {
306            // Quick check: if no emphasis markers, it's a list
307            if !trimmed.contains('*') && !trimmed.contains('_') {
308                return true;
309            }
310        }
311
312        // For ambiguous cases, default to emphasis (more conservative)
313        false
314    }
315
316    /// Optimized line content checking for emphasis issues
317    fn check_line_content_for_emphasis_fast(
318        &self,
319        content: &str,
320        line_num: usize,
321        offset: usize,
322        warnings: &mut Vec<LintWarning>,
323    ) {
324        // Replace inline code to avoid false positives with emphasis markers inside backticks
325        let processed_content = replace_inline_code(content);
326
327        // Find all emphasis markers using optimized parsing
328        let markers = find_emphasis_markers(&processed_content);
329        if markers.is_empty() {
330            return;
331        }
332
333        // Find valid emphasis spans
334        let spans = find_emphasis_spans(&processed_content, markers);
335
336        // Check each span for spacing issues
337        for span in spans {
338            if has_spacing_issues(&span) {
339                // Calculate the full span including markers
340                let full_start = span.opening.start_pos;
341                let full_end = span.closing.end_pos();
342                let full_text = &content[full_start..full_end];
343
344                // Skip if this emphasis has a Kramdown span IAL immediately after it
345                // (no space between emphasis and IAL)
346                if full_end < content.len() {
347                    let remaining = &content[full_end..];
348                    // Check if IAL starts immediately after the emphasis (no whitespace)
349                    if remaining.starts_with('{') && has_span_ial(remaining.split_whitespace().next().unwrap_or("")) {
350                        continue;
351                    }
352                }
353
354                // Create the marker string efficiently
355                let marker_char = span.opening.as_char();
356                let marker_str = if span.opening.count == 1 {
357                    marker_char.to_string()
358                } else {
359                    format!("{marker_char}{marker_char}")
360                };
361
362                // Create the fixed version by trimming spaces from content
363                let trimmed_content = span.content.trim();
364                let fixed_text = format!("{marker_str}{trimmed_content}{marker_str}");
365
366                let warning = LintWarning {
367                    rule_name: Some(self.name()),
368                    message: format!("Spaces inside emphasis markers: {full_text:?}"),
369                    line: line_num,
370                    column: offset + full_start + 1, // +1 because columns are 1-indexed
371                    end_line: line_num,
372                    end_column: offset + full_end + 1,
373                    severity: Severity::Warning,
374                    fix: Some(Fix {
375                        range: (offset + full_start)..(offset + full_end),
376                        replacement: fixed_text,
377                    }),
378                };
379
380                warnings.push(warning);
381            }
382        }
383    }
384}
385
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;
    use crate::utils::document_structure::DocumentStructure;

    /// Marker scanning reports one marker per delimiter run, for single and double markers.
    #[test]
    fn test_emphasis_marker_parsing() {
        let markers = find_emphasis_markers("This has *single* and **double** emphasis");
        assert_eq!(markers.len(), 4); // *, *, **, **

        let markers = find_emphasis_markers("*start* and *end*");
        assert_eq!(markers.len(), 4); // *, *, *, *
    }

    /// Span detection captures the text between markers and the leading/trailing space flags.
    #[test]
    fn test_emphasis_span_detection() {
        let markers = find_emphasis_markers("This has *valid* emphasis");
        let spans = find_emphasis_spans("This has *valid* emphasis", markers);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].content, "valid");
        assert!(!spans[0].has_leading_space);
        assert!(!spans[0].has_trailing_space);

        let markers = find_emphasis_markers("This has * invalid * emphasis");
        let spans = find_emphasis_spans("This has * invalid * emphasis", markers);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].content, " invalid ");
        assert!(spans[0].has_leading_space);
        assert!(spans[0].has_trailing_space);
    }

    /// `check_with_structure` accepts tight emphasis and flags spaced emphasis,
    /// including spaced emphasis that appears after a fenced code block.
    #[test]
    fn test_with_document_structure() {
        let rule = MD037NoSpaceInEmphasis;

        // Test with no spaces inside emphasis - should pass
        let content = "This is *correct* emphasis and **strong emphasis**";
        let structure = DocumentStructure::new(content);
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check_with_structure(&ctx, &structure).unwrap();
        assert!(result.is_empty(), "No warnings expected for correct emphasis");

        // Test with actual spaces inside emphasis - use content that should warn
        let content = "This is * text with spaces * and more content";
        let structure = DocumentStructure::new(content);
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check_with_structure(&ctx, &structure).unwrap();
        assert!(!result.is_empty(), "Expected warnings for spaces in emphasis");

        // Test with code blocks - emphasis in code should be ignored.
        // Note: this only asserts that some warning exists; it does not pin which line.
        let content = "This is *correct* emphasis\n```\n* incorrect * in code block\n```\nOutside block with * spaces in emphasis *";
        let structure = DocumentStructure::new(content);
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check_with_structure(&ctx, &structure).unwrap();
        assert!(
            !result.is_empty(),
            "Expected warnings for spaces in emphasis outside code block"
        );
    }

    /// Spaced asterisks inside an inline link are ignored; the same pattern in plain text is flagged.
    #[test]
    fn test_emphasis_in_links_not_flagged() {
        let rule = MD037NoSpaceInEmphasis;
        let content = r#"Check this [* spaced asterisk *](https://example.com/*test*) link.

This has * real spaced emphasis * that should be flagged."#;
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        // Candidates found inside the link text and URL must have been filtered out.

        // Only the real emphasis outside links should be flagged
        assert_eq!(
            result.len(),
            1,
            "Expected exactly 1 warning, but got: {:?}",
            result.len()
        );
        assert!(result[0].message.contains("Spaces inside emphasis markers"));
        // Should flag "* real spaced emphasis *" but not emphasis patterns inside links
        assert!(result[0].line == 3); // Line with "* real spaced emphasis *"
    }

    /// On a line mixing a link and plain text, only the plain-text emphasis is flagged;
    /// a reference definition containing asterisks is ignored as well.
    #[test]
    fn test_emphasis_in_links_vs_outside_links() {
        let rule = MD037NoSpaceInEmphasis;
        let content = r#"Check [* spaced *](https://example.com/*test*) and inline * real spaced * text.

[* link *]: https://example.com/*path*"#;
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        // Only the actual emphasis outside links should be flagged
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Spaces inside emphasis markers"));
        // Should be the "* real spaced *" text on line 1
        assert!(result[0].line == 1);
    }

    /// Regression test for issue #49: asterisks inside inline code are not emphasis.
    #[test]
    fn test_issue_49_asterisk_in_inline_code() {
        // Test for issue #49 - Asterisk within backticks identified as for emphasis
        let rule = MD037NoSpaceInEmphasis;

        // Test case from issue #49
        let content = "The `__mul__` method is needed for left-hand multiplication (`vector * 3`) and `__rmul__` is needed for right-hand multiplication (`3 * vector`).";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(
            result.is_empty(),
            "Should not flag asterisks inside inline code as emphasis (issue #49). Got: {result:?}"
        );
    }

    /// Regression test for issue #28: inline code inside emphasis must not count as inner spacing.
    #[test]
    fn test_issue_28_inline_code_in_emphasis() {
        // Test for issue #28 - MD037 should not flag inline code inside emphasis as spaces
        let rule = MD037NoSpaceInEmphasis;

        // Test case 1: inline code with single backticks inside bold emphasis
        let content = "Though, we often call this an **inline `if`** because it looks sort of like an `if`-`else` statement all in *one line* of code.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(
            result.is_empty(),
            "Should not flag inline code inside emphasis as spaces (issue #28). Got: {result:?}"
        );

        // Test case 2: multiple inline code snippets inside emphasis
        let content2 = "The **`foo` and `bar`** methods are important.";
        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard);
        let result2 = rule.check(&ctx2).unwrap();
        assert!(
            result2.is_empty(),
            "Should not flag multiple inline code snippets inside emphasis. Got: {result2:?}"
        );

        // Test case 3: inline code with underscores for emphasis
        let content3 = "This is __inline `code`__ with underscores.";
        let ctx3 = LintContext::new(content3, crate::config::MarkdownFlavor::Standard);
        let result3 = rule.check(&ctx3).unwrap();
        assert!(
            result3.is_empty(),
            "Should not flag inline code with underscore emphasis. Got: {result3:?}"
        );

        // Test case 4: single asterisk emphasis with inline code
        let content4 = "This is *inline `test`* with single asterisks.";
        let ctx4 = LintContext::new(content4, crate::config::MarkdownFlavor::Standard);
        let result4 = rule.check(&ctx4).unwrap();
        assert!(
            result4.is_empty(),
            "Should not flag inline code with single asterisk emphasis. Got: {result4:?}"
        );

        // Test case 5: actual spaces that should be flagged
        let content5 = "This has * real spaces * that should be flagged.";
        let ctx5 = LintContext::new(content5, crate::config::MarkdownFlavor::Standard);
        let result5 = rule.check(&ctx5).unwrap();
        assert!(!result5.is_empty(), "Should still flag actual spaces in emphasis");
        assert!(result5[0].message.contains("Spaces inside emphasis markers"));
    }
}