rumdl_lib/rules/
md037_spaces_around_emphasis.rs

//! Rule MD037: No spaces around emphasis markers
//!
//! See [docs/md037.md](../../docs/md037.md) for full documentation, configuration, and examples.
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::utils::emphasis_utils::{
6    EmphasisSpan, find_emphasis_markers, find_emphasis_spans, has_doc_patterns, replace_inline_code,
7};
8use crate::utils::kramdown_utils::has_span_ial;
9use crate::utils::regex_cache::UNORDERED_LIST_MARKER_REGEX;
10use crate::utils::skip_context::{is_in_html_comment, is_in_math_context, is_in_table_cell};
11use lazy_static::lazy_static;
12use regex::Regex;
13
lazy_static! {
    // Link reference definition, e.g. `[ref]: url "title"`:
    //   - at most three leading spaces, then the `[label]:` part,
    //   - a destination made of non-whitespace characters,
    //   - an optional title in double or single quotes that must run to the end of the line.
    // `(?m)` lets `^` and `$` match at every line boundary, so definitions are found anywhere
    // in a multi-line document.
    static ref REF_DEF_REGEX: Regex = Regex::new(
        r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#
    ).unwrap();
}
20
21/// Check if an emphasis span has spacing issues that should be flagged
22#[inline]
23fn has_spacing_issues(span: &EmphasisSpan) -> bool {
24    span.has_leading_space || span.has_trailing_space
25}
26
/// Rule MD037: Spaces inside emphasis markers
///
/// The rule carries no configuration or state, so it is a unit struct; `Default` is derived
/// rather than hand-written, and `Debug` is available for diagnostics.
#[derive(Debug, Default, Clone)]
pub struct MD037NoSpaceInEmphasis;
36
37impl MD037NoSpaceInEmphasis {
38    /// Check if a byte position is within a link (inline links, reference links, or reference definitions)
39    fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
40        // Check inline and reference links
41        for link in &ctx.links {
42            if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
43                return true;
44            }
45        }
46
47        // Check images (which use similar syntax)
48        for image in &ctx.images {
49            if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
50                return true;
51            }
52        }
53
54        // Check reference definitions [ref]: url "title" using regex pattern
55        for m in REF_DEF_REGEX.find_iter(ctx.content) {
56            if m.start() <= byte_pos && byte_pos < m.end() {
57                return true;
58            }
59        }
60
61        false
62    }
63}
64
65impl Rule for MD037NoSpaceInEmphasis {
66    fn name(&self) -> &'static str {
67        "MD037"
68    }
69
70    fn description(&self) -> &'static str {
71        "Spaces inside emphasis markers"
72    }
73
74    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
75        let content = ctx.content;
76        let _timer = crate::profiling::ScopedTimer::new("MD037_check");
77
78        // Early return: if no emphasis markers at all, skip processing
79        if !content.contains('*') && !content.contains('_') {
80            return Ok(vec![]);
81        }
82
83        let mut warnings = Vec::new();
84
85        // Process the content line by line
86        for (line_num, line) in content.lines().enumerate() {
87            // Skip if in code block or front matter
88            if ctx.is_in_code_block(line_num + 1) || ctx.is_in_front_matter(line_num + 1) {
89                continue;
90            }
91
92            // Skip if the line doesn't contain any emphasis markers
93            if !line.contains('*') && !line.contains('_') {
94                continue;
95            }
96
97            // Check for emphasis issues on the original line
98            self.check_line_for_emphasis_issues_fast(line, line_num + 1, &mut warnings);
99        }
100
101        // Filter out warnings for emphasis markers that are inside links, HTML comments, or math
102        let mut filtered_warnings = Vec::new();
103        let mut line_start_pos = 0;
104
105        for (line_idx, line) in content.lines().enumerate() {
106            let line_num = line_idx + 1;
107
108            // Find warnings for this line
109            for warning in &warnings {
110                if warning.line == line_num {
111                    // Calculate byte position of the warning
112                    let byte_pos = line_start_pos + (warning.column - 1);
113
114                    // Skip if inside links, HTML comments, math contexts, or tables
115                    if !self.is_in_link(ctx, byte_pos)
116                        && !is_in_html_comment(content, byte_pos)
117                        && !is_in_math_context(ctx, byte_pos)
118                        && !is_in_table_cell(ctx, line_num, warning.column)
119                    {
120                        filtered_warnings.push(warning.clone());
121                    }
122                }
123            }
124
125            line_start_pos += line.len() + 1; // +1 for newline
126        }
127
128        Ok(filtered_warnings)
129    }
130
131    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
132        let content = ctx.content;
133        let _timer = crate::profiling::ScopedTimer::new("MD037_fix");
134
135        // Fast path: if no emphasis markers, return unchanged
136        if !content.contains('*') && !content.contains('_') {
137            return Ok(content.to_string());
138        }
139
140        // First check for issues and get all warnings with fixes
141        let warnings = self.check(ctx)?;
142
143        // If no warnings, return original content
144        if warnings.is_empty() {
145            return Ok(content.to_string());
146        }
147
148        // Get all line positions to make it easier to apply fixes by warning
149        let mut line_positions = Vec::new();
150        let mut pos = 0;
151        for line in content.lines() {
152            line_positions.push(pos);
153            pos += line.len() + 1; // +1 for the newline
154        }
155
156        // Apply fixes
157        let mut result = content.to_string();
158        let mut offset: isize = 0;
159
160        // Sort warnings by position to apply fixes in the correct order
161        let mut sorted_warnings: Vec<_> = warnings.iter().filter(|w| w.fix.is_some()).collect();
162        sorted_warnings.sort_by_key(|w| (w.line, w.column));
163
164        for warning in sorted_warnings {
165            if let Some(fix) = &warning.fix {
166                // Calculate the absolute position in the file
167                let line_start = line_positions.get(warning.line - 1).copied().unwrap_or(0);
168                let abs_start = line_start + warning.column - 1;
169                let abs_end = abs_start + (fix.range.end - fix.range.start);
170
171                // Apply fix with offset adjustment
172                let actual_start = (abs_start as isize + offset) as usize;
173                let actual_end = (abs_end as isize + offset) as usize;
174
175                // Make sure we're not out of bounds
176                if actual_start < result.len() && actual_end <= result.len() {
177                    // Replace the text
178                    result.replace_range(actual_start..actual_end, &fix.replacement);
179                    // Update offset for future replacements
180                    offset += fix.replacement.len() as isize - (fix.range.end - fix.range.start) as isize;
181                }
182            }
183        }
184
185        Ok(result)
186    }
187
188    /// Get the category of this rule for selective processing
189    fn category(&self) -> RuleCategory {
190        RuleCategory::Emphasis
191    }
192
193    /// Check if this rule should be skipped
194    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
195        let content = ctx.content;
196        content.is_empty() || (!content.contains('*') && !content.contains('_'))
197    }
198
199    fn as_any(&self) -> &dyn std::any::Any {
200        self
201    }
202
203    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
204    where
205        Self: Sized,
206    {
207        Box::new(MD037NoSpaceInEmphasis)
208    }
209}
210
211impl MD037NoSpaceInEmphasis {
212    /// Optimized line checking for emphasis spacing issues
213    #[inline]
214    fn check_line_for_emphasis_issues_fast(&self, line: &str, line_num: usize, warnings: &mut Vec<LintWarning>) {
215        // Quick documentation pattern checks
216        if has_doc_patterns(line) {
217            return;
218        }
219
220        // Optimized list detection with fast path
221        if (line.starts_with(' ') || line.starts_with('*') || line.starts_with('+') || line.starts_with('-'))
222            && UNORDERED_LIST_MARKER_REGEX.is_match(line)
223        {
224            if let Some(caps) = UNORDERED_LIST_MARKER_REGEX.captures(line)
225                && let Some(full_match) = caps.get(0)
226            {
227                let list_marker_end = full_match.end();
228                if list_marker_end < line.len() {
229                    let remaining_content = &line[list_marker_end..];
230
231                    if self.is_likely_list_item_fast(remaining_content) {
232                        self.check_line_content_for_emphasis_fast(
233                            remaining_content,
234                            line_num,
235                            list_marker_end,
236                            warnings,
237                        );
238                    } else {
239                        self.check_line_content_for_emphasis_fast(line, line_num, 0, warnings);
240                    }
241                }
242            }
243            return;
244        }
245
246        // Check the entire line
247        self.check_line_content_for_emphasis_fast(line, line_num, 0, warnings);
248    }
249
250    /// Fast list item detection with optimized logic
251    #[inline]
252    fn is_likely_list_item_fast(&self, content: &str) -> bool {
253        let trimmed = content.trim();
254
255        // Early returns for obvious cases
256        if trimmed.is_empty() || trimmed.len() < 3 {
257            return false;
258        }
259
260        // Quick word count using bytes
261        let word_count = trimmed.split_whitespace().count();
262
263        // Short content ending with * is likely emphasis
264        if word_count <= 2 && trimmed.ends_with('*') && !trimmed.ends_with("**") {
265            return false;
266        }
267
268        // Long content (4+ words) without emphasis is likely a list
269        if word_count >= 4 {
270            // Quick check: if no emphasis markers, it's a list
271            if !trimmed.contains('*') && !trimmed.contains('_') {
272                return true;
273            }
274        }
275
276        // For ambiguous cases, default to emphasis (more conservative)
277        false
278    }
279
280    /// Optimized line content checking for emphasis issues
281    fn check_line_content_for_emphasis_fast(
282        &self,
283        content: &str,
284        line_num: usize,
285        offset: usize,
286        warnings: &mut Vec<LintWarning>,
287    ) {
288        // Replace inline code to avoid false positives with emphasis markers inside backticks
289        let processed_content = replace_inline_code(content);
290
291        // Find all emphasis markers using optimized parsing
292        let markers = find_emphasis_markers(&processed_content);
293        if markers.is_empty() {
294            return;
295        }
296
297        // Find valid emphasis spans
298        let spans = find_emphasis_spans(&processed_content, markers);
299
300        // Check each span for spacing issues
301        for span in spans {
302            if has_spacing_issues(&span) {
303                // Calculate the full span including markers
304                let full_start = span.opening.start_pos;
305                let full_end = span.closing.end_pos();
306                let full_text = &content[full_start..full_end];
307
308                // Skip if this emphasis has a Kramdown span IAL immediately after it
309                // (no space between emphasis and IAL)
310                if full_end < content.len() {
311                    let remaining = &content[full_end..];
312                    // Check if IAL starts immediately after the emphasis (no whitespace)
313                    if remaining.starts_with('{') && has_span_ial(remaining.split_whitespace().next().unwrap_or("")) {
314                        continue;
315                    }
316                }
317
318                // Create the marker string efficiently
319                let marker_char = span.opening.as_char();
320                let marker_str = if span.opening.count == 1 {
321                    marker_char.to_string()
322                } else {
323                    format!("{marker_char}{marker_char}")
324                };
325
326                // Create the fixed version by trimming spaces from content
327                let trimmed_content = span.content.trim();
328                let fixed_text = format!("{marker_str}{trimmed_content}{marker_str}");
329
330                let warning = LintWarning {
331                    rule_name: Some(self.name()),
332                    message: format!("Spaces inside emphasis markers: {full_text:?}"),
333                    line: line_num,
334                    column: offset + full_start + 1, // +1 because columns are 1-indexed
335                    end_line: line_num,
336                    end_column: offset + full_end + 1,
337                    severity: Severity::Warning,
338                    fix: Some(Fix {
339                        range: (offset + full_start)..(offset + full_end),
340                        replacement: fixed_text,
341                    }),
342                };
343
344                warnings.push(warning);
345            }
346        }
347    }
348}
349
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Marker scanning reports one marker per run of `*`, for single and double emphasis alike.
    #[test]
    fn test_emphasis_marker_parsing() {
        let markers = find_emphasis_markers("This has *single* and **double** emphasis");
        assert_eq!(markers.len(), 4); // *, *, **, **

        let markers = find_emphasis_markers("*start* and *end*");
        assert_eq!(markers.len(), 4); // *, *, *, *
    }

    /// Span detection pairs markers and records whether the inner text is padded.
    #[test]
    fn test_emphasis_span_detection() {
        let markers = find_emphasis_markers("This has *valid* emphasis");
        let spans = find_emphasis_spans("This has *valid* emphasis", markers);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].content, "valid");
        assert!(!spans[0].has_leading_space);
        assert!(!spans[0].has_trailing_space);

        let markers = find_emphasis_markers("This has * invalid * emphasis");
        let spans = find_emphasis_spans("This has * invalid * emphasis", markers);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].content, " invalid ");
        assert!(spans[0].has_leading_space);
        assert!(spans[0].has_trailing_space);
    }

    #[test]
    fn test_with_document_structure() {
        let rule = MD037NoSpaceInEmphasis;

        // Test with no spaces inside emphasis - should pass
        let content = "This is *correct* emphasis and **strong emphasis**";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty(), "No warnings expected for correct emphasis");

        // Test with actual spaces inside emphasis - use content that should warn
        let content = "This is * text with spaces * and more content";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(!result.is_empty(), "Expected warnings for spaces in emphasis");

        // Test with code blocks - emphasis in code should be ignored
        let content = "This is *correct* emphasis\n```\n* incorrect * in code block\n```\nOutside block with * spaces in emphasis *";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(
            !result.is_empty(),
            "Expected warnings for spaces in emphasis outside code block"
        );
        // Line 3 is the only line inside the fence; nothing may be reported there.
        assert!(
            result.iter().all(|warning| warning.line != 3),
            "Emphasis-like text inside the fenced code block must not be flagged. Got: {result:?}"
        );
    }

    #[test]
    fn test_emphasis_in_links_not_flagged() {
        let rule = MD037NoSpaceInEmphasis;
        let content = r#"Check this [* spaced asterisk *](https://example.com/*test*) link.

This has * real spaced emphasis * that should be flagged."#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        // Only the real emphasis outside links should be flagged
        assert_eq!(result.len(), 1, "Expected exactly 1 warning, but got: {result:?}");
        assert!(result[0].message.contains("Spaces inside emphasis markers"));
        // Should flag "* real spaced emphasis *" but not emphasis patterns inside links
        assert_eq!(result[0].line, 3); // Line with "* real spaced emphasis *"
    }

    #[test]
    fn test_emphasis_in_links_vs_outside_links() {
        let rule = MD037NoSpaceInEmphasis;
        let content = r#"Check [* spaced *](https://example.com/*test*) and inline * real spaced * text.

[* link *]: https://example.com/*path*"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        // Only the actual emphasis outside links should be flagged
        assert_eq!(result.len(), 1, "Expected exactly 1 warning, but got: {result:?}");
        assert!(result[0].message.contains("Spaces inside emphasis markers"));
        // Should be the "* real spaced *" text on line 1
        assert_eq!(result[0].line, 1);
    }

    #[test]
    fn test_issue_49_asterisk_in_inline_code() {
        // Test for issue #49 - Asterisk within backticks identified as for emphasis
        let rule = MD037NoSpaceInEmphasis;

        // Test case from issue #49
        let content = "The `__mul__` method is needed for left-hand multiplication (`vector * 3`) and `__rmul__` is needed for right-hand multiplication (`3 * vector`).";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(
            result.is_empty(),
            "Should not flag asterisks inside inline code as emphasis (issue #49). Got: {result:?}"
        );
    }

    #[test]
    fn test_issue_28_inline_code_in_emphasis() {
        // Test for issue #28 - MD037 should not flag inline code inside emphasis as spaces
        let rule = MD037NoSpaceInEmphasis;

        // Test case 1: inline code with single backticks inside bold emphasis
        let content = "Though, we often call this an **inline `if`** because it looks sort of like an `if`-`else` statement all in *one line* of code.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(
            result.is_empty(),
            "Should not flag inline code inside emphasis as spaces (issue #28). Got: {result:?}"
        );

        // Test case 2: multiple inline code snippets inside emphasis
        let content2 = "The **`foo` and `bar`** methods are important.";
        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard);
        let result2 = rule.check(&ctx2).unwrap();
        assert!(
            result2.is_empty(),
            "Should not flag multiple inline code snippets inside emphasis. Got: {result2:?}"
        );

        // Test case 3: inline code with underscores for emphasis
        let content3 = "This is __inline `code`__ with underscores.";
        let ctx3 = LintContext::new(content3, crate::config::MarkdownFlavor::Standard);
        let result3 = rule.check(&ctx3).unwrap();
        assert!(
            result3.is_empty(),
            "Should not flag inline code with underscore emphasis. Got: {result3:?}"
        );

        // Test case 4: single asterisk emphasis with inline code
        let content4 = "This is *inline `test`* with single asterisks.";
        let ctx4 = LintContext::new(content4, crate::config::MarkdownFlavor::Standard);
        let result4 = rule.check(&ctx4).unwrap();
        assert!(
            result4.is_empty(),
            "Should not flag inline code with single asterisk emphasis. Got: {result4:?}"
        );

        // Test case 5: actual spaces that should be flagged
        let content5 = "This has * real spaces * that should be flagged.";
        let ctx5 = LintContext::new(content5, crate::config::MarkdownFlavor::Standard);
        let result5 = rule.check(&ctx5).unwrap();
        assert!(!result5.is_empty(), "Should still flag actual spaces in emphasis");
        assert!(result5[0].message.contains("Spaces inside emphasis markers"));
    }
}