rumdl_lib/rules/
md037_spaces_around_emphasis.rs

/// Rule MD037: No spaces around emphasis markers
///
/// See [docs/md037.md](../../docs/md037.md) for full documentation, configuration, and examples.
4use crate::filtered_lines::FilteredLinesExt;
5use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
6use crate::utils::emphasis_utils::{
7    EmphasisSpan, find_emphasis_markers, find_emphasis_spans, has_doc_patterns, replace_inline_code,
8};
9use crate::utils::kramdown_utils::has_span_ial;
10use crate::utils::range_utils::LineIndex;
11use crate::utils::regex_cache::UNORDERED_LIST_MARKER_REGEX;
12use crate::utils::skip_context::{is_in_html_comment, is_in_math_context, is_in_table_cell};
13use lazy_static::lazy_static;
14use regex::Regex;
15
16lazy_static! {
17    // Reference definition pattern - matches [ref]: url "title"
18    static ref REF_DEF_REGEX: Regex = Regex::new(
19        r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#
20    ).unwrap();
21}
22
23/// Check if an emphasis span has spacing issues that should be flagged
24#[inline]
25fn has_spacing_issues(span: &EmphasisSpan) -> bool {
26    span.has_leading_space || span.has_trailing_space
27}
28
/// Rule MD037: Spaces inside emphasis markers
///
/// The rule carries no configuration or state, so it is a unit struct and
/// `Default` is derived rather than hand-written.
#[derive(Clone, Debug, Default)]
pub struct MD037NoSpaceInEmphasis;
38
39impl MD037NoSpaceInEmphasis {
40    /// Check if a byte position is within a link (inline links, reference links, or reference definitions)
41    fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
42        // Check inline and reference links
43        for link in &ctx.links {
44            if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
45                return true;
46            }
47        }
48
49        // Check images (which use similar syntax)
50        for image in &ctx.images {
51            if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
52                return true;
53            }
54        }
55
56        // Check reference definitions [ref]: url "title" using regex pattern
57        for m in REF_DEF_REGEX.find_iter(ctx.content) {
58            if m.start() <= byte_pos && byte_pos < m.end() {
59                return true;
60            }
61        }
62
63        false
64    }
65}
66
67impl Rule for MD037NoSpaceInEmphasis {
68    fn name(&self) -> &'static str {
69        "MD037"
70    }
71
72    fn description(&self) -> &'static str {
73        "Spaces inside emphasis markers"
74    }
75
76    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
77        let content = ctx.content;
78        let _timer = crate::profiling::ScopedTimer::new("MD037_check");
79
80        // Early return: if no emphasis markers at all, skip processing
81        if !content.contains('*') && !content.contains('_') {
82            return Ok(vec![]);
83        }
84
85        // Create LineIndex for correct byte position calculations across all line ending types
86        let line_index = LineIndex::new(content.to_string());
87
88        let mut warnings = Vec::new();
89
90        // Process content lines, automatically skipping front matter and code blocks
91        for line in ctx.filtered_lines().skip_front_matter().skip_code_blocks() {
92            // Skip if the line doesn't contain any emphasis markers
93            if !line.content.contains('*') && !line.content.contains('_') {
94                continue;
95            }
96
97            // Check for emphasis issues on the original line
98            self.check_line_for_emphasis_issues_fast(line.content, line.line_num, &mut warnings);
99        }
100
101        // Filter out warnings for emphasis markers that are inside links, HTML comments, or math
102        let mut filtered_warnings = Vec::new();
103
104        for (line_idx, _line) in content.lines().enumerate() {
105            let line_num = line_idx + 1;
106            let line_start_pos = line_index.get_line_start_byte(line_num).unwrap_or(0);
107
108            // Find warnings for this line
109            for warning in &warnings {
110                if warning.line == line_num {
111                    // Calculate byte position of the warning
112                    let byte_pos = line_start_pos + (warning.column - 1);
113
114                    // Skip if inside links, HTML comments, math contexts, or tables
115                    if !self.is_in_link(ctx, byte_pos)
116                        && !is_in_html_comment(content, byte_pos)
117                        && !is_in_math_context(ctx, byte_pos)
118                        && !is_in_table_cell(ctx, line_num, warning.column)
119                    {
120                        filtered_warnings.push(warning.clone());
121                    }
122                }
123            }
124        }
125
126        Ok(filtered_warnings)
127    }
128
129    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
130        let content = ctx.content;
131        let _timer = crate::profiling::ScopedTimer::new("MD037_fix");
132
133        // Fast path: if no emphasis markers, return unchanged
134        if !content.contains('*') && !content.contains('_') {
135            return Ok(content.to_string());
136        }
137
138        // First check for issues and get all warnings with fixes
139        let warnings = self.check(ctx)?;
140
141        // If no warnings, return original content
142        if warnings.is_empty() {
143            return Ok(content.to_string());
144        }
145
146        // Create LineIndex for correct byte position calculations across all line ending types
147        let line_index = LineIndex::new(content.to_string());
148
149        // Apply fixes
150        let mut result = content.to_string();
151        let mut offset: isize = 0;
152
153        // Sort warnings by position to apply fixes in the correct order
154        let mut sorted_warnings: Vec<_> = warnings.iter().filter(|w| w.fix.is_some()).collect();
155        sorted_warnings.sort_by_key(|w| (w.line, w.column));
156
157        for warning in sorted_warnings {
158            if let Some(fix) = &warning.fix {
159                // Calculate the absolute position in the file
160                let line_start = line_index.get_line_start_byte(warning.line).unwrap_or(0);
161                let abs_start = line_start + warning.column - 1;
162                let abs_end = abs_start + (fix.range.end - fix.range.start);
163
164                // Apply fix with offset adjustment
165                let actual_start = (abs_start as isize + offset) as usize;
166                let actual_end = (abs_end as isize + offset) as usize;
167
168                // Make sure we're not out of bounds
169                if actual_start < result.len() && actual_end <= result.len() {
170                    // Replace the text
171                    result.replace_range(actual_start..actual_end, &fix.replacement);
172                    // Update offset for future replacements
173                    offset += fix.replacement.len() as isize - (fix.range.end - fix.range.start) as isize;
174                }
175            }
176        }
177
178        Ok(result)
179    }
180
181    /// Get the category of this rule for selective processing
182    fn category(&self) -> RuleCategory {
183        RuleCategory::Emphasis
184    }
185
186    /// Check if this rule should be skipped
187    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
188        ctx.content.is_empty() || !ctx.likely_has_emphasis()
189    }
190
191    fn as_any(&self) -> &dyn std::any::Any {
192        self
193    }
194
195    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
196    where
197        Self: Sized,
198    {
199        Box::new(MD037NoSpaceInEmphasis)
200    }
201}
202
203impl MD037NoSpaceInEmphasis {
204    /// Optimized line checking for emphasis spacing issues
205    #[inline]
206    fn check_line_for_emphasis_issues_fast(&self, line: &str, line_num: usize, warnings: &mut Vec<LintWarning>) {
207        // Quick documentation pattern checks
208        if has_doc_patterns(line) {
209            return;
210        }
211
212        // Optimized list detection with fast path
213        if (line.starts_with(' ') || line.starts_with('*') || line.starts_with('+') || line.starts_with('-'))
214            && UNORDERED_LIST_MARKER_REGEX.is_match(line)
215        {
216            if let Some(caps) = UNORDERED_LIST_MARKER_REGEX.captures(line)
217                && let Some(full_match) = caps.get(0)
218            {
219                let list_marker_end = full_match.end();
220                if list_marker_end < line.len() {
221                    let remaining_content = &line[list_marker_end..];
222
223                    if self.is_likely_list_item_fast(remaining_content) {
224                        self.check_line_content_for_emphasis_fast(
225                            remaining_content,
226                            line_num,
227                            list_marker_end,
228                            warnings,
229                        );
230                    } else {
231                        self.check_line_content_for_emphasis_fast(line, line_num, 0, warnings);
232                    }
233                }
234            }
235            return;
236        }
237
238        // Check the entire line
239        self.check_line_content_for_emphasis_fast(line, line_num, 0, warnings);
240    }
241
242    /// Fast list item detection with optimized logic
243    #[inline]
244    fn is_likely_list_item_fast(&self, content: &str) -> bool {
245        let trimmed = content.trim();
246
247        // Early returns for obvious cases
248        if trimmed.is_empty() || trimmed.len() < 3 {
249            return false;
250        }
251
252        // Quick word count using bytes
253        let word_count = trimmed.split_whitespace().count();
254
255        // Short content ending with * is likely emphasis
256        if word_count <= 2 && trimmed.ends_with('*') && !trimmed.ends_with("**") {
257            return false;
258        }
259
260        // Long content (4+ words) without emphasis is likely a list
261        if word_count >= 4 {
262            // Quick check: if no emphasis markers, it's a list
263            if !trimmed.contains('*') && !trimmed.contains('_') {
264                return true;
265            }
266        }
267
268        // For ambiguous cases, default to emphasis (more conservative)
269        false
270    }
271
272    /// Optimized line content checking for emphasis issues
273    fn check_line_content_for_emphasis_fast(
274        &self,
275        content: &str,
276        line_num: usize,
277        offset: usize,
278        warnings: &mut Vec<LintWarning>,
279    ) {
280        // Replace inline code to avoid false positives with emphasis markers inside backticks
281        let processed_content = replace_inline_code(content);
282
283        // Find all emphasis markers using optimized parsing
284        let markers = find_emphasis_markers(&processed_content);
285        if markers.is_empty() {
286            return;
287        }
288
289        // Find valid emphasis spans
290        let spans = find_emphasis_spans(&processed_content, markers);
291
292        // Check each span for spacing issues
293        for span in spans {
294            if has_spacing_issues(&span) {
295                // Calculate the full span including markers
296                let full_start = span.opening.start_pos;
297                let full_end = span.closing.end_pos();
298                let full_text = &content[full_start..full_end];
299
300                // Skip if this emphasis has a Kramdown span IAL immediately after it
301                // (no space between emphasis and IAL)
302                if full_end < content.len() {
303                    let remaining = &content[full_end..];
304                    // Check if IAL starts immediately after the emphasis (no whitespace)
305                    if remaining.starts_with('{') && has_span_ial(remaining.split_whitespace().next().unwrap_or("")) {
306                        continue;
307                    }
308                }
309
310                // Create the marker string efficiently
311                let marker_char = span.opening.as_char();
312                let marker_str = if span.opening.count == 1 {
313                    marker_char.to_string()
314                } else {
315                    format!("{marker_char}{marker_char}")
316                };
317
318                // Create the fixed version by trimming spaces from content
319                let trimmed_content = span.content.trim();
320                let fixed_text = format!("{marker_str}{trimmed_content}{marker_str}");
321
322                let warning = LintWarning {
323                    rule_name: Some(self.name().to_string()),
324                    message: format!("Spaces inside emphasis markers: {full_text:?}"),
325                    line: line_num,
326                    column: offset + full_start + 1, // +1 because columns are 1-indexed
327                    end_line: line_num,
328                    end_column: offset + full_end + 1,
329                    severity: Severity::Warning,
330                    fix: Some(Fix {
331                        range: (offset + full_start)..(offset + full_end),
332                        replacement: fixed_text,
333                    }),
334                };
335
336                warnings.push(warning);
337            }
338        }
339    }
340}
341
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Marker discovery: each opening and closing run counts as one marker.
    #[test]
    fn test_emphasis_marker_parsing() {
        let markers = find_emphasis_markers("This has *single* and **double** emphasis");
        assert_eq!(markers.len(), 4); // *, *, **, **

        let markers = find_emphasis_markers("*start* and *end*");
        assert_eq!(markers.len(), 4); // *, *, *, *
    }

    /// Span pairing and the leading/trailing space flags.
    #[test]
    fn test_emphasis_span_detection() {
        let markers = find_emphasis_markers("This has *valid* emphasis");
        let spans = find_emphasis_spans("This has *valid* emphasis", markers);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].content, "valid");
        assert!(!spans[0].has_leading_space);
        assert!(!spans[0].has_trailing_space);

        let markers = find_emphasis_markers("This has * invalid * emphasis");
        let spans = find_emphasis_spans("This has * invalid * emphasis", markers);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].content, " invalid ");
        assert!(spans[0].has_leading_space);
        assert!(spans[0].has_trailing_space);
    }

    #[test]
    fn test_with_document_structure() {
        let rule = MD037NoSpaceInEmphasis;

        // Test with no spaces inside emphasis - should pass
        let content = "This is *correct* emphasis and **strong emphasis**";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty(), "No warnings expected for correct emphasis");

        // Test with actual spaces inside emphasis - use content that should warn
        let content = "This is * text with spaces * and more content";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(!result.is_empty(), "Expected warnings for spaces in emphasis");

        // Test with code blocks - emphasis in code should be ignored
        let content = "This is *correct* emphasis\n```\n* incorrect * in code block\n```\nOutside block with * spaces in emphasis *";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(
            !result.is_empty(),
            "Expected warnings for spaces in emphasis outside code block"
        );
    }

    #[test]
    fn test_emphasis_in_links_not_flagged() {
        let rule = MD037NoSpaceInEmphasis;
        let content = r#"Check this [* spaced asterisk *](https://example.com/*test*) link.

This has * real spaced emphasis * that should be flagged."#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        // Only the real emphasis outside links should be flagged
        assert_eq!(
            result.len(),
            1,
            "Expected exactly 1 warning, but got: {result:?}"
        );
        assert!(result[0].message.contains("Spaces inside emphasis markers"));
        // Should flag "* real spaced emphasis *" but not emphasis patterns inside links
        assert_eq!(result[0].line, 3); // Line with "* real spaced emphasis *"
    }

    #[test]
    fn test_emphasis_in_links_vs_outside_links() {
        let rule = MD037NoSpaceInEmphasis;
        let content = r#"Check [* spaced *](https://example.com/*test*) and inline * real spaced * text.

[* link *]: https://example.com/*path*"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        // Only the actual emphasis outside links should be flagged
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Spaces inside emphasis markers"));
        // Should be the "* real spaced *" text on line 1
        assert_eq!(result[0].line, 1);
    }

    #[test]
    fn test_issue_49_asterisk_in_inline_code() {
        // Test for issue #49 - Asterisk within backticks identified as for emphasis
        let rule = MD037NoSpaceInEmphasis;

        // Test case from issue #49
        let content = "The `__mul__` method is needed for left-hand multiplication (`vector * 3`) and `__rmul__` is needed for right-hand multiplication (`3 * vector`).";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(
            result.is_empty(),
            "Should not flag asterisks inside inline code as emphasis (issue #49). Got: {result:?}"
        );
    }

    #[test]
    fn test_issue_28_inline_code_in_emphasis() {
        // Test for issue #28 - MD037 should not flag inline code inside emphasis as spaces
        let rule = MD037NoSpaceInEmphasis;

        // Test case 1: inline code with single backticks inside bold emphasis
        let content = "Though, we often call this an **inline `if`** because it looks sort of like an `if`-`else` statement all in *one line* of code.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();
        assert!(
            result.is_empty(),
            "Should not flag inline code inside emphasis as spaces (issue #28). Got: {result:?}"
        );

        // Test case 2: multiple inline code snippets inside emphasis
        let content2 = "The **`foo` and `bar`** methods are important.";
        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard);
        let result2 = rule.check(&ctx2).unwrap();
        assert!(
            result2.is_empty(),
            "Should not flag multiple inline code snippets inside emphasis. Got: {result2:?}"
        );

        // Test case 3: inline code with underscores for emphasis
        let content3 = "This is __inline `code`__ with underscores.";
        let ctx3 = LintContext::new(content3, crate::config::MarkdownFlavor::Standard);
        let result3 = rule.check(&ctx3).unwrap();
        assert!(
            result3.is_empty(),
            "Should not flag inline code with underscore emphasis. Got: {result3:?}"
        );

        // Test case 4: single asterisk emphasis with inline code
        let content4 = "This is *inline `test`* with single asterisks.";
        let ctx4 = LintContext::new(content4, crate::config::MarkdownFlavor::Standard);
        let result4 = rule.check(&ctx4).unwrap();
        assert!(
            result4.is_empty(),
            "Should not flag inline code with single asterisk emphasis. Got: {result4:?}"
        );

        // Test case 5: actual spaces that should be flagged
        let content5 = "This has * real spaces * that should be flagged.";
        let ctx5 = LintContext::new(content5, crate::config::MarkdownFlavor::Standard);
        let result5 = rule.check(&ctx5).unwrap();
        assert!(!result5.is_empty(), "Should still flag actual spaces in emphasis");
        assert!(result5[0].message.contains("Spaces inside emphasis markers"));
    }
}