rumdl_lib/rules/
md037_spaces_around_emphasis.rs

1/// Rule MD037: No spaces around emphasis markers
2///
3/// See [docs/md037.md](../../docs/md037.md) for full documentation, configuration, and examples.
4use crate::filtered_lines::FilteredLinesExt;
5use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
6use crate::utils::emphasis_utils::{
7    EmphasisSpan, find_emphasis_markers, find_emphasis_spans, has_doc_patterns, replace_inline_code,
8};
9use crate::utils::kramdown_utils::has_span_ial;
10use crate::utils::regex_cache::UNORDERED_LIST_MARKER_REGEX;
11use crate::utils::skip_context::{is_in_html_comment, is_in_math_context, is_in_table_cell};
12
13/// Check if an emphasis span has spacing issues that should be flagged
14#[inline]
15fn has_spacing_issues(span: &EmphasisSpan) -> bool {
16    span.has_leading_space || span.has_trailing_space
17}
18
/// Rule MD037: Spaces inside emphasis markers
///
/// The rule is a stateless unit struct, so `Default` is derived instead of
/// being implemented by hand; `Debug` is derived so the rule can be printed
/// in diagnostics.
#[derive(Debug, Clone, Default)]
pub struct MD037NoSpaceInEmphasis;
28
29impl MD037NoSpaceInEmphasis {
30    /// Check if a byte position is within a link (inline links, reference links, or reference definitions)
31    fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
32        // Check inline and reference links
33        for link in &ctx.links {
34            if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
35                return true;
36            }
37        }
38
39        // Check images (which use similar syntax)
40        for image in &ctx.images {
41            if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
42                return true;
43            }
44        }
45
46        // Check reference definitions [ref]: url "title" using pre-computed data (O(1) vs O(n))
47        ctx.is_in_reference_def(byte_pos)
48    }
49}
50
51impl Rule for MD037NoSpaceInEmphasis {
52    fn name(&self) -> &'static str {
53        "MD037"
54    }
55
56    fn description(&self) -> &'static str {
57        "Spaces inside emphasis markers"
58    }
59
60    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
61        let content = ctx.content;
62        let _timer = crate::profiling::ScopedTimer::new("MD037_check");
63
64        // Early return: if no emphasis markers at all, skip processing
65        if !content.contains('*') && !content.contains('_') {
66            return Ok(vec![]);
67        }
68
69        // Create LineIndex for correct byte position calculations across all line ending types
70        let line_index = &ctx.line_index;
71
72        let mut warnings = Vec::new();
73
74        // Process content lines, automatically skipping front matter and code blocks
75        for line in ctx.filtered_lines().skip_front_matter().skip_code_blocks() {
76            // Skip if the line doesn't contain any emphasis markers
77            if !line.content.contains('*') && !line.content.contains('_') {
78                continue;
79            }
80
81            // Check for emphasis issues on the original line
82            self.check_line_for_emphasis_issues_fast(line.content, line.line_num, &mut warnings);
83        }
84
85        // Filter out warnings for emphasis markers that are inside links, HTML comments, or math
86        let mut filtered_warnings = Vec::new();
87
88        for (line_idx, _line) in content.lines().enumerate() {
89            let line_num = line_idx + 1;
90            let line_start_pos = line_index.get_line_start_byte(line_num).unwrap_or(0);
91
92            // Find warnings for this line
93            for warning in &warnings {
94                if warning.line == line_num {
95                    // Calculate byte position of the warning
96                    let byte_pos = line_start_pos + (warning.column - 1);
97
98                    // Skip if inside links, HTML comments, math contexts, or tables
99                    if !self.is_in_link(ctx, byte_pos)
100                        && !is_in_html_comment(content, byte_pos)
101                        && !is_in_math_context(ctx, byte_pos)
102                        && !is_in_table_cell(ctx, line_num, warning.column)
103                    {
104                        filtered_warnings.push(warning.clone());
105                    }
106                }
107            }
108        }
109
110        Ok(filtered_warnings)
111    }
112
113    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
114        let content = ctx.content;
115        let _timer = crate::profiling::ScopedTimer::new("MD037_fix");
116
117        // Fast path: if no emphasis markers, return unchanged
118        if !content.contains('*') && !content.contains('_') {
119            return Ok(content.to_string());
120        }
121
122        // First check for issues and get all warnings with fixes
123        let warnings = self.check(ctx)?;
124
125        // If no warnings, return original content
126        if warnings.is_empty() {
127            return Ok(content.to_string());
128        }
129
130        // Create LineIndex for correct byte position calculations across all line ending types
131        let line_index = &ctx.line_index;
132
133        // Apply fixes
134        let mut result = content.to_string();
135        let mut offset: isize = 0;
136
137        // Sort warnings by position to apply fixes in the correct order
138        let mut sorted_warnings: Vec<_> = warnings.iter().filter(|w| w.fix.is_some()).collect();
139        sorted_warnings.sort_by_key(|w| (w.line, w.column));
140
141        for warning in sorted_warnings {
142            if let Some(fix) = &warning.fix {
143                // Calculate the absolute position in the file
144                let line_start = line_index.get_line_start_byte(warning.line).unwrap_or(0);
145                let abs_start = line_start + warning.column - 1;
146                let abs_end = abs_start + (fix.range.end - fix.range.start);
147
148                // Apply fix with offset adjustment
149                let actual_start = (abs_start as isize + offset) as usize;
150                let actual_end = (abs_end as isize + offset) as usize;
151
152                // Make sure we're not out of bounds
153                if actual_start < result.len() && actual_end <= result.len() {
154                    // Replace the text
155                    result.replace_range(actual_start..actual_end, &fix.replacement);
156                    // Update offset for future replacements
157                    offset += fix.replacement.len() as isize - (fix.range.end - fix.range.start) as isize;
158                }
159            }
160        }
161
162        Ok(result)
163    }
164
165    /// Get the category of this rule for selective processing
166    fn category(&self) -> RuleCategory {
167        RuleCategory::Emphasis
168    }
169
170    /// Check if this rule should be skipped
171    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
172        ctx.content.is_empty() || !ctx.likely_has_emphasis()
173    }
174
175    fn as_any(&self) -> &dyn std::any::Any {
176        self
177    }
178
179    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
180    where
181        Self: Sized,
182    {
183        Box::new(MD037NoSpaceInEmphasis)
184    }
185}
186
187impl MD037NoSpaceInEmphasis {
188    /// Optimized line checking for emphasis spacing issues
189    #[inline]
190    fn check_line_for_emphasis_issues_fast(&self, line: &str, line_num: usize, warnings: &mut Vec<LintWarning>) {
191        // Quick documentation pattern checks
192        if has_doc_patterns(line) {
193            return;
194        }
195
196        // Optimized list detection with fast path
197        if (line.starts_with(' ') || line.starts_with('*') || line.starts_with('+') || line.starts_with('-'))
198            && UNORDERED_LIST_MARKER_REGEX.is_match(line)
199        {
200            if let Some(caps) = UNORDERED_LIST_MARKER_REGEX.captures(line)
201                && let Some(full_match) = caps.get(0)
202            {
203                let list_marker_end = full_match.end();
204                if list_marker_end < line.len() {
205                    let remaining_content = &line[list_marker_end..];
206
207                    if self.is_likely_list_item_fast(remaining_content) {
208                        self.check_line_content_for_emphasis_fast(
209                            remaining_content,
210                            line_num,
211                            list_marker_end,
212                            warnings,
213                        );
214                    } else {
215                        self.check_line_content_for_emphasis_fast(line, line_num, 0, warnings);
216                    }
217                }
218            }
219            return;
220        }
221
222        // Check the entire line
223        self.check_line_content_for_emphasis_fast(line, line_num, 0, warnings);
224    }
225
226    /// Fast list item detection with optimized logic
227    #[inline]
228    fn is_likely_list_item_fast(&self, content: &str) -> bool {
229        let trimmed = content.trim();
230
231        // Early returns for obvious cases
232        if trimmed.is_empty() || trimmed.len() < 3 {
233            return false;
234        }
235
236        // Quick word count using bytes
237        let word_count = trimmed.split_whitespace().count();
238
239        // Short content ending with * is likely emphasis
240        if word_count <= 2 && trimmed.ends_with('*') && !trimmed.ends_with("**") {
241            return false;
242        }
243
244        // Long content (4+ words) without emphasis is likely a list
245        if word_count >= 4 {
246            // Quick check: if no emphasis markers, it's a list
247            if !trimmed.contains('*') && !trimmed.contains('_') {
248                return true;
249            }
250        }
251
252        // For ambiguous cases, default to emphasis (more conservative)
253        false
254    }
255
256    /// Optimized line content checking for emphasis issues
257    fn check_line_content_for_emphasis_fast(
258        &self,
259        content: &str,
260        line_num: usize,
261        offset: usize,
262        warnings: &mut Vec<LintWarning>,
263    ) {
264        // Replace inline code to avoid false positives with emphasis markers inside backticks
265        let processed_content = replace_inline_code(content);
266
267        // Find all emphasis markers using optimized parsing
268        let markers = find_emphasis_markers(&processed_content);
269        if markers.is_empty() {
270            return;
271        }
272
273        // Find valid emphasis spans
274        let spans = find_emphasis_spans(&processed_content, markers);
275
276        // Check each span for spacing issues
277        for span in spans {
278            if has_spacing_issues(&span) {
279                // Calculate the full span including markers
280                let full_start = span.opening.start_pos;
281                let full_end = span.closing.end_pos();
282                let full_text = &content[full_start..full_end];
283
284                // Skip if this emphasis has a Kramdown span IAL immediately after it
285                // (no space between emphasis and IAL)
286                if full_end < content.len() {
287                    let remaining = &content[full_end..];
288                    // Check if IAL starts immediately after the emphasis (no whitespace)
289                    if remaining.starts_with('{') && has_span_ial(remaining.split_whitespace().next().unwrap_or("")) {
290                        continue;
291                    }
292                }
293
294                // Create the marker string efficiently
295                let marker_char = span.opening.as_char();
296                let marker_str = if span.opening.count == 1 {
297                    marker_char.to_string()
298                } else {
299                    format!("{marker_char}{marker_char}")
300                };
301
302                // Create the fixed version by trimming spaces from content
303                let trimmed_content = span.content.trim();
304                let fixed_text = format!("{marker_str}{trimmed_content}{marker_str}");
305
306                let warning = LintWarning {
307                    rule_name: Some(self.name().to_string()),
308                    message: format!("Spaces inside emphasis markers: {full_text:?}"),
309                    line: line_num,
310                    column: offset + full_start + 1, // +1 because columns are 1-indexed
311                    end_line: line_num,
312                    end_column: offset + full_end + 1,
313                    severity: Severity::Warning,
314                    fix: Some(Fix {
315                        range: (offset + full_start)..(offset + full_end),
316                        replacement: fixed_text,
317                    }),
318                };
319
320                warnings.push(warning);
321            }
322        }
323    }
324}
325
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// Run MD037 over `content` with the standard flavor and return its warnings.
    fn lint(content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        MD037NoSpaceInEmphasis.check(&ctx).unwrap()
    }

    #[test]
    fn test_emphasis_marker_parsing() {
        let markers = find_emphasis_markers("This has *single* and **double** emphasis");
        assert_eq!(markers.len(), 4); // *, *, **, **

        let markers = find_emphasis_markers("*start* and *end*");
        assert_eq!(markers.len(), 4); // *, *, *, *
    }

    #[test]
    fn test_emphasis_span_detection() {
        let markers = find_emphasis_markers("This has *valid* emphasis");
        let spans = find_emphasis_spans("This has *valid* emphasis", markers);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].content, "valid");
        assert!(!spans[0].has_leading_space);
        assert!(!spans[0].has_trailing_space);

        let markers = find_emphasis_markers("This has * invalid * emphasis");
        let spans = find_emphasis_spans("This has * invalid * emphasis", markers);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].content, " invalid ");
        assert!(spans[0].has_leading_space);
        assert!(spans[0].has_trailing_space);
    }

    #[test]
    fn test_with_document_structure() {
        // No spaces inside emphasis - should pass
        let result = lint("This is *correct* emphasis and **strong emphasis**");
        assert!(result.is_empty(), "No warnings expected for correct emphasis");

        // Actual spaces inside emphasis - should warn
        let result = lint("This is * text with spaces * and more content");
        assert!(!result.is_empty(), "Expected warnings for spaces in emphasis");

        // Emphasis inside a code block is ignored, the one outside is not
        let result = lint(
            "This is *correct* emphasis\n```\n* incorrect * in code block\n```\nOutside block with * spaces in emphasis *",
        );
        assert!(
            !result.is_empty(),
            "Expected warnings for spaces in emphasis outside code block"
        );
    }

    #[test]
    fn test_emphasis_in_links_not_flagged() {
        let content = r#"Check this [* spaced asterisk *](https://example.com/*test*) link.

This has * real spaced emphasis * that should be flagged."#;
        let result = lint(content);

        // Only the real emphasis outside links should be flagged
        assert_eq!(result.len(), 1, "Expected exactly 1 warning, but got: {result:?}");
        assert!(result[0].message.contains("Spaces inside emphasis markers"));
        // Should flag "* real spaced emphasis *" but not emphasis patterns inside links
        assert_eq!(result[0].line, 3); // Line with "* real spaced emphasis *"
    }

    #[test]
    fn test_emphasis_in_links_vs_outside_links() {
        let content = r#"Check [* spaced *](https://example.com/*test*) and inline * real spaced * text.

[* link *]: https://example.com/*path*"#;
        let result = lint(content);

        // Only the actual emphasis outside links should be flagged
        assert_eq!(result.len(), 1, "Expected exactly 1 warning, but got: {result:?}");
        assert!(result[0].message.contains("Spaces inside emphasis markers"));
        // Should be the "* real spaced *" text on line 1
        assert_eq!(result[0].line, 1);
    }

    #[test]
    fn test_issue_49_asterisk_in_inline_code() {
        // Issue #49 - asterisk within backticks identified as emphasis
        let result = lint(
            "The `__mul__` method is needed for left-hand multiplication (`vector * 3`) and `__rmul__` is needed for right-hand multiplication (`3 * vector`).",
        );
        assert!(
            result.is_empty(),
            "Should not flag asterisks inside inline code as emphasis (issue #49). Got: {result:?}"
        );
    }

    #[test]
    fn test_issue_28_inline_code_in_emphasis() {
        // Issue #28 - MD037 should not flag inline code inside emphasis as spaces

        // Case 1: inline code with single backticks inside bold emphasis
        let result = lint(
            "Though, we often call this an **inline `if`** because it looks sort of like an `if`-`else` statement all in *one line* of code.",
        );
        assert!(
            result.is_empty(),
            "Should not flag inline code inside emphasis as spaces (issue #28). Got: {result:?}"
        );

        // Case 2: multiple inline code snippets inside emphasis
        let result2 = lint("The **`foo` and `bar`** methods are important.");
        assert!(
            result2.is_empty(),
            "Should not flag multiple inline code snippets inside emphasis. Got: {result2:?}"
        );

        // Case 3: inline code with underscores for emphasis
        let result3 = lint("This is __inline `code`__ with underscores.");
        assert!(
            result3.is_empty(),
            "Should not flag inline code with underscore emphasis. Got: {result3:?}"
        );

        // Case 4: single asterisk emphasis with inline code
        let result4 = lint("This is *inline `test`* with single asterisks.");
        assert!(
            result4.is_empty(),
            "Should not flag inline code with single asterisk emphasis. Got: {result4:?}"
        );

        // Case 5: actual spaces that should be flagged
        let result5 = lint("This has * real spaces * that should be flagged.");
        assert!(!result5.is_empty(), "Should still flag actual spaces in emphasis");
        assert!(result5[0].message.contains("Spaces inside emphasis markers"));
    }
}
485}