rumdl_lib/rules/
md037_spaces_around_emphasis.rs

1/// Rule MD037: No spaces around emphasis markers
2///
3/// See [docs/md037.md](../../docs/md037.md) for full documentation, configuration, and examples.
4use crate::filtered_lines::FilteredLinesExt;
5use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
6use crate::utils::emphasis_utils::{
7    EmphasisSpan, find_emphasis_markers, find_emphasis_spans, has_doc_patterns, replace_inline_code,
8    replace_inline_math,
9};
10use crate::utils::kramdown_utils::has_span_ial;
11use crate::utils::regex_cache::UNORDERED_LIST_MARKER_REGEX;
12use crate::utils::skip_context::{is_in_html_comment, is_in_math_context, is_in_table_cell};
13
14/// Check if an emphasis span has spacing issues that should be flagged
15#[inline]
16fn has_spacing_issues(span: &EmphasisSpan) -> bool {
17    span.has_leading_space || span.has_trailing_space
18}
19
20/// Truncate long text for display in warning messages
21/// Shows first ~30 and last ~30 chars with ellipsis in middle for readability
22#[inline]
23fn truncate_for_display(text: &str, max_len: usize) -> String {
24    if text.len() <= max_len {
25        return text.to_string();
26    }
27
28    let prefix_len = max_len / 2 - 2; // -2 for "..."
29    let suffix_len = max_len / 2 - 2;
30
31    // Use floor_char_boundary to safely find UTF-8 character boundaries
32    let prefix_end = text.floor_char_boundary(prefix_len.min(text.len()));
33    let suffix_start = text.floor_char_boundary(text.len().saturating_sub(suffix_len));
34
35    format!("{}...{}", &text[..prefix_end], &text[suffix_start..])
36}
37
/// Rule MD037: Spaces inside emphasis markers
///
/// The rule carries no state or configuration, so it is a unit struct and its
/// `Default` implementation is derived.
#[derive(Clone, Default)]
pub struct MD037NoSpaceInEmphasis;
47
48impl MD037NoSpaceInEmphasis {
49    /// Check if a byte position is within a link (inline links, reference links, or reference definitions)
50    fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
51        // Check inline and reference links
52        for link in &ctx.links {
53            if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
54                return true;
55            }
56        }
57
58        // Check images (which use similar syntax)
59        for image in &ctx.images {
60            if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
61                return true;
62            }
63        }
64
65        // Check reference definitions [ref]: url "title" using pre-computed data (O(1) vs O(n))
66        ctx.is_in_reference_def(byte_pos)
67    }
68}
69
70impl Rule for MD037NoSpaceInEmphasis {
71    fn name(&self) -> &'static str {
72        "MD037"
73    }
74
75    fn description(&self) -> &'static str {
76        "Spaces inside emphasis markers"
77    }
78
79    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
80        let content = ctx.content;
81        let _timer = crate::profiling::ScopedTimer::new("MD037_check");
82
83        // Early return: if no emphasis markers at all, skip processing
84        if !content.contains('*') && !content.contains('_') {
85            return Ok(vec![]);
86        }
87
88        // Create LineIndex for correct byte position calculations across all line ending types
89        let line_index = &ctx.line_index;
90
91        let mut warnings = Vec::new();
92
93        // Process content lines, automatically skipping front matter and code blocks
94        for line in ctx.filtered_lines().skip_front_matter().skip_code_blocks() {
95            // Skip if the line doesn't contain any emphasis markers
96            if !line.content.contains('*') && !line.content.contains('_') {
97                continue;
98            }
99
100            // Check for emphasis issues on the original line
101            self.check_line_for_emphasis_issues_fast(line.content, line.line_num, &mut warnings);
102        }
103
104        // Filter out warnings for emphasis markers that are inside links, HTML comments, or math
105        let mut filtered_warnings = Vec::new();
106
107        for (line_idx, _line) in content.lines().enumerate() {
108            let line_num = line_idx + 1;
109            let line_start_pos = line_index.get_line_start_byte(line_num).unwrap_or(0);
110
111            // Find warnings for this line
112            for warning in &warnings {
113                if warning.line == line_num {
114                    // Calculate byte position of the warning
115                    let byte_pos = line_start_pos + (warning.column - 1);
116
117                    // Skip if inside links, HTML comments, math contexts, tables, or code spans
118                    // Note: is_in_code_span uses pulldown-cmark and correctly handles multi-line spans
119                    if !self.is_in_link(ctx, byte_pos)
120                        && !is_in_html_comment(content, byte_pos)
121                        && !is_in_math_context(ctx, byte_pos)
122                        && !is_in_table_cell(ctx, line_num, warning.column)
123                        && !ctx.is_in_code_span(line_num, warning.column)
124                    {
125                        filtered_warnings.push(warning.clone());
126                    }
127                }
128            }
129        }
130
131        Ok(filtered_warnings)
132    }
133
134    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
135        let content = ctx.content;
136        let _timer = crate::profiling::ScopedTimer::new("MD037_fix");
137
138        // Fast path: if no emphasis markers, return unchanged
139        if !content.contains('*') && !content.contains('_') {
140            return Ok(content.to_string());
141        }
142
143        // First check for issues and get all warnings with fixes
144        let warnings = self.check(ctx)?;
145
146        // If no warnings, return original content
147        if warnings.is_empty() {
148            return Ok(content.to_string());
149        }
150
151        // Create LineIndex for correct byte position calculations across all line ending types
152        let line_index = &ctx.line_index;
153
154        // Apply fixes
155        let mut result = content.to_string();
156        let mut offset: isize = 0;
157
158        // Sort warnings by position to apply fixes in the correct order
159        let mut sorted_warnings: Vec<_> = warnings.iter().filter(|w| w.fix.is_some()).collect();
160        sorted_warnings.sort_by_key(|w| (w.line, w.column));
161
162        for warning in sorted_warnings {
163            if let Some(fix) = &warning.fix {
164                // Calculate the absolute position in the file
165                let line_start = line_index.get_line_start_byte(warning.line).unwrap_or(0);
166                let abs_start = line_start + warning.column - 1;
167                let abs_end = abs_start + (fix.range.end - fix.range.start);
168
169                // Apply fix with offset adjustment
170                let actual_start = (abs_start as isize + offset) as usize;
171                let actual_end = (abs_end as isize + offset) as usize;
172
173                // Make sure we're not out of bounds
174                if actual_start < result.len() && actual_end <= result.len() {
175                    // Replace the text
176                    result.replace_range(actual_start..actual_end, &fix.replacement);
177                    // Update offset for future replacements
178                    offset += fix.replacement.len() as isize - (fix.range.end - fix.range.start) as isize;
179                }
180            }
181        }
182
183        Ok(result)
184    }
185
186    /// Get the category of this rule for selective processing
187    fn category(&self) -> RuleCategory {
188        RuleCategory::Emphasis
189    }
190
191    /// Check if this rule should be skipped
192    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
193        ctx.content.is_empty() || !ctx.likely_has_emphasis()
194    }
195
196    fn as_any(&self) -> &dyn std::any::Any {
197        self
198    }
199
200    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
201    where
202        Self: Sized,
203    {
204        Box::new(MD037NoSpaceInEmphasis)
205    }
206}
207
208impl MD037NoSpaceInEmphasis {
209    /// Optimized line checking for emphasis spacing issues
210    #[inline]
211    fn check_line_for_emphasis_issues_fast(&self, line: &str, line_num: usize, warnings: &mut Vec<LintWarning>) {
212        // Quick documentation pattern checks
213        if has_doc_patterns(line) {
214            return;
215        }
216
217        // Optimized list detection with fast path
218        // When a list marker is detected, ALWAYS check only the content after the marker,
219        // never the full line. This prevents the list marker (* + -) from being mistaken
220        // for emphasis markers.
221        if (line.starts_with(' ') || line.starts_with('*') || line.starts_with('+') || line.starts_with('-'))
222            && UNORDERED_LIST_MARKER_REGEX.is_match(line)
223        {
224            if let Some(caps) = UNORDERED_LIST_MARKER_REGEX.captures(line)
225                && let Some(full_match) = caps.get(0)
226            {
227                let list_marker_end = full_match.end();
228                if list_marker_end < line.len() {
229                    let remaining_content = &line[list_marker_end..];
230
231                    // Always check just the remaining content (after the list marker).
232                    // The list marker itself is never emphasis.
233                    self.check_line_content_for_emphasis_fast(remaining_content, line_num, list_marker_end, warnings);
234                }
235            }
236            return;
237        }
238
239        // Check the entire line
240        self.check_line_content_for_emphasis_fast(line, line_num, 0, warnings);
241    }
242
243    /// Optimized line content checking for emphasis issues
244    fn check_line_content_for_emphasis_fast(
245        &self,
246        content: &str,
247        line_num: usize,
248        offset: usize,
249        warnings: &mut Vec<LintWarning>,
250    ) {
251        // Replace inline code and inline math to avoid false positives
252        // with emphasis markers inside backticks or dollar signs
253        let processed_content = replace_inline_code(content);
254        let processed_content = replace_inline_math(&processed_content);
255
256        // Find all emphasis markers using optimized parsing
257        let markers = find_emphasis_markers(&processed_content);
258        if markers.is_empty() {
259            return;
260        }
261
262        // Find valid emphasis spans
263        let spans = find_emphasis_spans(&processed_content, markers);
264
265        // Check each span for spacing issues
266        for span in spans {
267            if has_spacing_issues(&span) {
268                // Calculate the full span including markers
269                let full_start = span.opening.start_pos;
270                let full_end = span.closing.end_pos();
271                let full_text = &content[full_start..full_end];
272
273                // Skip if this emphasis has a Kramdown span IAL immediately after it
274                // (no space between emphasis and IAL)
275                if full_end < content.len() {
276                    let remaining = &content[full_end..];
277                    // Check if IAL starts immediately after the emphasis (no whitespace)
278                    if remaining.starts_with('{') && has_span_ial(remaining.split_whitespace().next().unwrap_or("")) {
279                        continue;
280                    }
281                }
282
283                // Create the marker string efficiently
284                let marker_char = span.opening.as_char();
285                let marker_str = if span.opening.count == 1 {
286                    marker_char.to_string()
287                } else {
288                    format!("{marker_char}{marker_char}")
289                };
290
291                // Create the fixed version by trimming spaces from content
292                let trimmed_content = span.content.trim();
293                let fixed_text = format!("{marker_str}{trimmed_content}{marker_str}");
294
295                // Truncate long emphasis spans for readable warning messages
296                let display_text = truncate_for_display(full_text, 60);
297
298                let warning = LintWarning {
299                    rule_name: Some(self.name().to_string()),
300                    message: format!("Spaces inside emphasis markers: {display_text:?}"),
301                    line: line_num,
302                    column: offset + full_start + 1, // +1 because columns are 1-indexed
303                    end_line: line_num,
304                    end_column: offset + full_end + 1,
305                    severity: Severity::Warning,
306                    fix: Some(Fix {
307                        range: (offset + full_start)..(offset + full_end),
308                        replacement: fixed_text,
309                    }),
310                };
311
312                warnings.push(warning);
313            }
314        }
315    }
316}
317
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Run the rule's `check` on `content` using the standard Markdown flavor.
    fn run_check(content: &str) -> LintResult {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        MD037NoSpaceInEmphasis.check(&ctx)
    }

    /// Like `run_check`, but unwraps the result into the list of warnings.
    fn lint(content: &str) -> Vec<LintWarning> {
        run_check(content).unwrap()
    }

    #[test]
    fn test_emphasis_marker_parsing() {
        // *, *, **, **
        assert_eq!(find_emphasis_markers("This has *single* and **double** emphasis").len(), 4);

        // *, *, *, *
        assert_eq!(find_emphasis_markers("*start* and *end*").len(), 4);
    }

    #[test]
    fn test_emphasis_span_detection() {
        let valid = "This has *valid* emphasis";
        let spans = find_emphasis_spans(valid, find_emphasis_markers(valid));
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].content, "valid");
        assert!(!spans[0].has_leading_space);
        assert!(!spans[0].has_trailing_space);

        let invalid = "This has * invalid * emphasis";
        let spans = find_emphasis_spans(invalid, find_emphasis_markers(invalid));
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].content, " invalid ");
        assert!(spans[0].has_leading_space);
        assert!(spans[0].has_trailing_space);
    }

    #[test]
    fn test_with_document_structure() {
        // No spaces inside emphasis - should pass
        let result = lint("This is *correct* emphasis and **strong emphasis**");
        assert!(result.is_empty(), "No warnings expected for correct emphasis");

        // Actual spaces inside emphasis - should warn
        let result = lint("This is * text with spaces * and more content");
        assert!(!result.is_empty(), "Expected warnings for spaces in emphasis");

        // Emphasis inside a code block is ignored, but the one after it is not
        let result = lint(
            "This is *correct* emphasis\n```\n* incorrect * in code block\n```\nOutside block with * spaces in emphasis *",
        );
        assert!(
            !result.is_empty(),
            "Expected warnings for spaces in emphasis outside code block"
        );
    }

    #[test]
    fn test_emphasis_in_links_not_flagged() {
        let content = "Check this [* spaced asterisk *](https://example.com/*test*) link.\n\nThis has * real spaced emphasis * that should be flagged.";
        let result = lint(content);

        // Only the real emphasis outside links should be flagged
        assert_eq!(
            result.len(),
            1,
            "Expected exactly 1 warning, but got: {:?}",
            result.len()
        );
        assert!(result[0].message.contains("Spaces inside emphasis markers"));
        // The flagged span is "* real spaced emphasis *" on the third line
        assert_eq!(result[0].line, 3);
    }

    #[test]
    fn test_emphasis_in_links_vs_outside_links() {
        let content = "Check [* spaced *](https://example.com/*test*) and inline * real spaced * text.\n\n[* link *]: https://example.com/*path*";
        let result = lint(content);

        // Only the actual emphasis outside links should be flagged
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Spaces inside emphasis markers"));
        // The flagged span is "* real spaced *" on the first line
        assert_eq!(result[0].line, 1);
    }

    #[test]
    fn test_issue_49_asterisk_in_inline_code() {
        // Issue #49 - asterisk within backticks identified as emphasis
        let result = lint(
            "The `__mul__` method is needed for left-hand multiplication (`vector * 3`) and `__rmul__` is needed for right-hand multiplication (`3 * vector`).",
        );
        assert!(
            result.is_empty(),
            "Should not flag asterisks inside inline code as emphasis (issue #49). Got: {result:?}"
        );
    }

    #[test]
    fn test_issue_28_inline_code_in_emphasis() {
        // Issue #28 - inline code inside emphasis must not be reported as spaces.
        // Each entry is (input, reason shown on failure).
        let clean_cases = [
            // Inline code with single backticks inside bold emphasis
            (
                "Though, we often call this an **inline `if`** because it looks sort of like an `if`-`else` statement all in *one line* of code.",
                "Should not flag inline code inside emphasis as spaces (issue #28)",
            ),
            // Multiple inline code snippets inside emphasis
            (
                "The **`foo` and `bar`** methods are important.",
                "Should not flag multiple inline code snippets inside emphasis",
            ),
            // Inline code with underscores for emphasis
            (
                "This is __inline `code`__ with underscores.",
                "Should not flag inline code with underscore emphasis",
            ),
            // Single asterisk emphasis with inline code
            (
                "This is *inline `test`* with single asterisks.",
                "Should not flag inline code with single asterisk emphasis",
            ),
        ];

        for (content, reason) in clean_cases {
            let result = lint(content);
            assert!(result.is_empty(), "{reason}. Got: {result:?}");
        }

        // Actual spaces that should be flagged
        let flagged = lint("This has * real spaces * that should be flagged.");
        assert!(!flagged.is_empty(), "Should still flag actual spaces in emphasis");
        assert!(flagged[0].message.contains("Spaces inside emphasis markers"));
    }

    #[test]
    fn test_multibyte_utf8_no_panic() {
        // Regression test: multi-byte UTF-8 characters must not cause panics when
        // emphasis spans are sliced or truncated for display.
        // Each entry is (script label used in the failure message, input).
        let samples = [
            (
                "Greek",
                "Αυτό είναι ένα * τεστ με ελληνικά * και πολύ μεγάλο κείμενο που θα πρέπει να περικοπεί σωστά.",
            ),
            ("Chinese", "这是一个 * 测试文本 * 包含中文字符,需要正确处理多字节边界。"),
            (
                "Cyrillic",
                "Это * тест с кириллицей * и очень длинным текстом для проверки обрезки.",
            ),
            // Long emphasis span with mixed scripts that triggers truncation
            (
                "Mixed CJK",
                "日本語と * 中文と한국어が混在する非常に長いテキストでtruncate_for_displayの境界処理をテスト * します。",
            ),
            // Right-to-left text
            (
                "Arabic",
                "هذا * اختبار بالعربية * مع نص طويل جداً لاختبار معالجة حدود الأحرف.",
            ),
            (
                "Emoji",
                "This has * 🎉 party 🎊 celebration 🥳 emojis * that use multi-byte sequences.",
            ),
        ];

        for (script, text) in samples {
            assert!(run_check(text).is_ok(), "{script} text should not panic");
        }
    }

    #[test]
    fn test_template_shortcode_syntax_not_flagged() {
        // FastAPI/MkDocs style template syntax {* ... *} is not emphasis with spaces
        let single_shortcodes = [
            "{* ../../docs_src/cookie_param_models/tutorial001.py hl[9:12,16] *}",
            "{* ../../docs_src/conditional_openapi/tutorial001.py hl[6,11] *}",
        ];
        for content in single_shortcodes {
            let result = lint(content);
            assert!(
                result.is_empty(),
                "Template shortcode syntax should not be flagged. Got: {result:?}"
            );
        }

        // Multiple shortcodes on different lines
        let result = lint("# Header\n\n{* file1.py *}\n\nSome text.\n\n{* file2.py hl[1-5] *}");
        assert!(
            result.is_empty(),
            "Multiple template shortcodes should not be flagged. Got: {result:?}"
        );

        // But actual emphasis with spaces should still be flagged
        let result = lint("This has * real spaced emphasis * here.");
        assert!(!result.is_empty(), "Real spaced emphasis should still be flagged");
    }

    #[test]
    fn test_multiline_code_span_not_flagged() {
        // A code span may start on one line and end on another; asterisks inside it
        // must not be flagged.
        let documents = [
            "# Test\n\naffects the structure. `1 + 0 + 0` is parsed as `(1 + 0) +\n0` while `1 + 0 * 0` is parsed as `1 + (0 * 0)`. Since the pattern",
            "Text with `code that\nspans * multiple * lines` here.",
        ];

        for content in documents {
            let result = lint(content);
            assert!(
                result.is_empty(),
                "Should not flag asterisks inside multi-line code spans. Got: {result:?}"
            );
        }
    }
}
587}