rumdl_lib/rules/
md037_spaces_around_emphasis.rs

1/// Rule MD037: No spaces around emphasis markers
2///
3/// See [docs/md037.md](../../docs/md037.md) for full documentation, configuration, and examples.
4use crate::filtered_lines::FilteredLinesExt;
5use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
6use crate::utils::emphasis_utils::{
7    EmphasisSpan, find_emphasis_markers, find_emphasis_spans, has_doc_patterns, replace_inline_code,
8};
9use crate::utils::kramdown_utils::has_span_ial;
10use crate::utils::regex_cache::UNORDERED_LIST_MARKER_REGEX;
11use crate::utils::skip_context::{is_in_html_comment, is_in_math_context, is_in_table_cell};
12
13/// Check if an emphasis span has spacing issues that should be flagged
14#[inline]
15fn has_spacing_issues(span: &EmphasisSpan) -> bool {
16    span.has_leading_space || span.has_trailing_space
17}
18
/// Truncate long text for display in warning messages.
///
/// Text of at most `max_len` bytes is returned unchanged. Longer text is shortened to
/// `<prefix>...<suffix>`, where each side keeps about `max_len / 2 - 2` bytes (roughly the
/// first and last 28 bytes for the usual `max_len` of 60).
///
/// Both cut points are moved back to the nearest UTF-8 character boundary, so slicing never
/// splits a character; as a consequence the prefix may be up to three bytes shorter and the
/// suffix up to three bytes longer than the nominal size.
///
/// For `max_len < 4` nothing is kept on either side and the result is just `"..."`.
#[inline]
fn truncate_for_display(text: &str, max_len: usize) -> String {
    if text.len() <= max_len {
        return text.to_string();
    }

    // Bytes kept on each side. Saturating keeps tiny limits (`max_len < 4`) from
    // underflowing, which would panic in debug builds and wrap in release builds.
    let keep = (max_len / 2).saturating_sub(2);

    // Largest char boundary <= `idx` (same contract as `str::floor_char_boundary`).
    // Index 0 is always a boundary, so the loop cannot step below zero.
    let floor_boundary = |mut idx: usize| {
        while !text.is_char_boundary(idx) {
            idx -= 1;
        }
        idx
    };

    // `keep <= max_len / 2 < text.len()` here, so neither index is out of range.
    let prefix_end = floor_boundary(keep);
    let suffix_start = floor_boundary(text.len() - keep);

    format!("{}...{}", &text[..prefix_end], &text[suffix_start..])
}
36
/// Rule MD037: Spaces inside emphasis markers
///
/// The rule is a stateless unit struct, so `Default` is derived rather than hand-written.
#[derive(Clone, Default)]
pub struct MD037NoSpaceInEmphasis;
46
47impl MD037NoSpaceInEmphasis {
48    /// Check if a byte position is within a link (inline links, reference links, or reference definitions)
49    fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
50        // Check inline and reference links
51        for link in &ctx.links {
52            if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
53                return true;
54            }
55        }
56
57        // Check images (which use similar syntax)
58        for image in &ctx.images {
59            if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
60                return true;
61            }
62        }
63
64        // Check reference definitions [ref]: url "title" using pre-computed data (O(1) vs O(n))
65        ctx.is_in_reference_def(byte_pos)
66    }
67}
68
69impl Rule for MD037NoSpaceInEmphasis {
70    fn name(&self) -> &'static str {
71        "MD037"
72    }
73
74    fn description(&self) -> &'static str {
75        "Spaces inside emphasis markers"
76    }
77
78    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
79        let content = ctx.content;
80        let _timer = crate::profiling::ScopedTimer::new("MD037_check");
81
82        // Early return: if no emphasis markers at all, skip processing
83        if !content.contains('*') && !content.contains('_') {
84            return Ok(vec![]);
85        }
86
87        // Create LineIndex for correct byte position calculations across all line ending types
88        let line_index = &ctx.line_index;
89
90        let mut warnings = Vec::new();
91
92        // Process content lines, automatically skipping front matter and code blocks
93        for line in ctx.filtered_lines().skip_front_matter().skip_code_blocks() {
94            // Skip if the line doesn't contain any emphasis markers
95            if !line.content.contains('*') && !line.content.contains('_') {
96                continue;
97            }
98
99            // Check for emphasis issues on the original line
100            self.check_line_for_emphasis_issues_fast(line.content, line.line_num, &mut warnings);
101        }
102
103        // Filter out warnings for emphasis markers that are inside links, HTML comments, or math
104        let mut filtered_warnings = Vec::new();
105
106        for (line_idx, _line) in content.lines().enumerate() {
107            let line_num = line_idx + 1;
108            let line_start_pos = line_index.get_line_start_byte(line_num).unwrap_or(0);
109
110            // Find warnings for this line
111            for warning in &warnings {
112                if warning.line == line_num {
113                    // Calculate byte position of the warning
114                    let byte_pos = line_start_pos + (warning.column - 1);
115
116                    // Skip if inside links, HTML comments, math contexts, tables, or code spans
117                    // Note: is_in_code_span uses pulldown-cmark and correctly handles multi-line spans
118                    if !self.is_in_link(ctx, byte_pos)
119                        && !is_in_html_comment(content, byte_pos)
120                        && !is_in_math_context(ctx, byte_pos)
121                        && !is_in_table_cell(ctx, line_num, warning.column)
122                        && !ctx.is_in_code_span(line_num, warning.column)
123                    {
124                        filtered_warnings.push(warning.clone());
125                    }
126                }
127            }
128        }
129
130        Ok(filtered_warnings)
131    }
132
133    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
134        let content = ctx.content;
135        let _timer = crate::profiling::ScopedTimer::new("MD037_fix");
136
137        // Fast path: if no emphasis markers, return unchanged
138        if !content.contains('*') && !content.contains('_') {
139            return Ok(content.to_string());
140        }
141
142        // First check for issues and get all warnings with fixes
143        let warnings = self.check(ctx)?;
144
145        // If no warnings, return original content
146        if warnings.is_empty() {
147            return Ok(content.to_string());
148        }
149
150        // Create LineIndex for correct byte position calculations across all line ending types
151        let line_index = &ctx.line_index;
152
153        // Apply fixes
154        let mut result = content.to_string();
155        let mut offset: isize = 0;
156
157        // Sort warnings by position to apply fixes in the correct order
158        let mut sorted_warnings: Vec<_> = warnings.iter().filter(|w| w.fix.is_some()).collect();
159        sorted_warnings.sort_by_key(|w| (w.line, w.column));
160
161        for warning in sorted_warnings {
162            if let Some(fix) = &warning.fix {
163                // Calculate the absolute position in the file
164                let line_start = line_index.get_line_start_byte(warning.line).unwrap_or(0);
165                let abs_start = line_start + warning.column - 1;
166                let abs_end = abs_start + (fix.range.end - fix.range.start);
167
168                // Apply fix with offset adjustment
169                let actual_start = (abs_start as isize + offset) as usize;
170                let actual_end = (abs_end as isize + offset) as usize;
171
172                // Make sure we're not out of bounds
173                if actual_start < result.len() && actual_end <= result.len() {
174                    // Replace the text
175                    result.replace_range(actual_start..actual_end, &fix.replacement);
176                    // Update offset for future replacements
177                    offset += fix.replacement.len() as isize - (fix.range.end - fix.range.start) as isize;
178                }
179            }
180        }
181
182        Ok(result)
183    }
184
185    /// Get the category of this rule for selective processing
186    fn category(&self) -> RuleCategory {
187        RuleCategory::Emphasis
188    }
189
190    /// Check if this rule should be skipped
191    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
192        ctx.content.is_empty() || !ctx.likely_has_emphasis()
193    }
194
195    fn as_any(&self) -> &dyn std::any::Any {
196        self
197    }
198
199    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
200    where
201        Self: Sized,
202    {
203        Box::new(MD037NoSpaceInEmphasis)
204    }
205}
206
207impl MD037NoSpaceInEmphasis {
208    /// Optimized line checking for emphasis spacing issues
209    #[inline]
210    fn check_line_for_emphasis_issues_fast(&self, line: &str, line_num: usize, warnings: &mut Vec<LintWarning>) {
211        // Quick documentation pattern checks
212        if has_doc_patterns(line) {
213            return;
214        }
215
216        // Optimized list detection with fast path
217        // When a list marker is detected, ALWAYS check only the content after the marker,
218        // never the full line. This prevents the list marker (* + -) from being mistaken
219        // for emphasis markers.
220        if (line.starts_with(' ') || line.starts_with('*') || line.starts_with('+') || line.starts_with('-'))
221            && UNORDERED_LIST_MARKER_REGEX.is_match(line)
222        {
223            if let Some(caps) = UNORDERED_LIST_MARKER_REGEX.captures(line)
224                && let Some(full_match) = caps.get(0)
225            {
226                let list_marker_end = full_match.end();
227                if list_marker_end < line.len() {
228                    let remaining_content = &line[list_marker_end..];
229
230                    // Always check just the remaining content (after the list marker).
231                    // The list marker itself is never emphasis.
232                    self.check_line_content_for_emphasis_fast(remaining_content, line_num, list_marker_end, warnings);
233                }
234            }
235            return;
236        }
237
238        // Check the entire line
239        self.check_line_content_for_emphasis_fast(line, line_num, 0, warnings);
240    }
241
242    /// Optimized line content checking for emphasis issues
243    fn check_line_content_for_emphasis_fast(
244        &self,
245        content: &str,
246        line_num: usize,
247        offset: usize,
248        warnings: &mut Vec<LintWarning>,
249    ) {
250        // Replace inline code to avoid false positives with emphasis markers inside backticks
251        let processed_content = replace_inline_code(content);
252
253        // Find all emphasis markers using optimized parsing
254        let markers = find_emphasis_markers(&processed_content);
255        if markers.is_empty() {
256            return;
257        }
258
259        // Find valid emphasis spans
260        let spans = find_emphasis_spans(&processed_content, markers);
261
262        // Check each span for spacing issues
263        for span in spans {
264            if has_spacing_issues(&span) {
265                // Calculate the full span including markers
266                let full_start = span.opening.start_pos;
267                let full_end = span.closing.end_pos();
268                let full_text = &content[full_start..full_end];
269
270                // Skip if this emphasis has a Kramdown span IAL immediately after it
271                // (no space between emphasis and IAL)
272                if full_end < content.len() {
273                    let remaining = &content[full_end..];
274                    // Check if IAL starts immediately after the emphasis (no whitespace)
275                    if remaining.starts_with('{') && has_span_ial(remaining.split_whitespace().next().unwrap_or("")) {
276                        continue;
277                    }
278                }
279
280                // Create the marker string efficiently
281                let marker_char = span.opening.as_char();
282                let marker_str = if span.opening.count == 1 {
283                    marker_char.to_string()
284                } else {
285                    format!("{marker_char}{marker_char}")
286                };
287
288                // Create the fixed version by trimming spaces from content
289                let trimmed_content = span.content.trim();
290                let fixed_text = format!("{marker_str}{trimmed_content}{marker_str}");
291
292                // Truncate long emphasis spans for readable warning messages
293                let display_text = truncate_for_display(full_text, 60);
294
295                let warning = LintWarning {
296                    rule_name: Some(self.name().to_string()),
297                    message: format!("Spaces inside emphasis markers: {display_text:?}"),
298                    line: line_num,
299                    column: offset + full_start + 1, // +1 because columns are 1-indexed
300                    end_line: line_num,
301                    end_column: offset + full_end + 1,
302                    severity: Severity::Warning,
303                    fix: Some(Fix {
304                        range: (offset + full_start)..(offset + full_end),
305                        replacement: fixed_text,
306                    }),
307                };
308
309                warnings.push(warning);
310            }
311        }
312    }
313}
314
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Run MD037 on `content` with the standard flavor and return the raw check result.
    fn run_check(content: &str) -> LintResult {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        MD037NoSpaceInEmphasis.check(&ctx)
    }

    /// Run MD037 on `content` and return its warnings, panicking if the check fails.
    fn warnings_for(content: &str) -> Vec<LintWarning> {
        run_check(content).unwrap()
    }

    #[test]
    fn test_emphasis_marker_parsing() {
        // Each input holds two emphasis spans, i.e. four markers.
        for text in ["This has *single* and **double** emphasis", "*start* and *end*"] {
            assert_eq!(find_emphasis_markers(text).len(), 4);
        }
    }

    #[test]
    fn test_emphasis_span_detection() {
        // Tight emphasis: one span, no padding flags set.
        let tight = "This has *valid* emphasis";
        let spans = find_emphasis_spans(tight, find_emphasis_markers(tight));
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].content, "valid");
        assert!(!spans[0].has_leading_space);
        assert!(!spans[0].has_trailing_space);

        // Padded emphasis: one span, both padding flags set.
        let padded = "This has * invalid * emphasis";
        let spans = find_emphasis_spans(padded, find_emphasis_markers(padded));
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].content, " invalid ");
        assert!(spans[0].has_leading_space);
        assert!(spans[0].has_trailing_space);
    }

    #[test]
    fn test_with_document_structure() {
        // No spaces inside emphasis: nothing to report.
        let result = warnings_for("This is *correct* emphasis and **strong emphasis**");
        assert!(result.is_empty(), "No warnings expected for correct emphasis");

        // Spaces inside emphasis: must be reported.
        let result = warnings_for("This is * text with spaces * and more content");
        assert!(!result.is_empty(), "Expected warnings for spaces in emphasis");

        // Emphasis inside a code block is ignored, the one outside is still reported.
        let result = warnings_for(
            "This is *correct* emphasis\n```\n* incorrect * in code block\n```\nOutside block with * spaces in emphasis *",
        );
        assert!(
            !result.is_empty(),
            "Expected warnings for spaces in emphasis outside code block"
        );
    }

    #[test]
    fn test_emphasis_in_links_not_flagged() {
        let content = "Check this [* spaced asterisk *](https://example.com/*test*) link.\n\nThis has * real spaced emphasis * that should be flagged.";
        let result = warnings_for(content);

        // Only the spaced emphasis outside the link may be reported.
        assert_eq!(
            result.len(),
            1,
            "Expected exactly 1 warning, but got: {:?}",
            result.len()
        );
        assert!(result[0].message.contains("Spaces inside emphasis markers"));
        // The reported span is "* real spaced emphasis *" on line 3.
        assert_eq!(result[0].line, 3);
    }

    #[test]
    fn test_emphasis_in_links_vs_outside_links() {
        let content = "Check [* spaced *](https://example.com/*test*) and inline * real spaced * text.\n\n[* link *]: https://example.com/*path*";
        let result = warnings_for(content);

        // Neither the inline link nor the reference definition may be reported.
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Spaces inside emphasis markers"));
        // The reported span is "* real spaced *" on line 1.
        assert_eq!(result[0].line, 1);
    }

    #[test]
    fn test_issue_49_asterisk_in_inline_code() {
        // Issue #49: asterisks within backticks were identified as emphasis.
        let result = warnings_for(
            "The `__mul__` method is needed for left-hand multiplication (`vector * 3`) and `__rmul__` is needed for right-hand multiplication (`3 * vector`).",
        );
        assert!(
            result.is_empty(),
            "Should not flag asterisks inside inline code as emphasis (issue #49). Got: {result:?}"
        );
    }

    #[test]
    fn test_issue_28_inline_code_in_emphasis() {
        // Issue #28: inline code inside emphasis must not be reported as spaces.

        // Case 1: inline code with single backticks inside bold emphasis
        let result = warnings_for(
            "Though, we often call this an **inline `if`** because it looks sort of like an `if`-`else` statement all in *one line* of code.",
        );
        assert!(
            result.is_empty(),
            "Should not flag inline code inside emphasis as spaces (issue #28). Got: {result:?}"
        );

        // Case 2: multiple inline code snippets inside emphasis
        let result2 = warnings_for("The **`foo` and `bar`** methods are important.");
        assert!(
            result2.is_empty(),
            "Should not flag multiple inline code snippets inside emphasis. Got: {result2:?}"
        );

        // Case 3: inline code with underscores for emphasis
        let result3 = warnings_for("This is __inline `code`__ with underscores.");
        assert!(
            result3.is_empty(),
            "Should not flag inline code with underscore emphasis. Got: {result3:?}"
        );

        // Case 4: single asterisk emphasis with inline code
        let result4 = warnings_for("This is *inline `test`* with single asterisks.");
        assert!(
            result4.is_empty(),
            "Should not flag inline code with single asterisk emphasis. Got: {result4:?}"
        );

        // Case 5: actual spaces that should be flagged
        let result5 = warnings_for("This has * real spaces * that should be flagged.");
        assert!(!result5.is_empty(), "Should still flag actual spaces in emphasis");
        assert!(result5[0].message.contains("Spaces inside emphasis markers"));
    }

    #[test]
    fn test_multibyte_utf8_no_panic() {
        // Regression test: multi-byte UTF-8 input (including a span long enough to be
        // truncated for display) must be checked without panicking.
        let cases = [
            (
                "Αυτό είναι ένα * τεστ με ελληνικά * και πολύ μεγάλο κείμενο που θα πρέπει να περικοπεί σωστά.",
                "Greek text should not panic",
            ),
            (
                "这是一个 * 测试文本 * 包含中文字符,需要正确处理多字节边界。",
                "Chinese text should not panic",
            ),
            (
                "Это * тест с кириллицей * и очень длинным текстом для проверки обрезки.",
                "Cyrillic text should not panic",
            ),
            (
                "日本語と * 中文と한국어が混在する非常に長いテキストでtruncate_for_displayの境界処理をテスト * します。",
                "Mixed CJK text should not panic",
            ),
            (
                "هذا * اختبار بالعربية * مع نص طويل جداً لاختبار معالجة حدود الأحرف.",
                "Arabic text should not panic",
            ),
            (
                "This has * 🎉 party 🎊 celebration 🥳 emojis * that use multi-byte sequences.",
                "Emoji text should not panic",
            ),
        ];

        for (text, failure_message) in cases {
            assert!(run_check(text).is_ok(), "{failure_message}");
        }
    }

    #[test]
    fn test_template_shortcode_syntax_not_flagged() {
        // FastAPI/MkDocs style template syntax `{* ... *}` is not emphasis with spaces.

        // FastAPI style code inclusion
        let result = warnings_for("{* ../../docs_src/cookie_param_models/tutorial001.py hl[9:12,16] *}");
        assert!(
            result.is_empty(),
            "Template shortcode syntax should not be flagged. Got: {result:?}"
        );

        // Another FastAPI example
        let result = warnings_for("{* ../../docs_src/conditional_openapi/tutorial001.py hl[6,11] *}");
        assert!(
            result.is_empty(),
            "Template shortcode syntax should not be flagged. Got: {result:?}"
        );

        // Multiple shortcodes on different lines
        let result = warnings_for("# Header\n\n{* file1.py *}\n\nSome text.\n\n{* file2.py hl[1-5] *}");
        assert!(
            result.is_empty(),
            "Multiple template shortcodes should not be flagged. Got: {result:?}"
        );

        // Actual emphasis with spaces is still reported
        let result = warnings_for("This has * real spaced emphasis * here.");
        assert!(!result.is_empty(), "Real spaced emphasis should still be flagged");
    }

    #[test]
    fn test_multiline_code_span_not_flagged() {
        // A code span may open on one line and close on a later one; asterisks inside it
        // must not be reported.
        let result = warnings_for(
            "# Test\n\naffects the structure. `1 + 0 + 0` is parsed as `(1 + 0) +\n0` while `1 + 0 * 0` is parsed as `1 + (0 * 0)`. Since the pattern",
        );
        assert!(
            result.is_empty(),
            "Should not flag asterisks inside multi-line code spans. Got: {result:?}"
        );

        // Another multi-line code span case
        let result2 = warnings_for("Text with `code that\nspans * multiple * lines` here.");
        assert!(
            result2.is_empty(),
            "Should not flag asterisks inside multi-line code spans. Got: {result2:?}"
        );
    }
}