Skip to main content

rumdl_lib/rules/
md049_emphasis_style.rs

1use crate::filtered_lines::FilteredLinesExt;
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
3use crate::rules::emphasis_style::EmphasisStyle;
4use crate::utils::emphasis_utils::{find_emphasis_markers, find_single_emphasis_spans, replace_inline_code};
5use crate::utils::skip_context::is_in_mkdocs_markup;
6
7mod md049_config;
8use md049_config::MD049Config;
9
10/// Rule MD049: Emphasis style
11///
12/// See [docs/md049.md](../../docs/md049.md) for full documentation, configuration, and examples.
13///
14/// This rule is triggered when the style for emphasis is inconsistent:
15/// - Asterisks: `*text*`
16/// - Underscores: `_text_`
17///
18/// This rule is focused on regular emphasis, not strong emphasis.
19#[derive(Debug, Default, Clone)]
20pub struct MD049EmphasisStyle {
21    config: MD049Config,
22}
23
24impl MD049EmphasisStyle {
25    /// Create a new instance of MD049EmphasisStyle
26    pub fn new(style: EmphasisStyle) -> Self {
27        MD049EmphasisStyle {
28            config: MD049Config { style },
29        }
30    }
31
32    pub fn from_config_struct(config: MD049Config) -> Self {
33        Self { config }
34    }
35
36    /// Check if a byte position is within a link (inline links, reference links, or reference definitions).
37    /// Delegates to LintContext::is_in_link which uses O(log n) binary search.
38    fn is_in_link(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
39        ctx.is_in_link(byte_pos)
40    }
41
42    // Collect emphasis from a single line
43    fn collect_emphasis_from_line(
44        &self,
45        line: &str,
46        line_num: usize,
47        line_start_pos: usize,
48        emphasis_info: &mut Vec<(usize, usize, usize, char, String)>, // (line, col, abs_pos, marker, content)
49    ) {
50        // Replace inline code to avoid false positives
51        let line_no_code = replace_inline_code(line);
52
53        // Find all emphasis markers
54        let markers = find_emphasis_markers(&line_no_code);
55        if markers.is_empty() {
56            return;
57        }
58
59        // Find single emphasis spans (not strong emphasis)
60        let spans = find_single_emphasis_spans(&line_no_code, &markers);
61
62        for span in spans {
63            let marker_char = span.opening.as_char();
64            let col = span.opening.start_pos + 1; // Convert to 1-based
65            let abs_pos = line_start_pos + span.opening.start_pos;
66
67            emphasis_info.push((line_num, col, abs_pos, marker_char, span.content.clone()));
68        }
69    }
70}
71
72impl Rule for MD049EmphasisStyle {
73    fn name(&self) -> &'static str {
74        "MD049"
75    }
76
77    fn description(&self) -> &'static str {
78        "Emphasis style should be consistent"
79    }
80
81    fn category(&self) -> RuleCategory {
82        RuleCategory::Emphasis
83    }
84
85    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
86        let mut warnings = vec![];
87
88        // Early return if no emphasis markers
89        if !ctx.likely_has_emphasis() {
90            return Ok(warnings);
91        }
92
93        // Use LintContext to skip code blocks
94        // Create LineIndex for correct byte position calculations across all line ending types
95        let line_index = &ctx.line_index;
96
97        // Collect all emphasis from the document
98        let mut emphasis_info = vec![];
99
100        // Process content lines, automatically skipping front matter, code blocks, HTML comments,
101        // MDX constructs, math blocks, and Obsidian comments
102        // Math blocks contain LaTeX syntax where _ and * have special meaning
103        for line in ctx
104            .filtered_lines()
105            .skip_front_matter()
106            .skip_code_blocks()
107            .skip_html_comments()
108            .skip_jsx_expressions()
109            .skip_mdx_comments()
110            .skip_math_blocks()
111            .skip_obsidian_comments()
112            .skip_mkdocstrings()
113        {
114            // Skip if the line doesn't contain any emphasis markers
115            if !line.content.contains('*') && !line.content.contains('_') {
116                continue;
117            }
118
119            // Get absolute position for this line
120            let line_start = line_index.get_line_start_byte(line.line_num).unwrap_or(0);
121            self.collect_emphasis_from_line(line.content, line.line_num, line_start, &mut emphasis_info);
122        }
123
124        // Filter out emphasis markers that are inside links or MkDocs markup
125        let lines = ctx.raw_lines();
126        // Math byte ranges, computed once for the whole document. The
127        // line-level `skip_math_blocks` filter drops whole math-only lines,
128        // but a line that mixes a display span with lintable prose (e.g.
129        // `$$ _x_ $$ $$ _y_ $$`) stays lintable so trailing prose is checked;
130        // this byte-level guard then exempts only the underscores that fall
131        // inside the line-start `$$...$$` span, matching MD037/MD050.
132        // `math_byte_ranges` has no code-block awareness, so a `$$` inside a
133        // fenced code block would wrongly open a span that swallows real
134        // prose up to the next `$$`. Neutralize `$` bytes inside code-block
135        // ranges first (replacing only the ASCII `$` keeps every byte offset
136        // and UTF-8 validity intact) so the byte model agrees with the
137        // code-block-aware line-level math map.
138        // Sort and merge so membership is a binary search rather than a
139        // per-span linear scan: a math-heavy document (many `$x$` spans
140        // alternating with emphasis) would otherwise be O(spans x ranges).
141        // Ranges may overlap (e.g. a `$b$` inside a `$$...$$` block), so the
142        // merge collapses them into disjoint, ascending intervals.
143        let math_ranges: Vec<(usize, usize)> = {
144            // A `$` inside fenced code or an inline code span is never a math
145            // delimiter, but `math_byte_ranges` does not know that. Neutralize
146            // those `$` first so the byte model agrees with the code-block-
147            // aware line-level math map and inline code cannot synthesize a
148            // span around real emphasis.
149            let code_spans = ctx.code_spans();
150            let math_source: std::borrow::Cow<'_, str> = if ctx.code_blocks.is_empty() && code_spans.is_empty() {
151                std::borrow::Cow::Borrowed(ctx.content)
152            } else {
153                let mut bytes = ctx.content.as_bytes().to_vec();
154                let len = bytes.len();
155                let mut mask = |start: usize, end: usize| {
156                    for b in &mut bytes[start.min(len)..end.min(len)] {
157                        if *b == b'$' {
158                            *b = b' ';
159                        }
160                    }
161                };
162                for &(start, end) in &ctx.code_blocks {
163                    mask(start, end);
164                }
165                for span in code_spans.iter() {
166                    mask(span.byte_offset, span.byte_end);
167                }
168                // Only ASCII `$` was replaced with ASCII space, so the
169                // buffer is still valid UTF-8 and the same length.
170                std::borrow::Cow::Owned(String::from_utf8(bytes).expect("ASCII-only substitution"))
171            };
172            let mut r = crate::utils::skip_context::math_byte_ranges(&math_source);
173            r.sort_unstable_by_key(|&(start, _)| start);
174            let mut merged: Vec<(usize, usize)> = Vec::with_capacity(r.len());
175            for (start, end) in r {
176                match merged.last_mut() {
177                    Some(last) if start <= last.1 => last.1 = last.1.max(end),
178                    _ => merged.push((start, end)),
179                }
180            }
181            merged
182        };
183        emphasis_info.retain(|(line_num, col, abs_pos, _, _)| {
184            // Skip emphasis inside math. `math_ranges` is disjoint and sorted
185            // by start, so the only interval that can contain `abs_pos` is
186            // the last one whose start is <= `abs_pos`.
187            let idx = math_ranges.partition_point(|&(start, _)| start <= *abs_pos);
188            if idx > 0 && *abs_pos < math_ranges[idx - 1].1 {
189                return false;
190            }
191            // Skip emphasis inside Obsidian comments
192            if ctx.is_in_obsidian_comment(*abs_pos) {
193                return false;
194            }
195            // Skip if inside a link
196            if Self::is_in_link(ctx, *abs_pos) {
197                return false;
198            }
199            // Skip if inside MkDocs markup (Keys, Caret, Mark, icon shortcodes)
200            if let Some(line) = lines.get(*line_num - 1) {
201                let line_pos = col.saturating_sub(1); // Convert 1-indexed col to 0-indexed position
202                if is_in_mkdocs_markup(line, line_pos, ctx.flavor) {
203                    return false;
204                }
205            }
206            true
207        });
208
209        match self.config.style {
210            EmphasisStyle::Consistent => {
211                // If we have less than 2 emphasis nodes, no need to check consistency
212                if emphasis_info.len() < 2 {
213                    return Ok(warnings);
214                }
215
216                // Count how many times each marker appears (prevalence-based approach)
217                let asterisk_count = emphasis_info.iter().filter(|(_, _, _, m, _)| *m == '*').count();
218                let underscore_count = emphasis_info.iter().filter(|(_, _, _, m, _)| *m == '_').count();
219
220                // Use the most prevalent marker as the target style
221                // In case of a tie, prefer asterisk (matches CommonMark recommendation)
222                let target_marker = if asterisk_count >= underscore_count { '*' } else { '_' };
223
224                // Check all emphasis nodes for consistency with the prevalent style
225                for (line_num, col, abs_pos, marker, content) in &emphasis_info {
226                    if *marker != target_marker {
227                        // Calculate emphasis length (marker + content + marker)
228                        let emphasis_len = 1 + content.len() + 1;
229
230                        warnings.push(LintWarning {
231                            rule_name: Some(self.name().to_string()),
232                            line: *line_num,
233                            column: *col,
234                            end_line: *line_num,
235                            end_column: col + emphasis_len,
236                            message: format!("Emphasis should use {target_marker} instead of {marker}"),
237                            fix: Some(Fix::new(
238                                *abs_pos..*abs_pos + emphasis_len,
239                                format!("{target_marker}{content}{target_marker}"),
240                            )),
241                            severity: Severity::Warning,
242                        });
243                    }
244                }
245            }
246            EmphasisStyle::Asterisk | EmphasisStyle::Underscore => {
247                let (wrong_marker, correct_marker) = match self.config.style {
248                    EmphasisStyle::Asterisk => ('_', '*'),
249                    EmphasisStyle::Underscore => ('*', '_'),
250                    EmphasisStyle::Consistent => {
251                        // This case is handled separately above
252                        // but fallback to asterisk style for safety
253                        ('_', '*')
254                    }
255                };
256
257                for (line_num, col, abs_pos, marker, content) in &emphasis_info {
258                    if *marker == wrong_marker {
259                        // Calculate emphasis length (marker + content + marker)
260                        let emphasis_len = 1 + content.len() + 1;
261
262                        warnings.push(LintWarning {
263                            rule_name: Some(self.name().to_string()),
264                            line: *line_num,
265                            column: *col,
266                            end_line: *line_num,
267                            end_column: col + emphasis_len,
268                            message: format!("Emphasis should use {correct_marker} instead of {wrong_marker}"),
269                            fix: Some(Fix::new(
270                                *abs_pos..*abs_pos + emphasis_len,
271                                format!("{correct_marker}{content}{correct_marker}"),
272                            )),
273                            severity: Severity::Warning,
274                        });
275                    }
276                }
277            }
278        }
279        Ok(warnings)
280    }
281
282    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
283        // Get all warnings with their fixes
284        let warnings = self.check(ctx)?;
285        let warnings =
286            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
287
288        // If no warnings, return original content
289        if warnings.is_empty() {
290            return Ok(ctx.content.to_string());
291        }
292
293        // Collect all fixes and sort by range start (descending) to apply from end to beginning
294        let mut fixes: Vec<_> = warnings
295            .iter()
296            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.start, f.range.end, &f.replacement)))
297            .collect();
298        fixes.sort_by(|a, b| b.0.cmp(&a.0));
299
300        // Apply fixes from end to beginning to preserve byte offsets
301        let mut result = ctx.content.to_string();
302        for (start, end, replacement) in fixes {
303            if start < result.len() && end <= result.len() && start <= end {
304                result.replace_range(start..end, replacement);
305            }
306        }
307
308        Ok(result)
309    }
310
311    /// Check if this rule should be skipped
312    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
313        ctx.content.is_empty() || !ctx.likely_has_emphasis()
314    }
315
316    fn as_any(&self) -> &dyn std::any::Any {
317        self
318    }
319
320    fn default_config_section(&self) -> Option<(String, toml::Value)> {
321        let json_value = serde_json::to_value(&self.config).ok()?;
322        Some((
323            self.name().to_string(),
324            crate::rule_config_serde::json_to_toml_value(&json_value)?,
325        ))
326    }
327
328    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
329    where
330        Self: Sized,
331    {
332        let rule_config = crate::rule_config_serde::load_rule_config::<MD049Config>(config);
333        Box::new(Self::from_config_struct(rule_config))
334    }
335}
336
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// Runs the rule configured with `style` over `content` parsed as `flavor`
    /// and returns the warnings it produced.
    fn lint(style: EmphasisStyle, flavor: MarkdownFlavor, content: &str) -> Vec<LintWarning> {
        let rule = MD049EmphasisStyle::new(style);
        let ctx = LintContext::new(content, flavor, None);
        let warnings = rule.check(&ctx).unwrap();
        warnings
    }

    #[test]
    fn test_name() {
        let rule = MD049EmphasisStyle::default();
        assert_eq!(rule.name(), "MD049");
    }

    #[test]
    fn test_style_from_str() {
        assert_eq!(EmphasisStyle::from("asterisk"), EmphasisStyle::Asterisk);
        assert_eq!(EmphasisStyle::from("underscore"), EmphasisStyle::Underscore);
        assert_eq!(EmphasisStyle::from("other"), EmphasisStyle::Consistent);
    }

    #[test]
    fn test_emphasis_in_links_not_flagged() {
        let content = r#"Check this [*asterisk*](https://example.com/*pattern*) link and [_underscore_](https://example.com/_private_).

Also see the [`__init__`][__init__] reference.

This should be _flagged_ since we're using asterisk style.

[__init__]: https://example.com/__init__.py"#;
        let result = lint(EmphasisStyle::Asterisk, MarkdownFlavor::Standard, content);

        // Only the real emphasis outside links should be flagged
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Emphasis should use * instead of _"));
        // Should flag "_flagged_" (line 5) but not emphasis patterns inside links
        assert_eq!(result[0].line, 5);
    }

    #[test]
    fn test_emphasis_in_links_vs_outside_links() {
        let content = r#"Check [*emphasis*](https://example.com/*test*) and inline *real emphasis* text.

[*link*]: https://example.com/*path*"#;
        let result = lint(EmphasisStyle::Underscore, MarkdownFlavor::Standard, content);

        // Only the actual emphasis outside links should be flagged
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Emphasis should use _ instead of *"));
        // Should be the "real emphasis" text on line 1
        assert_eq!(result[0].line, 1);
    }

    #[test]
    fn test_mkdocs_keys_notation_not_flagged() {
        // Keys notation uses ++ which shouldn't be confused with emphasis
        let result = lint(
            EmphasisStyle::Asterisk,
            MarkdownFlavor::MkDocs,
            "Press ++ctrl+alt+del++ to restart.",
        );

        // Keys notation should not be flagged as emphasis
        assert!(
            result.is_empty(),
            "Keys notation should not be flagged as emphasis. Got: {result:?}"
        );
    }

    #[test]
    fn test_mkdocs_caret_notation_not_flagged() {
        // Caret notation (^superscript^ and ^^insert^^) should not be flagged
        let result = lint(
            EmphasisStyle::Asterisk,
            MarkdownFlavor::MkDocs,
            "This is ^superscript^ and ^^inserted^^ text.",
        );

        assert!(
            result.is_empty(),
            "Caret notation should not be flagged as emphasis. Got: {result:?}"
        );
    }

    #[test]
    fn test_mkdocs_mark_notation_not_flagged() {
        // Mark notation (==highlight==) should not be flagged
        let result = lint(
            EmphasisStyle::Asterisk,
            MarkdownFlavor::MkDocs,
            "This is ==highlighted== text.",
        );

        assert!(
            result.is_empty(),
            "Mark notation should not be flagged as emphasis. Got: {result:?}"
        );
    }

    #[test]
    fn test_mkdocs_mixed_content_with_real_emphasis() {
        // Mixed content: MkDocs markup + real emphasis that should be flagged
        let result = lint(
            EmphasisStyle::Asterisk,
            MarkdownFlavor::MkDocs,
            "Press ++ctrl++ and _underscore emphasis_ here.",
        );

        // Only the real underscore emphasis should be flagged (not Keys notation)
        assert_eq!(result.len(), 1, "Expected 1 warning, got: {result:?}");
        assert!(result[0].message.contains("Emphasis should use * instead of _"));
    }

    #[test]
    fn test_mkdocs_icon_shortcode_not_flagged() {
        // Icon shortcodes like :material-star: should not affect emphasis detection
        let result = lint(
            EmphasisStyle::Asterisk,
            MarkdownFlavor::MkDocs,
            "Click :material-check: and _this should be flagged_.",
        );

        // The underscore emphasis should still be flagged
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Emphasis should use * instead of _"));
    }

    #[test]
    fn test_mkdocstrings_block_not_flagged() {
        let content = "# Example\n\n::: my_module.MyClass\n    options:\n      members:\n        - _private_method\n";
        let result = lint(EmphasisStyle::Asterisk, MarkdownFlavor::MkDocs, content);

        assert!(
            result.is_empty(),
            "`_private_method` inside mkdocstrings block should not be flagged. Got: {result:?}"
        );
    }

    #[test]
    fn test_mkdocstrings_block_with_emphasis_outside() {
        let content = "::: my_module.MyClass\n    options:\n      members:\n        - _init\n\nThis _should be flagged_ outside.\n";
        let result = lint(EmphasisStyle::Asterisk, MarkdownFlavor::MkDocs, content);

        assert_eq!(
            result.len(),
            1,
            "Only emphasis outside mkdocstrings should be flagged. Got: {result:?}"
        );
        assert_eq!(result[0].line, 6);
    }

    #[test]
    fn test_obsidian_inline_comment_emphasis_ignored() {
        // Emphasis inside Obsidian comments should be ignored
        let result = lint(
            EmphasisStyle::Asterisk,
            MarkdownFlavor::Obsidian,
            "Visible %%_hidden_%% text.",
        );

        assert!(
            result.is_empty(),
            "Should ignore emphasis inside Obsidian comments. Got: {result:?}"
        );
    }
}