Skip to main content

rumdl_lib/rules/
md049_emphasis_style.rs

1use crate::filtered_lines::FilteredLinesExt;
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
3use crate::rules::emphasis_style::EmphasisStyle;
4use crate::utils::emphasis_utils::{find_emphasis_markers, find_single_emphasis_spans, replace_inline_code};
5use crate::utils::skip_context::is_in_mkdocs_markup;
6
7mod md049_config;
8use md049_config::MD049Config;
9
/// Rule MD049: Emphasis style
///
/// See [docs/md049.md](../../docs/md049.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when the style for emphasis is inconsistent:
/// - Asterisks: `*text*`
/// - Underscores: `_text_`
///
/// This rule is focused on regular emphasis, not strong emphasis.
#[derive(Debug, Default, Clone)]
pub struct MD049EmphasisStyle {
    /// Rule settings; `config.style` selects which emphasis marker is enforced
    /// (asterisk, underscore, or whichever is consistent with the document).
    config: MD049Config,
}
23
24impl MD049EmphasisStyle {
25    /// Create a new instance of MD049EmphasisStyle
26    pub fn new(style: EmphasisStyle) -> Self {
27        MD049EmphasisStyle {
28            config: MD049Config { style },
29        }
30    }
31
32    pub fn from_config_struct(config: MD049Config) -> Self {
33        Self { config }
34    }
35
36    /// Check if a byte position is within a link (inline links, reference links, or reference definitions)
37    fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
38        // Check inline and reference links
39        for link in &ctx.links {
40            if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
41                return true;
42            }
43        }
44
45        // Check images (which use similar syntax)
46        for image in &ctx.images {
47            if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
48                return true;
49            }
50        }
51
52        // Check reference definitions [ref]: url "title" using pre-computed data (O(1) vs O(n))
53        ctx.is_in_reference_def(byte_pos)
54    }
55
56    // Collect emphasis from a single line
57    fn collect_emphasis_from_line(
58        &self,
59        line: &str,
60        line_num: usize,
61        line_start_pos: usize,
62        emphasis_info: &mut Vec<(usize, usize, usize, char, String)>, // (line, col, abs_pos, marker, content)
63    ) {
64        // Replace inline code to avoid false positives
65        let line_no_code = replace_inline_code(line);
66
67        // Find all emphasis markers
68        let markers = find_emphasis_markers(&line_no_code);
69        if markers.is_empty() {
70            return;
71        }
72
73        // Find single emphasis spans (not strong emphasis)
74        let spans = find_single_emphasis_spans(&line_no_code, markers);
75
76        for span in spans {
77            let marker_char = span.opening.as_char();
78            let col = span.opening.start_pos + 1; // Convert to 1-based
79            let abs_pos = line_start_pos + span.opening.start_pos;
80
81            emphasis_info.push((line_num, col, abs_pos, marker_char, span.content.clone()));
82        }
83    }
84}
85
86impl Rule for MD049EmphasisStyle {
87    fn name(&self) -> &'static str {
88        "MD049"
89    }
90
91    fn description(&self) -> &'static str {
92        "Emphasis style should be consistent"
93    }
94
95    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
96        let mut warnings = vec![];
97
98        // Early return if no emphasis markers
99        if !ctx.likely_has_emphasis() {
100            return Ok(warnings);
101        }
102
103        // Use LintContext to skip code blocks
104        // Create LineIndex for correct byte position calculations across all line ending types
105        let line_index = &ctx.line_index;
106
107        // Collect all emphasis from the document
108        let mut emphasis_info = vec![];
109
110        // Process content lines, automatically skipping front matter, code blocks, HTML comments,
111        // MDX constructs, math blocks, and Obsidian comments
112        // Math blocks contain LaTeX syntax where _ and * have special meaning
113        for line in ctx
114            .filtered_lines()
115            .skip_front_matter()
116            .skip_code_blocks()
117            .skip_html_comments()
118            .skip_jsx_expressions()
119            .skip_mdx_comments()
120            .skip_math_blocks()
121            .skip_obsidian_comments()
122            .skip_mkdocstrings()
123        {
124            // Skip if the line doesn't contain any emphasis markers
125            if !line.content.contains('*') && !line.content.contains('_') {
126                continue;
127            }
128
129            // Get absolute position for this line
130            let line_start = line_index.get_line_start_byte(line.line_num).unwrap_or(0);
131            self.collect_emphasis_from_line(line.content, line.line_num, line_start, &mut emphasis_info);
132        }
133
134        // Filter out emphasis markers that are inside links or MkDocs markup
135        let lines = ctx.raw_lines();
136        emphasis_info.retain(|(line_num, col, abs_pos, _, _)| {
137            // Skip emphasis inside Obsidian comments
138            if ctx.is_in_obsidian_comment(*abs_pos) {
139                return false;
140            }
141            // Skip if inside a link
142            if self.is_in_link(ctx, *abs_pos) {
143                return false;
144            }
145            // Skip if inside MkDocs markup (Keys, Caret, Mark, icon shortcodes)
146            if let Some(line) = lines.get(*line_num - 1) {
147                let line_pos = col.saturating_sub(1); // Convert 1-indexed col to 0-indexed position
148                if is_in_mkdocs_markup(line, line_pos, ctx.flavor) {
149                    return false;
150                }
151            }
152            true
153        });
154
155        match self.config.style {
156            EmphasisStyle::Consistent => {
157                // If we have less than 2 emphasis nodes, no need to check consistency
158                if emphasis_info.len() < 2 {
159                    return Ok(warnings);
160                }
161
162                // Count how many times each marker appears (prevalence-based approach)
163                let asterisk_count = emphasis_info.iter().filter(|(_, _, _, m, _)| *m == '*').count();
164                let underscore_count = emphasis_info.iter().filter(|(_, _, _, m, _)| *m == '_').count();
165
166                // Use the most prevalent marker as the target style
167                // In case of a tie, prefer asterisk (matches CommonMark recommendation)
168                let target_marker = if asterisk_count >= underscore_count { '*' } else { '_' };
169
170                // Check all emphasis nodes for consistency with the prevalent style
171                for (line_num, col, abs_pos, marker, content) in &emphasis_info {
172                    if *marker != target_marker {
173                        // Calculate emphasis length (marker + content + marker)
174                        let emphasis_len = 1 + content.len() + 1;
175
176                        warnings.push(LintWarning {
177                            rule_name: Some(self.name().to_string()),
178                            line: *line_num,
179                            column: *col,
180                            end_line: *line_num,
181                            end_column: col + emphasis_len,
182                            message: format!("Emphasis should use {target_marker} instead of {marker}"),
183                            fix: Some(Fix {
184                                range: *abs_pos..*abs_pos + emphasis_len,
185                                replacement: format!("{target_marker}{content}{target_marker}"),
186                            }),
187                            severity: Severity::Warning,
188                        });
189                    }
190                }
191            }
192            EmphasisStyle::Asterisk | EmphasisStyle::Underscore => {
193                let (wrong_marker, correct_marker) = match self.config.style {
194                    EmphasisStyle::Asterisk => ('_', '*'),
195                    EmphasisStyle::Underscore => ('*', '_'),
196                    EmphasisStyle::Consistent => {
197                        // This case is handled separately above
198                        // but fallback to asterisk style for safety
199                        ('_', '*')
200                    }
201                };
202
203                for (line_num, col, abs_pos, marker, content) in &emphasis_info {
204                    if *marker == wrong_marker {
205                        // Calculate emphasis length (marker + content + marker)
206                        let emphasis_len = 1 + content.len() + 1;
207
208                        warnings.push(LintWarning {
209                            rule_name: Some(self.name().to_string()),
210                            line: *line_num,
211                            column: *col,
212                            end_line: *line_num,
213                            end_column: col + emphasis_len,
214                            message: format!("Emphasis should use {correct_marker} instead of {wrong_marker}"),
215                            fix: Some(Fix {
216                                range: *abs_pos..*abs_pos + emphasis_len,
217                                replacement: format!("{correct_marker}{content}{correct_marker}"),
218                            }),
219                            severity: Severity::Warning,
220                        });
221                    }
222                }
223            }
224        }
225        Ok(warnings)
226    }
227
228    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
229        // Get all warnings with their fixes
230        let warnings = self.check(ctx)?;
231        let warnings =
232            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
233
234        // If no warnings, return original content
235        if warnings.is_empty() {
236            return Ok(ctx.content.to_string());
237        }
238
239        // Collect all fixes and sort by range start (descending) to apply from end to beginning
240        let mut fixes: Vec<_> = warnings
241            .iter()
242            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.start, f.range.end, &f.replacement)))
243            .collect();
244        fixes.sort_by(|a, b| b.0.cmp(&a.0));
245
246        // Apply fixes from end to beginning to preserve byte offsets
247        let mut result = ctx.content.to_string();
248        for (start, end, replacement) in fixes {
249            if start < result.len() && end <= result.len() && start <= end {
250                result.replace_range(start..end, replacement);
251            }
252        }
253
254        Ok(result)
255    }
256
257    /// Check if this rule should be skipped
258    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
259        ctx.content.is_empty() || !ctx.likely_has_emphasis()
260    }
261
262    fn as_any(&self) -> &dyn std::any::Any {
263        self
264    }
265
266    fn default_config_section(&self) -> Option<(String, toml::Value)> {
267        let json_value = serde_json::to_value(&self.config).ok()?;
268        Some((
269            self.name().to_string(),
270            crate::rule_config_serde::json_to_toml_value(&json_value)?,
271        ))
272    }
273
274    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
275    where
276        Self: Sized,
277    {
278        let rule_config = crate::rule_config_serde::load_rule_config::<MD049Config>(config);
279        Box::new(Self::from_config_struct(rule_config))
280    }
281}
282
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// Runs MD049 configured with `style` over `content` and returns the reported warnings.
    fn lint(style: EmphasisStyle, flavor: MarkdownFlavor, content: &str) -> Vec<LintWarning> {
        let rule = MD049EmphasisStyle::new(style);
        let ctx = LintContext::new(content, flavor, None);
        rule.check(&ctx).unwrap()
    }

    #[test]
    fn test_name() {
        assert_eq!(MD049EmphasisStyle::default().name(), "MD049");
    }

    #[test]
    fn test_style_from_str() {
        let cases = [
            ("asterisk", EmphasisStyle::Asterisk),
            ("underscore", EmphasisStyle::Underscore),
            ("other", EmphasisStyle::Consistent),
        ];
        for (input, expected) in cases {
            assert_eq!(EmphasisStyle::from(input), expected);
        }
    }

    #[test]
    fn test_emphasis_in_links_not_flagged() {
        let content = r#"Check this [*asterisk*](https://example.com/*pattern*) link and [_underscore_](https://example.com/_private_).

Also see the [`__init__`][__init__] reference.

This should be _flagged_ since we're using asterisk style.

[__init__]: https://example.com/__init__.py"#;
        let warnings = lint(EmphasisStyle::Asterisk, MarkdownFlavor::Standard, content);

        // Emphasis-like patterns inside links are ignored; only "_flagged_" remains
        assert_eq!(warnings.len(), 1);
        let only = &warnings[0];
        assert!(only.message.contains("Emphasis should use * instead of _"));
        assert_eq!(only.line, 5); // Line with "_flagged_"
    }

    #[test]
    fn test_emphasis_in_links_vs_outside_links() {
        let content = r#"Check [*emphasis*](https://example.com/*test*) and inline *real emphasis* text.

[*link*]: https://example.com/*path*"#;
        let warnings = lint(EmphasisStyle::Underscore, MarkdownFlavor::Standard, content);

        // Only "real emphasis" on line 1 lives outside of any link
        assert_eq!(warnings.len(), 1);
        let only = &warnings[0];
        assert!(only.message.contains("Emphasis should use _ instead of *"));
        assert_eq!(only.line, 1);
    }

    #[test]
    fn test_mkdocs_keys_notation_not_flagged() {
        // Keys notation uses ++ which shouldn't be confused with emphasis
        let warnings = lint(
            EmphasisStyle::Asterisk,
            MarkdownFlavor::MkDocs,
            "Press ++ctrl+alt+del++ to restart.",
        );

        assert!(
            warnings.is_empty(),
            "Keys notation should not be flagged as emphasis. Got: {warnings:?}"
        );
    }

    #[test]
    fn test_mkdocs_caret_notation_not_flagged() {
        // Caret notation (^superscript^ and ^^insert^^) should not be flagged
        let warnings = lint(
            EmphasisStyle::Asterisk,
            MarkdownFlavor::MkDocs,
            "This is ^superscript^ and ^^inserted^^ text.",
        );

        assert!(
            warnings.is_empty(),
            "Caret notation should not be flagged as emphasis. Got: {warnings:?}"
        );
    }

    #[test]
    fn test_mkdocs_mark_notation_not_flagged() {
        // Mark notation (==highlight==) should not be flagged
        let warnings = lint(
            EmphasisStyle::Asterisk,
            MarkdownFlavor::MkDocs,
            "This is ==highlighted== text.",
        );

        assert!(
            warnings.is_empty(),
            "Mark notation should not be flagged as emphasis. Got: {warnings:?}"
        );
    }

    #[test]
    fn test_mkdocs_mixed_content_with_real_emphasis() {
        // Mixed content: MkDocs markup + real emphasis that should be flagged
        let warnings = lint(
            EmphasisStyle::Asterisk,
            MarkdownFlavor::MkDocs,
            "Press ++ctrl++ and _underscore emphasis_ here.",
        );

        // The Keys notation is ignored; the underscore emphasis is reported
        assert_eq!(warnings.len(), 1, "Expected 1 warning, got: {warnings:?}");
        assert!(warnings[0].message.contains("Emphasis should use * instead of _"));
    }

    #[test]
    fn test_mkdocs_icon_shortcode_not_flagged() {
        // Icon shortcodes like :material-star: should not affect emphasis detection
        let warnings = lint(
            EmphasisStyle::Asterisk,
            MarkdownFlavor::MkDocs,
            "Click :material-check: and _this should be flagged_.",
        );

        // The underscore emphasis should still be flagged
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("Emphasis should use * instead of _"));
    }

    #[test]
    fn test_mkdocstrings_block_not_flagged() {
        let warnings = lint(
            EmphasisStyle::Asterisk,
            MarkdownFlavor::MkDocs,
            "# Example\n\n::: my_module.MyClass\n    options:\n      members:\n        - _private_method\n",
        );

        assert!(
            warnings.is_empty(),
            "_private_method_ inside mkdocstrings block should not be flagged. Got: {warnings:?}"
        );
    }

    #[test]
    fn test_mkdocstrings_block_with_emphasis_outside() {
        let warnings = lint(
            EmphasisStyle::Asterisk,
            MarkdownFlavor::MkDocs,
            "::: my_module.MyClass\n    options:\n      members:\n        - _init\n\nThis _should be flagged_ outside.\n",
        );

        assert_eq!(
            warnings.len(),
            1,
            "Only emphasis outside mkdocstrings should be flagged. Got: {warnings:?}"
        );
        assert_eq!(warnings[0].line, 6);
    }

    #[test]
    fn test_obsidian_inline_comment_emphasis_ignored() {
        // Emphasis inside Obsidian comments should be ignored
        let warnings = lint(
            EmphasisStyle::Asterisk,
            MarkdownFlavor::Obsidian,
            "Visible %%_hidden_%% text.",
        );

        assert!(
            warnings.is_empty(),
            "Should ignore emphasis inside Obsidian comments. Got: {warnings:?}"
        );
    }
}