Skip to main content

rumdl_lib/rules/
md049_emphasis_style.rs

1use crate::filtered_lines::FilteredLinesExt;
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
3use crate::rules::emphasis_style::EmphasisStyle;
4use crate::utils::emphasis_utils::{find_emphasis_markers, find_single_emphasis_spans, replace_inline_code};
5use crate::utils::skip_context::is_in_mkdocs_markup;
6
7mod md049_config;
8use md049_config::MD049Config;
9
/// Rule MD049: Emphasis style
///
/// See [docs/md049.md](../../docs/md049.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when the style for emphasis is inconsistent:
/// - Asterisks: `*text*`
/// - Underscores: `_text_`
///
/// This rule is focused on regular emphasis, not strong emphasis.
///
/// The configured [`EmphasisStyle`] selects the expected marker: a fixed asterisk or
/// underscore style, or `Consistent`, in which case the marker used most often in the
/// document becomes the target (a tie resolves to `*`).
#[derive(Debug, Default, Clone)]
pub struct MD049EmphasisStyle {
    /// Rule configuration; its `style` field holds the expected emphasis style.
    config: MD049Config,
}
23
24impl MD049EmphasisStyle {
25    /// Create a new instance of MD049EmphasisStyle
26    pub fn new(style: EmphasisStyle) -> Self {
27        MD049EmphasisStyle {
28            config: MD049Config { style },
29        }
30    }
31
32    pub fn from_config_struct(config: MD049Config) -> Self {
33        Self { config }
34    }
35
36    /// Check if a byte position is within a link (inline links, reference links, or reference definitions).
37    /// Delegates to LintContext::is_in_link which uses O(log n) binary search.
38    fn is_in_link(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
39        ctx.is_in_link(byte_pos)
40    }
41
42    // Collect emphasis from a single line
43    fn collect_emphasis_from_line(
44        &self,
45        line: &str,
46        line_num: usize,
47        line_start_pos: usize,
48        emphasis_info: &mut Vec<(usize, usize, usize, char, String)>, // (line, col, abs_pos, marker, content)
49    ) {
50        // Replace inline code to avoid false positives
51        let line_no_code = replace_inline_code(line);
52
53        // Find all emphasis markers
54        let markers = find_emphasis_markers(&line_no_code);
55        if markers.is_empty() {
56            return;
57        }
58
59        // Find single emphasis spans (not strong emphasis)
60        let spans = find_single_emphasis_spans(&line_no_code, markers);
61
62        for span in spans {
63            let marker_char = span.opening.as_char();
64            let col = span.opening.start_pos + 1; // Convert to 1-based
65            let abs_pos = line_start_pos + span.opening.start_pos;
66
67            emphasis_info.push((line_num, col, abs_pos, marker_char, span.content.clone()));
68        }
69    }
70}
71
72impl Rule for MD049EmphasisStyle {
73    fn name(&self) -> &'static str {
74        "MD049"
75    }
76
77    fn description(&self) -> &'static str {
78        "Emphasis style should be consistent"
79    }
80
81    fn category(&self) -> RuleCategory {
82        RuleCategory::Emphasis
83    }
84
85    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
86        let mut warnings = vec![];
87
88        // Early return if no emphasis markers
89        if !ctx.likely_has_emphasis() {
90            return Ok(warnings);
91        }
92
93        // Use LintContext to skip code blocks
94        // Create LineIndex for correct byte position calculations across all line ending types
95        let line_index = &ctx.line_index;
96
97        // Collect all emphasis from the document
98        let mut emphasis_info = vec![];
99
100        // Process content lines, automatically skipping front matter, code blocks, HTML comments,
101        // MDX constructs, math blocks, and Obsidian comments
102        // Math blocks contain LaTeX syntax where _ and * have special meaning
103        for line in ctx
104            .filtered_lines()
105            .skip_front_matter()
106            .skip_code_blocks()
107            .skip_html_comments()
108            .skip_jsx_expressions()
109            .skip_mdx_comments()
110            .skip_math_blocks()
111            .skip_obsidian_comments()
112            .skip_mkdocstrings()
113        {
114            // Skip if the line doesn't contain any emphasis markers
115            if !line.content.contains('*') && !line.content.contains('_') {
116                continue;
117            }
118
119            // Get absolute position for this line
120            let line_start = line_index.get_line_start_byte(line.line_num).unwrap_or(0);
121            self.collect_emphasis_from_line(line.content, line.line_num, line_start, &mut emphasis_info);
122        }
123
124        // Filter out emphasis markers that are inside links or MkDocs markup
125        let lines = ctx.raw_lines();
126        emphasis_info.retain(|(line_num, col, abs_pos, _, _)| {
127            // Skip emphasis inside Obsidian comments
128            if ctx.is_in_obsidian_comment(*abs_pos) {
129                return false;
130            }
131            // Skip if inside a link
132            if Self::is_in_link(ctx, *abs_pos) {
133                return false;
134            }
135            // Skip if inside MkDocs markup (Keys, Caret, Mark, icon shortcodes)
136            if let Some(line) = lines.get(*line_num - 1) {
137                let line_pos = col.saturating_sub(1); // Convert 1-indexed col to 0-indexed position
138                if is_in_mkdocs_markup(line, line_pos, ctx.flavor) {
139                    return false;
140                }
141            }
142            true
143        });
144
145        match self.config.style {
146            EmphasisStyle::Consistent => {
147                // If we have less than 2 emphasis nodes, no need to check consistency
148                if emphasis_info.len() < 2 {
149                    return Ok(warnings);
150                }
151
152                // Count how many times each marker appears (prevalence-based approach)
153                let asterisk_count = emphasis_info.iter().filter(|(_, _, _, m, _)| *m == '*').count();
154                let underscore_count = emphasis_info.iter().filter(|(_, _, _, m, _)| *m == '_').count();
155
156                // Use the most prevalent marker as the target style
157                // In case of a tie, prefer asterisk (matches CommonMark recommendation)
158                let target_marker = if asterisk_count >= underscore_count { '*' } else { '_' };
159
160                // Check all emphasis nodes for consistency with the prevalent style
161                for (line_num, col, abs_pos, marker, content) in &emphasis_info {
162                    if *marker != target_marker {
163                        // Calculate emphasis length (marker + content + marker)
164                        let emphasis_len = 1 + content.len() + 1;
165
166                        warnings.push(LintWarning {
167                            rule_name: Some(self.name().to_string()),
168                            line: *line_num,
169                            column: *col,
170                            end_line: *line_num,
171                            end_column: col + emphasis_len,
172                            message: format!("Emphasis should use {target_marker} instead of {marker}"),
173                            fix: Some(Fix {
174                                range: *abs_pos..*abs_pos + emphasis_len,
175                                replacement: format!("{target_marker}{content}{target_marker}"),
176                            }),
177                            severity: Severity::Warning,
178                        });
179                    }
180                }
181            }
182            EmphasisStyle::Asterisk | EmphasisStyle::Underscore => {
183                let (wrong_marker, correct_marker) = match self.config.style {
184                    EmphasisStyle::Asterisk => ('_', '*'),
185                    EmphasisStyle::Underscore => ('*', '_'),
186                    EmphasisStyle::Consistent => {
187                        // This case is handled separately above
188                        // but fallback to asterisk style for safety
189                        ('_', '*')
190                    }
191                };
192
193                for (line_num, col, abs_pos, marker, content) in &emphasis_info {
194                    if *marker == wrong_marker {
195                        // Calculate emphasis length (marker + content + marker)
196                        let emphasis_len = 1 + content.len() + 1;
197
198                        warnings.push(LintWarning {
199                            rule_name: Some(self.name().to_string()),
200                            line: *line_num,
201                            column: *col,
202                            end_line: *line_num,
203                            end_column: col + emphasis_len,
204                            message: format!("Emphasis should use {correct_marker} instead of {wrong_marker}"),
205                            fix: Some(Fix {
206                                range: *abs_pos..*abs_pos + emphasis_len,
207                                replacement: format!("{correct_marker}{content}{correct_marker}"),
208                            }),
209                            severity: Severity::Warning,
210                        });
211                    }
212                }
213            }
214        }
215        Ok(warnings)
216    }
217
218    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
219        // Get all warnings with their fixes
220        let warnings = self.check(ctx)?;
221        let warnings =
222            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
223
224        // If no warnings, return original content
225        if warnings.is_empty() {
226            return Ok(ctx.content.to_string());
227        }
228
229        // Collect all fixes and sort by range start (descending) to apply from end to beginning
230        let mut fixes: Vec<_> = warnings
231            .iter()
232            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.start, f.range.end, &f.replacement)))
233            .collect();
234        fixes.sort_by(|a, b| b.0.cmp(&a.0));
235
236        // Apply fixes from end to beginning to preserve byte offsets
237        let mut result = ctx.content.to_string();
238        for (start, end, replacement) in fixes {
239            if start < result.len() && end <= result.len() && start <= end {
240                result.replace_range(start..end, replacement);
241            }
242        }
243
244        Ok(result)
245    }
246
247    /// Check if this rule should be skipped
248    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
249        ctx.content.is_empty() || !ctx.likely_has_emphasis()
250    }
251
252    fn as_any(&self) -> &dyn std::any::Any {
253        self
254    }
255
256    fn default_config_section(&self) -> Option<(String, toml::Value)> {
257        let json_value = serde_json::to_value(&self.config).ok()?;
258        Some((
259            self.name().to_string(),
260            crate::rule_config_serde::json_to_toml_value(&json_value)?,
261        ))
262    }
263
264    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
265    where
266        Self: Sized,
267    {
268        let rule_config = crate::rule_config_serde::load_rule_config::<MD049Config>(config);
269        Box::new(Self::from_config_struct(rule_config))
270    }
271}
272
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;

    /// Lint `content` as the given Markdown `flavor` and return the warnings from `rule`.
    fn lint(rule: &MD049EmphasisStyle, content: &str, flavor: MarkdownFlavor) -> Vec<LintWarning> {
        let ctx = crate::lint_context::LintContext::new(content, flavor, None);
        rule.check(&ctx).expect("MD049 check should not fail")
    }

    /// Rule configured to require asterisk emphasis, used by most tests below.
    fn asterisk_rule() -> MD049EmphasisStyle {
        MD049EmphasisStyle::new(EmphasisStyle::Asterisk)
    }

    #[test]
    fn test_name() {
        let rule = MD049EmphasisStyle::default();
        assert_eq!(rule.name(), "MD049");
    }

    #[test]
    fn test_style_from_str() {
        assert_eq!(EmphasisStyle::from("asterisk"), EmphasisStyle::Asterisk);
        assert_eq!(EmphasisStyle::from("underscore"), EmphasisStyle::Underscore);
        assert_eq!(EmphasisStyle::from("other"), EmphasisStyle::Consistent);
    }

    #[test]
    fn test_emphasis_in_links_not_flagged() {
        let content = r#"Check this [*asterisk*](https://example.com/*pattern*) link and [_underscore_](https://example.com/_private_).

Also see the [`__init__`][__init__] reference.

This should be _flagged_ since we're using asterisk style.

[__init__]: https://example.com/__init__.py"#;
        let result = lint(&asterisk_rule(), content, MarkdownFlavor::Standard);

        // Only the real emphasis outside links should be flagged
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Emphasis should use * instead of _"));
        // Should flag "_flagged_" but not emphasis patterns inside links
        assert_eq!(result[0].line, 5); // Line with "_flagged_"
    }

    #[test]
    fn test_emphasis_in_links_vs_outside_links() {
        let rule = MD049EmphasisStyle::new(EmphasisStyle::Underscore);
        let content = r#"Check [*emphasis*](https://example.com/*test*) and inline *real emphasis* text.

[*link*]: https://example.com/*path*"#;
        let result = lint(&rule, content, MarkdownFlavor::Standard);

        // Only the actual emphasis outside links should be flagged
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Emphasis should use _ instead of *"));
        // Should be the "real emphasis" text on line 1
        assert_eq!(result[0].line, 1);
    }

    #[test]
    fn test_mkdocs_keys_notation_not_flagged() {
        // Keys notation uses ++ which shouldn't be confused with emphasis
        let result = lint(
            &asterisk_rule(),
            "Press ++ctrl+alt+del++ to restart.",
            MarkdownFlavor::MkDocs,
        );

        // Keys notation should not be flagged as emphasis
        assert!(
            result.is_empty(),
            "Keys notation should not be flagged as emphasis. Got: {result:?}"
        );
    }

    #[test]
    fn test_mkdocs_caret_notation_not_flagged() {
        // Caret notation (^superscript^ and ^^insert^^) should not be flagged
        let result = lint(
            &asterisk_rule(),
            "This is ^superscript^ and ^^inserted^^ text.",
            MarkdownFlavor::MkDocs,
        );

        assert!(
            result.is_empty(),
            "Caret notation should not be flagged as emphasis. Got: {result:?}"
        );
    }

    #[test]
    fn test_mkdocs_mark_notation_not_flagged() {
        // Mark notation (==highlight==) should not be flagged
        let result = lint(&asterisk_rule(), "This is ==highlighted== text.", MarkdownFlavor::MkDocs);

        assert!(
            result.is_empty(),
            "Mark notation should not be flagged as emphasis. Got: {result:?}"
        );
    }

    #[test]
    fn test_mkdocs_mixed_content_with_real_emphasis() {
        // Mixed content: MkDocs markup + real emphasis that should be flagged
        let result = lint(
            &asterisk_rule(),
            "Press ++ctrl++ and _underscore emphasis_ here.",
            MarkdownFlavor::MkDocs,
        );

        // Only the real underscore emphasis should be flagged (not Keys notation)
        assert_eq!(result.len(), 1, "Expected 1 warning, got: {result:?}");
        assert!(result[0].message.contains("Emphasis should use * instead of _"));
    }

    #[test]
    fn test_mkdocs_icon_shortcode_not_flagged() {
        // Icon shortcodes like :material-star: should not affect emphasis detection
        let result = lint(
            &asterisk_rule(),
            "Click :material-check: and _this should be flagged_.",
            MarkdownFlavor::MkDocs,
        );

        // The underscore emphasis should still be flagged
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Emphasis should use * instead of _"));
    }

    #[test]
    fn test_mkdocstrings_block_not_flagged() {
        let content = "# Example\n\n::: my_module.MyClass\n    options:\n      members:\n        - _private_method\n";
        let result = lint(&asterisk_rule(), content, MarkdownFlavor::MkDocs);

        assert!(
            result.is_empty(),
            "_private_method inside mkdocstrings block should not be flagged. Got: {result:?}"
        );
    }

    #[test]
    fn test_mkdocstrings_block_with_emphasis_outside() {
        let content = "::: my_module.MyClass\n    options:\n      members:\n        - _init\n\nThis _should be flagged_ outside.\n";
        let result = lint(&asterisk_rule(), content, MarkdownFlavor::MkDocs);

        assert_eq!(
            result.len(),
            1,
            "Only emphasis outside mkdocstrings should be flagged. Got: {result:?}"
        );
        assert_eq!(result[0].line, 6);
    }

    #[test]
    fn test_obsidian_inline_comment_emphasis_ignored() {
        // Emphasis inside Obsidian comments should be ignored
        let result = lint(&asterisk_rule(), "Visible %%_hidden_%% text.", MarkdownFlavor::Obsidian);

        assert!(
            result.is_empty(),
            "Should ignore emphasis inside Obsidian comments. Got: {result:?}"
        );
    }
}