Skip to main content

rumdl_lib/rules/
md049_emphasis_style.rs

1use crate::filtered_lines::FilteredLinesExt;
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
3use crate::rules::emphasis_style::EmphasisStyle;
4use crate::utils::emphasis_utils::{find_emphasis_markers, find_single_emphasis_spans, replace_inline_code};
5use crate::utils::skip_context::is_in_mkdocs_markup;
6
7mod md049_config;
8use md049_config::MD049Config;
9
10/// Rule MD049: Emphasis style
11///
12/// See [docs/md049.md](../../docs/md049.md) for full documentation, configuration, and examples.
13///
14/// This rule is triggered when the style for emphasis is inconsistent:
15/// - Asterisks: `*text*`
16/// - Underscores: `_text_`
17///
18/// This rule is focused on regular emphasis, not strong emphasis.
19#[derive(Debug, Default, Clone)]
20pub struct MD049EmphasisStyle {
21    config: MD049Config,
22}
23
24impl MD049EmphasisStyle {
25    /// Create a new instance of MD049EmphasisStyle
26    pub fn new(style: EmphasisStyle) -> Self {
27        MD049EmphasisStyle {
28            config: MD049Config { style },
29        }
30    }
31
32    pub fn from_config_struct(config: MD049Config) -> Self {
33        Self { config }
34    }
35
36    /// Check if a byte position is within a link (inline links, reference links, or reference definitions)
37    fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
38        // Check inline and reference links
39        for link in &ctx.links {
40            if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
41                return true;
42            }
43        }
44
45        // Check images (which use similar syntax)
46        for image in &ctx.images {
47            if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
48                return true;
49            }
50        }
51
52        // Check reference definitions [ref]: url "title" using pre-computed data (O(1) vs O(n))
53        ctx.is_in_reference_def(byte_pos)
54    }
55
56    // Collect emphasis from a single line
57    fn collect_emphasis_from_line(
58        &self,
59        line: &str,
60        line_num: usize,
61        line_start_pos: usize,
62        emphasis_info: &mut Vec<(usize, usize, usize, char, String)>, // (line, col, abs_pos, marker, content)
63    ) {
64        // Replace inline code to avoid false positives
65        let line_no_code = replace_inline_code(line);
66
67        // Find all emphasis markers
68        let markers = find_emphasis_markers(&line_no_code);
69        if markers.is_empty() {
70            return;
71        }
72
73        // Find single emphasis spans (not strong emphasis)
74        let spans = find_single_emphasis_spans(&line_no_code, markers);
75
76        for span in spans {
77            let marker_char = span.opening.as_char();
78            let col = span.opening.start_pos + 1; // Convert to 1-based
79            let abs_pos = line_start_pos + span.opening.start_pos;
80
81            emphasis_info.push((line_num, col, abs_pos, marker_char, span.content.clone()));
82        }
83    }
84}
85
86impl Rule for MD049EmphasisStyle {
87    fn name(&self) -> &'static str {
88        "MD049"
89    }
90
91    fn description(&self) -> &'static str {
92        "Emphasis style should be consistent"
93    }
94
95    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
96        let mut warnings = vec![];
97
98        // Early return if no emphasis markers
99        if !ctx.likely_has_emphasis() {
100            return Ok(warnings);
101        }
102
103        // Use LintContext to skip code blocks
104        // Create LineIndex for correct byte position calculations across all line ending types
105        let line_index = &ctx.line_index;
106
107        // Collect all emphasis from the document
108        let mut emphasis_info = vec![];
109
110        // Process content lines, automatically skipping front matter, code blocks, HTML comments,
111        // MDX constructs, math blocks, and Obsidian comments
112        // Math blocks contain LaTeX syntax where _ and * have special meaning
113        for line in ctx
114            .filtered_lines()
115            .skip_front_matter()
116            .skip_code_blocks()
117            .skip_html_comments()
118            .skip_jsx_expressions()
119            .skip_mdx_comments()
120            .skip_math_blocks()
121            .skip_obsidian_comments()
122        {
123            // Skip if the line doesn't contain any emphasis markers
124            if !line.content.contains('*') && !line.content.contains('_') {
125                continue;
126            }
127
128            // Get absolute position for this line
129            let line_start = line_index.get_line_start_byte(line.line_num).unwrap_or(0);
130            self.collect_emphasis_from_line(line.content, line.line_num, line_start, &mut emphasis_info);
131        }
132
133        // Filter out emphasis markers that are inside links or MkDocs markup
134        let lines: Vec<&str> = ctx.content.lines().collect();
135        emphasis_info.retain(|(line_num, col, abs_pos, _, _)| {
136            // Skip emphasis inside Obsidian comments
137            if ctx.is_in_obsidian_comment(*abs_pos) {
138                return false;
139            }
140            // Skip if inside a link
141            if self.is_in_link(ctx, *abs_pos) {
142                return false;
143            }
144            // Skip if inside MkDocs markup (Keys, Caret, Mark, icon shortcodes)
145            if let Some(line) = lines.get(*line_num - 1) {
146                let line_pos = col.saturating_sub(1); // Convert 1-indexed col to 0-indexed position
147                if is_in_mkdocs_markup(line, line_pos, ctx.flavor) {
148                    return false;
149                }
150            }
151            true
152        });
153
154        match self.config.style {
155            EmphasisStyle::Consistent => {
156                // If we have less than 2 emphasis nodes, no need to check consistency
157                if emphasis_info.len() < 2 {
158                    return Ok(warnings);
159                }
160
161                // Count how many times each marker appears (prevalence-based approach)
162                let asterisk_count = emphasis_info.iter().filter(|(_, _, _, m, _)| *m == '*').count();
163                let underscore_count = emphasis_info.iter().filter(|(_, _, _, m, _)| *m == '_').count();
164
165                // Use the most prevalent marker as the target style
166                // In case of a tie, prefer asterisk (matches CommonMark recommendation)
167                let target_marker = if asterisk_count >= underscore_count { '*' } else { '_' };
168
169                // Check all emphasis nodes for consistency with the prevalent style
170                for (line_num, col, abs_pos, marker, content) in &emphasis_info {
171                    if *marker != target_marker {
172                        // Calculate emphasis length (marker + content + marker)
173                        let emphasis_len = 1 + content.len() + 1;
174
175                        warnings.push(LintWarning {
176                            rule_name: Some(self.name().to_string()),
177                            line: *line_num,
178                            column: *col,
179                            end_line: *line_num,
180                            end_column: col + emphasis_len,
181                            message: format!("Emphasis should use {target_marker} instead of {marker}"),
182                            fix: Some(Fix {
183                                range: *abs_pos..*abs_pos + emphasis_len,
184                                replacement: format!("{target_marker}{content}{target_marker}"),
185                            }),
186                            severity: Severity::Warning,
187                        });
188                    }
189                }
190            }
191            EmphasisStyle::Asterisk | EmphasisStyle::Underscore => {
192                let (wrong_marker, correct_marker) = match self.config.style {
193                    EmphasisStyle::Asterisk => ('_', '*'),
194                    EmphasisStyle::Underscore => ('*', '_'),
195                    EmphasisStyle::Consistent => {
196                        // This case is handled separately above
197                        // but fallback to asterisk style for safety
198                        ('_', '*')
199                    }
200                };
201
202                for (line_num, col, abs_pos, marker, content) in &emphasis_info {
203                    if *marker == wrong_marker {
204                        // Calculate emphasis length (marker + content + marker)
205                        let emphasis_len = 1 + content.len() + 1;
206
207                        warnings.push(LintWarning {
208                            rule_name: Some(self.name().to_string()),
209                            line: *line_num,
210                            column: *col,
211                            end_line: *line_num,
212                            end_column: col + emphasis_len,
213                            message: format!("Emphasis should use {correct_marker} instead of {wrong_marker}"),
214                            fix: Some(Fix {
215                                range: *abs_pos..*abs_pos + emphasis_len,
216                                replacement: format!("{correct_marker}{content}{correct_marker}"),
217                            }),
218                            severity: Severity::Warning,
219                        });
220                    }
221                }
222            }
223        }
224        Ok(warnings)
225    }
226
227    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
228        // Get all warnings with their fixes
229        let warnings = self.check(ctx)?;
230
231        // If no warnings, return original content
232        if warnings.is_empty() {
233            return Ok(ctx.content.to_string());
234        }
235
236        // Collect all fixes and sort by range start (descending) to apply from end to beginning
237        let mut fixes: Vec<_> = warnings
238            .iter()
239            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.start, f.range.end, &f.replacement)))
240            .collect();
241        fixes.sort_by(|a, b| b.0.cmp(&a.0));
242
243        // Apply fixes from end to beginning to preserve byte offsets
244        let mut result = ctx.content.to_string();
245        for (start, end, replacement) in fixes {
246            if start < result.len() && end <= result.len() && start <= end {
247                result.replace_range(start..end, replacement);
248            }
249        }
250
251        Ok(result)
252    }
253
254    /// Check if this rule should be skipped
255    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
256        ctx.content.is_empty() || !ctx.likely_has_emphasis()
257    }
258
259    fn as_any(&self) -> &dyn std::any::Any {
260        self
261    }
262
263    fn default_config_section(&self) -> Option<(String, toml::Value)> {
264        let json_value = serde_json::to_value(&self.config).ok()?;
265        Some((
266            self.name().to_string(),
267            crate::rule_config_serde::json_to_toml_value(&json_value)?,
268        ))
269    }
270
271    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
272    where
273        Self: Sized,
274    {
275        let rule_config = crate::rule_config_serde::load_rule_config::<MD049Config>(config);
276        Box::new(Self::from_config_struct(rule_config))
277    }
278}
279
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;

    /// Run the rule configured with `style` over `content` parsed as `flavor`.
    fn lint(style: EmphasisStyle, content: &str, flavor: MarkdownFlavor) -> Vec<LintWarning> {
        let ctx = crate::lint_context::LintContext::new(content, flavor, None);
        MD049EmphasisStyle::new(style).check(&ctx).unwrap()
    }

    #[test]
    fn test_name() {
        assert_eq!(MD049EmphasisStyle::default().name(), "MD049");
    }

    #[test]
    fn test_style_from_str() {
        let cases = [
            ("asterisk", EmphasisStyle::Asterisk),
            ("underscore", EmphasisStyle::Underscore),
            ("other", EmphasisStyle::Consistent),
        ];
        for (input, expected) in cases {
            assert_eq!(EmphasisStyle::from(input), expected);
        }
    }

    #[test]
    fn test_emphasis_in_links_not_flagged() {
        let content = r#"Check this [*asterisk*](https://example.com/*pattern*) link and [_underscore_](https://example.com/_private_).

Also see the [`__init__`][__init__] reference.

This should be _flagged_ since we're using asterisk style.

[__init__]: https://example.com/__init__.py"#;
        let warnings = lint(EmphasisStyle::Asterisk, content, MarkdownFlavor::Standard);

        // Exactly one warning: the "_flagged_" emphasis outside of any link
        assert_eq!(warnings.len(), 1);
        let only = &warnings[0];
        assert!(only.message.contains("Emphasis should use * instead of _"));
        // "_flagged_" lives on line 5; patterns inside links stay untouched
        assert_eq!(only.line, 5);
    }

    #[test]
    fn test_emphasis_in_links_vs_outside_links() {
        let content = r#"Check [*emphasis*](https://example.com/*test*) and inline *real emphasis* text.

[*link*]: https://example.com/*path*"#;
        let warnings = lint(EmphasisStyle::Underscore, content, MarkdownFlavor::Standard);

        // Exactly one warning: the "real emphasis" text on line 1
        assert_eq!(warnings.len(), 1);
        let only = &warnings[0];
        assert!(only.message.contains("Emphasis should use _ instead of *"));
        assert_eq!(only.line, 1);
    }

    #[test]
    fn test_mkdocs_keys_notation_not_flagged() {
        // `++key++` is Keys notation, not emphasis
        let warnings = lint(
            EmphasisStyle::Asterisk,
            "Press ++ctrl+alt+del++ to restart.",
            MarkdownFlavor::MkDocs,
        );

        assert!(
            warnings.is_empty(),
            "Keys notation should not be flagged as emphasis. Got: {warnings:?}"
        );
    }

    #[test]
    fn test_mkdocs_caret_notation_not_flagged() {
        // `^superscript^` and `^^insert^^` are Caret notation, not emphasis
        let warnings = lint(
            EmphasisStyle::Asterisk,
            "This is ^superscript^ and ^^inserted^^ text.",
            MarkdownFlavor::MkDocs,
        );

        assert!(
            warnings.is_empty(),
            "Caret notation should not be flagged as emphasis. Got: {warnings:?}"
        );
    }

    #[test]
    fn test_mkdocs_mark_notation_not_flagged() {
        // `==highlight==` is Mark notation, not emphasis
        let warnings = lint(
            EmphasisStyle::Asterisk,
            "This is ==highlighted== text.",
            MarkdownFlavor::MkDocs,
        );

        assert!(
            warnings.is_empty(),
            "Mark notation should not be flagged as emphasis. Got: {warnings:?}"
        );
    }

    #[test]
    fn test_mkdocs_mixed_content_with_real_emphasis() {
        // MkDocs markup next to genuine underscore emphasis: only the latter is reported
        let warnings = lint(
            EmphasisStyle::Asterisk,
            "Press ++ctrl++ and _underscore emphasis_ here.",
            MarkdownFlavor::MkDocs,
        );

        assert_eq!(warnings.len(), 1, "Expected 1 warning, got: {warnings:?}");
        assert!(warnings[0].message.contains("Emphasis should use * instead of _"));
    }

    #[test]
    fn test_mkdocs_icon_shortcode_not_flagged() {
        // An icon shortcode such as `:material-check:` must not disturb emphasis detection
        let warnings = lint(
            EmphasisStyle::Asterisk,
            "Click :material-check: and _this should be flagged_.",
            MarkdownFlavor::MkDocs,
        );

        // The underscore emphasis is still reported
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("Emphasis should use * instead of _"));
    }

    #[test]
    fn test_obsidian_inline_comment_emphasis_ignored() {
        // Emphasis wrapped in an Obsidian `%%…%%` comment is not reported
        let warnings = lint(
            EmphasisStyle::Asterisk,
            "Visible %%_hidden_%% text.",
            MarkdownFlavor::Obsidian,
        );

        assert!(
            warnings.is_empty(),
            "Should ignore emphasis inside Obsidian comments. Got: {warnings:?}"
        );
    }
}
415}