rumdl_lib/rules/
md049_emphasis_style.rs

1use crate::filtered_lines::FilteredLinesExt;
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
3use crate::rules::emphasis_style::EmphasisStyle;
4use crate::utils::emphasis_utils::{find_emphasis_markers, find_single_emphasis_spans, replace_inline_code};
5use crate::utils::skip_context::is_in_mkdocs_markup;
6
7mod md049_config;
8use md049_config::MD049Config;
9
10/// Rule MD049: Emphasis style
11///
12/// See [docs/md049.md](../../docs/md049.md) for full documentation, configuration, and examples.
13///
14/// This rule is triggered when the style for emphasis is inconsistent:
15/// - Asterisks: `*text*`
16/// - Underscores: `_text_`
17///
18/// This rule is focused on regular emphasis, not strong emphasis.
19#[derive(Debug, Default, Clone)]
20pub struct MD049EmphasisStyle {
21    config: MD049Config,
22}
23
24impl MD049EmphasisStyle {
25    /// Create a new instance of MD049EmphasisStyle
26    pub fn new(style: EmphasisStyle) -> Self {
27        MD049EmphasisStyle {
28            config: MD049Config { style },
29        }
30    }
31
32    pub fn from_config_struct(config: MD049Config) -> Self {
33        Self { config }
34    }
35
36    /// Check if a byte position is within a link (inline links, reference links, or reference definitions)
37    fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
38        // Check inline and reference links
39        for link in &ctx.links {
40            if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
41                return true;
42            }
43        }
44
45        // Check images (which use similar syntax)
46        for image in &ctx.images {
47            if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
48                return true;
49            }
50        }
51
52        // Check reference definitions [ref]: url "title" using pre-computed data (O(1) vs O(n))
53        ctx.is_in_reference_def(byte_pos)
54    }
55
56    // Collect emphasis from a single line
57    fn collect_emphasis_from_line(
58        &self,
59        line: &str,
60        line_num: usize,
61        line_start_pos: usize,
62        emphasis_info: &mut Vec<(usize, usize, usize, char, String)>, // (line, col, abs_pos, marker, content)
63    ) {
64        // Replace inline code to avoid false positives
65        let line_no_code = replace_inline_code(line);
66
67        // Find all emphasis markers
68        let markers = find_emphasis_markers(&line_no_code);
69        if markers.is_empty() {
70            return;
71        }
72
73        // Find single emphasis spans (not strong emphasis)
74        let spans = find_single_emphasis_spans(&line_no_code, markers);
75
76        for span in spans {
77            let marker_char = span.opening.as_char();
78            let col = span.opening.start_pos + 1; // Convert to 1-based
79            let abs_pos = line_start_pos + span.opening.start_pos;
80
81            emphasis_info.push((line_num, col, abs_pos, marker_char, span.content.clone()));
82        }
83    }
84}
85
86impl Rule for MD049EmphasisStyle {
    /// Identifier of this rule: `"MD049"`.
    fn name(&self) -> &'static str {
        "MD049"
    }
90
    /// One-line, human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Emphasis style should be consistent"
    }
94
95    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
96        let mut warnings = vec![];
97
98        // Early return if no emphasis markers
99        if !ctx.likely_has_emphasis() {
100            return Ok(warnings);
101        }
102
103        // Use LintContext to skip code blocks
104        // Create LineIndex for correct byte position calculations across all line ending types
105        let line_index = &ctx.line_index;
106
107        // Collect all emphasis from the document
108        let mut emphasis_info = vec![];
109
110        // Process content lines, automatically skipping front matter, code blocks, HTML comments, MDX constructs, and math blocks
111        // Math blocks contain LaTeX syntax where _ and * have special meaning
112        for line in ctx
113            .filtered_lines()
114            .skip_front_matter()
115            .skip_code_blocks()
116            .skip_html_comments()
117            .skip_jsx_expressions()
118            .skip_mdx_comments()
119            .skip_math_blocks()
120        {
121            // Skip if the line doesn't contain any emphasis markers
122            if !line.content.contains('*') && !line.content.contains('_') {
123                continue;
124            }
125
126            // Get absolute position for this line
127            let line_start = line_index.get_line_start_byte(line.line_num).unwrap_or(0);
128            self.collect_emphasis_from_line(line.content, line.line_num, line_start, &mut emphasis_info);
129        }
130
131        // Filter out emphasis markers that are inside links or MkDocs markup
132        let lines: Vec<&str> = ctx.content.lines().collect();
133        emphasis_info.retain(|(line_num, col, abs_pos, _, _)| {
134            // Skip if inside a link
135            if self.is_in_link(ctx, *abs_pos) {
136                return false;
137            }
138            // Skip if inside MkDocs markup (Keys, Caret, Mark, icon shortcodes)
139            if let Some(line) = lines.get(*line_num - 1) {
140                let line_pos = col.saturating_sub(1); // Convert 1-indexed col to 0-indexed position
141                if is_in_mkdocs_markup(line, line_pos, ctx.flavor) {
142                    return false;
143                }
144            }
145            true
146        });
147
148        match self.config.style {
149            EmphasisStyle::Consistent => {
150                // If we have less than 2 emphasis nodes, no need to check consistency
151                if emphasis_info.len() < 2 {
152                    return Ok(warnings);
153                }
154
155                // Count how many times each marker appears (prevalence-based approach)
156                let asterisk_count = emphasis_info.iter().filter(|(_, _, _, m, _)| *m == '*').count();
157                let underscore_count = emphasis_info.iter().filter(|(_, _, _, m, _)| *m == '_').count();
158
159                // Use the most prevalent marker as the target style
160                // In case of a tie, prefer asterisk (matches CommonMark recommendation)
161                let target_marker = if asterisk_count >= underscore_count { '*' } else { '_' };
162
163                // Check all emphasis nodes for consistency with the prevalent style
164                for (line_num, col, abs_pos, marker, content) in &emphasis_info {
165                    if *marker != target_marker {
166                        // Calculate emphasis length (marker + content + marker)
167                        let emphasis_len = 1 + content.len() + 1;
168
169                        warnings.push(LintWarning {
170                            rule_name: Some(self.name().to_string()),
171                            line: *line_num,
172                            column: *col,
173                            end_line: *line_num,
174                            end_column: col + emphasis_len,
175                            message: format!("Emphasis should use {target_marker} instead of {marker}"),
176                            fix: Some(Fix {
177                                range: *abs_pos..*abs_pos + emphasis_len,
178                                replacement: format!("{target_marker}{content}{target_marker}"),
179                            }),
180                            severity: Severity::Warning,
181                        });
182                    }
183                }
184            }
185            EmphasisStyle::Asterisk | EmphasisStyle::Underscore => {
186                let (wrong_marker, correct_marker) = match self.config.style {
187                    EmphasisStyle::Asterisk => ('_', '*'),
188                    EmphasisStyle::Underscore => ('*', '_'),
189                    EmphasisStyle::Consistent => {
190                        // This case is handled separately above
191                        // but fallback to asterisk style for safety
192                        ('_', '*')
193                    }
194                };
195
196                for (line_num, col, abs_pos, marker, content) in &emphasis_info {
197                    if *marker == wrong_marker {
198                        // Calculate emphasis length (marker + content + marker)
199                        let emphasis_len = 1 + content.len() + 1;
200
201                        warnings.push(LintWarning {
202                            rule_name: Some(self.name().to_string()),
203                            line: *line_num,
204                            column: *col,
205                            end_line: *line_num,
206                            end_column: col + emphasis_len,
207                            message: format!("Emphasis should use {correct_marker} instead of {wrong_marker}"),
208                            fix: Some(Fix {
209                                range: *abs_pos..*abs_pos + emphasis_len,
210                                replacement: format!("{correct_marker}{content}{correct_marker}"),
211                            }),
212                            severity: Severity::Warning,
213                        });
214                    }
215                }
216            }
217        }
218        Ok(warnings)
219    }
220
221    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
222        // Get all warnings with their fixes
223        let warnings = self.check(ctx)?;
224
225        // If no warnings, return original content
226        if warnings.is_empty() {
227            return Ok(ctx.content.to_string());
228        }
229
230        // Collect all fixes and sort by range start (descending) to apply from end to beginning
231        let mut fixes: Vec<_> = warnings
232            .iter()
233            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.start, f.range.end, &f.replacement)))
234            .collect();
235        fixes.sort_by(|a, b| b.0.cmp(&a.0));
236
237        // Apply fixes from end to beginning to preserve byte offsets
238        let mut result = ctx.content.to_string();
239        for (start, end, replacement) in fixes {
240            if start < result.len() && end <= result.len() && start <= end {
241                result.replace_range(start..end, replacement);
242            }
243        }
244
245        Ok(result)
246    }
247
    /// Check if this rule should be skipped.
    ///
    /// Returns `true` when the document is empty or the context reports that it is unlikely to
    /// contain any emphasis.
    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
        ctx.content.is_empty() || !ctx.likely_has_emphasis()
    }
252
    /// Return this rule as a `&dyn Any` reference.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
256
257    fn default_config_section(&self) -> Option<(String, toml::Value)> {
258        let json_value = serde_json::to_value(&self.config).ok()?;
259        Some((
260            self.name().to_string(),
261            crate::rule_config_serde::json_to_toml_value(&json_value)?,
262        ))
263    }
264
265    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
266    where
267        Self: Sized,
268    {
269        let rule_config = crate::rule_config_serde::load_rule_config::<MD049Config>(config);
270        Box::new(Self::from_config_struct(rule_config))
271    }
272}
273
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// The rule reports its identifier.
    #[test]
    fn test_name() {
        assert_eq!(MD049EmphasisStyle::default().name(), "MD049");
    }

    /// Style names map onto the enum; unrecognised names fall back to `Consistent`.
    #[test]
    fn test_style_from_str() {
        assert_eq!(EmphasisStyle::from("asterisk"), EmphasisStyle::Asterisk);
        assert_eq!(EmphasisStyle::from("underscore"), EmphasisStyle::Underscore);
        assert_eq!(EmphasisStyle::from("other"), EmphasisStyle::Consistent);
    }

    /// Emphasis-looking text inside links and reference definitions is ignored.
    #[test]
    fn test_emphasis_in_links_not_flagged() {
        let content = r#"Check this [*asterisk*](https://example.com/*pattern*) link and [_underscore_](https://example.com/_private_).

Also see the [`__init__`][__init__] reference.

This should be _flagged_ since we're using asterisk style.

[__init__]: https://example.com/__init__.py"#;
        let rule = MD049EmphasisStyle::new(EmphasisStyle::Asterisk);
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();

        // Only the real emphasis outside links should be flagged
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Emphasis should use * instead of _"));
        // The flagged span is "_flagged_" on line 5, not anything inside a link
        assert_eq!(result[0].line, 5);
    }

    /// The same marker is ignored inside a link but flagged in running text.
    #[test]
    fn test_emphasis_in_links_vs_outside_links() {
        let content = r#"Check [*emphasis*](https://example.com/*test*) and inline *real emphasis* text.

[*link*]: https://example.com/*path*"#;
        let rule = MD049EmphasisStyle::new(EmphasisStyle::Underscore);
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();

        // Only the actual emphasis outside links should be flagged
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Emphasis should use _ instead of *"));
        // The flagged span is the "real emphasis" text on line 1
        assert_eq!(result[0].line, 1);
    }

    /// Keys notation uses `++`, which must not be confused with emphasis.
    #[test]
    fn test_mkdocs_keys_notation_not_flagged() {
        let content = "Press ++ctrl+alt+del++ to restart.";
        let rule = MD049EmphasisStyle::new(EmphasisStyle::Asterisk);
        let ctx = LintContext::new(content, MarkdownFlavor::MkDocs, None);
        let result = rule.check(&ctx).unwrap();

        assert!(
            result.is_empty(),
            "Keys notation should not be flagged as emphasis. Got: {result:?}"
        );
    }

    /// Caret notation (`^superscript^` and `^^insert^^`) is not emphasis.
    #[test]
    fn test_mkdocs_caret_notation_not_flagged() {
        let content = "This is ^superscript^ and ^^inserted^^ text.";
        let rule = MD049EmphasisStyle::new(EmphasisStyle::Asterisk);
        let ctx = LintContext::new(content, MarkdownFlavor::MkDocs, None);
        let result = rule.check(&ctx).unwrap();

        assert!(
            result.is_empty(),
            "Caret notation should not be flagged as emphasis. Got: {result:?}"
        );
    }

    /// Mark notation (`==highlight==`) is not emphasis.
    #[test]
    fn test_mkdocs_mark_notation_not_flagged() {
        let content = "This is ==highlighted== text.";
        let rule = MD049EmphasisStyle::new(EmphasisStyle::Asterisk);
        let ctx = LintContext::new(content, MarkdownFlavor::MkDocs, None);
        let result = rule.check(&ctx).unwrap();

        assert!(
            result.is_empty(),
            "Mark notation should not be flagged as emphasis. Got: {result:?}"
        );
    }

    /// MkDocs markup next to real emphasis: only the real emphasis is reported.
    #[test]
    fn test_mkdocs_mixed_content_with_real_emphasis() {
        let content = "Press ++ctrl++ and _underscore emphasis_ here.";
        let rule = MD049EmphasisStyle::new(EmphasisStyle::Asterisk);
        let ctx = LintContext::new(content, MarkdownFlavor::MkDocs, None);
        let result = rule.check(&ctx).unwrap();

        // Only the underscore emphasis should be flagged (not the Keys notation)
        assert_eq!(result.len(), 1, "Expected 1 warning, got: {result:?}");
        assert!(result[0].message.contains("Emphasis should use * instead of _"));
    }

    /// Icon shortcodes such as `:material-check:` do not disturb emphasis detection.
    #[test]
    fn test_mkdocs_icon_shortcode_not_flagged() {
        let content = "Click :material-check: and _this should be flagged_.";
        let rule = MD049EmphasisStyle::new(EmphasisStyle::Asterisk);
        let ctx = LintContext::new(content, MarkdownFlavor::MkDocs, None);
        let result = rule.check(&ctx).unwrap();

        // The underscore emphasis should still be flagged
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Emphasis should use * instead of _"));
    }
}