rumdl_lib/rules/
md049_emphasis_style.rs

1use crate::filtered_lines::FilteredLinesExt;
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
3use crate::rules::emphasis_style::EmphasisStyle;
4use crate::utils::emphasis_utils::{find_emphasis_markers, find_single_emphasis_spans, replace_inline_code};
5
6mod md049_config;
7use md049_config::MD049Config;
8
/// Rule MD049: Emphasis style
///
/// See [docs/md049.md](../../docs/md049.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when the style for emphasis is inconsistent:
/// - Asterisks: `*text*`
/// - Underscores: `_text_`
///
/// This rule is focused on regular emphasis, not strong emphasis.
#[derive(Debug, Default, Clone)]
pub struct MD049EmphasisStyle {
    /// Rule configuration; its `style` field selects asterisk, underscore,
    /// or document-consistent emphasis markers.
    config: MD049Config,
}
22
23impl MD049EmphasisStyle {
24    /// Create a new instance of MD049EmphasisStyle
25    pub fn new(style: EmphasisStyle) -> Self {
26        MD049EmphasisStyle {
27            config: MD049Config { style },
28        }
29    }
30
31    pub fn from_config_struct(config: MD049Config) -> Self {
32        Self { config }
33    }
34
35    /// Check if a byte position is within a link (inline links, reference links, or reference definitions)
36    fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
37        // Check inline and reference links
38        for link in &ctx.links {
39            if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
40                return true;
41            }
42        }
43
44        // Check images (which use similar syntax)
45        for image in &ctx.images {
46            if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
47                return true;
48            }
49        }
50
51        // Check reference definitions [ref]: url "title" using pre-computed data (O(1) vs O(n))
52        ctx.is_in_reference_def(byte_pos)
53    }
54
55    // Collect emphasis from a single line
56    fn collect_emphasis_from_line(
57        &self,
58        line: &str,
59        line_num: usize,
60        line_start_pos: usize,
61        emphasis_info: &mut Vec<(usize, usize, usize, char, String)>, // (line, col, abs_pos, marker, content)
62    ) {
63        // Replace inline code to avoid false positives
64        let line_no_code = replace_inline_code(line);
65
66        // Find all emphasis markers
67        let markers = find_emphasis_markers(&line_no_code);
68        if markers.is_empty() {
69            return;
70        }
71
72        // Find single emphasis spans (not strong emphasis)
73        let spans = find_single_emphasis_spans(&line_no_code, markers);
74
75        for span in spans {
76            let marker_char = span.opening.as_char();
77            let col = span.opening.start_pos + 1; // Convert to 1-based
78            let abs_pos = line_start_pos + span.opening.start_pos;
79
80            emphasis_info.push((line_num, col, abs_pos, marker_char, span.content.clone()));
81        }
82    }
83}
84
85impl Rule for MD049EmphasisStyle {
    /// Identifier of this rule, `"MD049"`; also stored in the `rule_name`
    /// of every warning it produces.
    fn name(&self) -> &'static str {
        "MD049"
    }
89
    /// One-line human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Emphasis style should be consistent"
    }
93
94    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
95        let mut warnings = vec![];
96
97        // Early return if no emphasis markers
98        if !ctx.likely_has_emphasis() {
99            return Ok(warnings);
100        }
101
102        // Use LintContext to skip code blocks
103        // Create LineIndex for correct byte position calculations across all line ending types
104        let line_index = &ctx.line_index;
105
106        // Collect all emphasis from the document
107        let mut emphasis_info = vec![];
108
109        // Process content lines, automatically skipping front matter, code blocks, and HTML comments
110        for line in ctx
111            .filtered_lines()
112            .skip_front_matter()
113            .skip_code_blocks()
114            .skip_html_comments()
115        {
116            // Skip if the line doesn't contain any emphasis markers
117            if !line.content.contains('*') && !line.content.contains('_') {
118                continue;
119            }
120
121            // Get absolute position for this line
122            let line_start = line_index.get_line_start_byte(line.line_num).unwrap_or(0);
123            self.collect_emphasis_from_line(line.content, line.line_num, line_start, &mut emphasis_info);
124        }
125
126        // Filter out emphasis markers that are inside links
127        emphasis_info.retain(|(_, _, abs_pos, _, _)| !self.is_in_link(ctx, *abs_pos));
128
129        match self.config.style {
130            EmphasisStyle::Consistent => {
131                // If we have less than 2 emphasis nodes, no need to check consistency
132                if emphasis_info.len() < 2 {
133                    return Ok(warnings);
134                }
135
136                // Count how many times each marker appears (prevalence-based approach)
137                let asterisk_count = emphasis_info.iter().filter(|(_, _, _, m, _)| *m == '*').count();
138                let underscore_count = emphasis_info.iter().filter(|(_, _, _, m, _)| *m == '_').count();
139
140                // Use the most prevalent marker as the target style
141                // In case of a tie, prefer asterisk (matches CommonMark recommendation)
142                let target_marker = if asterisk_count >= underscore_count { '*' } else { '_' };
143
144                // Check all emphasis nodes for consistency with the prevalent style
145                for (line_num, col, abs_pos, marker, content) in &emphasis_info {
146                    if *marker != target_marker {
147                        // Calculate emphasis length (marker + content + marker)
148                        let emphasis_len = 1 + content.len() + 1;
149
150                        warnings.push(LintWarning {
151                            rule_name: Some(self.name().to_string()),
152                            line: *line_num,
153                            column: *col,
154                            end_line: *line_num,
155                            end_column: col + emphasis_len,
156                            message: format!("Emphasis should use {target_marker} instead of {marker}"),
157                            fix: Some(Fix {
158                                range: *abs_pos..*abs_pos + emphasis_len,
159                                replacement: format!("{target_marker}{content}{target_marker}"),
160                            }),
161                            severity: Severity::Warning,
162                        });
163                    }
164                }
165            }
166            EmphasisStyle::Asterisk | EmphasisStyle::Underscore => {
167                let (wrong_marker, correct_marker) = match self.config.style {
168                    EmphasisStyle::Asterisk => ('_', '*'),
169                    EmphasisStyle::Underscore => ('*', '_'),
170                    EmphasisStyle::Consistent => {
171                        // This case is handled separately above
172                        // but fallback to asterisk style for safety
173                        ('_', '*')
174                    }
175                };
176
177                for (line_num, col, abs_pos, marker, content) in &emphasis_info {
178                    if *marker == wrong_marker {
179                        // Calculate emphasis length (marker + content + marker)
180                        let emphasis_len = 1 + content.len() + 1;
181
182                        warnings.push(LintWarning {
183                            rule_name: Some(self.name().to_string()),
184                            line: *line_num,
185                            column: *col,
186                            end_line: *line_num,
187                            end_column: col + emphasis_len,
188                            message: format!("Emphasis should use {correct_marker} instead of {wrong_marker}"),
189                            fix: Some(Fix {
190                                range: *abs_pos..*abs_pos + emphasis_len,
191                                replacement: format!("{correct_marker}{content}{correct_marker}"),
192                            }),
193                            severity: Severity::Warning,
194                        });
195                    }
196                }
197            }
198        }
199        Ok(warnings)
200    }
201
202    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
203        // Get all warnings with their fixes
204        let warnings = self.check(ctx)?;
205
206        // If no warnings, return original content
207        if warnings.is_empty() {
208            return Ok(ctx.content.to_string());
209        }
210
211        // Collect all fixes and sort by range start (descending) to apply from end to beginning
212        let mut fixes: Vec<_> = warnings
213            .iter()
214            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.start, f.range.end, &f.replacement)))
215            .collect();
216        fixes.sort_by(|a, b| b.0.cmp(&a.0));
217
218        // Apply fixes from end to beginning to preserve byte offsets
219        let mut result = ctx.content.to_string();
220        for (start, end, replacement) in fixes {
221            if start < result.len() && end <= result.len() && start <= end {
222                result.replace_range(start..end, replacement);
223            }
224        }
225
226        Ok(result)
227    }
228
229    /// Check if this rule should be skipped
230    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
231        ctx.content.is_empty() || !ctx.likely_has_emphasis()
232    }
233
    /// Expose the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
237
238    fn default_config_section(&self) -> Option<(String, toml::Value)> {
239        let json_value = serde_json::to_value(&self.config).ok()?;
240        Some((
241            self.name().to_string(),
242            crate::rule_config_serde::json_to_toml_value(&json_value)?,
243        ))
244    }
245
246    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
247    where
248        Self: Sized,
249    {
250        let rule_config = crate::rule_config_serde::load_rule_config::<MD049Config>(config);
251        Box::new(Self::from_config_struct(rule_config))
252    }
253}
254
#[cfg(test)]
mod tests {
    use super::*;

    /// The default-constructed rule reports its identifier.
    #[test]
    fn test_name() {
        assert_eq!(MD049EmphasisStyle::default().name(), "MD049");
    }

    /// Style names convert to their variants; unknown names fall back to `Consistent`.
    #[test]
    fn test_style_from_str() {
        let cases = [
            ("asterisk", EmphasisStyle::Asterisk),
            ("underscore", EmphasisStyle::Underscore),
            ("other", EmphasisStyle::Consistent),
        ];
        for (input, expected) in cases {
            assert_eq!(EmphasisStyle::from(input), expected);
        }
    }

    /// Emphasis-like text inside links and reference definitions is ignored.
    #[test]
    fn test_emphasis_in_links_not_flagged() {
        let markdown = r#"Check this [*asterisk*](https://example.com/*pattern*) link and [_underscore_](https://example.com/_private_).

Also see the [`__init__`][__init__] reference.

This should be _flagged_ since we're using asterisk style.

[__init__]: https://example.com/__init__.py"#;
        let ctx = crate::lint_context::LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
        let warnings = MD049EmphasisStyle::new(EmphasisStyle::Asterisk).check(&ctx).unwrap();

        // Only the real emphasis outside links should be flagged
        assert_eq!(warnings.len(), 1);
        let only = &warnings[0];
        assert!(only.message.contains("Emphasis should use * instead of _"));
        // The warning belongs to the "_flagged_" line, not to anything inside a link
        assert_eq!(only.line, 5);
    }

    /// Real emphasis is still flagged when the same line also contains a link.
    #[test]
    fn test_emphasis_in_links_vs_outside_links() {
        let markdown = r#"Check [*emphasis*](https://example.com/*test*) and inline *real emphasis* text.

[*link*]: https://example.com/*path*"#;
        let ctx = crate::lint_context::LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
        let warnings = MD049EmphasisStyle::new(EmphasisStyle::Underscore).check(&ctx).unwrap();

        // Only the actual emphasis outside links should be flagged
        assert_eq!(warnings.len(), 1);
        let only = &warnings[0];
        assert!(only.message.contains("Emphasis should use _ instead of *"));
        // It is the "real emphasis" text on line 1
        assert_eq!(only.line, 1);
    }
}