rumdl_lib/rules/
md049_emphasis_style.rs

1use crate::filtered_lines::FilteredLinesExt;
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
3use crate::rules::emphasis_style::EmphasisStyle;
4use crate::utils::emphasis_utils::{find_emphasis_markers, find_single_emphasis_spans, replace_inline_code};
5
6mod md049_config;
7use md049_config::MD049Config;
8
9/// Rule MD049: Emphasis style
10///
11/// See [docs/md049.md](../../docs/md049.md) for full documentation, configuration, and examples.
12///
13/// This rule is triggered when the style for emphasis is inconsistent:
14/// - Asterisks: `*text*`
15/// - Underscores: `_text_`
16///
17/// This rule is focused on regular emphasis, not strong emphasis.
18#[derive(Debug, Default, Clone)]
19pub struct MD049EmphasisStyle {
20    config: MD049Config,
21}
22
23impl MD049EmphasisStyle {
24    /// Create a new instance of MD049EmphasisStyle
25    pub fn new(style: EmphasisStyle) -> Self {
26        MD049EmphasisStyle {
27            config: MD049Config { style },
28        }
29    }
30
31    pub fn from_config_struct(config: MD049Config) -> Self {
32        Self { config }
33    }
34
35    /// Check if a byte position is within a link (inline links, reference links, or reference definitions)
36    fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
37        // Check inline and reference links
38        for link in &ctx.links {
39            if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
40                return true;
41            }
42        }
43
44        // Check images (which use similar syntax)
45        for image in &ctx.images {
46            if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
47                return true;
48            }
49        }
50
51        // Check reference definitions [ref]: url "title" using pre-computed data (O(1) vs O(n))
52        ctx.is_in_reference_def(byte_pos)
53    }
54
55    // Collect emphasis from a single line
56    fn collect_emphasis_from_line(
57        &self,
58        line: &str,
59        line_num: usize,
60        line_start_pos: usize,
61        emphasis_info: &mut Vec<(usize, usize, usize, char, String)>, // (line, col, abs_pos, marker, content)
62    ) {
63        // Replace inline code to avoid false positives
64        let line_no_code = replace_inline_code(line);
65
66        // Find all emphasis markers
67        let markers = find_emphasis_markers(&line_no_code);
68        if markers.is_empty() {
69            return;
70        }
71
72        // Find single emphasis spans (not strong emphasis)
73        let spans = find_single_emphasis_spans(&line_no_code, markers);
74
75        for span in spans {
76            let marker_char = span.opening.as_char();
77            let col = span.opening.start_pos + 1; // Convert to 1-based
78            let abs_pos = line_start_pos + span.opening.start_pos;
79
80            emphasis_info.push((line_num, col, abs_pos, marker_char, span.content.clone()));
81        }
82    }
83}
84
85impl Rule for MD049EmphasisStyle {
86    fn name(&self) -> &'static str {
87        "MD049"
88    }
89
90    fn description(&self) -> &'static str {
91        "Emphasis style should be consistent"
92    }
93
94    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
95        let mut warnings = vec![];
96
97        // Early return if no emphasis markers
98        if !ctx.likely_has_emphasis() {
99            return Ok(warnings);
100        }
101
102        // Use LintContext to skip code blocks
103        // Create LineIndex for correct byte position calculations across all line ending types
104        let line_index = &ctx.line_index;
105
106        // Collect all emphasis from the document
107        let mut emphasis_info = vec![];
108
109        // Process content lines, automatically skipping front matter, code blocks, and HTML comments
110        for line in ctx
111            .filtered_lines()
112            .skip_front_matter()
113            .skip_code_blocks()
114            .skip_html_comments()
115        {
116            // Skip if the line doesn't contain any emphasis markers
117            if !line.content.contains('*') && !line.content.contains('_') {
118                continue;
119            }
120
121            // Get absolute position for this line
122            let line_start = line_index.get_line_start_byte(line.line_num).unwrap_or(0);
123            self.collect_emphasis_from_line(line.content, line.line_num, line_start, &mut emphasis_info);
124        }
125
126        // Filter out emphasis markers that are inside links
127        emphasis_info.retain(|(_, _, abs_pos, _, _)| !self.is_in_link(ctx, *abs_pos));
128
129        match self.config.style {
130            EmphasisStyle::Consistent => {
131                // If we have less than 2 emphasis nodes, no need to check consistency
132                if emphasis_info.len() < 2 {
133                    return Ok(warnings);
134                }
135
136                // Use the first emphasis marker found as the target style
137                let target_marker = emphasis_info[0].3;
138
139                // Check all subsequent emphasis nodes for consistency
140                for (line_num, col, abs_pos, marker, content) in emphasis_info.iter().skip(1) {
141                    if *marker != target_marker {
142                        // Calculate emphasis length (marker + content + marker)
143                        let emphasis_len = 1 + content.len() + 1;
144
145                        warnings.push(LintWarning {
146                            rule_name: Some(self.name().to_string()),
147                            line: *line_num,
148                            column: *col,
149                            end_line: *line_num,
150                            end_column: col + emphasis_len,
151                            message: format!("Emphasis should use {target_marker} instead of {marker}"),
152                            fix: Some(Fix {
153                                range: *abs_pos..*abs_pos + emphasis_len,
154                                replacement: format!("{target_marker}{content}{target_marker}"),
155                            }),
156                            severity: Severity::Warning,
157                        });
158                    }
159                }
160            }
161            EmphasisStyle::Asterisk | EmphasisStyle::Underscore => {
162                let (wrong_marker, correct_marker) = match self.config.style {
163                    EmphasisStyle::Asterisk => ('_', '*'),
164                    EmphasisStyle::Underscore => ('*', '_'),
165                    EmphasisStyle::Consistent => {
166                        // This case is handled separately above
167                        // but fallback to asterisk style for safety
168                        ('_', '*')
169                    }
170                };
171
172                for (line_num, col, abs_pos, marker, content) in &emphasis_info {
173                    if *marker == wrong_marker {
174                        // Calculate emphasis length (marker + content + marker)
175                        let emphasis_len = 1 + content.len() + 1;
176
177                        warnings.push(LintWarning {
178                            rule_name: Some(self.name().to_string()),
179                            line: *line_num,
180                            column: *col,
181                            end_line: *line_num,
182                            end_column: col + emphasis_len,
183                            message: format!("Emphasis should use {correct_marker} instead of {wrong_marker}"),
184                            fix: Some(Fix {
185                                range: *abs_pos..*abs_pos + emphasis_len,
186                                replacement: format!("{correct_marker}{content}{correct_marker}"),
187                            }),
188                            severity: Severity::Warning,
189                        });
190                    }
191                }
192            }
193        }
194        Ok(warnings)
195    }
196
197    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
198        // Get all warnings with their fixes
199        let warnings = self.check(ctx)?;
200
201        // If no warnings, return original content
202        if warnings.is_empty() {
203            return Ok(ctx.content.to_string());
204        }
205
206        // Collect all fixes and sort by range start (descending) to apply from end to beginning
207        let mut fixes: Vec<_> = warnings
208            .iter()
209            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.start, f.range.end, &f.replacement)))
210            .collect();
211        fixes.sort_by(|a, b| b.0.cmp(&a.0));
212
213        // Apply fixes from end to beginning to preserve byte offsets
214        let mut result = ctx.content.to_string();
215        for (start, end, replacement) in fixes {
216            if start < result.len() && end <= result.len() && start <= end {
217                result.replace_range(start..end, replacement);
218            }
219        }
220
221        Ok(result)
222    }
223
224    /// Check if this rule should be skipped
225    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
226        ctx.content.is_empty() || !ctx.likely_has_emphasis()
227    }
228
229    fn as_any(&self) -> &dyn std::any::Any {
230        self
231    }
232
233    fn default_config_section(&self) -> Option<(String, toml::Value)> {
234        let json_value = serde_json::to_value(&self.config).ok()?;
235        Some((
236            self.name().to_string(),
237            crate::rule_config_serde::json_to_toml_value(&json_value)?,
238        ))
239    }
240
241    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
242    where
243        Self: Sized,
244    {
245        let rule_config = crate::rule_config_serde::load_rule_config::<MD049Config>(config);
246        Box::new(Self::from_config_struct(rule_config))
247    }
248}
249
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// The rule reports its identifier.
    #[test]
    fn test_name() {
        assert_eq!(MD049EmphasisStyle::default().name(), "MD049");
    }

    /// Known style names map to their variant; anything else maps to `Consistent`.
    #[test]
    fn test_style_from_str() {
        let cases = [
            ("asterisk", EmphasisStyle::Asterisk),
            ("underscore", EmphasisStyle::Underscore),
            ("other", EmphasisStyle::Consistent),
        ];
        for (input, expected) in cases {
            assert_eq!(EmphasisStyle::from(input), expected);
        }
    }

    /// Emphasis-like text inside links and reference definitions is ignored.
    #[test]
    fn test_emphasis_in_links_not_flagged() {
        let content = r#"Check this [*asterisk*](https://example.com/*pattern*) link and [_underscore_](https://example.com/_private_).

Also see the [`__init__`][__init__] reference.

This should be _flagged_ since we're using asterisk style.

[__init__]: https://example.com/__init__.py"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        let warnings = MD049EmphasisStyle::new(EmphasisStyle::Asterisk)
            .check(&ctx)
            .unwrap();

        // Only the real emphasis outside links should be flagged
        assert_eq!(warnings.len(), 1);
        let flagged = &warnings[0];
        assert!(flagged.message.contains("Emphasis should use * instead of _"));
        // The warning belongs to the line with "_flagged_"
        assert_eq!(flagged.line, 5);
    }

    /// Emphasis in plain text is flagged even when the same line contains links.
    #[test]
    fn test_emphasis_in_links_vs_outside_links() {
        let content = r#"Check [*emphasis*](https://example.com/*test*) and inline *real emphasis* text.

[*link*]: https://example.com/*path*"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        let warnings = MD049EmphasisStyle::new(EmphasisStyle::Underscore)
            .check(&ctx)
            .unwrap();

        // Only the actual emphasis outside links should be flagged
        assert_eq!(warnings.len(), 1);
        let flagged = &warnings[0];
        assert!(flagged.message.contains("Emphasis should use _ instead of *"));
        // The warning is for the "real emphasis" text on line 1
        assert_eq!(flagged.line, 1);
    }
}