rumdl_lib/rules/
md049_emphasis_style.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
2use crate::rules::emphasis_style::EmphasisStyle;
3use crate::utils::document_structure::DocumentStructure;
4use crate::utils::emphasis_utils::{find_emphasis_markers, find_single_emphasis_spans, replace_inline_code};
5use lazy_static::lazy_static;
6use regex::Regex;
7
8lazy_static! {
9    // Reference definition pattern - matches [ref]: url "title"
10    static ref REF_DEF_REGEX: Regex = Regex::new(
11        r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#
12    ).unwrap();
13}
14
15mod md049_config;
16use md049_config::MD049Config;
17
18/// Rule MD049: Emphasis style
19///
20/// See [docs/md049.md](../../docs/md049.md) for full documentation, configuration, and examples.
21///
22/// This rule is triggered when the style for emphasis is inconsistent:
23/// - Asterisks: `*text*`
24/// - Underscores: `_text_`
25///
26/// This rule is focused on regular emphasis, not strong emphasis.
27#[derive(Debug, Default, Clone)]
28pub struct MD049EmphasisStyle {
29    config: MD049Config,
30}
31
32impl MD049EmphasisStyle {
33    /// Create a new instance of MD049EmphasisStyle
34    pub fn new(style: EmphasisStyle) -> Self {
35        MD049EmphasisStyle {
36            config: MD049Config { style },
37        }
38    }
39
40    pub fn from_config_struct(config: MD049Config) -> Self {
41        Self { config }
42    }
43
44    /// Check if a byte position is within a link (inline links, reference links, or reference definitions)
45    fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
46        // Check inline and reference links
47        for link in &ctx.links {
48            if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
49                return true;
50            }
51        }
52
53        // Check images (which use similar syntax)
54        for image in &ctx.images {
55            if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
56                return true;
57            }
58        }
59
60        // Check reference definitions [ref]: url "title" using regex pattern
61        for m in REF_DEF_REGEX.find_iter(ctx.content) {
62            if m.start() <= byte_pos && byte_pos < m.end() {
63                return true;
64            }
65        }
66
67        false
68    }
69
70    // Collect emphasis from a single line
71    fn collect_emphasis_from_line(
72        &self,
73        line: &str,
74        line_num: usize,
75        line_start_pos: usize,
76        emphasis_info: &mut Vec<(usize, usize, usize, char, String)>, // (line, col, abs_pos, marker, content)
77    ) {
78        // Replace inline code to avoid false positives
79        let line_no_code = replace_inline_code(line);
80
81        // Find all emphasis markers
82        let markers = find_emphasis_markers(&line_no_code);
83        if markers.is_empty() {
84            return;
85        }
86
87        // Find single emphasis spans (not strong emphasis)
88        let spans = find_single_emphasis_spans(&line_no_code, markers);
89
90        for span in spans {
91            let marker_char = span.opening.as_char();
92            let col = span.opening.start_pos + 1; // Convert to 1-based
93            let abs_pos = line_start_pos + span.opening.start_pos;
94
95            emphasis_info.push((line_num, col, abs_pos, marker_char, span.content.clone()));
96        }
97    }
98}
99
100impl Rule for MD049EmphasisStyle {
101    fn name(&self) -> &'static str {
102        "MD049"
103    }
104
105    fn description(&self) -> &'static str {
106        "Emphasis style should be consistent"
107    }
108
109    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
110        let mut warnings = vec![];
111        let content = ctx.content;
112
113        // Early return if no emphasis markers
114        if !content.contains('*') && !content.contains('_') {
115            return Ok(warnings);
116        }
117
118        // Create document structure to skip code blocks
119        let structure = DocumentStructure::new(content);
120
121        // Collect all emphasis from the document
122        let mut emphasis_info = vec![];
123
124        // Track absolute position for fixes
125        let mut abs_pos = 0;
126
127        for (line_idx, line) in content.lines().enumerate() {
128            let line_num = line_idx + 1;
129
130            // Skip if in code block or front matter
131            if structure.is_in_code_block(line_num) || structure.is_in_front_matter(line_num) {
132                abs_pos += line.len() + 1; // +1 for newline
133                continue;
134            }
135
136            // Skip if the line doesn't contain any emphasis markers
137            if !line.contains('*') && !line.contains('_') {
138                abs_pos += line.len() + 1;
139                continue;
140            }
141
142            // Collect emphasis with absolute positions
143            let line_start = abs_pos;
144            self.collect_emphasis_from_line(line, line_num, line_start, &mut emphasis_info);
145
146            abs_pos += line.len() + 1;
147        }
148
149        // Filter out emphasis markers that are inside links
150        emphasis_info.retain(|(_, _, abs_pos, _, _)| !self.is_in_link(ctx, *abs_pos));
151
152        match self.config.style {
153            EmphasisStyle::Consistent => {
154                // If we have less than 2 emphasis nodes, no need to check consistency
155                if emphasis_info.len() < 2 {
156                    return Ok(warnings);
157                }
158
159                // Use the first emphasis marker found as the target style
160                let target_marker = emphasis_info[0].3;
161
162                // Check all subsequent emphasis nodes for consistency
163                for (line_num, col, abs_pos, marker, content) in emphasis_info.iter().skip(1) {
164                    if *marker != target_marker {
165                        // Calculate emphasis length (marker + content + marker)
166                        let emphasis_len = 1 + content.len() + 1;
167
168                        warnings.push(LintWarning {
169                            rule_name: Some(self.name()),
170                            line: *line_num,
171                            column: *col,
172                            end_line: *line_num,
173                            end_column: col + emphasis_len,
174                            message: format!("Emphasis should use {target_marker} instead of {marker}"),
175                            fix: Some(Fix {
176                                range: *abs_pos..*abs_pos + emphasis_len,
177                                replacement: format!("{target_marker}{content}{target_marker}"),
178                            }),
179                            severity: Severity::Warning,
180                        });
181                    }
182                }
183            }
184            EmphasisStyle::Asterisk | EmphasisStyle::Underscore => {
185                let (wrong_marker, correct_marker) = match self.config.style {
186                    EmphasisStyle::Asterisk => ('_', '*'),
187                    EmphasisStyle::Underscore => ('*', '_'),
188                    EmphasisStyle::Consistent => {
189                        // This case is handled separately above
190                        // but fallback to asterisk style for safety
191                        ('_', '*')
192                    }
193                };
194
195                for (line_num, col, abs_pos, marker, content) in &emphasis_info {
196                    if *marker == wrong_marker {
197                        // Calculate emphasis length (marker + content + marker)
198                        let emphasis_len = 1 + content.len() + 1;
199
200                        warnings.push(LintWarning {
201                            rule_name: Some(self.name()),
202                            line: *line_num,
203                            column: *col,
204                            end_line: *line_num,
205                            end_column: col + emphasis_len,
206                            message: format!("Emphasis should use {correct_marker} instead of {wrong_marker}"),
207                            fix: Some(Fix {
208                                range: *abs_pos..*abs_pos + emphasis_len,
209                                replacement: format!("{correct_marker}{content}{correct_marker}"),
210                            }),
211                            severity: Severity::Warning,
212                        });
213                    }
214                }
215            }
216        }
217        Ok(warnings)
218    }
219
220    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
221        // Get all warnings with their fixes
222        let warnings = self.check(ctx)?;
223
224        // If no warnings, return original content
225        if warnings.is_empty() {
226            return Ok(ctx.content.to_string());
227        }
228
229        // Collect all fixes and sort by range start (descending) to apply from end to beginning
230        let mut fixes: Vec<_> = warnings
231            .iter()
232            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.start, f.range.end, &f.replacement)))
233            .collect();
234        fixes.sort_by(|a, b| b.0.cmp(&a.0));
235
236        // Apply fixes from end to beginning to preserve byte offsets
237        let mut result = ctx.content.to_string();
238        for (start, end, replacement) in fixes {
239            if start < result.len() && end <= result.len() && start <= end {
240                result.replace_range(start..end, replacement);
241            }
242        }
243
244        Ok(result)
245    }
246
247    /// Check if this rule should be skipped
248    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
249        ctx.content.is_empty() || (!ctx.content.contains('*') && !ctx.content.contains('_'))
250    }
251
252    fn as_any(&self) -> &dyn std::any::Any {
253        self
254    }
255
256    fn default_config_section(&self) -> Option<(String, toml::Value)> {
257        let json_value = serde_json::to_value(&self.config).ok()?;
258        Some((
259            self.name().to_string(),
260            crate::rule_config_serde::json_to_toml_value(&json_value)?,
261        ))
262    }
263
264    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
265    where
266        Self: Sized,
267    {
268        let rule_config = crate::rule_config_serde::load_rule_config::<MD049Config>(config);
269        Box::new(Self::from_config_struct(rule_config))
270    }
271}
272
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `rule` over `content` using the standard Markdown flavor.
    fn lint(rule: &MD049EmphasisStyle, content: &str) -> Vec<LintWarning> {
        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    #[test]
    fn test_name() {
        let rule = MD049EmphasisStyle::default();
        assert_eq!(rule.name(), "MD049");
    }

    #[test]
    fn test_style_from_str() {
        let cases = [
            ("asterisk", EmphasisStyle::Asterisk),
            ("underscore", EmphasisStyle::Underscore),
            ("other", EmphasisStyle::Consistent),
        ];
        for (input, expected) in cases {
            assert_eq!(EmphasisStyle::from(input), expected);
        }
    }

    #[test]
    fn test_emphasis_in_links_not_flagged() {
        let rule = MD049EmphasisStyle::new(EmphasisStyle::Asterisk);
        let content = r#"Check this [*asterisk*](https://example.com/*pattern*) link and [_underscore_](https://example.com/_private_).

Also see the [`__init__`][__init__] reference.

This should be _flagged_ since we're using asterisk style.

[__init__]: https://example.com/__init__.py"#;
        let found = lint(&rule, content);

        // Only the real emphasis outside links should be flagged
        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains("Emphasis should use * instead of _"));
        // The offender is "_flagged_" on line 5, not the patterns inside links
        assert_eq!(found[0].line, 5);
    }

    #[test]
    fn test_emphasis_in_links_vs_outside_links() {
        let rule = MD049EmphasisStyle::new(EmphasisStyle::Underscore);
        let content = r#"Check [*emphasis*](https://example.com/*test*) and inline *real emphasis* text.

[*link*]: https://example.com/*path*"#;
        let found = lint(&rule, content);

        // Only the actual emphasis outside links should be flagged
        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains("Emphasis should use _ instead of *"));
        // The offender is the "real emphasis" text on line 1
        assert_eq!(found[0].line, 1);
    }
}