rumdl_lib/rules/
md049_emphasis_style.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
2use crate::rules::emphasis_style::EmphasisStyle;
3use crate::utils::document_structure::DocumentStructure;
4use crate::utils::emphasis_utils::{find_emphasis_markers, find_single_emphasis_spans, replace_inline_code};
5use lazy_static::lazy_static;
6use regex::Regex;
7
lazy_static! {
    // Matches a Markdown reference definition such as `[ref]: url "title"`:
    //   - `(?m)` makes `^`/`$` anchor at every line, so the whole document can be scanned in one pass
    //   - up to three leading spaces, then the bracketed label (capture 1) followed by `:`
    //   - the destination as a run of non-whitespace characters (capture 2)
    //   - an optional title in double quotes (capture 3) or single quotes (capture 4)
    static ref REF_DEF_REGEX: Regex = Regex::new(
        r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#
    ).unwrap();
}
14
15mod md049_config;
16use md049_config::MD049Config;
17
/// Rule MD049: Emphasis style
///
/// See [docs/md049.md](../../docs/md049.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when the style used for emphasis is inconsistent:
/// - Asterisks: `*text*`
/// - Underscores: `_text_`
///
/// Only regular (single-marker) emphasis is checked; strong emphasis is not
/// covered by this rule.
#[derive(Debug, Default, Clone)]
pub struct MD049EmphasisStyle {
    // Rule configuration; its `style` field selects which emphasis marker is enforced.
    config: MD049Config,
}
31
32impl MD049EmphasisStyle {
33    /// Create a new instance of MD049EmphasisStyle
34    pub fn new(style: EmphasisStyle) -> Self {
35        MD049EmphasisStyle {
36            config: MD049Config { style },
37        }
38    }
39
40    pub fn from_config_struct(config: MD049Config) -> Self {
41        Self { config }
42    }
43
44    /// Check if a byte position is within a link (inline links, reference links, or reference definitions)
45    fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
46        // Check inline and reference links
47        for link in &ctx.links {
48            if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
49                return true;
50            }
51        }
52
53        // Check images (which use similar syntax)
54        for image in &ctx.images {
55            if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
56                return true;
57            }
58        }
59
60        // Check reference definitions [ref]: url "title" using regex pattern
61        for m in REF_DEF_REGEX.find_iter(ctx.content) {
62            if m.start() <= byte_pos && byte_pos < m.end() {
63                return true;
64            }
65        }
66
67        false
68    }
69
70    // Collect emphasis from a single line
71    fn collect_emphasis_from_line(
72        &self,
73        line: &str,
74        line_num: usize,
75        emphasis_info: &mut Vec<(usize, usize, char, String)>, // (line, col, marker, content)
76    ) {
77        // Replace inline code to avoid false positives
78        let line_no_code = replace_inline_code(line);
79
80        // Find all emphasis markers
81        let markers = find_emphasis_markers(&line_no_code);
82        if markers.is_empty() {
83            return;
84        }
85
86        // Find single emphasis spans (not strong emphasis)
87        let spans = find_single_emphasis_spans(&line_no_code, markers);
88
89        for span in spans {
90            let marker_char = span.opening.as_char();
91            let col = span.opening.start_pos + 1; // Convert to 1-based
92
93            emphasis_info.push((line_num, col, marker_char, span.content.clone()));
94        }
95    }
96}
97
98impl Rule for MD049EmphasisStyle {
99    fn name(&self) -> &'static str {
100        "MD049"
101    }
102
103    fn description(&self) -> &'static str {
104        "Emphasis style should be consistent"
105    }
106
107    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
108        let mut warnings = vec![];
109        let content = ctx.content;
110
111        // Early return if no emphasis markers
112        if !content.contains('*') && !content.contains('_') {
113            return Ok(warnings);
114        }
115
116        // Create document structure to skip code blocks
117        let structure = DocumentStructure::new(content);
118
119        // Collect all emphasis from the document
120        let mut emphasis_info = vec![];
121
122        // Track absolute position for fixes
123        let mut abs_pos = 0;
124
125        for (line_idx, line) in content.lines().enumerate() {
126            let line_num = line_idx + 1;
127
128            // Skip if in code block or front matter
129            if structure.is_in_code_block(line_num) || structure.is_in_front_matter(line_num) {
130                abs_pos += line.len() + 1; // +1 for newline
131                continue;
132            }
133
134            // Skip if the line doesn't contain any emphasis markers
135            if !line.contains('*') && !line.contains('_') {
136                abs_pos += line.len() + 1;
137                continue;
138            }
139
140            // Collect emphasis with absolute positions
141            let line_start = abs_pos;
142            self.collect_emphasis_from_line(line, line_num, &mut emphasis_info);
143
144            // Update emphasis_info with absolute positions
145            let last_emphasis_count = emphasis_info.len();
146            for i in (0..last_emphasis_count).rev() {
147                if emphasis_info[i].0 == line_num {
148                    // Add line start position to column
149                    let (line_num, col, marker, content) = emphasis_info[i].clone();
150                    emphasis_info[i] = (line_num, line_start + col - 1, marker, content);
151                } else {
152                    break;
153                }
154            }
155
156            abs_pos += line.len() + 1;
157        }
158
159        // Filter out emphasis markers that are inside links
160        emphasis_info.retain(|(_, abs_col, _, _)| !self.is_in_link(ctx, *abs_col));
161
162        match self.config.style {
163            EmphasisStyle::Consistent => {
164                // If we have less than 2 emphasis nodes, no need to check consistency
165                if emphasis_info.len() < 2 {
166                    return Ok(warnings);
167                }
168
169                // Use the first emphasis marker found as the target style
170                let target_marker = emphasis_info[0].2;
171
172                // Check all subsequent emphasis nodes for consistency
173                for (line_num, abs_col, marker, content) in emphasis_info.iter().skip(1) {
174                    if *marker != target_marker {
175                        // Calculate emphasis length (marker + content + marker)
176                        let emphasis_len = 1 + content.len() + 1;
177
178                        // Calculate line-relative column (1-based)
179                        let line_start = content.lines().take(line_num - 1).map(|l| l.len() + 1).sum::<usize>();
180                        let col = abs_col - line_start + 1;
181
182                        warnings.push(LintWarning {
183                            rule_name: Some(self.name()),
184                            line: *line_num,
185                            column: col,
186                            end_line: *line_num,
187                            end_column: col + emphasis_len,
188                            message: format!("Emphasis should use {target_marker} instead of {marker}"),
189                            fix: Some(Fix {
190                                range: *abs_col..*abs_col + emphasis_len,
191                                replacement: format!("{target_marker}{content}{target_marker}"),
192                            }),
193                            severity: Severity::Warning,
194                        });
195                    }
196                }
197            }
198            EmphasisStyle::Asterisk | EmphasisStyle::Underscore => {
199                let (wrong_marker, correct_marker) = match self.config.style {
200                    EmphasisStyle::Asterisk => ('_', '*'),
201                    EmphasisStyle::Underscore => ('*', '_'),
202                    EmphasisStyle::Consistent => {
203                        // This case is handled separately above
204                        // but fallback to asterisk style for safety
205                        ('_', '*')
206                    }
207                };
208
209                for (line_num, abs_col, marker, content) in &emphasis_info {
210                    if *marker == wrong_marker {
211                        // Calculate emphasis length (marker + content + marker)
212                        let emphasis_len = 1 + content.len() + 1;
213
214                        // Calculate line-relative column (1-based)
215                        let line_start = ctx
216                            .content
217                            .lines()
218                            .take(line_num - 1)
219                            .map(|l| l.len() + 1)
220                            .sum::<usize>();
221                        let col = abs_col - line_start + 1;
222
223                        warnings.push(LintWarning {
224                            rule_name: Some(self.name()),
225                            line: *line_num,
226                            column: col,
227                            end_line: *line_num,
228                            end_column: col + emphasis_len,
229                            message: format!("Emphasis should use {correct_marker} instead of {wrong_marker}"),
230                            fix: Some(Fix {
231                                range: *abs_col..*abs_col + emphasis_len,
232                                replacement: format!("{correct_marker}{content}{correct_marker}"),
233                            }),
234                            severity: Severity::Warning,
235                        });
236                    }
237                }
238            }
239        }
240        Ok(warnings)
241    }
242
243    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
244        // Get all warnings with their fixes
245        let warnings = self.check(ctx)?;
246
247        // If no warnings, return original content
248        if warnings.is_empty() {
249            return Ok(ctx.content.to_string());
250        }
251
252        // Collect all fixes and sort by range start (descending) to apply from end to beginning
253        let mut fixes: Vec<_> = warnings
254            .iter()
255            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.start, f.range.end, &f.replacement)))
256            .collect();
257        fixes.sort_by(|a, b| b.0.cmp(&a.0));
258
259        // Apply fixes from end to beginning to preserve byte offsets
260        let mut result = ctx.content.to_string();
261        for (start, end, replacement) in fixes {
262            if start < result.len() && end <= result.len() && start <= end {
263                result.replace_range(start..end, replacement);
264            }
265        }
266
267        Ok(result)
268    }
269
270    /// Check if this rule should be skipped
271    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
272        ctx.content.is_empty() || (!ctx.content.contains('*') && !ctx.content.contains('_'))
273    }
274
275    fn as_any(&self) -> &dyn std::any::Any {
276        self
277    }
278
279    fn default_config_section(&self) -> Option<(String, toml::Value)> {
280        let json_value = serde_json::to_value(&self.config).ok()?;
281        Some((
282            self.name().to_string(),
283            crate::rule_config_serde::json_to_toml_value(&json_value)?,
284        ))
285    }
286
287    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
288    where
289        Self: Sized,
290    {
291        let rule_config = crate::rule_config_serde::load_rule_config::<MD049Config>(config);
292        Box::new(Self::from_config_struct(rule_config))
293    }
294}
295
#[cfg(test)]
mod tests {
    use super::*;

    /// The rule reports its identifier as `MD049`.
    #[test]
    fn test_name() {
        assert_eq!(MD049EmphasisStyle::default().name(), "MD049");
    }

    /// Known style names map to their variants; anything else means `Consistent`.
    #[test]
    fn test_style_from_str() {
        let cases = [
            ("asterisk", EmphasisStyle::Asterisk),
            ("underscore", EmphasisStyle::Underscore),
            ("other", EmphasisStyle::Consistent),
        ];
        for (input, expected) in cases {
            assert_eq!(EmphasisStyle::from(input), expected);
        }
    }

    /// Emphasis-like text inside links and reference definitions is ignored
    /// while real emphasis elsewhere is still reported.
    #[test]
    fn test_emphasis_in_links_not_flagged() {
        let document = r#"Check this [*asterisk*](https://example.com/*pattern*) link and [_underscore_](https://example.com/_private_).

Also see the [`__init__`][__init__] reference.

This should be _flagged_ since we're using asterisk style.

[__init__]: https://example.com/__init__.py"#;
        let ctx = crate::lint_context::LintContext::new(document);
        let warnings = MD049EmphasisStyle::new(EmphasisStyle::Asterisk).check(&ctx).unwrap();

        // Only "_flagged_" on line 5 lives outside a link.
        assert_eq!(warnings.len(), 1);
        let only = &warnings[0];
        assert!(only.message.contains("Emphasis should use * instead of _"));
        assert_eq!(only.line, 5);
    }

    /// With both kinds on one line, only the emphasis outside the link is reported.
    #[test]
    fn test_emphasis_in_links_vs_outside_links() {
        let document = r#"Check [*emphasis*](https://example.com/*test*) and inline *real emphasis* text.

[*link*]: https://example.com/*path*"#;
        let ctx = crate::lint_context::LintContext::new(document);
        let warnings = MD049EmphasisStyle::new(EmphasisStyle::Underscore).check(&ctx).unwrap();

        // Only "*real emphasis*" on line 1 should be flagged.
        assert_eq!(warnings.len(), 1);
        let only = &warnings[0];
        assert!(only.message.contains("Emphasis should use _ instead of *"));
        assert_eq!(only.line, 1);
    }
}
348}