// rumdl_lib/rules/md049_emphasis_style.rs

1use crate::filtered_lines::FilteredLinesExt;
2use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
3use crate::rules::emphasis_style::EmphasisStyle;
4use crate::utils::emphasis_utils::{find_emphasis_markers, find_single_emphasis_spans, replace_inline_code};
5use crate::utils::range_utils::LineIndex;
6use lazy_static::lazy_static;
7use regex::Regex;
8
9lazy_static! {
10    // Reference definition pattern - matches [ref]: url "title"
11    static ref REF_DEF_REGEX: Regex = Regex::new(
12        r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#
13    ).unwrap();
14}
15
16mod md049_config;
17use md049_config::MD049Config;
18
19/// Rule MD049: Emphasis style
20///
21/// See [docs/md049.md](../../docs/md049.md) for full documentation, configuration, and examples.
22///
23/// This rule is triggered when the style for emphasis is inconsistent:
24/// - Asterisks: `*text*`
25/// - Underscores: `_text_`
26///
27/// This rule is focused on regular emphasis, not strong emphasis.
28#[derive(Debug, Default, Clone)]
29pub struct MD049EmphasisStyle {
30    config: MD049Config,
31}
32
33impl MD049EmphasisStyle {
34    /// Create a new instance of MD049EmphasisStyle
35    pub fn new(style: EmphasisStyle) -> Self {
36        MD049EmphasisStyle {
37            config: MD049Config { style },
38        }
39    }
40
41    pub fn from_config_struct(config: MD049Config) -> Self {
42        Self { config }
43    }
44
45    /// Check if a byte position is within a link (inline links, reference links, or reference definitions)
46    fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
47        // Check inline and reference links
48        for link in &ctx.links {
49            if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
50                return true;
51            }
52        }
53
54        // Check images (which use similar syntax)
55        for image in &ctx.images {
56            if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
57                return true;
58            }
59        }
60
61        // Check reference definitions [ref]: url "title" using regex pattern
62        for m in REF_DEF_REGEX.find_iter(ctx.content) {
63            if m.start() <= byte_pos && byte_pos < m.end() {
64                return true;
65            }
66        }
67
68        false
69    }
70
71    // Collect emphasis from a single line
72    fn collect_emphasis_from_line(
73        &self,
74        line: &str,
75        line_num: usize,
76        line_start_pos: usize,
77        emphasis_info: &mut Vec<(usize, usize, usize, char, String)>, // (line, col, abs_pos, marker, content)
78    ) {
79        // Replace inline code to avoid false positives
80        let line_no_code = replace_inline_code(line);
81
82        // Find all emphasis markers
83        let markers = find_emphasis_markers(&line_no_code);
84        if markers.is_empty() {
85            return;
86        }
87
88        // Find single emphasis spans (not strong emphasis)
89        let spans = find_single_emphasis_spans(&line_no_code, markers);
90
91        for span in spans {
92            let marker_char = span.opening.as_char();
93            let col = span.opening.start_pos + 1; // Convert to 1-based
94            let abs_pos = line_start_pos + span.opening.start_pos;
95
96            emphasis_info.push((line_num, col, abs_pos, marker_char, span.content.clone()));
97        }
98    }
99}
100
101impl Rule for MD049EmphasisStyle {
102    fn name(&self) -> &'static str {
103        "MD049"
104    }
105
106    fn description(&self) -> &'static str {
107        "Emphasis style should be consistent"
108    }
109
110    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
111        let mut warnings = vec![];
112        let content = ctx.content;
113
114        // Early return if no emphasis markers
115        if !ctx.likely_has_emphasis() {
116            return Ok(warnings);
117        }
118
119        // Use LintContext to skip code blocks
120        // Create LineIndex for correct byte position calculations across all line ending types
121        let line_index = LineIndex::new(content.to_string());
122
123        // Collect all emphasis from the document
124        let mut emphasis_info = vec![];
125
126        // Process content lines, automatically skipping front matter, code blocks, and HTML comments
127        for line in ctx
128            .filtered_lines()
129            .skip_front_matter()
130            .skip_code_blocks()
131            .skip_html_comments()
132        {
133            // Skip if the line doesn't contain any emphasis markers
134            if !line.content.contains('*') && !line.content.contains('_') {
135                continue;
136            }
137
138            // Get absolute position for this line
139            let line_start = line_index.get_line_start_byte(line.line_num).unwrap_or(0);
140            self.collect_emphasis_from_line(line.content, line.line_num, line_start, &mut emphasis_info);
141        }
142
143        // Filter out emphasis markers that are inside links
144        emphasis_info.retain(|(_, _, abs_pos, _, _)| !self.is_in_link(ctx, *abs_pos));
145
146        match self.config.style {
147            EmphasisStyle::Consistent => {
148                // If we have less than 2 emphasis nodes, no need to check consistency
149                if emphasis_info.len() < 2 {
150                    return Ok(warnings);
151                }
152
153                // Use the first emphasis marker found as the target style
154                let target_marker = emphasis_info[0].3;
155
156                // Check all subsequent emphasis nodes for consistency
157                for (line_num, col, abs_pos, marker, content) in emphasis_info.iter().skip(1) {
158                    if *marker != target_marker {
159                        // Calculate emphasis length (marker + content + marker)
160                        let emphasis_len = 1 + content.len() + 1;
161
162                        warnings.push(LintWarning {
163                            rule_name: Some(self.name().to_string()),
164                            line: *line_num,
165                            column: *col,
166                            end_line: *line_num,
167                            end_column: col + emphasis_len,
168                            message: format!("Emphasis should use {target_marker} instead of {marker}"),
169                            fix: Some(Fix {
170                                range: *abs_pos..*abs_pos + emphasis_len,
171                                replacement: format!("{target_marker}{content}{target_marker}"),
172                            }),
173                            severity: Severity::Warning,
174                        });
175                    }
176                }
177            }
178            EmphasisStyle::Asterisk | EmphasisStyle::Underscore => {
179                let (wrong_marker, correct_marker) = match self.config.style {
180                    EmphasisStyle::Asterisk => ('_', '*'),
181                    EmphasisStyle::Underscore => ('*', '_'),
182                    EmphasisStyle::Consistent => {
183                        // This case is handled separately above
184                        // but fallback to asterisk style for safety
185                        ('_', '*')
186                    }
187                };
188
189                for (line_num, col, abs_pos, marker, content) in &emphasis_info {
190                    if *marker == wrong_marker {
191                        // Calculate emphasis length (marker + content + marker)
192                        let emphasis_len = 1 + content.len() + 1;
193
194                        warnings.push(LintWarning {
195                            rule_name: Some(self.name().to_string()),
196                            line: *line_num,
197                            column: *col,
198                            end_line: *line_num,
199                            end_column: col + emphasis_len,
200                            message: format!("Emphasis should use {correct_marker} instead of {wrong_marker}"),
201                            fix: Some(Fix {
202                                range: *abs_pos..*abs_pos + emphasis_len,
203                                replacement: format!("{correct_marker}{content}{correct_marker}"),
204                            }),
205                            severity: Severity::Warning,
206                        });
207                    }
208                }
209            }
210        }
211        Ok(warnings)
212    }
213
214    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
215        // Get all warnings with their fixes
216        let warnings = self.check(ctx)?;
217
218        // If no warnings, return original content
219        if warnings.is_empty() {
220            return Ok(ctx.content.to_string());
221        }
222
223        // Collect all fixes and sort by range start (descending) to apply from end to beginning
224        let mut fixes: Vec<_> = warnings
225            .iter()
226            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.start, f.range.end, &f.replacement)))
227            .collect();
228        fixes.sort_by(|a, b| b.0.cmp(&a.0));
229
230        // Apply fixes from end to beginning to preserve byte offsets
231        let mut result = ctx.content.to_string();
232        for (start, end, replacement) in fixes {
233            if start < result.len() && end <= result.len() && start <= end {
234                result.replace_range(start..end, replacement);
235            }
236        }
237
238        Ok(result)
239    }
240
241    /// Check if this rule should be skipped
242    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
243        ctx.content.is_empty() || !ctx.likely_has_emphasis()
244    }
245
246    fn as_any(&self) -> &dyn std::any::Any {
247        self
248    }
249
250    fn default_config_section(&self) -> Option<(String, toml::Value)> {
251        let json_value = serde_json::to_value(&self.config).ok()?;
252        Some((
253            self.name().to_string(),
254            crate::rule_config_serde::json_to_toml_value(&json_value)?,
255        ))
256    }
257
258    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
259    where
260        Self: Sized,
261    {
262        let rule_config = crate::rule_config_serde::load_rule_config::<MD049Config>(config);
263        Box::new(Self::from_config_struct(rule_config))
264    }
265}
266
#[cfg(test)]
mod tests {
    use super::*;

    /// The rule reports its identifier as "MD049".
    #[test]
    fn test_name() {
        assert_eq!(MD049EmphasisStyle::default().name(), "MD049");
    }

    /// Known style names map to their variants; anything else maps to `Consistent`.
    #[test]
    fn test_style_from_str() {
        let expectations = [
            ("asterisk", EmphasisStyle::Asterisk),
            ("underscore", EmphasisStyle::Underscore),
            ("other", EmphasisStyle::Consistent),
        ];
        for (input, expected) in expectations {
            assert_eq!(EmphasisStyle::from(input), expected);
        }
    }

    /// Emphasis-looking text inside links and reference definitions is ignored
    /// when the asterisk style is enforced.
    #[test]
    fn test_emphasis_in_links_not_flagged() {
        let markdown = r#"Check this [*asterisk*](https://example.com/*pattern*) link and [_underscore_](https://example.com/_private_).

Also see the [`__init__`][__init__] reference.

This should be _flagged_ since we're using asterisk style.

[__init__]: https://example.com/__init__.py"#;
        let linter = MD049EmphasisStyle::new(EmphasisStyle::Asterisk);
        let ctx = crate::lint_context::LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
        let found = linter.check(&ctx).unwrap();

        // Only the real emphasis outside links should be flagged
        assert_eq!(found.len(), 1);
        let only = &found[0];
        assert!(only.message.contains("Emphasis should use * instead of _"));
        // Should flag "_flagged_" (line 5) but not emphasis patterns inside links
        assert_eq!(only.line, 5);
    }

    /// With the underscore style, only the asterisk emphasis outside of links
    /// and reference definitions is reported.
    #[test]
    fn test_emphasis_in_links_vs_outside_links() {
        let markdown = r#"Check [*emphasis*](https://example.com/*test*) and inline *real emphasis* text.

[*link*]: https://example.com/*path*"#;
        let linter = MD049EmphasisStyle::new(EmphasisStyle::Underscore);
        let ctx = crate::lint_context::LintContext::new(markdown, crate::config::MarkdownFlavor::Standard);
        let found = linter.check(&ctx).unwrap();

        // Only the actual emphasis outside links should be flagged
        assert_eq!(found.len(), 1);
        let only = &found[0];
        assert!(only.message.contains("Emphasis should use _ instead of *"));
        // Should be the "real emphasis" text on line 1
        assert_eq!(only.line, 1);
    }
}