mdbook_lint_core/rules/standard/
md049.rs

1//! MD049: Emphasis style consistency
2//!
3//! This rule checks that emphasis markers (italics) use a consistent style throughout the document.
4
5use crate::error::Result;
6use crate::rule::{Rule, RuleCategory, RuleMetadata};
7use crate::{
8    Document,
9    violation::{Severity, Violation},
10};
11
12/// Rule to check emphasis style consistency
13pub struct MD049 {
14    /// Preferred emphasis style
15    style: EmphasisStyle,
16}
17
/// Which delimiter the rule expects around emphasized (italic) text.
///
/// Equality on this fieldless enum is total, so `Eq` and `Hash` are derived
/// alongside `PartialEq`; this lets the style be used as a map/set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EmphasisStyle {
    /// Use asterisk (*text*)
    Asterisk,
    /// Use underscore (_text_)
    Underscore,
    /// Detect from first usage in document
    Consistent,
}
27
28impl MD049 {
29    /// Create a new MD049 rule with consistent style detection
30    pub fn new() -> Self {
31        Self {
32            style: EmphasisStyle::Consistent,
33        }
34    }
35
36    /// Create a new MD049 rule with specific style preference
37    #[allow(dead_code)]
38    pub fn with_style(style: EmphasisStyle) -> Self {
39        Self { style }
40    }
41
42    /// Find emphasis markers in a line and check for style violations
43    fn check_line_emphasis(
44        &self,
45        line: &str,
46        line_number: usize,
47        expected_style: Option<EmphasisStyle>,
48    ) -> (Vec<Violation>, Option<EmphasisStyle>) {
49        let mut violations = Vec::new();
50        let mut detected_style = expected_style;
51
52        // Find emphasis markers - look for single * or _ that aren't part of strong emphasis
53        let chars: Vec<char> = line.chars().collect();
54        let mut i = 0;
55
56        while i < chars.len() {
57            if chars[i] == '*' || chars[i] == '_' {
58                let marker = chars[i];
59
60                // Skip if this is part of strong emphasis (** or __)
61                if i + 1 < chars.len() && chars[i + 1] == marker {
62                    i += 2;
63                    continue;
64                }
65
66                // Skip if preceded by strong emphasis marker
67                if i > 0 && chars[i - 1] == marker {
68                    i += 1;
69                    continue;
70                }
71
72                // Look for closing marker
73                if let Some(end_pos) = self.find_closing_emphasis_marker(&chars, i + 1, marker) {
74                    let current_style = if marker == '*' {
75                        EmphasisStyle::Asterisk
76                    } else {
77                        EmphasisStyle::Underscore
78                    };
79
80                    // Establish or check style consistency
81                    if let Some(ref expected) = detected_style {
82                        if *expected != current_style {
83                            let expected_marker = if *expected == EmphasisStyle::Asterisk {
84                                '*'
85                            } else {
86                                '_'
87                            };
88                            violations.push(self.create_violation(
89                                format!(
90                                    "Emphasis style inconsistent - expected '{expected_marker}' but found '{marker}'"
91                                ),
92                                line_number,
93                                i + 1, // Convert to 1-based column
94                                Severity::Warning,
95                            ));
96                        }
97                    } else {
98                        // First emphasis found - establish the style
99                        detected_style = Some(current_style);
100                    }
101
102                    i = end_pos + 1;
103                } else {
104                    i += 1;
105                }
106            } else {
107                i += 1;
108            }
109        }
110
111        (violations, detected_style)
112    }
113
114    /// Find the closing emphasis marker
115    fn find_closing_emphasis_marker(
116        &self,
117        chars: &[char],
118        start: usize,
119        marker: char,
120    ) -> Option<usize> {
121        let mut i = start;
122
123        while i < chars.len() {
124            if chars[i] == marker {
125                // Make sure this isn't part of strong emphasis
126                if i + 1 < chars.len() && chars[i + 1] == marker {
127                    i += 2;
128                    continue;
129                }
130                if i > 0 && chars[i - 1] == marker {
131                    i += 1;
132                    continue;
133                }
134                return Some(i);
135            }
136            i += 1;
137        }
138
139        None
140    }
141
142    /// Get code block ranges to exclude from checking
143    fn get_code_block_ranges(&self, lines: &[&str]) -> Vec<bool> {
144        let mut in_code_block = vec![false; lines.len()];
145        let mut in_fenced_block = false;
146
147        for (i, line) in lines.iter().enumerate() {
148            let trimmed = line.trim();
149
150            // Check for fenced code blocks
151            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
152                in_fenced_block = !in_fenced_block;
153                in_code_block[i] = true;
154                continue;
155            }
156
157            if in_fenced_block {
158                in_code_block[i] = true;
159                continue;
160            }
161        }
162
163        in_code_block
164    }
165}
166
167impl Default for MD049 {
168    fn default() -> Self {
169        Self::new()
170    }
171}
172
173impl Rule for MD049 {
174    fn id(&self) -> &'static str {
175        "MD049"
176    }
177
178    fn name(&self) -> &'static str {
179        "emphasis-style"
180    }
181
182    fn description(&self) -> &'static str {
183        "Emphasis style should be consistent"
184    }
185
186    fn metadata(&self) -> RuleMetadata {
187        RuleMetadata::stable(RuleCategory::Formatting).introduced_in("mdbook-lint v0.1.0")
188    }
189
190    fn check_with_ast<'a>(
191        &self,
192        document: &Document,
193        _ast: Option<&'a comrak::nodes::AstNode<'a>>,
194    ) -> Result<Vec<Violation>> {
195        let mut violations = Vec::new();
196        let lines: Vec<&str> = document.content.lines().collect();
197        let in_code_block = self.get_code_block_ranges(&lines);
198
199        let mut expected_style = match self.style {
200            EmphasisStyle::Asterisk => Some(EmphasisStyle::Asterisk),
201            EmphasisStyle::Underscore => Some(EmphasisStyle::Underscore),
202            EmphasisStyle::Consistent => None, // Detect from first usage
203        };
204
205        for (line_number, line) in lines.iter().enumerate() {
206            let line_number = line_number + 1;
207
208            // Skip lines inside code blocks
209            if in_code_block[line_number - 1] {
210                continue;
211            }
212
213            let (line_violations, detected_style) =
214                self.check_line_emphasis(line, line_number, expected_style);
215            violations.extend(line_violations);
216
217            // Update expected style if we detected one
218            if expected_style.is_none() && detected_style.is_some() {
219                expected_style = detected_style;
220            }
221        }
222
223        Ok(violations)
224    }
225}
226
#[cfg(test)]
mod tests {
    use super::*;
    use crate::rule::Rule;
    use std::path::PathBuf;

    /// Wraps markdown text in a `Document` with the path `test.md`.
    fn create_test_document(content: &str) -> Document {
        let path = PathBuf::from("test.md");
        Document::new(content.to_string(), path).unwrap()
    }

    #[test]
    fn test_md049_consistent_asterisk_style() {
        let content = "This has *emphasis* and more *italic text* here.\n\
                       \n\
                       Another paragraph with *more emphasis* text.\n";

        let violations = MD049::new()
            .check(&create_test_document(content))
            .unwrap();
        assert_eq!(violations.len(), 0);
    }

    #[test]
    fn test_md049_consistent_underscore_style() {
        let content = "This has _emphasis_ and more _italic text_ here.\n\
                       \n\
                       Another paragraph with _more emphasis_ text.\n";

        let violations = MD049::new()
            .check(&create_test_document(content))
            .unwrap();
        assert_eq!(violations.len(), 0);
    }

    #[test]
    fn test_md049_mixed_styles_violation() {
        let content = "This has *emphasis* and more _italic text_ here.\n\
                       \n\
                       Another paragraph with *more emphasis* text.\n";

        let violations = MD049::new()
            .check(&create_test_document(content))
            .unwrap();
        assert_eq!(violations.len(), 1);

        let only = &violations[0];
        assert_eq!(only.rule_id, "MD049");
        assert_eq!(only.line, 1);
        assert!(only.message.contains("expected '*' but found '_'"));
    }

    #[test]
    fn test_md049_preferred_asterisk_style() {
        let content = "This has _emphasis_ text.\n";

        let violations = MD049::with_style(EmphasisStyle::Asterisk)
            .check(&create_test_document(content))
            .unwrap();
        assert_eq!(violations.len(), 1);
        assert!(violations[0].message.contains("expected '*' but found '_'"));
    }

    #[test]
    fn test_md049_preferred_underscore_style() {
        let content = "This has *emphasis* text.\n";

        let violations = MD049::with_style(EmphasisStyle::Underscore)
            .check(&create_test_document(content))
            .unwrap();
        assert_eq!(violations.len(), 1);
        assert!(violations[0].message.contains("expected '_' but found '*'"));
    }

    #[test]
    fn test_md049_strong_emphasis_ignored() {
        let content = "This has **strong text** and _italic text_.\n\
                       \n\
                       More **strong** and _italic_ here.\n";

        let violations = MD049::new()
            .check(&create_test_document(content))
            .unwrap();
        // Strong markers are skipped and every emphasis uses underscores.
        assert_eq!(violations.len(), 0);
    }

    #[test]
    fn test_md049_mixed_strong_and_emphasis() {
        let content = "This has **strong** and *italic* and _also italic_.\n\
                       \n\
                       More text here.\n";

        let violations = MD049::new()
            .check(&create_test_document(content))
            .unwrap();
        assert_eq!(violations.len(), 1);
        assert!(violations[0].message.contains("expected '*' but found '_'"));
    }

    #[test]
    fn test_md049_code_blocks_ignored() {
        let content = "This has *italic* text.\n\
                       \n\
                       ```\n\
                       Code with *asterisks* and _underscores_ should be ignored.\n\
                       ```\n\
                       \n\
                       This has _different style_ which should trigger violation.\n";

        let violations = MD049::new()
            .check(&create_test_document(content))
            .unwrap();
        // Only the underscore emphasis after the fenced block is reported.
        assert_eq!(violations.len(), 1);
        assert_eq!(violations[0].line, 7);
    }

    #[test]
    fn test_md049_inline_code_spans() {
        let content = "This has *italic* and `code with *asterisks*` text.\n\
                       \n\
                       More *italic* text here.\n";

        let violations = MD049::new()
            .check(&create_test_document(content))
            .unwrap();
        // Every marker here, inside or outside the code span, is an asterisk,
        // so no style conflict can be reported.
        assert_eq!(violations.len(), 0);
    }

    #[test]
    fn test_md049_no_emphasis() {
        let content = "This document has no emphasis at all.\n\
                       \n\
                       Just regular text with **strong** formatting.\n";

        let violations = MD049::new()
            .check(&create_test_document(content))
            .unwrap();
        assert_eq!(violations.len(), 0);
    }

    #[test]
    fn test_md049_multiple_violations() {
        let content = "Start with *italic* text.\n\
                       \n\
                       Then switch to _different style_.\n\
                       \n\
                       Back to *original style*.\n\
                       \n\
                       And _different again_.\n";

        let violations = MD049::new()
            .check(&create_test_document(content))
            .unwrap();
        // The underscore emphasis on lines 3 and 7 conflicts with line 1.
        assert_eq!(violations.len(), 2);
        assert_eq!(violations[0].line, 3);
        assert_eq!(violations[1].line, 7);
    }

    #[test]
    fn test_md049_unclosed_emphasis() {
        let content = "This has *unclosed emphasis and _closed emphasis_.\n\
                       \n\
                       More text here.\n";

        let violations = MD049::new()
            .check(&create_test_document(content))
            .unwrap();
        // The lone `*` never closes, so `_closed emphasis_` is the only
        // emphasis and it sets the style.
        assert_eq!(violations.len(), 0);
    }
}