quickmark_core/rules/
md049.rs

1use serde::Deserialize;
2use std::rc::Rc;
3
4use once_cell::sync::Lazy;
5use regex::Regex;
6use tree_sitter::Node;
7
8use crate::{
9    linter::{range_from_tree_sitter, Context, RuleViolation},
10    rules::{Rule, RuleLinter, RuleType},
11};
12
13// MD049-specific configuration types
14#[derive(Debug, PartialEq, Clone, Deserialize)]
15pub enum EmphasisStyle {
16    #[serde(rename = "consistent")]
17    Consistent,
18    #[serde(rename = "asterisk")]
19    Asterisk,
20    #[serde(rename = "underscore")]
21    Underscore,
22}
23
24impl Default for EmphasisStyle {
25    fn default() -> Self {
26        Self::Consistent
27    }
28}
29
30#[derive(Debug, PartialEq, Clone, Deserialize)]
31pub struct MD049EmphasisStyleTable {
32    #[serde(default)]
33    pub style: EmphasisStyle,
34}
35
36impl Default for MD049EmphasisStyleTable {
37    fn default() -> Self {
38        Self {
39            style: EmphasisStyle::Consistent,
40        }
41    }
42}
43
44// Regex patterns to find emphasis
45static ASTERISK_EMPHASIS_REGEX: Lazy<Regex> =
46    Lazy::new(|| Regex::new(r"\*([^*\n]+?)\*").expect("Invalid asterisk emphasis regex"));
47
48static UNDERSCORE_EMPHASIS_REGEX: Lazy<Regex> =
49    Lazy::new(|| Regex::new(r"_([^_\n]+?)_").expect("Invalid underscore emphasis regex"));
50
51// Regex to find code spans (to exclude from emphasis checking)
52static CODE_SPAN_REGEX: Lazy<Regex> =
53    Lazy::new(|| Regex::new(r"`[^`\n]*`").expect("Invalid code span regex"));
54
/// Marker style actually found on a piece of emphasis in the document.
///
/// Unlike the configurable `EmphasisStyle`, this has no "consistent" option:
/// a concrete emphasis is always one marker or the other.
#[derive(Clone, Copy, Debug, PartialEq)]
enum DetectedEmphasisStyle {
    /// Emphasis delimited by `*`.
    Asterisk,
    /// Emphasis delimited by `_`.
    Underscore,
}
60
61pub(crate) struct MD049Linter {
62    context: Rc<Context>,
63    violations: Vec<RuleViolation>,
64    document_style: Option<DetectedEmphasisStyle>,
65}
66
67impl MD049Linter {
68    pub fn new(context: Rc<Context>) -> Self {
69        Self {
70            context,
71            violations: Vec::new(),
72            document_style: None,
73        }
74    }
75
76    fn get_configured_style(&self) -> EmphasisStyle {
77        self.context
78            .config
79            .linters
80            .settings
81            .emphasis_style
82            .style
83            .clone()
84    }
85
86    fn is_in_code_context(&self, node: &Node) -> bool {
87        // Check if this node is inside a code span or code block
88        let mut current = Some(*node);
89        while let Some(node_to_check) = current {
90            match node_to_check.kind() {
91                "code_span" | "fenced_code_block" | "indented_code_block" => {
92                    return true;
93                }
94                _ => {
95                    current = node_to_check.parent();
96                }
97            }
98        }
99        false
100    }
101
102    fn is_intraword_emphasis(
103        &self,
104        _text: &str,
105        start_offset: usize,
106        emphasis_start: usize,
107        emphasis_end: usize,
108    ) -> bool {
109        let emphasis_global_start = start_offset + emphasis_start;
110        let emphasis_global_end = start_offset + emphasis_end;
111        let source = self.context.get_document_content();
112
113        // Check character before emphasis start
114        let before_is_word_char = if emphasis_global_start > 0 {
115            if let Some(ch) = source.chars().nth(emphasis_global_start - 1) {
116                ch.is_alphanumeric() || ch == '_'
117            } else {
118                false
119            }
120        } else {
121            false
122        };
123
124        // Check character after emphasis end
125        let after_is_word_char = if emphasis_global_end < source.len() {
126            if let Some(ch) = source.chars().nth(emphasis_global_end) {
127                ch.is_alphanumeric() || ch == '_'
128            } else {
129                false
130            }
131        } else {
132            false
133        };
134
135        before_is_word_char || after_is_word_char
136    }
137
138    fn process_emphasis_matches(
139        &mut self,
140        text: &str,
141        start_offset: usize,
142        regex: &Regex,
143        style: DetectedEmphasisStyle,
144    ) {
145        // Find code span ranges to exclude
146        let code_span_ranges: Vec<(usize, usize)> = CODE_SPAN_REGEX
147            .find_iter(text)
148            .map(|m| (m.start(), m.end()))
149            .collect();
150
151        for capture in regex.find_iter(text) {
152            let match_start = capture.start();
153            let match_end = capture.end();
154
155            // Check if this match overlaps with any code span
156            let in_code_span = code_span_ranges
157                .iter()
158                .any(|(code_start, code_end)| match_start < *code_end && match_end > *code_start);
159
160            if in_code_span {
161                continue; // Skip this match as it's inside a code span
162            }
163
164            // Check if this is intraword emphasis
165            if self.is_intraword_emphasis(text, start_offset, match_start, match_end) {
166                // Intraword emphasis is always allowed regardless of configured style
167                continue;
168            }
169
170            let configured_style = self.get_configured_style();
171            let should_report_violation = match configured_style {
172                EmphasisStyle::Asterisk => style != DetectedEmphasisStyle::Asterisk,
173                EmphasisStyle::Underscore => style != DetectedEmphasisStyle::Underscore,
174                EmphasisStyle::Consistent => {
175                    if let Some(doc_style) = self.document_style {
176                        style != doc_style
177                    } else {
178                        // First emphasis sets the document style
179                        self.document_style = Some(style);
180                        false // No violation for the first emphasis
181                    }
182                }
183            };
184
185            if should_report_violation {
186                let expected_style = match configured_style {
187                    EmphasisStyle::Asterisk => "asterisk",
188                    EmphasisStyle::Underscore => "underscore",
189                    EmphasisStyle::Consistent => match self.document_style {
190                        Some(DetectedEmphasisStyle::Asterisk) => "asterisk",
191                        Some(DetectedEmphasisStyle::Underscore) => "underscore",
192                        None => "consistent", // This shouldn't happen, but fallback
193                    },
194                };
195
196                let actual_style = match style {
197                    DetectedEmphasisStyle::Asterisk => "asterisk",
198                    DetectedEmphasisStyle::Underscore => "underscore",
199                };
200
201                // Convert text offset to byte offset
202                let global_start = start_offset + match_start;
203                let global_end = start_offset + match_end;
204
205                let range = tree_sitter::Range {
206                    start_byte: global_start,
207                    end_byte: global_end,
208                    start_point: self.byte_to_point(global_start),
209                    end_point: self.byte_to_point(global_end),
210                };
211
212                self.violations.push(RuleViolation::new(
213                    &MD049,
214                    format!("Expected: {expected_style}; Actual: {actual_style}"),
215                    self.context.file_path.clone(),
216                    range_from_tree_sitter(&range),
217                ));
218            }
219        }
220    }
221
222    fn find_emphasis_violations_in_text(&mut self, node: &Node) {
223        if self.is_in_code_context(node) {
224            return;
225        }
226
227        let start_byte = node.start_byte();
228        let text = {
229            let source = self.context.get_document_content();
230            source[start_byte..node.end_byte()].to_string()
231        };
232
233        // eprintln!("DEBUG MD049: Processing text: '{}'", text);
234
235        // Check for asterisk emphasis
236        self.process_emphasis_matches(
237            &text,
238            start_byte,
239            &ASTERISK_EMPHASIS_REGEX,
240            DetectedEmphasisStyle::Asterisk,
241        );
242
243        // Check for underscore emphasis
244        self.process_emphasis_matches(
245            &text,
246            start_byte,
247            &UNDERSCORE_EMPHASIS_REGEX,
248            DetectedEmphasisStyle::Underscore,
249        );
250    }
251
252    fn byte_to_point(&self, byte_pos: usize) -> tree_sitter::Point {
253        let source = self.context.get_document_content();
254        let mut line = 0;
255        let mut column = 0;
256
257        for (i, ch) in source.char_indices() {
258            if i >= byte_pos {
259                break;
260            }
261            if ch == '\n' {
262                line += 1;
263                column = 0;
264            } else {
265                column += 1;
266            }
267        }
268
269        tree_sitter::Point { row: line, column }
270    }
271}
272
273impl RuleLinter for MD049Linter {
274    fn feed(&mut self, node: &Node) {
275        match node.kind() {
276            // Look for text content that might contain emphasis
277            "text" | "inline" => {
278                self.find_emphasis_violations_in_text(node);
279            }
280            _ => {}
281        }
282    }
283
284    fn finalize(&mut self) -> Vec<RuleViolation> {
285        std::mem::take(&mut self.violations)
286    }
287}
288
289pub const MD049: Rule = Rule {
290    id: "MD049",
291    alias: "emphasis-style",
292    tags: &["emphasis"],
293    description: "Emphasis style",
294    rule_type: RuleType::Token,
295    required_nodes: &["emphasis"],
296    new_linter: |context| Box::new(MD049Linter::new(context)),
297};
298
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::RuleSeverity;
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_rules;

    /// Builds a test configuration via `test_config_with_rules` with
    /// `emphasis-style` at error severity.
    fn test_config() -> crate::config::QuickmarkConfig {
        test_config_with_rules(vec![("emphasis-style", RuleSeverity::Error)])
    }

    /// Lints `input` as `test.md` and returns how many of the reported
    /// violations belong to MD049.
    fn md049_violation_count(input: &str) -> usize {
        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        let violations = linter.analyze();
        violations
            .iter()
            .filter(|v| v.rule().id == "MD049")
            .count()
    }

    #[test]
    fn test_consistent_style_asterisk_should_pass() {
        let found = md049_violation_count("This has *valid* emphasis and *more* emphasis.");
        assert_eq!(found, 0);
    }

    #[test]
    fn test_consistent_style_underscore_should_pass() {
        let found = md049_violation_count("This has _valid_ emphasis and _more_ emphasis.");
        assert_eq!(found, 0);
    }

    #[test]
    fn test_mixed_styles_should_fail() {
        let found =
            md049_violation_count("This has *asterisk* emphasis and _underscore_ emphasis.");
        // The underscore emphasis is inconsistent with the asterisk one seen first.
        assert!(found > 0);
    }

    #[test]
    fn test_intraword_emphasis_should_be_preserved() {
        let found =
            md049_violation_count("This has apple*banana*cherry and normal *emphasis* as well.");
        // Intraword emphasis is not checked for style consistency.
        assert_eq!(found, 0);
    }

    #[test]
    fn test_nested_emphasis_mixed_styles() {
        let found =
            md049_violation_count("This paragraph *nests both _kinds_ of emphasis* marker.");
        // The nested underscore emphasis is inconsistent with the outer asterisks.
        assert!(found > 0);
    }
}
383}