quickmark_core/rules/
md037.rs

1use std::rc::Rc;
2
3use once_cell::sync::Lazy;
4use regex::Regex;
5use tree_sitter::Node;
6
7use crate::{
8    linter::{range_from_tree_sitter, Context, RuleViolation},
9    rules::{Rule, RuleLinter, RuleType},
10};
11
12// Regex patterns to find emphasis markers with spaces
13static ASTERISK_EMPHASIS_REGEX: Lazy<Regex> = Lazy::new(|| {
14    Regex::new(r"(\*{1,3})(\s*)([^*\n]*?)(\s*)(\*{1,3})").expect("Invalid asterisk emphasis regex")
15});
16
17static UNDERSCORE_EMPHASIS_REGEX: Lazy<Regex> = Lazy::new(|| {
18    Regex::new(r"(\_{1,3})(\s*)([^_\n]*?)(\s*)(\_{1,3})")
19        .expect("Invalid underscore emphasis regex")
20});
21
22// Regex to find code spans
23static CODE_SPAN_REGEX: Lazy<Regex> =
24    Lazy::new(|| Regex::new(r"`[^`\n]*`").expect("Invalid code span regex"));
25
26pub(crate) struct MD037Linter {
27    context: Rc<Context>,
28    violations: Vec<RuleViolation>,
29}
30
31impl MD037Linter {
32    pub fn new(context: Rc<Context>) -> Self {
33        Self {
34            context,
35            violations: Vec::new(),
36        }
37    }
38
39    fn is_in_code_context(&self, node: &Node) -> bool {
40        // Check if this node is inside a code span or code block
41        let mut current = Some(*node);
42        while let Some(node_to_check) = current {
43            match node_to_check.kind() {
44                "code_span" | "fenced_code_block" | "indented_code_block" => {
45                    return true;
46                }
47                _ => {
48                    current = node_to_check.parent();
49                }
50            }
51        }
52        false
53    }
54
55    fn find_emphasis_violations_in_text(&mut self, node: &Node) {
56        if self.is_in_code_context(node) {
57            return;
58        }
59
60        let start_byte = node.start_byte();
61        let text = {
62            let source = self.context.get_document_content();
63            source[start_byte..node.end_byte()].to_string()
64        };
65
66        // Find code span ranges to exclude
67        let code_span_ranges: Vec<(usize, usize)> = CODE_SPAN_REGEX
68            .find_iter(&text)
69            .map(|m| (m.start(), m.end()))
70            .collect();
71
72        // Check for asterisk emphasis violations
73        self.check_emphasis_pattern(
74            &text,
75            start_byte,
76            &ASTERISK_EMPHASIS_REGEX,
77            &code_span_ranges,
78        );
79
80        // Check for underscore emphasis violations
81        self.check_emphasis_pattern(
82            &text,
83            start_byte,
84            &UNDERSCORE_EMPHASIS_REGEX,
85            &code_span_ranges,
86        );
87    }
88
89    fn check_emphasis_pattern(
90        &mut self,
91        text: &str,
92        text_start_byte: usize,
93        regex: &Regex,
94        code_span_ranges: &[(usize, usize)],
95    ) {
96        for capture in regex.captures_iter(text) {
97            if let (
98                Some(opening_marker),
99                Some(opening_space),
100                Some(_content),
101                Some(closing_space),
102                Some(closing_marker),
103            ) = (
104                capture.get(1),
105                capture.get(2),
106                capture.get(3),
107                capture.get(4),
108                capture.get(5),
109            ) {
110                // Check if this match overlaps with any code span
111                let match_start = capture.get(0).unwrap().start();
112                let match_end = capture.get(0).unwrap().end();
113
114                let in_code_span = code_span_ranges.iter().any(|(code_start, code_end)| {
115                    // Check if the match overlaps with a code span
116                    match_start < *code_end && match_end > *code_start
117                });
118
119                if in_code_span {
120                    continue; // Skip this match as it's inside a code span
121                }
122
123                let opening_text = opening_marker.as_str();
124                let closing_text = closing_marker.as_str();
125
126                // Only process if markers match (same type and count)
127                if opening_text == closing_text {
128                    // Check for space after opening marker
129                    if !opening_space.as_str().is_empty() {
130                        self.create_opening_space_violation(
131                            opening_marker,
132                            opening_space,
133                            text_start_byte,
134                        );
135                    }
136
137                    // Check for space before closing marker
138                    if !closing_space.as_str().is_empty() {
139                        self.create_closing_space_violation(
140                            closing_marker,
141                            closing_space,
142                            text_start_byte,
143                        );
144                    }
145                }
146            }
147        }
148    }
149
150    fn create_opening_space_violation(
151        &mut self,
152        opening_marker: regex::Match,
153        opening_space: regex::Match,
154        text_start_byte: usize,
155    ) {
156        let marker = opening_marker.as_str();
157        let space = opening_space.as_str();
158        let violation_start = text_start_byte + opening_marker.end();
159        let violation_end = text_start_byte + opening_space.end();
160
161        let range = tree_sitter::Range {
162            start_byte: violation_start,
163            end_byte: violation_end,
164            start_point: self.byte_to_point(violation_start),
165            end_point: self.byte_to_point(violation_end),
166        };
167
168        self.violations.push(RuleViolation::new(
169            &MD037,
170            format!("{} [Context: \"{}{}\"]", MD037.description, marker, space),
171            self.context.file_path.clone(),
172            range_from_tree_sitter(&range),
173        ));
174    }
175
176    fn create_closing_space_violation(
177        &mut self,
178        closing_marker: regex::Match,
179        closing_space: regex::Match,
180        text_start_byte: usize,
181    ) {
182        let marker = closing_marker.as_str();
183        let space = closing_space.as_str();
184        let violation_start = text_start_byte + closing_space.start();
185        let violation_end = text_start_byte + closing_marker.end();
186
187        let range = tree_sitter::Range {
188            start_byte: violation_start,
189            end_byte: violation_end,
190            start_point: self.byte_to_point(violation_start),
191            end_point: self.byte_to_point(violation_end),
192        };
193
194        self.violations.push(RuleViolation::new(
195            &MD037,
196            format!("{} [Context: \"{}{}\"]", MD037.description, space, marker),
197            self.context.file_path.clone(),
198            range_from_tree_sitter(&range),
199        ));
200    }
201
202    fn byte_to_point(&self, byte_pos: usize) -> tree_sitter::Point {
203        let source = self.context.get_document_content();
204        let mut line = 0;
205        let mut column = 0;
206
207        for (i, ch) in source.char_indices() {
208            if i >= byte_pos {
209                break;
210            }
211            if ch == '\n' {
212                line += 1;
213                column = 0;
214            } else {
215                column += 1;
216            }
217        }
218
219        tree_sitter::Point { row: line, column }
220    }
221}
222
223impl RuleLinter for MD037Linter {
224    fn feed(&mut self, node: &Node) {
225        match node.kind() {
226            // Look for text content that might contain emphasis markers with spaces
227            "text" | "inline" => {
228                self.find_emphasis_violations_in_text(node);
229            }
230            _ => {}
231        }
232    }
233
234    fn finalize(&mut self) -> Vec<RuleViolation> {
235        std::mem::take(&mut self.violations)
236    }
237}
238
239pub const MD037: Rule = Rule {
240    id: "MD037",
241    alias: "no-space-in-emphasis",
242    tags: &["whitespace", "emphasis"],
243    description: "Spaces inside emphasis markers",
244    rule_type: RuleType::Token,
245    required_nodes: &["emphasis", "strong_emphasis"],
246    new_linter: |context| Box::new(MD037Linter::new(context)),
247};
248
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::RuleSeverity;
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_rules;

    /// Configuration with only the MD037 rule enabled, at error severity.
    fn test_config() -> crate::config::QuickmarkConfig {
        test_config_with_rules(vec![("no-space-in-emphasis", RuleSeverity::Error)])
    }

    /// Lints `input` as `test.md` and returns how many of the reported
    /// violations belong to MD037.
    fn md037_violation_count(input: &str) -> usize {
        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        let violations = linter.analyze();
        violations
            .iter()
            .filter(|violation| violation.rule().id == "MD037")
            .count()
    }

    #[test]
    fn test_no_violations_valid_emphasis() {
        let input = "This has *valid emphasis* and **valid strong** text.
Also _valid emphasis_ and __valid strong__ text.
And ***valid strong emphasis*** and ___valid strong emphasis___ text.";

        assert_eq!(md037_violation_count(input), 0);
    }

    #[test]
    fn test_violations_spaces_inside_single_asterisk() {
        let input = "This has * invalid emphasis * with spaces inside.";

        // Should find 2 violations: one for opening space, one for closing space
        assert_eq!(md037_violation_count(input), 2);
    }

    #[test]
    fn test_violations_spaces_inside_double_asterisk() {
        let input = "This has ** invalid strong ** with spaces inside.";

        // Should find 2 violations: one for opening space, one for closing space
        assert_eq!(md037_violation_count(input), 2);
    }

    #[test]
    fn test_violations_spaces_inside_triple_asterisk() {
        let input = "This has *** invalid strong emphasis *** with spaces inside.";

        // Should find 2 violations: one for opening space, one for closing space
        assert_eq!(md037_violation_count(input), 2);
    }

    #[test]
    fn test_violations_spaces_inside_single_underscore() {
        let input = "This has _ invalid emphasis _ with spaces inside.";

        // Should find 2 violations: one for opening space, one for closing space
        assert_eq!(md037_violation_count(input), 2);
    }

    #[test]
    fn test_violations_spaces_inside_double_underscore() {
        let input = "This has __ invalid strong __ with spaces inside.";

        // Should find 2 violations: one for opening space, one for closing space
        assert_eq!(md037_violation_count(input), 2);
    }

    #[test]
    fn test_violations_spaces_inside_triple_underscore() {
        let input = "This has ___ invalid strong emphasis ___ with spaces inside.";

        // Should find 2 violations: one for opening space, one for closing space
        assert_eq!(md037_violation_count(input), 2);
    }

    #[test]
    fn test_violations_mixed_valid_and_invalid() {
        let input = "Mix of *valid* and * invalid * emphasis.
Also **valid** and ** invalid ** strong.";

        // Should find 4 violations: 2 from each invalid emphasis (opening and closing spaces)
        assert_eq!(md037_violation_count(input), 4);
    }

    #[test]
    fn test_violations_one_sided_spaces() {
        let input = "One sided *invalid * and * invalid* emphasis.";

        // Should find 2 violations: one for each one-sided space
        assert_eq!(md037_violation_count(input), 2);
    }

    #[test]
    fn test_no_violations_in_code_blocks() {
        let input = "Regular text with *valid* emphasis.

```markdown
This should not trigger * invalid * emphasis in code blocks.
```

More text with _valid_ emphasis.";

        assert_eq!(md037_violation_count(input), 0);
    }

    #[test]
    fn test_no_violations_in_code_spans() {
        let input = "Regular text with `* invalid * code spans` should not trigger violations.";

        assert_eq!(md037_violation_count(input), 0);
    }
}
439}