quickmark_core/rules/
md038.rs

1use std::rc::Rc;
2
3use tree_sitter::Node;
4
5use crate::linter::{range_from_tree_sitter, Context, RuleLinter, RuleViolation};
6
7use super::{Rule, RuleType};
8
/// Base text of every MD038 violation message; the reporting code appends a
/// `[Context: ...]` suffix saying which side of the code span is affected.
const VIOLATION_MESSAGE: &str = "Spaces inside code span elements";
10
/// Linter state for rule MD038 (whitespace padding inside code spans).
pub(crate) struct MD038Linter {
    /// Shared lint context; the document content and file path are read from it.
    context: Rc<Context>,
    /// Violations collected so far; handed out (and cleared) by `finalize`.
    violations: Vec<RuleViolation>,
}
15
16impl MD038Linter {
17    pub fn new(context: Rc<Context>) -> Self {
18        Self {
19            context,
20            violations: Vec::new(),
21        }
22    }
23
24    fn check_inline_content(&mut self, node: &Node) {
25        let text = {
26            let content = self.context.get_document_content();
27            node.utf8_text(content.as_bytes()).unwrap_or("").to_string()
28        };
29        let node_start_byte = node.start_byte();
30
31        // Find all code spans using a proper parser
32        let code_spans = self.find_code_spans(&text);
33        for (content, start, len) in code_spans {
34            self.check_code_span_content(&content, node_start_byte + start, len);
35        }
36    }
37
38    fn find_code_spans(&self, text: &str) -> Vec<(String, usize, usize)> {
39        let mut spans = Vec::new();
40        let mut i = 0;
41        let chars: Vec<char> = text.chars().collect();
42
43        while i < chars.len() {
44            if chars[i] == '`' {
45                // Count opening backticks
46                let start_pos = i;
47                let mut backtick_count = 0;
48                while i < chars.len() && chars[i] == '`' {
49                    backtick_count += 1;
50                    i += 1;
51                }
52
53                // Look for closing backticks of the same count
54                let content_start = i;
55                let mut found_closing = false;
56
57                while i < chars.len() {
58                    if chars[i] == '`' {
59                        let closing_start = i;
60                        let mut closing_count = 0;
61                        while i < chars.len() && chars[i] == '`' {
62                            closing_count += 1;
63                            i += 1;
64                        }
65
66                        if closing_count == backtick_count {
67                            // Found matching closing backticks
68                            let content_end = closing_start;
69                            let content: String =
70                                chars[content_start..content_end].iter().collect();
71                            let content_byte_start = text
72                                .char_indices()
73                                .nth(content_start)
74                                .map(|(i, _)| i)
75                                .unwrap_or(0);
76                            let content_len = content.len();
77                            spans.push((content, content_byte_start, content_len));
78                            found_closing = true;
79                            break;
80                        }
81                        // Continue looking if backtick count doesn't match
82                    } else {
83                        i += 1;
84                    }
85                }
86
87                // If we didn't find a closing sequence, backtrack and continue
88                if !found_closing {
89                    i = start_pos + 1;
90                }
91            } else {
92                i += 1;
93            }
94        }
95
96        spans
97    }
98
99    fn check_code_span_content(
100        &mut self,
101        code_content: &str,
102        content_start_byte: usize,
103        content_len: usize,
104    ) {
105        // If the content is only whitespace, allow it (per recent clarification)
106        if code_content.trim().is_empty() {
107            return;
108        }
109
110        // Check for leading whitespace violations
111        let leading_whitespace: String = code_content
112            .chars()
113            .take_while(|c| c.is_whitespace())
114            .collect();
115        let leading_is_violation = match leading_whitespace.as_str() {
116            "" => false,  // No leading whitespace - OK
117            " " => false, // Single space - OK per CommonMark spec
118            _ => true,    // Multiple spaces, tabs, or other whitespace - violation
119        };
120
121        if leading_is_violation {
122            let leading_byte_len = leading_whitespace.len();
123            let violation_range = tree_sitter::Range {
124                start_byte: content_start_byte,
125                end_byte: content_start_byte + leading_byte_len,
126                start_point: self.byte_to_point(content_start_byte),
127                end_point: self.byte_to_point(content_start_byte + leading_byte_len),
128            };
129
130            self.violations.push(RuleViolation::new(
131                &MD038,
132                format!("{VIOLATION_MESSAGE} [Context: leading whitespace]"),
133                self.context.file_path.clone(),
134                range_from_tree_sitter(&violation_range),
135            ));
136        }
137
138        // Check for trailing whitespace violations
139        let trailing_whitespace: String = code_content
140            .chars()
141            .rev()
142            .take_while(|c| c.is_whitespace())
143            .collect::<String>()
144            .chars()
145            .rev()
146            .collect();
147        let trailing_is_violation = match trailing_whitespace.as_str() {
148            "" => false,  // No trailing whitespace - OK
149            " " => false, // Single space - OK per CommonMark spec
150            _ => true,    // Multiple spaces, tabs, or other whitespace - violation
151        };
152
153        if trailing_is_violation {
154            let trailing_byte_len = trailing_whitespace.len();
155            let violation_end_byte = content_start_byte + content_len;
156            let violation_start_byte = violation_end_byte - trailing_byte_len;
157
158            let violation_range = tree_sitter::Range {
159                start_byte: violation_start_byte,
160                end_byte: violation_end_byte,
161                start_point: self.byte_to_point(violation_start_byte),
162                end_point: self.byte_to_point(violation_end_byte),
163            };
164
165            self.violations.push(RuleViolation::new(
166                &MD038,
167                format!("{VIOLATION_MESSAGE} [Context: trailing whitespace]"),
168                self.context.file_path.clone(),
169                range_from_tree_sitter(&violation_range),
170            ));
171        }
172    }
173
174    fn byte_to_point(&self, byte_pos: usize) -> tree_sitter::Point {
175        let source = self.context.get_document_content();
176        let mut line = 0;
177        let mut column = 0;
178
179        for (i, ch) in source.char_indices() {
180            if i >= byte_pos {
181                break;
182            }
183            if ch == '\n' {
184                line += 1;
185                column = 0;
186            } else {
187                column += 1;
188            }
189        }
190
191        tree_sitter::Point { row: line, column }
192    }
193}
194
195impl RuleLinter for MD038Linter {
196    fn feed(&mut self, node: &Node) {
197        if node.kind() == "inline" {
198            self.check_inline_content(node);
199        }
200    }
201
202    fn finalize(&mut self) -> Vec<RuleViolation> {
203        std::mem::take(&mut self.violations)
204    }
205}
206
/// Rule descriptor for MD038 (`no-space-in-code`).
///
/// `new_linter` builds an [`MD038Linter`] for a given context.
pub const MD038: Rule = Rule {
    id: "MD038",
    alias: "no-space-in-code",
    tags: &["whitespace", "code"],
    description: "Spaces inside code span elements",
    rule_type: RuleType::Token,
    // The linter's `feed` only reacts to `inline` nodes.
    // NOTE(review): presumably this list tells the driver which node kinds to
    // feed to the linter; confirm against the `Rule` definition.
    required_nodes: &["inline"],
    new_linter: |context| Box::new(MD038Linter::new(context)),
};
216
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::RuleSeverity;
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_rules;

    /// Configuration with only `no-space-in-code` enabled, at error severity.
    fn test_config() -> crate::config::QuickmarkConfig {
        test_config_with_rules(vec![("no-space-in-code", RuleSeverity::Error)])
    }

    /// Lints `input` as `test.md` and returns how many MD038 violations were reported.
    fn md038_violation_count(input: &str) -> usize {
        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        linter
            .analyze()
            .iter()
            .filter(|v| v.rule().id == "MD038")
            .count()
    }

    #[test]
    fn test_no_violations_valid_code_spans() {
        assert_eq!(md038_violation_count("This has `valid code` spans."), 0);
    }

    #[test]
    fn test_no_violations_single_space_padding() {
        // Single leading and trailing space is allowed by CommonMark spec
        let input = "This has ` code ` spans with single space padding.";
        assert_eq!(md038_violation_count(input), 0);
    }

    #[test]
    fn test_no_violations_code_spans_only_spaces() {
        // Code spans containing only spaces should be allowed
        let input = "This has `   ` code spans with only spaces.";
        assert_eq!(md038_violation_count(input), 0);
    }

    #[test]
    fn test_violations_multiple_leading_spaces() {
        let input = "This has `  code` with multiple leading spaces.";
        assert_eq!(md038_violation_count(input), 1);
    }

    #[test]
    fn test_violations_multiple_trailing_spaces() {
        let input = "This has `code  ` with multiple trailing spaces.";
        assert_eq!(md038_violation_count(input), 1);
    }

    #[test]
    fn test_violations_multiple_leading_and_trailing_spaces() {
        let input = "This has `  code  ` with multiple leading and trailing spaces.";
        assert_eq!(md038_violation_count(input), 2);
    }

    #[test]
    fn test_violations_tabs_instead_of_spaces() {
        let input = "This has `\tcode\t` with tabs.";
        assert_eq!(md038_violation_count(input), 2);
    }

    #[test]
    fn test_violations_mixed_whitespace() {
        let input = "This has ` \tcode \t` with mixed whitespace.";
        assert_eq!(md038_violation_count(input), 2);
    }

    #[test]
    fn test_violations_only_leading_spaces() {
        let input = "This has `  code` with only leading spaces.";
        assert_eq!(md038_violation_count(input), 1);
    }

    #[test]
    fn test_violations_only_trailing_spaces() {
        let input = "This has `code  ` with only trailing spaces.";
        assert_eq!(md038_violation_count(input), 1);
    }

    #[test]
    fn test_no_violations_double_backtick_code_spans() {
        let input = "This has ``valid code`` with double backticks.";
        assert_eq!(md038_violation_count(input), 0);
    }

    #[test]
    fn test_violations_double_backtick_with_spaces() {
        let input = "This has ``  code  `` with double backticks and spaces.";
        assert_eq!(md038_violation_count(input), 2);
    }

    #[test]
    fn test_multiple_code_spans_on_same_line() {
        let input = "This has `valid` and `  invalid  ` code spans.";
        assert_eq!(md038_violation_count(input), 2);
    }

    #[test]
    fn test_code_spans_in_different_contexts() {
        let input = "# Heading with `  invalid  ` code span

Paragraph with `valid` and `  invalid  ` spans.

- List item with `  invalid  ` code span
- Another item with `valid` span

> Blockquote with `  invalid  ` code span";

        // 2 violations per invalid span (leading + trailing)
        assert_eq!(md038_violation_count(input), 8);
    }

    #[test]
    fn test_no_violations_empty_code_span() {
        let input = "This has `` empty code spans.";
        assert_eq!(md038_violation_count(input), 0);
    }

    #[test]
    fn test_code_span_with_backtick_content() {
        // Test code span that contains backticks - should use double backticks
        let input = "This shows `` ` `` a backtick character.";
        // Single space padding is allowed in this case
        assert_eq!(md038_violation_count(input), 0);
    }

    #[test]
    fn test_code_span_with_backtick_content_extra_spaces() {
        // Test code span that contains backticks with extra spaces
        let input = "This shows ``  `  `` a backtick with extra spaces.";
        assert_eq!(md038_violation_count(input), 2);
    }
}
478}