// quickmark_core/rules/md026.rs

1use serde::Deserialize;
2use std::rc::Rc;
3
4use once_cell::sync::Lazy;
5use regex::Regex;
6use tree_sitter::Node;
7
8use crate::linter::{range_from_tree_sitter, Context, RuleLinter, RuleViolation};
9
10use super::{Rule, RuleType};
11
12// MD026-specific configuration types
13#[derive(Debug, PartialEq, Clone, Deserialize)]
14pub struct MD026TrailingPunctuationTable {
15    #[serde(default)]
16    pub punctuation: String,
17}
18
19impl Default for MD026TrailingPunctuationTable {
20    fn default() -> Self {
21        Self {
22            punctuation: ".,;:!。,;:!".to_string(),
23        }
24    }
25}
26
27impl MD026TrailingPunctuationTable {
28    pub fn with_default_punctuation() -> Self {
29        Self {
30            punctuation: ".,;:!。,;:!".to_string(), // Default without '?' chars
31        }
32    }
33}
34
35pub(crate) struct MD026Linter {
36    context: Rc<Context>,
37    violations: Vec<RuleViolation>,
38}
39
40impl MD026Linter {
41    pub fn new(context: Rc<Context>) -> Self {
42        Self {
43            context,
44            violations: Vec::new(),
45        }
46    }
47
48    fn extract_heading_text<'a>(&self, node: &Node, source: &'a str) -> &'a str {
49        let start_byte = node.start_byte();
50        let end_byte = node.end_byte();
51        let full_text = &source[start_byte..end_byte];
52
53        match node.kind() {
54            "atx_heading" => full_text
55                .trim_start_matches('#')
56                .trim()
57                .trim_end_matches('#')
58                .trim(),
59            "setext_heading" => {
60                if let Some(line) = full_text.lines().next() {
61                    line.trim()
62                } else {
63                    ""
64                }
65            }
66            _ => "",
67        }
68    }
69
70    fn check_trailing_punctuation(&mut self, node: &Node) {
71        let source = self.context.get_document_content();
72        let heading_text = self.extract_heading_text(node, &source);
73        if heading_text.is_empty() {
74            return;
75        }
76
77        let config = &self.context.config.linters.settings.trailing_punctuation;
78
79        // Handle configuration: if punctuation is empty, the rule is effectively disabled
80        let punctuation_chars = if config.punctuation.is_empty() {
81            return; // Empty punctuation = rule disabled, allow all
82        } else {
83            &config.punctuation
84        };
85
86        // Check if the heading ends with any of the specified punctuation characters
87        if let Some(trailing_char) = heading_text.chars().last() {
88            if punctuation_chars.contains(trailing_char) {
89                // Check if this is an HTML entity (ends with ;)
90                if trailing_char == ';' && is_html_entity(heading_text) {
91                    return; // Skip HTML entities
92                }
93
94                // Check if this is a gemoji code (ends with :)
95                if trailing_char == ':' && is_gemoji_code(heading_text) {
96                    return; // Skip gemoji codes
97                }
98
99                // Create a violation
100                let range = tree_sitter::Range {
101                    start_byte: 0, // Not used by range_from_tree_sitter
102                    end_byte: 0,   // Not used by range_from_tree_sitter
103                    start_point: tree_sitter::Point {
104                        row: node.start_position().row,
105                        column: 0,
106                    },
107                    end_point: tree_sitter::Point {
108                        row: node.end_position().row,
109                        column: node.end_position().column,
110                    },
111                };
112
113                self.violations.push(RuleViolation::new(
114                    &MD026,
115                    format!("Punctuation: '{trailing_char}'"),
116                    self.context.file_path.clone(),
117                    range_from_tree_sitter(&range),
118                ));
119            }
120        }
121    }
122}
123
124impl RuleLinter for MD026Linter {
125    fn feed(&mut self, node: &Node) {
126        match node.kind() {
127            "atx_heading" | "setext_heading" => self.check_trailing_punctuation(node),
128            _ => {
129                // Ignore other nodes
130            }
131        }
132    }
133
134    fn finalize(&mut self) -> Vec<RuleViolation> {
135        std::mem::take(&mut self.violations)
136    }
137}
138
139// Helper function to detect HTML entities
140fn is_html_entity(text: &str) -> bool {
141    static HTML_ENTITY_RE: Lazy<Regex> =
142        Lazy::new(|| Regex::new(r"&(?:[a-zA-Z\d]+|#\d+|#x[0-9a-fA-F]+);$").unwrap());
143    HTML_ENTITY_RE.is_match(text.trim())
144}
145
146// Helper function to detect GitHub emoji codes (gemoji)
147fn is_gemoji_code(text: &str) -> bool {
148    static GEMOJI_RE: Lazy<Regex> = Lazy::new(|| {
149        Regex::new(r":(?:[abmovx]|[-+]1|100|1234|(?:1st|2nd|3rd)_place_medal|8ball|clock\d{1,4}|e-mail|non-potable_water|o2|t-rex|u5272|u5408|u55b6|u6307|u6708|u6709|u6e80|u7121|u7533|u7981|u7a7a|[a-z]{2,15}2?|[a-z]{1,14}(?:_[a-z\d]{1,16})+):$").unwrap()
150    });
151    GEMOJI_RE.is_match(text.trim())
152}
153
154pub const MD026: Rule = Rule {
155    id: "MD026",
156    alias: "no-trailing-punctuation",
157    tags: &["headings"],
158    description: "Trailing punctuation in heading",
159    rule_type: RuleType::Token,
160    required_nodes: &["atx_heading", "setext_heading"],
161    new_linter: |context| Box::new(MD026Linter::new(context)),
162};
163
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::{LintersSettingsTable, MD026TrailingPunctuationTable, RuleSeverity};
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_settings;

    /// Builds a config with MD026 enabled as an error and `punctuation` as
    /// the forbidden trailing characters.
    fn test_config(punctuation: &str) -> crate::config::QuickmarkConfig {
        let trailing_punctuation = MD026TrailingPunctuationTable {
            punctuation: punctuation.to_string(),
        };
        test_config_with_settings(
            vec![("no-trailing-punctuation", RuleSeverity::Error)],
            LintersSettingsTable {
                trailing_punctuation,
                ..Default::default()
            },
        )
    }

    /// Config using the stock punctuation set (no `?`).
    fn test_default_config() -> crate::config::QuickmarkConfig {
        test_config(".,;:!。,;:!")
    }

    /// Lints `input` as `test.md` under `config`, then checks that exactly
    /// `expected.len()` violations were reported and that the i-th message
    /// contains the i-th expected fragment.
    #[track_caller]
    fn assert_violation_messages(
        config: crate::config::QuickmarkConfig,
        input: &str,
        expected: &[&str],
    ) {
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(violations.len(), expected.len());
        for (index, fragment) in expected.iter().enumerate() {
            assert!(violations[index].message().contains(*fragment));
        }
    }

    #[test]
    fn test_atx_heading_with_period() {
        assert_violation_messages(
            test_default_config(),
            "# This is a heading.",
            &["Punctuation: '.'"],
        );
    }

    #[test]
    fn test_atx_heading_with_exclamation() {
        assert_violation_messages(
            test_default_config(),
            "# This is a heading!",
            &["Punctuation: '!'"],
        );
    }

    #[test]
    fn test_atx_heading_with_comma() {
        assert_violation_messages(
            test_default_config(),
            "## This is a heading,",
            &["Punctuation: ','"],
        );
    }

    #[test]
    fn test_atx_heading_with_semicolon() {
        assert_violation_messages(
            test_default_config(),
            "### This is a heading;",
            &["Punctuation: ';'"],
        );
    }

    #[test]
    fn test_atx_heading_with_colon() {
        assert_violation_messages(
            test_default_config(),
            "#### This is a heading:",
            &["Punctuation: ':'"],
        );
    }

    #[test]
    fn test_atx_heading_with_question_mark_allowed() {
        // '?' is not in default punctuation
        assert_violation_messages(test_default_config(), "# This is a heading?", &[]);
    }

    #[test]
    fn test_atx_heading_without_punctuation() {
        assert_violation_messages(test_default_config(), "# This is a heading", &[]);
    }

    #[test]
    fn test_setext_heading_with_period() {
        assert_violation_messages(
            test_default_config(),
            "# Document\n\nThis is a heading.\n==================\n\nContent here",
            &["Punctuation: '.'"],
        );
    }

    #[test]
    fn test_setext_heading_with_exclamation() {
        assert_violation_messages(
            test_default_config(),
            "# Document\n\nThis is a heading!\n------------------\n\nContent here",
            &["Punctuation: '!'"],
        );
    }

    #[test]
    fn test_setext_heading_without_punctuation() {
        assert_violation_messages(
            test_default_config(),
            "# Document\n\nThis is a heading\n=================\n\nContent here",
            &[],
        );
    }

    #[test]
    fn test_full_width_punctuation() {
        assert_violation_messages(
            test_default_config(),
            "# Heading with full-width period。",
            &["Punctuation: '。'"],
        );
    }

    #[test]
    fn test_full_width_comma() {
        assert_violation_messages(
            test_default_config(),
            "# Heading with full-width comma,",
            &["Punctuation: ','"],
        );
    }

    #[test]
    fn test_custom_punctuation() {
        // '!' not in custom punctuation
        assert_violation_messages(test_config(".,;:"), "# This heading has exclamation!", &[]);
    }

    #[test]
    fn test_custom_punctuation_with_violation() {
        assert_violation_messages(
            test_config(".,;:"),
            "# This heading has period.",
            &["Punctuation: '.'"],
        );
    }

    #[test]
    fn test_empty_punctuation_allows_all() {
        // Empty punctuation = allow all
        assert_violation_messages(
            test_config(""),
            "# This heading has period.\n## This heading has exclamation!\n### This has comma,",
            &[],
        );
    }

    #[test]
    fn test_html_entity_ignored() {
        // HTML entities should be ignored
        assert_violation_messages(
            test_default_config(),
            "# Copyright &copy;\n## Registered &reg;",
            &[],
        );
    }

    #[test]
    fn test_numeric_html_entity_ignored() {
        // Numeric HTML entities should be ignored
        assert_violation_messages(
            test_default_config(),
            "# Copyright &#169;\n## Registered &#174;",
            &[],
        );
    }

    #[test]
    fn test_hex_html_entity_ignored() {
        // Hex HTML entities should be ignored
        assert_violation_messages(
            test_default_config(),
            "# Copyright &#x000A9;\n## Registered &#xAE;",
            &[],
        );
    }

    #[test]
    fn test_mixed_valid_and_invalid() {
        assert_violation_messages(
            test_default_config(),
            "# Good heading\n## Bad heading.\n### Another good heading\n#### Another bad heading!",
            &["Punctuation: '.'", "Punctuation: '!'"],
        );
    }

    #[test]
    fn test_atx_closed_style_heading() {
        assert_violation_messages(
            test_default_config(),
            "# This is a heading. #",
            &["Punctuation: '.'"],
        );
    }

    #[test]
    fn test_multiple_trailing_punctuation() {
        assert_violation_messages(
            test_default_config(),
            "# This is a heading...",
            &["Punctuation: '.'"],
        );
    }

    #[test]
    fn test_empty_heading() {
        // Empty headings should not trigger violations
        assert_violation_messages(test_default_config(), "#\n==", &[]);
    }
}
423}