rumdl_lib/rules/
md026_no_trailing_punctuation.rs

1/// Rule MD026: No trailing punctuation in headings
2///
3/// See [docs/md026.md](../../docs/md026.md) for full documentation, configuration, and examples.
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use crate::utils::range_utils::calculate_match_range;
6use lazy_static::lazy_static;
7use regex::Regex;
8use std::collections::HashMap;
9use std::ops::Range;
10use std::sync::RwLock;
11
12mod md026_config;
13use md026_config::{DEFAULT_PUNCTUATION, MD026Config};
14
15lazy_static! {
16    // Optimized single regex for all ATX heading types (normal, closed, indented 1-3 spaces)
17    static ref ATX_HEADING_UNIFIED: Regex = Regex::new(r"^( {0,3})(#{1,6})(\s+)(.+?)(\s+#{1,6})?$").unwrap();
18
19    // Fast check patterns for early returns - match defaults
20    static ref QUICK_PUNCTUATION_CHECK: Regex = Regex::new(&format!(r"[{}]", regex::escape(DEFAULT_PUNCTUATION))).unwrap();
21
22    // Regex cache for punctuation patterns
23    static ref PUNCTUATION_REGEX_CACHE: RwLock<HashMap<String, Regex>> = RwLock::new(HashMap::new());
24}
25
/// Rule MD026: Trailing punctuation in heading
///
/// Reports headings whose text ends with one or more characters from the configured
/// punctuation set, and can rewrite the heading line with that punctuation removed.
#[derive(Clone, Default)]
pub struct MD026NoTrailingPunctuation {
    // Rule settings; `config.punctuation` is the set of characters treated as trailing punctuation.
    config: MD026Config,
}
31
32impl MD026NoTrailingPunctuation {
33    pub fn new(punctuation: Option<String>) -> Self {
34        Self {
35            config: MD026Config {
36                punctuation: punctuation.unwrap_or_else(|| DEFAULT_PUNCTUATION.to_string()),
37            },
38        }
39    }
40
41    pub fn from_config_struct(config: MD026Config) -> Self {
42        Self { config }
43    }
44
45    #[inline]
46    fn get_punctuation_regex(&self) -> Result<Regex, regex::Error> {
47        // Check cache first
48        {
49            let cache = PUNCTUATION_REGEX_CACHE.read().unwrap();
50            if let Some(cached_regex) = cache.get(&self.config.punctuation) {
51                return Ok(cached_regex.clone());
52            }
53        }
54
55        // Compile and cache the regex
56        let pattern = format!(r"([{}]+)$", regex::escape(&self.config.punctuation));
57        let regex = Regex::new(&pattern)?;
58
59        {
60            let mut cache = PUNCTUATION_REGEX_CACHE.write().unwrap();
61            cache.insert(self.config.punctuation.clone(), regex.clone());
62        }
63
64        Ok(regex)
65    }
66
67    #[inline]
68    fn has_trailing_punctuation(&self, text: &str, re: &Regex) -> bool {
69        let trimmed = text.trim();
70        re.is_match(trimmed)
71    }
72
73    #[inline]
74    fn get_line_byte_range(&self, content: &str, line_num: usize) -> Range<usize> {
75        let mut start_pos = 0;
76
77        for (idx, line) in content.lines().enumerate() {
78            if idx + 1 == line_num {
79                return Range {
80                    start: start_pos,
81                    end: start_pos + line.len(),
82                };
83            }
84            // +1 for the newline character
85            start_pos += line.len() + 1;
86        }
87
88        Range {
89            start: content.len(),
90            end: content.len(),
91        }
92    }
93
94    // Remove trailing punctuation from text
95    #[inline]
96    fn remove_trailing_punctuation(&self, text: &str, re: &Regex) -> String {
97        re.replace_all(text.trim(), "").to_string()
98    }
99
100    // Optimized ATX heading fix using unified regex
101    #[inline]
102    fn fix_atx_heading(&self, line: &str, re: &Regex) -> String {
103        if let Some(captures) = ATX_HEADING_UNIFIED.captures(line) {
104            let indentation = captures.get(1).unwrap().as_str();
105            let hashes = captures.get(2).unwrap().as_str();
106            let space = captures.get(3).unwrap().as_str();
107            let content = captures.get(4).unwrap().as_str();
108
109            // Check if content ends with a custom header ID like {#my-id}
110            // If so, we need to fix punctuation before the ID
111            let fixed_content = if let Some(id_pos) = content.rfind(" {#") {
112                // Has a custom ID - fix punctuation before it
113                let before_id = &content[..id_pos];
114                let id_part = &content[id_pos..];
115                let fixed_before = self.remove_trailing_punctuation(before_id, re);
116                format!("{fixed_before}{id_part}")
117            } else {
118                // No custom ID - just remove trailing punctuation
119                self.remove_trailing_punctuation(content, re)
120            };
121
122            // Preserve any trailing hashes if present
123            if let Some(trailing) = captures.get(5) {
124                return format!(
125                    "{}{}{}{}{}",
126                    indentation,
127                    hashes,
128                    space,
129                    fixed_content,
130                    trailing.as_str()
131                );
132            }
133
134            return format!("{indentation}{hashes}{space}{fixed_content}");
135        }
136
137        // Fallback if no regex matches
138        line.to_string()
139    }
140
141    // Fix a setext heading by removing trailing punctuation from the content line
142    #[inline]
143    fn fix_setext_heading(&self, content_line: &str, re: &Regex) -> String {
144        let trimmed = content_line.trim_end();
145        let mut whitespace = "";
146
147        // Preserve trailing whitespace
148        if content_line.len() > trimmed.len() {
149            whitespace = &content_line[trimmed.len()..];
150        }
151
152        // Remove punctuation and preserve whitespace
153        format!("{}{}", self.remove_trailing_punctuation(trimmed, re), whitespace)
154    }
155}
156
157impl Rule for MD026NoTrailingPunctuation {
158    fn name(&self) -> &'static str {
159        "MD026"
160    }
161
162    fn description(&self) -> &'static str {
163        "Trailing punctuation in heading"
164    }
165
166    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
167        // Skip if no heading markers
168        if !ctx.likely_has_headings() {
169            return true;
170        }
171        // Skip if none of the configured punctuation exists
172        let punctuation = &self.config.punctuation;
173        !punctuation.chars().any(|p| ctx.content.contains(p))
174    }
175
176    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
177        let content = ctx.content;
178
179        // Early returns for performance
180        if content.is_empty() {
181            return Ok(Vec::new());
182        }
183
184        // Quick check for any punctuation we care about
185        // For custom punctuation, we need to check differently
186        if self.config.punctuation == DEFAULT_PUNCTUATION {
187            if !QUICK_PUNCTUATION_CHECK.is_match(content) {
188                return Ok(Vec::new());
189            }
190        } else {
191            // For custom punctuation, check if any of those characters exist
192            let has_custom_punctuation = self.config.punctuation.chars().any(|c| content.contains(c));
193            if !has_custom_punctuation {
194                return Ok(Vec::new());
195            }
196        }
197
198        // Check if we have any headings from pre-computed line info
199        let has_headings = ctx.lines.iter().any(|line| line.heading.is_some());
200        if !has_headings {
201            return Ok(Vec::new());
202        }
203
204        let mut warnings = Vec::new();
205        let re = match self.get_punctuation_regex() {
206            Ok(regex) => regex,
207            Err(_) => return Ok(warnings),
208        };
209
210        // Use pre-computed heading information from LintContext
211        for (line_num, line_info) in ctx.lines.iter().enumerate() {
212            if let Some(heading) = &line_info.heading {
213                // Skip deeply indented headings (they're code blocks)
214                if line_info.indent >= 4 && matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
215                    continue;
216                }
217
218                // LintContext already strips Kramdown IDs from heading.text
219                // So we just check the heading text directly for trailing punctuation
220                // This correctly flags "# Heading." even if it has {#id}
221                let text_to_check = heading.text.clone();
222
223                if self.has_trailing_punctuation(&text_to_check, &re) {
224                    // Find the trailing punctuation
225                    if let Some(punctuation_match) = re.find(&text_to_check) {
226                        let line = &line_info.content;
227
228                        // For ATX headings, find the punctuation position in the line
229                        let punctuation_pos_in_text = punctuation_match.start();
230                        let text_pos_in_line = line.find(&heading.text).unwrap_or(heading.content_column);
231                        let punctuation_start_in_line = text_pos_in_line + punctuation_pos_in_text;
232                        let punctuation_len = punctuation_match.len();
233
234                        let (start_line, start_col, end_line, end_col) = calculate_match_range(
235                            line_num + 1, // Convert to 1-indexed
236                            line,
237                            punctuation_start_in_line,
238                            punctuation_len,
239                        );
240
241                        let last_char = text_to_check.chars().last().unwrap_or(' ');
242                        warnings.push(LintWarning {
243                            rule_name: Some(self.name()),
244                            line: start_line,
245                            column: start_col,
246                            end_line,
247                            end_column: end_col,
248                            message: format!("Heading '{text_to_check}' ends with punctuation '{last_char}'"),
249                            severity: Severity::Warning,
250                            fix: Some(Fix {
251                                range: self.get_line_byte_range(content, line_num + 1),
252                                replacement: if matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
253                                    self.fix_atx_heading(line, &re)
254                                } else {
255                                    self.fix_setext_heading(line, &re)
256                                },
257                            }),
258                        });
259                    }
260                }
261            }
262        }
263
264        Ok(warnings)
265    }
266
267    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
268        let content = ctx.content;
269
270        // Fast path optimizations
271        if content.is_empty() {
272            return Ok(content.to_string());
273        }
274
275        // Quick check for punctuation
276        // For custom punctuation, we need to check differently
277        if self.config.punctuation == DEFAULT_PUNCTUATION {
278            if !QUICK_PUNCTUATION_CHECK.is_match(content) {
279                return Ok(content.to_string());
280            }
281        } else {
282            // For custom punctuation, check if any of those characters exist
283            let has_custom_punctuation = self.config.punctuation.chars().any(|c| content.contains(c));
284            if !has_custom_punctuation {
285                return Ok(content.to_string());
286            }
287        }
288
289        // Check if we have any headings from pre-computed line info
290        let has_headings = ctx.lines.iter().any(|line| line.heading.is_some());
291        if !has_headings {
292            return Ok(content.to_string());
293        }
294
295        let re = match self.get_punctuation_regex() {
296            Ok(regex) => regex,
297            Err(_) => return Ok(content.to_string()),
298        };
299
300        let lines: Vec<&str> = content.lines().collect();
301        let mut fixed_lines: Vec<String> = lines.iter().map(|&s| s.to_string()).collect();
302
303        // Use pre-computed heading information from LintContext
304        for (line_num, line_info) in ctx.lines.iter().enumerate() {
305            if let Some(heading) = &line_info.heading {
306                // Skip deeply indented headings (they're code blocks)
307                if line_info.indent >= 4 && matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
308                    continue;
309                }
310
311                // LintContext already strips custom header IDs from heading.text
312                // So we just check the heading text directly for trailing punctuation
313                let text_to_check = heading.text.clone();
314
315                // Check and fix trailing punctuation
316                if self.has_trailing_punctuation(&text_to_check, &re) {
317                    fixed_lines[line_num] = if matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
318                        self.fix_atx_heading(&line_info.content, &re)
319                    } else {
320                        self.fix_setext_heading(&line_info.content, &re)
321                    };
322                }
323            }
324        }
325
326        // Reconstruct content preserving line endings
327        let mut result = String::with_capacity(content.len());
328        for (i, line) in fixed_lines.iter().enumerate() {
329            result.push_str(line);
330            if i < fixed_lines.len() - 1 || content.ends_with('\n') {
331                result.push('\n');
332            }
333        }
334
335        Ok(result)
336    }
337
338    fn as_any(&self) -> &dyn std::any::Any {
339        self
340    }
341
342    fn default_config_section(&self) -> Option<(String, toml::Value)> {
343        let json_value = serde_json::to_value(&self.config).ok()?;
344        Some((
345            self.name().to_string(),
346            crate::rule_config_serde::json_to_toml_value(&json_value)?,
347        ))
348    }
349
350    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
351    where
352        Self: Sized,
353    {
354        let rule_config = crate::rule_config_serde::load_rule_config::<MD026Config>(config);
355        Box::new(Self::from_config_struct(rule_config))
356    }
357}
358
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Rule configured with the default punctuation set.
    fn default_rule() -> MD026NoTrailingPunctuation {
        MD026NoTrailingPunctuation::new(None)
    }

    /// Rule configured with an explicit punctuation set.
    fn rule_with(punctuation: &str) -> MD026NoTrailingPunctuation {
        MD026NoTrailingPunctuation::new(Some(punctuation.to_string()))
    }

    /// Lints `content` as standard-flavor markdown and returns the warnings.
    fn lint(rule: &dyn Rule, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    /// Runs the fixer over `content` as standard-flavor markdown.
    fn apply_fix(rule: &dyn Rule, content: &str) -> String {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.fix(&ctx).unwrap()
    }

    #[test]
    fn test_no_trailing_punctuation() {
        let warnings = lint(&default_rule(), "# This is a heading\n\n## Another heading");
        assert!(warnings.is_empty(), "Headings without punctuation should not be flagged");
    }

    #[test]
    fn test_trailing_period() {
        let warnings = lint(&default_rule(), "# This is a heading.\n\n## Another one.");
        assert_eq!(warnings.len(), 2);

        let (first, second) = (&warnings[0], &warnings[1]);
        assert_eq!(first.line, 1);
        assert_eq!(first.column, 20);
        assert!(first.message.contains("ends with punctuation '.'"));
        assert_eq!(second.line, 3);
        assert_eq!(second.column, 15);
    }

    #[test]
    fn test_trailing_comma() {
        let warnings = lint(&default_rule(), "# Heading,\n## Sub-heading,");
        assert_eq!(warnings.len(), 2);
        assert!(warnings[0].message.contains("ends with punctuation ','"));
    }

    #[test]
    fn test_trailing_semicolon() {
        let warnings = lint(&default_rule(), "# Title;\n## Subtitle;");
        assert_eq!(warnings.len(), 2);
        assert!(warnings[0].message.contains("ends with punctuation ';'"));
    }

    #[test]
    fn test_custom_punctuation() {
        let warnings = lint(&rule_with("!"), "# Important!\n## Regular heading.");
        assert_eq!(warnings.len(), 1, "Only exclamation should be flagged with custom config");
        assert_eq!(warnings[0].line, 1);
        assert!(warnings[0].message.contains("ends with punctuation '!'"));
    }

    #[test]
    fn test_legitimate_question_mark() {
        let warnings = lint(&rule_with(".,;?"), "# What is this?\n# This is bad.");
        // With custom punctuation, legitimate punctuation exceptions don't apply
        assert_eq!(warnings.len(), 2, "Both should be flagged with custom punctuation");
    }

    #[test]
    fn test_question_marks_not_in_default() {
        let warnings = lint(&default_rule(), "# What is Rust?\n# How does it work?\n# Is it fast?");
        assert!(warnings.is_empty(), "Question marks are not in default punctuation list");
    }

    #[test]
    fn test_colons_in_default() {
        let warnings = lint(&default_rule(), "# FAQ:\n# API Reference:\n# Step 1:\n# Version 2.0:");
        assert_eq!(
            warnings.len(),
            4,
            "Colons are in default punctuation list and should be flagged"
        );
    }

    #[test]
    fn test_fix_atx_headings() {
        let fixed = apply_fix(&default_rule(), "# Title.\n## Subtitle,\n### Sub-subtitle;");
        assert_eq!(fixed, "# Title\n## Subtitle\n### Sub-subtitle");
    }

    #[test]
    fn test_fix_setext_headings() {
        let fixed = apply_fix(&default_rule(), "Title.\n======\n\nSubtitle,\n---------");
        assert_eq!(fixed, "Title\n======\n\nSubtitle\n---------");
    }

    #[test]
    fn test_fix_preserves_trailing_hashes() {
        let fixed = apply_fix(&default_rule(), "# Title. #\n## Subtitle, ##");
        assert_eq!(fixed, "# Title #\n## Subtitle ##");
    }

    #[test]
    fn test_indented_headings() {
        let warnings = lint(&default_rule(), "   # Title.\n  ## Subtitle.");
        assert_eq!(warnings.len(), 2, "Indented headings (< 4 spaces) should be checked");
    }

    #[test]
    fn test_deeply_indented_ignored() {
        let warnings = lint(&default_rule(), "    # This is code.");
        assert!(warnings.is_empty(), "Deeply indented lines (4+ spaces) should be ignored");
    }

    #[test]
    fn test_multiple_punctuation() {
        let warnings = lint(&default_rule(), "# Title...");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].column, 8); // Points to first period
    }

    #[test]
    fn test_empty_content() {
        let warnings = lint(&default_rule(), "");
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_no_headings() {
        let warnings = lint(&default_rule(), "This is just text.\nMore text with punctuation.");
        assert!(warnings.is_empty(), "Non-heading lines should not be checked");
    }

    #[test]
    fn test_get_punctuation_regex() {
        let regex = rule_with("!?").get_punctuation_regex().unwrap();
        assert!(regex.is_match("text!"));
        assert!(regex.is_match("text?"));
        assert!(!regex.is_match("text."));
    }

    #[test]
    fn test_regex_caching() {
        // Two rules sharing one punctuation set resolve to the same cache key.
        for rule in [rule_with("!"), rule_with("!")] {
            let _regex = rule.get_punctuation_regex().unwrap();
        }

        // Check cache has the entry
        let cache = PUNCTUATION_REGEX_CACHE.read().unwrap();
        assert!(cache.contains_key("!"));
    }

    #[test]
    fn test_config_from_toml() {
        let mut rule_config = crate::config::RuleConfig::default();
        rule_config
            .values
            .insert("punctuation".to_string(), toml::Value::String("!?".to_string()));

        let mut config = crate::config::Config::default();
        config.rules.insert("MD026".to_string(), rule_config);

        let rule = MD026NoTrailingPunctuation::from_config(&config);
        let warnings = lint(&*rule, "# Title!\n# Another?");
        assert_eq!(warnings.len(), 2, "Custom punctuation from config should be used");
    }

    #[test]
    fn test_fix_removes_punctuation() {
        let fixed = apply_fix(&default_rule(), "# Title.   \n## Subtitle,  ");
        // The current implementation doesn't preserve trailing whitespace after punctuation removal
        assert_eq!(fixed, "# Title\n## Subtitle");
    }

    #[test]
    fn test_final_newline_preservation() {
        let rule = default_rule();

        // A trailing newline in the input survives the fix...
        assert_eq!(apply_fix(&rule, "# Title.\n"), "# Title\n");

        // ...and none is added when the input had none.
        assert_eq!(apply_fix(&rule, "# Title."), "# Title");
    }
}