rumdl_lib/rules/
md026_no_trailing_punctuation.rs

/// Rule MD026: No trailing punctuation in headings
///
/// See [docs/md026.md](../../docs/md026.md) for full documentation, configuration, and examples.
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use crate::utils::range_utils::{LineIndex, calculate_match_range};
6use lazy_static::lazy_static;
7use regex::Regex;
8use std::collections::HashMap;
9use std::ops::Range;
10use std::sync::RwLock;
11
12mod md026_config;
13use md026_config::{DEFAULT_PUNCTUATION, MD026Config};
14
15lazy_static! {
16    // Optimized single regex for all ATX heading types (normal, closed, indented 1-3 spaces)
17    static ref ATX_HEADING_UNIFIED: Regex = Regex::new(r"^( {0,3})(#{1,6})(\s+)(.+?)(\s+#{1,6})?$").unwrap();
18
19    // Fast check patterns for early returns - match defaults
20    static ref QUICK_PUNCTUATION_CHECK: Regex = Regex::new(&format!(r"[{}]", regex::escape(DEFAULT_PUNCTUATION))).unwrap();
21
22    // Regex cache for punctuation patterns
23    static ref PUNCTUATION_REGEX_CACHE: RwLock<HashMap<String, Regex>> = RwLock::new(HashMap::new());
24}
25
26/// Rule MD026: Trailing punctuation in heading
27#[derive(Clone, Default)]
28pub struct MD026NoTrailingPunctuation {
29    config: MD026Config,
30}
31
32impl MD026NoTrailingPunctuation {
33    pub fn new(punctuation: Option<String>) -> Self {
34        Self {
35            config: MD026Config {
36                punctuation: punctuation.unwrap_or_else(|| DEFAULT_PUNCTUATION.to_string()),
37            },
38        }
39    }
40
41    pub fn from_config_struct(config: MD026Config) -> Self {
42        Self { config }
43    }
44
45    #[inline]
46    fn get_punctuation_regex(&self) -> Result<Regex, regex::Error> {
47        // Check cache first
48        {
49            let cache = PUNCTUATION_REGEX_CACHE.read().unwrap();
50            if let Some(cached_regex) = cache.get(&self.config.punctuation) {
51                return Ok(cached_regex.clone());
52            }
53        }
54
55        // Compile and cache the regex
56        let pattern = format!(r"([{}]+)$", regex::escape(&self.config.punctuation));
57        let regex = Regex::new(&pattern)?;
58
59        {
60            let mut cache = PUNCTUATION_REGEX_CACHE.write().unwrap();
61            cache.insert(self.config.punctuation.clone(), regex.clone());
62        }
63
64        Ok(regex)
65    }
66
67    #[inline]
68    fn has_trailing_punctuation(&self, text: &str, re: &Regex) -> bool {
69        let trimmed = text.trim();
70        re.is_match(trimmed)
71    }
72
73    #[inline]
74    fn get_line_byte_range(&self, content: &str, line_num: usize, line_index: &LineIndex) -> Range<usize> {
75        let start_pos = line_index.get_line_start_byte(line_num).unwrap_or(content.len());
76
77        // Find the line length
78        let line = content.lines().nth(line_num - 1).unwrap_or("");
79
80        Range {
81            start: start_pos,
82            end: start_pos + line.len(),
83        }
84    }
85
86    // Remove trailing punctuation from text
87    #[inline]
88    fn remove_trailing_punctuation(&self, text: &str, re: &Regex) -> String {
89        re.replace_all(text.trim(), "").to_string()
90    }
91
92    // Optimized ATX heading fix using unified regex
93    #[inline]
94    fn fix_atx_heading(&self, line: &str, re: &Regex) -> String {
95        if let Some(captures) = ATX_HEADING_UNIFIED.captures(line) {
96            let indentation = captures.get(1).unwrap().as_str();
97            let hashes = captures.get(2).unwrap().as_str();
98            let space = captures.get(3).unwrap().as_str();
99            let content = captures.get(4).unwrap().as_str();
100
101            // Check if content ends with a custom header ID like {#my-id}
102            // If so, we need to fix punctuation before the ID
103            let fixed_content = if let Some(id_pos) = content.rfind(" {#") {
104                // Has a custom ID - fix punctuation before it
105                let before_id = &content[..id_pos];
106                let id_part = &content[id_pos..];
107                let fixed_before = self.remove_trailing_punctuation(before_id, re);
108                format!("{fixed_before}{id_part}")
109            } else {
110                // No custom ID - just remove trailing punctuation
111                self.remove_trailing_punctuation(content, re)
112            };
113
114            // Preserve any trailing hashes if present
115            if let Some(trailing) = captures.get(5) {
116                return format!(
117                    "{}{}{}{}{}",
118                    indentation,
119                    hashes,
120                    space,
121                    fixed_content,
122                    trailing.as_str()
123                );
124            }
125
126            return format!("{indentation}{hashes}{space}{fixed_content}");
127        }
128
129        // Fallback if no regex matches
130        line.to_string()
131    }
132
133    // Fix a setext heading by removing trailing punctuation from the content line
134    #[inline]
135    fn fix_setext_heading(&self, content_line: &str, re: &Regex) -> String {
136        let trimmed = content_line.trim_end();
137        let mut whitespace = "";
138
139        // Preserve trailing whitespace
140        if content_line.len() > trimmed.len() {
141            whitespace = &content_line[trimmed.len()..];
142        }
143
144        // Remove punctuation and preserve whitespace
145        format!("{}{}", self.remove_trailing_punctuation(trimmed, re), whitespace)
146    }
147}
148
149impl Rule for MD026NoTrailingPunctuation {
150    fn name(&self) -> &'static str {
151        "MD026"
152    }
153
154    fn description(&self) -> &'static str {
155        "Trailing punctuation in heading"
156    }
157
158    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
159        // Skip if no heading markers
160        if !ctx.likely_has_headings() {
161            return true;
162        }
163        // Skip if none of the configured punctuation exists
164        let punctuation = &self.config.punctuation;
165        !punctuation.chars().any(|p| ctx.content.contains(p))
166    }
167
168    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
169        let content = ctx.content;
170
171        // Early returns for performance
172        if content.is_empty() {
173            return Ok(Vec::new());
174        }
175
176        // Quick check for any punctuation we care about
177        // For custom punctuation, we need to check differently
178        if self.config.punctuation == DEFAULT_PUNCTUATION {
179            if !QUICK_PUNCTUATION_CHECK.is_match(content) {
180                return Ok(Vec::new());
181            }
182        } else {
183            // For custom punctuation, check if any of those characters exist
184            let has_custom_punctuation = self.config.punctuation.chars().any(|c| content.contains(c));
185            if !has_custom_punctuation {
186                return Ok(Vec::new());
187            }
188        }
189
190        // Check if we have any headings from pre-computed line info
191        let has_headings = ctx.lines.iter().any(|line| line.heading.is_some());
192        if !has_headings {
193            return Ok(Vec::new());
194        }
195
196        let mut warnings = Vec::new();
197        let re = match self.get_punctuation_regex() {
198            Ok(regex) => regex,
199            Err(_) => return Ok(warnings),
200        };
201
202        // Create LineIndex for correct byte position calculations across all line ending types
203        let line_index = LineIndex::new(content.to_string());
204
205        // Use pre-computed heading information from LintContext
206        for (line_num, line_info) in ctx.lines.iter().enumerate() {
207            if let Some(heading) = &line_info.heading {
208                // Skip deeply indented headings (they're code blocks)
209                if line_info.indent >= 4 && matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
210                    continue;
211                }
212
213                // LintContext already strips Kramdown IDs from heading.text
214                // So we just check the heading text directly for trailing punctuation
215                // This correctly flags "# Heading." even if it has {#id}
216                let text_to_check = heading.text.clone();
217
218                if self.has_trailing_punctuation(&text_to_check, &re) {
219                    // Find the trailing punctuation
220                    if let Some(punctuation_match) = re.find(&text_to_check) {
221                        let line = &line_info.content;
222
223                        // For ATX headings, find the punctuation position in the line
224                        let punctuation_pos_in_text = punctuation_match.start();
225                        let text_pos_in_line = line.find(&heading.text).unwrap_or(heading.content_column);
226                        let punctuation_start_in_line = text_pos_in_line + punctuation_pos_in_text;
227                        let punctuation_len = punctuation_match.len();
228
229                        let (start_line, start_col, end_line, end_col) = calculate_match_range(
230                            line_num + 1, // Convert to 1-indexed
231                            line,
232                            punctuation_start_in_line,
233                            punctuation_len,
234                        );
235
236                        let last_char = text_to_check.chars().last().unwrap_or(' ');
237                        warnings.push(LintWarning {
238                            rule_name: Some(self.name()),
239                            line: start_line,
240                            column: start_col,
241                            end_line,
242                            end_column: end_col,
243                            message: format!("Heading '{text_to_check}' ends with punctuation '{last_char}'"),
244                            severity: Severity::Warning,
245                            fix: Some(Fix {
246                                range: self.get_line_byte_range(content, line_num + 1, &line_index),
247                                replacement: if matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
248                                    self.fix_atx_heading(line, &re)
249                                } else {
250                                    self.fix_setext_heading(line, &re)
251                                },
252                            }),
253                        });
254                    }
255                }
256            }
257        }
258
259        Ok(warnings)
260    }
261
262    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
263        let content = ctx.content;
264
265        // Fast path optimizations
266        if content.is_empty() {
267            return Ok(content.to_string());
268        }
269
270        // Quick check for punctuation
271        // For custom punctuation, we need to check differently
272        if self.config.punctuation == DEFAULT_PUNCTUATION {
273            if !QUICK_PUNCTUATION_CHECK.is_match(content) {
274                return Ok(content.to_string());
275            }
276        } else {
277            // For custom punctuation, check if any of those characters exist
278            let has_custom_punctuation = self.config.punctuation.chars().any(|c| content.contains(c));
279            if !has_custom_punctuation {
280                return Ok(content.to_string());
281            }
282        }
283
284        // Check if we have any headings from pre-computed line info
285        let has_headings = ctx.lines.iter().any(|line| line.heading.is_some());
286        if !has_headings {
287            return Ok(content.to_string());
288        }
289
290        let re = match self.get_punctuation_regex() {
291            Ok(regex) => regex,
292            Err(_) => return Ok(content.to_string()),
293        };
294
295        let lines: Vec<&str> = content.lines().collect();
296        let mut fixed_lines: Vec<String> = lines.iter().map(|&s| s.to_string()).collect();
297
298        // Use pre-computed heading information from LintContext
299        for (line_num, line_info) in ctx.lines.iter().enumerate() {
300            if let Some(heading) = &line_info.heading {
301                // Skip deeply indented headings (they're code blocks)
302                if line_info.indent >= 4 && matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
303                    continue;
304                }
305
306                // LintContext already strips custom header IDs from heading.text
307                // So we just check the heading text directly for trailing punctuation
308                let text_to_check = heading.text.clone();
309
310                // Check and fix trailing punctuation
311                if self.has_trailing_punctuation(&text_to_check, &re) {
312                    fixed_lines[line_num] = if matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
313                        self.fix_atx_heading(&line_info.content, &re)
314                    } else {
315                        self.fix_setext_heading(&line_info.content, &re)
316                    };
317                }
318            }
319        }
320
321        // Reconstruct content preserving line endings
322        let mut result = String::with_capacity(content.len());
323        for (i, line) in fixed_lines.iter().enumerate() {
324            result.push_str(line);
325            if i < fixed_lines.len() - 1 || content.ends_with('\n') {
326                result.push('\n');
327            }
328        }
329
330        Ok(result)
331    }
332
333    fn as_any(&self) -> &dyn std::any::Any {
334        self
335    }
336
337    fn default_config_section(&self) -> Option<(String, toml::Value)> {
338        let json_value = serde_json::to_value(&self.config).ok()?;
339        Some((
340            self.name().to_string(),
341            crate::rule_config_serde::json_to_toml_value(&json_value)?,
342        ))
343    }
344
345    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
346    where
347        Self: Sized,
348    {
349        let rule_config = crate::rule_config_serde::load_rule_config::<MD026Config>(config);
350        Box::new(Self::from_config_struct(rule_config))
351    }
352}
353
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Runs `check` on `content` with the standard Markdown flavor.
    fn lint(rule: &dyn Rule, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    /// Runs `fix` on `content` with the standard Markdown flavor.
    fn autofix(rule: &dyn Rule, content: &str) -> String {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.fix(&ctx).unwrap()
    }

    /// Rule using the default punctuation set.
    fn default_rule() -> MD026NoTrailingPunctuation {
        MD026NoTrailingPunctuation::new(None)
    }

    /// Rule using a custom punctuation set.
    fn rule_with(punctuation: &str) -> MD026NoTrailingPunctuation {
        MD026NoTrailingPunctuation::new(Some(punctuation.to_string()))
    }

    #[test]
    fn test_no_trailing_punctuation() {
        let result = lint(&default_rule(), "# This is a heading\n\n## Another heading");
        assert!(result.is_empty(), "Headings without punctuation should not be flagged");
    }

    #[test]
    fn test_trailing_period() {
        let result = lint(&default_rule(), "# This is a heading.\n\n## Another one.");
        assert_eq!(result.len(), 2);

        let (first, second) = (&result[0], &result[1]);
        assert_eq!(first.line, 1);
        assert_eq!(first.column, 20);
        assert!(first.message.contains("ends with punctuation '.'"));
        assert_eq!(second.line, 3);
        assert_eq!(second.column, 15);
    }

    #[test]
    fn test_trailing_comma() {
        let result = lint(&default_rule(), "# Heading,\n## Sub-heading,");
        assert_eq!(result.len(), 2);
        assert!(result[0].message.contains("ends with punctuation ','"));
    }

    #[test]
    fn test_trailing_semicolon() {
        let result = lint(&default_rule(), "# Title;\n## Subtitle;");
        assert_eq!(result.len(), 2);
        assert!(result[0].message.contains("ends with punctuation ';'"));
    }

    #[test]
    fn test_custom_punctuation() {
        let result = lint(&rule_with("!"), "# Important!\n## Regular heading.");
        assert_eq!(result.len(), 1, "Only exclamation should be flagged with custom config");
        assert_eq!(result[0].line, 1);
        assert!(result[0].message.contains("ends with punctuation '!'"));
    }

    #[test]
    fn test_legitimate_question_mark() {
        let result = lint(&rule_with(".,;?"), "# What is this?\n# This is bad.");
        // With custom punctuation, legitimate punctuation exceptions don't apply
        assert_eq!(result.len(), 2, "Both should be flagged with custom punctuation");
    }

    #[test]
    fn test_question_marks_not_in_default() {
        let result = lint(
            &default_rule(),
            "# What is Rust?\n# How does it work?\n# Is it fast?",
        );
        assert!(result.is_empty(), "Question marks are not in default punctuation list");
    }

    #[test]
    fn test_colons_in_default() {
        let result = lint(
            &default_rule(),
            "# FAQ:\n# API Reference:\n# Step 1:\n# Version 2.0:",
        );
        assert_eq!(
            result.len(),
            4,
            "Colons are in default punctuation list and should be flagged"
        );
    }

    #[test]
    fn test_fix_atx_headings() {
        let fixed = autofix(&default_rule(), "# Title.\n## Subtitle,\n### Sub-subtitle;");
        assert_eq!(fixed, "# Title\n## Subtitle\n### Sub-subtitle");
    }

    #[test]
    fn test_fix_setext_headings() {
        let fixed = autofix(&default_rule(), "Title.\n======\n\nSubtitle,\n---------");
        assert_eq!(fixed, "Title\n======\n\nSubtitle\n---------");
    }

    #[test]
    fn test_fix_preserves_trailing_hashes() {
        let fixed = autofix(&default_rule(), "# Title. #\n## Subtitle, ##");
        assert_eq!(fixed, "# Title #\n## Subtitle ##");
    }

    #[test]
    fn test_indented_headings() {
        let result = lint(&default_rule(), "   # Title.\n  ## Subtitle.");
        assert_eq!(result.len(), 2, "Indented headings (< 4 spaces) should be checked");
    }

    #[test]
    fn test_deeply_indented_ignored() {
        let result = lint(&default_rule(), "    # This is code.");
        assert!(result.is_empty(), "Deeply indented lines (4+ spaces) should be ignored");
    }

    #[test]
    fn test_multiple_punctuation() {
        let result = lint(&default_rule(), "# Title...");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].column, 8); // Points to first period
    }

    #[test]
    fn test_empty_content() {
        let result = lint(&default_rule(), "");
        assert!(result.is_empty());
    }

    #[test]
    fn test_no_headings() {
        let result = lint(
            &default_rule(),
            "This is just text.\nMore text with punctuation.",
        );
        assert!(result.is_empty(), "Non-heading lines should not be checked");
    }

    #[test]
    fn test_get_punctuation_regex() {
        let regex = rule_with("!?").get_punctuation_regex().unwrap();
        assert!(regex.is_match("text!"));
        assert!(regex.is_match("text?"));
        assert!(!regex.is_match("text."));
    }

    #[test]
    fn test_regex_caching() {
        let rule1 = rule_with("!");
        let rule2 = rule_with("!");

        // Both lookups go through the shared cache
        let _regex1 = rule1.get_punctuation_regex().unwrap();
        let _regex2 = rule2.get_punctuation_regex().unwrap();

        // The cache must now hold an entry for this punctuation set
        let cache = PUNCTUATION_REGEX_CACHE.read().unwrap();
        assert!(cache.contains_key("!"));
    }

    #[test]
    fn test_config_from_toml() {
        let mut rule_config = crate::config::RuleConfig::default();
        rule_config
            .values
            .insert("punctuation".to_string(), toml::Value::String("!?".to_string()));

        let mut config = crate::config::Config::default();
        config.rules.insert("MD026".to_string(), rule_config);

        let rule = MD026NoTrailingPunctuation::from_config(&config);
        let result = lint(&*rule, "# Title!\n# Another?");
        assert_eq!(result.len(), 2, "Custom punctuation from config should be used");
    }

    #[test]
    fn test_fix_removes_punctuation() {
        let fixed = autofix(&default_rule(), "# Title.   \n## Subtitle,  ");
        // The current implementation doesn't preserve trailing whitespace after punctuation removal
        assert_eq!(fixed, "# Title\n## Subtitle");
    }

    #[test]
    fn test_final_newline_preservation() {
        let rule = default_rule();

        let fixed = autofix(&rule, "# Title.\n");
        assert_eq!(fixed, "# Title\n");

        let fixed2 = autofix(&rule, "# Title.");
        assert_eq!(fixed2, "# Title");
    }
}