Skip to main content

rumdl_lib/rules/
md026_no_trailing_punctuation.rs

1/// Rule MD026: No trailing punctuation in headings
2///
3/// See [docs/md026.md](../../docs/md026.md) for full documentation, configuration, and examples.
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use crate::utils::range_utils::{LineIndex, calculate_match_range};
6use regex::Regex;
7use std::collections::HashMap;
8use std::ops::Range;
9use std::sync::LazyLock;
10use std::sync::RwLock;
11
12mod md026_config;
13use md026_config::{DEFAULT_PUNCTUATION, MD026Config};
14
// Optimized single regex for all ATX heading types (normal, closed, indented 1-3 spaces)
// Capture groups:
//   1 = leading indentation (0-3 spaces)
//   2 = opening run of 1-6 `#`
//   3 = whitespace between the opening hashes and the content
//   4 = heading content (lazy, so an optional closing sequence is not swallowed)
//   5 = optional closing sequence: whitespace followed by 1-6 `#` at end of line
static ATX_HEADING_UNIFIED: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^( {0,3})(#{1,6})(\s+)(.+?)(\s+#{1,6})?$").unwrap());

// Fast check patterns for early returns - match defaults
// A character class built from DEFAULT_PUNCTUATION; it matches when any single
// default punctuation character occurs anywhere in the searched text.
static QUICK_PUNCTUATION_CHECK: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(&format!(r"[{}]", regex::escape(DEFAULT_PUNCTUATION))).unwrap());

// Regex cache for punctuation patterns
// Keyed by the configured punctuation string; entries are inserted by
// `get_punctuation_regex` the first time a given punctuation set is compiled.
static PUNCTUATION_REGEX_CACHE: LazyLock<RwLock<HashMap<String, Regex>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));
26
/// Rule MD026: Trailing punctuation in heading
///
/// Reports headings whose text ends with one of the configured punctuation
/// characters and offers a fix that strips that trailing punctuation.
#[derive(Clone, Default)]
pub struct MD026NoTrailingPunctuation {
    /// Rule configuration; its `punctuation` field is the set of characters
    /// that must not terminate a heading.
    config: MD026Config,
}
32
33impl MD026NoTrailingPunctuation {
34    pub fn new(punctuation: Option<String>) -> Self {
35        Self {
36            config: MD026Config {
37                punctuation: punctuation.unwrap_or_else(|| DEFAULT_PUNCTUATION.to_string()),
38            },
39        }
40    }
41
    /// Builds the rule directly from an already-constructed `MD026Config`,
    /// storing it unchanged.
    pub fn from_config_struct(config: MD026Config) -> Self {
        Self { config }
    }
45
46    #[inline]
47    fn get_punctuation_regex(&self) -> Result<Regex, regex::Error> {
48        // Check cache first
49        {
50            let cache = PUNCTUATION_REGEX_CACHE.read().unwrap();
51            if let Some(cached_regex) = cache.get(&self.config.punctuation) {
52                return Ok(cached_regex.clone());
53            }
54        }
55
56        // Compile and cache the regex
57        let pattern = format!(r"([{}]+)$", regex::escape(&self.config.punctuation));
58        let regex = Regex::new(&pattern)?;
59
60        {
61            let mut cache = PUNCTUATION_REGEX_CACHE.write().unwrap();
62            cache.insert(self.config.punctuation.clone(), regex.clone());
63        }
64
65        Ok(regex)
66    }
67
68    #[inline]
69    fn has_trailing_punctuation(&self, text: &str, re: &Regex) -> bool {
70        let trimmed = text.trim();
71        re.is_match(trimmed)
72    }
73
74    #[inline]
75    fn get_line_byte_range(&self, content: &str, line_num: usize, line_index: &LineIndex) -> Range<usize> {
76        let start_pos = line_index.get_line_start_byte(line_num).unwrap_or(content.len());
77
78        // Find the line length
79        let line = content.lines().nth(line_num - 1).unwrap_or("");
80
81        Range {
82            start: start_pos,
83            end: start_pos + line.len(),
84        }
85    }
86
87    // Remove trailing punctuation from text
88    #[inline]
89    fn remove_trailing_punctuation(&self, text: &str, re: &Regex) -> String {
90        re.replace_all(text.trim(), "").to_string()
91    }
92
93    // Optimized ATX heading fix using unified regex
94    #[inline]
95    fn fix_atx_heading(&self, line: &str, re: &Regex) -> String {
96        if let Some(captures) = ATX_HEADING_UNIFIED.captures(line) {
97            let indentation = captures.get(1).unwrap().as_str();
98            let hashes = captures.get(2).unwrap().as_str();
99            let space = captures.get(3).unwrap().as_str();
100            let content = captures.get(4).unwrap().as_str();
101
102            // Check if content ends with a custom header ID like {#my-id}
103            // If so, we need to fix punctuation before the ID
104            let fixed_content = if let Some(id_pos) = content.rfind(" {#") {
105                // Has a custom ID - fix punctuation before it
106                let before_id = &content[..id_pos];
107                let id_part = &content[id_pos..];
108                let fixed_before = self.remove_trailing_punctuation(before_id, re);
109                format!("{fixed_before}{id_part}")
110            } else {
111                // No custom ID - just remove trailing punctuation
112                self.remove_trailing_punctuation(content, re)
113            };
114
115            // Preserve any trailing hashes if present
116            if let Some(trailing) = captures.get(5) {
117                return format!(
118                    "{}{}{}{}{}",
119                    indentation,
120                    hashes,
121                    space,
122                    fixed_content,
123                    trailing.as_str()
124                );
125            }
126
127            return format!("{indentation}{hashes}{space}{fixed_content}");
128        }
129
130        // Fallback if no regex matches
131        line.to_string()
132    }
133
134    // Fix a setext heading by removing trailing punctuation from the content line
135    #[inline]
136    fn fix_setext_heading(&self, content_line: &str, re: &Regex) -> String {
137        let trimmed = content_line.trim_end();
138        let mut whitespace = "";
139
140        // Preserve trailing whitespace
141        if content_line.len() > trimmed.len() {
142            whitespace = &content_line[trimmed.len()..];
143        }
144
145        // Remove punctuation and preserve whitespace
146        format!("{}{}", self.remove_trailing_punctuation(trimmed, re), whitespace)
147    }
148}
149
150impl Rule for MD026NoTrailingPunctuation {
    /// Returns the rule identifier, `"MD026"`.
    fn name(&self) -> &'static str {
        "MD026"
    }
154
    /// Returns the short human-readable description of the rule.
    fn description(&self) -> &'static str {
        "Trailing punctuation in heading"
    }
158
159    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
160        // Skip if no heading markers
161        if !ctx.likely_has_headings() {
162            return true;
163        }
164        // Skip if none of the configured punctuation exists
165        let punctuation = &self.config.punctuation;
166        !punctuation.chars().any(|p| ctx.content.contains(p))
167    }
168
169    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
170        let content = ctx.content;
171
172        // Early returns for performance
173        if content.is_empty() {
174            return Ok(Vec::new());
175        }
176
177        // Quick check for any punctuation we care about
178        // For custom punctuation, we need to check differently
179        if self.config.punctuation == DEFAULT_PUNCTUATION {
180            if !QUICK_PUNCTUATION_CHECK.is_match(content) {
181                return Ok(Vec::new());
182            }
183        } else {
184            // For custom punctuation, check if any of those characters exist
185            let has_custom_punctuation = self.config.punctuation.chars().any(|c| content.contains(c));
186            if !has_custom_punctuation {
187                return Ok(Vec::new());
188            }
189        }
190
191        // Check if we have any headings from pre-computed line info
192        let has_headings = ctx.lines.iter().any(|line| line.heading.is_some());
193        if !has_headings {
194            return Ok(Vec::new());
195        }
196
197        let mut warnings = Vec::new();
198        let re = match self.get_punctuation_regex() {
199            Ok(regex) => regex,
200            Err(_) => return Ok(warnings),
201        };
202
203        // Create LineIndex for correct byte position calculations across all line ending types
204        let line_index = &ctx.line_index;
205
206        // Use pre-computed heading information from LintContext
207        for (line_num, line_info) in ctx.lines.iter().enumerate() {
208            if let Some(heading) = &line_info.heading {
209                // Skip invalid headings (e.g., `#NoSpace` which lacks required space after #)
210                if !heading.is_valid {
211                    continue;
212                }
213
214                // Skip deeply indented headings (they're code blocks)
215                if line_info.visual_indent >= 4 && matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
216                    continue;
217                }
218
219                // LintContext already strips Kramdown IDs from heading.text
220                // So we just check the heading text directly for trailing punctuation
221                // This correctly flags "# Heading." even if it has {#id}
222                let text_to_check = heading.text.clone();
223
224                if self.has_trailing_punctuation(&text_to_check, &re) {
225                    // Find the trailing punctuation
226                    if let Some(punctuation_match) = re.find(&text_to_check) {
227                        let line = line_info.content(ctx.content);
228
229                        // For ATX headings, find the punctuation position in the line
230                        let punctuation_pos_in_text = punctuation_match.start();
231                        let text_pos_in_line = line.find(&heading.text).unwrap_or(heading.content_column);
232                        let punctuation_start_in_line = text_pos_in_line + punctuation_pos_in_text;
233                        let punctuation_len = punctuation_match.len();
234
235                        let (start_line, start_col, end_line, end_col) = calculate_match_range(
236                            line_num + 1, // Convert to 1-indexed
237                            line,
238                            punctuation_start_in_line,
239                            punctuation_len,
240                        );
241
242                        let last_char = text_to_check.chars().last().unwrap_or(' ');
243                        warnings.push(LintWarning {
244                            rule_name: Some(self.name().to_string()),
245                            line: start_line,
246                            column: start_col,
247                            end_line,
248                            end_column: end_col,
249                            message: format!("Heading '{text_to_check}' ends with punctuation '{last_char}'"),
250                            severity: Severity::Warning,
251                            fix: Some(Fix {
252                                range: self.get_line_byte_range(content, line_num + 1, line_index),
253                                replacement: if matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
254                                    self.fix_atx_heading(line, &re)
255                                } else {
256                                    self.fix_setext_heading(line, &re)
257                                },
258                            }),
259                        });
260                    }
261                }
262            }
263        }
264
265        Ok(warnings)
266    }
267
268    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
269        let content = ctx.content;
270
271        // Fast path optimizations
272        if content.is_empty() {
273            return Ok(content.to_string());
274        }
275
276        // Quick check for punctuation
277        // For custom punctuation, we need to check differently
278        if self.config.punctuation == DEFAULT_PUNCTUATION {
279            if !QUICK_PUNCTUATION_CHECK.is_match(content) {
280                return Ok(content.to_string());
281            }
282        } else {
283            // For custom punctuation, check if any of those characters exist
284            let has_custom_punctuation = self.config.punctuation.chars().any(|c| content.contains(c));
285            if !has_custom_punctuation {
286                return Ok(content.to_string());
287            }
288        }
289
290        // Check if we have any headings from pre-computed line info
291        let has_headings = ctx.lines.iter().any(|line| line.heading.is_some());
292        if !has_headings {
293            return Ok(content.to_string());
294        }
295
296        let re = match self.get_punctuation_regex() {
297            Ok(regex) => regex,
298            Err(_) => return Ok(content.to_string()),
299        };
300
301        let lines = ctx.raw_lines();
302        let mut fixed_lines: Vec<String> = lines.iter().map(|&s| s.to_string()).collect();
303
304        // Use pre-computed heading information from LintContext
305        for (line_num, line_info) in ctx.lines.iter().enumerate() {
306            // Skip lines where this rule is disabled by inline config
307            if ctx.inline_config().is_rule_disabled(self.name(), line_num + 1) {
308                continue;
309            }
310
311            if let Some(heading) = &line_info.heading {
312                // Skip invalid headings (e.g., `#NoSpace` which lacks required space after #)
313                if !heading.is_valid {
314                    continue;
315                }
316
317                // Skip deeply indented headings (they're code blocks)
318                if line_info.visual_indent >= 4 && matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
319                    continue;
320                }
321
322                // LintContext already strips custom header IDs from heading.text
323                // So we just check the heading text directly for trailing punctuation
324                let text_to_check = heading.text.clone();
325
326                // Check and fix trailing punctuation
327                if self.has_trailing_punctuation(&text_to_check, &re) {
328                    fixed_lines[line_num] = if matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
329                        self.fix_atx_heading(line_info.content(ctx.content), &re)
330                    } else {
331                        self.fix_setext_heading(line_info.content(ctx.content), &re)
332                    };
333                }
334            }
335        }
336
337        // Reconstruct content preserving line endings
338        let mut result = String::with_capacity(content.len());
339        for (i, line) in fixed_lines.iter().enumerate() {
340            result.push_str(line);
341            if i < fixed_lines.len() - 1 || content.ends_with('\n') {
342                result.push('\n');
343            }
344        }
345
346        Ok(result)
347    }
348
    /// Exposes the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
352
353    fn default_config_section(&self) -> Option<(String, toml::Value)> {
354        let json_value = serde_json::to_value(&self.config).ok()?;
355        Some((
356            self.name().to_string(),
357            crate::rule_config_serde::json_to_toml_value(&json_value)?,
358        ))
359    }
360
361    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
362    where
363        Self: Sized,
364    {
365        let rule_config = crate::rule_config_serde::load_rule_config::<MD026Config>(config);
366        Box::new(Self::from_config_struct(rule_config))
367    }
368}
369
// Unit tests for MD026: detection with default and custom punctuation,
// fixing of ATX/setext headings, regex caching and config loading.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    // Headings without any trailing punctuation produce no warnings.
    #[test]
    fn test_no_trailing_punctuation() {
        let rule = MD026NoTrailingPunctuation::new(None);
        let content = "# This is a heading\n\n## Another heading";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty(), "Headings without punctuation should not be flagged");
    }

    // A trailing period is flagged, and the warning column points at the period.
    #[test]
    fn test_trailing_period() {
        let rule = MD026NoTrailingPunctuation::new(None);
        let content = "# This is a heading.\n\n## Another one.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 1);
        assert_eq!(result[0].column, 20);
        assert!(result[0].message.contains("ends with punctuation '.'"));
        assert_eq!(result[1].line, 3);
        assert_eq!(result[1].column, 15);
    }

    // A trailing comma is flagged with the default punctuation set.
    #[test]
    fn test_trailing_comma() {
        let rule = MD026NoTrailingPunctuation::new(None);
        let content = "# Heading,\n## Sub-heading,";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 2);
        assert!(result[0].message.contains("ends with punctuation ','"));
    }

    // A trailing semicolon is flagged with the default punctuation set.
    #[test]
    fn test_trailing_semicolon() {
        let rule = MD026NoTrailingPunctuation::new(None);
        let content = "# Title;\n## Subtitle;";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 2);
        assert!(result[0].message.contains("ends with punctuation ';'"));
    }

    // With a custom set of "!", only the exclamation mark is flagged.
    #[test]
    fn test_custom_punctuation() {
        let rule = MD026NoTrailingPunctuation::new(Some("!".to_string()));
        let content = "# Important!\n## Regular heading.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1, "Only exclamation should be flagged with custom config");
        assert_eq!(result[0].line, 1);
        assert!(result[0].message.contains("ends with punctuation '!'"));
    }

    // A question mark is flagged once it is part of the custom punctuation set.
    #[test]
    fn test_legitimate_question_mark() {
        let rule = MD026NoTrailingPunctuation::new(Some(".,;?".to_string()));
        let content = "# What is this?\n# This is bad.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        // With custom punctuation, legitimate punctuation exceptions don't apply
        assert_eq!(result.len(), 2, "Both should be flagged with custom punctuation");
    }

    // Question marks are not flagged by the default configuration.
    #[test]
    fn test_question_marks_not_in_default() {
        let rule = MD026NoTrailingPunctuation::new(None);
        let content = "# What is Rust?\n# How does it work?\n# Is it fast?";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty(), "Question marks are not in default punctuation list");
    }

    // Trailing colons are flagged by the default configuration.
    #[test]
    fn test_colons_in_default() {
        let rule = MD026NoTrailingPunctuation::new(None);
        let content = "# FAQ:\n# API Reference:\n# Step 1:\n# Version 2.0:";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(
            result.len(),
            4,
            "Colons are in default punctuation list and should be flagged"
        );
    }

    // `fix` strips trailing punctuation from ATX headings of several levels.
    #[test]
    fn test_fix_atx_headings() {
        let rule = MD026NoTrailingPunctuation::new(None);
        let content = "# Title.\n## Subtitle,\n### Sub-subtitle;";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, "# Title\n## Subtitle\n### Sub-subtitle");
    }

    // `fix` strips trailing punctuation from setext heading text lines and
    // leaves the underline rows alone.
    #[test]
    fn test_fix_setext_headings() {
        let rule = MD026NoTrailingPunctuation::new(None);
        let content = "Title.\n======\n\nSubtitle,\n---------";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, "Title\n======\n\nSubtitle\n---------");
    }

    // Closed ATX headings keep their closing hash sequence after the fix.
    #[test]
    fn test_fix_preserves_trailing_hashes() {
        let rule = MD026NoTrailingPunctuation::new(None);
        let content = "# Title. #\n## Subtitle, ##";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, "# Title #\n## Subtitle ##");
    }

    // Headings indented by fewer than four spaces are still checked.
    #[test]
    fn test_indented_headings() {
        let rule = MD026NoTrailingPunctuation::new(None);
        let content = "   # Title.\n  ## Subtitle.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 2, "Indented headings (< 4 spaces) should be checked");
    }

    // A line indented by four spaces is not reported.
    #[test]
    fn test_deeply_indented_ignored() {
        let rule = MD026NoTrailingPunctuation::new(None);
        let content = "    # This is code.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty(), "Deeply indented lines (4+ spaces) should be ignored");
    }

    // A run of punctuation yields a single warning starting at the first character of the run.
    #[test]
    fn test_multiple_punctuation() {
        let rule = MD026NoTrailingPunctuation::new(None);
        let content = "# Title...";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].column, 8); // Points to first period
    }

    // Empty input produces no warnings.
    #[test]
    fn test_empty_content() {
        let rule = MD026NoTrailingPunctuation::new(None);
        let content = "";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty());
    }

    // Punctuation in ordinary paragraphs is ignored when there are no headings.
    #[test]
    fn test_no_headings() {
        let rule = MD026NoTrailingPunctuation::new(None);
        let content = "This is just text.\nMore text with punctuation.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty(), "Non-heading lines should not be checked");
    }

    // The generated regex matches only the configured characters at end of text.
    #[test]
    fn test_get_punctuation_regex() {
        let rule = MD026NoTrailingPunctuation::new(Some("!?".to_string()));
        let regex = rule.get_punctuation_regex().unwrap();
        assert!(regex.is_match("text!"));
        assert!(regex.is_match("text?"));
        assert!(!regex.is_match("text."));
    }

    // After a lookup, the cache holds an entry keyed by the punctuation string.
    #[test]
    fn test_regex_caching() {
        let rule1 = MD026NoTrailingPunctuation::new(Some("!".to_string()));
        let rule2 = MD026NoTrailingPunctuation::new(Some("!".to_string()));

        // Both should get the same cached regex
        let _regex1 = rule1.get_punctuation_regex().unwrap();
        let _regex2 = rule2.get_punctuation_regex().unwrap();

        // Check cache has the entry
        let cache = PUNCTUATION_REGEX_CACHE.read().unwrap();
        assert!(cache.contains_key("!"));
    }

    // A `punctuation` value set in the rule's config section is honoured by `from_config`.
    #[test]
    fn test_config_from_toml() {
        let mut config = crate::config::Config::default();
        let mut rule_config = crate::config::RuleConfig::default();
        rule_config
            .values
            .insert("punctuation".to_string(), toml::Value::String("!?".to_string()));
        config.rules.insert("MD026".to_string(), rule_config);

        let rule = MD026NoTrailingPunctuation::from_config(&config);
        let content = "# Title!\n# Another?";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 2, "Custom punctuation from config should be used");
    }

    // Fixing an ATX heading also drops whitespace that followed the punctuation.
    #[test]
    fn test_fix_removes_punctuation() {
        let rule = MD026NoTrailingPunctuation::new(None);
        let content = "# Title.   \n## Subtitle,  ";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let fixed = rule.fix(&ctx).unwrap();
        // The current implementation doesn't preserve trailing whitespace after punctuation removal
        assert_eq!(fixed, "# Title\n## Subtitle");
    }

    // The presence or absence of a final newline in the input is kept by `fix`.
    #[test]
    fn test_final_newline_preservation() {
        let rule = MD026NoTrailingPunctuation::new(None);
        let content = "# Title.\n";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, "# Title\n");

        let content_no_newline = "# Title.";
        let ctx2 = LintContext::new(content_no_newline, crate::config::MarkdownFlavor::Standard, None);
        let fixed2 = rule.fix(&ctx2).unwrap();
        assert_eq!(fixed2, "# Title");
    }
}