rumdl_lib/rules/md063_heading_capitalization/
mod.rs

1/// Rule MD063: Heading capitalization
2///
3/// See [docs/md063.md](../../docs/md063.md) for full documentation, configuration, and examples.
4///
5/// This rule enforces consistent capitalization styles for markdown headings.
6/// It supports title case, sentence case, and all caps styles.
7///
8/// **Note:** This rule is disabled by default. Enable it in your configuration:
9/// ```toml
10/// [MD063]
11/// enabled = true
12/// style = "title_case"
13/// ```
14use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
15use crate::utils::range_utils::LineIndex;
16use regex::Regex;
17use std::collections::HashSet;
18use std::ops::Range;
19use std::sync::LazyLock;
20
21mod md063_config;
22pub use md063_config::{HeadingCapStyle, MD063Config};
23
// Matches an inline code span: a run of one or more backticks, at least one
// non-backtick character, then another run of one or more backticks. The opening
// and closing runs are not required to have the same length.
static INLINE_CODE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"`+[^`]+`+").unwrap());

// Matches an inline link `[text](url)` (text in capture group 1) or a reference
// link `[text][ref]` (text in capture group 2).
static LINK_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\[([^\]]*)\]\([^)]*\)|\[([^\]]*)\]\[[^\]]*\]").unwrap());

// Matches a custom header ID such as ` {#id}` at the very end of the text,
// together with any whitespace surrounding it.
static CUSTOM_ID_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s*\{#[^}]+\}\s*$").unwrap());
33
/// Represents a segment of heading text.
///
/// A heading is split into consecutive segments so that capitalization is only
/// applied where it is safe to rewrite characters.
#[derive(Debug, Clone)]
enum HeadingSegment {
    /// Regular text that should be capitalized
    Text(String),
    /// Inline code (including its backticks) that should be preserved as-is
    Code(String),
    /// Link with text that may be capitalized and URL that's preserved
    Link {
        /// The complete matched link, brackets and target included
        full: String,
        /// Byte offset of the start of the link text, relative to the start of `full`
        text_start: usize,
        /// Byte offset of the end of the link text, relative to the start of `full`
        text_end: usize,
    },
}
48
/// Rule MD063: Heading capitalization
///
/// Holds the rule configuration together with a set built from
/// `config.lowercase_words`, used for membership checks during title casing.
#[derive(Clone)]
pub struct MD063HeadingCapitalization {
    /// Rule configuration (style, level filter, word lists, ...)
    config: MD063Config,
    /// Copy of `config.lowercase_words` as a set; looked up with lowercased words
    lowercase_set: HashSet<String>,
}
55
/// `Default` delegates to [`MD063HeadingCapitalization::new`], i.e. the rule
/// built from `MD063Config::default()`.
impl Default for MD063HeadingCapitalization {
    fn default() -> Self {
        Self::new()
    }
}
61
62impl MD063HeadingCapitalization {
63    pub fn new() -> Self {
64        let config = MD063Config::default();
65        let lowercase_set = config.lowercase_words.iter().cloned().collect();
66        Self { config, lowercase_set }
67    }
68
69    pub fn from_config_struct(config: MD063Config) -> Self {
70        let lowercase_set = config.lowercase_words.iter().cloned().collect();
71        Self { config, lowercase_set }
72    }
73
74    /// Check if a word has internal capitals (like "iPhone", "macOS", "GitHub", "iOS")
75    fn has_internal_capitals(&self, word: &str) -> bool {
76        let chars: Vec<char> = word.chars().collect();
77        if chars.len() < 2 {
78            return false;
79        }
80
81        let first = chars[0];
82        let rest = &chars[1..];
83        let has_upper_in_rest = rest.iter().any(|c| c.is_uppercase());
84        let has_lower_in_rest = rest.iter().any(|c| c.is_lowercase());
85
86        // Case 1: Mixed case after first character (like "iPhone", "macOS", "GitHub", "JavaScript")
87        if has_upper_in_rest && has_lower_in_rest {
88            return true;
89        }
90
91        // Case 2: Lowercase first + uppercase in rest (like "iOS", "eBay")
92        if first.is_lowercase() && has_upper_in_rest {
93            return true;
94        }
95
96        false
97    }
98
99    /// Check if a word is an all-caps acronym (2+ consecutive uppercase letters)
100    /// Examples: "API", "GPU", "HTTP2", "IO" return true
101    /// Examples: "A", "iPhone", "npm" return false
102    fn is_all_caps_acronym(&self, word: &str) -> bool {
103        // Skip single-letter words (handled by title case rules)
104        if word.len() < 2 {
105            return false;
106        }
107
108        let mut consecutive_upper = 0;
109        let mut max_consecutive = 0;
110
111        for c in word.chars() {
112            if c.is_uppercase() {
113                consecutive_upper += 1;
114                max_consecutive = max_consecutive.max(consecutive_upper);
115            } else if c.is_lowercase() {
116                // Any lowercase letter means not all-caps
117                return false;
118            } else {
119                // Non-letter (number, punctuation) - reset counter but don't fail
120                consecutive_upper = 0;
121            }
122        }
123
124        // Must have at least 2 consecutive uppercase letters
125        max_consecutive >= 2
126    }
127
128    /// Check if a word should be preserved as-is
129    fn should_preserve_word(&self, word: &str) -> bool {
130        // Check ignore_words list (case-sensitive exact match)
131        if self.config.ignore_words.iter().any(|w| w == word) {
132            return true;
133        }
134
135        // Check if word has internal capitals and preserve_cased_words is enabled
136        if self.config.preserve_cased_words && self.has_internal_capitals(word) {
137            return true;
138        }
139
140        // Check if word is an all-caps acronym (2+ consecutive uppercase)
141        if self.config.preserve_cased_words && self.is_all_caps_acronym(word) {
142            return true;
143        }
144
145        false
146    }
147
148    /// Check if a word is a "lowercase word" (articles, prepositions, etc.)
149    fn is_lowercase_word(&self, word: &str) -> bool {
150        self.lowercase_set.contains(&word.to_lowercase())
151    }
152
153    /// Apply title case to a single word
154    fn title_case_word(&self, word: &str, is_first: bool, is_last: bool) -> String {
155        if word.is_empty() {
156            return word.to_string();
157        }
158
159        // Preserve words in ignore list or with internal capitals
160        if self.should_preserve_word(word) {
161            return word.to_string();
162        }
163
164        // First and last words are always capitalized
165        if is_first || is_last {
166            return self.capitalize_first(word);
167        }
168
169        // Check if it's a lowercase word (articles, prepositions, etc.)
170        if self.is_lowercase_word(word) {
171            return word.to_lowercase();
172        }
173
174        // Regular word - capitalize first letter
175        self.capitalize_first(word)
176    }
177
178    /// Capitalize the first letter of a word, handling Unicode properly
179    fn capitalize_first(&self, word: &str) -> String {
180        let mut chars = word.chars();
181        match chars.next() {
182            None => String::new(),
183            Some(first) => {
184                let first_upper: String = first.to_uppercase().collect();
185                let rest: String = chars.collect();
186                format!("{}{}", first_upper, rest.to_lowercase())
187            }
188        }
189    }
190
191    /// Apply title case to text (using titlecase crate as base, then our customizations)
192    fn apply_title_case(&self, text: &str) -> String {
193        // Use the titlecase crate for the base transformation
194        let base_result = titlecase::titlecase(text);
195
196        // Get words from both original and transformed text to compare
197        let original_words: Vec<&str> = text.split_whitespace().collect();
198        let transformed_words: Vec<&str> = base_result.split_whitespace().collect();
199        let total_words = transformed_words.len();
200
201        let result_words: Vec<String> = transformed_words
202            .iter()
203            .enumerate()
204            .map(|(i, word)| {
205                let is_first = i == 0;
206                let is_last = i == total_words - 1;
207
208                // Check if the ORIGINAL word should be preserved (for acronyms like "API")
209                if let Some(original_word) = original_words.get(i)
210                    && self.should_preserve_word(original_word)
211                {
212                    return (*original_word).to_string();
213                }
214
215                // Handle hyphenated words
216                if word.contains('-') {
217                    // Also check original for hyphenated preservation
218                    if let Some(original_word) = original_words.get(i) {
219                        return self.handle_hyphenated_word_with_original(word, original_word, is_first, is_last);
220                    }
221                    return self.handle_hyphenated_word(word, is_first, is_last);
222                }
223
224                self.title_case_word(word, is_first, is_last)
225            })
226            .collect();
227
228        result_words.join(" ")
229    }
230
231    /// Handle hyphenated words like "self-documenting"
232    fn handle_hyphenated_word(&self, word: &str, is_first: bool, is_last: bool) -> String {
233        let parts: Vec<&str> = word.split('-').collect();
234        let total_parts = parts.len();
235
236        let result_parts: Vec<String> = parts
237            .iter()
238            .enumerate()
239            .map(|(i, part)| {
240                // First part of first word and last part of last word get special treatment
241                let part_is_first = is_first && i == 0;
242                let part_is_last = is_last && i == total_parts - 1;
243                self.title_case_word(part, part_is_first, part_is_last)
244            })
245            .collect();
246
247        result_parts.join("-")
248    }
249
250    /// Handle hyphenated words with original text for acronym preservation
251    fn handle_hyphenated_word_with_original(
252        &self,
253        word: &str,
254        original: &str,
255        is_first: bool,
256        is_last: bool,
257    ) -> String {
258        let parts: Vec<&str> = word.split('-').collect();
259        let original_parts: Vec<&str> = original.split('-').collect();
260        let total_parts = parts.len();
261
262        let result_parts: Vec<String> = parts
263            .iter()
264            .enumerate()
265            .map(|(i, part)| {
266                // Check if the original part should be preserved (for acronyms)
267                if let Some(original_part) = original_parts.get(i)
268                    && self.should_preserve_word(original_part)
269                {
270                    return (*original_part).to_string();
271                }
272
273                // First part of first word and last part of last word get special treatment
274                let part_is_first = is_first && i == 0;
275                let part_is_last = is_last && i == total_parts - 1;
276                self.title_case_word(part, part_is_first, part_is_last)
277            })
278            .collect();
279
280        result_parts.join("-")
281    }
282
283    /// Apply sentence case to text
284    fn apply_sentence_case(&self, text: &str) -> String {
285        if text.is_empty() {
286            return text.to_string();
287        }
288
289        let mut result = String::new();
290        let mut current_pos = 0;
291        let mut is_first_word = true;
292
293        // Use original text positions to preserve whitespace correctly
294        for word in text.split_whitespace() {
295            if let Some(pos) = text[current_pos..].find(word) {
296                let abs_pos = current_pos + pos;
297
298                // Preserve whitespace before this word
299                result.push_str(&text[current_pos..abs_pos]);
300
301                // Process the word
302                if is_first_word {
303                    // Check if word should be preserved BEFORE any capitalization
304                    if self.should_preserve_word(word) {
305                        // Preserve ignore-words exactly as-is, even at start
306                        result.push_str(word);
307                    } else {
308                        // First word: capitalize first letter, lowercase rest
309                        let mut chars = word.chars();
310                        if let Some(first) = chars.next() {
311                            let first_upper: String = first.to_uppercase().collect();
312                            result.push_str(&first_upper);
313                            let rest: String = chars.collect();
314                            result.push_str(&rest.to_lowercase());
315                        }
316                    }
317                    is_first_word = false;
318                } else {
319                    // Non-first words: preserve if needed, otherwise lowercase
320                    if self.should_preserve_word(word) {
321                        result.push_str(word);
322                    } else {
323                        result.push_str(&word.to_lowercase());
324                    }
325                }
326
327                current_pos = abs_pos + word.len();
328            }
329        }
330
331        // Preserve any trailing whitespace
332        if current_pos < text.len() {
333            result.push_str(&text[current_pos..]);
334        }
335
336        result
337    }
338
339    /// Apply all caps to text (preserve whitespace)
340    fn apply_all_caps(&self, text: &str) -> String {
341        if text.is_empty() {
342            return text.to_string();
343        }
344
345        let mut result = String::new();
346        let mut current_pos = 0;
347
348        // Use original text positions to preserve whitespace correctly
349        for word in text.split_whitespace() {
350            if let Some(pos) = text[current_pos..].find(word) {
351                let abs_pos = current_pos + pos;
352
353                // Preserve whitespace before this word
354                result.push_str(&text[current_pos..abs_pos]);
355
356                // Check if this word should be preserved
357                if self.should_preserve_word(word) {
358                    result.push_str(word);
359                } else {
360                    result.push_str(&word.to_uppercase());
361                }
362
363                current_pos = abs_pos + word.len();
364            }
365        }
366
367        // Preserve any trailing whitespace
368        if current_pos < text.len() {
369            result.push_str(&text[current_pos..]);
370        }
371
372        result
373    }
374
375    /// Parse heading text into segments
376    fn parse_segments(&self, text: &str) -> Vec<HeadingSegment> {
377        let mut segments = Vec::new();
378        let mut last_end = 0;
379
380        // Collect all special regions (code and links)
381        let mut special_regions: Vec<(usize, usize, HeadingSegment)> = Vec::new();
382
383        // Find inline code spans
384        for mat in INLINE_CODE_REGEX.find_iter(text) {
385            special_regions.push((mat.start(), mat.end(), HeadingSegment::Code(mat.as_str().to_string())));
386        }
387
388        // Find links
389        for caps in LINK_REGEX.captures_iter(text) {
390            let full_match = caps.get(0).unwrap();
391            let text_match = caps.get(1).or_else(|| caps.get(2));
392
393            if let Some(text_m) = text_match {
394                special_regions.push((
395                    full_match.start(),
396                    full_match.end(),
397                    HeadingSegment::Link {
398                        full: full_match.as_str().to_string(),
399                        text_start: text_m.start() - full_match.start(),
400                        text_end: text_m.end() - full_match.start(),
401                    },
402                ));
403            }
404        }
405
406        // Sort by start position
407        special_regions.sort_by_key(|(start, _, _)| *start);
408
409        // Remove overlapping regions (code takes precedence)
410        let mut filtered_regions: Vec<(usize, usize, HeadingSegment)> = Vec::new();
411        for region in special_regions {
412            let overlaps = filtered_regions.iter().any(|(s, e, _)| region.0 < *e && region.1 > *s);
413            if !overlaps {
414                filtered_regions.push(region);
415            }
416        }
417
418        // Build segments
419        for (start, end, segment) in filtered_regions {
420            // Add text before this special region
421            if start > last_end {
422                let text_segment = &text[last_end..start];
423                if !text_segment.is_empty() {
424                    segments.push(HeadingSegment::Text(text_segment.to_string()));
425                }
426            }
427            segments.push(segment);
428            last_end = end;
429        }
430
431        // Add remaining text
432        if last_end < text.len() {
433            let remaining = &text[last_end..];
434            if !remaining.is_empty() {
435                segments.push(HeadingSegment::Text(remaining.to_string()));
436            }
437        }
438
439        // If no segments were found, treat the whole thing as text
440        if segments.is_empty() && !text.is_empty() {
441            segments.push(HeadingSegment::Text(text.to_string()));
442        }
443
444        segments
445    }
446
447    /// Apply capitalization to heading text
448    fn apply_capitalization(&self, text: &str) -> String {
449        // Strip custom ID if present and re-add later
450        let (main_text, custom_id) = if let Some(mat) = CUSTOM_ID_REGEX.find(text) {
451            (&text[..mat.start()], Some(mat.as_str()))
452        } else {
453            (text, None)
454        };
455
456        // Parse into segments
457        let segments = self.parse_segments(main_text);
458
459        // Count text segments to determine first/last word context
460        let text_segments: Vec<usize> = segments
461            .iter()
462            .enumerate()
463            .filter_map(|(i, s)| matches!(s, HeadingSegment::Text(_)).then_some(i))
464            .collect();
465
466        // Apply capitalization to each segment
467        let mut result_parts: Vec<String> = Vec::new();
468
469        for (i, segment) in segments.iter().enumerate() {
470            match segment {
471                HeadingSegment::Text(t) => {
472                    let is_first_text = text_segments.first() == Some(&i);
473                    let is_last_text = text_segments.last() == Some(&i);
474
475                    let capitalized = match self.config.style {
476                        HeadingCapStyle::TitleCase => self.apply_title_case_segment(t, is_first_text, is_last_text),
477                        HeadingCapStyle::SentenceCase => {
478                            if is_first_text {
479                                self.apply_sentence_case(t)
480                            } else {
481                                // For non-first segments in sentence case, lowercase
482                                self.apply_sentence_case_non_first(t)
483                            }
484                        }
485                        HeadingCapStyle::AllCaps => self.apply_all_caps(t),
486                    };
487                    result_parts.push(capitalized);
488                }
489                HeadingSegment::Code(c) => {
490                    result_parts.push(c.clone());
491                }
492                HeadingSegment::Link {
493                    full,
494                    text_start,
495                    text_end,
496                } => {
497                    // Apply capitalization to link text only
498                    let link_text = &full[*text_start..*text_end];
499                    let capitalized_text = match self.config.style {
500                        HeadingCapStyle::TitleCase => self.apply_title_case(link_text),
501                        HeadingCapStyle::SentenceCase => link_text.to_lowercase(),
502                        HeadingCapStyle::AllCaps => self.apply_all_caps(link_text),
503                    };
504
505                    let mut new_link = String::new();
506                    new_link.push_str(&full[..*text_start]);
507                    new_link.push_str(&capitalized_text);
508                    new_link.push_str(&full[*text_end..]);
509                    result_parts.push(new_link);
510                }
511            }
512        }
513
514        let mut result = result_parts.join("");
515
516        // Re-add custom ID if present
517        if let Some(id) = custom_id {
518            result.push_str(id);
519        }
520
521        result
522    }
523
524    /// Apply title case to a text segment with first/last awareness
525    fn apply_title_case_segment(&self, text: &str, is_first_segment: bool, is_last_segment: bool) -> String {
526        let words: Vec<&str> = text.split_whitespace().collect();
527        let total_words = words.len();
528
529        if total_words == 0 {
530            return text.to_string();
531        }
532
533        let result_words: Vec<String> = words
534            .iter()
535            .enumerate()
536            .map(|(i, word)| {
537                let is_first = is_first_segment && i == 0;
538                let is_last = is_last_segment && i == total_words - 1;
539
540                // Handle hyphenated words
541                if word.contains('-') {
542                    return self.handle_hyphenated_word(word, is_first, is_last);
543                }
544
545                self.title_case_word(word, is_first, is_last)
546            })
547            .collect();
548
549        // Preserve original spacing
550        let mut result = String::new();
551        let mut word_iter = result_words.iter();
552        let mut in_word = false;
553
554        for c in text.chars() {
555            if c.is_whitespace() {
556                if in_word {
557                    in_word = false;
558                }
559                result.push(c);
560            } else if !in_word {
561                if let Some(word) = word_iter.next() {
562                    result.push_str(word);
563                }
564                in_word = true;
565            }
566        }
567
568        result
569    }
570
571    /// Apply sentence case to non-first segments (just lowercase, preserve whitespace)
572    fn apply_sentence_case_non_first(&self, text: &str) -> String {
573        if text.is_empty() {
574            return text.to_string();
575        }
576
577        let lower = text.to_lowercase();
578        let mut result = String::new();
579        let mut current_pos = 0;
580
581        for word in lower.split_whitespace() {
582            if let Some(pos) = lower[current_pos..].find(word) {
583                let abs_pos = current_pos + pos;
584
585                // Preserve whitespace before this word
586                result.push_str(&lower[current_pos..abs_pos]);
587
588                // Check if this word should be preserved
589                let original_word = &text[abs_pos..abs_pos + word.len()];
590                if self.should_preserve_word(original_word) {
591                    result.push_str(original_word);
592                } else {
593                    result.push_str(word);
594                }
595
596                current_pos = abs_pos + word.len();
597            }
598        }
599
600        // Preserve any trailing whitespace
601        if current_pos < lower.len() {
602            result.push_str(&lower[current_pos..]);
603        }
604
605        result
606    }
607
608    /// Get byte range for a line
609    fn get_line_byte_range(&self, content: &str, line_num: usize, line_index: &LineIndex) -> Range<usize> {
610        let start_pos = line_index.get_line_start_byte(line_num).unwrap_or(content.len());
611        let line = content.lines().nth(line_num - 1).unwrap_or("");
612        Range {
613            start: start_pos,
614            end: start_pos + line.len(),
615        }
616    }
617
618    /// Fix an ATX heading line
619    fn fix_atx_heading(&self, _line: &str, heading: &crate::lint_context::HeadingInfo) -> String {
620        // Parse the line to preserve structure
621        let indent = " ".repeat(heading.marker_column);
622        let hashes = "#".repeat(heading.level as usize);
623
624        // Apply capitalization to the text
625        let fixed_text = self.apply_capitalization(&heading.raw_text);
626
627        // Reconstruct with closing sequence if present
628        let closing = &heading.closing_sequence;
629        if heading.has_closing_sequence {
630            format!("{indent}{hashes} {fixed_text} {closing}")
631        } else {
632            format!("{indent}{hashes} {fixed_text}")
633        }
634    }
635
636    /// Fix a Setext heading line
637    fn fix_setext_heading(&self, line: &str, heading: &crate::lint_context::HeadingInfo) -> String {
638        // Apply capitalization to the text
639        let fixed_text = self.apply_capitalization(&heading.raw_text);
640
641        // Preserve leading whitespace from original line
642        let leading_ws: String = line.chars().take_while(|c| c.is_whitespace()).collect();
643
644        format!("{leading_ws}{fixed_text}")
645    }
646}
647
648impl Rule for MD063HeadingCapitalization {
    /// Rule identifier; also used as the `rule_name` of emitted warnings and as
    /// the key of the default config section.
    fn name(&self) -> &'static str {
        "MD063"
    }
652
    /// Short human-readable description of the rule.
    fn description(&self) -> &'static str {
        "Heading capitalization"
    }
656
657    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
658        // Skip if rule is disabled or no headings
659        !self.config.enabled || !ctx.likely_has_headings() || !ctx.lines.iter().any(|line| line.heading.is_some())
660    }
661
662    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
663        if !self.config.enabled {
664            return Ok(Vec::new());
665        }
666
667        let content = ctx.content;
668
669        if content.is_empty() {
670            return Ok(Vec::new());
671        }
672
673        let mut warnings = Vec::new();
674        let line_index = &ctx.line_index;
675
676        for (line_num, line_info) in ctx.lines.iter().enumerate() {
677            if let Some(heading) = &line_info.heading {
678                // Check level filter
679                if heading.level < self.config.min_level || heading.level > self.config.max_level {
680                    continue;
681                }
682
683                // Skip headings in code blocks (indented headings)
684                if line_info.indent >= 4 && matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
685                    continue;
686                }
687
688                // Apply capitalization and compare
689                let original_text = &heading.raw_text;
690                let fixed_text = self.apply_capitalization(original_text);
691
692                if original_text != &fixed_text {
693                    let line = line_info.content(ctx.content);
694                    let style_name = match self.config.style {
695                        HeadingCapStyle::TitleCase => "title case",
696                        HeadingCapStyle::SentenceCase => "sentence case",
697                        HeadingCapStyle::AllCaps => "ALL CAPS",
698                    };
699
700                    warnings.push(LintWarning {
701                        rule_name: Some(self.name().to_string()),
702                        line: line_num + 1,
703                        column: heading.content_column + 1,
704                        end_line: line_num + 1,
705                        end_column: heading.content_column + 1 + original_text.len(),
706                        message: format!("Heading should use {style_name}: '{original_text}' -> '{fixed_text}'"),
707                        severity: Severity::Warning,
708                        fix: Some(Fix {
709                            range: self.get_line_byte_range(content, line_num + 1, line_index),
710                            replacement: match heading.style {
711                                crate::lint_context::HeadingStyle::ATX => self.fix_atx_heading(line, heading),
712                                _ => self.fix_setext_heading(line, heading),
713                            },
714                        }),
715                    });
716                }
717            }
718        }
719
720        Ok(warnings)
721    }
722
723    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
724        if !self.config.enabled {
725            return Ok(ctx.content.to_string());
726        }
727
728        let content = ctx.content;
729
730        if content.is_empty() {
731            return Ok(content.to_string());
732        }
733
734        let lines: Vec<&str> = content.lines().collect();
735        let mut fixed_lines: Vec<String> = lines.iter().map(|&s| s.to_string()).collect();
736
737        for (line_num, line_info) in ctx.lines.iter().enumerate() {
738            if let Some(heading) = &line_info.heading {
739                // Check level filter
740                if heading.level < self.config.min_level || heading.level > self.config.max_level {
741                    continue;
742                }
743
744                // Skip headings in code blocks
745                if line_info.indent >= 4 && matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
746                    continue;
747                }
748
749                let original_text = &heading.raw_text;
750                let fixed_text = self.apply_capitalization(original_text);
751
752                if original_text != &fixed_text {
753                    let line = line_info.content(ctx.content);
754                    fixed_lines[line_num] = match heading.style {
755                        crate::lint_context::HeadingStyle::ATX => self.fix_atx_heading(line, heading),
756                        _ => self.fix_setext_heading(line, heading),
757                    };
758                }
759            }
760        }
761
762        // Reconstruct content preserving line endings
763        let mut result = String::with_capacity(content.len());
764        for (i, line) in fixed_lines.iter().enumerate() {
765            result.push_str(line);
766            if i < fixed_lines.len() - 1 || content.ends_with('\n') {
767                result.push('\n');
768            }
769        }
770
771        Ok(result)
772    }
773
    /// Expose the rule as `&dyn Any`.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
777
778    fn default_config_section(&self) -> Option<(String, toml::Value)> {
779        let json_value = serde_json::to_value(&self.config).ok()?;
780        Some((
781            self.name().to_string(),
782            crate::rule_config_serde::json_to_toml_value(&json_value)?,
783        ))
784    }
785
786    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
787    where
788        Self: Sized,
789    {
790        let rule_config = crate::rule_config_serde::load_rule_config::<MD063Config>(config);
791        Box::new(Self::from_config_struct(rule_config))
792    }
793}
794
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    // ---------------------------------------------------------------------
    // Shared fixtures
    // ---------------------------------------------------------------------

    /// Builds a rule from `overrides` with `enabled` forced on.
    fn rule_from(overrides: MD063Config) -> MD063HeadingCapitalization {
        MD063HeadingCapitalization::from_config_struct(MD063Config {
            enabled: true,
            ..overrides
        })
    }

    /// Enabled rule with every other option left at its default.
    fn create_rule() -> MD063HeadingCapitalization {
        rule_from(MD063Config::default())
    }

    /// Enabled rule using the given capitalization `style`.
    fn create_rule_with_style(style: HeadingCapStyle) -> MD063HeadingCapitalization {
        rule_from(MD063Config {
            style,
            ..Default::default()
        })
    }

    /// Runs `check` on `markdown` (standard flavor) and returns the warnings.
    fn warnings_for(rule: &MD063HeadingCapitalization, markdown: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
        rule.check(&ctx).unwrap()
    }

    /// Runs `fix` on `markdown` (standard flavor) and returns the rewritten document.
    fn fixed_output(rule: &MD063HeadingCapitalization, markdown: &str) -> String {
        let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
        rule.fix(&ctx).unwrap()
    }

    // ---------------------------------------------------------------------
    // Title case
    // ---------------------------------------------------------------------

    #[test]
    fn test_title_case_basic() {
        let warnings = warnings_for(&create_rule(), "# hello world\n");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("Hello World"));
    }

    #[test]
    fn test_title_case_lowercase_words() {
        let warnings = warnings_for(&create_rule(), "# the quick brown fox\n");
        assert_eq!(warnings.len(), 1);
        // The leading "the" is capitalized because it is the first word; the rest are
        // ordinary words and get capitalized as well.
        assert!(warnings[0].message.contains("The Quick Brown Fox"));
    }

    #[test]
    fn test_title_case_already_correct() {
        let warnings = warnings_for(&create_rule(), "# The Quick Brown Fox\n");
        assert!(warnings.is_empty(), "Already correct heading should not be flagged");
    }

    #[test]
    fn test_title_case_hyphenated() {
        let warnings = warnings_for(&create_rule(), "# self-documenting code\n");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("Self-Documenting Code"));
    }

    // ---------------------------------------------------------------------
    // Sentence case
    // ---------------------------------------------------------------------

    #[test]
    fn test_sentence_case_basic() {
        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
        let warnings = warnings_for(&rule, "# The Quick Brown Fox\n");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("The quick brown fox"));
    }

    #[test]
    fn test_sentence_case_already_correct() {
        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
        let warnings = warnings_for(&rule, "# The quick brown fox\n");
        assert!(warnings.is_empty());
    }

    // ---------------------------------------------------------------------
    // All caps
    // ---------------------------------------------------------------------

    #[test]
    fn test_all_caps_basic() {
        let rule = create_rule_with_style(HeadingCapStyle::AllCaps);
        let warnings = warnings_for(&rule, "# hello world\n");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("HELLO WORLD"));
    }

    // ---------------------------------------------------------------------
    // Preserved words
    // ---------------------------------------------------------------------

    #[test]
    fn test_preserve_ignore_words() {
        let rule = rule_from(MD063Config {
            ignore_words: vec!["iPhone".to_string(), "macOS".to_string()],
            ..Default::default()
        });

        let warnings = warnings_for(&rule, "# using iPhone on macOS\n");
        assert_eq!(warnings.len(), 1);

        // Both ignored words must appear untouched in the suggested heading.
        let message = &warnings[0].message;
        assert!(message.contains("iPhone"));
        assert!(message.contains("macOS"));
    }

    #[test]
    fn test_preserve_cased_words() {
        let warnings = warnings_for(&create_rule(), "# using GitHub actions\n");
        assert_eq!(warnings.len(), 1);
        // "GitHub" carries an internal capital and must survive as written.
        assert!(warnings[0].message.contains("GitHub"));
    }

    // ---------------------------------------------------------------------
    // Inline code
    // ---------------------------------------------------------------------

    #[test]
    fn test_inline_code_preserved() {
        let warnings = warnings_for(&create_rule(), "# using `const` in javascript\n");
        assert_eq!(warnings.len(), 1);

        // The code span is kept verbatim while the surrounding text is capitalized.
        let message = &warnings[0].message;
        assert!(message.contains("`const`"));
        assert!(message.contains("Javascript") || message.contains("JavaScript"));
    }

    // ---------------------------------------------------------------------
    // Heading level filter
    // ---------------------------------------------------------------------

    #[test]
    fn test_level_filter() {
        let rule = rule_from(MD063Config {
            min_level: 2,
            max_level: 4,
            ..Default::default()
        });

        let warnings = warnings_for(
            &rule,
            "# h1 heading\n## h2 heading\n### h3 heading\n##### h5 heading\n",
        );

        // h1 sits below min_level and h5 above max_level, leaving only h2 and h3.
        assert_eq!(warnings.len(), 2);
        let flagged_lines: Vec<_> = warnings.iter().map(|warning| warning.line).collect();
        assert_eq!(flagged_lines, [2, 3]);
    }

    // ---------------------------------------------------------------------
    // Fixes
    // ---------------------------------------------------------------------

    #[test]
    fn test_fix_atx_heading() {
        assert_eq!(fixed_output(&create_rule(), "# hello world\n"), "# Hello World\n");
    }

    #[test]
    fn test_fix_multiple_headings() {
        let fixed = fixed_output(&create_rule(), "# first heading\n\n## second heading\n");
        assert_eq!(fixed, "# First Heading\n\n## Second Heading\n");
    }

    // ---------------------------------------------------------------------
    // Setext headings
    // ---------------------------------------------------------------------

    #[test]
    fn test_setext_heading() {
        let warnings = warnings_for(&create_rule(), "hello world\n============\n");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("Hello World"));
    }

    // ---------------------------------------------------------------------
    // Custom heading IDs
    // ---------------------------------------------------------------------

    #[test]
    fn test_custom_id_preserved() {
        let warnings = warnings_for(&create_rule(), "# getting started {#intro}\n");
        assert_eq!(warnings.len(), 1);
        // The trailing `{#intro}` anchor has to be carried over unchanged.
        assert!(warnings[0].message.contains("{#intro}"));
    }

    #[test]
    fn test_md063_disabled_by_default() {
        let rule = MD063HeadingCapitalization::new();
        let content = "# hello world\n";

        // A rule built with `new()` is disabled: nothing is reported...
        assert_eq!(warnings_for(&rule, content).len(), 0);

        // ...and fixing leaves the document exactly as it was.
        assert_eq!(fixed_output(&rule, content), content);
    }

    // ---------------------------------------------------------------------
    // Acronym preservation
    // ---------------------------------------------------------------------

    #[test]
    fn test_preserve_all_caps_acronyms() {
        let rule = create_rule();

        let cases = [
            // Plain acronym
            ("# using API in production\n", "# Using API in Production\n"),
            // Several acronyms in one heading
            ("# API and GPU integration\n", "# API and GPU Integration\n"),
            // Two-letter acronym
            ("# IO performance guide\n", "# IO Performance Guide\n"),
            // Acronyms containing digits
            ("# HTTP2 and MD5 hashing\n", "# HTTP2 and MD5 Hashing\n"),
        ];

        for (input, expected) in cases {
            assert_eq!(fixed_output(&rule, input), expected);
        }
    }

    #[test]
    fn test_preserve_acronyms_in_hyphenated_words() {
        let rule = create_rule();

        let cases = [
            // Acronym leading a hyphenated compound
            ("# API-driven architecture\n", "# API-Driven Architecture\n"),
            // More than one hyphenated acronym
            (
                "# GPU-accelerated CPU-intensive tasks\n",
                "# GPU-Accelerated CPU-Intensive Tasks\n",
            ),
        ];

        for (input, expected) in cases {
            assert_eq!(fixed_output(&rule, input), expected);
        }
    }

    #[test]
    fn test_single_letters_not_treated_as_acronyms() {
        // One-letter words get ordinary title-case handling: "i" is capitalized as the
        // first word while the article "a" stays lowercase.
        let fixed = fixed_output(&create_rule(), "# i am a heading\n");
        assert_eq!(fixed, "# I Am a Heading\n");
    }

    #[test]
    fn test_lowercase_terms_need_ignore_words() {
        let heading = "# using npm packages\n";

        // With the default config, "npm" is capitalized like any other word.
        assert_eq!(fixed_output(&create_rule(), heading), "# Using Npm Packages\n");

        // Listing it in ignore_words keeps it lowercase.
        let ignoring_npm = rule_from(MD063Config {
            ignore_words: vec!["npm".to_string()],
            ..Default::default()
        });
        assert_eq!(fixed_output(&ignoring_npm, heading), "# Using npm Packages\n");
    }

    #[test]
    fn test_acronyms_with_mixed_case_preserved() {
        // An all-caps acronym (API) and a mixed-case word (GitHub) in the same heading
        // are both left as written.
        let fixed = fixed_output(&create_rule(), "# using API with GitHub\n");
        assert_eq!(fixed, "# Using API with GitHub\n");
    }

    #[test]
    fn test_real_world_acronyms() {
        let rule = create_rule();

        // Technical acronyms of the kind found in real repositories.
        let cases = [
            (
                "# FFI bindings for CPU optimization\n",
                "# FFI Bindings for CPU Optimization\n",
            ),
            (
                "# DOM manipulation and SSR rendering\n",
                "# DOM Manipulation and SSR Rendering\n",
            ),
            ("# CVE security and RNN models\n", "# CVE Security and RNN Models\n"),
        ];

        for (input, expected) in cases {
            assert_eq!(fixed_output(&rule, input), expected);
        }
    }

    #[test]
    fn test_is_all_caps_acronym() {
        let rule = create_rule();

        // Two or more characters, all uppercase (digits allowed) => acronym.
        for word in ["API", "IO", "GPU", "HTTP2"] {
            assert!(rule.is_all_caps_acronym(word), "{word:?} should count as an acronym");
        }

        // Single letters, and anything containing a lowercase letter, are not acronyms.
        for word in ["A", "I", "Api", "npm", "iPhone"] {
            assert!(
                !rule.is_all_caps_acronym(word),
                "{word:?} should not count as an acronym"
            );
        }
    }

    // ---------------------------------------------------------------------
    // Issue #215: ignore-words must also apply to the first word in sentence case
    // ---------------------------------------------------------------------

    #[test]
    fn test_sentence_case_ignore_words_first_word() {
        let rule = rule_from(MD063Config {
            style: HeadingCapStyle::SentenceCase,
            ignore_words: vec!["nvim".to_string()],
            ..Default::default()
        });

        // A leading "nvim" must be left exactly as written.
        let content = "# nvim config\n";
        let result = warnings_for(&rule, content);
        assert!(
            result.is_empty(),
            "nvim in ignore-words should not be flagged. Got: {result:?}"
        );

        // The fixer has to agree with the checker.
        assert_eq!(fixed_output(&rule, content), "# nvim config\n");
    }

    #[test]
    fn test_sentence_case_ignore_words_not_first() {
        let rule = rule_from(MD063Config {
            style: HeadingCapStyle::SentenceCase,
            ignore_words: vec!["nvim".to_string()],
            ..Default::default()
        });

        // The same word in the middle of the heading is preserved too.
        let result = warnings_for(&rule, "# Using nvim editor\n");
        assert!(
            result.is_empty(),
            "nvim in ignore-words should be preserved. Got: {result:?}"
        );
    }

    // ---------------------------------------------------------------------
    // Issue #216: preserve-cased-words must handle "iOS"
    // ---------------------------------------------------------------------

    #[test]
    fn test_preserve_cased_words_ios() {
        let rule = rule_from(MD063Config {
            style: HeadingCapStyle::SentenceCase,
            preserve_cased_words: true,
            ..Default::default()
        });

        // "iOS" mixes a lowercase 'i' with uppercase 'OS' and must be kept.
        let content = "## This is iOS\n";
        let result = warnings_for(&rule, content);
        assert!(
            result.is_empty(),
            "iOS should be preserved with preserve-cased-words. Got: {result:?}"
        );

        // The fixer has to agree with the checker.
        assert_eq!(fixed_output(&rule, content), "## This is iOS\n");
    }

    #[test]
    fn test_preserve_cased_words_ios_title_case() {
        let rule = rule_from(MD063Config {
            style: HeadingCapStyle::TitleCase,
            preserve_cased_words: true,
            ..Default::default()
        });

        // Title case must leave "iOS" alone as well.
        assert_eq!(fixed_output(&rule, "# developing for iOS\n"), "# Developing for iOS\n");
    }

    #[test]
    fn test_has_internal_capitals_ios() {
        let rule = create_rule();

        // The motivating case from the issue.
        assert!(
            rule.has_internal_capitals("iOS"),
            "iOS has mixed case (lowercase i, uppercase OS)"
        );

        // Further mixed-case brand names.
        for word in ["iPhone", "macOS", "GitHub", "JavaScript", "eBay"] {
            assert!(rule.has_internal_capitals(word), "{word:?} should have internal capitals");
        }

        // Not mixed case: all-caps words (covered by is_all_caps_acronym instead),
        // all-lowercase words, and words with only a leading capital.
        for word in ["API", "GPU", "npm", "config", "The", "Hello"] {
            assert!(
                !rule.has_internal_capitals(word),
                "{word:?} should not have internal capitals"
            );
        }
    }
}