Skip to main content

rumdl_lib/rules/md063_heading_capitalization/
mod.rs

1/// Rule MD063: Heading capitalization
2///
3/// See [docs/md063.md](../../docs/md063.md) for full documentation, configuration, and examples.
4///
5/// This rule enforces consistent capitalization styles for markdown headings.
6/// It supports title case, sentence case, and all caps styles.
7///
8/// **Note:** This rule is disabled by default. Enable it in your configuration:
9/// ```toml
10/// [MD063]
11/// enabled = true
12/// style = "title_case"
13/// ```
14use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
15use crate::utils::range_utils::LineIndex;
16use regex::Regex;
17use std::collections::HashSet;
18use std::ops::Range;
19use std::sync::LazyLock;
20
21mod md063_config;
22pub use md063_config::{HeadingCapStyle, MD063Config};
23
24// Regex to match inline code spans (backticks)
25static INLINE_CODE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"`+[^`]+`+").unwrap());
26
27// Regex to match markdown links [text](url) or [text][ref]
28static LINK_REGEX: LazyLock<Regex> =
29    LazyLock::new(|| Regex::new(r"\[([^\]]*)\]\([^)]*\)|\[([^\]]*)\]\[[^\]]*\]").unwrap());
30
31// Regex to match inline HTML tags commonly used in headings
32// Matches paired tags: <tag>content</tag>, <tag attr="val">content</tag>
33// Matches self-closing: <tag/>, <tag />
34// Uses explicit list of common inline tags to avoid backreference (not supported in Rust regex)
35static HTML_TAG_REGEX: LazyLock<Regex> = LazyLock::new(|| {
36    // Common inline HTML tags used in documentation headings
37    let tags = "kbd|abbr|code|span|sub|sup|mark|cite|dfn|var|samp|small|strong|em|b|i|u|s|q|br|wbr";
38    let pattern = format!(r"<({tags})(?:\s[^>]*)?>.*?</({tags})>|<({tags})(?:\s[^>]*)?\s*/?>");
39    Regex::new(&pattern).unwrap()
40});
41
42// Regex to match custom header IDs {#id}
43static CUSTOM_ID_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s*\{#[^}]+\}\s*$").unwrap());
44
/// One contiguous piece of a heading, classified by how capitalization
/// should treat it.
#[derive(Clone, Debug)]
enum HeadingSegment {
    /// Plain text; the configured capitalization style is applied to it.
    Text(String),
    /// Inline code span, emitted unchanged.
    Code(String),
    /// Markdown link. Only the link text is re-capitalized; everything
    /// outside `text_start..text_end` is emitted unchanged.
    Link {
        /// The complete link source as matched.
        full: String,
        /// Byte offset within `full` where the link text begins.
        text_start: usize,
        /// Byte offset within `full` where the link text ends (exclusive).
        text_end: usize,
    },
    /// Inline HTML, emitted unchanged.
    Html(String),
}
61
62/// Rule MD063: Heading capitalization
63#[derive(Clone)]
64pub struct MD063HeadingCapitalization {
65    config: MD063Config,
66    lowercase_set: HashSet<String>,
67    /// Multi-word proper names from MD044 that must survive sentence-case transformation.
68    /// Populated via `from_config` when both rules are active.
69    proper_names: Vec<String>,
70}
71
72impl Default for MD063HeadingCapitalization {
73    fn default() -> Self {
74        Self::new()
75    }
76}
77
78impl MD063HeadingCapitalization {
79    pub fn new() -> Self {
80        let config = MD063Config::default();
81        let lowercase_set = config.lowercase_words.iter().cloned().collect();
82        Self {
83            config,
84            lowercase_set,
85            proper_names: Vec::new(),
86        }
87    }
88
89    pub fn from_config_struct(config: MD063Config) -> Self {
90        let lowercase_set = config.lowercase_words.iter().cloned().collect();
91        Self {
92            config,
93            lowercase_set,
94            proper_names: Vec::new(),
95        }
96    }
97
98    /// Match `pattern_lower` at `start` in `text` using Unicode-aware lowercasing.
99    /// Returns the end byte offset in `text` when the match succeeds.
100    ///
101    /// This avoids converting the full `text` to lowercase and then reusing those
102    /// offsets on the original string, which can panic for case-fold expansions
103    /// (e.g. `İ` -> `i̇`).
104    fn match_case_insensitive_at(text: &str, start: usize, pattern_lower: &str) -> Option<usize> {
105        if start > text.len() || !text.is_char_boundary(start) || pattern_lower.is_empty() {
106            return None;
107        }
108
109        let mut matched_bytes = 0;
110
111        for (offset, ch) in text[start..].char_indices() {
112            if matched_bytes >= pattern_lower.len() {
113                break;
114            }
115
116            let lowered: String = ch.to_lowercase().collect();
117            if !pattern_lower[matched_bytes..].starts_with(&lowered) {
118                return None;
119            }
120
121            matched_bytes += lowered.len();
122
123            if matched_bytes == pattern_lower.len() {
124                return Some(start + offset + ch.len_utf8());
125            }
126        }
127
128        None
129    }
130
131    /// Find the next case-insensitive match of `pattern_lower` in `text`,
132    /// returning byte offsets in the ORIGINAL string.
133    fn find_case_insensitive_match(text: &str, pattern_lower: &str, search_start: usize) -> Option<(usize, usize)> {
134        if pattern_lower.is_empty() || search_start >= text.len() || !text.is_char_boundary(search_start) {
135            return None;
136        }
137
138        for (offset, _) in text[search_start..].char_indices() {
139            let start = search_start + offset;
140            if let Some(end) = Self::match_case_insensitive_at(text, start, pattern_lower) {
141                return Some((start, end));
142            }
143        }
144
145        None
146    }
147
148    /// Build a map from word byte-position → canonical form for all proper names
149    /// that appear in the heading text (case-insensitive phrase match).
150    ///
151    /// This is used in `apply_sentence_case` so that words belonging to a proper
152    /// name phrase are never lowercased to begin with.
153    fn proper_name_canonical_forms(&self, text: &str) -> std::collections::HashMap<usize, &str> {
154        let mut map = std::collections::HashMap::new();
155
156        for name in &self.proper_names {
157            if name.is_empty() {
158                continue;
159            }
160            let name_lower = name.to_lowercase();
161            let canonical_words: Vec<&str> = name.split_whitespace().collect();
162            if canonical_words.is_empty() {
163                continue;
164            }
165            let mut search_start = 0;
166
167            while search_start < text.len() {
168                let Some((abs_pos, end_pos)) = Self::find_case_insensitive_match(text, &name_lower, search_start)
169                else {
170                    break;
171                };
172
173                // Require word boundaries
174                let before_ok = abs_pos == 0 || !text[..abs_pos].chars().last().is_some_and(|c| c.is_alphanumeric());
175                let after_ok =
176                    end_pos >= text.len() || !text[end_pos..].chars().next().is_some_and(|c| c.is_alphanumeric());
177
178                if before_ok && after_ok {
179                    // Map each word in the matched region to its canonical form.
180                    // We zip the words found in the text slice with the words of the
181                    // canonical name so that every word gets the right casing.
182                    let text_slice = &text[abs_pos..end_pos];
183                    let mut word_idx = 0;
184                    let mut slice_offset = 0;
185
186                    for text_word in text_slice.split_whitespace() {
187                        if let Some(w_rel) = text_slice[slice_offset..].find(text_word) {
188                            let word_abs = abs_pos + slice_offset + w_rel;
189                            if let Some(&canonical_word) = canonical_words.get(word_idx) {
190                                map.insert(word_abs, canonical_word);
191                            }
192                            slice_offset += w_rel + text_word.len();
193                            word_idx += 1;
194                        }
195                    }
196                }
197
198                // Advance by one Unicode scalar value to allow overlapping matches
199                // while staying on a UTF-8 char boundary.
200                search_start = abs_pos + text[abs_pos..].chars().next().map_or(1, |c| c.len_utf8());
201            }
202        }
203
204        map
205    }
206
207    /// Check if a word has internal capitals (like "iPhone", "macOS", "GitHub", "iOS")
208    fn has_internal_capitals(&self, word: &str) -> bool {
209        let chars: Vec<char> = word.chars().collect();
210        if chars.len() < 2 {
211            return false;
212        }
213
214        let first = chars[0];
215        let rest = &chars[1..];
216        let has_upper_in_rest = rest.iter().any(|c| c.is_uppercase());
217        let has_lower_in_rest = rest.iter().any(|c| c.is_lowercase());
218
219        // Case 1: Mixed case after first character (like "iPhone", "macOS", "GitHub", "JavaScript")
220        if has_upper_in_rest && has_lower_in_rest {
221            return true;
222        }
223
224        // Case 2: Lowercase first + uppercase in rest (like "iOS", "eBay")
225        if first.is_lowercase() && has_upper_in_rest {
226            return true;
227        }
228
229        false
230    }
231
232    /// Check if a word is an all-caps acronym (2+ consecutive uppercase letters)
233    /// Examples: "API", "GPU", "HTTP2", "IO" return true
234    /// Examples: "A", "iPhone", "npm" return false
235    fn is_all_caps_acronym(&self, word: &str) -> bool {
236        // Skip single-letter words (handled by title case rules)
237        if word.len() < 2 {
238            return false;
239        }
240
241        let mut consecutive_upper = 0;
242        let mut max_consecutive = 0;
243
244        for c in word.chars() {
245            if c.is_uppercase() {
246                consecutive_upper += 1;
247                max_consecutive = max_consecutive.max(consecutive_upper);
248            } else if c.is_lowercase() {
249                // Any lowercase letter means not all-caps
250                return false;
251            } else {
252                // Non-letter (number, punctuation) - reset counter but don't fail
253                consecutive_upper = 0;
254            }
255        }
256
257        // Must have at least 2 consecutive uppercase letters
258        max_consecutive >= 2
259    }
260
261    /// Check if a word should be preserved as-is
262    fn should_preserve_word(&self, word: &str) -> bool {
263        // Check ignore_words list (case-sensitive exact match)
264        if self.config.ignore_words.iter().any(|w| w == word) {
265            return true;
266        }
267
268        // Check if word has internal capitals and preserve_cased_words is enabled
269        if self.config.preserve_cased_words && self.has_internal_capitals(word) {
270            return true;
271        }
272
273        // Check if word is an all-caps acronym (2+ consecutive uppercase)
274        if self.config.preserve_cased_words && self.is_all_caps_acronym(word) {
275            return true;
276        }
277
278        // Preserve caret notation for control characters (^A, ^Z, ^@, etc.)
279        if self.is_caret_notation(word) {
280            return true;
281        }
282
283        false
284    }
285
286    /// Check if a word is caret notation for control characters (e.g., ^A, ^C, ^Z)
287    fn is_caret_notation(&self, word: &str) -> bool {
288        let chars: Vec<char> = word.chars().collect();
289        // Pattern: ^ followed by uppercase letter or @[\]^_
290        if chars.len() >= 2 && chars[0] == '^' {
291            let second = chars[1];
292            // Control characters: ^@ (NUL) through ^_ (US), which includes ^A-^Z
293            if second.is_ascii_uppercase() || "@[\\]^_".contains(second) {
294                return true;
295            }
296        }
297        false
298    }
299
300    /// Check if a word is a "lowercase word" (articles, prepositions, etc.)
301    fn is_lowercase_word(&self, word: &str) -> bool {
302        self.lowercase_set.contains(&word.to_lowercase())
303    }
304
305    /// Apply title case to a single word
306    fn title_case_word(&self, word: &str, is_first: bool, is_last: bool) -> String {
307        if word.is_empty() {
308            return word.to_string();
309        }
310
311        // Preserve words in ignore list or with internal capitals
312        if self.should_preserve_word(word) {
313            return word.to_string();
314        }
315
316        // First and last words are always capitalized
317        if is_first || is_last {
318            return self.capitalize_first(word);
319        }
320
321        // Check if it's a lowercase word (articles, prepositions, etc.)
322        if self.is_lowercase_word(word) {
323            return Self::lowercase_preserving_composition(word);
324        }
325
326        // Regular word - capitalize first letter
327        self.capitalize_first(word)
328    }
329
330    /// Apply canonical proper-name casing while preserving any trailing punctuation
331    /// attached to the original whitespace token (e.g. `javascript,` -> `JavaScript,`).
332    fn apply_canonical_form_to_word(word: &str, canonical: &str) -> String {
333        let canonical_lower = canonical.to_lowercase();
334        if canonical_lower.is_empty() {
335            return canonical.to_string();
336        }
337
338        if let Some(end_pos) = Self::match_case_insensitive_at(word, 0, &canonical_lower) {
339            let mut out = String::with_capacity(canonical.len() + word.len().saturating_sub(end_pos));
340            out.push_str(canonical);
341            out.push_str(&word[end_pos..]);
342            out
343        } else {
344            canonical.to_string()
345        }
346    }
347
348    /// Capitalize the first letter of a word, handling Unicode properly
349    fn capitalize_first(&self, word: &str) -> String {
350        if word.is_empty() {
351            return String::new();
352        }
353
354        // Find the first alphabetic character to capitalize
355        let first_alpha_pos = word.find(|c: char| c.is_alphabetic());
356        let Some(pos) = first_alpha_pos else {
357            return word.to_string();
358        };
359
360        let prefix = &word[..pos];
361        let mut chars = word[pos..].chars();
362        let first = chars.next().unwrap();
363        // Use composition-preserving uppercase to avoid decomposing
364        // precomposed characters (e.g., ῷ → Ω + combining marks + Ι)
365        let first_upper = Self::uppercase_preserving_composition(&first.to_string());
366        let rest: String = chars.collect();
367        let rest_lower = Self::lowercase_preserving_composition(&rest);
368        format!("{prefix}{first_upper}{rest_lower}")
369    }
370
371    /// Lowercase a string character-by-character, preserving precomposed
372    /// characters that would decompose during case conversion.
373    fn lowercase_preserving_composition(s: &str) -> String {
374        let mut result = String::with_capacity(s.len());
375        for c in s.chars() {
376            let lower: String = c.to_lowercase().collect();
377            if lower.chars().count() == 1 {
378                result.push_str(&lower);
379            } else {
380                // Lowercasing would decompose this character; keep original
381                result.push(c);
382            }
383        }
384        result
385    }
386
387    /// Uppercase a string character-by-character, preserving precomposed
388    /// characters that would decompose during case conversion.
389    /// For example, ῷ (U+1FF7) would decompose into Ω + combining marks + Ι
390    /// via to_uppercase(); this function keeps ῷ unchanged instead.
391    fn uppercase_preserving_composition(s: &str) -> String {
392        let mut result = String::with_capacity(s.len());
393        for c in s.chars() {
394            let upper: String = c.to_uppercase().collect();
395            if upper.chars().count() == 1 {
396                result.push_str(&upper);
397            } else {
398                // Uppercasing would decompose this character; keep original
399                result.push(c);
400            }
401        }
402        result
403    }
404
405    /// Apply title case to text, using our own title-case logic.
406    /// We avoid the external titlecase crate because it decomposes
407    /// precomposed Unicode characters during case conversion.
408    fn apply_title_case(&self, text: &str) -> String {
409        let canonical_forms = self.proper_name_canonical_forms(text);
410
411        let original_words: Vec<&str> = text.split_whitespace().collect();
412        let total_words = original_words.len();
413
414        // Pre-compute byte position of each word for canonical form lookup.
415        // Use usize::MAX as sentinel for unfound words so canonical_forms.get() returns None.
416        let mut word_positions: Vec<usize> = Vec::with_capacity(original_words.len());
417        let mut pos = 0;
418        for word in &original_words {
419            if let Some(rel) = text[pos..].find(word) {
420                word_positions.push(pos + rel);
421                pos = pos + rel + word.len();
422            } else {
423                word_positions.push(usize::MAX);
424            }
425        }
426
427        let result_words: Vec<String> = original_words
428            .iter()
429            .enumerate()
430            .map(|(i, word)| {
431                let is_first = i == 0;
432                let is_last = i == total_words - 1;
433
434                // Words that are part of an MD044 proper name use the canonical form directly.
435                if let Some(&canonical) = word_positions.get(i).and_then(|&p| canonical_forms.get(&p)) {
436                    return Self::apply_canonical_form_to_word(word, canonical);
437                }
438
439                // Preserve words in ignore list or with internal capitals
440                if self.should_preserve_word(word) {
441                    return (*word).to_string();
442                }
443
444                // Handle hyphenated words
445                if word.contains('-') {
446                    return self.handle_hyphenated_word(word, is_first, is_last);
447                }
448
449                self.title_case_word(word, is_first, is_last)
450            })
451            .collect();
452
453        result_words.join(" ")
454    }
455
456    /// Handle hyphenated words like "self-documenting"
457    fn handle_hyphenated_word(&self, word: &str, is_first: bool, is_last: bool) -> String {
458        let parts: Vec<&str> = word.split('-').collect();
459        let total_parts = parts.len();
460
461        let result_parts: Vec<String> = parts
462            .iter()
463            .enumerate()
464            .map(|(i, part)| {
465                // First part of first word and last part of last word get special treatment
466                let part_is_first = is_first && i == 0;
467                let part_is_last = is_last && i == total_parts - 1;
468                self.title_case_word(part, part_is_first, part_is_last)
469            })
470            .collect();
471
472        result_parts.join("-")
473    }
474
475    /// Apply sentence case to text
476    fn apply_sentence_case(&self, text: &str) -> String {
477        if text.is_empty() {
478            return text.to_string();
479        }
480
481        let canonical_forms = self.proper_name_canonical_forms(text);
482        let mut result = String::new();
483        let mut current_pos = 0;
484        let mut is_first_word = true;
485
486        // Use original text positions to preserve whitespace correctly
487        for word in text.split_whitespace() {
488            if let Some(pos) = text[current_pos..].find(word) {
489                let abs_pos = current_pos + pos;
490
491                // Preserve whitespace before this word
492                result.push_str(&text[current_pos..abs_pos]);
493
494                // Words that are part of an MD044 proper name use the canonical form
495                // directly, bypassing sentence-case lowercasing entirely.
496                if let Some(&canonical) = canonical_forms.get(&abs_pos) {
497                    result.push_str(&Self::apply_canonical_form_to_word(word, canonical));
498                    is_first_word = false;
499                } else if is_first_word {
500                    // Check if word should be preserved BEFORE any capitalization
501                    if self.should_preserve_word(word) {
502                        // Preserve ignore-words exactly as-is, even at start
503                        result.push_str(word);
504                    } else {
505                        // First word: capitalize first letter, lowercase rest
506                        let mut chars = word.chars();
507                        if let Some(first) = chars.next() {
508                            result.push_str(&Self::uppercase_preserving_composition(&first.to_string()));
509                            let rest: String = chars.collect();
510                            result.push_str(&Self::lowercase_preserving_composition(&rest));
511                        }
512                    }
513                    is_first_word = false;
514                } else {
515                    // Non-first words: preserve if needed, otherwise lowercase
516                    if self.should_preserve_word(word) {
517                        result.push_str(word);
518                    } else {
519                        result.push_str(&Self::lowercase_preserving_composition(word));
520                    }
521                }
522
523                current_pos = abs_pos + word.len();
524            }
525        }
526
527        // Preserve any trailing whitespace
528        if current_pos < text.len() {
529            result.push_str(&text[current_pos..]);
530        }
531
532        result
533    }
534
535    /// Apply all caps to text (preserve whitespace)
536    fn apply_all_caps(&self, text: &str) -> String {
537        if text.is_empty() {
538            return text.to_string();
539        }
540
541        let canonical_forms = self.proper_name_canonical_forms(text);
542        let mut result = String::new();
543        let mut current_pos = 0;
544
545        // Use original text positions to preserve whitespace correctly
546        for word in text.split_whitespace() {
547            if let Some(pos) = text[current_pos..].find(word) {
548                let abs_pos = current_pos + pos;
549
550                // Preserve whitespace before this word
551                result.push_str(&text[current_pos..abs_pos]);
552
553                // Words that are part of an MD044 proper name use the canonical form directly.
554                // This prevents oscillation with MD044 when all-caps style is active.
555                if let Some(&canonical) = canonical_forms.get(&abs_pos) {
556                    result.push_str(&Self::apply_canonical_form_to_word(word, canonical));
557                } else if self.should_preserve_word(word) {
558                    result.push_str(word);
559                } else {
560                    result.push_str(&Self::uppercase_preserving_composition(word));
561                }
562
563                current_pos = abs_pos + word.len();
564            }
565        }
566
567        // Preserve any trailing whitespace
568        if current_pos < text.len() {
569            result.push_str(&text[current_pos..]);
570        }
571
572        result
573    }
574
575    /// Parse heading text into segments
576    fn parse_segments(&self, text: &str) -> Vec<HeadingSegment> {
577        let mut segments = Vec::new();
578        let mut last_end = 0;
579
580        // Collect all special regions (code and links)
581        let mut special_regions: Vec<(usize, usize, HeadingSegment)> = Vec::new();
582
583        // Find inline code spans
584        for mat in INLINE_CODE_REGEX.find_iter(text) {
585            special_regions.push((mat.start(), mat.end(), HeadingSegment::Code(mat.as_str().to_string())));
586        }
587
588        // Find links
589        for caps in LINK_REGEX.captures_iter(text) {
590            let full_match = caps.get(0).unwrap();
591            let text_match = caps.get(1).or_else(|| caps.get(2));
592
593            if let Some(text_m) = text_match {
594                special_regions.push((
595                    full_match.start(),
596                    full_match.end(),
597                    HeadingSegment::Link {
598                        full: full_match.as_str().to_string(),
599                        text_start: text_m.start() - full_match.start(),
600                        text_end: text_m.end() - full_match.start(),
601                    },
602                ));
603            }
604        }
605
606        // Find inline HTML tags
607        for mat in HTML_TAG_REGEX.find_iter(text) {
608            special_regions.push((mat.start(), mat.end(), HeadingSegment::Html(mat.as_str().to_string())));
609        }
610
611        // Sort by start position
612        special_regions.sort_by_key(|(start, _, _)| *start);
613
614        // Remove overlapping regions (code takes precedence)
615        let mut filtered_regions: Vec<(usize, usize, HeadingSegment)> = Vec::new();
616        for region in special_regions {
617            let overlaps = filtered_regions.iter().any(|(s, e, _)| region.0 < *e && region.1 > *s);
618            if !overlaps {
619                filtered_regions.push(region);
620            }
621        }
622
623        // Build segments
624        for (start, end, segment) in filtered_regions {
625            // Add text before this special region
626            if start > last_end {
627                let text_segment = &text[last_end..start];
628                if !text_segment.is_empty() {
629                    segments.push(HeadingSegment::Text(text_segment.to_string()));
630                }
631            }
632            segments.push(segment);
633            last_end = end;
634        }
635
636        // Add remaining text
637        if last_end < text.len() {
638            let remaining = &text[last_end..];
639            if !remaining.is_empty() {
640                segments.push(HeadingSegment::Text(remaining.to_string()));
641            }
642        }
643
644        // If no segments were found, treat the whole thing as text
645        if segments.is_empty() && !text.is_empty() {
646            segments.push(HeadingSegment::Text(text.to_string()));
647        }
648
649        segments
650    }
651
652    /// Apply capitalization to heading text
653    fn apply_capitalization(&self, text: &str) -> String {
654        // Strip custom ID if present and re-add later
655        let (main_text, custom_id) = if let Some(mat) = CUSTOM_ID_REGEX.find(text) {
656            (&text[..mat.start()], Some(mat.as_str()))
657        } else {
658            (text, None)
659        };
660
661        // Parse into segments
662        let segments = self.parse_segments(main_text);
663
664        // Count text segments to determine first/last word context
665        let text_segments: Vec<usize> = segments
666            .iter()
667            .enumerate()
668            .filter_map(|(i, s)| matches!(s, HeadingSegment::Text(_)).then_some(i))
669            .collect();
670
671        // Determine if the first segment overall is a text segment
672        // For sentence case: if heading starts with code/link, the first text segment
673        // should NOT capitalize its first word (the heading already has a "first element")
674        let first_segment_is_text = segments
675            .first()
676            .map(|s| matches!(s, HeadingSegment::Text(_)))
677            .unwrap_or(false);
678
679        // Determine if the last segment overall is a text segment
680        // If the last segment is Code or Link, then the last text segment should NOT
681        // treat its last word as the heading's last word (for lowercase-words respect)
682        let last_segment_is_text = segments
683            .last()
684            .map(|s| matches!(s, HeadingSegment::Text(_)))
685            .unwrap_or(false);
686
687        // Apply capitalization to each segment
688        let mut result_parts: Vec<String> = Vec::new();
689
690        for (i, segment) in segments.iter().enumerate() {
691            match segment {
692                HeadingSegment::Text(t) => {
693                    let is_first_text = text_segments.first() == Some(&i);
694                    // A text segment is "last" only if it's the last text segment AND
695                    // the last segment overall is also text. If there's Code/Link after,
696                    // the last word should respect lowercase-words.
697                    let is_last_text = text_segments.last() == Some(&i) && last_segment_is_text;
698
699                    let capitalized = match self.config.style {
700                        HeadingCapStyle::TitleCase => self.apply_title_case_segment(t, is_first_text, is_last_text),
701                        HeadingCapStyle::SentenceCase => {
702                            // For sentence case, only capitalize first word if:
703                            // 1. This is the first text segment, AND
704                            // 2. The heading actually starts with text (not code/link)
705                            if is_first_text && first_segment_is_text {
706                                self.apply_sentence_case(t)
707                            } else {
708                                // Non-first segments OR heading starts with code/link
709                                self.apply_sentence_case_non_first(t)
710                            }
711                        }
712                        HeadingCapStyle::AllCaps => self.apply_all_caps(t),
713                    };
714                    result_parts.push(capitalized);
715                }
716                HeadingSegment::Code(c) => {
717                    result_parts.push(c.clone());
718                }
719                HeadingSegment::Link {
720                    full,
721                    text_start,
722                    text_end,
723                } => {
724                    // Apply capitalization to link text only
725                    let link_text = &full[*text_start..*text_end];
726                    let capitalized_text = match self.config.style {
727                        HeadingCapStyle::TitleCase => self.apply_title_case(link_text),
728                        // For sentence case, apply same preservation logic as non-first text
729                        // This preserves acronyms (API), brand names (iPhone), etc.
730                        HeadingCapStyle::SentenceCase => self.apply_sentence_case_non_first(link_text),
731                        HeadingCapStyle::AllCaps => self.apply_all_caps(link_text),
732                    };
733
734                    let mut new_link = String::new();
735                    new_link.push_str(&full[..*text_start]);
736                    new_link.push_str(&capitalized_text);
737                    new_link.push_str(&full[*text_end..]);
738                    result_parts.push(new_link);
739                }
740                HeadingSegment::Html(h) => {
741                    // Preserve HTML tags as-is (like code)
742                    result_parts.push(h.clone());
743                }
744            }
745        }
746
747        let mut result = result_parts.join("");
748
749        // Re-add custom ID if present
750        if let Some(id) = custom_id {
751            result.push_str(id);
752        }
753
754        result
755    }
756
757    /// Apply title case to a text segment with first/last awareness
758    fn apply_title_case_segment(&self, text: &str, is_first_segment: bool, is_last_segment: bool) -> String {
759        let canonical_forms = self.proper_name_canonical_forms(text);
760        let words: Vec<&str> = text.split_whitespace().collect();
761        let total_words = words.len();
762
763        if total_words == 0 {
764            return text.to_string();
765        }
766
767        // Pre-compute byte position of each word so we can look up canonical forms.
768        // Use usize::MAX as sentinel for unfound words so canonical_forms.get() returns None.
769        let mut word_positions: Vec<usize> = Vec::with_capacity(words.len());
770        let mut pos = 0;
771        for word in &words {
772            if let Some(rel) = text[pos..].find(word) {
773                word_positions.push(pos + rel);
774                pos = pos + rel + word.len();
775            } else {
776                word_positions.push(usize::MAX);
777            }
778        }
779
780        let result_words: Vec<String> = words
781            .iter()
782            .enumerate()
783            .map(|(i, word)| {
784                let is_first = is_first_segment && i == 0;
785                let is_last = is_last_segment && i == total_words - 1;
786
787                // Words that are part of an MD044 proper name use the canonical form directly.
788                if let Some(&canonical) = word_positions.get(i).and_then(|&p| canonical_forms.get(&p)) {
789                    return Self::apply_canonical_form_to_word(word, canonical);
790                }
791
792                // Handle hyphenated words
793                if word.contains('-') {
794                    return self.handle_hyphenated_word(word, is_first, is_last);
795                }
796
797                self.title_case_word(word, is_first, is_last)
798            })
799            .collect();
800
801        // Preserve original spacing
802        let mut result = String::new();
803        let mut word_iter = result_words.iter();
804        let mut in_word = false;
805
806        for c in text.chars() {
807            if c.is_whitespace() {
808                if in_word {
809                    in_word = false;
810                }
811                result.push(c);
812            } else if !in_word {
813                if let Some(word) = word_iter.next() {
814                    result.push_str(word);
815                }
816                in_word = true;
817            }
818        }
819
820        result
821    }
822
823    /// Apply sentence case to non-first segments (just lowercase, preserve whitespace)
824    fn apply_sentence_case_non_first(&self, text: &str) -> String {
825        if text.is_empty() {
826            return text.to_string();
827        }
828
829        let canonical_forms = self.proper_name_canonical_forms(text);
830        let mut result = String::new();
831        let mut current_pos = 0;
832
833        // Iterate over words in the original text so byte positions are consistent
834        // with the positions in canonical_forms (built from the same text).
835        for word in text.split_whitespace() {
836            if let Some(pos) = text[current_pos..].find(word) {
837                let abs_pos = current_pos + pos;
838
839                // Preserve whitespace before this word
840                result.push_str(&text[current_pos..abs_pos]);
841
842                // Words that are part of an MD044 proper name use the canonical form directly.
843                if let Some(&canonical) = canonical_forms.get(&abs_pos) {
844                    result.push_str(&Self::apply_canonical_form_to_word(word, canonical));
845                } else if self.should_preserve_word(word) {
846                    result.push_str(word);
847                } else {
848                    result.push_str(&Self::lowercase_preserving_composition(word));
849                }
850
851                current_pos = abs_pos + word.len();
852            }
853        }
854
855        // Preserve any trailing whitespace
856        if current_pos < text.len() {
857            result.push_str(&text[current_pos..]);
858        }
859
860        result
861    }
862
863    /// Get byte range for a line
864    fn get_line_byte_range(&self, content: &str, line_num: usize, line_index: &LineIndex) -> Range<usize> {
865        let start_pos = line_index.get_line_start_byte(line_num).unwrap_or(content.len());
866        let line = content.lines().nth(line_num - 1).unwrap_or("");
867        Range {
868            start: start_pos,
869            end: start_pos + line.len(),
870        }
871    }
872
873    /// Fix an ATX heading line
874    fn fix_atx_heading(&self, _line: &str, heading: &crate::lint_context::HeadingInfo) -> String {
875        // Parse the line to preserve structure
876        let indent = " ".repeat(heading.marker_column);
877        let hashes = "#".repeat(heading.level as usize);
878
879        // Apply capitalization to the text
880        let fixed_text = self.apply_capitalization(&heading.raw_text);
881
882        // Reconstruct with closing sequence if present
883        let closing = &heading.closing_sequence;
884        if heading.has_closing_sequence {
885            format!("{indent}{hashes} {fixed_text} {closing}")
886        } else {
887            format!("{indent}{hashes} {fixed_text}")
888        }
889    }
890
891    /// Fix a Setext heading line
892    fn fix_setext_heading(&self, line: &str, heading: &crate::lint_context::HeadingInfo) -> String {
893        // Apply capitalization to the text
894        let fixed_text = self.apply_capitalization(&heading.raw_text);
895
896        // Preserve leading whitespace from original line
897        let leading_ws: String = line.chars().take_while(|c| c.is_whitespace()).collect();
898
899        format!("{leading_ws}{fixed_text}")
900    }
901}
902
903impl Rule for MD063HeadingCapitalization {
904    fn name(&self) -> &'static str {
905        "MD063"
906    }
907
908    fn description(&self) -> &'static str {
909        "Heading capitalization"
910    }
911
912    fn category(&self) -> RuleCategory {
913        RuleCategory::Heading
914    }
915
916    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
917        !ctx.likely_has_headings() || !ctx.lines.iter().any(|line| line.heading.is_some())
918    }
919
920    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
921        let content = ctx.content;
922
923        if content.is_empty() {
924            return Ok(Vec::new());
925        }
926
927        let mut warnings = Vec::new();
928        let line_index = &ctx.line_index;
929
930        for (line_num, line_info) in ctx.lines.iter().enumerate() {
931            if let Some(heading) = &line_info.heading {
932                // Check level filter
933                if heading.level < self.config.min_level || heading.level > self.config.max_level {
934                    continue;
935                }
936
937                // Skip headings in code blocks (indented headings)
938                if line_info.visual_indent >= 4 && matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
939                    continue;
940                }
941
942                // Skip invalid headings (e.g., `#tag` which lacks required space after #)
943                if !heading.is_valid {
944                    continue;
945                }
946
947                // Apply capitalization and compare
948                let original_text = &heading.raw_text;
949                let fixed_text = self.apply_capitalization(original_text);
950
951                if original_text != &fixed_text {
952                    let line = line_info.content(ctx.content);
953                    let style_name = match self.config.style {
954                        HeadingCapStyle::TitleCase => "title case",
955                        HeadingCapStyle::SentenceCase => "sentence case",
956                        HeadingCapStyle::AllCaps => "ALL CAPS",
957                    };
958
959                    warnings.push(LintWarning {
960                        rule_name: Some(self.name().to_string()),
961                        line: line_num + 1,
962                        column: heading.content_column + 1,
963                        end_line: line_num + 1,
964                        end_column: heading.content_column + 1 + original_text.len(),
965                        message: format!("Heading should use {style_name}: '{original_text}' -> '{fixed_text}'"),
966                        severity: Severity::Warning,
967                        fix: Some(Fix {
968                            range: self.get_line_byte_range(content, line_num + 1, line_index),
969                            replacement: match heading.style {
970                                crate::lint_context::HeadingStyle::ATX => self.fix_atx_heading(line, heading),
971                                _ => self.fix_setext_heading(line, heading),
972                            },
973                        }),
974                    });
975                }
976            }
977        }
978
979        Ok(warnings)
980    }
981
982    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
983        let content = ctx.content;
984
985        if content.is_empty() {
986            return Ok(content.to_string());
987        }
988
989        let lines = ctx.raw_lines();
990        let mut fixed_lines: Vec<String> = lines.iter().map(|&s| s.to_string()).collect();
991
992        for (line_num, line_info) in ctx.lines.iter().enumerate() {
993            // Skip lines where the rule is disabled via inline config
994            if ctx.is_rule_disabled(self.name(), line_num + 1) {
995                continue;
996            }
997
998            if let Some(heading) = &line_info.heading {
999                // Check level filter
1000                if heading.level < self.config.min_level || heading.level > self.config.max_level {
1001                    continue;
1002                }
1003
1004                // Skip headings in code blocks
1005                if line_info.visual_indent >= 4 && matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
1006                    continue;
1007                }
1008
1009                // Skip invalid headings (e.g., `#tag` which lacks required space after #)
1010                if !heading.is_valid {
1011                    continue;
1012                }
1013
1014                let original_text = &heading.raw_text;
1015                let fixed_text = self.apply_capitalization(original_text);
1016
1017                if original_text != &fixed_text {
1018                    let line = line_info.content(ctx.content);
1019                    fixed_lines[line_num] = match heading.style {
1020                        crate::lint_context::HeadingStyle::ATX => self.fix_atx_heading(line, heading),
1021                        _ => self.fix_setext_heading(line, heading),
1022                    };
1023                }
1024            }
1025        }
1026
1027        // Reconstruct content preserving line endings
1028        let mut result = String::with_capacity(content.len());
1029        for (i, line) in fixed_lines.iter().enumerate() {
1030            result.push_str(line);
1031            if i < fixed_lines.len() - 1 || content.ends_with('\n') {
1032                result.push('\n');
1033            }
1034        }
1035
1036        Ok(result)
1037    }
1038
1039    fn as_any(&self) -> &dyn std::any::Any {
1040        self
1041    }
1042
1043    fn default_config_section(&self) -> Option<(String, toml::Value)> {
1044        let json_value = serde_json::to_value(&self.config).ok()?;
1045        Some((
1046            self.name().to_string(),
1047            crate::rule_config_serde::json_to_toml_value(&json_value)?,
1048        ))
1049    }
1050
1051    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
1052    where
1053        Self: Sized,
1054    {
1055        let rule_config = crate::rule_config_serde::load_rule_config::<MD063Config>(config);
1056        let md044_config =
1057            crate::rule_config_serde::load_rule_config::<crate::rules::md044_proper_names::MD044Config>(config);
1058        let mut rule = Self::from_config_struct(rule_config);
1059        rule.proper_names = md044_config.names;
1060        Box::new(rule)
1061    }
1062}
1063
1064#[cfg(test)]
1065mod tests {
1066    use super::*;
1067    use crate::lint_context::LintContext;
1068
1069    fn create_rule() -> MD063HeadingCapitalization {
1070        let config = MD063Config {
1071            enabled: true,
1072            ..Default::default()
1073        };
1074        MD063HeadingCapitalization::from_config_struct(config)
1075    }
1076
1077    fn create_rule_with_style(style: HeadingCapStyle) -> MD063HeadingCapitalization {
1078        let config = MD063Config {
1079            enabled: true,
1080            style,
1081            ..Default::default()
1082        };
1083        MD063HeadingCapitalization::from_config_struct(config)
1084    }
1085
1086    // Title case tests
1087    #[test]
1088    fn test_title_case_basic() {
1089        let rule = create_rule();
1090        let content = "# hello world\n";
1091        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1092        let result = rule.check(&ctx).unwrap();
1093        assert_eq!(result.len(), 1);
1094        assert!(result[0].message.contains("Hello World"));
1095    }
1096
1097    #[test]
1098    fn test_title_case_lowercase_words() {
1099        let rule = create_rule();
1100        let content = "# the quick brown fox\n";
1101        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1102        let result = rule.check(&ctx).unwrap();
1103        assert_eq!(result.len(), 1);
1104        // "The" should be capitalized (first word), "quick", "brown", "fox" should be capitalized
1105        assert!(result[0].message.contains("The Quick Brown Fox"));
1106    }
1107
1108    #[test]
1109    fn test_title_case_already_correct() {
1110        let rule = create_rule();
1111        let content = "# The Quick Brown Fox\n";
1112        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1113        let result = rule.check(&ctx).unwrap();
1114        assert!(result.is_empty(), "Already correct heading should not be flagged");
1115    }
1116
1117    #[test]
1118    fn test_title_case_hyphenated() {
1119        let rule = create_rule();
1120        let content = "# self-documenting code\n";
1121        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1122        let result = rule.check(&ctx).unwrap();
1123        assert_eq!(result.len(), 1);
1124        assert!(result[0].message.contains("Self-Documenting Code"));
1125    }
1126
1127    // Sentence case tests
1128    #[test]
1129    fn test_sentence_case_basic() {
1130        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
1131        let content = "# The Quick Brown Fox\n";
1132        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1133        let result = rule.check(&ctx).unwrap();
1134        assert_eq!(result.len(), 1);
1135        assert!(result[0].message.contains("The quick brown fox"));
1136    }
1137
1138    #[test]
1139    fn test_sentence_case_already_correct() {
1140        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
1141        let content = "# The quick brown fox\n";
1142        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1143        let result = rule.check(&ctx).unwrap();
1144        assert!(result.is_empty());
1145    }
1146
1147    // All caps tests
1148    #[test]
1149    fn test_all_caps_basic() {
1150        let rule = create_rule_with_style(HeadingCapStyle::AllCaps);
1151        let content = "# hello world\n";
1152        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1153        let result = rule.check(&ctx).unwrap();
1154        assert_eq!(result.len(), 1);
1155        assert!(result[0].message.contains("HELLO WORLD"));
1156    }
1157
1158    // Preserve tests
1159    #[test]
1160    fn test_preserve_ignore_words() {
1161        let config = MD063Config {
1162            enabled: true,
1163            ignore_words: vec!["iPhone".to_string(), "macOS".to_string()],
1164            ..Default::default()
1165        };
1166        let rule = MD063HeadingCapitalization::from_config_struct(config);
1167
1168        let content = "# using iPhone on macOS\n";
1169        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1170        let result = rule.check(&ctx).unwrap();
1171        assert_eq!(result.len(), 1);
1172        // iPhone and macOS should be preserved
1173        assert!(result[0].message.contains("iPhone"));
1174        assert!(result[0].message.contains("macOS"));
1175    }
1176
1177    #[test]
1178    fn test_preserve_cased_words() {
1179        let rule = create_rule();
1180        let content = "# using GitHub actions\n";
1181        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1182        let result = rule.check(&ctx).unwrap();
1183        assert_eq!(result.len(), 1);
1184        // GitHub should be preserved (has internal capital)
1185        assert!(result[0].message.contains("GitHub"));
1186    }
1187
1188    // Inline code tests
1189    #[test]
1190    fn test_inline_code_preserved() {
1191        let rule = create_rule();
1192        let content = "# using `const` in javascript\n";
1193        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1194        let result = rule.check(&ctx).unwrap();
1195        assert_eq!(result.len(), 1);
1196        // `const` should be preserved, rest capitalized
1197        assert!(result[0].message.contains("`const`"));
1198        assert!(result[0].message.contains("Javascript") || result[0].message.contains("JavaScript"));
1199    }
1200
1201    // Level filter tests
1202    #[test]
1203    fn test_level_filter() {
1204        let config = MD063Config {
1205            enabled: true,
1206            min_level: 2,
1207            max_level: 4,
1208            ..Default::default()
1209        };
1210        let rule = MD063HeadingCapitalization::from_config_struct(config);
1211
1212        let content = "# h1 heading\n## h2 heading\n### h3 heading\n##### h5 heading\n";
1213        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1214        let result = rule.check(&ctx).unwrap();
1215
1216        // Only h2 and h3 should be flagged (h1 < min_level, h5 > max_level)
1217        assert_eq!(result.len(), 2);
1218        assert_eq!(result[0].line, 2); // h2
1219        assert_eq!(result[1].line, 3); // h3
1220    }
1221
1222    // Fix tests
1223    #[test]
1224    fn test_fix_atx_heading() {
1225        let rule = create_rule();
1226        let content = "# hello world\n";
1227        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1228        let fixed = rule.fix(&ctx).unwrap();
1229        assert_eq!(fixed, "# Hello World\n");
1230    }
1231
1232    #[test]
1233    fn test_fix_multiple_headings() {
1234        let rule = create_rule();
1235        let content = "# first heading\n\n## second heading\n";
1236        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1237        let fixed = rule.fix(&ctx).unwrap();
1238        assert_eq!(fixed, "# First Heading\n\n## Second Heading\n");
1239    }
1240
1241    // Setext heading tests
1242    #[test]
1243    fn test_setext_heading() {
1244        let rule = create_rule();
1245        let content = "hello world\n============\n";
1246        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1247        let result = rule.check(&ctx).unwrap();
1248        assert_eq!(result.len(), 1);
1249        assert!(result[0].message.contains("Hello World"));
1250    }
1251
1252    // Custom ID tests
1253    #[test]
1254    fn test_custom_id_preserved() {
1255        let rule = create_rule();
1256        let content = "# getting started {#intro}\n";
1257        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1258        let result = rule.check(&ctx).unwrap();
1259        assert_eq!(result.len(), 1);
1260        // Custom ID should be preserved
1261        assert!(result[0].message.contains("{#intro}"));
1262    }
1263
    // Invalid-heading (Obsidian tag) and acronym preservation tests
1265    #[test]
1266    fn test_skip_obsidian_tags_not_headings() {
1267        let rule = create_rule();
1268
1269        // #tag (no space after #) is an Obsidian tag, not a heading
1270        let content = "# H1\n\n#tag\n";
1271        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
1272        let result = rule.check(&ctx).unwrap();
1273        assert!(
1274            result.is_empty() || result.iter().all(|w| w.line != 3),
1275            "Obsidian tag #tag should not be treated as a heading: {result:?}"
1276        );
1277    }
1278
1279    #[test]
1280    fn test_skip_invalid_atx_headings_no_space() {
1281        let rule = create_rule();
1282
1283        // #NoSpace is not a valid ATX heading (requires space after #)
1284        let content = "#notaheading\n";
1285        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1286        let result = rule.check(&ctx).unwrap();
1287        assert!(
1288            result.is_empty(),
1289            "Invalid ATX heading without space should not be flagged: {result:?}"
1290        );
1291    }
1292
1293    #[test]
1294    fn test_fix_skips_obsidian_tags() {
1295        let rule = create_rule();
1296
1297        let content = "# hello world\n\n#tag\n";
1298        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
1299        let fixed = rule.fix(&ctx).unwrap();
1300        // Should fix the real heading but leave the tag alone
1301        assert!(fixed.contains("#tag"), "Fix should not modify Obsidian tag #tag");
1302        assert!(fixed.contains("# Hello World"), "Fix should still fix real headings");
1303    }
1304
1305    #[test]
1306    fn test_preserve_all_caps_acronyms() {
1307        let rule = create_rule();
1308        let ctx = |c| LintContext::new(c, crate::config::MarkdownFlavor::Standard, None);
1309
1310        // Basic acronyms should be preserved
1311        let fixed = rule.fix(&ctx("# using API in production\n")).unwrap();
1312        assert_eq!(fixed, "# Using API in Production\n");
1313
1314        // Multiple acronyms
1315        let fixed = rule.fix(&ctx("# API and GPU integration\n")).unwrap();
1316        assert_eq!(fixed, "# API and GPU Integration\n");
1317
1318        // Two-letter acronyms
1319        let fixed = rule.fix(&ctx("# IO performance guide\n")).unwrap();
1320        assert_eq!(fixed, "# IO Performance Guide\n");
1321
1322        // Acronyms with numbers
1323        let fixed = rule.fix(&ctx("# HTTP2 and MD5 hashing\n")).unwrap();
1324        assert_eq!(fixed, "# HTTP2 and MD5 Hashing\n");
1325    }
1326
1327    #[test]
1328    fn test_preserve_acronyms_in_hyphenated_words() {
1329        let rule = create_rule();
1330        let ctx = |c| LintContext::new(c, crate::config::MarkdownFlavor::Standard, None);
1331
1332        // Acronyms at start of hyphenated word
1333        let fixed = rule.fix(&ctx("# API-driven architecture\n")).unwrap();
1334        assert_eq!(fixed, "# API-Driven Architecture\n");
1335
1336        // Multiple acronyms with hyphens
1337        let fixed = rule.fix(&ctx("# GPU-accelerated CPU-intensive tasks\n")).unwrap();
1338        assert_eq!(fixed, "# GPU-Accelerated CPU-Intensive Tasks\n");
1339    }
1340
1341    #[test]
1342    fn test_single_letters_not_treated_as_acronyms() {
1343        let rule = create_rule();
1344        let ctx = |c| LintContext::new(c, crate::config::MarkdownFlavor::Standard, None);
1345
1346        // Single uppercase letters should follow title case rules, not be preserved
1347        let fixed = rule.fix(&ctx("# i am a heading\n")).unwrap();
1348        assert_eq!(fixed, "# I Am a Heading\n");
1349    }
1350
1351    #[test]
1352    fn test_lowercase_terms_need_ignore_words() {
1353        let ctx = |c| LintContext::new(c, crate::config::MarkdownFlavor::Standard, None);
1354
1355        // Without ignore_words: npm gets capitalized
1356        let rule = create_rule();
1357        let fixed = rule.fix(&ctx("# using npm packages\n")).unwrap();
1358        assert_eq!(fixed, "# Using Npm Packages\n");
1359
1360        // With ignore_words: npm preserved
1361        let config = MD063Config {
1362            enabled: true,
1363            ignore_words: vec!["npm".to_string()],
1364            ..Default::default()
1365        };
1366        let rule = MD063HeadingCapitalization::from_config_struct(config);
1367        let fixed = rule.fix(&ctx("# using npm packages\n")).unwrap();
1368        assert_eq!(fixed, "# Using npm Packages\n");
1369    }
1370
1371    #[test]
1372    fn test_acronyms_with_mixed_case_preserved() {
1373        let rule = create_rule();
1374        let ctx = |c| LintContext::new(c, crate::config::MarkdownFlavor::Standard, None);
1375
1376        // Both acronyms (API, GPU) and mixed-case (GitHub) should be preserved
1377        let fixed = rule.fix(&ctx("# using API with GitHub\n")).unwrap();
1378        assert_eq!(fixed, "# Using API with GitHub\n");
1379    }
1380
1381    #[test]
1382    fn test_real_world_acronyms() {
1383        let rule = create_rule();
1384        let ctx = |c| LintContext::new(c, crate::config::MarkdownFlavor::Standard, None);
1385
1386        // Common technical acronyms from tested repositories
1387        let content = "# FFI bindings for CPU optimization\n";
1388        let fixed = rule.fix(&ctx(content)).unwrap();
1389        assert_eq!(fixed, "# FFI Bindings for CPU Optimization\n");
1390
1391        let content = "# DOM manipulation and SSR rendering\n";
1392        let fixed = rule.fix(&ctx(content)).unwrap();
1393        assert_eq!(fixed, "# DOM Manipulation and SSR Rendering\n");
1394
1395        let content = "# CVE security and RNN models\n";
1396        let fixed = rule.fix(&ctx(content)).unwrap();
1397        assert_eq!(fixed, "# CVE Security and RNN Models\n");
1398    }
1399
1400    #[test]
1401    fn test_is_all_caps_acronym() {
1402        let rule = create_rule();
1403
1404        // Should return true for all-caps with 2+ letters
1405        assert!(rule.is_all_caps_acronym("API"));
1406        assert!(rule.is_all_caps_acronym("IO"));
1407        assert!(rule.is_all_caps_acronym("GPU"));
1408        assert!(rule.is_all_caps_acronym("HTTP2")); // Numbers don't break it
1409
1410        // Should return false for single letters
1411        assert!(!rule.is_all_caps_acronym("A"));
1412        assert!(!rule.is_all_caps_acronym("I"));
1413
1414        // Should return false for words with lowercase
1415        assert!(!rule.is_all_caps_acronym("Api"));
1416        assert!(!rule.is_all_caps_acronym("npm"));
1417        assert!(!rule.is_all_caps_acronym("iPhone"));
1418    }
1419
1420    #[test]
1421    fn test_sentence_case_ignore_words_first_word() {
1422        let config = MD063Config {
1423            enabled: true,
1424            style: HeadingCapStyle::SentenceCase,
1425            ignore_words: vec!["nvim".to_string()],
1426            ..Default::default()
1427        };
1428        let rule = MD063HeadingCapitalization::from_config_struct(config);
1429
1430        // "nvim" as first word should be preserved exactly
1431        let content = "# nvim config\n";
1432        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1433        let result = rule.check(&ctx).unwrap();
1434        assert!(
1435            result.is_empty(),
1436            "nvim in ignore-words should not be flagged. Got: {result:?}"
1437        );
1438
1439        // Verify fix also preserves it
1440        let fixed = rule.fix(&ctx).unwrap();
1441        assert_eq!(fixed, "# nvim config\n");
1442    }
1443
1444    #[test]
1445    fn test_sentence_case_ignore_words_not_first() {
1446        let config = MD063Config {
1447            enabled: true,
1448            style: HeadingCapStyle::SentenceCase,
1449            ignore_words: vec!["nvim".to_string()],
1450            ..Default::default()
1451        };
1452        let rule = MD063HeadingCapitalization::from_config_struct(config);
1453
1454        // "nvim" in middle should also be preserved
1455        let content = "# Using nvim editor\n";
1456        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1457        let result = rule.check(&ctx).unwrap();
1458        assert!(
1459            result.is_empty(),
1460            "nvim in ignore-words should be preserved. Got: {result:?}"
1461        );
1462    }
1463
1464    #[test]
1465    fn test_preserve_cased_words_ios() {
1466        let config = MD063Config {
1467            enabled: true,
1468            style: HeadingCapStyle::SentenceCase,
1469            preserve_cased_words: true,
1470            ..Default::default()
1471        };
1472        let rule = MD063HeadingCapitalization::from_config_struct(config);
1473
1474        // "iOS" should be preserved (has mixed case: lowercase 'i' + uppercase 'OS')
1475        let content = "## This is iOS\n";
1476        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1477        let result = rule.check(&ctx).unwrap();
1478        assert!(
1479            result.is_empty(),
1480            "iOS should be preserved with preserve-cased-words. Got: {result:?}"
1481        );
1482
1483        // Verify fix also preserves it
1484        let fixed = rule.fix(&ctx).unwrap();
1485        assert_eq!(fixed, "## This is iOS\n");
1486    }
1487
1488    #[test]
1489    fn test_preserve_cased_words_ios_title_case() {
1490        let config = MD063Config {
1491            enabled: true,
1492            style: HeadingCapStyle::TitleCase,
1493            preserve_cased_words: true,
1494            ..Default::default()
1495        };
1496        let rule = MD063HeadingCapitalization::from_config_struct(config);
1497
1498        // "iOS" should be preserved in title case too
1499        let content = "# developing for iOS\n";
1500        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1501        let fixed = rule.fix(&ctx).unwrap();
1502        assert_eq!(fixed, "# Developing for iOS\n");
1503    }
1504
1505    #[test]
1506    fn test_has_internal_capitals_ios() {
1507        let rule = create_rule();
1508
1509        // iOS should be detected as having internal capitals
1510        assert!(
1511            rule.has_internal_capitals("iOS"),
1512            "iOS has mixed case (lowercase i, uppercase OS)"
1513        );
1514
1515        // Other mixed-case words
1516        assert!(rule.has_internal_capitals("iPhone"));
1517        assert!(rule.has_internal_capitals("macOS"));
1518        assert!(rule.has_internal_capitals("GitHub"));
1519        assert!(rule.has_internal_capitals("JavaScript"));
1520        assert!(rule.has_internal_capitals("eBay"));
1521
1522        // All-caps should NOT be detected (handled by is_all_caps_acronym)
1523        assert!(!rule.has_internal_capitals("API"));
1524        assert!(!rule.has_internal_capitals("GPU"));
1525
1526        // All-lowercase should NOT be detected
1527        assert!(!rule.has_internal_capitals("npm"));
1528        assert!(!rule.has_internal_capitals("config"));
1529
1530        // Regular capitalized words should NOT be detected
1531        assert!(!rule.has_internal_capitals("The"));
1532        assert!(!rule.has_internal_capitals("Hello"));
1533    }
1534
1535    #[test]
1536    fn test_lowercase_words_before_trailing_code() {
1537        let config = MD063Config {
1538            enabled: true,
1539            style: HeadingCapStyle::TitleCase,
1540            lowercase_words: vec![
1541                "a".to_string(),
1542                "an".to_string(),
1543                "and".to_string(),
1544                "at".to_string(),
1545                "but".to_string(),
1546                "by".to_string(),
1547                "for".to_string(),
1548                "from".to_string(),
1549                "into".to_string(),
1550                "nor".to_string(),
1551                "on".to_string(),
1552                "onto".to_string(),
1553                "or".to_string(),
1554                "the".to_string(),
1555                "to".to_string(),
1556                "upon".to_string(),
1557                "via".to_string(),
1558                "vs".to_string(),
1559                "with".to_string(),
1560                "without".to_string(),
1561            ],
1562            preserve_cased_words: true,
1563            ..Default::default()
1564        };
1565        let rule = MD063HeadingCapitalization::from_config_struct(config);
1566
1567        // Test: "subtitle with a `app`" (all lowercase input)
1568        // Expected fix: "Subtitle With a `app`" - capitalize "Subtitle" and "With",
1569        // but keep "a" lowercase (it's in lowercase-words and not the last word)
1570        // Incorrect: "Subtitle with A `app`" (would incorrectly capitalize "a")
1571        let content = "## subtitle with a `app`\n";
1572        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1573        let result = rule.check(&ctx).unwrap();
1574
1575        // Should flag it
1576        assert!(!result.is_empty(), "Should flag incorrect capitalization");
1577        let fixed = rule.fix(&ctx).unwrap();
1578        // "a" should remain lowercase (not "A") because inline code at end doesn't change lowercase-words behavior
1579        assert!(
1580            fixed.contains("with a `app`"),
1581            "Expected 'with a `app`' but got: {fixed:?}"
1582        );
1583        assert!(
1584            !fixed.contains("with A `app`"),
1585            "Should not capitalize 'a' to 'A'. Got: {fixed:?}"
1586        );
1587        // "Subtitle" should be capitalized, "with" and "a" should remain lowercase (they're in lowercase-words)
1588        assert!(
1589            fixed.contains("Subtitle with a `app`"),
1590            "Expected 'Subtitle with a `app`' but got: {fixed:?}"
1591        );
1592    }
1593
1594    #[test]
1595    fn test_lowercase_words_preserved_before_trailing_code_variant() {
1596        let config = MD063Config {
1597            enabled: true,
1598            style: HeadingCapStyle::TitleCase,
1599            lowercase_words: vec!["a".to_string(), "the".to_string(), "with".to_string()],
1600            ..Default::default()
1601        };
1602        let rule = MD063HeadingCapitalization::from_config_struct(config);
1603
1604        // Another variant: "Title with the `code`"
1605        let content = "## Title with the `code`\n";
1606        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1607        let fixed = rule.fix(&ctx).unwrap();
1608        // "the" should remain lowercase
1609        assert!(
1610            fixed.contains("with the `code`"),
1611            "Expected 'with the `code`' but got: {fixed:?}"
1612        );
1613        assert!(
1614            !fixed.contains("with The `code`"),
1615            "Should not capitalize 'the' to 'The'. Got: {fixed:?}"
1616        );
1617    }
1618
1619    #[test]
1620    fn test_last_word_capitalized_when_no_trailing_code() {
1621        // Verify that when there's NO trailing code, the last word IS capitalized
1622        // (even if it's in lowercase-words) - this is the normal title case behavior
1623        let config = MD063Config {
1624            enabled: true,
1625            style: HeadingCapStyle::TitleCase,
1626            lowercase_words: vec!["a".to_string(), "the".to_string()],
1627            ..Default::default()
1628        };
1629        let rule = MD063HeadingCapitalization::from_config_struct(config);
1630
1631        // "title with a word" - "word" is last, should be capitalized
1632        // "a" is in lowercase-words and not last, so should be lowercase
1633        let content = "## title with a word\n";
1634        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1635        let fixed = rule.fix(&ctx).unwrap();
1636        // "a" should be lowercase, "word" should be capitalized (it's last)
1637        assert!(
1638            fixed.contains("With a Word"),
1639            "Expected 'With a Word' but got: {fixed:?}"
1640        );
1641    }
1642
1643    #[test]
1644    fn test_multiple_lowercase_words_before_code() {
1645        let config = MD063Config {
1646            enabled: true,
1647            style: HeadingCapStyle::TitleCase,
1648            lowercase_words: vec![
1649                "a".to_string(),
1650                "the".to_string(),
1651                "with".to_string(),
1652                "for".to_string(),
1653            ],
1654            ..Default::default()
1655        };
1656        let rule = MD063HeadingCapitalization::from_config_struct(config);
1657
1658        // Multiple lowercase words before code - all should remain lowercase
1659        let content = "## Guide for the `user`\n";
1660        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1661        let fixed = rule.fix(&ctx).unwrap();
1662        assert!(
1663            fixed.contains("for the `user`"),
1664            "Expected 'for the `user`' but got: {fixed:?}"
1665        );
1666        assert!(
1667            !fixed.contains("For The `user`"),
1668            "Should not capitalize lowercase words before code. Got: {fixed:?}"
1669        );
1670    }
1671
1672    #[test]
1673    fn test_code_in_middle_normal_rules_apply() {
1674        let config = MD063Config {
1675            enabled: true,
1676            style: HeadingCapStyle::TitleCase,
1677            lowercase_words: vec!["a".to_string(), "the".to_string(), "for".to_string()],
1678            ..Default::default()
1679        };
1680        let rule = MD063HeadingCapitalization::from_config_struct(config);
1681
1682        // Code in the middle - normal title case rules apply (last word capitalized)
1683        let content = "## Using `const` for the code\n";
1684        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1685        let fixed = rule.fix(&ctx).unwrap();
1686        // "for" and "the" should be lowercase (middle), "code" should be capitalized (last)
1687        assert!(
1688            fixed.contains("for the Code"),
1689            "Expected 'for the Code' but got: {fixed:?}"
1690        );
1691    }
1692
1693    #[test]
1694    fn test_link_at_end_same_as_code() {
1695        let config = MD063Config {
1696            enabled: true,
1697            style: HeadingCapStyle::TitleCase,
1698            lowercase_words: vec!["a".to_string(), "the".to_string(), "for".to_string()],
1699            ..Default::default()
1700        };
1701        let rule = MD063HeadingCapitalization::from_config_struct(config);
1702
1703        // Link at the end - same behavior as code (lowercase words before should remain lowercase)
1704        let content = "## Guide for the [link](./page.md)\n";
1705        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1706        let fixed = rule.fix(&ctx).unwrap();
1707        // "for" and "the" should remain lowercase (not last word because link follows)
1708        assert!(
1709            fixed.contains("for the [Link]"),
1710            "Expected 'for the [Link]' but got: {fixed:?}"
1711        );
1712        assert!(
1713            !fixed.contains("for The [Link]"),
1714            "Should not capitalize 'the' before link. Got: {fixed:?}"
1715        );
1716    }
1717
1718    #[test]
1719    fn test_multiple_code_segments() {
1720        let config = MD063Config {
1721            enabled: true,
1722            style: HeadingCapStyle::TitleCase,
1723            lowercase_words: vec!["a".to_string(), "the".to_string(), "with".to_string()],
1724            ..Default::default()
1725        };
1726        let rule = MD063HeadingCapitalization::from_config_struct(config);
1727
1728        // Multiple code segments - last segment is code, so lowercase words before should remain lowercase
1729        let content = "## Using `const` with a `variable`\n";
1730        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1731        let fixed = rule.fix(&ctx).unwrap();
1732        // "a" should remain lowercase (not last word because code follows)
1733        assert!(
1734            fixed.contains("with a `variable`"),
1735            "Expected 'with a `variable`' but got: {fixed:?}"
1736        );
1737        assert!(
1738            !fixed.contains("with A `variable`"),
1739            "Should not capitalize 'a' before trailing code. Got: {fixed:?}"
1740        );
1741    }
1742
1743    #[test]
1744    fn test_code_and_link_combination() {
1745        let config = MD063Config {
1746            enabled: true,
1747            style: HeadingCapStyle::TitleCase,
1748            lowercase_words: vec!["a".to_string(), "the".to_string(), "for".to_string()],
1749            ..Default::default()
1750        };
1751        let rule = MD063HeadingCapitalization::from_config_struct(config);
1752
1753        // Code then link - last segment is link, so lowercase words before code should remain lowercase
1754        let content = "## Guide for the `code` [link](./page.md)\n";
1755        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1756        let fixed = rule.fix(&ctx).unwrap();
1757        // "for" and "the" should remain lowercase (not last word because link follows)
1758        assert!(
1759            fixed.contains("for the `code`"),
1760            "Expected 'for the `code`' but got: {fixed:?}"
1761        );
1762    }
1763
1764    #[test]
1765    fn test_text_after_code_capitalizes_last() {
1766        let config = MD063Config {
1767            enabled: true,
1768            style: HeadingCapStyle::TitleCase,
1769            lowercase_words: vec!["a".to_string(), "the".to_string(), "for".to_string()],
1770            ..Default::default()
1771        };
1772        let rule = MD063HeadingCapitalization::from_config_struct(config);
1773
1774        // Code in middle, text after - last word should be capitalized
1775        let content = "## Using `const` for the code\n";
1776        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1777        let fixed = rule.fix(&ctx).unwrap();
1778        // "for" and "the" should be lowercase, "code" is last word, should be capitalized
1779        assert!(
1780            fixed.contains("for the Code"),
1781            "Expected 'for the Code' but got: {fixed:?}"
1782        );
1783    }
1784
1785    #[test]
1786    fn test_preserve_cased_words_with_trailing_code() {
1787        let config = MD063Config {
1788            enabled: true,
1789            style: HeadingCapStyle::TitleCase,
1790            lowercase_words: vec!["a".to_string(), "the".to_string(), "for".to_string()],
1791            preserve_cased_words: true,
1792            ..Default::default()
1793        };
1794        let rule = MD063HeadingCapitalization::from_config_struct(config);
1795
1796        // Preserve-cased words should still work with trailing code
1797        let content = "## Guide for iOS `app`\n";
1798        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1799        let fixed = rule.fix(&ctx).unwrap();
1800        // "iOS" should be preserved, "for" should be lowercase
1801        assert!(
1802            fixed.contains("for iOS `app`"),
1803            "Expected 'for iOS `app`' but got: {fixed:?}"
1804        );
1805        assert!(
1806            !fixed.contains("For iOS `app`"),
1807            "Should not capitalize 'for' before trailing code. Got: {fixed:?}"
1808        );
1809    }
1810
1811    #[test]
1812    fn test_ignore_words_with_trailing_code() {
1813        let config = MD063Config {
1814            enabled: true,
1815            style: HeadingCapStyle::TitleCase,
1816            lowercase_words: vec!["a".to_string(), "the".to_string(), "with".to_string()],
1817            ignore_words: vec!["npm".to_string()],
1818            ..Default::default()
1819        };
1820        let rule = MD063HeadingCapitalization::from_config_struct(config);
1821
1822        // Ignore-words should still work with trailing code
1823        let content = "## Using npm with a `script`\n";
1824        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1825        let fixed = rule.fix(&ctx).unwrap();
1826        // "npm" should be preserved, "with" and "a" should be lowercase
1827        assert!(
1828            fixed.contains("npm with a `script`"),
1829            "Expected 'npm with a `script`' but got: {fixed:?}"
1830        );
1831        assert!(
1832            !fixed.contains("with A `script`"),
1833            "Should not capitalize 'a' before trailing code. Got: {fixed:?}"
1834        );
1835    }
1836
1837    #[test]
1838    fn test_empty_text_segment_edge_case() {
1839        let config = MD063Config {
1840            enabled: true,
1841            style: HeadingCapStyle::TitleCase,
1842            lowercase_words: vec!["a".to_string(), "with".to_string()],
1843            ..Default::default()
1844        };
1845        let rule = MD063HeadingCapitalization::from_config_struct(config);
1846
1847        // Edge case: code at start, then text with lowercase word, then code at end
1848        let content = "## `start` with a `end`\n";
1849        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1850        let fixed = rule.fix(&ctx).unwrap();
1851        // "with" is first word in text segment, so capitalized (correct)
1852        // "a" should remain lowercase (not last word because code follows) - this is the key test
1853        assert!(fixed.contains("a `end`"), "Expected 'a `end`' but got: {fixed:?}");
1854        assert!(
1855            !fixed.contains("A `end`"),
1856            "Should not capitalize 'a' before trailing code. Got: {fixed:?}"
1857        );
1858    }
1859
1860    #[test]
1861    fn test_sentence_case_with_trailing_code() {
1862        let config = MD063Config {
1863            enabled: true,
1864            style: HeadingCapStyle::SentenceCase,
1865            lowercase_words: vec!["a".to_string(), "the".to_string()],
1866            ..Default::default()
1867        };
1868        let rule = MD063HeadingCapitalization::from_config_struct(config);
1869
1870        // Sentence case should also respect lowercase words before code
1871        let content = "## guide for the `user`\n";
1872        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1873        let fixed = rule.fix(&ctx).unwrap();
1874        // First word capitalized, rest lowercase including "the" before code
1875        assert!(
1876            fixed.contains("Guide for the `user`"),
1877            "Expected 'Guide for the `user`' but got: {fixed:?}"
1878        );
1879    }
1880
1881    #[test]
1882    fn test_hyphenated_word_before_code() {
1883        let config = MD063Config {
1884            enabled: true,
1885            style: HeadingCapStyle::TitleCase,
1886            lowercase_words: vec!["a".to_string(), "the".to_string(), "with".to_string()],
1887            ..Default::default()
1888        };
1889        let rule = MD063HeadingCapitalization::from_config_struct(config);
1890
1891        // Hyphenated word before code - last part should respect lowercase-words
1892        let content = "## Self-contained with a `feature`\n";
1893        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1894        let fixed = rule.fix(&ctx).unwrap();
1895        // "with" and "a" should remain lowercase (not last word because code follows)
1896        assert!(
1897            fixed.contains("with a `feature`"),
1898            "Expected 'with a `feature`' but got: {fixed:?}"
1899        );
1900    }
1901
1902    // Issue #228: Sentence case with inline code at heading start
1903    // When a heading starts with inline code, the first word after the code
1904    // should NOT be capitalized because the heading already has a "first element"
1905
1906    #[test]
1907    fn test_sentence_case_code_at_start_basic() {
1908        // The exact case from issue #228
1909        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
1910        let content = "# `rumdl` is a linter\n";
1911        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1912        let result = rule.check(&ctx).unwrap();
1913        // Should be correct as-is: code is first, "is" stays lowercase
1914        assert!(
1915            result.is_empty(),
1916            "Heading with code at start should not flag 'is' for capitalization. Got: {:?}",
1917            result.iter().map(|w| &w.message).collect::<Vec<_>>()
1918        );
1919    }
1920
1921    #[test]
1922    fn test_sentence_case_code_at_start_incorrect_capitalization() {
1923        // Verify we detect incorrect capitalization after code at start
1924        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
1925        let content = "# `rumdl` Is a Linter\n";
1926        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1927        let result = rule.check(&ctx).unwrap();
1928        // Should flag: "Is" and "Linter" should be lowercase
1929        assert_eq!(result.len(), 1, "Should detect incorrect capitalization");
1930        assert!(
1931            result[0].message.contains("`rumdl` is a linter"),
1932            "Should suggest lowercase after code. Got: {:?}",
1933            result[0].message
1934        );
1935    }
1936
1937    #[test]
1938    fn test_sentence_case_code_at_start_fix() {
1939        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
1940        let content = "# `rumdl` Is A Linter\n";
1941        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1942        let fixed = rule.fix(&ctx).unwrap();
1943        assert!(
1944            fixed.contains("# `rumdl` is a linter"),
1945            "Should fix to lowercase after code. Got: {fixed:?}"
1946        );
1947    }
1948
1949    #[test]
1950    fn test_sentence_case_text_at_start_still_capitalizes() {
1951        // Ensure normal headings still capitalize first word
1952        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
1953        let content = "# the quick brown fox\n";
1954        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1955        let result = rule.check(&ctx).unwrap();
1956        assert_eq!(result.len(), 1);
1957        assert!(
1958            result[0].message.contains("The quick brown fox"),
1959            "Text-first heading should capitalize first word. Got: {:?}",
1960            result[0].message
1961        );
1962    }
1963
1964    #[test]
1965    fn test_sentence_case_link_at_start() {
1966        // Links at start: link text is lowercased, following text also lowercase
1967        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
1968        // Use lowercase link text to avoid link text case flagging
1969        let content = "# [api](api.md) reference guide\n";
1970        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1971        let result = rule.check(&ctx).unwrap();
1972        // "reference" should be lowercase (link is first)
1973        assert!(
1974            result.is_empty(),
1975            "Heading with link at start should not capitalize 'reference'. Got: {:?}",
1976            result.iter().map(|w| &w.message).collect::<Vec<_>>()
1977        );
1978    }
1979
1980    #[test]
1981    fn test_sentence_case_link_preserves_acronyms() {
1982        // Acronyms in link text should be preserved (API, HTTP, etc.)
1983        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
1984        let content = "# [API](api.md) Reference Guide\n";
1985        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1986        let result = rule.check(&ctx).unwrap();
1987        assert_eq!(result.len(), 1);
1988        // "API" should be preserved (acronym), "Reference Guide" should be lowercased
1989        assert!(
1990            result[0].message.contains("[API](api.md) reference guide"),
1991            "Should preserve acronym 'API' but lowercase following text. Got: {:?}",
1992            result[0].message
1993        );
1994    }
1995
1996    #[test]
1997    fn test_sentence_case_link_preserves_brand_names() {
1998        // Brand names with internal capitals should be preserved
1999        let config = MD063Config {
2000            enabled: true,
2001            style: HeadingCapStyle::SentenceCase,
2002            preserve_cased_words: true,
2003            ..Default::default()
2004        };
2005        let rule = MD063HeadingCapitalization::from_config_struct(config);
2006        let content = "# [iPhone](iphone.md) Features Guide\n";
2007        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2008        let result = rule.check(&ctx).unwrap();
2009        assert_eq!(result.len(), 1);
2010        // "iPhone" should be preserved, "Features Guide" should be lowercased
2011        assert!(
2012            result[0].message.contains("[iPhone](iphone.md) features guide"),
2013            "Should preserve 'iPhone' but lowercase following text. Got: {:?}",
2014            result[0].message
2015        );
2016    }
2017
2018    #[test]
2019    fn test_sentence_case_link_lowercases_regular_words() {
2020        // Regular words in link text should be lowercased
2021        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2022        let content = "# [Documentation](docs.md) Reference\n";
2023        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2024        let result = rule.check(&ctx).unwrap();
2025        assert_eq!(result.len(), 1);
2026        // "Documentation" should be lowercased (regular word)
2027        assert!(
2028            result[0].message.contains("[documentation](docs.md) reference"),
2029            "Should lowercase regular link text. Got: {:?}",
2030            result[0].message
2031        );
2032    }
2033
2034    #[test]
2035    fn test_sentence_case_link_at_start_correct_already() {
2036        // Link with correct casing should not be flagged
2037        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2038        let content = "# [API](api.md) reference guide\n";
2039        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2040        let result = rule.check(&ctx).unwrap();
2041        assert!(
2042            result.is_empty(),
2043            "Correctly cased heading with link should not be flagged. Got: {:?}",
2044            result.iter().map(|w| &w.message).collect::<Vec<_>>()
2045        );
2046    }
2047
2048    #[test]
2049    fn test_sentence_case_link_github_preserved() {
2050        // GitHub should be preserved (internal capitals)
2051        let config = MD063Config {
2052            enabled: true,
2053            style: HeadingCapStyle::SentenceCase,
2054            preserve_cased_words: true,
2055            ..Default::default()
2056        };
2057        let rule = MD063HeadingCapitalization::from_config_struct(config);
2058        let content = "# [GitHub](gh.md) Repository Setup\n";
2059        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2060        let result = rule.check(&ctx).unwrap();
2061        assert_eq!(result.len(), 1);
2062        assert!(
2063            result[0].message.contains("[GitHub](gh.md) repository setup"),
2064            "Should preserve 'GitHub'. Got: {:?}",
2065            result[0].message
2066        );
2067    }
2068
2069    #[test]
2070    fn test_sentence_case_multiple_code_spans() {
2071        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2072        let content = "# `foo` and `bar` are methods\n";
2073        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2074        let result = rule.check(&ctx).unwrap();
2075        // All text after first code should be lowercase
2076        assert!(
2077            result.is_empty(),
2078            "Should not capitalize words between/after code spans. Got: {:?}",
2079            result.iter().map(|w| &w.message).collect::<Vec<_>>()
2080        );
2081    }
2082
2083    #[test]
2084    fn test_sentence_case_code_only_heading() {
2085        // Heading with only code, no text
2086        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2087        let content = "# `rumdl`\n";
2088        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2089        let result = rule.check(&ctx).unwrap();
2090        assert!(
2091            result.is_empty(),
2092            "Code-only heading should be fine. Got: {:?}",
2093            result.iter().map(|w| &w.message).collect::<Vec<_>>()
2094        );
2095    }
2096
2097    #[test]
2098    fn test_sentence_case_code_at_end() {
2099        // Heading ending with code, text before should still capitalize first word
2100        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2101        let content = "# install the `rumdl` tool\n";
2102        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2103        let result = rule.check(&ctx).unwrap();
2104        // "install" should be capitalized (first word), rest lowercase
2105        assert_eq!(result.len(), 1);
2106        assert!(
2107            result[0].message.contains("Install the `rumdl` tool"),
2108            "First word should still be capitalized when text comes first. Got: {:?}",
2109            result[0].message
2110        );
2111    }
2112
2113    #[test]
2114    fn test_sentence_case_code_in_middle() {
2115        // Code in middle, text at start should capitalize first word
2116        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2117        let content = "# using the `rumdl` linter for markdown\n";
2118        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2119        let result = rule.check(&ctx).unwrap();
2120        // "using" should be capitalized, rest lowercase
2121        assert_eq!(result.len(), 1);
2122        assert!(
2123            result[0].message.contains("Using the `rumdl` linter for markdown"),
2124            "First word should be capitalized. Got: {:?}",
2125            result[0].message
2126        );
2127    }
2128
2129    #[test]
2130    fn test_sentence_case_preserved_word_after_code() {
2131        // Preserved words (like iPhone) should stay preserved even after code
2132        let config = MD063Config {
2133            enabled: true,
2134            style: HeadingCapStyle::SentenceCase,
2135            preserve_cased_words: true,
2136            ..Default::default()
2137        };
2138        let rule = MD063HeadingCapitalization::from_config_struct(config);
2139        let content = "# `swift` iPhone development\n";
2140        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2141        let result = rule.check(&ctx).unwrap();
2142        // "iPhone" should be preserved, "development" lowercase
2143        assert!(
2144            result.is_empty(),
2145            "Preserved words after code should stay. Got: {:?}",
2146            result.iter().map(|w| &w.message).collect::<Vec<_>>()
2147        );
2148    }
2149
2150    #[test]
2151    fn test_title_case_code_at_start_still_capitalizes() {
2152        // Title case should still capitalize words even after code at start
2153        let rule = create_rule_with_style(HeadingCapStyle::TitleCase);
2154        let content = "# `api` quick start guide\n";
2155        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2156        let result = rule.check(&ctx).unwrap();
2157        // Title case: all major words capitalized
2158        assert_eq!(result.len(), 1);
2159        assert!(
2160            result[0].message.contains("Quick Start Guide") || result[0].message.contains("quick Start Guide"),
2161            "Title case should capitalize major words after code. Got: {:?}",
2162            result[0].message
2163        );
2164    }
2165
2166    // ======== HTML TAG TESTS ========
2167
2168    #[test]
2169    fn test_sentence_case_html_tag_at_start() {
2170        // HTML tag at start: text after should NOT capitalize first word
2171        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2172        let content = "# <kbd>Ctrl</kbd> is a Modifier Key\n";
2173        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2174        let result = rule.check(&ctx).unwrap();
2175        // "is", "a", "Modifier", "Key" should all be lowercase (except preserved words)
2176        assert_eq!(result.len(), 1);
2177        let fixed = rule.fix(&ctx).unwrap();
2178        assert_eq!(
2179            fixed, "# <kbd>Ctrl</kbd> is a modifier key\n",
2180            "Text after HTML at start should be lowercase"
2181        );
2182    }
2183
2184    #[test]
2185    fn test_sentence_case_html_tag_preserves_content() {
2186        // Content inside HTML tags should be preserved as-is
2187        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2188        let content = "# The <abbr>API</abbr> documentation guide\n";
2189        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2190        let result = rule.check(&ctx).unwrap();
2191        // "The" is first, "API" inside tag preserved, rest lowercase
2192        assert!(
2193            result.is_empty(),
2194            "HTML tag content should be preserved. Got: {:?}",
2195            result.iter().map(|w| &w.message).collect::<Vec<_>>()
2196        );
2197    }
2198
2199    #[test]
2200    fn test_sentence_case_html_tag_at_start_with_acronym() {
2201        // HTML tag at start with acronym content
2202        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2203        let content = "# <abbr>API</abbr> Documentation Guide\n";
2204        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2205        let result = rule.check(&ctx).unwrap();
2206        assert_eq!(result.len(), 1);
2207        let fixed = rule.fix(&ctx).unwrap();
2208        assert_eq!(
2209            fixed, "# <abbr>API</abbr> documentation guide\n",
2210            "Text after HTML at start should be lowercase, HTML content preserved"
2211        );
2212    }
2213
#[test]
fn test_sentence_case_html_tag_in_middle() {
    use crate::config::MarkdownFlavor;

    // An HTML element in the middle of the heading must not stop the first
    // word from being capitalized or the trailing word from being lowercased.
    let heading = "# using the <code>config</code> File\n";
    let lint_ctx = LintContext::new(heading, MarkdownFlavor::Standard, None);
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);

    assert_eq!(rule.check(&lint_ctx).unwrap().len(), 1);

    let fixed = rule.fix(&lint_ctx).unwrap();
    assert_eq!(
        fixed, "# Using the <code>config</code> file\n",
        "First word capitalized, HTML preserved, rest lowercase"
    );
}
2228
#[test]
fn test_html_tag_strong_emphasis() {
    use crate::config::MarkdownFlavor;

    // Text wrapped in <strong> keeps its casing; only the word after it changes.
    let heading = "# The <strong>Bold</strong> Way\n";
    let lint_ctx = LintContext::new(heading, MarkdownFlavor::Standard, None);
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);

    assert_eq!(rule.check(&lint_ctx).unwrap().len(), 1);

    let fixed = rule.fix(&lint_ctx).unwrap();
    assert_eq!(
        fixed, "# The <strong>Bold</strong> way\n",
        "<strong> tag content should be preserved"
    );
}
2243
#[test]
fn test_html_tag_with_attributes() {
    use crate::config::MarkdownFlavor;

    // An opening tag that carries attributes must still be recognized as HTML.
    let heading = "# <span class=\"highlight\">Important</span> Notice Here\n";
    let lint_ctx = LintContext::new(heading, MarkdownFlavor::Standard, None);
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);

    assert_eq!(rule.check(&lint_ctx).unwrap().len(), 1);

    let fixed = rule.fix(&lint_ctx).unwrap();
    assert_eq!(
        fixed, "# <span class=\"highlight\">Important</span> notice here\n",
        "HTML tag with attributes should be preserved"
    );
}
2258
#[test]
fn test_multiple_html_tags() {
    use crate::config::MarkdownFlavor;

    // Two <kbd> elements joined by '+' — both must come through the fix unchanged.
    let heading = "# <kbd>Ctrl</kbd>+<kbd>C</kbd> to Copy Text\n";
    let lint_ctx = LintContext::new(heading, MarkdownFlavor::Standard, None);
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);

    assert_eq!(rule.check(&lint_ctx).unwrap().len(), 1);

    let fixed = rule.fix(&lint_ctx).unwrap();
    assert_eq!(
        fixed, "# <kbd>Ctrl</kbd>+<kbd>C</kbd> to copy text\n",
        "Multiple HTML tags should all be preserved"
    );
}
2273
#[test]
fn test_html_and_code_mixed() {
    use crate::config::MarkdownFlavor;

    // An HTML element followed directly by an inline code span, then plain words.
    let heading = "# <kbd>Ctrl</kbd>+`v` Paste command\n";
    let lint_ctx = LintContext::new(heading, MarkdownFlavor::Standard, None);
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);

    assert_eq!(rule.check(&lint_ctx).unwrap().len(), 1);

    let fixed = rule.fix(&lint_ctx).unwrap();
    assert_eq!(
        fixed, "# <kbd>Ctrl</kbd>+`v` paste command\n",
        "HTML and code should both be preserved"
    );
}
2288
#[test]
fn test_self_closing_html_tag() {
    use crate::config::MarkdownFlavor;

    // A self-closing <br/> splits the heading text; the tag itself must be kept
    // while the words after it are lowercased.
    let heading = "# Line one<br/>Line Two Here\n";
    let lint_ctx = LintContext::new(heading, MarkdownFlavor::Standard, None);
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);

    assert_eq!(rule.check(&lint_ctx).unwrap().len(), 1);

    let fixed = rule.fix(&lint_ctx).unwrap();
    assert_eq!(
        fixed, "# Line one<br/>line two here\n",
        "Self-closing HTML tags should be preserved"
    );
}
2303
#[test]
fn test_title_case_with_html_tags() {
    // Title case applied to a heading that contains an inline HTML element.
    let rule = create_rule_with_style(HeadingCapStyle::TitleCase);
    let content = "# the <kbd>ctrl</kbd> key is a modifier\n";
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
    let result = rule.check(&ctx).unwrap();
    assert_eq!(result.len(), 1);
    let fixed = rule.fix(&ctx).unwrap();
    // Content inside <kbd> must come through untouched.
    assert!(
        fixed.contains("<kbd>ctrl</kbd>"),
        "HTML tag content should be preserved in title case. Got: {fixed}"
    );
    // "the" is the first word of the heading, so title case must capitalize it.
    // Only the capitalized form is accepted: also allowing "# the " would make
    // this assertion unable to fail on the first word.
    assert!(
        fixed.starts_with("# The "),
        "Title case should work with HTML. Got: {fixed}"
    );
}
2323
2324    // ======== CARET NOTATION TESTS ========
2325
#[test]
fn test_sentence_case_preserves_caret_notation() {
    use crate::config::MarkdownFlavor;

    // "^A" and "^R" are caret notation for control characters; sentence case
    // must not try to lowercase the letter after the caret.
    let heading = "## Ctrl+A, Ctrl+R output ^A, ^R on zsh\n";
    let lint_ctx = LintContext::new(heading, MarkdownFlavor::Standard, None);
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let warnings = rule.check(&lint_ctx).unwrap();

    // The heading is expected to pass as written.
    assert!(
        warnings.is_empty(),
        "Caret notation should be preserved. Got: {:?}",
        warnings.iter().map(|w| &w.message).collect::<Vec<_>>()
    );
}
2340
#[test]
fn test_sentence_case_caret_notation_various() {
    use crate::config::MarkdownFlavor;

    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);

    // Each entry pairs a caret sequence with a heading that contains it.
    let cases = [
        ("^C", "## Press ^C to cancel\n"),    // interrupt
        ("^Z", "## Use ^Z for background\n"), // suspend
        ("^[", "## Press ^[ for escape\n"),   // escape
    ];

    for (notation, content) in cases {
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        let result = rule.check(&ctx).unwrap();
        assert!(
            result.is_empty(),
            "{notation} should be preserved. Got: {:?}",
            result.iter().map(|w| &w.message).collect::<Vec<_>>()
        );
    }
}
2376
#[test]
fn test_caret_notation_detection() {
    let rule = create_rule();

    // Inputs that must be recognized as caret notation.
    let accepted = [
        "^A", "^Z", "^C", //
        "^@", // NUL
        "^[", // ESC
        "^]", // GS
        "^^", // RS
        "^_", // US
    ];
    for candidate in accepted {
        assert!(
            rule.is_caret_notation(candidate),
            "expected {candidate:?} to be detected as caret notation"
        );
    }

    // Inputs that must be rejected.
    let rejected = [
        "^a", // lowercase
        "A",  // no caret
        "^",  // caret alone
        "^1", // digit
    ];
    for candidate in rejected {
        assert!(
            !rule.is_caret_notation(candidate),
            "expected {candidate:?} not to be detected as caret notation"
        );
    }
}
2397
2398    // MD044 proper names integration tests
2399    //
2400    // When MD063 (sentence case) and MD044 (proper names) are both active, MD063 must
2401    // preserve the exact capitalization of MD044 proper names rather than lowercasing them.
2402    // Without this, the two rules oscillate: MD044 re-capitalizes what MD063 lowercases.
2403
2404    fn create_sentence_case_rule_with_proper_names(names: Vec<String>) -> MD063HeadingCapitalization {
2405        let config = MD063Config {
2406            enabled: true,
2407            style: HeadingCapStyle::SentenceCase,
2408            ..Default::default()
2409        };
2410        let mut rule = MD063HeadingCapitalization::from_config_struct(config);
2411        rule.proper_names = names;
2412        rule
2413    }
2414
#[test]
fn test_sentence_case_preserves_single_word_proper_name() {
    use crate::config::MarkdownFlavor;

    // A proper name that is not the first word must come out in its canonical
    // casing ("JavaScript") and not be forced to lowercase.
    let heading = "# installing javascript\n";
    let lint_ctx = LintContext::new(heading, MarkdownFlavor::Standard, None);
    let rule = create_sentence_case_rule_with_proper_names(vec!["JavaScript".to_string()]);

    let warnings = rule.check(&lint_ctx).unwrap();
    assert_eq!(warnings.len(), 1, "Should flag the heading");

    let fix = warnings[0].fix.as_ref().unwrap();
    let fix_text = fix.replacement.as_str();
    assert!(
        fix_text.contains("JavaScript"),
        "Fix should preserve proper name 'JavaScript', got: {fix_text:?}"
    );
    assert!(
        !fix_text.contains("javascript"),
        "Fix should not have lowercase 'javascript', got: {fix_text:?}"
    );
}
2433
#[test]
fn test_sentence_case_preserves_multi_word_proper_name() {
    use crate::config::MarkdownFlavor;

    // A two-word proper name: sentence case must not lowercase its second word.
    let heading = "# using good application features\n";
    let lint_ctx = LintContext::new(heading, MarkdownFlavor::Standard, None);
    let rule = create_sentence_case_rule_with_proper_names(vec!["Good Application".to_string()]);

    let warnings = rule.check(&lint_ctx).unwrap();
    assert_eq!(warnings.len(), 1, "Should flag the heading");

    let fix = warnings[0].fix.as_ref().unwrap();
    let fix_text = fix.replacement.as_str();
    assert!(
        fix_text.contains("Good Application"),
        "Fix should preserve 'Good Application' as a phrase, got: {fix_text:?}"
    );
}
2448
#[test]
fn test_sentence_case_proper_name_at_start_of_heading() {
    use crate::config::MarkdownFlavor;

    // The proper name is the very first thing in the heading: both of its words
    // must take the canonical casing, while the ordinary word after it stays lowercase.
    let heading = "# good application overview\n";
    let lint_ctx = LintContext::new(heading, MarkdownFlavor::Standard, None);
    let rule = create_sentence_case_rule_with_proper_names(vec!["Good Application".to_string()]);

    let warnings = rule.check(&lint_ctx).unwrap();
    assert_eq!(warnings.len(), 1, "Should flag the heading");

    let fix = warnings[0].fix.as_ref().unwrap();
    let fix_text = fix.replacement.as_str();
    assert!(
        fix_text.contains("Good Application"),
        "Fix should produce 'Good Application' at start of heading, got: {fix_text:?}"
    );
    assert!(
        fix_text.contains("overview"),
        "Non-proper-name word 'overview' should be lowercase, got: {fix_text:?}"
    );
}
2467
#[test]
fn test_sentence_case_with_proper_names_no_oscillation() {
    use crate::config::MarkdownFlavor;

    // Convergence check: the output of a single fix must itself lint clean.
    let rule = create_sentence_case_rule_with_proper_names(vec!["Good Application".to_string()]);

    // Pass 1: lint the badly-cased heading and take the proposed replacement.
    let original = "# installing good application on your system\n";
    let first_ctx = LintContext::new(original, MarkdownFlavor::Standard, None);
    let first_pass = rule.check(&first_ctx).unwrap();
    assert_eq!(first_pass.len(), 1);
    let fixed_heading = first_pass[0].fix.as_ref().unwrap().replacement.as_str();

    // The replacement has to carry the proper name in canonical form.
    assert!(
        fixed_heading.contains("Good Application"),
        "After fix, proper name must be preserved: {fixed_heading:?}"
    );

    // Pass 2: lint the replacement as a line of its own; no warning may remain.
    let relinted_input = format!("{fixed_heading}\n");
    let second_ctx = LintContext::new(&relinted_input, MarkdownFlavor::Standard, None);
    let result2 = rule.check(&second_ctx).unwrap();
    assert!(
        result2.is_empty(),
        "After one fix, heading must already satisfy both MD063 and MD044 - no oscillation. \
         Second pass warnings: {result2:?}"
    );
}
2497
#[test]
fn test_sentence_case_proper_names_already_correct() {
    use crate::config::MarkdownFlavor;

    // Already valid: sentence case with the proper name in canonical form.
    let heading = "# Installing Good Application\n";
    let lint_ctx = LintContext::new(heading, MarkdownFlavor::Standard, None);
    let rule = create_sentence_case_rule_with_proper_names(vec!["Good Application".to_string()]);

    let result = rule.check(&lint_ctx).unwrap();
    assert!(
        result.is_empty(),
        "Correct sentence-case heading with proper name should not be flagged, got: {result:?}"
    );
}
2510
#[test]
fn test_sentence_case_multiple_proper_names_in_heading() {
    use crate::config::MarkdownFlavor;

    // Two independent proper names appear in the same heading.
    let proper_names = vec!["TypeScript".to_string(), "React".to_string()];
    let rule = create_sentence_case_rule_with_proper_names(proper_names);

    let heading = "# using typescript with react\n";
    let lint_ctx = LintContext::new(heading, MarkdownFlavor::Standard, None);
    let warnings = rule.check(&lint_ctx).unwrap();
    assert_eq!(warnings.len(), 1);

    let fix = warnings[0].fix.as_ref().unwrap();
    let fix_text = fix.replacement.as_str();
    assert!(
        fix_text.contains("TypeScript"),
        "Fix should preserve 'TypeScript', got: {fix_text:?}"
    );
    assert!(
        fix_text.contains("React"),
        "Fix should preserve 'React', got: {fix_text:?}"
    );
}
2528
#[test]
fn test_sentence_case_unicode_casefold_expansion_before_proper_name() {
    use crate::config::MarkdownFlavor;

    // Regression guard for case-fold expansion: lowercasing `İ` yields two code
    // points, so offsets taken from a lowercased copy would not line up with the
    // original text. Offsets must come from the original.
    let heading = "# İ österreich guide\n";
    let lint_ctx = LintContext::new(heading, MarkdownFlavor::Standard, None);
    let rule = create_sentence_case_rule_with_proper_names(vec!["Österreich".to_string()]);

    // Must complete without panicking and restore the canonical casing.
    let warnings = rule.check(&lint_ctx).unwrap();
    assert_eq!(warnings.len(), 1, "Should flag heading for canonical proper-name casing");

    let fix = warnings[0].fix.as_ref().unwrap();
    let fix_text = fix.replacement.as_str();
    assert!(
        fix_text.contains("Österreich"),
        "Fix should preserve canonical 'Österreich', got: {fix_text:?}"
    );
}
2546
#[test]
fn test_sentence_case_preserves_trailing_punctuation_on_proper_name() {
    use crate::config::MarkdownFlavor;

    // The proper name is immediately followed by a comma, which must survive the fix.
    let heading = "# using javascript, today\n";
    let lint_ctx = LintContext::new(heading, MarkdownFlavor::Standard, None);
    let rule = create_sentence_case_rule_with_proper_names(vec!["JavaScript".to_string()]);

    let warnings = rule.check(&lint_ctx).unwrap();
    assert_eq!(warnings.len(), 1, "Should flag heading");

    let fix = warnings[0].fix.as_ref().unwrap();
    let fix_text = fix.replacement.as_str();
    assert!(
        fix_text.contains("JavaScript,"),
        "Fix should preserve trailing punctuation, got: {fix_text:?}"
    );
}
2560
2561    // Title case + MD044 conflict tests
2562    //
2563    // In title case, short words like "the", "a", "of" are kept lowercase by MD063.
2564    // If those words are part of an MD044 proper name (e.g. "The Rolling Stones"),
2565    // the same oscillation problem occurs.  The fix must extend to title case too.
2566
2567    fn create_title_case_rule_with_proper_names(names: Vec<String>) -> MD063HeadingCapitalization {
2568        let config = MD063Config {
2569            enabled: true,
2570            style: HeadingCapStyle::TitleCase,
2571            ..Default::default()
2572        };
2573        let mut rule = MD063HeadingCapitalization::from_config_struct(config);
2574        rule.proper_names = names;
2575        rule
2576    }
2577
#[test]
fn test_title_case_preserves_proper_name_with_lowercase_article() {
    use crate::config::MarkdownFlavor;

    // Title case normally keeps a mid-heading "the" lowercase, but here it is the
    // first word of the proper name "The Rolling Stones" and must be capitalised
    // exactly as configured.
    let heading = "# listening to the rolling stones today\n";
    let lint_ctx = LintContext::new(heading, MarkdownFlavor::Standard, None);
    let rule = create_title_case_rule_with_proper_names(vec!["The Rolling Stones".to_string()]);

    let warnings = rule.check(&lint_ctx).unwrap();
    assert_eq!(warnings.len(), 1, "Should flag the heading");

    let fix = warnings[0].fix.as_ref().unwrap();
    let fix_text = fix.replacement.as_str();
    assert!(
        fix_text.contains("The Rolling Stones"),
        "Fix should preserve proper name 'The Rolling Stones', got: {fix_text:?}"
    );
}
2594
#[test]
fn test_title_case_proper_name_no_oscillation() {
    use crate::config::MarkdownFlavor;

    // Convergence check for title case: a single fix must yield a heading that
    // the same rule then accepts.
    let rule = create_title_case_rule_with_proper_names(vec!["The Rolling Stones".to_string()]);

    // Pass 1: obtain the proposed replacement.
    let original = "# listening to the rolling stones today\n";
    let first_ctx = LintContext::new(original, MarkdownFlavor::Standard, None);
    let first_pass = rule.check(&first_ctx).unwrap();
    assert_eq!(first_pass.len(), 1);
    let replacement = first_pass[0].fix.as_ref().unwrap().replacement.as_str();

    // Pass 2: re-lint the replacement as a line of its own.
    let relinted_input = format!("{replacement}\n");
    let second_ctx = LintContext::new(&relinted_input, MarkdownFlavor::Standard, None);
    let result2 = rule.check(&second_ctx).unwrap();
    assert!(
        result2.is_empty(),
        "After one title-case fix, heading must already satisfy both rules. \
         Second pass warnings: {result2:?}"
    );
}
2614
#[test]
fn test_title_case_unicode_casefold_expansion_before_proper_name() {
    use crate::config::MarkdownFlavor;

    // Title-case counterpart of the sentence-case test that uses this same
    // input: `İ` comes before the proper name in the heading.
    let heading = "# İ österreich guide\n";
    let lint_ctx = LintContext::new(heading, MarkdownFlavor::Standard, None);
    let rule = create_title_case_rule_with_proper_names(vec!["Österreich".to_string()]);

    let warnings = rule.check(&lint_ctx).unwrap();
    assert_eq!(warnings.len(), 1, "Should flag the heading");

    let fix = warnings[0].fix.as_ref().unwrap();
    let fix_text = fix.replacement.as_str();
    assert!(
        fix_text.contains("Österreich"),
        "Fix should preserve canonical proper-name casing, got: {fix_text:?}"
    );
}
2628
2629    // End-to-end integration test: from_config wires MD044 names into MD063
2630    //
2631    // This tests the actual code path used in production, where both rules are
2632    // configured in a rumdl.toml and the rule registry calls from_config.
2633
#[test]
fn test_from_config_loads_md044_names_into_md063() {
    use crate::config::{Config, MarkdownFlavor, RuleConfig};
    use crate::rule::Rule;
    use std::collections::BTreeMap;

    // MD063 settings: enabled, sentence case.
    let md063_section = RuleConfig {
        values: BTreeMap::from([
            ("style".to_string(), toml::Value::String("sentence_case".to_string())),
            ("enabled".to_string(), toml::Value::Boolean(true)),
        ]),
        severity: None,
    };

    // MD044 settings: a single proper name.
    let md044_section = RuleConfig {
        values: BTreeMap::from([(
            "names".to_string(),
            toml::Value::Array(vec![toml::Value::String("Good Application".to_string())]),
        )]),
        severity: None,
    };

    let mut config = Config::default();
    config.rules.insert("MD063".to_string(), md063_section);
    config.rules.insert("MD044".to_string(), md044_section);

    // Construct the rule from the whole configuration rather than from an
    // MD063-only config struct.
    let rule = MD063HeadingCapitalization::from_config(&config);

    // If the MD044 names reached MD063, the proposed fix keeps "Good Application".
    let heading = "# using good application features\n";
    let lint_ctx = LintContext::new(heading, MarkdownFlavor::Standard, None);
    let warnings = rule.check(&lint_ctx).unwrap();
    assert_eq!(warnings.len(), 1, "Should flag the heading");

    let fix = warnings[0].fix.as_ref().unwrap();
    let fix_text = fix.replacement.as_str();
    assert!(
        fix_text.contains("Good Application"),
        "from_config should wire MD044 names into MD063; fix should preserve \
         'Good Application', got: {fix_text:?}"
    );
}
2683
#[test]
fn test_title_case_short_word_not_confused_with_substring() {
    use crate::config::MarkdownFlavor;

    // "in" is a short preposition that title case keeps lowercase, yet "insert"
    // merely starts with those letters and is an ordinary word. The short-word
    // match must apply to whole words only.
    let heading = "# in the insert\n";
    let lint_ctx = LintContext::new(heading, MarkdownFlavor::Standard, None);
    let rule = create_rule_with_style(HeadingCapStyle::TitleCase);

    let warnings = rule.check(&lint_ctx).unwrap();
    assert_eq!(warnings.len(), 1, "Should flag the heading");

    // Expected: "In" (first word), "the" (article, lowercase), "Insert" (regular word).
    let proposed = warnings[0].fix.as_ref().expect("Fix should be present");
    assert!(
        proposed.replacement.contains("In the Insert"),
        "Expected 'In the Insert', got: {:?}",
        proposed.replacement
    );
}
2705
#[test]
fn test_title_case_or_not_confused_with_orchestra() {
    use crate::config::MarkdownFlavor;

    // "or" is a conjunction that title case keeps lowercase; "orchestra" only
    // begins with the same two letters and is an ordinary word.
    let heading = "# or the orchestra\n";
    let lint_ctx = LintContext::new(heading, MarkdownFlavor::Standard, None);
    let rule = create_rule_with_style(HeadingCapStyle::TitleCase);

    let warnings = rule.check(&lint_ctx).unwrap();
    assert_eq!(warnings.len(), 1, "Should flag the heading");

    // Expected: "Or" (first word), "the" (lowercase), "Orchestra" (regular word).
    let proposed = warnings[0].fix.as_ref().expect("Fix should be present");
    assert!(
        proposed.replacement.contains("Or the Orchestra"),
        "Expected 'Or the Orchestra', got: {:?}",
        proposed.replacement
    );
}
2724
#[test]
fn test_all_caps_preserves_all_words() {
    use crate::config::MarkdownFlavor;

    // With the all-caps style every word is uppercased, short words included.
    let heading = "# in the insert\n";
    let lint_ctx = LintContext::new(heading, MarkdownFlavor::Standard, None);
    let rule = create_rule_with_style(HeadingCapStyle::AllCaps);

    let warnings = rule.check(&lint_ctx).unwrap();
    assert_eq!(warnings.len(), 1, "Should flag the heading");

    let proposed = warnings[0].fix.as_ref().expect("Fix should be present");
    assert!(
        proposed.replacement.contains("IN THE INSERT"),
        "All caps should uppercase all words, got: {:?}",
        proposed.replacement
    );
}
2740}