Skip to main content

rumdl_lib/rules/md063_heading_capitalization/
mod.rs

1/// Rule MD063: Heading capitalization
2///
3/// See [docs/md063.md](../../docs/md063.md) for full documentation, configuration, and examples.
4///
5/// This rule enforces consistent capitalization styles for markdown headings.
6/// It supports title case, sentence case, and all caps styles.
7///
8/// **Note:** This rule is disabled by default. Enable it in your configuration:
9/// ```toml
10/// [MD063]
11/// enabled = true
12/// style = "title_case"
13/// ```
14use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
15use crate::utils::range_utils::LineIndex;
16use regex::Regex;
17use std::collections::HashSet;
18use std::ops::Range;
19use std::sync::LazyLock;
20
21mod md063_config;
22pub use md063_config::{HeadingCapStyle, MD063Config};
23
// Regex to match inline code spans (backticks).
// Accepts a run of one or more backticks, at least one non-backtick character,
// and a closing run of one or more backticks. The opening and closing runs are
// not required to have the same length, and the content itself cannot contain
// a backtick.
static INLINE_CODE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"`+[^`]+`+").unwrap());
26
// Regex to match markdown links [text](url) or [text][ref].
// Capture group 1 is the text of an inline link, capture group 2 the text of a
// reference link; only the group belonging to the alternative that matched is set.
// The text part cannot contain `]` and the inline URL part cannot contain `)`.
static LINK_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\[([^\]]*)\]\([^)]*\)|\[([^\]]*)\]\[[^\]]*\]").unwrap());
30
// Regex to match inline HTML tags commonly used in headings
// Matches paired tags: <tag>content</tag>, <tag attr="val">content</tag>
// Matches self-closing: <tag/>, <tag />
// Uses explicit list of common inline tags to avoid backreference (not supported in Rust regex)
//
// Consequences of the pattern as written:
// - the opening and closing tag names are matched independently, so a pair
//   such as <b>text</i> is accepted as well;
// - the trailing `/` in the second alternative is optional, so a lone opening
//   tag such as <br> also matches;
// - no case-insensitive flag is set, so only lowercase tag names match.
static HTML_TAG_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    // Common inline HTML tags used in documentation headings
    let tags = "kbd|abbr|code|span|sub|sup|mark|cite|dfn|var|samp|small|strong|em|b|i|u|s|q|br|wbr";
    // First alternative: paired tag with (non-greedy) content.
    // Second alternative: a single tag, optionally self-closing.
    let pattern = format!(r"<({tags})(?:\s[^>]*)?>.*?</({tags})>|<({tags})(?:\s[^>]*)?\s*/?>");
    Regex::new(&pattern).unwrap()
});
41
// Regex to match custom header IDs {#id}
// Anchored at the end of the text; the match also takes in any whitespace
// directly before and after the braces.
static CUSTOM_ID_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s*\{#[^}]+\}\s*$").unwrap());
44
/// Represents a segment of heading text
#[derive(Debug, Clone)]
enum HeadingSegment {
    /// Regular text that should be capitalized
    Text(String),
    /// Inline code that should be preserved as-is
    Code(String),
    /// Link with text that may be capitalized and URL that's preserved
    Link {
        /// The complete matched link source, brackets and target included.
        full: String,
        /// Byte offset inside `full` at which the link text starts.
        text_start: usize,
        /// Byte offset inside `full` just past the end of the link text.
        text_end: usize,
    },
    /// Inline HTML tag that should be preserved as-is
    Html(String),
}
61
/// Rule MD063: Heading capitalization
#[derive(Clone)]
pub struct MD063HeadingCapitalization {
    /// Rule configuration (style, ignored words, lowercase words, ...).
    config: MD063Config,
    /// The entries of `config.lowercase_words` collected into a set, used for
    /// the membership test in `is_lowercase_word`.
    lowercase_set: HashSet<String>,
    /// Multi-word proper names from MD044 that must survive sentence-case transformation.
    /// Populated via `from_config` when both rules are active.
    proper_names: Vec<String>,
}
71
impl Default for MD063HeadingCapitalization {
    /// Same as [`MD063HeadingCapitalization::new`].
    fn default() -> Self {
        Self::new()
    }
}
77
78impl MD063HeadingCapitalization {
79    pub fn new() -> Self {
80        let config = MD063Config::default();
81        let lowercase_set = config.lowercase_words.iter().cloned().collect();
82        Self {
83            config,
84            lowercase_set,
85            proper_names: Vec::new(),
86        }
87    }
88
89    pub fn from_config_struct(config: MD063Config) -> Self {
90        let lowercase_set = config.lowercase_words.iter().cloned().collect();
91        Self {
92            config,
93            lowercase_set,
94            proper_names: Vec::new(),
95        }
96    }
97
98    /// Match `pattern_lower` at `start` in `text` using Unicode-aware lowercasing.
99    /// Returns the end byte offset in `text` when the match succeeds.
100    ///
101    /// This avoids converting the full `text` to lowercase and then reusing those
102    /// offsets on the original string, which can panic for case-fold expansions
103    /// (e.g. `İ` -> `i̇`).
104    fn match_case_insensitive_at(text: &str, start: usize, pattern_lower: &str) -> Option<usize> {
105        if start > text.len() || !text.is_char_boundary(start) || pattern_lower.is_empty() {
106            return None;
107        }
108
109        let mut matched_bytes = 0;
110
111        for (offset, ch) in text[start..].char_indices() {
112            if matched_bytes >= pattern_lower.len() {
113                break;
114            }
115
116            let lowered: String = ch.to_lowercase().collect();
117            if !pattern_lower[matched_bytes..].starts_with(&lowered) {
118                return None;
119            }
120
121            matched_bytes += lowered.len();
122
123            if matched_bytes == pattern_lower.len() {
124                return Some(start + offset + ch.len_utf8());
125            }
126        }
127
128        None
129    }
130
131    /// Find the next case-insensitive match of `pattern_lower` in `text`,
132    /// returning byte offsets in the ORIGINAL string.
133    fn find_case_insensitive_match(text: &str, pattern_lower: &str, search_start: usize) -> Option<(usize, usize)> {
134        if pattern_lower.is_empty() || search_start >= text.len() || !text.is_char_boundary(search_start) {
135            return None;
136        }
137
138        for (offset, _) in text[search_start..].char_indices() {
139            let start = search_start + offset;
140            if let Some(end) = Self::match_case_insensitive_at(text, start, pattern_lower) {
141                return Some((start, end));
142            }
143        }
144
145        None
146    }
147
148    /// Build a map from word byte-position → canonical form for all proper names
149    /// that appear in the heading text (case-insensitive phrase match).
150    ///
151    /// This is used in `apply_sentence_case` so that words belonging to a proper
152    /// name phrase are never lowercased to begin with.
153    fn proper_name_canonical_forms(&self, text: &str) -> std::collections::HashMap<usize, &str> {
154        let mut map = std::collections::HashMap::new();
155
156        for name in &self.proper_names {
157            if name.is_empty() {
158                continue;
159            }
160            let name_lower = name.to_lowercase();
161            let canonical_words: Vec<&str> = name.split_whitespace().collect();
162            if canonical_words.is_empty() {
163                continue;
164            }
165            let mut search_start = 0;
166
167            while search_start < text.len() {
168                let Some((abs_pos, end_pos)) = Self::find_case_insensitive_match(text, &name_lower, search_start)
169                else {
170                    break;
171                };
172
173                // Require word boundaries
174                let before_ok = abs_pos == 0 || !text[..abs_pos].chars().last().is_some_and(|c| c.is_alphanumeric());
175                let after_ok =
176                    end_pos >= text.len() || !text[end_pos..].chars().next().is_some_and(|c| c.is_alphanumeric());
177
178                if before_ok && after_ok {
179                    // Map each word in the matched region to its canonical form.
180                    // We zip the words found in the text slice with the words of the
181                    // canonical name so that every word gets the right casing.
182                    let text_slice = &text[abs_pos..end_pos];
183                    let mut word_idx = 0;
184                    let mut slice_offset = 0;
185
186                    for text_word in text_slice.split_whitespace() {
187                        if let Some(w_rel) = text_slice[slice_offset..].find(text_word) {
188                            let word_abs = abs_pos + slice_offset + w_rel;
189                            if let Some(&canonical_word) = canonical_words.get(word_idx) {
190                                map.insert(word_abs, canonical_word);
191                            }
192                            slice_offset += w_rel + text_word.len();
193                            word_idx += 1;
194                        }
195                    }
196                }
197
198                // Advance by one Unicode scalar value to allow overlapping matches
199                // while staying on a UTF-8 char boundary.
200                search_start = abs_pos + text[abs_pos..].chars().next().map_or(1, |c| c.len_utf8());
201            }
202        }
203
204        map
205    }
206
207    /// Check if a word has internal capitals (like "iPhone", "macOS", "GitHub", "iOS")
208    fn has_internal_capitals(&self, word: &str) -> bool {
209        let chars: Vec<char> = word.chars().collect();
210        if chars.len() < 2 {
211            return false;
212        }
213
214        let first = chars[0];
215        let rest = &chars[1..];
216        let has_upper_in_rest = rest.iter().any(|c| c.is_uppercase());
217        let has_lower_in_rest = rest.iter().any(|c| c.is_lowercase());
218
219        // Case 1: Mixed case after first character (like "iPhone", "macOS", "GitHub", "JavaScript")
220        if has_upper_in_rest && has_lower_in_rest {
221            return true;
222        }
223
224        // Case 2: Lowercase first + uppercase in rest (like "iOS", "eBay")
225        if first.is_lowercase() && has_upper_in_rest {
226            return true;
227        }
228
229        false
230    }
231
232    /// Check if a word is an all-caps acronym (2+ consecutive uppercase letters)
233    /// Examples: "API", "GPU", "HTTP2", "IO" return true
234    /// Examples: "A", "iPhone", "npm" return false
235    fn is_all_caps_acronym(&self, word: &str) -> bool {
236        // Skip single-letter words (handled by title case rules)
237        if word.len() < 2 {
238            return false;
239        }
240
241        let mut consecutive_upper = 0;
242        let mut max_consecutive = 0;
243
244        for c in word.chars() {
245            if c.is_uppercase() {
246                consecutive_upper += 1;
247                max_consecutive = max_consecutive.max(consecutive_upper);
248            } else if c.is_lowercase() {
249                // Any lowercase letter means not all-caps
250                return false;
251            } else {
252                // Non-letter (number, punctuation) - reset counter but don't fail
253                consecutive_upper = 0;
254            }
255        }
256
257        // Must have at least 2 consecutive uppercase letters
258        max_consecutive >= 2
259    }
260
261    /// Check if a word should be preserved as-is
262    fn should_preserve_word(&self, word: &str) -> bool {
263        // Check ignore_words list (case-sensitive exact match)
264        if self.config.ignore_words.iter().any(|w| w == word) {
265            return true;
266        }
267
268        // Check if word has internal capitals and preserve_cased_words is enabled
269        if self.config.preserve_cased_words && self.has_internal_capitals(word) {
270            return true;
271        }
272
273        // Check if word is an all-caps acronym (2+ consecutive uppercase)
274        if self.config.preserve_cased_words && self.is_all_caps_acronym(word) {
275            return true;
276        }
277
278        // Preserve caret notation for control characters (^A, ^Z, ^@, etc.)
279        if self.is_caret_notation(word) {
280            return true;
281        }
282
283        false
284    }
285
286    /// Check if a word is caret notation for control characters (e.g., ^A, ^C, ^Z)
287    fn is_caret_notation(&self, word: &str) -> bool {
288        let chars: Vec<char> = word.chars().collect();
289        // Pattern: ^ followed by uppercase letter or @[\]^_
290        if chars.len() >= 2 && chars[0] == '^' {
291            let second = chars[1];
292            // Control characters: ^@ (NUL) through ^_ (US), which includes ^A-^Z
293            if second.is_ascii_uppercase() || "@[\\]^_".contains(second) {
294                return true;
295            }
296        }
297        false
298    }
299
300    /// Check if a word is a "lowercase word" (articles, prepositions, etc.)
301    fn is_lowercase_word(&self, word: &str) -> bool {
302        self.lowercase_set.contains(&word.to_lowercase())
303    }
304
305    /// Apply title case to a single word
306    fn title_case_word(&self, word: &str, is_first: bool, is_last: bool) -> String {
307        if word.is_empty() {
308            return word.to_string();
309        }
310
311        // Preserve words in ignore list or with internal capitals
312        if self.should_preserve_word(word) {
313            return word.to_string();
314        }
315
316        // First and last words are always capitalized
317        if is_first || is_last {
318            return self.capitalize_first(word);
319        }
320
321        // Check if it's a lowercase word (articles, prepositions, etc.)
322        if self.is_lowercase_word(word) {
323            return Self::lowercase_preserving_composition(word);
324        }
325
326        // Regular word - capitalize first letter
327        self.capitalize_first(word)
328    }
329
330    /// Apply canonical proper-name casing while preserving any trailing punctuation
331    /// attached to the original whitespace token (e.g. `javascript,` -> `JavaScript,`).
332    fn apply_canonical_form_to_word(word: &str, canonical: &str) -> String {
333        let canonical_lower = canonical.to_lowercase();
334        if canonical_lower.is_empty() {
335            return canonical.to_string();
336        }
337
338        if let Some(end_pos) = Self::match_case_insensitive_at(word, 0, &canonical_lower) {
339            let mut out = String::with_capacity(canonical.len() + word.len().saturating_sub(end_pos));
340            out.push_str(canonical);
341            out.push_str(&word[end_pos..]);
342            out
343        } else {
344            canonical.to_string()
345        }
346    }
347
348    /// Capitalize the first letter of a word, handling Unicode properly
349    fn capitalize_first(&self, word: &str) -> String {
350        if word.is_empty() {
351            return String::new();
352        }
353
354        // Find the first alphabetic character to capitalize
355        let first_alpha_pos = word.find(|c: char| c.is_alphabetic());
356        let Some(pos) = first_alpha_pos else {
357            return word.to_string();
358        };
359
360        let prefix = &word[..pos];
361        let mut chars = word[pos..].chars();
362        let first = chars.next().unwrap();
363        // Use composition-preserving uppercase to avoid decomposing
364        // precomposed characters (e.g., ῷ → Ω + combining marks + Ι)
365        let first_upper = Self::uppercase_preserving_composition(&first.to_string());
366        let rest: String = chars.collect();
367        let rest_lower = Self::lowercase_preserving_composition(&rest);
368        format!("{prefix}{first_upper}{rest_lower}")
369    }
370
371    /// Lowercase a string character-by-character, preserving precomposed
372    /// characters that would decompose during case conversion.
373    fn lowercase_preserving_composition(s: &str) -> String {
374        let mut result = String::with_capacity(s.len());
375        for c in s.chars() {
376            let lower: String = c.to_lowercase().collect();
377            if lower.chars().count() == 1 {
378                result.push_str(&lower);
379            } else {
380                // Lowercasing would decompose this character; keep original
381                result.push(c);
382            }
383        }
384        result
385    }
386
387    /// Uppercase a string character-by-character, preserving precomposed
388    /// characters that would decompose during case conversion.
389    /// For example, ῷ (U+1FF7) would decompose into Ω + combining marks + Ι
390    /// via to_uppercase(); this function keeps ῷ unchanged instead.
391    fn uppercase_preserving_composition(s: &str) -> String {
392        let mut result = String::with_capacity(s.len());
393        for c in s.chars() {
394            let upper: String = c.to_uppercase().collect();
395            if upper.chars().count() == 1 {
396                result.push_str(&upper);
397            } else {
398                // Uppercasing would decompose this character; keep original
399                result.push(c);
400            }
401        }
402        result
403    }
404
405    /// Apply title case to text, using our own title-case logic.
406    /// We avoid the external titlecase crate because it decomposes
407    /// precomposed Unicode characters during case conversion.
408    fn apply_title_case(&self, text: &str) -> String {
409        let canonical_forms = self.proper_name_canonical_forms(text);
410
411        let original_words: Vec<&str> = text.split_whitespace().collect();
412        let total_words = original_words.len();
413
414        // Pre-compute byte position of each word for canonical form lookup.
415        // Use usize::MAX as sentinel for unfound words so canonical_forms.get() returns None.
416        let mut word_positions: Vec<usize> = Vec::with_capacity(original_words.len());
417        let mut pos = 0;
418        for word in &original_words {
419            if let Some(rel) = text[pos..].find(word) {
420                word_positions.push(pos + rel);
421                pos = pos + rel + word.len();
422            } else {
423                word_positions.push(usize::MAX);
424            }
425        }
426
427        let result_words: Vec<String> = original_words
428            .iter()
429            .enumerate()
430            .map(|(i, word)| {
431                let after_period = i > 0 && original_words[i - 1].ends_with('.');
432                let is_first = i == 0 || after_period;
433                let is_last = i == total_words - 1;
434
435                // Words that are part of an MD044 proper name use the canonical form directly.
436                if let Some(&canonical) = word_positions.get(i).and_then(|&p| canonical_forms.get(&p)) {
437                    return Self::apply_canonical_form_to_word(word, canonical);
438                }
439
440                // Preserve words in ignore list or with internal capitals
441                if self.should_preserve_word(word) {
442                    return (*word).to_string();
443                }
444
445                // Handle hyphenated words
446                if word.contains('-') {
447                    return self.handle_hyphenated_word(word, is_first, is_last);
448                }
449
450                self.title_case_word(word, is_first, is_last)
451            })
452            .collect();
453
454        result_words.join(" ")
455    }
456
457    /// Handle hyphenated words like "self-documenting"
458    fn handle_hyphenated_word(&self, word: &str, is_first: bool, is_last: bool) -> String {
459        let parts: Vec<&str> = word.split('-').collect();
460        let total_parts = parts.len();
461
462        let result_parts: Vec<String> = parts
463            .iter()
464            .enumerate()
465            .map(|(i, part)| {
466                // First part of first word and last part of last word get special treatment
467                let part_is_first = is_first && i == 0;
468                let part_is_last = is_last && i == total_parts - 1;
469                self.title_case_word(part, part_is_first, part_is_last)
470            })
471            .collect();
472
473        result_parts.join("-")
474    }
475
476    /// Apply sentence case to text
477    fn apply_sentence_case(&self, text: &str) -> String {
478        if text.is_empty() {
479            return text.to_string();
480        }
481
482        let canonical_forms = self.proper_name_canonical_forms(text);
483        let mut result = String::new();
484        let mut current_pos = 0;
485        let mut is_first_word = true;
486
487        // Use original text positions to preserve whitespace correctly
488        for word in text.split_whitespace() {
489            if let Some(pos) = text[current_pos..].find(word) {
490                let abs_pos = current_pos + pos;
491
492                // Preserve whitespace before this word
493                result.push_str(&text[current_pos..abs_pos]);
494
495                // Words that are part of an MD044 proper name use the canonical form
496                // directly, bypassing sentence-case lowercasing entirely.
497                if let Some(&canonical) = canonical_forms.get(&abs_pos) {
498                    result.push_str(&Self::apply_canonical_form_to_word(word, canonical));
499                    is_first_word = false;
500                } else if is_first_word {
501                    // Check if word should be preserved BEFORE any capitalization
502                    if self.should_preserve_word(word) {
503                        // Preserve ignore-words exactly as-is, even at start
504                        result.push_str(word);
505                    } else {
506                        // First word: capitalize first letter, lowercase rest
507                        let mut chars = word.chars();
508                        if let Some(first) = chars.next() {
509                            result.push_str(&Self::uppercase_preserving_composition(&first.to_string()));
510                            let rest: String = chars.collect();
511                            result.push_str(&Self::lowercase_preserving_composition(&rest));
512                        }
513                    }
514                    is_first_word = false;
515                } else {
516                    // Non-first words: preserve if needed, otherwise lowercase
517                    if self.should_preserve_word(word) {
518                        result.push_str(word);
519                    } else {
520                        result.push_str(&Self::lowercase_preserving_composition(word));
521                    }
522                }
523
524                current_pos = abs_pos + word.len();
525            }
526        }
527
528        // Preserve any trailing whitespace
529        if current_pos < text.len() {
530            result.push_str(&text[current_pos..]);
531        }
532
533        result
534    }
535
536    /// Apply all caps to text (preserve whitespace)
537    fn apply_all_caps(&self, text: &str) -> String {
538        if text.is_empty() {
539            return text.to_string();
540        }
541
542        let canonical_forms = self.proper_name_canonical_forms(text);
543        let mut result = String::new();
544        let mut current_pos = 0;
545
546        // Use original text positions to preserve whitespace correctly
547        for word in text.split_whitespace() {
548            if let Some(pos) = text[current_pos..].find(word) {
549                let abs_pos = current_pos + pos;
550
551                // Preserve whitespace before this word
552                result.push_str(&text[current_pos..abs_pos]);
553
554                // Words that are part of an MD044 proper name use the canonical form directly.
555                // This prevents oscillation with MD044 when all-caps style is active.
556                if let Some(&canonical) = canonical_forms.get(&abs_pos) {
557                    result.push_str(&Self::apply_canonical_form_to_word(word, canonical));
558                } else if self.should_preserve_word(word) {
559                    result.push_str(word);
560                } else {
561                    result.push_str(&Self::uppercase_preserving_composition(word));
562                }
563
564                current_pos = abs_pos + word.len();
565            }
566        }
567
568        // Preserve any trailing whitespace
569        if current_pos < text.len() {
570            result.push_str(&text[current_pos..]);
571        }
572
573        result
574    }
575
576    /// Parse heading text into segments
577    fn parse_segments(&self, text: &str) -> Vec<HeadingSegment> {
578        let mut segments = Vec::new();
579        let mut last_end = 0;
580
581        // Collect all special regions (code and links)
582        let mut special_regions: Vec<(usize, usize, HeadingSegment)> = Vec::new();
583
584        // Find inline code spans
585        for mat in INLINE_CODE_REGEX.find_iter(text) {
586            special_regions.push((mat.start(), mat.end(), HeadingSegment::Code(mat.as_str().to_string())));
587        }
588
589        // Find links
590        for caps in LINK_REGEX.captures_iter(text) {
591            let full_match = caps.get(0).unwrap();
592            let text_match = caps.get(1).or_else(|| caps.get(2));
593
594            if let Some(text_m) = text_match {
595                special_regions.push((
596                    full_match.start(),
597                    full_match.end(),
598                    HeadingSegment::Link {
599                        full: full_match.as_str().to_string(),
600                        text_start: text_m.start() - full_match.start(),
601                        text_end: text_m.end() - full_match.start(),
602                    },
603                ));
604            }
605        }
606
607        // Find inline HTML tags
608        for mat in HTML_TAG_REGEX.find_iter(text) {
609            special_regions.push((mat.start(), mat.end(), HeadingSegment::Html(mat.as_str().to_string())));
610        }
611
612        // Sort by start position
613        special_regions.sort_by_key(|(start, _, _)| *start);
614
615        // Remove overlapping regions (code takes precedence)
616        let mut filtered_regions: Vec<(usize, usize, HeadingSegment)> = Vec::new();
617        for region in special_regions {
618            let overlaps = filtered_regions.iter().any(|(s, e, _)| region.0 < *e && region.1 > *s);
619            if !overlaps {
620                filtered_regions.push(region);
621            }
622        }
623
624        // Build segments
625        for (start, end, segment) in filtered_regions {
626            // Add text before this special region
627            if start > last_end {
628                let text_segment = &text[last_end..start];
629                if !text_segment.is_empty() {
630                    segments.push(HeadingSegment::Text(text_segment.to_string()));
631                }
632            }
633            segments.push(segment);
634            last_end = end;
635        }
636
637        // Add remaining text
638        if last_end < text.len() {
639            let remaining = &text[last_end..];
640            if !remaining.is_empty() {
641                segments.push(HeadingSegment::Text(remaining.to_string()));
642            }
643        }
644
645        // If no segments were found, treat the whole thing as text
646        if segments.is_empty() && !text.is_empty() {
647            segments.push(HeadingSegment::Text(text.to_string()));
648        }
649
650        segments
651    }
652
653    /// Apply capitalization to heading text
654    fn apply_capitalization(&self, text: &str) -> String {
655        // Strip custom ID if present and re-add later
656        let (main_text, custom_id) = if let Some(mat) = CUSTOM_ID_REGEX.find(text) {
657            (&text[..mat.start()], Some(mat.as_str()))
658        } else {
659            (text, None)
660        };
661
662        // Parse into segments
663        let segments = self.parse_segments(main_text);
664
665        // Count text segments to determine first/last word context
666        let text_segments: Vec<usize> = segments
667            .iter()
668            .enumerate()
669            .filter_map(|(i, s)| matches!(s, HeadingSegment::Text(_)).then_some(i))
670            .collect();
671
672        // Determine if the first segment overall is a text segment
673        // For sentence case: if heading starts with code/link, the first text segment
674        // should NOT capitalize its first word (the heading already has a "first element")
675        let first_segment_is_text = segments
676            .first()
677            .map(|s| matches!(s, HeadingSegment::Text(_)))
678            .unwrap_or(false);
679
680        // Determine if the last segment overall is a text segment
681        // If the last segment is Code or Link, then the last text segment should NOT
682        // treat its last word as the heading's last word (for lowercase-words respect)
683        let last_segment_is_text = segments
684            .last()
685            .map(|s| matches!(s, HeadingSegment::Text(_)))
686            .unwrap_or(false);
687
688        // Apply capitalization to each segment
689        let mut result_parts: Vec<String> = Vec::new();
690
691        for (i, segment) in segments.iter().enumerate() {
692            match segment {
693                HeadingSegment::Text(t) => {
694                    let is_first_text = text_segments.first() == Some(&i);
695                    // A text segment is "last" only if it's the last text segment AND
696                    // the last segment overall is also text. If there's Code/Link after,
697                    // the last word should respect lowercase-words.
698                    let is_last_text = text_segments.last() == Some(&i) && last_segment_is_text;
699
700                    let capitalized = match self.config.style {
701                        HeadingCapStyle::TitleCase => self.apply_title_case_segment(t, is_first_text, is_last_text),
702                        HeadingCapStyle::SentenceCase => {
703                            // For sentence case, only capitalize first word if:
704                            // 1. This is the first text segment, AND
705                            // 2. The heading actually starts with text (not code/link)
706                            if is_first_text && first_segment_is_text {
707                                self.apply_sentence_case(t)
708                            } else {
709                                // Non-first segments OR heading starts with code/link
710                                self.apply_sentence_case_non_first(t)
711                            }
712                        }
713                        HeadingCapStyle::AllCaps => self.apply_all_caps(t),
714                    };
715                    result_parts.push(capitalized);
716                }
717                HeadingSegment::Code(c) => {
718                    result_parts.push(c.clone());
719                }
720                HeadingSegment::Link {
721                    full,
722                    text_start,
723                    text_end,
724                } => {
725                    // Apply capitalization to link text only
726                    let link_text = &full[*text_start..*text_end];
727                    let capitalized_text = match self.config.style {
728                        HeadingCapStyle::TitleCase => self.apply_title_case(link_text),
729                        // For sentence case, apply same preservation logic as non-first text
730                        // This preserves acronyms (API), brand names (iPhone), etc.
731                        HeadingCapStyle::SentenceCase => self.apply_sentence_case_non_first(link_text),
732                        HeadingCapStyle::AllCaps => self.apply_all_caps(link_text),
733                    };
734
735                    let mut new_link = String::new();
736                    new_link.push_str(&full[..*text_start]);
737                    new_link.push_str(&capitalized_text);
738                    new_link.push_str(&full[*text_end..]);
739                    result_parts.push(new_link);
740                }
741                HeadingSegment::Html(h) => {
742                    // Preserve HTML tags as-is (like code)
743                    result_parts.push(h.clone());
744                }
745            }
746        }
747
748        let mut result = result_parts.join("");
749
750        // Re-add custom ID if present
751        if let Some(id) = custom_id {
752            result.push_str(id);
753        }
754
755        result
756    }
757
758    /// Apply title case to a text segment with first/last awareness
759    fn apply_title_case_segment(&self, text: &str, is_first_segment: bool, is_last_segment: bool) -> String {
760        let canonical_forms = self.proper_name_canonical_forms(text);
761        let words: Vec<&str> = text.split_whitespace().collect();
762        let total_words = words.len();
763
764        if total_words == 0 {
765            return text.to_string();
766        }
767
768        // Pre-compute byte position of each word so we can look up canonical forms.
769        // Use usize::MAX as sentinel for unfound words so canonical_forms.get() returns None.
770        let mut word_positions: Vec<usize> = Vec::with_capacity(words.len());
771        let mut pos = 0;
772        for word in &words {
773            if let Some(rel) = text[pos..].find(word) {
774                word_positions.push(pos + rel);
775                pos = pos + rel + word.len();
776            } else {
777                word_positions.push(usize::MAX);
778            }
779        }
780
781        let result_words: Vec<String> = words
782            .iter()
783            .enumerate()
784            .map(|(i, word)| {
785                let after_period = i > 0 && words[i - 1].ends_with('.');
786                let is_first = (is_first_segment && i == 0) || after_period;
787                let is_last = is_last_segment && i == total_words - 1;
788
789                // Words that are part of an MD044 proper name use the canonical form directly.
790                if let Some(&canonical) = word_positions.get(i).and_then(|&p| canonical_forms.get(&p)) {
791                    return Self::apply_canonical_form_to_word(word, canonical);
792                }
793
794                // Handle hyphenated words
795                if word.contains('-') {
796                    return self.handle_hyphenated_word(word, is_first, is_last);
797                }
798
799                self.title_case_word(word, is_first, is_last)
800            })
801            .collect();
802
803        // Preserve original spacing
804        let mut result = String::new();
805        let mut word_iter = result_words.iter();
806        let mut in_word = false;
807
808        for c in text.chars() {
809            if c.is_whitespace() {
810                if in_word {
811                    in_word = false;
812                }
813                result.push(c);
814            } else if !in_word {
815                if let Some(word) = word_iter.next() {
816                    result.push_str(word);
817                }
818                in_word = true;
819            }
820        }
821
822        result
823    }
824
825    /// Apply sentence case to non-first segments (just lowercase, preserve whitespace)
826    fn apply_sentence_case_non_first(&self, text: &str) -> String {
827        if text.is_empty() {
828            return text.to_string();
829        }
830
831        let canonical_forms = self.proper_name_canonical_forms(text);
832        let mut result = String::new();
833        let mut current_pos = 0;
834
835        // Iterate over words in the original text so byte positions are consistent
836        // with the positions in canonical_forms (built from the same text).
837        for word in text.split_whitespace() {
838            if let Some(pos) = text[current_pos..].find(word) {
839                let abs_pos = current_pos + pos;
840
841                // Preserve whitespace before this word
842                result.push_str(&text[current_pos..abs_pos]);
843
844                // Words that are part of an MD044 proper name use the canonical form directly.
845                if let Some(&canonical) = canonical_forms.get(&abs_pos) {
846                    result.push_str(&Self::apply_canonical_form_to_word(word, canonical));
847                } else if self.should_preserve_word(word) {
848                    result.push_str(word);
849                } else {
850                    result.push_str(&Self::lowercase_preserving_composition(word));
851                }
852
853                current_pos = abs_pos + word.len();
854            }
855        }
856
857        // Preserve any trailing whitespace
858        if current_pos < text.len() {
859            result.push_str(&text[current_pos..]);
860        }
861
862        result
863    }
864
865    /// Get byte range for a line
866    fn get_line_byte_range(&self, content: &str, line_num: usize, line_index: &LineIndex) -> Range<usize> {
867        let start_pos = line_index.get_line_start_byte(line_num).unwrap_or(content.len());
868        let line = content.lines().nth(line_num - 1).unwrap_or("");
869        Range {
870            start: start_pos,
871            end: start_pos + line.len(),
872        }
873    }
874
875    /// Fix an ATX heading line
876    fn fix_atx_heading(&self, _line: &str, heading: &crate::lint_context::HeadingInfo) -> String {
877        // Parse the line to preserve structure
878        let indent = " ".repeat(heading.marker_column);
879        let hashes = "#".repeat(heading.level as usize);
880
881        // Apply capitalization to the text
882        let fixed_text = self.apply_capitalization(&heading.raw_text);
883
884        // Reconstruct with closing sequence if present
885        let closing = &heading.closing_sequence;
886        if heading.has_closing_sequence {
887            format!("{indent}{hashes} {fixed_text} {closing}")
888        } else {
889            format!("{indent}{hashes} {fixed_text}")
890        }
891    }
892
893    /// Fix a Setext heading line
894    fn fix_setext_heading(&self, line: &str, heading: &crate::lint_context::HeadingInfo) -> String {
895        // Apply capitalization to the text
896        let fixed_text = self.apply_capitalization(&heading.raw_text);
897
898        // Preserve leading whitespace from original line
899        let leading_ws: String = line.chars().take_while(|c| c.is_whitespace()).collect();
900
901        format!("{leading_ws}{fixed_text}")
902    }
903}
904
905impl Rule for MD063HeadingCapitalization {
906    fn name(&self) -> &'static str {
907        "MD063"
908    }
909
910    fn description(&self) -> &'static str {
911        "Heading capitalization"
912    }
913
914    fn category(&self) -> RuleCategory {
915        RuleCategory::Heading
916    }
917
918    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
919        !ctx.likely_has_headings() || !ctx.lines.iter().any(|line| line.heading.is_some())
920    }
921
922    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
923        let content = ctx.content;
924
925        if content.is_empty() {
926            return Ok(Vec::new());
927        }
928
929        let mut warnings = Vec::new();
930        let line_index = &ctx.line_index;
931
932        for (line_num, line_info) in ctx.lines.iter().enumerate() {
933            if let Some(heading) = &line_info.heading {
934                // Check level filter
935                if heading.level < self.config.min_level || heading.level > self.config.max_level {
936                    continue;
937                }
938
939                // Skip headings in code blocks (indented headings)
940                if line_info.visual_indent >= 4 && matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
941                    continue;
942                }
943
944                // Skip invalid headings (e.g., `#tag` which lacks required space after #)
945                if !heading.is_valid {
946                    continue;
947                }
948
949                // Apply capitalization and compare
950                let original_text = &heading.raw_text;
951                let fixed_text = self.apply_capitalization(original_text);
952
953                if original_text != &fixed_text {
954                    let line = line_info.content(ctx.content);
955                    let style_name = match self.config.style {
956                        HeadingCapStyle::TitleCase => "title case",
957                        HeadingCapStyle::SentenceCase => "sentence case",
958                        HeadingCapStyle::AllCaps => "ALL CAPS",
959                    };
960
961                    warnings.push(LintWarning {
962                        rule_name: Some(self.name().to_string()),
963                        line: line_num + 1,
964                        column: heading.content_column + 1,
965                        end_line: line_num + 1,
966                        end_column: heading.content_column + 1 + original_text.len(),
967                        message: format!("Heading should use {style_name}: '{original_text}' -> '{fixed_text}'"),
968                        severity: Severity::Warning,
969                        fix: Some(Fix {
970                            range: self.get_line_byte_range(content, line_num + 1, line_index),
971                            replacement: match heading.style {
972                                crate::lint_context::HeadingStyle::ATX => self.fix_atx_heading(line, heading),
973                                _ => self.fix_setext_heading(line, heading),
974                            },
975                        }),
976                    });
977                }
978            }
979        }
980
981        Ok(warnings)
982    }
983
984    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
985        let content = ctx.content;
986
987        if content.is_empty() {
988            return Ok(content.to_string());
989        }
990
991        let lines = ctx.raw_lines();
992        let mut fixed_lines: Vec<String> = lines.iter().map(|&s| s.to_string()).collect();
993
994        for (line_num, line_info) in ctx.lines.iter().enumerate() {
995            // Skip lines where the rule is disabled via inline config
996            if ctx.is_rule_disabled(self.name(), line_num + 1) {
997                continue;
998            }
999
1000            if let Some(heading) = &line_info.heading {
1001                // Check level filter
1002                if heading.level < self.config.min_level || heading.level > self.config.max_level {
1003                    continue;
1004                }
1005
1006                // Skip headings in code blocks
1007                if line_info.visual_indent >= 4 && matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
1008                    continue;
1009                }
1010
1011                // Skip invalid headings (e.g., `#tag` which lacks required space after #)
1012                if !heading.is_valid {
1013                    continue;
1014                }
1015
1016                let original_text = &heading.raw_text;
1017                let fixed_text = self.apply_capitalization(original_text);
1018
1019                if original_text != &fixed_text {
1020                    let line = line_info.content(ctx.content);
1021                    fixed_lines[line_num] = match heading.style {
1022                        crate::lint_context::HeadingStyle::ATX => self.fix_atx_heading(line, heading),
1023                        _ => self.fix_setext_heading(line, heading),
1024                    };
1025                }
1026            }
1027        }
1028
1029        // Reconstruct content preserving line endings
1030        let mut result = String::with_capacity(content.len());
1031        for (i, line) in fixed_lines.iter().enumerate() {
1032            result.push_str(line);
1033            if i < fixed_lines.len() - 1 || content.ends_with('\n') {
1034                result.push('\n');
1035            }
1036        }
1037
1038        Ok(result)
1039    }
1040
1041    fn as_any(&self) -> &dyn std::any::Any {
1042        self
1043    }
1044
1045    fn default_config_section(&self) -> Option<(String, toml::Value)> {
1046        let json_value = serde_json::to_value(&self.config).ok()?;
1047        Some((
1048            self.name().to_string(),
1049            crate::rule_config_serde::json_to_toml_value(&json_value)?,
1050        ))
1051    }
1052
1053    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
1054    where
1055        Self: Sized,
1056    {
1057        let rule_config = crate::rule_config_serde::load_rule_config::<MD063Config>(config);
1058        let md044_config =
1059            crate::rule_config_serde::load_rule_config::<crate::rules::md044_proper_names::MD044Config>(config);
1060        let mut rule = Self::from_config_struct(rule_config);
1061        rule.proper_names = md044_config.names;
1062        Box::new(rule)
1063    }
1064}
1065
1066#[cfg(test)]
1067mod tests {
1068    use super::*;
1069    use crate::lint_context::LintContext;
1070
1071    fn create_rule() -> MD063HeadingCapitalization {
1072        let config = MD063Config {
1073            enabled: true,
1074            ..Default::default()
1075        };
1076        MD063HeadingCapitalization::from_config_struct(config)
1077    }
1078
1079    fn create_rule_with_style(style: HeadingCapStyle) -> MD063HeadingCapitalization {
1080        let config = MD063Config {
1081            enabled: true,
1082            style,
1083            ..Default::default()
1084        };
1085        MD063HeadingCapitalization::from_config_struct(config)
1086    }
1087
1088    // Title case tests
1089    #[test]
1090    fn test_title_case_basic() {
1091        let rule = create_rule();
1092        let content = "# hello world\n";
1093        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1094        let result = rule.check(&ctx).unwrap();
1095        assert_eq!(result.len(), 1);
1096        assert!(result[0].message.contains("Hello World"));
1097    }
1098
1099    #[test]
1100    fn test_title_case_lowercase_words() {
1101        let rule = create_rule();
1102        let content = "# the quick brown fox\n";
1103        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1104        let result = rule.check(&ctx).unwrap();
1105        assert_eq!(result.len(), 1);
1106        // "The" should be capitalized (first word), "quick", "brown", "fox" should be capitalized
1107        assert!(result[0].message.contains("The Quick Brown Fox"));
1108    }
1109
1110    #[test]
1111    fn test_title_case_already_correct() {
1112        let rule = create_rule();
1113        let content = "# The Quick Brown Fox\n";
1114        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1115        let result = rule.check(&ctx).unwrap();
1116        assert!(result.is_empty(), "Already correct heading should not be flagged");
1117    }
1118
1119    #[test]
1120    fn test_title_case_hyphenated() {
1121        let rule = create_rule();
1122        let content = "# self-documenting code\n";
1123        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1124        let result = rule.check(&ctx).unwrap();
1125        assert_eq!(result.len(), 1);
1126        assert!(result[0].message.contains("Self-Documenting Code"));
1127    }
1128
1129    // Sentence case tests
1130    #[test]
1131    fn test_sentence_case_basic() {
1132        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
1133        let content = "# The Quick Brown Fox\n";
1134        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1135        let result = rule.check(&ctx).unwrap();
1136        assert_eq!(result.len(), 1);
1137        assert!(result[0].message.contains("The quick brown fox"));
1138    }
1139
1140    #[test]
1141    fn test_sentence_case_already_correct() {
1142        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
1143        let content = "# The quick brown fox\n";
1144        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1145        let result = rule.check(&ctx).unwrap();
1146        assert!(result.is_empty());
1147    }
1148
1149    // All caps tests
1150    #[test]
1151    fn test_all_caps_basic() {
1152        let rule = create_rule_with_style(HeadingCapStyle::AllCaps);
1153        let content = "# hello world\n";
1154        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1155        let result = rule.check(&ctx).unwrap();
1156        assert_eq!(result.len(), 1);
1157        assert!(result[0].message.contains("HELLO WORLD"));
1158    }
1159
1160    // Preserve tests
1161    #[test]
1162    fn test_preserve_ignore_words() {
1163        let config = MD063Config {
1164            enabled: true,
1165            ignore_words: vec!["iPhone".to_string(), "macOS".to_string()],
1166            ..Default::default()
1167        };
1168        let rule = MD063HeadingCapitalization::from_config_struct(config);
1169
1170        let content = "# using iPhone on macOS\n";
1171        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1172        let result = rule.check(&ctx).unwrap();
1173        assert_eq!(result.len(), 1);
1174        // iPhone and macOS should be preserved
1175        assert!(result[0].message.contains("iPhone"));
1176        assert!(result[0].message.contains("macOS"));
1177    }
1178
1179    #[test]
1180    fn test_preserve_cased_words() {
1181        let rule = create_rule();
1182        let content = "# using GitHub actions\n";
1183        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1184        let result = rule.check(&ctx).unwrap();
1185        assert_eq!(result.len(), 1);
1186        // GitHub should be preserved (has internal capital)
1187        assert!(result[0].message.contains("GitHub"));
1188    }
1189
1190    // Inline code tests
1191    #[test]
1192    fn test_inline_code_preserved() {
1193        let rule = create_rule();
1194        let content = "# using `const` in javascript\n";
1195        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1196        let result = rule.check(&ctx).unwrap();
1197        assert_eq!(result.len(), 1);
1198        // `const` should be preserved, rest capitalized
1199        assert!(result[0].message.contains("`const`"));
1200        assert!(result[0].message.contains("Javascript") || result[0].message.contains("JavaScript"));
1201    }
1202
1203    // Level filter tests
1204    #[test]
1205    fn test_level_filter() {
1206        let config = MD063Config {
1207            enabled: true,
1208            min_level: 2,
1209            max_level: 4,
1210            ..Default::default()
1211        };
1212        let rule = MD063HeadingCapitalization::from_config_struct(config);
1213
1214        let content = "# h1 heading\n## h2 heading\n### h3 heading\n##### h5 heading\n";
1215        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1216        let result = rule.check(&ctx).unwrap();
1217
1218        // Only h2 and h3 should be flagged (h1 < min_level, h5 > max_level)
1219        assert_eq!(result.len(), 2);
1220        assert_eq!(result[0].line, 2); // h2
1221        assert_eq!(result[1].line, 3); // h3
1222    }
1223
1224    // Fix tests
1225    #[test]
1226    fn test_fix_atx_heading() {
1227        let rule = create_rule();
1228        let content = "# hello world\n";
1229        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1230        let fixed = rule.fix(&ctx).unwrap();
1231        assert_eq!(fixed, "# Hello World\n");
1232    }
1233
1234    #[test]
1235    fn test_fix_multiple_headings() {
1236        let rule = create_rule();
1237        let content = "# first heading\n\n## second heading\n";
1238        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1239        let fixed = rule.fix(&ctx).unwrap();
1240        assert_eq!(fixed, "# First Heading\n\n## Second Heading\n");
1241    }
1242
1243    // Setext heading tests
1244    #[test]
1245    fn test_setext_heading() {
1246        let rule = create_rule();
1247        let content = "hello world\n============\n";
1248        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1249        let result = rule.check(&ctx).unwrap();
1250        assert_eq!(result.len(), 1);
1251        assert!(result[0].message.contains("Hello World"));
1252    }
1253
1254    // Custom ID tests
1255    #[test]
1256    fn test_custom_id_preserved() {
1257        let rule = create_rule();
1258        let content = "# getting started {#intro}\n";
1259        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1260        let result = rule.check(&ctx).unwrap();
1261        assert_eq!(result.len(), 1);
1262        // Custom ID should be preserved
1263        assert!(result[0].message.contains("{#intro}"));
1264    }
1265
    // Invalid heading / Obsidian tag tests, followed by acronym preservation tests
1267    #[test]
1268    fn test_skip_obsidian_tags_not_headings() {
1269        let rule = create_rule();
1270
1271        // #tag (no space after #) is an Obsidian tag, not a heading
1272        let content = "# H1\n\n#tag\n";
1273        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
1274        let result = rule.check(&ctx).unwrap();
1275        assert!(
1276            result.is_empty() || result.iter().all(|w| w.line != 3),
1277            "Obsidian tag #tag should not be treated as a heading: {result:?}"
1278        );
1279    }
1280
1281    #[test]
1282    fn test_skip_invalid_atx_headings_no_space() {
1283        let rule = create_rule();
1284
1285        // #NoSpace is not a valid ATX heading (requires space after #)
1286        let content = "#notaheading\n";
1287        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1288        let result = rule.check(&ctx).unwrap();
1289        assert!(
1290            result.is_empty(),
1291            "Invalid ATX heading without space should not be flagged: {result:?}"
1292        );
1293    }
1294
1295    #[test]
1296    fn test_fix_skips_obsidian_tags() {
1297        let rule = create_rule();
1298
1299        let content = "# hello world\n\n#tag\n";
1300        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
1301        let fixed = rule.fix(&ctx).unwrap();
1302        // Should fix the real heading but leave the tag alone
1303        assert!(fixed.contains("#tag"), "Fix should not modify Obsidian tag #tag");
1304        assert!(fixed.contains("# Hello World"), "Fix should still fix real headings");
1305    }
1306
1307    #[test]
1308    fn test_preserve_all_caps_acronyms() {
1309        let rule = create_rule();
1310        let ctx = |c| LintContext::new(c, crate::config::MarkdownFlavor::Standard, None);
1311
1312        // Basic acronyms should be preserved
1313        let fixed = rule.fix(&ctx("# using API in production\n")).unwrap();
1314        assert_eq!(fixed, "# Using API in Production\n");
1315
1316        // Multiple acronyms
1317        let fixed = rule.fix(&ctx("# API and GPU integration\n")).unwrap();
1318        assert_eq!(fixed, "# API and GPU Integration\n");
1319
1320        // Two-letter acronyms
1321        let fixed = rule.fix(&ctx("# IO performance guide\n")).unwrap();
1322        assert_eq!(fixed, "# IO Performance Guide\n");
1323
1324        // Acronyms with numbers
1325        let fixed = rule.fix(&ctx("# HTTP2 and MD5 hashing\n")).unwrap();
1326        assert_eq!(fixed, "# HTTP2 and MD5 Hashing\n");
1327    }
1328
1329    #[test]
1330    fn test_preserve_acronyms_in_hyphenated_words() {
1331        let rule = create_rule();
1332        let ctx = |c| LintContext::new(c, crate::config::MarkdownFlavor::Standard, None);
1333
1334        // Acronyms at start of hyphenated word
1335        let fixed = rule.fix(&ctx("# API-driven architecture\n")).unwrap();
1336        assert_eq!(fixed, "# API-Driven Architecture\n");
1337
1338        // Multiple acronyms with hyphens
1339        let fixed = rule.fix(&ctx("# GPU-accelerated CPU-intensive tasks\n")).unwrap();
1340        assert_eq!(fixed, "# GPU-Accelerated CPU-Intensive Tasks\n");
1341    }
1342
1343    #[test]
1344    fn test_single_letters_not_treated_as_acronyms() {
1345        let rule = create_rule();
1346        let ctx = |c| LintContext::new(c, crate::config::MarkdownFlavor::Standard, None);
1347
1348        // Single uppercase letters should follow title case rules, not be preserved
1349        let fixed = rule.fix(&ctx("# i am a heading\n")).unwrap();
1350        assert_eq!(fixed, "# I Am a Heading\n");
1351    }
1352
1353    #[test]
1354    fn test_lowercase_terms_need_ignore_words() {
1355        let ctx = |c| LintContext::new(c, crate::config::MarkdownFlavor::Standard, None);
1356
1357        // Without ignore_words: npm gets capitalized
1358        let rule = create_rule();
1359        let fixed = rule.fix(&ctx("# using npm packages\n")).unwrap();
1360        assert_eq!(fixed, "# Using Npm Packages\n");
1361
1362        // With ignore_words: npm preserved
1363        let config = MD063Config {
1364            enabled: true,
1365            ignore_words: vec!["npm".to_string()],
1366            ..Default::default()
1367        };
1368        let rule = MD063HeadingCapitalization::from_config_struct(config);
1369        let fixed = rule.fix(&ctx("# using npm packages\n")).unwrap();
1370        assert_eq!(fixed, "# Using npm Packages\n");
1371    }
1372
1373    #[test]
1374    fn test_acronyms_with_mixed_case_preserved() {
1375        let rule = create_rule();
1376        let ctx = |c| LintContext::new(c, crate::config::MarkdownFlavor::Standard, None);
1377
1378        // Both acronyms (API, GPU) and mixed-case (GitHub) should be preserved
1379        let fixed = rule.fix(&ctx("# using API with GitHub\n")).unwrap();
1380        assert_eq!(fixed, "# Using API with GitHub\n");
1381    }
1382
1383    #[test]
1384    fn test_real_world_acronyms() {
1385        let rule = create_rule();
1386        let ctx = |c| LintContext::new(c, crate::config::MarkdownFlavor::Standard, None);
1387
1388        // Common technical acronyms from tested repositories
1389        let content = "# FFI bindings for CPU optimization\n";
1390        let fixed = rule.fix(&ctx(content)).unwrap();
1391        assert_eq!(fixed, "# FFI Bindings for CPU Optimization\n");
1392
1393        let content = "# DOM manipulation and SSR rendering\n";
1394        let fixed = rule.fix(&ctx(content)).unwrap();
1395        assert_eq!(fixed, "# DOM Manipulation and SSR Rendering\n");
1396
1397        let content = "# CVE security and RNN models\n";
1398        let fixed = rule.fix(&ctx(content)).unwrap();
1399        assert_eq!(fixed, "# CVE Security and RNN Models\n");
1400    }
1401
1402    #[test]
1403    fn test_is_all_caps_acronym() {
1404        let rule = create_rule();
1405
1406        // Should return true for all-caps with 2+ letters
1407        assert!(rule.is_all_caps_acronym("API"));
1408        assert!(rule.is_all_caps_acronym("IO"));
1409        assert!(rule.is_all_caps_acronym("GPU"));
1410        assert!(rule.is_all_caps_acronym("HTTP2")); // Numbers don't break it
1411
1412        // Should return false for single letters
1413        assert!(!rule.is_all_caps_acronym("A"));
1414        assert!(!rule.is_all_caps_acronym("I"));
1415
1416        // Should return false for words with lowercase
1417        assert!(!rule.is_all_caps_acronym("Api"));
1418        assert!(!rule.is_all_caps_acronym("npm"));
1419        assert!(!rule.is_all_caps_acronym("iPhone"));
1420    }
1421
1422    #[test]
1423    fn test_sentence_case_ignore_words_first_word() {
1424        let config = MD063Config {
1425            enabled: true,
1426            style: HeadingCapStyle::SentenceCase,
1427            ignore_words: vec!["nvim".to_string()],
1428            ..Default::default()
1429        };
1430        let rule = MD063HeadingCapitalization::from_config_struct(config);
1431
1432        // "nvim" as first word should be preserved exactly
1433        let content = "# nvim config\n";
1434        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1435        let result = rule.check(&ctx).unwrap();
1436        assert!(
1437            result.is_empty(),
1438            "nvim in ignore-words should not be flagged. Got: {result:?}"
1439        );
1440
1441        // Verify fix also preserves it
1442        let fixed = rule.fix(&ctx).unwrap();
1443        assert_eq!(fixed, "# nvim config\n");
1444    }
1445
1446    #[test]
1447    fn test_sentence_case_ignore_words_not_first() {
1448        let config = MD063Config {
1449            enabled: true,
1450            style: HeadingCapStyle::SentenceCase,
1451            ignore_words: vec!["nvim".to_string()],
1452            ..Default::default()
1453        };
1454        let rule = MD063HeadingCapitalization::from_config_struct(config);
1455
1456        // "nvim" in middle should also be preserved
1457        let content = "# Using nvim editor\n";
1458        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1459        let result = rule.check(&ctx).unwrap();
1460        assert!(
1461            result.is_empty(),
1462            "nvim in ignore-words should be preserved. Got: {result:?}"
1463        );
1464    }
1465
1466    #[test]
1467    fn test_preserve_cased_words_ios() {
1468        let config = MD063Config {
1469            enabled: true,
1470            style: HeadingCapStyle::SentenceCase,
1471            preserve_cased_words: true,
1472            ..Default::default()
1473        };
1474        let rule = MD063HeadingCapitalization::from_config_struct(config);
1475
1476        // "iOS" should be preserved (has mixed case: lowercase 'i' + uppercase 'OS')
1477        let content = "## This is iOS\n";
1478        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1479        let result = rule.check(&ctx).unwrap();
1480        assert!(
1481            result.is_empty(),
1482            "iOS should be preserved with preserve-cased-words. Got: {result:?}"
1483        );
1484
1485        // Verify fix also preserves it
1486        let fixed = rule.fix(&ctx).unwrap();
1487        assert_eq!(fixed, "## This is iOS\n");
1488    }
1489
1490    #[test]
1491    fn test_preserve_cased_words_ios_title_case() {
1492        let config = MD063Config {
1493            enabled: true,
1494            style: HeadingCapStyle::TitleCase,
1495            preserve_cased_words: true,
1496            ..Default::default()
1497        };
1498        let rule = MD063HeadingCapitalization::from_config_struct(config);
1499
1500        // "iOS" should be preserved in title case too
1501        let content = "# developing for iOS\n";
1502        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1503        let fixed = rule.fix(&ctx).unwrap();
1504        assert_eq!(fixed, "# Developing for iOS\n");
1505    }
1506
1507    #[test]
1508    fn test_has_internal_capitals_ios() {
1509        let rule = create_rule();
1510
1511        // iOS should be detected as having internal capitals
1512        assert!(
1513            rule.has_internal_capitals("iOS"),
1514            "iOS has mixed case (lowercase i, uppercase OS)"
1515        );
1516
1517        // Other mixed-case words
1518        assert!(rule.has_internal_capitals("iPhone"));
1519        assert!(rule.has_internal_capitals("macOS"));
1520        assert!(rule.has_internal_capitals("GitHub"));
1521        assert!(rule.has_internal_capitals("JavaScript"));
1522        assert!(rule.has_internal_capitals("eBay"));
1523
1524        // All-caps should NOT be detected (handled by is_all_caps_acronym)
1525        assert!(!rule.has_internal_capitals("API"));
1526        assert!(!rule.has_internal_capitals("GPU"));
1527
1528        // All-lowercase should NOT be detected
1529        assert!(!rule.has_internal_capitals("npm"));
1530        assert!(!rule.has_internal_capitals("config"));
1531
1532        // Regular capitalized words should NOT be detected
1533        assert!(!rule.has_internal_capitals("The"));
1534        assert!(!rule.has_internal_capitals("Hello"));
1535    }
1536
1537    #[test]
1538    fn test_lowercase_words_before_trailing_code() {
1539        let config = MD063Config {
1540            enabled: true,
1541            style: HeadingCapStyle::TitleCase,
1542            lowercase_words: vec![
1543                "a".to_string(),
1544                "an".to_string(),
1545                "and".to_string(),
1546                "at".to_string(),
1547                "but".to_string(),
1548                "by".to_string(),
1549                "for".to_string(),
1550                "from".to_string(),
1551                "into".to_string(),
1552                "nor".to_string(),
1553                "on".to_string(),
1554                "onto".to_string(),
1555                "or".to_string(),
1556                "the".to_string(),
1557                "to".to_string(),
1558                "upon".to_string(),
1559                "via".to_string(),
1560                "vs".to_string(),
1561                "with".to_string(),
1562                "without".to_string(),
1563            ],
1564            preserve_cased_words: true,
1565            ..Default::default()
1566        };
1567        let rule = MD063HeadingCapitalization::from_config_struct(config);
1568
1569        // Test: "subtitle with a `app`" (all lowercase input)
1570        // Expected fix: "Subtitle With a `app`" - capitalize "Subtitle" and "With",
1571        // but keep "a" lowercase (it's in lowercase-words and not the last word)
1572        // Incorrect: "Subtitle with A `app`" (would incorrectly capitalize "a")
1573        let content = "## subtitle with a `app`\n";
1574        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1575        let result = rule.check(&ctx).unwrap();
1576
1577        // Should flag it
1578        assert!(!result.is_empty(), "Should flag incorrect capitalization");
1579        let fixed = rule.fix(&ctx).unwrap();
1580        // "a" should remain lowercase (not "A") because inline code at end doesn't change lowercase-words behavior
1581        assert!(
1582            fixed.contains("with a `app`"),
1583            "Expected 'with a `app`' but got: {fixed:?}"
1584        );
1585        assert!(
1586            !fixed.contains("with A `app`"),
1587            "Should not capitalize 'a' to 'A'. Got: {fixed:?}"
1588        );
1589        // "Subtitle" should be capitalized, "with" and "a" should remain lowercase (they're in lowercase-words)
1590        assert!(
1591            fixed.contains("Subtitle with a `app`"),
1592            "Expected 'Subtitle with a `app`' but got: {fixed:?}"
1593        );
1594    }
1595
1596    #[test]
1597    fn test_lowercase_words_preserved_before_trailing_code_variant() {
1598        let config = MD063Config {
1599            enabled: true,
1600            style: HeadingCapStyle::TitleCase,
1601            lowercase_words: vec!["a".to_string(), "the".to_string(), "with".to_string()],
1602            ..Default::default()
1603        };
1604        let rule = MD063HeadingCapitalization::from_config_struct(config);
1605
1606        // Another variant: "Title with the `code`"
1607        let content = "## Title with the `code`\n";
1608        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1609        let fixed = rule.fix(&ctx).unwrap();
1610        // "the" should remain lowercase
1611        assert!(
1612            fixed.contains("with the `code`"),
1613            "Expected 'with the `code`' but got: {fixed:?}"
1614        );
1615        assert!(
1616            !fixed.contains("with The `code`"),
1617            "Should not capitalize 'the' to 'The'. Got: {fixed:?}"
1618        );
1619    }
1620
1621    #[test]
1622    fn test_last_word_capitalized_when_no_trailing_code() {
1623        // Verify that when there's NO trailing code, the last word IS capitalized
1624        // (even if it's in lowercase-words) - this is the normal title case behavior
1625        let config = MD063Config {
1626            enabled: true,
1627            style: HeadingCapStyle::TitleCase,
1628            lowercase_words: vec!["a".to_string(), "the".to_string()],
1629            ..Default::default()
1630        };
1631        let rule = MD063HeadingCapitalization::from_config_struct(config);
1632
1633        // "title with a word" - "word" is last, should be capitalized
1634        // "a" is in lowercase-words and not last, so should be lowercase
1635        let content = "## title with a word\n";
1636        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1637        let fixed = rule.fix(&ctx).unwrap();
1638        // "a" should be lowercase, "word" should be capitalized (it's last)
1639        assert!(
1640            fixed.contains("With a Word"),
1641            "Expected 'With a Word' but got: {fixed:?}"
1642        );
1643    }
1644
1645    #[test]
1646    fn test_multiple_lowercase_words_before_code() {
1647        let config = MD063Config {
1648            enabled: true,
1649            style: HeadingCapStyle::TitleCase,
1650            lowercase_words: vec![
1651                "a".to_string(),
1652                "the".to_string(),
1653                "with".to_string(),
1654                "for".to_string(),
1655            ],
1656            ..Default::default()
1657        };
1658        let rule = MD063HeadingCapitalization::from_config_struct(config);
1659
1660        // Multiple lowercase words before code - all should remain lowercase
1661        let content = "## Guide for the `user`\n";
1662        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1663        let fixed = rule.fix(&ctx).unwrap();
1664        assert!(
1665            fixed.contains("for the `user`"),
1666            "Expected 'for the `user`' but got: {fixed:?}"
1667        );
1668        assert!(
1669            !fixed.contains("For The `user`"),
1670            "Should not capitalize lowercase words before code. Got: {fixed:?}"
1671        );
1672    }
1673
1674    #[test]
1675    fn test_code_in_middle_normal_rules_apply() {
1676        let config = MD063Config {
1677            enabled: true,
1678            style: HeadingCapStyle::TitleCase,
1679            lowercase_words: vec!["a".to_string(), "the".to_string(), "for".to_string()],
1680            ..Default::default()
1681        };
1682        let rule = MD063HeadingCapitalization::from_config_struct(config);
1683
1684        // Code in the middle - normal title case rules apply (last word capitalized)
1685        let content = "## Using `const` for the code\n";
1686        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1687        let fixed = rule.fix(&ctx).unwrap();
1688        // "for" and "the" should be lowercase (middle), "code" should be capitalized (last)
1689        assert!(
1690            fixed.contains("for the Code"),
1691            "Expected 'for the Code' but got: {fixed:?}"
1692        );
1693    }
1694
1695    #[test]
1696    fn test_link_at_end_same_as_code() {
1697        let config = MD063Config {
1698            enabled: true,
1699            style: HeadingCapStyle::TitleCase,
1700            lowercase_words: vec!["a".to_string(), "the".to_string(), "for".to_string()],
1701            ..Default::default()
1702        };
1703        let rule = MD063HeadingCapitalization::from_config_struct(config);
1704
1705        // Link at the end - same behavior as code (lowercase words before should remain lowercase)
1706        let content = "## Guide for the [link](./page.md)\n";
1707        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1708        let fixed = rule.fix(&ctx).unwrap();
1709        // "for" and "the" should remain lowercase (not last word because link follows)
1710        assert!(
1711            fixed.contains("for the [Link]"),
1712            "Expected 'for the [Link]' but got: {fixed:?}"
1713        );
1714        assert!(
1715            !fixed.contains("for The [Link]"),
1716            "Should not capitalize 'the' before link. Got: {fixed:?}"
1717        );
1718    }
1719
1720    #[test]
1721    fn test_multiple_code_segments() {
1722        let config = MD063Config {
1723            enabled: true,
1724            style: HeadingCapStyle::TitleCase,
1725            lowercase_words: vec!["a".to_string(), "the".to_string(), "with".to_string()],
1726            ..Default::default()
1727        };
1728        let rule = MD063HeadingCapitalization::from_config_struct(config);
1729
1730        // Multiple code segments - last segment is code, so lowercase words before should remain lowercase
1731        let content = "## Using `const` with a `variable`\n";
1732        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1733        let fixed = rule.fix(&ctx).unwrap();
1734        // "a" should remain lowercase (not last word because code follows)
1735        assert!(
1736            fixed.contains("with a `variable`"),
1737            "Expected 'with a `variable`' but got: {fixed:?}"
1738        );
1739        assert!(
1740            !fixed.contains("with A `variable`"),
1741            "Should not capitalize 'a' before trailing code. Got: {fixed:?}"
1742        );
1743    }
1744
1745    #[test]
1746    fn test_code_and_link_combination() {
1747        let config = MD063Config {
1748            enabled: true,
1749            style: HeadingCapStyle::TitleCase,
1750            lowercase_words: vec!["a".to_string(), "the".to_string(), "for".to_string()],
1751            ..Default::default()
1752        };
1753        let rule = MD063HeadingCapitalization::from_config_struct(config);
1754
1755        // Code then link - last segment is link, so lowercase words before code should remain lowercase
1756        let content = "## Guide for the `code` [link](./page.md)\n";
1757        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1758        let fixed = rule.fix(&ctx).unwrap();
1759        // "for" and "the" should remain lowercase (not last word because link follows)
1760        assert!(
1761            fixed.contains("for the `code`"),
1762            "Expected 'for the `code`' but got: {fixed:?}"
1763        );
1764    }
1765
1766    #[test]
1767    fn test_text_after_code_capitalizes_last() {
1768        let config = MD063Config {
1769            enabled: true,
1770            style: HeadingCapStyle::TitleCase,
1771            lowercase_words: vec!["a".to_string(), "the".to_string(), "for".to_string()],
1772            ..Default::default()
1773        };
1774        let rule = MD063HeadingCapitalization::from_config_struct(config);
1775
1776        // Code in middle, text after - last word should be capitalized
1777        let content = "## Using `const` for the code\n";
1778        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1779        let fixed = rule.fix(&ctx).unwrap();
1780        // "for" and "the" should be lowercase, "code" is last word, should be capitalized
1781        assert!(
1782            fixed.contains("for the Code"),
1783            "Expected 'for the Code' but got: {fixed:?}"
1784        );
1785    }
1786
1787    #[test]
1788    fn test_preserve_cased_words_with_trailing_code() {
1789        let config = MD063Config {
1790            enabled: true,
1791            style: HeadingCapStyle::TitleCase,
1792            lowercase_words: vec!["a".to_string(), "the".to_string(), "for".to_string()],
1793            preserve_cased_words: true,
1794            ..Default::default()
1795        };
1796        let rule = MD063HeadingCapitalization::from_config_struct(config);
1797
1798        // Preserve-cased words should still work with trailing code
1799        let content = "## Guide for iOS `app`\n";
1800        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1801        let fixed = rule.fix(&ctx).unwrap();
1802        // "iOS" should be preserved, "for" should be lowercase
1803        assert!(
1804            fixed.contains("for iOS `app`"),
1805            "Expected 'for iOS `app`' but got: {fixed:?}"
1806        );
1807        assert!(
1808            !fixed.contains("For iOS `app`"),
1809            "Should not capitalize 'for' before trailing code. Got: {fixed:?}"
1810        );
1811    }
1812
1813    #[test]
1814    fn test_ignore_words_with_trailing_code() {
1815        let config = MD063Config {
1816            enabled: true,
1817            style: HeadingCapStyle::TitleCase,
1818            lowercase_words: vec!["a".to_string(), "the".to_string(), "with".to_string()],
1819            ignore_words: vec!["npm".to_string()],
1820            ..Default::default()
1821        };
1822        let rule = MD063HeadingCapitalization::from_config_struct(config);
1823
1824        // Ignore-words should still work with trailing code
1825        let content = "## Using npm with a `script`\n";
1826        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1827        let fixed = rule.fix(&ctx).unwrap();
1828        // "npm" should be preserved, "with" and "a" should be lowercase
1829        assert!(
1830            fixed.contains("npm with a `script`"),
1831            "Expected 'npm with a `script`' but got: {fixed:?}"
1832        );
1833        assert!(
1834            !fixed.contains("with A `script`"),
1835            "Should not capitalize 'a' before trailing code. Got: {fixed:?}"
1836        );
1837    }
1838
1839    #[test]
1840    fn test_empty_text_segment_edge_case() {
1841        let config = MD063Config {
1842            enabled: true,
1843            style: HeadingCapStyle::TitleCase,
1844            lowercase_words: vec!["a".to_string(), "with".to_string()],
1845            ..Default::default()
1846        };
1847        let rule = MD063HeadingCapitalization::from_config_struct(config);
1848
1849        // Edge case: code at start, then text with lowercase word, then code at end
1850        let content = "## `start` with a `end`\n";
1851        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1852        let fixed = rule.fix(&ctx).unwrap();
1853        // "with" is first word in text segment, so capitalized (correct)
1854        // "a" should remain lowercase (not last word because code follows) - this is the key test
1855        assert!(fixed.contains("a `end`"), "Expected 'a `end`' but got: {fixed:?}");
1856        assert!(
1857            !fixed.contains("A `end`"),
1858            "Should not capitalize 'a' before trailing code. Got: {fixed:?}"
1859        );
1860    }
1861
1862    #[test]
1863    fn test_sentence_case_with_trailing_code() {
1864        let config = MD063Config {
1865            enabled: true,
1866            style: HeadingCapStyle::SentenceCase,
1867            lowercase_words: vec!["a".to_string(), "the".to_string()],
1868            ..Default::default()
1869        };
1870        let rule = MD063HeadingCapitalization::from_config_struct(config);
1871
1872        // Sentence case should also respect lowercase words before code
1873        let content = "## guide for the `user`\n";
1874        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1875        let fixed = rule.fix(&ctx).unwrap();
1876        // First word capitalized, rest lowercase including "the" before code
1877        assert!(
1878            fixed.contains("Guide for the `user`"),
1879            "Expected 'Guide for the `user`' but got: {fixed:?}"
1880        );
1881    }
1882
1883    #[test]
1884    fn test_hyphenated_word_before_code() {
1885        let config = MD063Config {
1886            enabled: true,
1887            style: HeadingCapStyle::TitleCase,
1888            lowercase_words: vec!["a".to_string(), "the".to_string(), "with".to_string()],
1889            ..Default::default()
1890        };
1891        let rule = MD063HeadingCapitalization::from_config_struct(config);
1892
1893        // Hyphenated word before code - last part should respect lowercase-words
1894        let content = "## Self-contained with a `feature`\n";
1895        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1896        let fixed = rule.fix(&ctx).unwrap();
1897        // "with" and "a" should remain lowercase (not last word because code follows)
1898        assert!(
1899            fixed.contains("with a `feature`"),
1900            "Expected 'with a `feature`' but got: {fixed:?}"
1901        );
1902    }
1903
1904    // Issue #228: Sentence case with inline code at heading start
1905    // When a heading starts with inline code, the first word after the code
1906    // should NOT be capitalized because the heading already has a "first element"
1907
1908    #[test]
1909    fn test_sentence_case_code_at_start_basic() {
1910        // The exact case from issue #228
1911        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
1912        let content = "# `rumdl` is a linter\n";
1913        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1914        let result = rule.check(&ctx).unwrap();
1915        // Should be correct as-is: code is first, "is" stays lowercase
1916        assert!(
1917            result.is_empty(),
1918            "Heading with code at start should not flag 'is' for capitalization. Got: {:?}",
1919            result.iter().map(|w| &w.message).collect::<Vec<_>>()
1920        );
1921    }
1922
1923    #[test]
1924    fn test_sentence_case_code_at_start_incorrect_capitalization() {
1925        // Verify we detect incorrect capitalization after code at start
1926        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
1927        let content = "# `rumdl` Is a Linter\n";
1928        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1929        let result = rule.check(&ctx).unwrap();
1930        // Should flag: "Is" and "Linter" should be lowercase
1931        assert_eq!(result.len(), 1, "Should detect incorrect capitalization");
1932        assert!(
1933            result[0].message.contains("`rumdl` is a linter"),
1934            "Should suggest lowercase after code. Got: {:?}",
1935            result[0].message
1936        );
1937    }
1938
1939    #[test]
1940    fn test_sentence_case_code_at_start_fix() {
1941        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
1942        let content = "# `rumdl` Is A Linter\n";
1943        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1944        let fixed = rule.fix(&ctx).unwrap();
1945        assert!(
1946            fixed.contains("# `rumdl` is a linter"),
1947            "Should fix to lowercase after code. Got: {fixed:?}"
1948        );
1949    }
1950
1951    #[test]
1952    fn test_sentence_case_text_at_start_still_capitalizes() {
1953        // Ensure normal headings still capitalize first word
1954        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
1955        let content = "# the quick brown fox\n";
1956        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1957        let result = rule.check(&ctx).unwrap();
1958        assert_eq!(result.len(), 1);
1959        assert!(
1960            result[0].message.contains("The quick brown fox"),
1961            "Text-first heading should capitalize first word. Got: {:?}",
1962            result[0].message
1963        );
1964    }
1965
1966    #[test]
1967    fn test_sentence_case_link_at_start() {
1968        // Links at start: link text is lowercased, following text also lowercase
1969        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
1970        // Use lowercase link text to avoid link text case flagging
1971        let content = "# [api](api.md) reference guide\n";
1972        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1973        let result = rule.check(&ctx).unwrap();
1974        // "reference" should be lowercase (link is first)
1975        assert!(
1976            result.is_empty(),
1977            "Heading with link at start should not capitalize 'reference'. Got: {:?}",
1978            result.iter().map(|w| &w.message).collect::<Vec<_>>()
1979        );
1980    }
1981
1982    #[test]
1983    fn test_sentence_case_link_preserves_acronyms() {
1984        // Acronyms in link text should be preserved (API, HTTP, etc.)
1985        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
1986        let content = "# [API](api.md) Reference Guide\n";
1987        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1988        let result = rule.check(&ctx).unwrap();
1989        assert_eq!(result.len(), 1);
1990        // "API" should be preserved (acronym), "Reference Guide" should be lowercased
1991        assert!(
1992            result[0].message.contains("[API](api.md) reference guide"),
1993            "Should preserve acronym 'API' but lowercase following text. Got: {:?}",
1994            result[0].message
1995        );
1996    }
1997
1998    #[test]
1999    fn test_sentence_case_link_preserves_brand_names() {
2000        // Brand names with internal capitals should be preserved
2001        let config = MD063Config {
2002            enabled: true,
2003            style: HeadingCapStyle::SentenceCase,
2004            preserve_cased_words: true,
2005            ..Default::default()
2006        };
2007        let rule = MD063HeadingCapitalization::from_config_struct(config);
2008        let content = "# [iPhone](iphone.md) Features Guide\n";
2009        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2010        let result = rule.check(&ctx).unwrap();
2011        assert_eq!(result.len(), 1);
2012        // "iPhone" should be preserved, "Features Guide" should be lowercased
2013        assert!(
2014            result[0].message.contains("[iPhone](iphone.md) features guide"),
2015            "Should preserve 'iPhone' but lowercase following text. Got: {:?}",
2016            result[0].message
2017        );
2018    }
2019
2020    #[test]
2021    fn test_sentence_case_link_lowercases_regular_words() {
2022        // Regular words in link text should be lowercased
2023        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2024        let content = "# [Documentation](docs.md) Reference\n";
2025        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2026        let result = rule.check(&ctx).unwrap();
2027        assert_eq!(result.len(), 1);
2028        // "Documentation" should be lowercased (regular word)
2029        assert!(
2030            result[0].message.contains("[documentation](docs.md) reference"),
2031            "Should lowercase regular link text. Got: {:?}",
2032            result[0].message
2033        );
2034    }
2035
2036    #[test]
2037    fn test_sentence_case_link_at_start_correct_already() {
2038        // Link with correct casing should not be flagged
2039        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2040        let content = "# [API](api.md) reference guide\n";
2041        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2042        let result = rule.check(&ctx).unwrap();
2043        assert!(
2044            result.is_empty(),
2045            "Correctly cased heading with link should not be flagged. Got: {:?}",
2046            result.iter().map(|w| &w.message).collect::<Vec<_>>()
2047        );
2048    }
2049
2050    #[test]
2051    fn test_sentence_case_link_github_preserved() {
2052        // GitHub should be preserved (internal capitals)
2053        let config = MD063Config {
2054            enabled: true,
2055            style: HeadingCapStyle::SentenceCase,
2056            preserve_cased_words: true,
2057            ..Default::default()
2058        };
2059        let rule = MD063HeadingCapitalization::from_config_struct(config);
2060        let content = "# [GitHub](gh.md) Repository Setup\n";
2061        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2062        let result = rule.check(&ctx).unwrap();
2063        assert_eq!(result.len(), 1);
2064        assert!(
2065            result[0].message.contains("[GitHub](gh.md) repository setup"),
2066            "Should preserve 'GitHub'. Got: {:?}",
2067            result[0].message
2068        );
2069    }
2070
2071    #[test]
2072    fn test_sentence_case_multiple_code_spans() {
2073        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2074        let content = "# `foo` and `bar` are methods\n";
2075        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2076        let result = rule.check(&ctx).unwrap();
2077        // All text after first code should be lowercase
2078        assert!(
2079            result.is_empty(),
2080            "Should not capitalize words between/after code spans. Got: {:?}",
2081            result.iter().map(|w| &w.message).collect::<Vec<_>>()
2082        );
2083    }
2084
2085    #[test]
2086    fn test_sentence_case_code_only_heading() {
2087        // Heading with only code, no text
2088        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2089        let content = "# `rumdl`\n";
2090        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2091        let result = rule.check(&ctx).unwrap();
2092        assert!(
2093            result.is_empty(),
2094            "Code-only heading should be fine. Got: {:?}",
2095            result.iter().map(|w| &w.message).collect::<Vec<_>>()
2096        );
2097    }
2098
2099    #[test]
2100    fn test_sentence_case_code_at_end() {
2101        // Heading ending with code, text before should still capitalize first word
2102        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2103        let content = "# install the `rumdl` tool\n";
2104        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2105        let result = rule.check(&ctx).unwrap();
2106        // "install" should be capitalized (first word), rest lowercase
2107        assert_eq!(result.len(), 1);
2108        assert!(
2109            result[0].message.contains("Install the `rumdl` tool"),
2110            "First word should still be capitalized when text comes first. Got: {:?}",
2111            result[0].message
2112        );
2113    }
2114
2115    #[test]
2116    fn test_sentence_case_code_in_middle() {
2117        // Code in middle, text at start should capitalize first word
2118        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2119        let content = "# using the `rumdl` linter for markdown\n";
2120        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2121        let result = rule.check(&ctx).unwrap();
2122        // "using" should be capitalized, rest lowercase
2123        assert_eq!(result.len(), 1);
2124        assert!(
2125            result[0].message.contains("Using the `rumdl` linter for markdown"),
2126            "First word should be capitalized. Got: {:?}",
2127            result[0].message
2128        );
2129    }
2130
2131    #[test]
2132    fn test_sentence_case_preserved_word_after_code() {
2133        // Preserved words (like iPhone) should stay preserved even after code
2134        let config = MD063Config {
2135            enabled: true,
2136            style: HeadingCapStyle::SentenceCase,
2137            preserve_cased_words: true,
2138            ..Default::default()
2139        };
2140        let rule = MD063HeadingCapitalization::from_config_struct(config);
2141        let content = "# `swift` iPhone development\n";
2142        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2143        let result = rule.check(&ctx).unwrap();
2144        // "iPhone" should be preserved, "development" lowercase
2145        assert!(
2146            result.is_empty(),
2147            "Preserved words after code should stay. Got: {:?}",
2148            result.iter().map(|w| &w.message).collect::<Vec<_>>()
2149        );
2150    }
2151
2152    #[test]
2153    fn test_title_case_code_at_start_still_capitalizes() {
2154        // Title case should still capitalize words even after code at start
2155        let rule = create_rule_with_style(HeadingCapStyle::TitleCase);
2156        let content = "# `api` quick start guide\n";
2157        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2158        let result = rule.check(&ctx).unwrap();
2159        // Title case: all major words capitalized
2160        assert_eq!(result.len(), 1);
2161        assert!(
2162            result[0].message.contains("Quick Start Guide") || result[0].message.contains("quick Start Guide"),
2163            "Title case should capitalize major words after code. Got: {:?}",
2164            result[0].message
2165        );
2166    }
2167
2168    // ======== HTML TAG TESTS ========
2169
2170    #[test]
2171    fn test_sentence_case_html_tag_at_start() {
2172        // HTML tag at start: text after should NOT capitalize first word
2173        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2174        let content = "# <kbd>Ctrl</kbd> is a Modifier Key\n";
2175        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2176        let result = rule.check(&ctx).unwrap();
2177        // "is", "a", "Modifier", "Key" should all be lowercase (except preserved words)
2178        assert_eq!(result.len(), 1);
2179        let fixed = rule.fix(&ctx).unwrap();
2180        assert_eq!(
2181            fixed, "# <kbd>Ctrl</kbd> is a modifier key\n",
2182            "Text after HTML at start should be lowercase"
2183        );
2184    }
2185
2186    #[test]
2187    fn test_sentence_case_html_tag_preserves_content() {
2188        // Content inside HTML tags should be preserved as-is
2189        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2190        let content = "# The <abbr>API</abbr> documentation guide\n";
2191        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2192        let result = rule.check(&ctx).unwrap();
2193        // "The" is first, "API" inside tag preserved, rest lowercase
2194        assert!(
2195            result.is_empty(),
2196            "HTML tag content should be preserved. Got: {:?}",
2197            result.iter().map(|w| &w.message).collect::<Vec<_>>()
2198        );
2199    }
2200
2201    #[test]
2202    fn test_sentence_case_html_tag_at_start_with_acronym() {
2203        // HTML tag at start with acronym content
2204        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2205        let content = "# <abbr>API</abbr> Documentation Guide\n";
2206        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2207        let result = rule.check(&ctx).unwrap();
2208        assert_eq!(result.len(), 1);
2209        let fixed = rule.fix(&ctx).unwrap();
2210        assert_eq!(
2211            fixed, "# <abbr>API</abbr> documentation guide\n",
2212            "Text after HTML at start should be lowercase, HTML content preserved"
2213        );
2214    }
2215
2216    #[test]
2217    fn test_sentence_case_html_tag_in_middle() {
2218        // HTML tag in middle: first word still capitalized
2219        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2220        let content = "# using the <code>config</code> File\n";
2221        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2222        let result = rule.check(&ctx).unwrap();
2223        assert_eq!(result.len(), 1);
2224        let fixed = rule.fix(&ctx).unwrap();
2225        assert_eq!(
2226            fixed, "# Using the <code>config</code> file\n",
2227            "First word capitalized, HTML preserved, rest lowercase"
2228        );
2229    }
2230
2231    #[test]
2232    fn test_html_tag_strong_emphasis() {
2233        // <strong> tag handling
2234        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2235        let content = "# The <strong>Bold</strong> Way\n";
2236        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2237        let result = rule.check(&ctx).unwrap();
2238        assert_eq!(result.len(), 1);
2239        let fixed = rule.fix(&ctx).unwrap();
2240        assert_eq!(
2241            fixed, "# The <strong>Bold</strong> way\n",
2242            "<strong> tag content should be preserved"
2243        );
2244    }
2245
2246    #[test]
2247    fn test_html_tag_with_attributes() {
2248        // HTML tags with attributes should still be detected
2249        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2250        let content = "# <span class=\"highlight\">Important</span> Notice Here\n";
2251        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2252        let result = rule.check(&ctx).unwrap();
2253        assert_eq!(result.len(), 1);
2254        let fixed = rule.fix(&ctx).unwrap();
2255        assert_eq!(
2256            fixed, "# <span class=\"highlight\">Important</span> notice here\n",
2257            "HTML tag with attributes should be preserved"
2258        );
2259    }
2260
2261    #[test]
2262    fn test_multiple_html_tags() {
2263        // Multiple HTML tags in heading
2264        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2265        let content = "# <kbd>Ctrl</kbd>+<kbd>C</kbd> to Copy Text\n";
2266        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2267        let result = rule.check(&ctx).unwrap();
2268        assert_eq!(result.len(), 1);
2269        let fixed = rule.fix(&ctx).unwrap();
2270        assert_eq!(
2271            fixed, "# <kbd>Ctrl</kbd>+<kbd>C</kbd> to copy text\n",
2272            "Multiple HTML tags should all be preserved"
2273        );
2274    }
2275
2276    #[test]
2277    fn test_html_and_code_mixed() {
2278        // Mix of HTML tags and inline code
2279        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2280        let content = "# <kbd>Ctrl</kbd>+`v` Paste command\n";
2281        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2282        let result = rule.check(&ctx).unwrap();
2283        assert_eq!(result.len(), 1);
2284        let fixed = rule.fix(&ctx).unwrap();
2285        assert_eq!(
2286            fixed, "# <kbd>Ctrl</kbd>+`v` paste command\n",
2287            "HTML and code should both be preserved"
2288        );
2289    }
2290
2291    #[test]
2292    fn test_self_closing_html_tag() {
2293        // Self-closing tags like <br/>
2294        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2295        let content = "# Line one<br/>Line Two Here\n";
2296        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2297        let result = rule.check(&ctx).unwrap();
2298        assert_eq!(result.len(), 1);
2299        let fixed = rule.fix(&ctx).unwrap();
2300        assert_eq!(
2301            fixed, "# Line one<br/>line two here\n",
2302            "Self-closing HTML tags should be preserved"
2303        );
2304    }
2305
2306    #[test]
2307    fn test_title_case_with_html_tags() {
2308        // Title case with HTML tags
2309        let rule = create_rule_with_style(HeadingCapStyle::TitleCase);
2310        let content = "# the <kbd>ctrl</kbd> key is a modifier\n";
2311        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2312        let result = rule.check(&ctx).unwrap();
2313        assert_eq!(result.len(), 1);
2314        let fixed = rule.fix(&ctx).unwrap();
2315        // "the" as first word should be "The", content inside <kbd> preserved
2316        assert!(
2317            fixed.contains("<kbd>ctrl</kbd>"),
2318            "HTML tag content should be preserved in title case. Got: {fixed}"
2319        );
2320        assert!(
2321            fixed.starts_with("# The ") || fixed.starts_with("# the "),
2322            "Title case should work with HTML. Got: {fixed}"
2323        );
2324    }
2325
2326    // ======== CARET NOTATION TESTS ========
2327
2328    #[test]
2329    fn test_sentence_case_preserves_caret_notation() {
2330        // Caret notation for control characters should be preserved
2331        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2332        let content = "## Ctrl+A, Ctrl+R output ^A, ^R on zsh\n";
2333        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2334        let result = rule.check(&ctx).unwrap();
2335        // Should not flag - ^A and ^R are preserved
2336        assert!(
2337            result.is_empty(),
2338            "Caret notation should be preserved. Got: {:?}",
2339            result.iter().map(|w| &w.message).collect::<Vec<_>>()
2340        );
2341    }
2342
2343    #[test]
2344    fn test_sentence_case_caret_notation_various() {
2345        // Various caret notation patterns
2346        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2347
2348        // ^C for interrupt
2349        let content = "## Press ^C to cancel\n";
2350        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2351        let result = rule.check(&ctx).unwrap();
2352        assert!(
2353            result.is_empty(),
2354            "^C should be preserved. Got: {:?}",
2355            result.iter().map(|w| &w.message).collect::<Vec<_>>()
2356        );
2357
2358        // ^Z for suspend
2359        let content = "## Use ^Z for background\n";
2360        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2361        let result = rule.check(&ctx).unwrap();
2362        assert!(
2363            result.is_empty(),
2364            "^Z should be preserved. Got: {:?}",
2365            result.iter().map(|w| &w.message).collect::<Vec<_>>()
2366        );
2367
2368        // ^[ for escape
2369        let content = "## Press ^[ for escape\n";
2370        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2371        let result = rule.check(&ctx).unwrap();
2372        assert!(
2373            result.is_empty(),
2374            "^[ should be preserved. Got: {:?}",
2375            result.iter().map(|w| &w.message).collect::<Vec<_>>()
2376        );
2377    }
2378
2379    #[test]
2380    fn test_caret_notation_detection() {
2381        let rule = create_rule();
2382
2383        // Valid caret notation
2384        assert!(rule.is_caret_notation("^A"));
2385        assert!(rule.is_caret_notation("^Z"));
2386        assert!(rule.is_caret_notation("^C"));
2387        assert!(rule.is_caret_notation("^@")); // NUL
2388        assert!(rule.is_caret_notation("^[")); // ESC
2389        assert!(rule.is_caret_notation("^]")); // GS
2390        assert!(rule.is_caret_notation("^^")); // RS
2391        assert!(rule.is_caret_notation("^_")); // US
2392
2393        // Not caret notation
2394        assert!(!rule.is_caret_notation("^a")); // lowercase
2395        assert!(!rule.is_caret_notation("A")); // no caret
2396        assert!(!rule.is_caret_notation("^")); // caret alone
2397        assert!(!rule.is_caret_notation("^1")); // digit
2398    }
2399
2400    // MD044 proper names integration tests
2401    //
2402    // When MD063 (sentence case) and MD044 (proper names) are both active, MD063 must
2403    // preserve the exact capitalization of MD044 proper names rather than lowercasing them.
2404    // Without this, the two rules oscillate: MD044 re-capitalizes what MD063 lowercases.
2405
2406    fn create_sentence_case_rule_with_proper_names(names: Vec<String>) -> MD063HeadingCapitalization {
2407        let config = MD063Config {
2408            enabled: true,
2409            style: HeadingCapStyle::SentenceCase,
2410            ..Default::default()
2411        };
2412        let mut rule = MD063HeadingCapitalization::from_config_struct(config);
2413        rule.proper_names = names;
2414        rule
2415    }
2416
2417    #[test]
2418    fn test_sentence_case_preserves_single_word_proper_name() {
2419        let rule = create_sentence_case_rule_with_proper_names(vec!["JavaScript".to_string()]);
2420        // "javascript" in non-first position should become "JavaScript", not "javascript"
2421        let content = "# installing javascript\n";
2422        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2423        let result = rule.check(&ctx).unwrap();
2424        assert_eq!(result.len(), 1, "Should flag the heading");
2425        let fix_text = result[0].fix.as_ref().unwrap().replacement.as_str();
2426        assert!(
2427            fix_text.contains("JavaScript"),
2428            "Fix should preserve proper name 'JavaScript', got: {fix_text:?}"
2429        );
2430        assert!(
2431            !fix_text.contains("javascript"),
2432            "Fix should not have lowercase 'javascript', got: {fix_text:?}"
2433        );
2434    }
2435
2436    #[test]
2437    fn test_sentence_case_preserves_multi_word_proper_name() {
2438        let rule = create_sentence_case_rule_with_proper_names(vec!["Good Application".to_string()]);
2439        // "Good Application" is a proper name; sentence case must not lowercase "Application"
2440        let content = "# using good application features\n";
2441        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2442        let result = rule.check(&ctx).unwrap();
2443        assert_eq!(result.len(), 1, "Should flag the heading");
2444        let fix_text = result[0].fix.as_ref().unwrap().replacement.as_str();
2445        assert!(
2446            fix_text.contains("Good Application"),
2447            "Fix should preserve 'Good Application' as a phrase, got: {fix_text:?}"
2448        );
2449    }
2450
2451    #[test]
2452    fn test_sentence_case_proper_name_at_start_of_heading() {
2453        let rule = create_sentence_case_rule_with_proper_names(vec!["Good Application".to_string()]);
2454        // The proper name "Good Application" starts the heading; both words must be canonical
2455        let content = "# good application overview\n";
2456        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2457        let result = rule.check(&ctx).unwrap();
2458        assert_eq!(result.len(), 1, "Should flag the heading");
2459        let fix_text = result[0].fix.as_ref().unwrap().replacement.as_str();
2460        assert!(
2461            fix_text.contains("Good Application"),
2462            "Fix should produce 'Good Application' at start of heading, got: {fix_text:?}"
2463        );
2464        assert!(
2465            fix_text.contains("overview"),
2466            "Non-proper-name word 'overview' should be lowercase, got: {fix_text:?}"
2467        );
2468    }
2469
2470    #[test]
2471    fn test_sentence_case_with_proper_names_no_oscillation() {
2472        // This is the core convergence test: applying the fix once must produce
2473        // output that is already correct (no further changes needed).
2474        let rule = create_sentence_case_rule_with_proper_names(vec!["Good Application".to_string()]);
2475
2476        // First application of fix
2477        let content = "# installing good application on your system\n";
2478        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2479        let result = rule.check(&ctx).unwrap();
2480        assert_eq!(result.len(), 1);
2481        let fixed_heading = result[0].fix.as_ref().unwrap().replacement.as_str();
2482
2483        // The fixed heading should contain the proper name preserved
2484        assert!(
2485            fixed_heading.contains("Good Application"),
2486            "After fix, proper name must be preserved: {fixed_heading:?}"
2487        );
2488
2489        // Second application: must produce no further warnings (convergence)
2490        let fixed_line = format!("{fixed_heading}\n");
2491        let ctx2 = LintContext::new(&fixed_line, crate::config::MarkdownFlavor::Standard, None);
2492        let result2 = rule.check(&ctx2).unwrap();
2493        assert!(
2494            result2.is_empty(),
2495            "After one fix, heading must already satisfy both MD063 and MD044 - no oscillation. \
2496             Second pass warnings: {result2:?}"
2497        );
2498    }
2499
2500    #[test]
2501    fn test_sentence_case_proper_names_already_correct() {
2502        let rule = create_sentence_case_rule_with_proper_names(vec!["Good Application".to_string()]);
2503        // Heading already has correct sentence case with proper name preserved
2504        let content = "# Installing Good Application\n";
2505        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2506        let result = rule.check(&ctx).unwrap();
2507        assert!(
2508            result.is_empty(),
2509            "Correct sentence-case heading with proper name should not be flagged, got: {result:?}"
2510        );
2511    }
2512
2513    #[test]
2514    fn test_sentence_case_multiple_proper_names_in_heading() {
2515        let rule = create_sentence_case_rule_with_proper_names(vec!["TypeScript".to_string(), "React".to_string()]);
2516        let content = "# using typescript with react\n";
2517        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2518        let result = rule.check(&ctx).unwrap();
2519        assert_eq!(result.len(), 1);
2520        let fix_text = result[0].fix.as_ref().unwrap().replacement.as_str();
2521        assert!(
2522            fix_text.contains("TypeScript"),
2523            "Fix should preserve 'TypeScript', got: {fix_text:?}"
2524        );
2525        assert!(
2526            fix_text.contains("React"),
2527            "Fix should preserve 'React', got: {fix_text:?}"
2528        );
2529    }
2530
2531    #[test]
2532    fn test_sentence_case_unicode_casefold_expansion_before_proper_name() {
2533        // Regression for Unicode case-fold expansion: `İ` lowercases to `i̇` (2 code points),
2534        // so matching offsets must be computed from the original text, not from a lowercased copy.
2535        let rule = create_sentence_case_rule_with_proper_names(vec!["Österreich".to_string()]);
2536        let content = "# İ österreich guide\n";
2537        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2538
2539        // Should not panic and should preserve canonical proper-name casing.
2540        let result = rule.check(&ctx).unwrap();
2541        assert_eq!(result.len(), 1, "Should flag heading for canonical proper-name casing");
2542        let fix_text = result[0].fix.as_ref().unwrap().replacement.as_str();
2543        assert!(
2544            fix_text.contains("Österreich"),
2545            "Fix should preserve canonical 'Österreich', got: {fix_text:?}"
2546        );
2547    }
2548
2549    #[test]
2550    fn test_sentence_case_preserves_trailing_punctuation_on_proper_name() {
2551        let rule = create_sentence_case_rule_with_proper_names(vec!["JavaScript".to_string()]);
2552        let content = "# using javascript, today\n";
2553        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2554        let result = rule.check(&ctx).unwrap();
2555        assert_eq!(result.len(), 1, "Should flag heading");
2556        let fix_text = result[0].fix.as_ref().unwrap().replacement.as_str();
2557        assert!(
2558            fix_text.contains("JavaScript,"),
2559            "Fix should preserve trailing punctuation, got: {fix_text:?}"
2560        );
2561    }
2562
2563    // Title case + MD044 conflict tests
2564    //
2565    // In title case, short words like "the", "a", "of" are kept lowercase by MD063.
2566    // If those words are part of an MD044 proper name (e.g. "The Rolling Stones"),
2567    // the same oscillation problem occurs.  The fix must extend to title case too.
2568
2569    fn create_title_case_rule_with_proper_names(names: Vec<String>) -> MD063HeadingCapitalization {
2570        let config = MD063Config {
2571            enabled: true,
2572            style: HeadingCapStyle::TitleCase,
2573            ..Default::default()
2574        };
2575        let mut rule = MD063HeadingCapitalization::from_config_struct(config);
2576        rule.proper_names = names;
2577        rule
2578    }
2579
2580    #[test]
2581    fn test_title_case_preserves_proper_name_with_lowercase_article() {
2582        // "The" is in the lowercase_words list for title case, so "the" in the middle
2583        // of a heading would normally stay lowercase.  But "The Rolling Stones" is a
2584        // proper name that must be capitalised exactly.
2585        let rule = create_title_case_rule_with_proper_names(vec!["The Rolling Stones".to_string()]);
2586        let content = "# listening to the rolling stones today\n";
2587        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2588        let result = rule.check(&ctx).unwrap();
2589        assert_eq!(result.len(), 1, "Should flag the heading");
2590        let fix_text = result[0].fix.as_ref().unwrap().replacement.as_str();
2591        assert!(
2592            fix_text.contains("The Rolling Stones"),
2593            "Fix should preserve proper name 'The Rolling Stones', got: {fix_text:?}"
2594        );
2595    }
2596
2597    #[test]
2598    fn test_title_case_proper_name_no_oscillation() {
2599        // One fix pass must produce output that title case already accepts.
2600        let rule = create_title_case_rule_with_proper_names(vec!["The Rolling Stones".to_string()]);
2601        let content = "# listening to the rolling stones today\n";
2602        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2603        let result = rule.check(&ctx).unwrap();
2604        assert_eq!(result.len(), 1);
2605        let fixed_heading = result[0].fix.as_ref().unwrap().replacement.as_str();
2606
2607        let fixed_line = format!("{fixed_heading}\n");
2608        let ctx2 = LintContext::new(&fixed_line, crate::config::MarkdownFlavor::Standard, None);
2609        let result2 = rule.check(&ctx2).unwrap();
2610        assert!(
2611            result2.is_empty(),
2612            "After one title-case fix, heading must already satisfy both rules. \
2613             Second pass warnings: {result2:?}"
2614        );
2615    }
2616
2617    #[test]
2618    fn test_title_case_unicode_casefold_expansion_before_proper_name() {
2619        let rule = create_title_case_rule_with_proper_names(vec!["Österreich".to_string()]);
2620        let content = "# İ österreich guide\n";
2621        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2622        let result = rule.check(&ctx).unwrap();
2623        assert_eq!(result.len(), 1, "Should flag the heading");
2624        let fix_text = result[0].fix.as_ref().unwrap().replacement.as_str();
2625        assert!(
2626            fix_text.contains("Österreich"),
2627            "Fix should preserve canonical proper-name casing, got: {fix_text:?}"
2628        );
2629    }
2630
2631    // End-to-end integration test: from_config wires MD044 names into MD063
2632    //
2633    // This tests the actual code path used in production, where both rules are
2634    // configured in a rumdl.toml and the rule registry calls from_config.
2635
2636    #[test]
2637    fn test_from_config_loads_md044_names_into_md063() {
2638        use crate::config::{Config, RuleConfig};
2639        use crate::rule::Rule;
2640        use std::collections::BTreeMap;
2641
2642        let mut config = Config::default();
2643
2644        // Configure MD063 with sentence_case
2645        let mut md063_values = BTreeMap::new();
2646        md063_values.insert("style".to_string(), toml::Value::String("sentence_case".to_string()));
2647        md063_values.insert("enabled".to_string(), toml::Value::Boolean(true));
2648        config.rules.insert(
2649            "MD063".to_string(),
2650            RuleConfig {
2651                values: md063_values,
2652                severity: None,
2653            },
2654        );
2655
2656        // Configure MD044 with a proper name
2657        let mut md044_values = BTreeMap::new();
2658        md044_values.insert(
2659            "names".to_string(),
2660            toml::Value::Array(vec![toml::Value::String("Good Application".to_string())]),
2661        );
2662        config.rules.insert(
2663            "MD044".to_string(),
2664            RuleConfig {
2665                values: md044_values,
2666                severity: None,
2667            },
2668        );
2669
2670        // Build MD063 via the production code path
2671        let rule = MD063HeadingCapitalization::from_config(&config);
2672
2673        // Verify MD044 names were loaded: the fix must preserve "Good Application"
2674        let content = "# using good application features\n";
2675        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2676        let result = rule.check(&ctx).unwrap();
2677        assert_eq!(result.len(), 1, "Should flag the heading");
2678        let fix_text = result[0].fix.as_ref().unwrap().replacement.as_str();
2679        assert!(
2680            fix_text.contains("Good Application"),
2681            "from_config should wire MD044 names into MD063; fix should preserve \
2682             'Good Application', got: {fix_text:?}"
2683        );
2684    }
2685
2686    #[test]
2687    fn test_title_case_short_word_not_confused_with_substring() {
2688        // Verify that short preposition matching ("in") does not trigger on
2689        // substrings of longer words ("insert"). Title case must capitalize
2690        // "insert" while keeping "in" lowercase.
2691        let rule = create_rule_with_style(HeadingCapStyle::TitleCase);
2692
2693        // "in" is a short preposition (should be lowercase in title case)
2694        // "insert" contains "in" as substring but is a regular word (should be capitalized)
2695        let content = "# in the insert\n";
2696        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2697        let result = rule.check(&ctx).unwrap();
2698        assert_eq!(result.len(), 1, "Should flag the heading");
2699        let fix = result[0].fix.as_ref().expect("Fix should be present");
2700        // "In" capitalized as first word, "the" lowercase as article, "Insert" capitalized
2701        assert!(
2702            fix.replacement.contains("In the Insert"),
2703            "Expected 'In the Insert', got: {:?}",
2704            fix.replacement
2705        );
2706    }
2707
2708    #[test]
2709    fn test_title_case_or_not_confused_with_orchestra() {
2710        let rule = create_rule_with_style(HeadingCapStyle::TitleCase);
2711
2712        // "or" is a conjunction (should be lowercase in title case)
2713        // "orchestra" contains "or" as substring but is a regular word
2714        let content = "# or the orchestra\n";
2715        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2716        let result = rule.check(&ctx).unwrap();
2717        assert_eq!(result.len(), 1, "Should flag the heading");
2718        let fix = result[0].fix.as_ref().expect("Fix should be present");
2719        // "Or" capitalized as first word, "the" lowercase, "Orchestra" capitalized
2720        assert!(
2721            fix.replacement.contains("Or the Orchestra"),
2722            "Expected 'Or the Orchestra', got: {:?}",
2723            fix.replacement
2724        );
2725    }
2726
2727    #[test]
2728    fn test_all_caps_preserves_all_words() {
2729        let rule = create_rule_with_style(HeadingCapStyle::AllCaps);
2730
2731        let content = "# in the insert\n";
2732        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2733        let result = rule.check(&ctx).unwrap();
2734        assert_eq!(result.len(), 1, "Should flag the heading");
2735        let fix = result[0].fix.as_ref().expect("Fix should be present");
2736        assert!(
2737            fix.replacement.contains("IN THE INSERT"),
2738            "All caps should uppercase all words, got: {:?}",
2739            fix.replacement
2740        );
2741    }
2742
2743    // Numbered prefix tests — words following a period-terminated token must be capitalized
2744    #[test]
2745    fn test_title_case_numbered_prefix_lowercase_word() {
2746        // "to" follows "1." and must be treated as the start of a new phrase
2747        let rule = create_rule();
2748        let content = "## 1. To Be a Thing\n";
2749        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2750        let result = rule.check(&ctx).unwrap();
2751        assert!(
2752            result.is_empty(),
2753            "Should not flag '## 1. To Be a Thing', got: {result:?}"
2754        );
2755
2756        let content_lower = "## 1. to be a thing\n";
2757        let ctx2 = LintContext::new(content_lower, crate::config::MarkdownFlavor::Standard, None);
2758        let result2 = rule.check(&ctx2).unwrap();
2759        assert!(!result2.is_empty(), "Should flag '## 1. to be a thing'");
2760        let fix = result2[0].fix.as_ref().expect("Should have a fix");
2761        assert!(
2762            fix.replacement.contains("1. To Be a Thing"),
2763            "Fix should capitalize 'To', got: {:?}",
2764            fix.replacement
2765        );
2766    }
2767
2768    #[test]
2769    fn test_title_case_numbered_prefix_article() {
2770        // "a" follows "2." and must be capitalized as the first word of the phrase
2771        let rule = create_rule();
2772        let content = "## 2. A Guide to the Galaxy\n";
2773        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2774        let result = rule.check(&ctx).unwrap();
2775        assert!(
2776            result.is_empty(),
2777            "Should not flag '## 2. A Guide to the Galaxy', got: {result:?}"
2778        );
2779
2780        let content_lower = "## 2. a guide to the galaxy\n";
2781        let ctx2 = LintContext::new(content_lower, crate::config::MarkdownFlavor::Standard, None);
2782        let result2 = rule.check(&ctx2).unwrap();
2783        assert!(!result2.is_empty(), "Should flag '## 2. a guide to the galaxy'");
2784        let fix = result2[0].fix.as_ref().expect("Should have a fix");
2785        assert!(
2786            fix.replacement.contains("2. A Guide to the Galaxy"),
2787            "Fix should capitalize 'A', got: {:?}",
2788            fix.replacement
2789        );
2790    }
2791
2792    #[test]
2793    fn test_title_case_mid_sentence_period_word() {
2794        // "introduction" follows "1." embedded in a phrase — must be capitalized
2795        let rule = create_rule();
2796        let content = "## Step 1. Introduction to the Problem\n";
2797        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2798        let result = rule.check(&ctx).unwrap();
2799        assert!(
2800            result.is_empty(),
2801            "Should not flag '## Step 1. Introduction to the Problem', got: {result:?}"
2802        );
2803
2804        let content_lower = "## Step 1. introduction to the problem\n";
2805        let ctx2 = LintContext::new(content_lower, crate::config::MarkdownFlavor::Standard, None);
2806        let result2 = rule.check(&ctx2).unwrap();
2807        assert!(
2808            !result2.is_empty(),
2809            "Should flag '## Step 1. introduction to the problem'"
2810        );
2811        let fix = result2[0].fix.as_ref().expect("Should have a fix");
2812        assert!(
2813            fix.replacement.contains("Step 1. Introduction to the Problem"),
2814            "Fix should capitalize 'Introduction', got: {:?}",
2815            fix.replacement
2816        );
2817    }
2818
2819    #[test]
2820    fn test_title_case_numbered_prefix_in_link_text() {
2821        // apply_title_case (link text path) must also respect after_period.
2822        // A heading whose only content is a link: ## [1. to be a thing](url)
2823        let config = MD063Config {
2824            enabled: true,
2825            style: HeadingCapStyle::TitleCase,
2826            ..Default::default()
2827        };
2828        let rule = MD063HeadingCapitalization::from_config_struct(config);
2829
2830        // Correct heading — link text already title-cased after numbered prefix
2831        let content = "## [1. To Be a Thing](https://example.com)\n";
2832        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2833        let result = rule.check(&ctx).unwrap();
2834        assert!(
2835            result.is_empty(),
2836            "Should not flag '## [1. To Be a Thing](url)', got: {result:?}"
2837        );
2838
2839        // Incorrect heading — "to" in link text must be capitalized after "1."
2840        let content_lower = "## [1. to be a thing](https://example.com)\n";
2841        let ctx2 = LintContext::new(content_lower, crate::config::MarkdownFlavor::Standard, None);
2842        let result2 = rule.check(&ctx2).unwrap();
2843        assert!(!result2.is_empty(), "Should flag '## [1. to be a thing](url)'");
2844        let fix = result2[0].fix.as_ref().expect("Should have a fix");
2845        assert!(
2846            fix.replacement.contains("1. To Be a Thing"),
2847            "Fix should capitalize 'To' in link text, got: {:?}",
2848            fix.replacement
2849        );
2850    }
2851}