Skip to main content

rumdl_lib/rules/md063_heading_capitalization/
mod.rs

1/// Rule MD063: Heading capitalization
2///
3/// See [docs/md063.md](../../docs/md063.md) for full documentation, configuration, and examples.
4///
5/// This rule enforces consistent capitalization styles for markdown headings.
6/// It supports title case, sentence case, and all caps styles.
7///
8/// **Note:** This rule is disabled by default. Enable it in your configuration:
9/// ```toml
10/// [MD063]
11/// enabled = true
12/// style = "title_case"
13/// ```
14use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
15use crate::utils::range_utils::LineIndex;
16use regex::Regex;
17use std::collections::HashSet;
18use std::ops::Range;
19use std::sync::LazyLock;
20
21mod md063_config;
22pub use md063_config::{HeadingCapStyle, MD063Config};
23
24// Regex to match inline code spans (backticks)
25static INLINE_CODE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"`+[^`]+`+").unwrap());
26
27// Regex to match markdown links [text](url) or [text][ref]
28static LINK_REGEX: LazyLock<Regex> =
29    LazyLock::new(|| Regex::new(r"\[([^\]]*)\]\([^)]*\)|\[([^\]]*)\]\[[^\]]*\]").unwrap());
30
31// Regex to match inline HTML tags commonly used in headings
32// Matches paired tags: <tag>content</tag>, <tag attr="val">content</tag>
33// Matches self-closing: <tag/>, <tag />
34// Uses explicit list of common inline tags to avoid backreference (not supported in Rust regex)
35static HTML_TAG_REGEX: LazyLock<Regex> = LazyLock::new(|| {
36    // Common inline HTML tags used in documentation headings
37    let tags = "kbd|abbr|code|span|sub|sup|mark|cite|dfn|var|samp|small|strong|em|b|i|u|s|q|br|wbr";
38    let pattern = format!(r"<({tags})(?:\s[^>]*)?>.*?</({tags})>|<({tags})(?:\s[^>]*)?\s*/?>");
39    Regex::new(&pattern).unwrap()
40});
41
42// Regex to match custom header IDs {#id}
43static CUSTOM_ID_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s*\{#[^}]+\}\s*$").unwrap());
44
/// One piece of a heading, as produced by `parse_segments`.
#[derive(Debug, Clone)]
enum HeadingSegment {
    /// Plain text; the configured capitalization style is applied to it.
    Text(String),
    /// Inline code span, emitted unchanged.
    Code(String),
    /// Markdown link: only the link text is re-capitalized, the rest of
    /// `full` (brackets, URL or reference) is emitted unchanged.
    Link {
        /// Complete source text of the link.
        full: String,
        /// Byte offset in `full` at which the link text starts.
        text_start: usize,
        /// Byte offset in `full` just past the end of the link text.
        text_end: usize,
    },
    /// Inline HTML, emitted unchanged.
    Html(String),
}
61
62/// Rule MD063: Heading capitalization
63#[derive(Clone)]
64pub struct MD063HeadingCapitalization {
65    config: MD063Config,
66    lowercase_set: HashSet<String>,
67    /// Multi-word proper names from MD044 that must survive sentence-case transformation.
68    /// Populated via `from_config` when both rules are active.
69    proper_names: Vec<String>,
70}
71
72impl Default for MD063HeadingCapitalization {
73    fn default() -> Self {
74        Self::new()
75    }
76}
77
78impl MD063HeadingCapitalization {
79    pub fn new() -> Self {
80        let config = MD063Config::default();
81        let lowercase_set = config.lowercase_words.iter().cloned().collect();
82        Self {
83            config,
84            lowercase_set,
85            proper_names: Vec::new(),
86        }
87    }
88
89    pub fn from_config_struct(config: MD063Config) -> Self {
90        let lowercase_set = config.lowercase_words.iter().cloned().collect();
91        Self {
92            config,
93            lowercase_set,
94            proper_names: Vec::new(),
95        }
96    }
97
/// Tests whether `pattern_lower` matches `text` at byte offset `start`,
/// comparing case-insensitively with Unicode-aware lowercasing.
///
/// `pattern_lower` is expected to be lowercased already (the callers in this
/// file pass the output of `str::to_lowercase`). Each character of `text` is
/// lowercased on its own and compared against the next bytes of the pattern.
///
/// Returns the end byte offset of the match in the ORIGINAL `text`, or `None`
/// when the pattern is empty, `start` is out of range or not on a char
/// boundary, or the text does not match.
///
/// This avoids converting the full `text` to lowercase and then reusing those
/// offsets on the original string, which can panic for case-fold expansions
/// (e.g. `İ` -> `i̇`).
fn match_case_insensitive_at(text: &str, start: usize, pattern_lower: &str) -> Option<usize> {
    if pattern_lower.is_empty() {
        return None;
    }

    // `str::get` yields `None` for an out-of-range or non-boundary `start`.
    let tail = text.get(start..)?;
    // Part of the pattern that still has to be matched.
    let mut pending = pattern_lower;

    for (rel, ch) in tail.char_indices() {
        // One character may lowercase to several (e.g. `İ` -> `i` + U+0307).
        let folded: String = ch.to_lowercase().collect();
        pending = pending.strip_prefix(folded.as_str())?;

        if pending.is_empty() {
            return Some(start + rel + ch.len_utf8());
        }
    }

    // Text ended before the whole pattern was consumed.
    None
}
130
131    /// Find the next case-insensitive match of `pattern_lower` in `text`,
132    /// returning byte offsets in the ORIGINAL string.
133    fn find_case_insensitive_match(text: &str, pattern_lower: &str, search_start: usize) -> Option<(usize, usize)> {
134        if pattern_lower.is_empty() || search_start >= text.len() || !text.is_char_boundary(search_start) {
135            return None;
136        }
137
138        for (offset, _) in text[search_start..].char_indices() {
139            let start = search_start + offset;
140            if let Some(end) = Self::match_case_insensitive_at(text, start, pattern_lower) {
141                return Some((start, end));
142            }
143        }
144
145        None
146    }
147
148    /// Build a map from word byte-position → canonical form for all proper names
149    /// that appear in the heading text (case-insensitive phrase match).
150    ///
151    /// This is used in `apply_sentence_case` so that words belonging to a proper
152    /// name phrase are never lowercased to begin with.
153    fn proper_name_canonical_forms(&self, text: &str) -> std::collections::HashMap<usize, &str> {
154        let mut map = std::collections::HashMap::new();
155
156        for name in &self.proper_names {
157            if name.is_empty() {
158                continue;
159            }
160            let name_lower = name.to_lowercase();
161            let canonical_words: Vec<&str> = name.split_whitespace().collect();
162            if canonical_words.is_empty() {
163                continue;
164            }
165            let mut search_start = 0;
166
167            while search_start < text.len() {
168                let Some((abs_pos, end_pos)) = Self::find_case_insensitive_match(text, &name_lower, search_start)
169                else {
170                    break;
171                };
172
173                // Require word boundaries
174                let before_ok = abs_pos == 0 || !text[..abs_pos].chars().last().map_or(false, |c| c.is_alphanumeric());
175                let after_ok =
176                    end_pos >= text.len() || !text[end_pos..].chars().next().map_or(false, |c| c.is_alphanumeric());
177
178                if before_ok && after_ok {
179                    // Map each word in the matched region to its canonical form.
180                    // We zip the words found in the text slice with the words of the
181                    // canonical name so that every word gets the right casing.
182                    let text_slice = &text[abs_pos..end_pos];
183                    let mut word_idx = 0;
184                    let mut slice_offset = 0;
185
186                    for text_word in text_slice.split_whitespace() {
187                        if let Some(w_rel) = text_slice[slice_offset..].find(text_word) {
188                            let word_abs = abs_pos + slice_offset + w_rel;
189                            if let Some(&canonical_word) = canonical_words.get(word_idx) {
190                                map.insert(word_abs, canonical_word);
191                            }
192                            slice_offset += w_rel + text_word.len();
193                            word_idx += 1;
194                        }
195                    }
196                }
197
198                // Advance by one Unicode scalar value to allow overlapping matches
199                // while staying on a UTF-8 char boundary.
200                search_start = abs_pos + text[abs_pos..].chars().next().map_or(1, |c| c.len_utf8());
201            }
202        }
203
204        map
205    }
206
207    /// Check if a word has internal capitals (like "iPhone", "macOS", "GitHub", "iOS")
208    fn has_internal_capitals(&self, word: &str) -> bool {
209        let chars: Vec<char> = word.chars().collect();
210        if chars.len() < 2 {
211            return false;
212        }
213
214        let first = chars[0];
215        let rest = &chars[1..];
216        let has_upper_in_rest = rest.iter().any(|c| c.is_uppercase());
217        let has_lower_in_rest = rest.iter().any(|c| c.is_lowercase());
218
219        // Case 1: Mixed case after first character (like "iPhone", "macOS", "GitHub", "JavaScript")
220        if has_upper_in_rest && has_lower_in_rest {
221            return true;
222        }
223
224        // Case 2: Lowercase first + uppercase in rest (like "iOS", "eBay")
225        if first.is_lowercase() && has_upper_in_rest {
226            return true;
227        }
228
229        false
230    }
231
232    /// Check if a word is an all-caps acronym (2+ consecutive uppercase letters)
233    /// Examples: "API", "GPU", "HTTP2", "IO" return true
234    /// Examples: "A", "iPhone", "npm" return false
235    fn is_all_caps_acronym(&self, word: &str) -> bool {
236        // Skip single-letter words (handled by title case rules)
237        if word.len() < 2 {
238            return false;
239        }
240
241        let mut consecutive_upper = 0;
242        let mut max_consecutive = 0;
243
244        for c in word.chars() {
245            if c.is_uppercase() {
246                consecutive_upper += 1;
247                max_consecutive = max_consecutive.max(consecutive_upper);
248            } else if c.is_lowercase() {
249                // Any lowercase letter means not all-caps
250                return false;
251            } else {
252                // Non-letter (number, punctuation) - reset counter but don't fail
253                consecutive_upper = 0;
254            }
255        }
256
257        // Must have at least 2 consecutive uppercase letters
258        max_consecutive >= 2
259    }
260
261    /// Check if a word should be preserved as-is
262    fn should_preserve_word(&self, word: &str) -> bool {
263        // Check ignore_words list (case-sensitive exact match)
264        if self.config.ignore_words.iter().any(|w| w == word) {
265            return true;
266        }
267
268        // Check if word has internal capitals and preserve_cased_words is enabled
269        if self.config.preserve_cased_words && self.has_internal_capitals(word) {
270            return true;
271        }
272
273        // Check if word is an all-caps acronym (2+ consecutive uppercase)
274        if self.config.preserve_cased_words && self.is_all_caps_acronym(word) {
275            return true;
276        }
277
278        // Preserve caret notation for control characters (^A, ^Z, ^@, etc.)
279        if self.is_caret_notation(word) {
280            return true;
281        }
282
283        false
284    }
285
286    /// Check if a word is caret notation for control characters (e.g., ^A, ^C, ^Z)
287    fn is_caret_notation(&self, word: &str) -> bool {
288        let chars: Vec<char> = word.chars().collect();
289        // Pattern: ^ followed by uppercase letter or @[\]^_
290        if chars.len() >= 2 && chars[0] == '^' {
291            let second = chars[1];
292            // Control characters: ^@ (NUL) through ^_ (US), which includes ^A-^Z
293            if second.is_ascii_uppercase() || "@[\\]^_".contains(second) {
294                return true;
295            }
296        }
297        false
298    }
299
300    /// Check if a word is a "lowercase word" (articles, prepositions, etc.)
301    fn is_lowercase_word(&self, word: &str) -> bool {
302        self.lowercase_set.contains(&word.to_lowercase())
303    }
304
305    /// Apply title case to a single word
306    fn title_case_word(&self, word: &str, is_first: bool, is_last: bool) -> String {
307        if word.is_empty() {
308            return word.to_string();
309        }
310
311        // Preserve words in ignore list or with internal capitals
312        if self.should_preserve_word(word) {
313            return word.to_string();
314        }
315
316        // First and last words are always capitalized
317        if is_first || is_last {
318            return self.capitalize_first(word);
319        }
320
321        // Check if it's a lowercase word (articles, prepositions, etc.)
322        if self.is_lowercase_word(word) {
323            return word.to_lowercase();
324        }
325
326        // Regular word - capitalize first letter
327        self.capitalize_first(word)
328    }
329
330    /// Apply canonical proper-name casing while preserving any trailing punctuation
331    /// attached to the original whitespace token (e.g. `javascript,` -> `JavaScript,`).
332    fn apply_canonical_form_to_word(word: &str, canonical: &str) -> String {
333        let canonical_lower = canonical.to_lowercase();
334        if canonical_lower.is_empty() {
335            return canonical.to_string();
336        }
337
338        if let Some(end_pos) = Self::match_case_insensitive_at(word, 0, &canonical_lower) {
339            let mut out = String::with_capacity(canonical.len() + word.len().saturating_sub(end_pos));
340            out.push_str(canonical);
341            out.push_str(&word[end_pos..]);
342            out
343        } else {
344            canonical.to_string()
345        }
346    }
347
348    /// Capitalize the first letter of a word, handling Unicode properly
349    fn capitalize_first(&self, word: &str) -> String {
350        let mut chars = word.chars();
351        match chars.next() {
352            None => String::new(),
353            Some(first) => {
354                let first_upper: String = first.to_uppercase().collect();
355                let rest: String = chars.collect();
356                format!("{}{}", first_upper, rest.to_lowercase())
357            }
358        }
359    }
360
361    /// Apply title case to text (using titlecase crate as base, then our customizations)
362    fn apply_title_case(&self, text: &str) -> String {
363        let canonical_forms = self.proper_name_canonical_forms(text);
364
365        // Pre-compute byte position of each original word for canonical form lookup.
366        let original_words: Vec<&str> = text.split_whitespace().collect();
367        let mut word_positions: Vec<usize> = Vec::with_capacity(original_words.len());
368        let mut pos = 0;
369        for word in &original_words {
370            if let Some(rel) = text[pos..].find(word) {
371                word_positions.push(pos + rel);
372                pos = pos + rel + word.len();
373            } else {
374                word_positions.push(0);
375            }
376        }
377
378        // Use the titlecase crate for the base transformation
379        let base_result = titlecase::titlecase(text);
380        let transformed_words: Vec<&str> = base_result.split_whitespace().collect();
381        let total_words = transformed_words.len();
382
383        let result_words: Vec<String> = transformed_words
384            .iter()
385            .enumerate()
386            .map(|(i, word)| {
387                let is_first = i == 0;
388                let is_last = i == total_words - 1;
389
390                // Words that are part of an MD044 proper name use the canonical form directly.
391                if let Some(&canonical) = word_positions.get(i).and_then(|&p| canonical_forms.get(&p)) {
392                    if let Some(original_word) = original_words.get(i) {
393                        return Self::apply_canonical_form_to_word(original_word, canonical);
394                    }
395                    return canonical.to_string();
396                }
397
398                // Check if the ORIGINAL word should be preserved (for acronyms like "API")
399                if let Some(original_word) = original_words.get(i)
400                    && self.should_preserve_word(original_word)
401                {
402                    return (*original_word).to_string();
403                }
404
405                // Handle hyphenated words
406                if word.contains('-') {
407                    // Also check original for hyphenated preservation
408                    if let Some(original_word) = original_words.get(i) {
409                        return self.handle_hyphenated_word_with_original(word, original_word, is_first, is_last);
410                    }
411                    return self.handle_hyphenated_word(word, is_first, is_last);
412                }
413
414                self.title_case_word(word, is_first, is_last)
415            })
416            .collect();
417
418        result_words.join(" ")
419    }
420
421    /// Handle hyphenated words like "self-documenting"
422    fn handle_hyphenated_word(&self, word: &str, is_first: bool, is_last: bool) -> String {
423        let parts: Vec<&str> = word.split('-').collect();
424        let total_parts = parts.len();
425
426        let result_parts: Vec<String> = parts
427            .iter()
428            .enumerate()
429            .map(|(i, part)| {
430                // First part of first word and last part of last word get special treatment
431                let part_is_first = is_first && i == 0;
432                let part_is_last = is_last && i == total_parts - 1;
433                self.title_case_word(part, part_is_first, part_is_last)
434            })
435            .collect();
436
437        result_parts.join("-")
438    }
439
440    /// Handle hyphenated words with original text for acronym preservation
441    fn handle_hyphenated_word_with_original(
442        &self,
443        word: &str,
444        original: &str,
445        is_first: bool,
446        is_last: bool,
447    ) -> String {
448        let parts: Vec<&str> = word.split('-').collect();
449        let original_parts: Vec<&str> = original.split('-').collect();
450        let total_parts = parts.len();
451
452        let result_parts: Vec<String> = parts
453            .iter()
454            .enumerate()
455            .map(|(i, part)| {
456                // Check if the original part should be preserved (for acronyms)
457                if let Some(original_part) = original_parts.get(i)
458                    && self.should_preserve_word(original_part)
459                {
460                    return (*original_part).to_string();
461                }
462
463                // First part of first word and last part of last word get special treatment
464                let part_is_first = is_first && i == 0;
465                let part_is_last = is_last && i == total_parts - 1;
466                self.title_case_word(part, part_is_first, part_is_last)
467            })
468            .collect();
469
470        result_parts.join("-")
471    }
472
473    /// Apply sentence case to text
474    fn apply_sentence_case(&self, text: &str) -> String {
475        if text.is_empty() {
476            return text.to_string();
477        }
478
479        let canonical_forms = self.proper_name_canonical_forms(text);
480        let mut result = String::new();
481        let mut current_pos = 0;
482        let mut is_first_word = true;
483
484        // Use original text positions to preserve whitespace correctly
485        for word in text.split_whitespace() {
486            if let Some(pos) = text[current_pos..].find(word) {
487                let abs_pos = current_pos + pos;
488
489                // Preserve whitespace before this word
490                result.push_str(&text[current_pos..abs_pos]);
491
492                // Words that are part of an MD044 proper name use the canonical form
493                // directly, bypassing sentence-case lowercasing entirely.
494                if let Some(&canonical) = canonical_forms.get(&abs_pos) {
495                    result.push_str(&Self::apply_canonical_form_to_word(word, canonical));
496                    is_first_word = false;
497                } else if is_first_word {
498                    // Check if word should be preserved BEFORE any capitalization
499                    if self.should_preserve_word(word) {
500                        // Preserve ignore-words exactly as-is, even at start
501                        result.push_str(word);
502                    } else {
503                        // First word: capitalize first letter, lowercase rest
504                        let mut chars = word.chars();
505                        if let Some(first) = chars.next() {
506                            let first_upper: String = first.to_uppercase().collect();
507                            result.push_str(&first_upper);
508                            let rest: String = chars.collect();
509                            result.push_str(&rest.to_lowercase());
510                        }
511                    }
512                    is_first_word = false;
513                } else {
514                    // Non-first words: preserve if needed, otherwise lowercase
515                    if self.should_preserve_word(word) {
516                        result.push_str(word);
517                    } else {
518                        result.push_str(&word.to_lowercase());
519                    }
520                }
521
522                current_pos = abs_pos + word.len();
523            }
524        }
525
526        // Preserve any trailing whitespace
527        if current_pos < text.len() {
528            result.push_str(&text[current_pos..]);
529        }
530
531        result
532    }
533
534    /// Apply all caps to text (preserve whitespace)
535    fn apply_all_caps(&self, text: &str) -> String {
536        if text.is_empty() {
537            return text.to_string();
538        }
539
540        let canonical_forms = self.proper_name_canonical_forms(text);
541        let mut result = String::new();
542        let mut current_pos = 0;
543
544        // Use original text positions to preserve whitespace correctly
545        for word in text.split_whitespace() {
546            if let Some(pos) = text[current_pos..].find(word) {
547                let abs_pos = current_pos + pos;
548
549                // Preserve whitespace before this word
550                result.push_str(&text[current_pos..abs_pos]);
551
552                // Words that are part of an MD044 proper name use the canonical form directly.
553                // This prevents oscillation with MD044 when all-caps style is active.
554                if let Some(&canonical) = canonical_forms.get(&abs_pos) {
555                    result.push_str(&Self::apply_canonical_form_to_word(word, canonical));
556                } else if self.should_preserve_word(word) {
557                    result.push_str(word);
558                } else {
559                    result.push_str(&word.to_uppercase());
560                }
561
562                current_pos = abs_pos + word.len();
563            }
564        }
565
566        // Preserve any trailing whitespace
567        if current_pos < text.len() {
568            result.push_str(&text[current_pos..]);
569        }
570
571        result
572    }
573
574    /// Parse heading text into segments
575    fn parse_segments(&self, text: &str) -> Vec<HeadingSegment> {
576        let mut segments = Vec::new();
577        let mut last_end = 0;
578
579        // Collect all special regions (code and links)
580        let mut special_regions: Vec<(usize, usize, HeadingSegment)> = Vec::new();
581
582        // Find inline code spans
583        for mat in INLINE_CODE_REGEX.find_iter(text) {
584            special_regions.push((mat.start(), mat.end(), HeadingSegment::Code(mat.as_str().to_string())));
585        }
586
587        // Find links
588        for caps in LINK_REGEX.captures_iter(text) {
589            let full_match = caps.get(0).unwrap();
590            let text_match = caps.get(1).or_else(|| caps.get(2));
591
592            if let Some(text_m) = text_match {
593                special_regions.push((
594                    full_match.start(),
595                    full_match.end(),
596                    HeadingSegment::Link {
597                        full: full_match.as_str().to_string(),
598                        text_start: text_m.start() - full_match.start(),
599                        text_end: text_m.end() - full_match.start(),
600                    },
601                ));
602            }
603        }
604
605        // Find inline HTML tags
606        for mat in HTML_TAG_REGEX.find_iter(text) {
607            special_regions.push((mat.start(), mat.end(), HeadingSegment::Html(mat.as_str().to_string())));
608        }
609
610        // Sort by start position
611        special_regions.sort_by_key(|(start, _, _)| *start);
612
613        // Remove overlapping regions (code takes precedence)
614        let mut filtered_regions: Vec<(usize, usize, HeadingSegment)> = Vec::new();
615        for region in special_regions {
616            let overlaps = filtered_regions.iter().any(|(s, e, _)| region.0 < *e && region.1 > *s);
617            if !overlaps {
618                filtered_regions.push(region);
619            }
620        }
621
622        // Build segments
623        for (start, end, segment) in filtered_regions {
624            // Add text before this special region
625            if start > last_end {
626                let text_segment = &text[last_end..start];
627                if !text_segment.is_empty() {
628                    segments.push(HeadingSegment::Text(text_segment.to_string()));
629                }
630            }
631            segments.push(segment);
632            last_end = end;
633        }
634
635        // Add remaining text
636        if last_end < text.len() {
637            let remaining = &text[last_end..];
638            if !remaining.is_empty() {
639                segments.push(HeadingSegment::Text(remaining.to_string()));
640            }
641        }
642
643        // If no segments were found, treat the whole thing as text
644        if segments.is_empty() && !text.is_empty() {
645            segments.push(HeadingSegment::Text(text.to_string()));
646        }
647
648        segments
649    }
650
651    /// Apply capitalization to heading text
652    fn apply_capitalization(&self, text: &str) -> String {
653        // Strip custom ID if present and re-add later
654        let (main_text, custom_id) = if let Some(mat) = CUSTOM_ID_REGEX.find(text) {
655            (&text[..mat.start()], Some(mat.as_str()))
656        } else {
657            (text, None)
658        };
659
660        // Parse into segments
661        let segments = self.parse_segments(main_text);
662
663        // Count text segments to determine first/last word context
664        let text_segments: Vec<usize> = segments
665            .iter()
666            .enumerate()
667            .filter_map(|(i, s)| matches!(s, HeadingSegment::Text(_)).then_some(i))
668            .collect();
669
670        // Determine if the first segment overall is a text segment
671        // For sentence case: if heading starts with code/link, the first text segment
672        // should NOT capitalize its first word (the heading already has a "first element")
673        let first_segment_is_text = segments
674            .first()
675            .map(|s| matches!(s, HeadingSegment::Text(_)))
676            .unwrap_or(false);
677
678        // Determine if the last segment overall is a text segment
679        // If the last segment is Code or Link, then the last text segment should NOT
680        // treat its last word as the heading's last word (for lowercase-words respect)
681        let last_segment_is_text = segments
682            .last()
683            .map(|s| matches!(s, HeadingSegment::Text(_)))
684            .unwrap_or(false);
685
686        // Apply capitalization to each segment
687        let mut result_parts: Vec<String> = Vec::new();
688
689        for (i, segment) in segments.iter().enumerate() {
690            match segment {
691                HeadingSegment::Text(t) => {
692                    let is_first_text = text_segments.first() == Some(&i);
693                    // A text segment is "last" only if it's the last text segment AND
694                    // the last segment overall is also text. If there's Code/Link after,
695                    // the last word should respect lowercase-words.
696                    let is_last_text = text_segments.last() == Some(&i) && last_segment_is_text;
697
698                    let capitalized = match self.config.style {
699                        HeadingCapStyle::TitleCase => self.apply_title_case_segment(t, is_first_text, is_last_text),
700                        HeadingCapStyle::SentenceCase => {
701                            // For sentence case, only capitalize first word if:
702                            // 1. This is the first text segment, AND
703                            // 2. The heading actually starts with text (not code/link)
704                            if is_first_text && first_segment_is_text {
705                                self.apply_sentence_case(t)
706                            } else {
707                                // Non-first segments OR heading starts with code/link
708                                self.apply_sentence_case_non_first(t)
709                            }
710                        }
711                        HeadingCapStyle::AllCaps => self.apply_all_caps(t),
712                    };
713                    result_parts.push(capitalized);
714                }
715                HeadingSegment::Code(c) => {
716                    result_parts.push(c.clone());
717                }
718                HeadingSegment::Link {
719                    full,
720                    text_start,
721                    text_end,
722                } => {
723                    // Apply capitalization to link text only
724                    let link_text = &full[*text_start..*text_end];
725                    let capitalized_text = match self.config.style {
726                        HeadingCapStyle::TitleCase => self.apply_title_case(link_text),
727                        // For sentence case, apply same preservation logic as non-first text
728                        // This preserves acronyms (API), brand names (iPhone), etc.
729                        HeadingCapStyle::SentenceCase => self.apply_sentence_case_non_first(link_text),
730                        HeadingCapStyle::AllCaps => self.apply_all_caps(link_text),
731                    };
732
733                    let mut new_link = String::new();
734                    new_link.push_str(&full[..*text_start]);
735                    new_link.push_str(&capitalized_text);
736                    new_link.push_str(&full[*text_end..]);
737                    result_parts.push(new_link);
738                }
739                HeadingSegment::Html(h) => {
740                    // Preserve HTML tags as-is (like code)
741                    result_parts.push(h.clone());
742                }
743            }
744        }
745
746        let mut result = result_parts.join("");
747
748        // Re-add custom ID if present
749        if let Some(id) = custom_id {
750            result.push_str(id);
751        }
752
753        result
754    }
755
756    /// Apply title case to a text segment with first/last awareness
757    fn apply_title_case_segment(&self, text: &str, is_first_segment: bool, is_last_segment: bool) -> String {
758        let canonical_forms = self.proper_name_canonical_forms(text);
759        let words: Vec<&str> = text.split_whitespace().collect();
760        let total_words = words.len();
761
762        if total_words == 0 {
763            return text.to_string();
764        }
765
766        // Pre-compute byte position of each word so we can look up canonical forms.
767        let mut word_positions: Vec<usize> = Vec::with_capacity(words.len());
768        let mut pos = 0;
769        for word in &words {
770            if let Some(rel) = text[pos..].find(word) {
771                word_positions.push(pos + rel);
772                pos = pos + rel + word.len();
773            } else {
774                word_positions.push(0);
775            }
776        }
777
778        let result_words: Vec<String> = words
779            .iter()
780            .enumerate()
781            .map(|(i, word)| {
782                let is_first = is_first_segment && i == 0;
783                let is_last = is_last_segment && i == total_words - 1;
784
785                // Words that are part of an MD044 proper name use the canonical form directly.
786                if let Some(&canonical) = word_positions.get(i).and_then(|&p| canonical_forms.get(&p)) {
787                    return Self::apply_canonical_form_to_word(word, canonical);
788                }
789
790                // Handle hyphenated words
791                if word.contains('-') {
792                    return self.handle_hyphenated_word(word, is_first, is_last);
793                }
794
795                self.title_case_word(word, is_first, is_last)
796            })
797            .collect();
798
799        // Preserve original spacing
800        let mut result = String::new();
801        let mut word_iter = result_words.iter();
802        let mut in_word = false;
803
804        for c in text.chars() {
805            if c.is_whitespace() {
806                if in_word {
807                    in_word = false;
808                }
809                result.push(c);
810            } else if !in_word {
811                if let Some(word) = word_iter.next() {
812                    result.push_str(word);
813                }
814                in_word = true;
815            }
816        }
817
818        result
819    }
820
821    /// Apply sentence case to non-first segments (just lowercase, preserve whitespace)
822    fn apply_sentence_case_non_first(&self, text: &str) -> String {
823        if text.is_empty() {
824            return text.to_string();
825        }
826
827        let canonical_forms = self.proper_name_canonical_forms(text);
828        let mut result = String::new();
829        let mut current_pos = 0;
830
831        // Iterate over words in the original text so byte positions are consistent
832        // with the positions in canonical_forms (built from the same text).
833        for word in text.split_whitespace() {
834            if let Some(pos) = text[current_pos..].find(word) {
835                let abs_pos = current_pos + pos;
836
837                // Preserve whitespace before this word
838                result.push_str(&text[current_pos..abs_pos]);
839
840                // Words that are part of an MD044 proper name use the canonical form directly.
841                if let Some(&canonical) = canonical_forms.get(&abs_pos) {
842                    result.push_str(&Self::apply_canonical_form_to_word(word, canonical));
843                } else if self.should_preserve_word(word) {
844                    result.push_str(word);
845                } else {
846                    result.push_str(&word.to_lowercase());
847                }
848
849                current_pos = abs_pos + word.len();
850            }
851        }
852
853        // Preserve any trailing whitespace
854        if current_pos < text.len() {
855            result.push_str(&text[current_pos..]);
856        }
857
858        result
859    }
860
861    /// Get byte range for a line
862    fn get_line_byte_range(&self, content: &str, line_num: usize, line_index: &LineIndex) -> Range<usize> {
863        let start_pos = line_index.get_line_start_byte(line_num).unwrap_or(content.len());
864        let line = content.lines().nth(line_num - 1).unwrap_or("");
865        Range {
866            start: start_pos,
867            end: start_pos + line.len(),
868        }
869    }
870
871    /// Fix an ATX heading line
872    fn fix_atx_heading(&self, _line: &str, heading: &crate::lint_context::HeadingInfo) -> String {
873        // Parse the line to preserve structure
874        let indent = " ".repeat(heading.marker_column);
875        let hashes = "#".repeat(heading.level as usize);
876
877        // Apply capitalization to the text
878        let fixed_text = self.apply_capitalization(&heading.raw_text);
879
880        // Reconstruct with closing sequence if present
881        let closing = &heading.closing_sequence;
882        if heading.has_closing_sequence {
883            format!("{indent}{hashes} {fixed_text} {closing}")
884        } else {
885            format!("{indent}{hashes} {fixed_text}")
886        }
887    }
888
889    /// Fix a Setext heading line
890    fn fix_setext_heading(&self, line: &str, heading: &crate::lint_context::HeadingInfo) -> String {
891        // Apply capitalization to the text
892        let fixed_text = self.apply_capitalization(&heading.raw_text);
893
894        // Preserve leading whitespace from original line
895        let leading_ws: String = line.chars().take_while(|c| c.is_whitespace()).collect();
896
897        format!("{leading_ws}{fixed_text}")
898    }
899}
900
901impl Rule for MD063HeadingCapitalization {
902    fn name(&self) -> &'static str {
903        "MD063"
904    }
905
906    fn description(&self) -> &'static str {
907        "Heading capitalization"
908    }
909
910    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
911        !ctx.likely_has_headings() || !ctx.lines.iter().any(|line| line.heading.is_some())
912    }
913
914    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
915        let content = ctx.content;
916
917        if content.is_empty() {
918            return Ok(Vec::new());
919        }
920
921        let mut warnings = Vec::new();
922        let line_index = &ctx.line_index;
923
924        for (line_num, line_info) in ctx.lines.iter().enumerate() {
925            if let Some(heading) = &line_info.heading {
926                // Check level filter
927                if heading.level < self.config.min_level || heading.level > self.config.max_level {
928                    continue;
929                }
930
931                // Skip headings in code blocks (indented headings)
932                if line_info.visual_indent >= 4 && matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
933                    continue;
934                }
935
936                // Apply capitalization and compare
937                let original_text = &heading.raw_text;
938                let fixed_text = self.apply_capitalization(original_text);
939
940                if original_text != &fixed_text {
941                    let line = line_info.content(ctx.content);
942                    let style_name = match self.config.style {
943                        HeadingCapStyle::TitleCase => "title case",
944                        HeadingCapStyle::SentenceCase => "sentence case",
945                        HeadingCapStyle::AllCaps => "ALL CAPS",
946                    };
947
948                    warnings.push(LintWarning {
949                        rule_name: Some(self.name().to_string()),
950                        line: line_num + 1,
951                        column: heading.content_column + 1,
952                        end_line: line_num + 1,
953                        end_column: heading.content_column + 1 + original_text.len(),
954                        message: format!("Heading should use {style_name}: '{original_text}' -> '{fixed_text}'"),
955                        severity: Severity::Warning,
956                        fix: Some(Fix {
957                            range: self.get_line_byte_range(content, line_num + 1, line_index),
958                            replacement: match heading.style {
959                                crate::lint_context::HeadingStyle::ATX => self.fix_atx_heading(line, heading),
960                                _ => self.fix_setext_heading(line, heading),
961                            },
962                        }),
963                    });
964                }
965            }
966        }
967
968        Ok(warnings)
969    }
970
971    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
972        let content = ctx.content;
973
974        if content.is_empty() {
975            return Ok(content.to_string());
976        }
977
978        let lines = ctx.raw_lines();
979        let mut fixed_lines: Vec<String> = lines.iter().map(|&s| s.to_string()).collect();
980
981        for (line_num, line_info) in ctx.lines.iter().enumerate() {
982            if let Some(heading) = &line_info.heading {
983                // Check level filter
984                if heading.level < self.config.min_level || heading.level > self.config.max_level {
985                    continue;
986                }
987
988                // Skip headings in code blocks
989                if line_info.visual_indent >= 4 && matches!(heading.style, crate::lint_context::HeadingStyle::ATX) {
990                    continue;
991                }
992
993                let original_text = &heading.raw_text;
994                let fixed_text = self.apply_capitalization(original_text);
995
996                if original_text != &fixed_text {
997                    let line = line_info.content(ctx.content);
998                    fixed_lines[line_num] = match heading.style {
999                        crate::lint_context::HeadingStyle::ATX => self.fix_atx_heading(line, heading),
1000                        _ => self.fix_setext_heading(line, heading),
1001                    };
1002                }
1003            }
1004        }
1005
1006        // Reconstruct content preserving line endings
1007        let mut result = String::with_capacity(content.len());
1008        for (i, line) in fixed_lines.iter().enumerate() {
1009            result.push_str(line);
1010            if i < fixed_lines.len() - 1 || content.ends_with('\n') {
1011                result.push('\n');
1012            }
1013        }
1014
1015        Ok(result)
1016    }
1017
1018    fn as_any(&self) -> &dyn std::any::Any {
1019        self
1020    }
1021
1022    fn default_config_section(&self) -> Option<(String, toml::Value)> {
1023        let json_value = serde_json::to_value(&self.config).ok()?;
1024        Some((
1025            self.name().to_string(),
1026            crate::rule_config_serde::json_to_toml_value(&json_value)?,
1027        ))
1028    }
1029
1030    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
1031    where
1032        Self: Sized,
1033    {
1034        let rule_config = crate::rule_config_serde::load_rule_config::<MD063Config>(config);
1035        let md044_config =
1036            crate::rule_config_serde::load_rule_config::<crate::rules::md044_proper_names::MD044Config>(config);
1037        let mut rule = Self::from_config_struct(rule_config);
1038        rule.proper_names = md044_config.names;
1039        Box::new(rule)
1040    }
1041}
1042
1043#[cfg(test)]
1044mod tests {
1045    use super::*;
1046    use crate::lint_context::LintContext;
1047
1048    fn create_rule() -> MD063HeadingCapitalization {
1049        let config = MD063Config {
1050            enabled: true,
1051            ..Default::default()
1052        };
1053        MD063HeadingCapitalization::from_config_struct(config)
1054    }
1055
1056    fn create_rule_with_style(style: HeadingCapStyle) -> MD063HeadingCapitalization {
1057        let config = MD063Config {
1058            enabled: true,
1059            style,
1060            ..Default::default()
1061        };
1062        MD063HeadingCapitalization::from_config_struct(config)
1063    }
1064
1065    // Title case tests
1066    #[test]
1067    fn test_title_case_basic() {
1068        let rule = create_rule();
1069        let content = "# hello world\n";
1070        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1071        let result = rule.check(&ctx).unwrap();
1072        assert_eq!(result.len(), 1);
1073        assert!(result[0].message.contains("Hello World"));
1074    }
1075
1076    #[test]
1077    fn test_title_case_lowercase_words() {
1078        let rule = create_rule();
1079        let content = "# the quick brown fox\n";
1080        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1081        let result = rule.check(&ctx).unwrap();
1082        assert_eq!(result.len(), 1);
1083        // "The" should be capitalized (first word), "quick", "brown", "fox" should be capitalized
1084        assert!(result[0].message.contains("The Quick Brown Fox"));
1085    }
1086
1087    #[test]
1088    fn test_title_case_already_correct() {
1089        let rule = create_rule();
1090        let content = "# The Quick Brown Fox\n";
1091        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1092        let result = rule.check(&ctx).unwrap();
1093        assert!(result.is_empty(), "Already correct heading should not be flagged");
1094    }
1095
1096    #[test]
1097    fn test_title_case_hyphenated() {
1098        let rule = create_rule();
1099        let content = "# self-documenting code\n";
1100        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1101        let result = rule.check(&ctx).unwrap();
1102        assert_eq!(result.len(), 1);
1103        assert!(result[0].message.contains("Self-Documenting Code"));
1104    }
1105
1106    // Sentence case tests
1107    #[test]
1108    fn test_sentence_case_basic() {
1109        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
1110        let content = "# The Quick Brown Fox\n";
1111        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1112        let result = rule.check(&ctx).unwrap();
1113        assert_eq!(result.len(), 1);
1114        assert!(result[0].message.contains("The quick brown fox"));
1115    }
1116
1117    #[test]
1118    fn test_sentence_case_already_correct() {
1119        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
1120        let content = "# The quick brown fox\n";
1121        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1122        let result = rule.check(&ctx).unwrap();
1123        assert!(result.is_empty());
1124    }
1125
1126    // All caps tests
1127    #[test]
1128    fn test_all_caps_basic() {
1129        let rule = create_rule_with_style(HeadingCapStyle::AllCaps);
1130        let content = "# hello world\n";
1131        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1132        let result = rule.check(&ctx).unwrap();
1133        assert_eq!(result.len(), 1);
1134        assert!(result[0].message.contains("HELLO WORLD"));
1135    }
1136
1137    // Preserve tests
1138    #[test]
1139    fn test_preserve_ignore_words() {
1140        let config = MD063Config {
1141            enabled: true,
1142            ignore_words: vec!["iPhone".to_string(), "macOS".to_string()],
1143            ..Default::default()
1144        };
1145        let rule = MD063HeadingCapitalization::from_config_struct(config);
1146
1147        let content = "# using iPhone on macOS\n";
1148        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1149        let result = rule.check(&ctx).unwrap();
1150        assert_eq!(result.len(), 1);
1151        // iPhone and macOS should be preserved
1152        assert!(result[0].message.contains("iPhone"));
1153        assert!(result[0].message.contains("macOS"));
1154    }
1155
1156    #[test]
1157    fn test_preserve_cased_words() {
1158        let rule = create_rule();
1159        let content = "# using GitHub actions\n";
1160        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1161        let result = rule.check(&ctx).unwrap();
1162        assert_eq!(result.len(), 1);
1163        // GitHub should be preserved (has internal capital)
1164        assert!(result[0].message.contains("GitHub"));
1165    }
1166
1167    // Inline code tests
1168    #[test]
1169    fn test_inline_code_preserved() {
1170        let rule = create_rule();
1171        let content = "# using `const` in javascript\n";
1172        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1173        let result = rule.check(&ctx).unwrap();
1174        assert_eq!(result.len(), 1);
1175        // `const` should be preserved, rest capitalized
1176        assert!(result[0].message.contains("`const`"));
1177        assert!(result[0].message.contains("Javascript") || result[0].message.contains("JavaScript"));
1178    }
1179
1180    // Level filter tests
1181    #[test]
1182    fn test_level_filter() {
1183        let config = MD063Config {
1184            enabled: true,
1185            min_level: 2,
1186            max_level: 4,
1187            ..Default::default()
1188        };
1189        let rule = MD063HeadingCapitalization::from_config_struct(config);
1190
1191        let content = "# h1 heading\n## h2 heading\n### h3 heading\n##### h5 heading\n";
1192        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1193        let result = rule.check(&ctx).unwrap();
1194
1195        // Only h2 and h3 should be flagged (h1 < min_level, h5 > max_level)
1196        assert_eq!(result.len(), 2);
1197        assert_eq!(result[0].line, 2); // h2
1198        assert_eq!(result[1].line, 3); // h3
1199    }
1200
1201    // Fix tests
1202    #[test]
1203    fn test_fix_atx_heading() {
1204        let rule = create_rule();
1205        let content = "# hello world\n";
1206        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1207        let fixed = rule.fix(&ctx).unwrap();
1208        assert_eq!(fixed, "# Hello World\n");
1209    }
1210
1211    #[test]
1212    fn test_fix_multiple_headings() {
1213        let rule = create_rule();
1214        let content = "# first heading\n\n## second heading\n";
1215        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1216        let fixed = rule.fix(&ctx).unwrap();
1217        assert_eq!(fixed, "# First Heading\n\n## Second Heading\n");
1218    }
1219
1220    // Setext heading tests
1221    #[test]
1222    fn test_setext_heading() {
1223        let rule = create_rule();
1224        let content = "hello world\n============\n";
1225        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1226        let result = rule.check(&ctx).unwrap();
1227        assert_eq!(result.len(), 1);
1228        assert!(result[0].message.contains("Hello World"));
1229    }
1230
1231    // Custom ID tests
1232    #[test]
1233    fn test_custom_id_preserved() {
1234        let rule = create_rule();
1235        let content = "# getting started {#intro}\n";
1236        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1237        let result = rule.check(&ctx).unwrap();
1238        assert_eq!(result.len(), 1);
1239        // Custom ID should be preserved
1240        assert!(result[0].message.contains("{#intro}"));
1241    }
1242
1243    // Acronym preservation tests
1244    #[test]
1245    fn test_preserve_all_caps_acronyms() {
1246        let rule = create_rule();
1247        let ctx = |c| LintContext::new(c, crate::config::MarkdownFlavor::Standard, None);
1248
1249        // Basic acronyms should be preserved
1250        let fixed = rule.fix(&ctx("# using API in production\n")).unwrap();
1251        assert_eq!(fixed, "# Using API in Production\n");
1252
1253        // Multiple acronyms
1254        let fixed = rule.fix(&ctx("# API and GPU integration\n")).unwrap();
1255        assert_eq!(fixed, "# API and GPU Integration\n");
1256
1257        // Two-letter acronyms
1258        let fixed = rule.fix(&ctx("# IO performance guide\n")).unwrap();
1259        assert_eq!(fixed, "# IO Performance Guide\n");
1260
1261        // Acronyms with numbers
1262        let fixed = rule.fix(&ctx("# HTTP2 and MD5 hashing\n")).unwrap();
1263        assert_eq!(fixed, "# HTTP2 and MD5 Hashing\n");
1264    }
1265
1266    #[test]
1267    fn test_preserve_acronyms_in_hyphenated_words() {
1268        let rule = create_rule();
1269        let ctx = |c| LintContext::new(c, crate::config::MarkdownFlavor::Standard, None);
1270
1271        // Acronyms at start of hyphenated word
1272        let fixed = rule.fix(&ctx("# API-driven architecture\n")).unwrap();
1273        assert_eq!(fixed, "# API-Driven Architecture\n");
1274
1275        // Multiple acronyms with hyphens
1276        let fixed = rule.fix(&ctx("# GPU-accelerated CPU-intensive tasks\n")).unwrap();
1277        assert_eq!(fixed, "# GPU-Accelerated CPU-Intensive Tasks\n");
1278    }
1279
1280    #[test]
1281    fn test_single_letters_not_treated_as_acronyms() {
1282        let rule = create_rule();
1283        let ctx = |c| LintContext::new(c, crate::config::MarkdownFlavor::Standard, None);
1284
1285        // Single uppercase letters should follow title case rules, not be preserved
1286        let fixed = rule.fix(&ctx("# i am a heading\n")).unwrap();
1287        assert_eq!(fixed, "# I Am a Heading\n");
1288    }
1289
1290    #[test]
1291    fn test_lowercase_terms_need_ignore_words() {
1292        let ctx = |c| LintContext::new(c, crate::config::MarkdownFlavor::Standard, None);
1293
1294        // Without ignore_words: npm gets capitalized
1295        let rule = create_rule();
1296        let fixed = rule.fix(&ctx("# using npm packages\n")).unwrap();
1297        assert_eq!(fixed, "# Using Npm Packages\n");
1298
1299        // With ignore_words: npm preserved
1300        let config = MD063Config {
1301            enabled: true,
1302            ignore_words: vec!["npm".to_string()],
1303            ..Default::default()
1304        };
1305        let rule = MD063HeadingCapitalization::from_config_struct(config);
1306        let fixed = rule.fix(&ctx("# using npm packages\n")).unwrap();
1307        assert_eq!(fixed, "# Using npm Packages\n");
1308    }
1309
1310    #[test]
1311    fn test_acronyms_with_mixed_case_preserved() {
1312        let rule = create_rule();
1313        let ctx = |c| LintContext::new(c, crate::config::MarkdownFlavor::Standard, None);
1314
1315        // Both acronyms (API, GPU) and mixed-case (GitHub) should be preserved
1316        let fixed = rule.fix(&ctx("# using API with GitHub\n")).unwrap();
1317        assert_eq!(fixed, "# Using API with GitHub\n");
1318    }
1319
1320    #[test]
1321    fn test_real_world_acronyms() {
1322        let rule = create_rule();
1323        let ctx = |c| LintContext::new(c, crate::config::MarkdownFlavor::Standard, None);
1324
1325        // Common technical acronyms from tested repositories
1326        let content = "# FFI bindings for CPU optimization\n";
1327        let fixed = rule.fix(&ctx(content)).unwrap();
1328        assert_eq!(fixed, "# FFI Bindings for CPU Optimization\n");
1329
1330        let content = "# DOM manipulation and SSR rendering\n";
1331        let fixed = rule.fix(&ctx(content)).unwrap();
1332        assert_eq!(fixed, "# DOM Manipulation and SSR Rendering\n");
1333
1334        let content = "# CVE security and RNN models\n";
1335        let fixed = rule.fix(&ctx(content)).unwrap();
1336        assert_eq!(fixed, "# CVE Security and RNN Models\n");
1337    }
1338
1339    #[test]
1340    fn test_is_all_caps_acronym() {
1341        let rule = create_rule();
1342
1343        // Should return true for all-caps with 2+ letters
1344        assert!(rule.is_all_caps_acronym("API"));
1345        assert!(rule.is_all_caps_acronym("IO"));
1346        assert!(rule.is_all_caps_acronym("GPU"));
1347        assert!(rule.is_all_caps_acronym("HTTP2")); // Numbers don't break it
1348
1349        // Should return false for single letters
1350        assert!(!rule.is_all_caps_acronym("A"));
1351        assert!(!rule.is_all_caps_acronym("I"));
1352
1353        // Should return false for words with lowercase
1354        assert!(!rule.is_all_caps_acronym("Api"));
1355        assert!(!rule.is_all_caps_acronym("npm"));
1356        assert!(!rule.is_all_caps_acronym("iPhone"));
1357    }
1358
1359    #[test]
1360    fn test_sentence_case_ignore_words_first_word() {
1361        let config = MD063Config {
1362            enabled: true,
1363            style: HeadingCapStyle::SentenceCase,
1364            ignore_words: vec!["nvim".to_string()],
1365            ..Default::default()
1366        };
1367        let rule = MD063HeadingCapitalization::from_config_struct(config);
1368
1369        // "nvim" as first word should be preserved exactly
1370        let content = "# nvim config\n";
1371        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1372        let result = rule.check(&ctx).unwrap();
1373        assert!(
1374            result.is_empty(),
1375            "nvim in ignore-words should not be flagged. Got: {result:?}"
1376        );
1377
1378        // Verify fix also preserves it
1379        let fixed = rule.fix(&ctx).unwrap();
1380        assert_eq!(fixed, "# nvim config\n");
1381    }
1382
1383    #[test]
1384    fn test_sentence_case_ignore_words_not_first() {
1385        let config = MD063Config {
1386            enabled: true,
1387            style: HeadingCapStyle::SentenceCase,
1388            ignore_words: vec!["nvim".to_string()],
1389            ..Default::default()
1390        };
1391        let rule = MD063HeadingCapitalization::from_config_struct(config);
1392
1393        // "nvim" in middle should also be preserved
1394        let content = "# Using nvim editor\n";
1395        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1396        let result = rule.check(&ctx).unwrap();
1397        assert!(
1398            result.is_empty(),
1399            "nvim in ignore-words should be preserved. Got: {result:?}"
1400        );
1401    }
1402
1403    #[test]
1404    fn test_preserve_cased_words_ios() {
1405        let config = MD063Config {
1406            enabled: true,
1407            style: HeadingCapStyle::SentenceCase,
1408            preserve_cased_words: true,
1409            ..Default::default()
1410        };
1411        let rule = MD063HeadingCapitalization::from_config_struct(config);
1412
1413        // "iOS" should be preserved (has mixed case: lowercase 'i' + uppercase 'OS')
1414        let content = "## This is iOS\n";
1415        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1416        let result = rule.check(&ctx).unwrap();
1417        assert!(
1418            result.is_empty(),
1419            "iOS should be preserved with preserve-cased-words. Got: {result:?}"
1420        );
1421
1422        // Verify fix also preserves it
1423        let fixed = rule.fix(&ctx).unwrap();
1424        assert_eq!(fixed, "## This is iOS\n");
1425    }
1426
1427    #[test]
1428    fn test_preserve_cased_words_ios_title_case() {
1429        let config = MD063Config {
1430            enabled: true,
1431            style: HeadingCapStyle::TitleCase,
1432            preserve_cased_words: true,
1433            ..Default::default()
1434        };
1435        let rule = MD063HeadingCapitalization::from_config_struct(config);
1436
1437        // "iOS" should be preserved in title case too
1438        let content = "# developing for iOS\n";
1439        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1440        let fixed = rule.fix(&ctx).unwrap();
1441        assert_eq!(fixed, "# Developing for iOS\n");
1442    }
1443
1444    #[test]
1445    fn test_has_internal_capitals_ios() {
1446        let rule = create_rule();
1447
1448        // iOS should be detected as having internal capitals
1449        assert!(
1450            rule.has_internal_capitals("iOS"),
1451            "iOS has mixed case (lowercase i, uppercase OS)"
1452        );
1453
1454        // Other mixed-case words
1455        assert!(rule.has_internal_capitals("iPhone"));
1456        assert!(rule.has_internal_capitals("macOS"));
1457        assert!(rule.has_internal_capitals("GitHub"));
1458        assert!(rule.has_internal_capitals("JavaScript"));
1459        assert!(rule.has_internal_capitals("eBay"));
1460
1461        // All-caps should NOT be detected (handled by is_all_caps_acronym)
1462        assert!(!rule.has_internal_capitals("API"));
1463        assert!(!rule.has_internal_capitals("GPU"));
1464
1465        // All-lowercase should NOT be detected
1466        assert!(!rule.has_internal_capitals("npm"));
1467        assert!(!rule.has_internal_capitals("config"));
1468
1469        // Regular capitalized words should NOT be detected
1470        assert!(!rule.has_internal_capitals("The"));
1471        assert!(!rule.has_internal_capitals("Hello"));
1472    }
1473
1474    #[test]
1475    fn test_lowercase_words_before_trailing_code() {
1476        let config = MD063Config {
1477            enabled: true,
1478            style: HeadingCapStyle::TitleCase,
1479            lowercase_words: vec![
1480                "a".to_string(),
1481                "an".to_string(),
1482                "and".to_string(),
1483                "at".to_string(),
1484                "but".to_string(),
1485                "by".to_string(),
1486                "for".to_string(),
1487                "from".to_string(),
1488                "into".to_string(),
1489                "nor".to_string(),
1490                "on".to_string(),
1491                "onto".to_string(),
1492                "or".to_string(),
1493                "the".to_string(),
1494                "to".to_string(),
1495                "upon".to_string(),
1496                "via".to_string(),
1497                "vs".to_string(),
1498                "with".to_string(),
1499                "without".to_string(),
1500            ],
1501            preserve_cased_words: true,
1502            ..Default::default()
1503        };
1504        let rule = MD063HeadingCapitalization::from_config_struct(config);
1505
1506        // Test: "subtitle with a `app`" (all lowercase input)
1507        // Expected fix: "Subtitle With a `app`" - capitalize "Subtitle" and "With",
1508        // but keep "a" lowercase (it's in lowercase-words and not the last word)
1509        // Incorrect: "Subtitle with A `app`" (would incorrectly capitalize "a")
1510        let content = "## subtitle with a `app`\n";
1511        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1512        let result = rule.check(&ctx).unwrap();
1513
1514        // Should flag it
1515        assert!(!result.is_empty(), "Should flag incorrect capitalization");
1516        let fixed = rule.fix(&ctx).unwrap();
1517        // "a" should remain lowercase (not "A") because inline code at end doesn't change lowercase-words behavior
1518        assert!(
1519            fixed.contains("with a `app`"),
1520            "Expected 'with a `app`' but got: {fixed:?}"
1521        );
1522        assert!(
1523            !fixed.contains("with A `app`"),
1524            "Should not capitalize 'a' to 'A'. Got: {fixed:?}"
1525        );
1526        // "Subtitle" should be capitalized, "with" and "a" should remain lowercase (they're in lowercase-words)
1527        assert!(
1528            fixed.contains("Subtitle with a `app`"),
1529            "Expected 'Subtitle with a `app`' but got: {fixed:?}"
1530        );
1531    }
1532
// Title case: the configured lowercase-words "with" and "the" sit directly in
// front of a trailing inline code span and must still be lowercase after the fix.
#[test]
fn test_lowercase_words_preserved_before_trailing_code_variant() {
    let rule = MD063HeadingCapitalization::from_config_struct(MD063Config {
        enabled: true,
        style: HeadingCapStyle::TitleCase,
        lowercase_words: ["a", "the", "with"].map(String::from).to_vec(),
        ..Default::default()
    });
    let ctx = LintContext::new(
        "## Title with the `code`\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let fixed = rule.fix(&ctx).unwrap();

    // Positive and negative probes of the same region of the fixed heading.
    let keeps_lowercase = fixed.contains("with the `code`");
    let capitalizes_article = fixed.contains("with The `code`");
    assert!(keeps_lowercase, "Expected 'with the `code`' but got: {fixed:?}");
    assert!(
        !capitalizes_article,
        "Should not capitalize 'the' to 'The'. Got: {fixed:?}"
    );
}
1557
// Title case without any trailing code span: the fix is expected to yield
// "With a Word", i.e. the mid-heading lowercase-word "a" stays lowercase while
// the final word "word" is capitalized. Note that "word" itself is not in the
// configured lowercase-words list for this test.
#[test]
fn test_last_word_capitalized_when_no_trailing_code() {
    let rule = MD063HeadingCapitalization::from_config_struct(MD063Config {
        enabled: true,
        style: HeadingCapStyle::TitleCase,
        lowercase_words: ["a", "the"].map(String::from).to_vec(),
        ..Default::default()
    });
    let ctx = LintContext::new(
        "## title with a word\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let fixed = rule.fix(&ctx).unwrap();

    let has_expected_tail = fixed.contains("With a Word");
    assert!(has_expected_tail, "Expected 'With a Word' but got: {fixed:?}");
}
1581
// Title case: two consecutive lowercase-words ("for", "the") precede a trailing
// inline code span; both must remain lowercase in the fixed output.
#[test]
fn test_multiple_lowercase_words_before_code() {
    let rule = MD063HeadingCapitalization::from_config_struct(MD063Config {
        enabled: true,
        style: HeadingCapStyle::TitleCase,
        lowercase_words: ["a", "the", "with", "for"].map(String::from).to_vec(),
        ..Default::default()
    });
    let ctx = LintContext::new(
        "## Guide for the `user`\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let fixed = rule.fix(&ctx).unwrap();

    let keeps_lowercase = fixed.contains("for the `user`");
    let capitalizes_both = fixed.contains("For The `user`");
    assert!(keeps_lowercase, "Expected 'for the `user`' but got: {fixed:?}");
    assert!(
        !capitalizes_both,
        "Should not capitalize lowercase words before code. Got: {fixed:?}"
    );
}
1610
// Title case with the code span in the middle of the heading: the text after it
// is expected to become "for the Code" (lowercase-words kept, final word capitalized).
#[test]
fn test_code_in_middle_normal_rules_apply() {
    let rule = MD063HeadingCapitalization::from_config_struct(MD063Config {
        enabled: true,
        style: HeadingCapStyle::TitleCase,
        lowercase_words: ["a", "the", "for"].map(String::from).to_vec(),
        ..Default::default()
    });
    let ctx = LintContext::new(
        "## Using `const` for the code\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let fixed = rule.fix(&ctx).unwrap();

    let has_expected_tail = fixed.contains("for the Code");
    assert!(has_expected_tail, "Expected 'for the Code' but got: {fixed:?}");
}
1631
// Title case with a trailing markdown link: the lowercase-words before the link
// stay lowercase, and the expected output shows the link text as "[Link]".
#[test]
fn test_link_at_end_same_as_code() {
    let rule = MD063HeadingCapitalization::from_config_struct(MD063Config {
        enabled: true,
        style: HeadingCapStyle::TitleCase,
        lowercase_words: ["a", "the", "for"].map(String::from).to_vec(),
        ..Default::default()
    });
    let ctx = LintContext::new(
        "## Guide for the [link](./page.md)\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let fixed = rule.fix(&ctx).unwrap();

    let keeps_lowercase = fixed.contains("for the [Link]");
    let capitalizes_article = fixed.contains("for The [Link]");
    assert!(keeps_lowercase, "Expected 'for the [Link]' but got: {fixed:?}");
    assert!(
        !capitalizes_article,
        "Should not capitalize 'the' before link. Got: {fixed:?}"
    );
}
1656
// Title case with two code spans, the second one ending the heading: the
// lowercase-words between them ("with", "a") must not be capitalized.
#[test]
fn test_multiple_code_segments() {
    let rule = MD063HeadingCapitalization::from_config_struct(MD063Config {
        enabled: true,
        style: HeadingCapStyle::TitleCase,
        lowercase_words: ["a", "the", "with"].map(String::from).to_vec(),
        ..Default::default()
    });
    let ctx = LintContext::new(
        "## Using `const` with a `variable`\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let fixed = rule.fix(&ctx).unwrap();

    let keeps_lowercase = fixed.contains("with a `variable`");
    let capitalizes_article = fixed.contains("with A `variable`");
    assert!(keeps_lowercase, "Expected 'with a `variable`' but got: {fixed:?}");
    assert!(
        !capitalizes_article,
        "Should not capitalize 'a' before trailing code. Got: {fixed:?}"
    );
}
1681
// Title case where a code span is followed by a trailing link: the
// lowercase-words ahead of the code span must remain lowercase.
#[test]
fn test_code_and_link_combination() {
    let rule = MD063HeadingCapitalization::from_config_struct(MD063Config {
        enabled: true,
        style: HeadingCapStyle::TitleCase,
        lowercase_words: ["a", "the", "for"].map(String::from).to_vec(),
        ..Default::default()
    });
    let ctx = LintContext::new(
        "## Guide for the `code` [link](./page.md)\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let fixed = rule.fix(&ctx).unwrap();

    let keeps_lowercase = fixed.contains("for the `code`");
    assert!(keeps_lowercase, "Expected 'for the `code`' but got: {fixed:?}");
}
1702
// Title case with plain text following a mid-heading code span: the trailing
// text is expected to be "for the Code" after the fix.
// (Same input and expectation as test_code_in_middle_normal_rules_apply.)
#[test]
fn test_text_after_code_capitalizes_last() {
    let rule = MD063HeadingCapitalization::from_config_struct(MD063Config {
        enabled: true,
        style: HeadingCapStyle::TitleCase,
        lowercase_words: ["a", "the", "for"].map(String::from).to_vec(),
        ..Default::default()
    });
    let ctx = LintContext::new(
        "## Using `const` for the code\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let fixed = rule.fix(&ctx).unwrap();

    let has_expected_tail = fixed.contains("for the Code");
    assert!(has_expected_tail, "Expected 'for the Code' but got: {fixed:?}");
}
1723
// Title case with `preserve_cased_words` enabled and a trailing code span:
// "iOS" keeps its casing and the lowercase-word "for" is not capitalized.
#[test]
fn test_preserve_cased_words_with_trailing_code() {
    let rule = MD063HeadingCapitalization::from_config_struct(MD063Config {
        enabled: true,
        style: HeadingCapStyle::TitleCase,
        lowercase_words: ["a", "the", "for"].map(String::from).to_vec(),
        preserve_cased_words: true,
        ..Default::default()
    });
    let ctx = LintContext::new(
        "## Guide for iOS `app`\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let fixed = rule.fix(&ctx).unwrap();

    let keeps_expected = fixed.contains("for iOS `app`");
    let capitalizes_for = fixed.contains("For iOS `app`");
    assert!(keeps_expected, "Expected 'for iOS `app`' but got: {fixed:?}");
    assert!(
        !capitalizes_for,
        "Should not capitalize 'for' before trailing code. Got: {fixed:?}"
    );
}
1749
// Title case with an ignore-words entry ("npm") and a trailing code span:
// "npm" is left untouched and "with"/"a" stay lowercase in the fixed heading.
#[test]
fn test_ignore_words_with_trailing_code() {
    let rule = MD063HeadingCapitalization::from_config_struct(MD063Config {
        enabled: true,
        style: HeadingCapStyle::TitleCase,
        lowercase_words: ["a", "the", "with"].map(String::from).to_vec(),
        ignore_words: ["npm"].map(String::from).to_vec(),
        ..Default::default()
    });
    let ctx = LintContext::new(
        "## Using npm with a `script`\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let fixed = rule.fix(&ctx).unwrap();

    let keeps_expected = fixed.contains("npm with a `script`");
    let capitalizes_article = fixed.contains("with A `script`");
    assert!(keeps_expected, "Expected 'npm with a `script`' but got: {fixed:?}");
    assert!(
        !capitalizes_article,
        "Should not capitalize 'a' before trailing code. Got: {fixed:?}"
    );
}
1775
// Edge case: the heading both starts and ends with a code span, with only
// lowercase-words in between. The assertions focus on "a": it must stay
// lowercase directly before the trailing code span.
#[test]
fn test_empty_text_segment_edge_case() {
    let rule = MD063HeadingCapitalization::from_config_struct(MD063Config {
        enabled: true,
        style: HeadingCapStyle::TitleCase,
        lowercase_words: ["a", "with"].map(String::from).to_vec(),
        ..Default::default()
    });
    let ctx = LintContext::new(
        "## `start` with a `end`\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let fixed = rule.fix(&ctx).unwrap();

    let keeps_lowercase = fixed.contains("a `end`");
    let capitalizes_article = fixed.contains("A `end`");
    assert!(keeps_lowercase, "Expected 'a `end`' but got: {fixed:?}");
    assert!(
        !capitalizes_article,
        "Should not capitalize 'a' before trailing code. Got: {fixed:?}"
    );
}
1798
// Sentence case with a trailing code span: only the first word is capitalized;
// everything up to the code span (including "the") stays lowercase.
#[test]
fn test_sentence_case_with_trailing_code() {
    let rule = MD063HeadingCapitalization::from_config_struct(MD063Config {
        enabled: true,
        style: HeadingCapStyle::SentenceCase,
        lowercase_words: ["a", "the"].map(String::from).to_vec(),
        ..Default::default()
    });
    let ctx = LintContext::new(
        "## guide for the `user`\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let fixed = rule.fix(&ctx).unwrap();

    let matches_expected = fixed.contains("Guide for the `user`");
    assert!(
        matches_expected,
        "Expected 'Guide for the `user`' but got: {fixed:?}"
    );
}
1819
// Title case with a hyphenated word earlier in the heading and a trailing code
// span: the lowercase-words "with" and "a" must remain lowercase.
#[test]
fn test_hyphenated_word_before_code() {
    let rule = MD063HeadingCapitalization::from_config_struct(MD063Config {
        enabled: true,
        style: HeadingCapStyle::TitleCase,
        lowercase_words: ["a", "the", "with"].map(String::from).to_vec(),
        ..Default::default()
    });
    let ctx = LintContext::new(
        "## Self-contained with a `feature`\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let fixed = rule.fix(&ctx).unwrap();

    let keeps_lowercase = fixed.contains("with a `feature`");
    assert!(keeps_lowercase, "Expected 'with a `feature`' but got: {fixed:?}");
}
1840
1841    // Issue #228: Sentence case with inline code at heading start
1842    // When a heading starts with inline code, the first word after the code
1843    // should NOT be capitalized because the heading already has a "first element"
1844
// Issue #228 reproduction: a sentence-case heading that opens with inline code
// and continues in lowercase must produce no warnings.
#[test]
fn test_sentence_case_code_at_start_basic() {
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let ctx = LintContext::new(
        "# `rumdl` is a linter\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let warnings = rule.check(&ctx).unwrap();

    // Collected up front so the failure output lists every reported message.
    let messages = warnings.iter().map(|warning| &warning.message).collect::<Vec<_>>();
    assert!(
        warnings.is_empty(),
        "Heading with code at start should not flag 'is' for capitalization. Got: {:?}",
        messages
    );
}
1859
// Sentence case: capitalized words following a leading code span are reported
// once, and the warning message carries the fully lowercased suggestion.
#[test]
fn test_sentence_case_code_at_start_incorrect_capitalization() {
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let ctx = LintContext::new(
        "# `rumdl` Is a Linter\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let warnings = rule.check(&ctx).unwrap();
    assert_eq!(warnings.len(), 1, "Should detect incorrect capitalization");

    let message = &warnings[0].message;
    assert!(
        message.contains("`rumdl` is a linter"),
        "Should suggest lowercase after code. Got: {:?}",
        message
    );
}
1875
// Sentence case: the fixer lowercases every word that follows a leading code span.
#[test]
fn test_sentence_case_code_at_start_fix() {
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let ctx = LintContext::new(
        "# `rumdl` Is A Linter\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let fixed = rule.fix(&ctx).unwrap();

    let is_lowercased = fixed.contains("# `rumdl` is a linter");
    assert!(is_lowercased, "Should fix to lowercase after code. Got: {fixed:?}");
}
1887
// Sentence case control case: a heading made only of plain text still gets its
// first word capitalized in the suggestion.
#[test]
fn test_sentence_case_text_at_start_still_capitalizes() {
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let ctx = LintContext::new(
        "# the quick brown fox\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let warnings = rule.check(&ctx).unwrap();
    assert_eq!(warnings.len(), 1);

    let message = &warnings[0].message;
    assert!(
        message.contains("The quick brown fox"),
        "Text-first heading should capitalize first word. Got: {:?}",
        message
    );
}
1902
// Sentence case: when a link opens the heading, the lowercase text after it is
// accepted as-is. The link text is lowercase too, so nothing should be flagged.
#[test]
fn test_sentence_case_link_at_start() {
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let ctx = LintContext::new(
        "# [api](api.md) reference guide\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let warnings = rule.check(&ctx).unwrap();

    let messages = warnings.iter().map(|warning| &warning.message).collect::<Vec<_>>();
    assert!(
        warnings.is_empty(),
        "Heading with link at start should not capitalize 'reference'. Got: {:?}",
        messages
    );
}
1918
// Sentence case: an all-caps acronym used as link text is kept in the
// suggestion, while the capitalized words after the link are lowercased.
#[test]
fn test_sentence_case_link_preserves_acronyms() {
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let ctx = LintContext::new(
        "# [API](api.md) Reference Guide\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let warnings = rule.check(&ctx).unwrap();
    assert_eq!(warnings.len(), 1);

    let message = &warnings[0].message;
    assert!(
        message.contains("[API](api.md) reference guide"),
        "Should preserve acronym 'API' but lowercase following text. Got: {:?}",
        message
    );
}
1934
// Sentence case with `preserve_cased_words`: a mixed-case brand name used as
// link text ("iPhone") survives, and the following words are lowercased.
#[test]
fn test_sentence_case_link_preserves_brand_names() {
    let rule = MD063HeadingCapitalization::from_config_struct(MD063Config {
        enabled: true,
        style: HeadingCapStyle::SentenceCase,
        preserve_cased_words: true,
        ..Default::default()
    });
    let ctx = LintContext::new(
        "# [iPhone](iphone.md) Features Guide\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let warnings = rule.check(&ctx).unwrap();
    assert_eq!(warnings.len(), 1);

    let message = &warnings[0].message;
    assert!(
        message.contains("[iPhone](iphone.md) features guide"),
        "Should preserve 'iPhone' but lowercase following text. Got: {:?}",
        message
    );
}
1956
// Sentence case: an ordinary capitalized word used as leading link text is
// lowercased in the suggestion, along with the text after the link.
#[test]
fn test_sentence_case_link_lowercases_regular_words() {
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let ctx = LintContext::new(
        "# [Documentation](docs.md) Reference\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let warnings = rule.check(&ctx).unwrap();
    assert_eq!(warnings.len(), 1);

    let message = &warnings[0].message;
    assert!(
        message.contains("[documentation](docs.md) reference"),
        "Should lowercase regular link text. Got: {:?}",
        message
    );
}
1972
// Sentence case: an acronym link followed by lowercase text is already
// correct and must not be reported.
#[test]
fn test_sentence_case_link_at_start_correct_already() {
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let ctx = LintContext::new(
        "# [API](api.md) reference guide\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let warnings = rule.check(&ctx).unwrap();

    let messages = warnings.iter().map(|warning| &warning.message).collect::<Vec<_>>();
    assert!(
        warnings.is_empty(),
        "Correctly cased heading with link should not be flagged. Got: {:?}",
        messages
    );
}
1986
// Sentence case with `preserve_cased_words`: "GitHub" as link text keeps its
// internal capital while the rest of the heading is lowercased.
#[test]
fn test_sentence_case_link_github_preserved() {
    let rule = MD063HeadingCapitalization::from_config_struct(MD063Config {
        enabled: true,
        style: HeadingCapStyle::SentenceCase,
        preserve_cased_words: true,
        ..Default::default()
    });
    let ctx = LintContext::new(
        "# [GitHub](gh.md) Repository Setup\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let warnings = rule.check(&ctx).unwrap();
    assert_eq!(warnings.len(), 1);

    let message = &warnings[0].message;
    assert!(
        message.contains("[GitHub](gh.md) repository setup"),
        "Should preserve 'GitHub'. Got: {:?}",
        message
    );
}
2007
// Sentence case: with a leading code span and a second one later on, the
// lowercase words between and after them must not be flagged.
#[test]
fn test_sentence_case_multiple_code_spans() {
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let ctx = LintContext::new(
        "# `foo` and `bar` are methods\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let warnings = rule.check(&ctx).unwrap();

    let messages = warnings.iter().map(|warning| &warning.message).collect::<Vec<_>>();
    assert!(
        warnings.is_empty(),
        "Should not capitalize words between/after code spans. Got: {:?}",
        messages
    );
}
2021
// Sentence case: a heading consisting solely of an inline code span yields no warnings.
#[test]
fn test_sentence_case_code_only_heading() {
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let ctx = LintContext::new(
        "# `rumdl`\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let warnings = rule.check(&ctx).unwrap();

    let messages = warnings.iter().map(|warning| &warning.message).collect::<Vec<_>>();
    assert!(
        warnings.is_empty(),
        "Code-only heading should be fine. Got: {:?}",
        messages
    );
}
2035
// Sentence case: when plain text comes first and a code span appears later,
// the first word is still capitalized in the suggestion.
#[test]
fn test_sentence_case_code_at_end() {
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let ctx = LintContext::new(
        "# install the `rumdl` tool\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let warnings = rule.check(&ctx).unwrap();
    assert_eq!(warnings.len(), 1);

    let message = &warnings[0].message;
    assert!(
        message.contains("Install the `rumdl` tool"),
        "First word should still be capitalized when text comes first. Got: {:?}",
        message
    );
}
2051
// Sentence case with a code span mid-heading: the leading word is capitalized
// and the remaining text stays lowercase in the suggestion.
#[test]
fn test_sentence_case_code_in_middle() {
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let ctx = LintContext::new(
        "# using the `rumdl` linter for markdown\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let warnings = rule.check(&ctx).unwrap();
    assert_eq!(warnings.len(), 1);

    let message = &warnings[0].message;
    assert!(
        message.contains("Using the `rumdl` linter for markdown"),
        "First word should be capitalized. Got: {:?}",
        message
    );
}
2067
// Sentence case with `preserve_cased_words`: a mixed-case word ("iPhone")
// directly after a leading code span is accepted without warnings.
#[test]
fn test_sentence_case_preserved_word_after_code() {
    let rule = MD063HeadingCapitalization::from_config_struct(MD063Config {
        enabled: true,
        style: HeadingCapStyle::SentenceCase,
        preserve_cased_words: true,
        ..Default::default()
    });
    let ctx = LintContext::new(
        "# `swift` iPhone development\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let warnings = rule.check(&ctx).unwrap();

    let messages = warnings.iter().map(|warning| &warning.message).collect::<Vec<_>>();
    assert!(
        warnings.is_empty(),
        "Preserved words after code should stay. Got: {:?}",
        messages
    );
}
2088
// Title case: words after a leading code span are still capitalized. The check
// is deliberately lenient about the first word after the code span and accepts
// either "Quick Start Guide" or "quick Start Guide" in the suggestion.
#[test]
fn test_title_case_code_at_start_still_capitalizes() {
    let rule = create_rule_with_style(HeadingCapStyle::TitleCase);
    let ctx = LintContext::new(
        "# `api` quick start guide\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let warnings = rule.check(&ctx).unwrap();
    assert_eq!(warnings.len(), 1);

    let message = &warnings[0].message;
    let accepted = ["Quick Start Guide", "quick Start Guide"];
    assert!(
        accepted.iter().any(|&phrase| message.contains(phrase)),
        "Title case should capitalize major words after code. Got: {:?}",
        message
    );
}
2104
2105    // ======== HTML TAG TESTS ========
2106
// Sentence case: an inline HTML element opening the heading is left untouched,
// and all text after it is lowercased by the fixer.
#[test]
fn test_sentence_case_html_tag_at_start() {
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let ctx = LintContext::new(
        "# <kbd>Ctrl</kbd> is a Modifier Key\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );

    // Exactly one warning for the heading, then verify the full fixed document.
    let warning_count = rule.check(&ctx).unwrap().len();
    assert_eq!(warning_count, 1);

    let expected = "# <kbd>Ctrl</kbd> is a modifier key\n";
    let fixed = rule.fix(&ctx).unwrap();
    assert_eq!(fixed, expected, "Text after HTML at start should be lowercase");
}
2122
// Sentence case: uppercase content inside an inline HTML element in the middle
// of an otherwise correct heading must not trigger a warning.
#[test]
fn test_sentence_case_html_tag_preserves_content() {
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let ctx = LintContext::new(
        "# The <abbr>API</abbr> documentation guide\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let warnings = rule.check(&ctx).unwrap();

    let messages = warnings.iter().map(|warning| &warning.message).collect::<Vec<_>>();
    assert!(
        warnings.is_empty(),
        "HTML tag content should be preserved. Got: {:?}",
        messages
    );
}
2137
// Sentence case: a leading HTML element wrapping an acronym is kept verbatim,
// while the capitalized words after it are lowercased by the fixer.
#[test]
fn test_sentence_case_html_tag_at_start_with_acronym() {
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let ctx = LintContext::new(
        "# <abbr>API</abbr> Documentation Guide\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );

    let warning_count = rule.check(&ctx).unwrap().len();
    assert_eq!(warning_count, 1);

    let expected = "# <abbr>API</abbr> documentation guide\n";
    let fixed = rule.fix(&ctx).unwrap();
    assert_eq!(
        fixed, expected,
        "Text after HTML at start should be lowercase, HTML content preserved"
    );
}
2152
// Sentence case with an HTML element mid-heading: the first word is
// capitalized, the element is untouched, and the trailing word is lowercased.
#[test]
fn test_sentence_case_html_tag_in_middle() {
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let ctx = LintContext::new(
        "# using the <code>config</code> File\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );

    let warning_count = rule.check(&ctx).unwrap().len();
    assert_eq!(warning_count, 1);

    let expected = "# Using the <code>config</code> file\n";
    let fixed = rule.fix(&ctx).unwrap();
    assert_eq!(
        fixed, expected,
        "First word capitalized, HTML preserved, rest lowercase"
    );
}
2167
// Sentence case: the content of a <strong> element keeps its capitalization,
// and only the plain text after it is lowercased.
#[test]
fn test_html_tag_strong_emphasis() {
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let ctx = LintContext::new(
        "# The <strong>Bold</strong> Way\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );

    let warning_count = rule.check(&ctx).unwrap().len();
    assert_eq!(warning_count, 1);

    let expected = "# The <strong>Bold</strong> way\n";
    let fixed = rule.fix(&ctx).unwrap();
    assert_eq!(fixed, expected, "<strong> tag content should be preserved");
}
2182
// Sentence case: an HTML element that carries attributes is preserved verbatim,
// and the text after it is lowercased.
#[test]
fn test_html_tag_with_attributes() {
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let ctx = LintContext::new(
        "# <span class=\"highlight\">Important</span> Notice Here\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );

    let warning_count = rule.check(&ctx).unwrap().len();
    assert_eq!(warning_count, 1);

    let expected = "# <span class=\"highlight\">Important</span> notice here\n";
    let fixed = rule.fix(&ctx).unwrap();
    assert_eq!(fixed, expected, "HTML tag with attributes should be preserved");
}
2197
// Sentence case: two HTML elements joined by "+" at the start of the heading
// are both preserved; the words after them are lowercased.
#[test]
fn test_multiple_html_tags() {
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let ctx = LintContext::new(
        "# <kbd>Ctrl</kbd>+<kbd>C</kbd> to Copy Text\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );

    let warning_count = rule.check(&ctx).unwrap().len();
    assert_eq!(warning_count, 1);

    let expected = "# <kbd>Ctrl</kbd>+<kbd>C</kbd> to copy text\n";
    let fixed = rule.fix(&ctx).unwrap();
    assert_eq!(fixed, expected, "Multiple HTML tags should all be preserved");
}
2212
// Sentence case: an HTML element followed by an inline code span are both
// preserved, and the capitalized word after them is lowercased.
#[test]
fn test_html_and_code_mixed() {
    let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
    let ctx = LintContext::new(
        "# <kbd>Ctrl</kbd>+`v` Paste command\n",
        crate::config::MarkdownFlavor::Standard,
        None,
    );

    let warning_count = rule.check(&ctx).unwrap().len();
    assert_eq!(warning_count, 1);

    let expected = "# <kbd>Ctrl</kbd>+`v` paste command\n";
    let fixed = rule.fix(&ctx).unwrap();
    assert_eq!(fixed, expected, "HTML and code should both be preserved");
}
2227
2228    #[test]
2229    fn test_self_closing_html_tag() {
2230        // Self-closing tags like <br/>
2231        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2232        let content = "# Line one<br/>Line Two Here\n";
2233        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2234        let result = rule.check(&ctx).unwrap();
2235        assert_eq!(result.len(), 1);
2236        let fixed = rule.fix(&ctx).unwrap();
2237        assert_eq!(
2238            fixed, "# Line one<br/>line two here\n",
2239            "Self-closing HTML tags should be preserved"
2240        );
2241    }
2242
2243    #[test]
2244    fn test_title_case_with_html_tags() {
2245        // Title case with HTML tags
2246        let rule = create_rule_with_style(HeadingCapStyle::TitleCase);
2247        let content = "# the <kbd>ctrl</kbd> key is a modifier\n";
2248        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2249        let result = rule.check(&ctx).unwrap();
2250        assert_eq!(result.len(), 1);
2251        let fixed = rule.fix(&ctx).unwrap();
2252        // "the" as first word should be "The", content inside <kbd> preserved
2253        assert!(
2254            fixed.contains("<kbd>ctrl</kbd>"),
2255            "HTML tag content should be preserved in title case. Got: {fixed}"
2256        );
2257        assert!(
2258            fixed.starts_with("# The ") || fixed.starts_with("# the "),
2259            "Title case should work with HTML. Got: {fixed}"
2260        );
2261    }
2262
2263    // ======== CARET NOTATION TESTS ========
2264
2265    #[test]
2266    fn test_sentence_case_preserves_caret_notation() {
2267        // Caret notation for control characters should be preserved
2268        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2269        let content = "## Ctrl+A, Ctrl+R output ^A, ^R on zsh\n";
2270        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2271        let result = rule.check(&ctx).unwrap();
2272        // Should not flag - ^A and ^R are preserved
2273        assert!(
2274            result.is_empty(),
2275            "Caret notation should be preserved. Got: {:?}",
2276            result.iter().map(|w| &w.message).collect::<Vec<_>>()
2277        );
2278    }
2279
2280    #[test]
2281    fn test_sentence_case_caret_notation_various() {
2282        // Various caret notation patterns
2283        let rule = create_rule_with_style(HeadingCapStyle::SentenceCase);
2284
2285        // ^C for interrupt
2286        let content = "## Press ^C to cancel\n";
2287        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2288        let result = rule.check(&ctx).unwrap();
2289        assert!(
2290            result.is_empty(),
2291            "^C should be preserved. Got: {:?}",
2292            result.iter().map(|w| &w.message).collect::<Vec<_>>()
2293        );
2294
2295        // ^Z for suspend
2296        let content = "## Use ^Z for background\n";
2297        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2298        let result = rule.check(&ctx).unwrap();
2299        assert!(
2300            result.is_empty(),
2301            "^Z should be preserved. Got: {:?}",
2302            result.iter().map(|w| &w.message).collect::<Vec<_>>()
2303        );
2304
2305        // ^[ for escape
2306        let content = "## Press ^[ for escape\n";
2307        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2308        let result = rule.check(&ctx).unwrap();
2309        assert!(
2310            result.is_empty(),
2311            "^[ should be preserved. Got: {:?}",
2312            result.iter().map(|w| &w.message).collect::<Vec<_>>()
2313        );
2314    }
2315
2316    #[test]
2317    fn test_caret_notation_detection() {
2318        let rule = create_rule();
2319
2320        // Valid caret notation
2321        assert!(rule.is_caret_notation("^A"));
2322        assert!(rule.is_caret_notation("^Z"));
2323        assert!(rule.is_caret_notation("^C"));
2324        assert!(rule.is_caret_notation("^@")); // NUL
2325        assert!(rule.is_caret_notation("^[")); // ESC
2326        assert!(rule.is_caret_notation("^]")); // GS
2327        assert!(rule.is_caret_notation("^^")); // RS
2328        assert!(rule.is_caret_notation("^_")); // US
2329
2330        // Not caret notation
2331        assert!(!rule.is_caret_notation("^a")); // lowercase
2332        assert!(!rule.is_caret_notation("A")); // no caret
2333        assert!(!rule.is_caret_notation("^")); // caret alone
2334        assert!(!rule.is_caret_notation("^1")); // digit
2335    }
2336
2337    // MD044 proper names integration tests
2338    //
2339    // When MD063 (sentence case) and MD044 (proper names) are both active, MD063 must
2340    // preserve the exact capitalization of MD044 proper names rather than lowercasing them.
2341    // Without this, the two rules oscillate: MD044 re-capitalizes what MD063 lowercases.
2342
2343    fn create_sentence_case_rule_with_proper_names(names: Vec<String>) -> MD063HeadingCapitalization {
2344        let config = MD063Config {
2345            enabled: true,
2346            style: HeadingCapStyle::SentenceCase,
2347            ..Default::default()
2348        };
2349        let mut rule = MD063HeadingCapitalization::from_config_struct(config);
2350        rule.proper_names = names;
2351        rule
2352    }
2353
2354    #[test]
2355    fn test_sentence_case_preserves_single_word_proper_name() {
2356        let rule = create_sentence_case_rule_with_proper_names(vec!["JavaScript".to_string()]);
2357        // "javascript" in non-first position should become "JavaScript", not "javascript"
2358        let content = "# installing javascript\n";
2359        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2360        let result = rule.check(&ctx).unwrap();
2361        assert_eq!(result.len(), 1, "Should flag the heading");
2362        let fix_text = result[0].fix.as_ref().unwrap().replacement.as_str();
2363        assert!(
2364            fix_text.contains("JavaScript"),
2365            "Fix should preserve proper name 'JavaScript', got: {fix_text:?}"
2366        );
2367        assert!(
2368            !fix_text.contains("javascript"),
2369            "Fix should not have lowercase 'javascript', got: {fix_text:?}"
2370        );
2371    }
2372
2373    #[test]
2374    fn test_sentence_case_preserves_multi_word_proper_name() {
2375        let rule = create_sentence_case_rule_with_proper_names(vec!["Good Application".to_string()]);
2376        // "Good Application" is a proper name; sentence case must not lowercase "Application"
2377        let content = "# using good application features\n";
2378        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2379        let result = rule.check(&ctx).unwrap();
2380        assert_eq!(result.len(), 1, "Should flag the heading");
2381        let fix_text = result[0].fix.as_ref().unwrap().replacement.as_str();
2382        assert!(
2383            fix_text.contains("Good Application"),
2384            "Fix should preserve 'Good Application' as a phrase, got: {fix_text:?}"
2385        );
2386    }
2387
2388    #[test]
2389    fn test_sentence_case_proper_name_at_start_of_heading() {
2390        let rule = create_sentence_case_rule_with_proper_names(vec!["Good Application".to_string()]);
2391        // The proper name "Good Application" starts the heading; both words must be canonical
2392        let content = "# good application overview\n";
2393        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2394        let result = rule.check(&ctx).unwrap();
2395        assert_eq!(result.len(), 1, "Should flag the heading");
2396        let fix_text = result[0].fix.as_ref().unwrap().replacement.as_str();
2397        assert!(
2398            fix_text.contains("Good Application"),
2399            "Fix should produce 'Good Application' at start of heading, got: {fix_text:?}"
2400        );
2401        assert!(
2402            fix_text.contains("overview"),
2403            "Non-proper-name word 'overview' should be lowercase, got: {fix_text:?}"
2404        );
2405    }
2406
2407    #[test]
2408    fn test_sentence_case_with_proper_names_no_oscillation() {
2409        // This is the core convergence test: applying the fix once must produce
2410        // output that is already correct (no further changes needed).
2411        let rule = create_sentence_case_rule_with_proper_names(vec!["Good Application".to_string()]);
2412
2413        // First application of fix
2414        let content = "# installing good application on your system\n";
2415        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2416        let result = rule.check(&ctx).unwrap();
2417        assert_eq!(result.len(), 1);
2418        let fixed_heading = result[0].fix.as_ref().unwrap().replacement.as_str();
2419
2420        // The fixed heading should contain the proper name preserved
2421        assert!(
2422            fixed_heading.contains("Good Application"),
2423            "After fix, proper name must be preserved: {fixed_heading:?}"
2424        );
2425
2426        // Second application: must produce no further warnings (convergence)
2427        let fixed_line = format!("{fixed_heading}\n");
2428        let ctx2 = LintContext::new(&fixed_line, crate::config::MarkdownFlavor::Standard, None);
2429        let result2 = rule.check(&ctx2).unwrap();
2430        assert!(
2431            result2.is_empty(),
2432            "After one fix, heading must already satisfy both MD063 and MD044 - no oscillation. \
2433             Second pass warnings: {result2:?}"
2434        );
2435    }
2436
2437    #[test]
2438    fn test_sentence_case_proper_names_already_correct() {
2439        let rule = create_sentence_case_rule_with_proper_names(vec!["Good Application".to_string()]);
2440        // Heading already has correct sentence case with proper name preserved
2441        let content = "# Installing Good Application\n";
2442        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2443        let result = rule.check(&ctx).unwrap();
2444        assert!(
2445            result.is_empty(),
2446            "Correct sentence-case heading with proper name should not be flagged, got: {result:?}"
2447        );
2448    }
2449
2450    #[test]
2451    fn test_sentence_case_multiple_proper_names_in_heading() {
2452        let rule = create_sentence_case_rule_with_proper_names(vec!["TypeScript".to_string(), "React".to_string()]);
2453        let content = "# using typescript with react\n";
2454        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2455        let result = rule.check(&ctx).unwrap();
2456        assert_eq!(result.len(), 1);
2457        let fix_text = result[0].fix.as_ref().unwrap().replacement.as_str();
2458        assert!(
2459            fix_text.contains("TypeScript"),
2460            "Fix should preserve 'TypeScript', got: {fix_text:?}"
2461        );
2462        assert!(
2463            fix_text.contains("React"),
2464            "Fix should preserve 'React', got: {fix_text:?}"
2465        );
2466    }
2467
2468    #[test]
2469    fn test_sentence_case_unicode_casefold_expansion_before_proper_name() {
2470        // Regression for Unicode case-fold expansion: `İ` lowercases to `i̇` (2 code points),
2471        // so matching offsets must be computed from the original text, not from a lowercased copy.
2472        let rule = create_sentence_case_rule_with_proper_names(vec!["Österreich".to_string()]);
2473        let content = "# İ österreich guide\n";
2474        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2475
2476        // Should not panic and should preserve canonical proper-name casing.
2477        let result = rule.check(&ctx).unwrap();
2478        assert_eq!(result.len(), 1, "Should flag heading for canonical proper-name casing");
2479        let fix_text = result[0].fix.as_ref().unwrap().replacement.as_str();
2480        assert!(
2481            fix_text.contains("Österreich"),
2482            "Fix should preserve canonical 'Österreich', got: {fix_text:?}"
2483        );
2484    }
2485
2486    #[test]
2487    fn test_sentence_case_preserves_trailing_punctuation_on_proper_name() {
2488        let rule = create_sentence_case_rule_with_proper_names(vec!["JavaScript".to_string()]);
2489        let content = "# using javascript, today\n";
2490        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2491        let result = rule.check(&ctx).unwrap();
2492        assert_eq!(result.len(), 1, "Should flag heading");
2493        let fix_text = result[0].fix.as_ref().unwrap().replacement.as_str();
2494        assert!(
2495            fix_text.contains("JavaScript,"),
2496            "Fix should preserve trailing punctuation, got: {fix_text:?}"
2497        );
2498    }
2499
2500    // Title case + MD044 conflict tests
2501    //
2502    // In title case, short words like "the", "a", "of" are kept lowercase by MD063.
2503    // If those words are part of an MD044 proper name (e.g. "The Rolling Stones"),
2504    // the same oscillation problem occurs.  The fix must extend to title case too.
2505
2506    fn create_title_case_rule_with_proper_names(names: Vec<String>) -> MD063HeadingCapitalization {
2507        let config = MD063Config {
2508            enabled: true,
2509            style: HeadingCapStyle::TitleCase,
2510            ..Default::default()
2511        };
2512        let mut rule = MD063HeadingCapitalization::from_config_struct(config);
2513        rule.proper_names = names;
2514        rule
2515    }
2516
2517    #[test]
2518    fn test_title_case_preserves_proper_name_with_lowercase_article() {
2519        // "The" is in the lowercase_words list for title case, so "the" in the middle
2520        // of a heading would normally stay lowercase.  But "The Rolling Stones" is a
2521        // proper name that must be capitalised exactly.
2522        let rule = create_title_case_rule_with_proper_names(vec!["The Rolling Stones".to_string()]);
2523        let content = "# listening to the rolling stones today\n";
2524        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2525        let result = rule.check(&ctx).unwrap();
2526        assert_eq!(result.len(), 1, "Should flag the heading");
2527        let fix_text = result[0].fix.as_ref().unwrap().replacement.as_str();
2528        assert!(
2529            fix_text.contains("The Rolling Stones"),
2530            "Fix should preserve proper name 'The Rolling Stones', got: {fix_text:?}"
2531        );
2532    }
2533
2534    #[test]
2535    fn test_title_case_proper_name_no_oscillation() {
2536        // One fix pass must produce output that title case already accepts.
2537        let rule = create_title_case_rule_with_proper_names(vec!["The Rolling Stones".to_string()]);
2538        let content = "# listening to the rolling stones today\n";
2539        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2540        let result = rule.check(&ctx).unwrap();
2541        assert_eq!(result.len(), 1);
2542        let fixed_heading = result[0].fix.as_ref().unwrap().replacement.as_str();
2543
2544        let fixed_line = format!("{fixed_heading}\n");
2545        let ctx2 = LintContext::new(&fixed_line, crate::config::MarkdownFlavor::Standard, None);
2546        let result2 = rule.check(&ctx2).unwrap();
2547        assert!(
2548            result2.is_empty(),
2549            "After one title-case fix, heading must already satisfy both rules. \
2550             Second pass warnings: {result2:?}"
2551        );
2552    }
2553
2554    #[test]
2555    fn test_title_case_unicode_casefold_expansion_before_proper_name() {
2556        let rule = create_title_case_rule_with_proper_names(vec!["Österreich".to_string()]);
2557        let content = "# İ österreich guide\n";
2558        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2559        let result = rule.check(&ctx).unwrap();
2560        assert_eq!(result.len(), 1, "Should flag the heading");
2561        let fix_text = result[0].fix.as_ref().unwrap().replacement.as_str();
2562        assert!(
2563            fix_text.contains("Österreich"),
2564            "Fix should preserve canonical proper-name casing, got: {fix_text:?}"
2565        );
2566    }
2567
2568    // End-to-end integration test: from_config wires MD044 names into MD063
2569    //
2570    // This tests the actual code path used in production, where both rules are
2571    // configured in a rumdl.toml and the rule registry calls from_config.
2572
2573    #[test]
2574    fn test_from_config_loads_md044_names_into_md063() {
2575        use crate::config::{Config, RuleConfig};
2576        use crate::rule::Rule;
2577        use std::collections::BTreeMap;
2578
2579        let mut config = Config::default();
2580
2581        // Configure MD063 with sentence_case
2582        let mut md063_values = BTreeMap::new();
2583        md063_values.insert("style".to_string(), toml::Value::String("sentence_case".to_string()));
2584        md063_values.insert("enabled".to_string(), toml::Value::Boolean(true));
2585        config.rules.insert(
2586            "MD063".to_string(),
2587            RuleConfig {
2588                values: md063_values,
2589                severity: None,
2590            },
2591        );
2592
2593        // Configure MD044 with a proper name
2594        let mut md044_values = BTreeMap::new();
2595        md044_values.insert(
2596            "names".to_string(),
2597            toml::Value::Array(vec![toml::Value::String("Good Application".to_string())]),
2598        );
2599        config.rules.insert(
2600            "MD044".to_string(),
2601            RuleConfig {
2602                values: md044_values,
2603                severity: None,
2604            },
2605        );
2606
2607        // Build MD063 via the production code path
2608        let rule = MD063HeadingCapitalization::from_config(&config);
2609
2610        // Verify MD044 names were loaded: the fix must preserve "Good Application"
2611        let content = "# using good application features\n";
2612        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2613        let result = rule.check(&ctx).unwrap();
2614        assert_eq!(result.len(), 1, "Should flag the heading");
2615        let fix_text = result[0].fix.as_ref().unwrap().replacement.as_str();
2616        assert!(
2617            fix_text.contains("Good Application"),
2618            "from_config should wire MD044 names into MD063; fix should preserve \
2619             'Good Application', got: {fix_text:?}"
2620        );
2621    }
2622}