rumdl_lib/rules/
md044_proper_names.rs

1use crate::utils::fast_hash;
2use crate::utils::regex_cache::{escape_regex, get_cached_regex};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use std::collections::{HashMap, HashSet};
6use std::sync::{Arc, Mutex};
7
8mod md044_config;
9pub use md044_config::MD044Config;
10
11type WarningPosition = (usize, usize, String); // (line, column, found_name)
12
13/// Rule MD044: Proper names should be capitalized
14///
15/// See [docs/md044.md](../../docs/md044.md) for full documentation, configuration, and examples.
16///
17/// This rule is triggered when proper names are not capitalized correctly in the document.
18/// For example, if you have defined "JavaScript" as a proper name, the rule will flag any
19/// occurrences of "javascript" or "Javascript" as violations.
20///
21/// ## Purpose
22///
23/// Ensuring consistent capitalization of proper names improves document quality and
24/// professionalism. This is especially important for technical documentation where
25/// product names, programming languages, and technologies often have specific
26/// capitalization conventions.
27///
28/// ## Configuration Options
29///
30/// The rule supports the following configuration options:
31///
32/// ```yaml
33/// MD044:
34///   names: []                # List of proper names to check for correct capitalization
35///   code-blocks: false       # Whether to check code blocks (default: false)
36/// ```
37///
38/// Example configuration:
39///
40/// ```yaml
41/// MD044:
42///   names: ["JavaScript", "Node.js", "TypeScript"]
43///   code-blocks: true
44/// ```
45///
46/// ## Performance Optimizations
47///
48/// This rule implements several performance optimizations:
49///
50/// 1. **Regex Caching**: Pre-compiles and caches regex patterns for each proper name
51/// 2. **Content Caching**: Caches results based on content hashing for repeated checks
52/// 3. **Efficient Text Processing**: Uses optimized algorithms to avoid redundant text processing
53/// 4. **Smart Code Block Detection**: Efficiently identifies and optionally excludes code blocks
54///
55/// ## Edge Cases Handled
56///
57/// - **Word Boundaries**: Only matches complete words, not substrings within other words
58/// - **Case Sensitivity**: Properly handles case-specific matching
59/// - **Code Blocks**: Optionally checks code blocks (controlled by code-blocks setting)
60/// - **Markdown Formatting**: Handles proper names within Markdown formatting elements
61///
62/// ## Fix Behavior
63///
64/// When fixing issues, this rule replaces incorrect capitalization with the correct form
65/// as defined in the configuration.
66///
67/// Check if a trimmed line is an inline config comment from a linting tool.
68/// Recognized tools: rumdl, markdownlint, Vale, and remark-lint.
69fn is_inline_config_comment(trimmed: &str) -> bool {
70    trimmed.starts_with("<!-- rumdl-")
71        || trimmed.starts_with("<!-- markdownlint-")
72        || trimmed.starts_with("<!-- vale off")
73        || trimmed.starts_with("<!-- vale on")
74        || (trimmed.starts_with("<!-- vale ") && trimmed.contains(" = "))
75        || trimmed.starts_with("<!-- vale style")
76        || trimmed.starts_with("<!-- lint disable ")
77        || trimmed.starts_with("<!-- lint enable ")
78        || trimmed.starts_with("<!-- lint ignore ")
79}
80
81#[derive(Clone)]
82pub struct MD044ProperNames {
83    config: MD044Config,
84    // Cache the combined regex pattern string
85    combined_pattern: Option<String>,
86    // Precomputed lowercase name variants for fast pre-checks
87    name_variants: Vec<String>,
88    // Cache for name violations by content hash
89    content_cache: Arc<Mutex<HashMap<u64, Vec<WarningPosition>>>>,
90}
91
92impl MD044ProperNames {
93    pub fn new(names: Vec<String>, code_blocks: bool) -> Self {
94        let config = MD044Config {
95            names,
96            code_blocks,
97            html_elements: true, // Default to checking HTML elements
98            html_comments: true, // Default to checking HTML comments
99        };
100        let combined_pattern = Self::create_combined_pattern(&config);
101        let name_variants = Self::build_name_variants(&config);
102        Self {
103            config,
104            combined_pattern,
105            name_variants,
106            content_cache: Arc::new(Mutex::new(HashMap::new())),
107        }
108    }
109
110    // Helper function for consistent ASCII normalization
111    fn ascii_normalize(s: &str) -> String {
112        s.replace(['é', 'è', 'ê', 'ë'], "e")
113            .replace(['à', 'á', 'â', 'ä', 'ã', 'å'], "a")
114            .replace(['ï', 'î', 'í', 'ì'], "i")
115            .replace(['ü', 'ú', 'ù', 'û'], "u")
116            .replace(['ö', 'ó', 'ò', 'ô', 'õ'], "o")
117            .replace('ñ', "n")
118            .replace('ç', "c")
119    }
120
121    pub fn from_config_struct(config: MD044Config) -> Self {
122        let combined_pattern = Self::create_combined_pattern(&config);
123        let name_variants = Self::build_name_variants(&config);
124        Self {
125            config,
126            combined_pattern,
127            name_variants,
128            content_cache: Arc::new(Mutex::new(HashMap::new())),
129        }
130    }
131
132    // Create a combined regex pattern for all proper names
133    fn create_combined_pattern(config: &MD044Config) -> Option<String> {
134        if config.names.is_empty() {
135            return None;
136        }
137
138        // Create patterns for all names and their variations
139        let mut patterns: Vec<String> = config
140            .names
141            .iter()
142            .flat_map(|name| {
143                let mut variations = vec![];
144                let lower_name = name.to_lowercase();
145
146                // Add the lowercase version
147                variations.push(escape_regex(&lower_name));
148
149                // Add version without dots
150                let lower_name_no_dots = lower_name.replace('.', "");
151                if lower_name != lower_name_no_dots {
152                    variations.push(escape_regex(&lower_name_no_dots));
153                }
154
155                // Add ASCII-normalized versions for common accented characters
156                let ascii_normalized = Self::ascii_normalize(&lower_name);
157
158                if ascii_normalized != lower_name {
159                    variations.push(escape_regex(&ascii_normalized));
160
161                    // Also add version without dots
162                    let ascii_no_dots = ascii_normalized.replace('.', "");
163                    if ascii_normalized != ascii_no_dots {
164                        variations.push(escape_regex(&ascii_no_dots));
165                    }
166                }
167
168                variations
169            })
170            .collect();
171
172        // Sort patterns by length (longest first) to avoid shorter patterns matching within longer ones
173        patterns.sort_by_key(|b| std::cmp::Reverse(b.len()));
174
175        // Combine all patterns into a single regex with capture groups
176        // Don't use \b as it doesn't work with Unicode - we'll check boundaries manually
177        Some(format!(r"(?i)({})", patterns.join("|")))
178    }
179
180    fn build_name_variants(config: &MD044Config) -> Vec<String> {
181        let mut variants = HashSet::new();
182        for name in &config.names {
183            let lower_name = name.to_lowercase();
184            variants.insert(lower_name.clone());
185
186            let lower_no_dots = lower_name.replace('.', "");
187            if lower_name != lower_no_dots {
188                variants.insert(lower_no_dots);
189            }
190
191            let ascii_normalized = Self::ascii_normalize(&lower_name);
192            if ascii_normalized != lower_name {
193                variants.insert(ascii_normalized.clone());
194
195                let ascii_no_dots = ascii_normalized.replace('.', "");
196                if ascii_normalized != ascii_no_dots {
197                    variants.insert(ascii_no_dots);
198                }
199            }
200        }
201
202        variants.into_iter().collect()
203    }
204
    /// Find all name violations in the content and return their positions.
    ///
    /// `content_lower` is the pre-computed lowercase version of `content`, passed in
    /// to avoid redundant allocations; it is only used for the whole-document pre-check.
    ///
    /// Each returned tuple is `(line, column, found_name)`, where `line` is 1-based
    /// and `column` is the match's byte offset within its line plus one.
    ///
    /// Results are cached under `fast_hash(content)`. If the cache mutex is poisoned,
    /// the lookup and the store are both silently skipped and the scan runs uncached.
    fn find_name_violations(
        &self,
        content: &str,
        ctx: &crate::lint_context::LintContext,
        content_lower: &str,
    ) -> Vec<WarningPosition> {
        // Early return: if no names configured or content is empty
        if self.config.names.is_empty() || content.is_empty() || self.combined_pattern.is_none() {
            return Vec::new();
        }

        // Early return: quick check if any of the configured names might be in content
        let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));

        if !has_potential_matches {
            return Vec::new();
        }

        // Check if we have cached results
        let hash = fast_hash(content);
        {
            // Use a separate scope for borrowing to minimize lock time
            if let Ok(cache) = self.content_cache.lock()
                && let Some(cached) = cache.get(&hash)
            {
                return cached.clone();
            }
        }

        let mut violations = Vec::new();

        // Get the regex from global cache; a pattern that fails to compile
        // yields no violations rather than an error.
        let combined_regex = match &self.combined_pattern {
            Some(pattern) => match get_cached_regex(pattern) {
                Ok(regex) => regex,
                Err(_) => return Vec::new(),
            },
            None => return Vec::new(),
        };

        // Use ctx.lines for better performance
        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
            let line_num = line_idx + 1;
            let line = line_info.content(ctx.content);

            // Skip code fence lines (```language or ~~~language)
            let trimmed = line.trim_start();
            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
                continue;
            }

            // Skip if in code block (when code_blocks = false)
            if !self.config.code_blocks && line_info.in_code_block {
                continue;
            }

            // Skip if in HTML block (when html_elements = false)
            if !self.config.html_elements && line_info.in_html_block {
                continue;
            }

            // Skip HTML comments using pre-computed line flag
            if !self.config.html_comments && line_info.in_html_comment {
                continue;
            }

            // Skip JSX expressions and MDX comments (MDX flavor)
            if line_info.in_jsx_expression || line_info.in_mdx_comment {
                continue;
            }

            // Skip Obsidian comments (Obsidian flavor)
            if line_info.in_obsidian_comment {
                continue;
            }

            // For frontmatter lines, determine offset where checkable value content starts.
            // YAML keys should not be checked against proper names - only values.
            // `usize::MAX` is the sentinel for "skip this whole line".
            let fm_value_offset = if line_info.in_front_matter {
                Self::frontmatter_value_offset(line)
            } else {
                0
            };
            if fm_value_offset == usize::MAX {
                continue;
            }

            // Skip inline config comments (rumdl, markdownlint, Vale, remark-lint directives)
            if is_inline_config_comment(trimmed) {
                continue;
            }

            // Cheap per-line pre-check: move on to the next line when it
            // contains none of the lowercase name variants.
            let line_lower = line.to_lowercase();
            let has_line_matches = self.name_variants.iter().any(|name| line_lower.contains(name));

            if !has_line_matches {
                continue;
            }

            // Use the combined regex to find all matches in one pass
            for cap in combined_regex.find_iter(line) {
                let found_name = &line[cap.start()..cap.end()];

                // Check word boundaries manually for Unicode support
                let start_pos = cap.start();
                let end_pos = cap.end();

                // Skip matches in the key portion of frontmatter lines
                if start_pos < fm_value_offset {
                    continue;
                }

                // Skip matches inside HTML tag attributes (handles multi-line tags).
                // `byte_pos` is the match start expressed as a document-wide byte offset.
                let byte_pos = line_info.byte_offset + start_pos;
                if ctx.is_in_html_tag(byte_pos) {
                    continue;
                }

                if !Self::is_at_word_boundary(line, start_pos, true) || !Self::is_at_word_boundary(line, end_pos, false)
                {
                    continue; // Not at word boundary
                }

                // Skip if in inline code when code_blocks is false
                if !self.config.code_blocks {
                    if ctx.is_in_code_block_or_span(byte_pos) {
                        continue;
                    }
                    // pulldown-cmark doesn't parse markdown syntax inside HTML
                    // comments, HTML blocks, or frontmatter, so backtick-wrapped
                    // text isn't detected by is_in_code_block_or_span. Check directly.
                    if (line_info.in_html_comment || line_info.in_html_block || line_info.in_front_matter)
                        && Self::is_in_backtick_code_in_line(line, start_pos)
                    {
                        continue;
                    }
                }

                // Skip if in link URL or reference definition
                if Self::is_in_link(ctx, byte_pos) {
                    continue;
                }

                // Skip if inside an angle-bracket URL (e.g., <https://...>)
                // The link parser skips autolinks inside HTML comments,
                // so we detect them directly in the line text.
                if Self::is_in_angle_bracket_url(line, start_pos) {
                    continue;
                }

                // Skip if inside a Markdown inline link URL in contexts where
                // pulldown-cmark doesn't parse Markdown syntax
                if (line_info.in_html_comment || line_info.in_html_block || line_info.in_front_matter)
                    && Self::is_in_markdown_link_url(line, start_pos)
                {
                    continue;
                }

                // Find which proper name this matches
                if let Some(proper_name) = self.get_proper_name_for(found_name) {
                    // Only flag if it's not already correct
                    if found_name != proper_name {
                        violations.push((line_num, cap.start() + 1, found_name.to_string()));
                    }
                }
            }
        }

        // Store in cache (ignore if mutex is poisoned)
        if let Ok(mut cache) = self.content_cache.lock() {
            cache.insert(hash, violations.clone());
        }
        violations
    }
382
383    /// Check if a byte position is within a link URL (not link text)
384    ///
385    /// Link text should be checked for proper names, but URLs should be skipped.
386    /// For `[text](url)` - check text, skip url
387    /// For `[text][ref]` - check text, skip reference portion
388    /// For `[[text]]` (WikiLinks) - check text, skip brackets
389    fn is_in_link(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
390        use pulldown_cmark::LinkType;
391
392        // Binary search links (sorted by byte_offset) to find candidate containing byte_pos
393        let link_idx = ctx.links.partition_point(|link| link.byte_offset <= byte_pos);
394        if link_idx > 0 {
395            let link = &ctx.links[link_idx - 1];
396            if byte_pos < link.byte_end {
397                // WikiLinks [[text]] start with '[[', regular links [text] start with '['
398                let text_start = if matches!(link.link_type, LinkType::WikiLink { .. }) {
399                    link.byte_offset + 2
400                } else {
401                    link.byte_offset + 1
402                };
403                let text_end = text_start + link.text.len();
404
405                // If position is within the text portion, skip only if text is a URL
406                if byte_pos >= text_start && byte_pos < text_end {
407                    return Self::link_text_is_url(&link.text);
408                }
409                // Position is in the URL/reference portion, skip it
410                return true;
411            }
412        }
413
414        // Binary search images (sorted by byte_offset) to find candidate containing byte_pos
415        let image_idx = ctx.images.partition_point(|img| img.byte_offset <= byte_pos);
416        if image_idx > 0 {
417            let image = &ctx.images[image_idx - 1];
418            if byte_pos < image.byte_end {
419                // Image starts with '![' so alt text starts at byte_offset + 2
420                let alt_start = image.byte_offset + 2;
421                let alt_end = alt_start + image.alt_text.len();
422
423                // If position is within the alt text portion, don't skip
424                if byte_pos >= alt_start && byte_pos < alt_end {
425                    return false;
426                }
427                // Position is in the URL/reference portion, skip it
428                return true;
429            }
430        }
431
432        // Check pre-computed reference definitions
433        ctx.is_in_reference_def(byte_pos)
434    }
435
436    /// Check if link text is a URL that should not have proper name corrections.
437    /// Matches markdownlint behavior: skip text starting with `http://`, `https://`, or `www.`.
438    fn link_text_is_url(text: &str) -> bool {
439        let lower = text.trim().to_ascii_lowercase();
440        lower.starts_with("http://") || lower.starts_with("https://") || lower.starts_with("www.")
441    }
442
443    /// Check if a position within a line falls inside an angle-bracket URL (`<scheme://...>`).
444    ///
445    /// The link parser skips autolinks inside HTML comments, so `ctx.links` won't
446    /// contain them. This function detects angle-bracket URLs directly in the line
447    /// text, covering both HTML comments and regular text as a safety net.
448    fn is_in_angle_bracket_url(line: &str, pos: usize) -> bool {
449        let bytes = line.as_bytes();
450        let len = bytes.len();
451        let mut i = 0;
452        while i < len {
453            if bytes[i] == b'<' {
454                let after_open = i + 1;
455                // Check for a valid URI scheme per CommonMark autolink spec:
456                // scheme = [a-zA-Z][a-zA-Z0-9+.-]{0,31}
457                // followed by ':'
458                if after_open < len && bytes[after_open].is_ascii_alphabetic() {
459                    let mut s = after_open + 1;
460                    let scheme_max = (after_open + 32).min(len);
461                    while s < scheme_max
462                        && (bytes[s].is_ascii_alphanumeric()
463                            || bytes[s] == b'+'
464                            || bytes[s] == b'-'
465                            || bytes[s] == b'.')
466                    {
467                        s += 1;
468                    }
469                    if s < len && bytes[s] == b':' {
470                        // Valid scheme found; scan for closing '>' with no spaces or '<'
471                        let mut j = s + 1;
472                        let mut found_close = false;
473                        while j < len {
474                            match bytes[j] {
475                                b'>' => {
476                                    found_close = true;
477                                    break;
478                                }
479                                b' ' | b'<' => break,
480                                _ => j += 1,
481                            }
482                        }
483                        if found_close && pos >= i && pos <= j {
484                            return true;
485                        }
486                        if found_close {
487                            i = j + 1;
488                            continue;
489                        }
490                    }
491                }
492            }
493            i += 1;
494        }
495        false
496    }
497
498    /// Check if a position within a line falls inside a Markdown link's
499    /// non-text portion (URL or reference label).
500    ///
501    /// pulldown-cmark does not parse Markdown syntax inside HTML comments, HTML
502    /// blocks, or frontmatter, so `ctx.links` won't contain links found there.
503    /// This function detects link patterns directly in the line text:
504    /// - `[text](url)` — returns true if `pos` is within `(...)`
505    /// - `[text][ref]` — returns true if `pos` is within the second `[...]`
506    fn is_in_markdown_link_url(line: &str, pos: usize) -> bool {
507        let bytes = line.as_bytes();
508        let len = bytes.len();
509        let mut i = 0;
510
511        while i < len {
512            // Look for unescaped '[' (handle double-escaped \\[ as unescaped)
513            if bytes[i] == b'[' && (i == 0 || bytes[i - 1] != b'\\' || (i >= 2 && bytes[i - 2] == b'\\')) {
514                // Find matching ']' handling nested brackets
515                let mut depth: u32 = 1;
516                let mut j = i + 1;
517                while j < len && depth > 0 {
518                    match bytes[j] {
519                        b'\\' => {
520                            j += 1; // skip escaped char
521                        }
522                        b'[' => depth += 1,
523                        b']' => depth -= 1,
524                        _ => {}
525                    }
526                    j += 1;
527                }
528
529                // j is now one past the ']'
530                if depth == 0 && j < len {
531                    if bytes[j] == b'(' {
532                        // Inline link: [text](url)
533                        let url_start = j;
534                        let mut paren_depth: u32 = 1;
535                        let mut k = j + 1;
536                        while k < len && paren_depth > 0 {
537                            match bytes[k] {
538                                b'\\' => {
539                                    k += 1; // skip escaped char
540                                }
541                                b'(' => paren_depth += 1,
542                                b')' => paren_depth -= 1,
543                                _ => {}
544                            }
545                            k += 1;
546                        }
547
548                        if paren_depth == 0 {
549                            if pos > url_start && pos < k {
550                                return true;
551                            }
552                            i = k;
553                            continue;
554                        }
555                    } else if bytes[j] == b'[' {
556                        // Reference link: [text][ref]
557                        let ref_start = j;
558                        let mut ref_depth: u32 = 1;
559                        let mut k = j + 1;
560                        while k < len && ref_depth > 0 {
561                            match bytes[k] {
562                                b'\\' => {
563                                    k += 1;
564                                }
565                                b'[' => ref_depth += 1,
566                                b']' => ref_depth -= 1,
567                                _ => {}
568                            }
569                            k += 1;
570                        }
571
572                        if ref_depth == 0 {
573                            if pos > ref_start && pos < k {
574                                return true;
575                            }
576                            i = k;
577                            continue;
578                        }
579                    }
580                }
581            }
582            i += 1;
583        }
584        false
585    }
586
587    /// Check if a position within a line falls inside backtick-delimited code.
588    ///
589    /// pulldown-cmark does not parse markdown syntax inside HTML comments, so
590    /// `ctx.is_in_code_block_or_span` returns false for backtick-wrapped text
591    /// within comments. This function detects backtick code spans directly in
592    /// the line text following CommonMark rules: a code span starts with N
593    /// backticks and ends with exactly N backticks.
594    fn is_in_backtick_code_in_line(line: &str, pos: usize) -> bool {
595        let bytes = line.as_bytes();
596        let len = bytes.len();
597        let mut i = 0;
598        while i < len {
599            if bytes[i] == b'`' {
600                // Count the opening backtick sequence length
601                let open_start = i;
602                while i < len && bytes[i] == b'`' {
603                    i += 1;
604                }
605                let tick_len = i - open_start;
606
607                // Scan forward for a closing sequence of exactly tick_len backticks
608                while i < len {
609                    if bytes[i] == b'`' {
610                        let close_start = i;
611                        while i < len && bytes[i] == b'`' {
612                            i += 1;
613                        }
614                        if i - close_start == tick_len {
615                            // Matched pair found; the code span content is between
616                            // the end of the opening backticks and the start of the
617                            // closing backticks (exclusive of the backticks themselves).
618                            let content_start = open_start + tick_len;
619                            let content_end = close_start;
620                            if pos >= content_start && pos < content_end {
621                                return true;
622                            }
623                            // Continue scanning after this pair
624                            break;
625                        }
626                        // Not the right length; keep scanning
627                    } else {
628                        i += 1;
629                    }
630                }
631            } else {
632                i += 1;
633            }
634        }
635        false
636    }
637
638    // Check if a character is a word boundary (handles Unicode)
639    fn is_word_boundary_char(c: char) -> bool {
640        !c.is_alphanumeric()
641    }
642
643    // Check if position is at a word boundary using byte-level lookups.
644    fn is_at_word_boundary(content: &str, pos: usize, is_start: bool) -> bool {
645        if is_start {
646            if pos == 0 {
647                return true;
648            }
649            match content[..pos].chars().next_back() {
650                None => true,
651                Some(c) => Self::is_word_boundary_char(c),
652            }
653        } else {
654            if pos >= content.len() {
655                return true;
656            }
657            match content[pos..].chars().next() {
658                None => true,
659                Some(c) => Self::is_word_boundary_char(c),
660            }
661        }
662    }
663
664    /// For a frontmatter line, return the byte offset where the checkable
665    /// value portion starts. Returns `usize::MAX` if the entire line should be
666    /// skipped (frontmatter delimiters, key-only lines, YAML comments, flow constructs).
667    fn frontmatter_value_offset(line: &str) -> usize {
668        let trimmed = line.trim();
669
670        // Skip frontmatter delimiters and empty lines
671        if trimmed == "---" || trimmed == "+++" || trimmed.is_empty() {
672            return usize::MAX;
673        }
674
675        // Skip YAML comments
676        if trimmed.starts_with('#') {
677            return usize::MAX;
678        }
679
680        // YAML list item: "  - item" or "  - key: value"
681        let stripped = line.trim_start();
682        if let Some(after_dash) = stripped.strip_prefix("- ") {
683            let leading = line.len() - stripped.len();
684            // Check if the list item contains a mapping (e.g., "- key: value")
685            if let Some(result) = Self::kv_value_offset(line, after_dash, leading + 2) {
686                return result;
687            }
688            // Bare list item value (no colon) - check content after "- "
689            return leading + 2;
690        }
691        if stripped == "-" {
692            return usize::MAX;
693        }
694
695        // Key-value pair with colon separator (YAML): "key: value"
696        if let Some(result) = Self::kv_value_offset(line, stripped, line.len() - stripped.len()) {
697            return result;
698        }
699
700        // Key-value pair with equals separator (TOML): "key = value"
701        if let Some(eq_pos) = line.find('=') {
702            let after_eq = eq_pos + 1;
703            if after_eq < line.len() && line.as_bytes()[after_eq] == b' ' {
704                let value_start = after_eq + 1;
705                let value_slice = &line[value_start..];
706                let value_trimmed = value_slice.trim();
707                if value_trimmed.is_empty() {
708                    return usize::MAX;
709                }
710                // For quoted values, skip the opening quote character
711                if (value_trimmed.starts_with('"') && value_trimmed.ends_with('"'))
712                    || (value_trimmed.starts_with('\'') && value_trimmed.ends_with('\''))
713                {
714                    let quote_offset = value_slice.find(['"', '\'']).unwrap_or(0);
715                    return value_start + quote_offset + 1;
716                }
717                return value_start;
718            }
719            // Equals with no space after or at end of line -> no value to check
720            return usize::MAX;
721        }
722
723        // No separator found - continuation line or bare value, check the whole line
724        0
725    }
726
727    /// Parse a key-value pair using colon separator within `content` that starts
728    /// at `base_offset` in the original line. Returns `Some(offset)` if a colon
729    /// separator is found, `None` if no colon is present.
730    fn kv_value_offset(line: &str, content: &str, base_offset: usize) -> Option<usize> {
731        let colon_pos = content.find(':')?;
732        let abs_colon = base_offset + colon_pos;
733        let after_colon = abs_colon + 1;
734        if after_colon < line.len() && line.as_bytes()[after_colon] == b' ' {
735            let value_start = after_colon + 1;
736            let value_slice = &line[value_start..];
737            let value_trimmed = value_slice.trim();
738            if value_trimmed.is_empty() {
739                return Some(usize::MAX);
740            }
741            // Skip flow mappings and flow sequences - too complex for heuristic parsing
742            if value_trimmed.starts_with('{') || value_trimmed.starts_with('[') {
743                return Some(usize::MAX);
744            }
745            // For quoted values, skip the opening quote character
746            if (value_trimmed.starts_with('"') && value_trimmed.ends_with('"'))
747                || (value_trimmed.starts_with('\'') && value_trimmed.ends_with('\''))
748            {
749                let quote_offset = value_slice.find(['"', '\'']).unwrap_or(0);
750                return Some(value_start + quote_offset + 1);
751            }
752            return Some(value_start);
753        }
754        // Colon with no space after or at end of line -> no value to check
755        Some(usize::MAX)
756    }
757
758    // Get the proper name that should be used for a found name
759    fn get_proper_name_for(&self, found_name: &str) -> Option<String> {
760        let found_lower = found_name.to_lowercase();
761
762        // Iterate through the configured proper names
763        for name in &self.config.names {
764            let lower_name = name.to_lowercase();
765            let lower_name_no_dots = lower_name.replace('.', "");
766
767            // Direct match
768            if found_lower == lower_name || found_lower == lower_name_no_dots {
769                return Some(name.clone());
770            }
771
772            // Check ASCII-normalized version
773            let ascii_normalized = Self::ascii_normalize(&lower_name);
774
775            let ascii_no_dots = ascii_normalized.replace('.', "");
776
777            if found_lower == ascii_normalized || found_lower == ascii_no_dots {
778                return Some(name.clone());
779            }
780        }
781        None
782    }
783}
784
785impl Rule for MD044ProperNames {
    /// Returns this rule's identifier, `"MD044"`.
    fn name(&self) -> &'static str {
        "MD044"
    }
789
    /// Returns the one-line, human-readable summary of what this rule enforces.
    fn description(&self) -> &'static str {
        "Proper names should have the correct capitalization"
    }
793
    /// Returns the category this rule is filed under (`RuleCategory::Other`).
    fn category(&self) -> RuleCategory {
        RuleCategory::Other
    }
797
798    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
799        if self.config.names.is_empty() {
800            return true;
801        }
802        // Quick check if any configured name variants exist (case-insensitive)
803        let content_lower = if ctx.content.is_ascii() {
804            ctx.content.to_ascii_lowercase()
805        } else {
806            ctx.content.to_lowercase()
807        };
808        !self.name_variants.iter().any(|name| content_lower.contains(name))
809    }
810
811    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
812        let content = ctx.content;
813        if content.is_empty() || self.config.names.is_empty() || self.combined_pattern.is_none() {
814            return Ok(Vec::new());
815        }
816
817        // Compute lowercase content once and reuse across all checks
818        let content_lower = if content.is_ascii() {
819            content.to_ascii_lowercase()
820        } else {
821            content.to_lowercase()
822        };
823
824        // Early return: use pre-computed name_variants for the quick check
825        let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
826
827        if !has_potential_matches {
828            return Ok(Vec::new());
829        }
830
831        let line_index = &ctx.line_index;
832        let violations = self.find_name_violations(content, ctx, &content_lower);
833
834        let warnings = violations
835            .into_iter()
836            .filter_map(|(line, column, found_name)| {
837                self.get_proper_name_for(&found_name).map(|proper_name| LintWarning {
838                    rule_name: Some(self.name().to_string()),
839                    line,
840                    column,
841                    end_line: line,
842                    end_column: column + found_name.len(),
843                    message: format!("Proper name '{found_name}' should be '{proper_name}'"),
844                    severity: Severity::Warning,
845                    fix: Some(Fix {
846                        range: line_index.line_col_to_byte_range_with_length(line, column, found_name.len()),
847                        replacement: proper_name,
848                    }),
849                })
850            })
851            .collect();
852
853        Ok(warnings)
854    }
855
856    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
857        let content = ctx.content;
858        if content.is_empty() || self.config.names.is_empty() {
859            return Ok(content.to_string());
860        }
861
862        let content_lower = if content.is_ascii() {
863            content.to_ascii_lowercase()
864        } else {
865            content.to_lowercase()
866        };
867        let violations = self.find_name_violations(content, ctx, &content_lower);
868        if violations.is_empty() {
869            return Ok(content.to_string());
870        }
871
872        // Process lines and build the fixed content
873        let mut fixed_lines = Vec::new();
874
875        // Group violations by line
876        let mut violations_by_line: HashMap<usize, Vec<(usize, String)>> = HashMap::new();
877        for (line_num, col_num, found_name) in violations {
878            violations_by_line
879                .entry(line_num)
880                .or_default()
881                .push((col_num, found_name));
882        }
883
884        // Sort violations within each line in reverse order
885        for violations in violations_by_line.values_mut() {
886            violations.sort_by_key(|b| std::cmp::Reverse(b.0));
887        }
888
889        // Process each line
890        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
891            let line_num = line_idx + 1;
892
893            // Skip lines where this rule is disabled by inline config
894            if ctx.inline_config().is_rule_disabled(self.name(), line_num) {
895                fixed_lines.push(line_info.content(ctx.content).to_string());
896                continue;
897            }
898
899            if let Some(line_violations) = violations_by_line.get(&line_num) {
900                // This line has violations, fix them
901                let mut fixed_line = line_info.content(ctx.content).to_string();
902
903                for (col_num, found_name) in line_violations {
904                    if let Some(proper_name) = self.get_proper_name_for(found_name) {
905                        let start_col = col_num - 1; // Convert to 0-based
906                        let end_col = start_col + found_name.len();
907
908                        if end_col <= fixed_line.len()
909                            && fixed_line.is_char_boundary(start_col)
910                            && fixed_line.is_char_boundary(end_col)
911                        {
912                            fixed_line.replace_range(start_col..end_col, &proper_name);
913                        }
914                    }
915                }
916
917                fixed_lines.push(fixed_line);
918            } else {
919                // No violations on this line, keep it as is
920                fixed_lines.push(line_info.content(ctx.content).to_string());
921            }
922        }
923
924        // Join lines with newlines, preserving the original ending
925        let mut result = fixed_lines.join("\n");
926        if content.ends_with('\n') && !result.ends_with('\n') {
927            result.push('\n');
928        }
929        Ok(result)
930    }
931
    /// Exposes this rule as `&dyn Any` (presumably so callers can downcast to
    /// the concrete rule type — confirm against the `Rule` trait's users).
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
935
936    fn default_config_section(&self) -> Option<(String, toml::Value)> {
937        let json_value = serde_json::to_value(&self.config).ok()?;
938        Some((
939            self.name().to_string(),
940            crate::rule_config_serde::json_to_toml_value(&json_value)?,
941        ))
942    }
943
944    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
945    where
946        Self: Sized,
947    {
948        let rule_config = crate::rule_config_serde::load_rule_config::<MD044Config>(config);
949        Box::new(Self::from_config_struct(rule_config))
950    }
951}
952
953#[cfg(test)]
954mod tests {
955    use super::*;
956    use crate::lint_context::LintContext;
957
    /// Test helper: builds a `LintContext` over `content` using the standard
    /// Markdown flavor and `None` for the constructor's third argument.
    fn create_context(content: &str) -> LintContext<'_> {
        LintContext::new(content, crate::config::MarkdownFlavor::Standard, None)
    }
961
962    #[test]
963    fn test_correctly_capitalized_names() {
964        let rule = MD044ProperNames::new(
965            vec![
966                "JavaScript".to_string(),
967                "TypeScript".to_string(),
968                "Node.js".to_string(),
969            ],
970            true,
971        );
972
973        let content = "This document uses JavaScript, TypeScript, and Node.js correctly.";
974        let ctx = create_context(content);
975        let result = rule.check(&ctx).unwrap();
976        assert!(result.is_empty(), "Should not flag correctly capitalized names");
977    }
978
979    #[test]
980    fn test_incorrectly_capitalized_names() {
981        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
982
983        let content = "This document uses javascript and typescript incorrectly.";
984        let ctx = create_context(content);
985        let result = rule.check(&ctx).unwrap();
986
987        assert_eq!(result.len(), 2, "Should flag two incorrect capitalizations");
988        assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
989        assert_eq!(result[0].line, 1);
990        assert_eq!(result[0].column, 20);
991        assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
992        assert_eq!(result[1].line, 1);
993        assert_eq!(result[1].column, 35);
994    }
995
996    #[test]
997    fn test_names_at_beginning_of_sentences() {
998        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "Python".to_string()], true);
999
1000        let content = "javascript is a great language. python is also popular.";
1001        let ctx = create_context(content);
1002        let result = rule.check(&ctx).unwrap();
1003
1004        assert_eq!(result.len(), 2, "Should flag names at beginning of sentences");
1005        assert_eq!(result[0].line, 1);
1006        assert_eq!(result[0].column, 1);
1007        assert_eq!(result[1].line, 1);
1008        assert_eq!(result[1].column, 33);
1009    }
1010
1011    #[test]
1012    fn test_names_in_code_blocks_checked_by_default() {
1013        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1014
1015        let content = r#"Here is some text with JavaScript.
1016
1017```javascript
1018// This javascript should be checked
1019const lang = "javascript";
1020```
1021
1022But this javascript should be flagged."#;
1023
1024        let ctx = create_context(content);
1025        let result = rule.check(&ctx).unwrap();
1026
1027        assert_eq!(result.len(), 3, "Should flag javascript inside and outside code blocks");
1028        assert_eq!(result[0].line, 4);
1029        assert_eq!(result[1].line, 5);
1030        assert_eq!(result[2].line, 8);
1031    }
1032
1033    #[test]
1034    fn test_names_in_code_blocks_ignored_when_disabled() {
1035        let rule = MD044ProperNames::new(
1036            vec!["JavaScript".to_string()],
1037            false, // code_blocks = false means skip code blocks
1038        );
1039
1040        let content = r#"```
1041javascript in code block
1042```"#;
1043
1044        let ctx = create_context(content);
1045        let result = rule.check(&ctx).unwrap();
1046
1047        assert_eq!(
1048            result.len(),
1049            0,
1050            "Should not flag javascript in code blocks when code_blocks is false"
1051        );
1052    }
1053
1054    #[test]
1055    fn test_names_in_inline_code_checked_by_default() {
1056        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1057
1058        let content = "This is `javascript` in inline code and javascript outside.";
1059        let ctx = create_context(content);
1060        let result = rule.check(&ctx).unwrap();
1061
1062        // When code_blocks=true, inline code should be checked
1063        assert_eq!(result.len(), 2, "Should flag javascript inside and outside inline code");
1064        assert_eq!(result[0].column, 10); // javascript in inline code
1065        assert_eq!(result[1].column, 41); // javascript outside
1066    }
1067
1068    #[test]
1069    fn test_multiple_names_in_same_line() {
1070        let rule = MD044ProperNames::new(
1071            vec!["JavaScript".to_string(), "TypeScript".to_string(), "React".to_string()],
1072            true,
1073        );
1074
1075        let content = "I use javascript, typescript, and react in my projects.";
1076        let ctx = create_context(content);
1077        let result = rule.check(&ctx).unwrap();
1078
1079        assert_eq!(result.len(), 3, "Should flag all three incorrect names");
1080        assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
1081        assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
1082        assert_eq!(result[2].message, "Proper name 'react' should be 'React'");
1083    }
1084
1085    #[test]
1086    fn test_case_sensitivity() {
1087        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1088
1089        let content = "JAVASCRIPT, Javascript, javascript, and JavaScript variations.";
1090        let ctx = create_context(content);
1091        let result = rule.check(&ctx).unwrap();
1092
1093        assert_eq!(result.len(), 3, "Should flag all incorrect case variations");
1094        // JavaScript (correct) should not be flagged
1095        assert!(result.iter().all(|w| w.message.contains("should be 'JavaScript'")));
1096    }
1097
1098    #[test]
1099    fn test_configuration_with_custom_name_list() {
1100        let config = MD044Config {
1101            names: vec!["GitHub".to_string(), "GitLab".to_string(), "DevOps".to_string()],
1102            code_blocks: true,
1103            html_elements: true,
1104            html_comments: true,
1105        };
1106        let rule = MD044ProperNames::from_config_struct(config);
1107
1108        let content = "We use github, gitlab, and devops for our workflow.";
1109        let ctx = create_context(content);
1110        let result = rule.check(&ctx).unwrap();
1111
1112        assert_eq!(result.len(), 3, "Should flag all custom names");
1113        assert_eq!(result[0].message, "Proper name 'github' should be 'GitHub'");
1114        assert_eq!(result[1].message, "Proper name 'gitlab' should be 'GitLab'");
1115        assert_eq!(result[2].message, "Proper name 'devops' should be 'DevOps'");
1116    }
1117
1118    #[test]
1119    fn test_empty_configuration() {
1120        let rule = MD044ProperNames::new(vec![], true);
1121
1122        let content = "This has javascript and typescript but no configured names.";
1123        let ctx = create_context(content);
1124        let result = rule.check(&ctx).unwrap();
1125
1126        assert!(result.is_empty(), "Should not flag anything with empty configuration");
1127    }
1128
1129    #[test]
1130    fn test_names_with_special_characters() {
1131        let rule = MD044ProperNames::new(
1132            vec!["Node.js".to_string(), "ASP.NET".to_string(), "C++".to_string()],
1133            true,
1134        );
1135
1136        let content = "We use nodejs, asp.net, ASP.NET, and c++ in our stack.";
1137        let ctx = create_context(content);
1138        let result = rule.check(&ctx).unwrap();
1139
1140        // nodejs should match Node.js (dotless variation)
1141        // asp.net should be flagged (wrong case)
1142        // ASP.NET should not be flagged (correct)
1143        // c++ should be flagged
1144        assert_eq!(result.len(), 3, "Should handle special characters correctly");
1145
1146        let messages: Vec<&str> = result.iter().map(|w| w.message.as_str()).collect();
1147        assert!(messages.contains(&"Proper name 'nodejs' should be 'Node.js'"));
1148        assert!(messages.contains(&"Proper name 'asp.net' should be 'ASP.NET'"));
1149        assert!(messages.contains(&"Proper name 'c++' should be 'C++'"));
1150    }
1151
1152    #[test]
1153    fn test_word_boundaries() {
1154        let rule = MD044ProperNames::new(vec!["Java".to_string(), "Script".to_string()], true);
1155
1156        let content = "JavaScript is not java or script, but Java and Script are separate.";
1157        let ctx = create_context(content);
1158        let result = rule.check(&ctx).unwrap();
1159
1160        // Should only flag lowercase "java" and "script" as separate words
1161        assert_eq!(result.len(), 2, "Should respect word boundaries");
1162        assert!(result.iter().any(|w| w.column == 19)); // "java" position
1163        assert!(result.iter().any(|w| w.column == 27)); // "script" position
1164    }
1165
1166    #[test]
1167    fn test_fix_method() {
1168        let rule = MD044ProperNames::new(
1169            vec![
1170                "JavaScript".to_string(),
1171                "TypeScript".to_string(),
1172                "Node.js".to_string(),
1173            ],
1174            true,
1175        );
1176
1177        let content = "I love javascript, typescript, and nodejs!";
1178        let ctx = create_context(content);
1179        let fixed = rule.fix(&ctx).unwrap();
1180
1181        assert_eq!(fixed, "I love JavaScript, TypeScript, and Node.js!");
1182    }
1183
1184    #[test]
1185    fn test_fix_multiple_occurrences() {
1186        let rule = MD044ProperNames::new(vec!["Python".to_string()], true);
1187
1188        let content = "python is great. I use python daily. PYTHON is powerful.";
1189        let ctx = create_context(content);
1190        let fixed = rule.fix(&ctx).unwrap();
1191
1192        assert_eq!(fixed, "Python is great. I use Python daily. Python is powerful.");
1193    }
1194
1195    #[test]
1196    fn test_fix_checks_code_blocks_by_default() {
1197        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1198
1199        let content = r#"I love javascript.
1200
1201```
1202const lang = "javascript";
1203```
1204
1205More javascript here."#;
1206
1207        let ctx = create_context(content);
1208        let fixed = rule.fix(&ctx).unwrap();
1209
1210        let expected = r#"I love JavaScript.
1211
1212```
1213const lang = "JavaScript";
1214```
1215
1216More JavaScript here."#;
1217
1218        assert_eq!(fixed, expected);
1219    }
1220
1221    #[test]
1222    fn test_multiline_content() {
1223        let rule = MD044ProperNames::new(vec!["Rust".to_string(), "Python".to_string()], true);
1224
1225        let content = r#"First line with rust.
1226Second line with python.
1227Third line with RUST and PYTHON."#;
1228
1229        let ctx = create_context(content);
1230        let result = rule.check(&ctx).unwrap();
1231
1232        assert_eq!(result.len(), 4, "Should flag all incorrect occurrences");
1233        assert_eq!(result[0].line, 1);
1234        assert_eq!(result[1].line, 2);
1235        assert_eq!(result[2].line, 3);
1236        assert_eq!(result[3].line, 3);
1237    }
1238
1239    #[test]
1240    fn test_default_config() {
1241        let config = MD044Config::default();
1242        assert!(config.names.is_empty());
1243        assert!(!config.code_blocks);
1244        assert!(config.html_elements);
1245        assert!(config.html_comments);
1246    }
1247
1248    #[test]
1249    fn test_default_config_checks_html_comments() {
1250        let config = MD044Config {
1251            names: vec!["JavaScript".to_string()],
1252            ..MD044Config::default()
1253        };
1254        let rule = MD044ProperNames::from_config_struct(config);
1255
1256        let content = "# Guide\n\n<!-- javascript mentioned here -->\n";
1257        let ctx = create_context(content);
1258        let result = rule.check(&ctx).unwrap();
1259
1260        assert_eq!(result.len(), 1, "Default config should check HTML comments");
1261        assert_eq!(result[0].line, 3);
1262    }
1263
1264    #[test]
1265    fn test_default_config_skips_code_blocks() {
1266        let config = MD044Config {
1267            names: vec!["JavaScript".to_string()],
1268            ..MD044Config::default()
1269        };
1270        let rule = MD044ProperNames::from_config_struct(config);
1271
1272        let content = "# Guide\n\n```\njavascript in code\n```\n";
1273        let ctx = create_context(content);
1274        let result = rule.check(&ctx).unwrap();
1275
1276        assert_eq!(result.len(), 0, "Default config should skip code blocks");
1277    }
1278
1279    #[test]
1280    fn test_standalone_html_comment_checked() {
1281        let config = MD044Config {
1282            names: vec!["Test".to_string()],
1283            ..MD044Config::default()
1284        };
1285        let rule = MD044ProperNames::from_config_struct(config);
1286
1287        let content = "# Heading\n\n<!-- this is a test example -->\n";
1288        let ctx = create_context(content);
1289        let result = rule.check(&ctx).unwrap();
1290
1291        assert_eq!(result.len(), 1, "Should flag proper name in standalone HTML comment");
1292        assert_eq!(result[0].line, 3);
1293    }
1294
1295    #[test]
1296    fn test_inline_config_comments_not_flagged() {
1297        let config = MD044Config {
1298            names: vec!["RUMDL".to_string()],
1299            ..MD044Config::default()
1300        };
1301        let rule = MD044ProperNames::from_config_struct(config);
1302
1303        // Lines 1, 3, 4, 6 are inline config comments — should not be flagged.
1304        // Lines 2, 5 contain "rumdl" in regular text — flagged by rule.check(),
1305        // but would be suppressed by the linting engine's inline config filtering.
1306        let content = "<!-- rumdl-disable MD044 -->\nSome rumdl text here.\n<!-- rumdl-enable MD044 -->\n<!-- markdownlint-disable -->\nMore rumdl text.\n<!-- markdownlint-enable -->\n";
1307        let ctx = create_context(content);
1308        let result = rule.check(&ctx).unwrap();
1309
1310        assert_eq!(result.len(), 2, "Should only flag body lines, not config comments");
1311        assert_eq!(result[0].line, 2);
1312        assert_eq!(result[1].line, 5);
1313    }
1314
1315    #[test]
1316    fn test_html_comment_skipped_when_disabled() {
1317        let config = MD044Config {
1318            names: vec!["Test".to_string()],
1319            code_blocks: true,
1320            html_elements: true,
1321            html_comments: false,
1322        };
1323        let rule = MD044ProperNames::from_config_struct(config);
1324
1325        let content = "# Heading\n\n<!-- this is a test example -->\n\nRegular test here.\n";
1326        let ctx = create_context(content);
1327        let result = rule.check(&ctx).unwrap();
1328
1329        assert_eq!(
1330            result.len(),
1331            1,
1332            "Should only flag 'test' outside HTML comment when html_comments=false"
1333        );
1334        assert_eq!(result[0].line, 5);
1335    }
1336
1337    #[test]
1338    fn test_fix_corrects_html_comment_content() {
1339        let config = MD044Config {
1340            names: vec!["JavaScript".to_string()],
1341            ..MD044Config::default()
1342        };
1343        let rule = MD044ProperNames::from_config_struct(config);
1344
1345        let content = "# Guide\n\n<!-- javascript mentioned here -->\n";
1346        let ctx = create_context(content);
1347        let fixed = rule.fix(&ctx).unwrap();
1348
1349        assert_eq!(fixed, "# Guide\n\n<!-- JavaScript mentioned here -->\n");
1350    }
1351
1352    #[test]
1353    fn test_fix_does_not_modify_inline_config_comments() {
1354        let config = MD044Config {
1355            names: vec!["RUMDL".to_string()],
1356            ..MD044Config::default()
1357        };
1358        let rule = MD044ProperNames::from_config_struct(config);
1359
1360        let content = "<!-- rumdl-disable -->\nSome rumdl text.\n<!-- rumdl-enable -->\n";
1361        let ctx = create_context(content);
1362        let fixed = rule.fix(&ctx).unwrap();
1363
1364        // Config comments should be untouched
1365        assert!(fixed.contains("<!-- rumdl-disable -->"));
1366        assert!(fixed.contains("<!-- rumdl-enable -->"));
1367        // Body text inside disable block should NOT be fixed (rule is disabled)
1368        assert!(
1369            fixed.contains("Some rumdl text."),
1370            "Line inside rumdl-disable block should not be modified by fix()"
1371        );
1372    }
1373
1374    #[test]
1375    fn test_fix_respects_inline_disable_partial() {
1376        let config = MD044Config {
1377            names: vec!["RUMDL".to_string()],
1378            ..MD044Config::default()
1379        };
1380        let rule = MD044ProperNames::from_config_struct(config);
1381
1382        let content =
1383            "<!-- rumdl-disable MD044 -->\nSome rumdl text.\n<!-- rumdl-enable MD044 -->\n\nSome rumdl text outside.\n";
1384        let ctx = create_context(content);
1385        let fixed = rule.fix(&ctx).unwrap();
1386
1387        // Line inside disable block should be preserved
1388        assert!(
1389            fixed.contains("Some rumdl text.\n<!-- rumdl-enable"),
1390            "Line inside disable block should not be modified"
1391        );
1392        // Line outside disable block should be fixed
1393        assert!(
1394            fixed.contains("Some RUMDL text outside."),
1395            "Line outside disable block should be fixed"
1396        );
1397    }
1398
1399    #[test]
1400    fn test_performance_with_many_names() {
1401        let mut names = vec![];
1402        for i in 0..50 {
1403            names.push(format!("ProperName{i}"));
1404        }
1405
1406        let rule = MD044ProperNames::new(names, true);
1407
1408        let content = "This has propername0, propername25, and propername49 incorrectly.";
1409        let ctx = create_context(content);
1410        let result = rule.check(&ctx).unwrap();
1411
1412        assert_eq!(result.len(), 3, "Should handle many configured names efficiently");
1413    }
1414
1415    #[test]
1416    fn test_large_name_count_performance() {
1417        // Verify MD044 can handle large numbers of names without regex limitations
1418        // This test confirms that fancy-regex handles large patterns well
1419        let names = (0..1000).map(|i| format!("ProperName{i}")).collect::<Vec<_>>();
1420
1421        let rule = MD044ProperNames::new(names, true);
1422
1423        // The combined pattern should be created successfully
1424        assert!(rule.combined_pattern.is_some());
1425
1426        // Should be able to check content without errors
1427        let content = "This has propername0 and propername999 in it.";
1428        let ctx = create_context(content);
1429        let result = rule.check(&ctx).unwrap();
1430
1431        // Should detect both incorrect names
1432        assert_eq!(result.len(), 2, "Should handle 1000 names without issues");
1433    }
1434
1435    #[test]
1436    fn test_cache_behavior() {
1437        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1438
1439        let content = "Using javascript here.";
1440        let ctx = create_context(content);
1441
1442        // First check
1443        let result1 = rule.check(&ctx).unwrap();
1444        assert_eq!(result1.len(), 1);
1445
1446        // Second check should use cache
1447        let result2 = rule.check(&ctx).unwrap();
1448        assert_eq!(result2.len(), 1);
1449
1450        // Results should be identical
1451        assert_eq!(result1[0].line, result2[0].line);
1452        assert_eq!(result1[0].column, result2[0].column);
1453    }
1454
1455    #[test]
1456    fn test_html_comments_not_checked_when_disabled() {
1457        let config = MD044Config {
1458            names: vec!["JavaScript".to_string()],
1459            code_blocks: true,    // Check code blocks
1460            html_elements: true,  // Check HTML elements
1461            html_comments: false, // Don't check HTML comments
1462        };
1463        let rule = MD044ProperNames::from_config_struct(config);
1464
1465        let content = r#"Regular javascript here.
1466<!-- This javascript in HTML comment should be ignored -->
1467More javascript outside."#;
1468
1469        let ctx = create_context(content);
1470        let result = rule.check(&ctx).unwrap();
1471
1472        assert_eq!(result.len(), 2, "Should only flag javascript outside HTML comments");
1473        assert_eq!(result[0].line, 1);
1474        assert_eq!(result[1].line, 3);
1475    }
1476
1477    #[test]
1478    fn test_html_comments_checked_when_enabled() {
1479        let config = MD044Config {
1480            names: vec!["JavaScript".to_string()],
1481            code_blocks: true,   // Check code blocks
1482            html_elements: true, // Check HTML elements
1483            html_comments: true, // Check HTML comments
1484        };
1485        let rule = MD044ProperNames::from_config_struct(config);
1486
1487        let content = r#"Regular javascript here.
1488<!-- This javascript in HTML comment should be checked -->
1489More javascript outside."#;
1490
1491        let ctx = create_context(content);
1492        let result = rule.check(&ctx).unwrap();
1493
1494        assert_eq!(
1495            result.len(),
1496            3,
1497            "Should flag all javascript occurrences including in HTML comments"
1498        );
1499    }
1500
1501    #[test]
1502    fn test_multiline_html_comments() {
1503        let config = MD044Config {
1504            names: vec!["Python".to_string(), "JavaScript".to_string()],
1505            code_blocks: true,    // Check code blocks
1506            html_elements: true,  // Check HTML elements
1507            html_comments: false, // Don't check HTML comments
1508        };
1509        let rule = MD044ProperNames::from_config_struct(config);
1510
1511        let content = r#"Regular python here.
1512<!--
1513This is a multiline comment
1514with javascript and python
1515that should be ignored
1516-->
1517More javascript outside."#;
1518
1519        let ctx = create_context(content);
1520        let result = rule.check(&ctx).unwrap();
1521
1522        assert_eq!(result.len(), 2, "Should only flag names outside HTML comments");
1523        assert_eq!(result[0].line, 1); // python
1524        assert_eq!(result[1].line, 7); // javascript
1525    }
1526
1527    #[test]
1528    fn test_fix_preserves_html_comments_when_disabled() {
1529        let config = MD044Config {
1530            names: vec!["JavaScript".to_string()],
1531            code_blocks: true,    // Check code blocks
1532            html_elements: true,  // Check HTML elements
1533            html_comments: false, // Don't check HTML comments
1534        };
1535        let rule = MD044ProperNames::from_config_struct(config);
1536
1537        let content = r#"javascript here.
1538<!-- javascript in comment -->
1539More javascript."#;
1540
1541        let ctx = create_context(content);
1542        let fixed = rule.fix(&ctx).unwrap();
1543
1544        let expected = r#"JavaScript here.
1545<!-- javascript in comment -->
1546More JavaScript."#;
1547
1548        assert_eq!(
1549            fixed, expected,
1550            "Should not fix names inside HTML comments when disabled"
1551        );
1552    }
1553
1554    #[test]
1555    fn test_proper_names_in_link_text_are_flagged() {
1556        let rule = MD044ProperNames::new(
1557            vec!["JavaScript".to_string(), "Node.js".to_string(), "Python".to_string()],
1558            true,
1559        );
1560
1561        let content = r#"Check this [javascript documentation](https://javascript.info) for info.
1562
1563Visit [node.js homepage](https://nodejs.org) and [python tutorial](https://python.org).
1564
1565Real javascript should be flagged.
1566
1567Also see the [typescript guide][ts-ref] for more.
1568
1569Real python should be flagged too.
1570
1571[ts-ref]: https://typescript.org/handbook"#;
1572
1573        let ctx = create_context(content);
1574        let result = rule.check(&ctx).unwrap();
1575
1576        // Link text should be checked, URLs should not be checked
1577        // Line 1: [javascript documentation] - "javascript" should be flagged
1578        // Line 3: [node.js homepage] - "node.js" should be flagged (matches "Node.js")
1579        // Line 3: [python tutorial] - "python" should be flagged
1580        // Line 5: standalone javascript
1581        // Line 9: standalone python
1582        assert_eq!(result.len(), 5, "Expected 5 warnings: 3 in link text + 2 standalone");
1583
1584        // Verify line numbers for link text warnings
1585        let line_1_warnings: Vec<_> = result.iter().filter(|w| w.line == 1).collect();
1586        assert_eq!(line_1_warnings.len(), 1);
1587        assert!(
1588            line_1_warnings[0]
1589                .message
1590                .contains("'javascript' should be 'JavaScript'")
1591        );
1592
1593        let line_3_warnings: Vec<_> = result.iter().filter(|w| w.line == 3).collect();
1594        assert_eq!(line_3_warnings.len(), 2); // node.js and python
1595
1596        // Standalone warnings
1597        assert!(result.iter().any(|w| w.line == 5 && w.message.contains("'javascript'")));
1598        assert!(result.iter().any(|w| w.line == 9 && w.message.contains("'python'")));
1599    }
1600
1601    #[test]
1602    fn test_link_urls_not_flagged() {
1603        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1604
1605        // URL contains "javascript" but should NOT be flagged
1606        let content = r#"[Link Text](https://javascript.info/guide)"#;
1607
1608        let ctx = create_context(content);
1609        let result = rule.check(&ctx).unwrap();
1610
1611        // URL should not be checked
1612        assert!(result.is_empty(), "URLs should not be checked for proper names");
1613    }
1614
1615    #[test]
1616    fn test_proper_names_in_image_alt_text_are_flagged() {
1617        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1618
1619        let content = r#"Here is a ![javascript logo](javascript.png "javascript icon") image.
1620
1621Real javascript should be flagged."#;
1622
1623        let ctx = create_context(content);
1624        let result = rule.check(&ctx).unwrap();
1625
1626        // Image alt text should be checked, URL and title should not be checked
1627        // Line 1: ![javascript logo] - "javascript" should be flagged
1628        // Line 3: standalone javascript
1629        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in alt text + 1 standalone");
1630        assert!(result[0].message.contains("'javascript' should be 'JavaScript'"));
1631        assert!(result[0].line == 1); // "![javascript logo]"
1632        assert!(result[1].message.contains("'javascript' should be 'JavaScript'"));
1633        assert!(result[1].line == 3); // "Real javascript should be flagged."
1634    }
1635
1636    #[test]
1637    fn test_image_urls_not_flagged() {
1638        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1639
1640        // URL contains "javascript" but should NOT be flagged
1641        let content = r#"![Logo](https://javascript.info/logo.png)"#;
1642
1643        let ctx = create_context(content);
1644        let result = rule.check(&ctx).unwrap();
1645
1646        // Image URL should not be checked
1647        assert!(result.is_empty(), "Image URLs should not be checked for proper names");
1648    }
1649
1650    #[test]
1651    fn test_reference_link_text_flagged_but_definition_not() {
1652        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
1653
1654        let content = r#"Check the [javascript guide][js-ref] for details.
1655
1656Real javascript should be flagged.
1657
1658[js-ref]: https://javascript.info/typescript/guide"#;
1659
1660        let ctx = create_context(content);
1661        let result = rule.check(&ctx).unwrap();
1662
1663        // Link text should be checked, reference definitions should not
1664        // Line 1: [javascript guide] - should be flagged
1665        // Line 3: standalone javascript - should be flagged
1666        // Line 5: reference definition - should NOT be flagged
1667        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in link text + 1 standalone");
1668        assert!(result.iter().any(|w| w.line == 1 && w.message.contains("'javascript'")));
1669        assert!(result.iter().any(|w| w.line == 3 && w.message.contains("'javascript'")));
1670    }
1671
1672    #[test]
1673    fn test_reference_definitions_not_flagged() {
1674        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1675
1676        // Reference definition should NOT be flagged
1677        let content = r#"[js-ref]: https://javascript.info/guide"#;
1678
1679        let ctx = create_context(content);
1680        let result = rule.check(&ctx).unwrap();
1681
1682        // Reference definition URLs should not be checked
1683        assert!(result.is_empty(), "Reference definitions should not be checked");
1684    }
1685
1686    #[test]
1687    fn test_wikilinks_text_is_flagged() {
1688        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1689
1690        // WikiLinks [[destination]] should have their text checked
1691        let content = r#"[[javascript]]
1692
1693Regular javascript here.
1694
1695[[JavaScript|display text]]"#;
1696
1697        let ctx = create_context(content);
1698        let result = rule.check(&ctx).unwrap();
1699
1700        // Line 1: [[javascript]] - should be flagged (WikiLink text)
1701        // Line 3: standalone javascript - should be flagged
1702        // Line 5: [[JavaScript|display text]] - correct capitalization, no flag
1703        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in WikiLink + 1 standalone");
1704        assert!(
1705            result
1706                .iter()
1707                .any(|w| w.line == 1 && w.column == 3 && w.message.contains("'javascript'"))
1708        );
1709        assert!(result.iter().any(|w| w.line == 3 && w.message.contains("'javascript'")));
1710    }
1711
1712    #[test]
1713    fn test_url_link_text_not_flagged() {
1714        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1715
1716        // Link text that is itself a URL should not be flagged
1717        let content = r#"[https://github.com/org/repo](https://github.com/org/repo)
1718
1719[http://github.com/org/repo](http://github.com/org/repo)
1720
1721[www.github.com/org/repo](https://www.github.com/org/repo)"#;
1722
1723        let ctx = create_context(content);
1724        let result = rule.check(&ctx).unwrap();
1725
1726        assert!(
1727            result.is_empty(),
1728            "URL-like link text should not be flagged, got: {result:?}"
1729        );
1730    }
1731
1732    #[test]
1733    fn test_url_link_text_with_leading_space_not_flagged() {
1734        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1735
1736        // Leading/trailing whitespace in link text should be trimmed before URL check
1737        let content = r#"[ https://github.com/org/repo](https://github.com/org/repo)"#;
1738
1739        let ctx = create_context(content);
1740        let result = rule.check(&ctx).unwrap();
1741
1742        assert!(
1743            result.is_empty(),
1744            "URL-like link text with leading space should not be flagged, got: {result:?}"
1745        );
1746    }
1747
1748    #[test]
1749    fn test_url_link_text_uppercase_scheme_not_flagged() {
1750        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1751
1752        let content = r#"[HTTPS://GITHUB.COM/org/repo](https://github.com/org/repo)"#;
1753
1754        let ctx = create_context(content);
1755        let result = rule.check(&ctx).unwrap();
1756
1757        assert!(
1758            result.is_empty(),
1759            "URL-like link text with uppercase scheme should not be flagged, got: {result:?}"
1760        );
1761    }
1762
1763    #[test]
1764    fn test_non_url_link_text_still_flagged() {
1765        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1766
1767        // Link text that is NOT a URL should still be flagged
1768        let content = r#"[github.com/org/repo](https://github.com/org/repo)
1769
1770[Visit github](https://github.com/org/repo)
1771
1772[//github.com/org/repo](//github.com/org/repo)
1773
1774[ftp://github.com/org/repo](ftp://github.com/org/repo)"#;
1775
1776        let ctx = create_context(content);
1777        let result = rule.check(&ctx).unwrap();
1778
1779        assert_eq!(result.len(), 4, "Non-URL link text should be flagged, got: {result:?}");
1780        assert!(result.iter().any(|w| w.line == 1)); // github.com (no protocol)
1781        assert!(result.iter().any(|w| w.line == 3)); // Visit github
1782        assert!(result.iter().any(|w| w.line == 5)); // //github.com (protocol-relative)
1783        assert!(result.iter().any(|w| w.line == 7)); // ftp://github.com
1784    }
1785
1786    #[test]
1787    fn test_url_link_text_fix_not_applied() {
1788        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1789
1790        let content = "[https://github.com/org/repo](https://github.com/org/repo)\n";
1791
1792        let ctx = create_context(content);
1793        let result = rule.fix(&ctx).unwrap();
1794
1795        assert_eq!(result, content, "Fix should not modify URL-like link text");
1796    }
1797
1798    #[test]
1799    fn test_mixed_url_and_regular_link_text() {
1800        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1801
1802        // Mix of URL link text (should skip) and regular text (should flag)
1803        let content = r#"[https://github.com/org/repo](https://github.com/org/repo)
1804
1805Visit [github documentation](https://github.com/docs) for details.
1806
1807[www.github.com/pricing](https://www.github.com/pricing)"#;
1808
1809        let ctx = create_context(content);
1810        let result = rule.check(&ctx).unwrap();
1811
1812        // Only line 3 should be flagged ("github documentation" is not a URL)
1813        assert_eq!(
1814            result.len(),
1815            1,
1816            "Only non-URL link text should be flagged, got: {result:?}"
1817        );
1818        assert_eq!(result[0].line, 3);
1819    }
1820
1821    #[test]
1822    fn test_html_attribute_values_not_flagged() {
1823        // Matches inside HTML tag attributes (between `<` and `>`) are not flagged.
1824        // Attribute values are not prose — they hold URLs, class names, data values, etc.
1825        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1826        let content = "# Heading\n\ntest\n\n<img src=\"www.example.test/test_image.png\">\n";
1827        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1828        let result = rule.check(&ctx).unwrap();
1829
1830        // Nothing on line 5 should be flagged — everything is inside the `<img ...>` tag
1831        let line5_violations: Vec<_> = result.iter().filter(|w| w.line == 5).collect();
1832        assert!(
1833            line5_violations.is_empty(),
1834            "Should not flag anything inside HTML tag attributes: {line5_violations:?}"
1835        );
1836
1837        // Plain text on line 3 is still flagged
1838        let line3_violations: Vec<_> = result.iter().filter(|w| w.line == 3).collect();
1839        assert_eq!(line3_violations.len(), 1, "Plain 'test' on line 3 should be flagged");
1840    }
1841
1842    #[test]
1843    fn test_html_text_content_still_flagged() {
1844        // Text between HTML tags (not inside `<...>`) is still checked.
1845        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1846        let content = "# Heading\n\n<a href=\"https://example.test/page\">test link</a>\n";
1847        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1848        let result = rule.check(&ctx).unwrap();
1849
1850        // "example.test" in the href attribute → not flagged (inside `<...>`)
1851        // "test link" in the anchor text → flagged (between `>` and `<`)
1852        assert_eq!(
1853            result.len(),
1854            1,
1855            "Should flag only 'test' in anchor text, not in href: {result:?}"
1856        );
1857        assert_eq!(result[0].column, 37, "Should flag col 37 ('test link' in anchor text)");
1858    }
1859
1860    #[test]
1861    fn test_html_attribute_various_not_flagged() {
1862        // All attribute types are ignored: src, href, alt, class, data-*, title, etc.
1863        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1864        let content = concat!(
1865            "# Heading\n\n",
1866            "<img src=\"test.png\" alt=\"test image\">\n",
1867            "<span class=\"test-class\" data-test=\"value\">test content</span>\n",
1868        );
1869        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1870        let result = rule.check(&ctx).unwrap();
1871
1872        // Only "test content" (between tags on line 4) should be flagged
1873        assert_eq!(
1874            result.len(),
1875            1,
1876            "Should flag only 'test content' between tags: {result:?}"
1877        );
1878        assert_eq!(result[0].line, 4);
1879    }
1880
1881    #[test]
1882    fn test_plain_text_underscore_boundary_unchanged() {
1883        // Plain text (outside HTML tags) still uses original word boundary semantics where
1884        // underscore is a boundary character, matching markdownlint's behavior via AST splitting.
1885        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1886        let content = "# Heading\n\ntest_image is here and just_test ends here\n";
1887        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1888        let result = rule.check(&ctx).unwrap();
1889
1890        // Both "test_image" (test at start) and "just_test" (test at end) are flagged
1891        // because in plain text, "_" is a word boundary
1892        assert_eq!(
1893            result.len(),
1894            2,
1895            "Should flag 'test' in both 'test_image' and 'just_test': {result:?}"
1896        );
1897        let cols: Vec<usize> = result.iter().map(|w| w.column).collect();
1898        assert!(cols.contains(&1), "Should flag col 1 (test_image): {cols:?}");
1899        assert!(cols.contains(&29), "Should flag col 29 (just_test): {cols:?}");
1900    }
1901
1902    #[test]
1903    fn test_frontmatter_yaml_keys_not_flagged() {
1904        // YAML keys in frontmatter should NOT be checked for proper name violations.
1905        // Only values should be checked.
1906        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1907
1908        let content = "---\ntitle: Heading\ntest: Some Test value\n---\n\nTest\n";
1909        let ctx = create_context(content);
1910        let result = rule.check(&ctx).unwrap();
1911
1912        // "test" in the YAML key (line 3) should NOT be flagged
1913        // "Test" in the YAML value (line 3) is correct capitalization, no flag
1914        // "Test" in body (line 6) is correct capitalization, no flag
1915        assert!(
1916            result.is_empty(),
1917            "Should not flag YAML keys or correctly capitalized values: {result:?}"
1918        );
1919    }
1920
1921    #[test]
1922    fn test_frontmatter_yaml_values_flagged() {
1923        // Incorrectly capitalized names in YAML values should be flagged.
1924        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1925
1926        let content = "---\ntitle: Heading\nkey: a test value\n---\n\nTest\n";
1927        let ctx = create_context(content);
1928        let result = rule.check(&ctx).unwrap();
1929
1930        // "test" in the YAML value (line 3) SHOULD be flagged
1931        assert_eq!(result.len(), 1, "Should flag 'test' in YAML value: {result:?}");
1932        assert_eq!(result[0].line, 3);
1933        assert_eq!(result[0].column, 8); // "key: a " = 7 chars, then "test" at column 8
1934    }
1935
1936    #[test]
1937    fn test_frontmatter_key_matches_name_not_flagged() {
1938        // A YAML key that happens to match a configured name should NOT be flagged.
1939        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1940
1941        let content = "---\ntest: other value\n---\n\nBody text\n";
1942        let ctx = create_context(content);
1943        let result = rule.check(&ctx).unwrap();
1944
1945        assert!(
1946            result.is_empty(),
1947            "Should not flag YAML key that matches configured name: {result:?}"
1948        );
1949    }
1950
1951    #[test]
1952    fn test_frontmatter_empty_value_not_flagged() {
1953        // YAML key with no value should be skipped entirely.
1954        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1955
1956        let content = "---\ntest:\ntest: \n---\n\nBody text\n";
1957        let ctx = create_context(content);
1958        let result = rule.check(&ctx).unwrap();
1959
1960        assert!(
1961            result.is_empty(),
1962            "Should not flag YAML keys with empty values: {result:?}"
1963        );
1964    }
1965
1966    #[test]
1967    fn test_frontmatter_nested_yaml_key_not_flagged() {
1968        // Nested/indented YAML keys should also be skipped.
1969        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1970
1971        let content = "---\nparent:\n  test: nested value\n---\n\nBody text\n";
1972        let ctx = create_context(content);
1973        let result = rule.check(&ctx).unwrap();
1974
1975        // "test" as a nested key should NOT be flagged
1976        assert!(result.is_empty(), "Should not flag nested YAML keys: {result:?}");
1977    }
1978
1979    #[test]
1980    fn test_frontmatter_list_items_checked() {
1981        // YAML list items are values and should be checked for proper names.
1982        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1983
1984        let content = "---\ntags:\n  - test\n  - other\n---\n\nBody text\n";
1985        let ctx = create_context(content);
1986        let result = rule.check(&ctx).unwrap();
1987
1988        // "test" as a list item value SHOULD be flagged
1989        assert_eq!(result.len(), 1, "Should flag 'test' in YAML list item: {result:?}");
1990        assert_eq!(result[0].line, 3);
1991    }
1992
1993    #[test]
1994    fn test_frontmatter_value_with_multiple_colons() {
1995        // For "key: value: more", key is before first colon.
1996        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1997
1998        let content = "---\ntest: description: a test thing\n---\n\nBody text\n";
1999        let ctx = create_context(content);
2000        let result = rule.check(&ctx).unwrap();
2001
2002        // "test" as key should NOT be flagged
2003        // "test" in value portion ("description: a test thing") SHOULD be flagged
2004        assert_eq!(
2005            result.len(),
2006            1,
2007            "Should flag 'test' in value after first colon: {result:?}"
2008        );
2009        assert_eq!(result[0].line, 2);
2010        assert!(result[0].column > 6, "Violation column should be in value portion");
2011    }
2012
2013    #[test]
2014    fn test_frontmatter_does_not_affect_body() {
2015        // Body text after frontmatter should still be fully checked.
2016        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2017
2018        let content = "---\ntitle: Heading\n---\n\ntest should be flagged here\n";
2019        let ctx = create_context(content);
2020        let result = rule.check(&ctx).unwrap();
2021
2022        assert_eq!(result.len(), 1, "Should flag 'test' in body text: {result:?}");
2023        assert_eq!(result[0].line, 5);
2024    }
2025
2026    #[test]
2027    fn test_frontmatter_fix_corrects_values_preserves_keys() {
2028        // Fix should correct YAML values but preserve keys.
2029        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2030
2031        let content = "---\ntest: a test value\n---\n\ntest here\n";
2032        let ctx = create_context(content);
2033        let fixed = rule.fix(&ctx).unwrap();
2034
2035        // Key "test" should remain lowercase; value "test" should become "Test"
2036        assert_eq!(fixed, "---\ntest: a Test value\n---\n\nTest here\n");
2037    }
2038
2039    #[test]
2040    fn test_frontmatter_multiword_value_flagged() {
2041        // Multiple proper names in a single YAML value should all be flagged.
2042        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
2043
2044        let content = "---\ndescription: Learn javascript and typescript\n---\n\nBody\n";
2045        let ctx = create_context(content);
2046        let result = rule.check(&ctx).unwrap();
2047
2048        assert_eq!(result.len(), 2, "Should flag both names in YAML value: {result:?}");
2049        assert!(result.iter().all(|w| w.line == 2));
2050    }
2051
2052    #[test]
2053    fn test_frontmatter_yaml_comments_not_checked() {
2054        // YAML comments inside frontmatter should be skipped entirely.
2055        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2056
2057        let content = "---\n# test comment\ntitle: Heading\n---\n\nBody text\n";
2058        let ctx = create_context(content);
2059        let result = rule.check(&ctx).unwrap();
2060
2061        assert!(result.is_empty(), "Should not flag names in YAML comments: {result:?}");
2062    }
2063
2064    #[test]
2065    fn test_frontmatter_delimiters_not_checked() {
2066        // Frontmatter delimiter lines (--- or +++) should never be checked.
2067        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2068
2069        let content = "---\ntitle: Heading\n---\n\ntest here\n";
2070        let ctx = create_context(content);
2071        let result = rule.check(&ctx).unwrap();
2072
2073        // Only the body "test" on line 5 should be flagged
2074        assert_eq!(result.len(), 1, "Should only flag body text: {result:?}");
2075        assert_eq!(result[0].line, 5);
2076    }
2077
2078    #[test]
2079    fn test_frontmatter_continuation_lines_checked() {
2080        // Continuation lines (indented, no colon) are value content and should be checked.
2081        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2082
2083        let content = "---\ndescription: >\n  a test value\n  continued here\n---\n\nBody\n";
2084        let ctx = create_context(content);
2085        let result = rule.check(&ctx).unwrap();
2086
2087        // "test" on the continuation line should be flagged
2088        assert_eq!(result.len(), 1, "Should flag 'test' in continuation line: {result:?}");
2089        assert_eq!(result[0].line, 3);
2090    }
2091
2092    #[test]
2093    fn test_frontmatter_quoted_values_checked() {
2094        // Quoted YAML values should have their content checked (inside the quotes).
2095        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2096
2097        let content = "---\ntitle: \"a test title\"\n---\n\nBody\n";
2098        let ctx = create_context(content);
2099        let result = rule.check(&ctx).unwrap();
2100
2101        assert_eq!(result.len(), 1, "Should flag 'test' in quoted YAML value: {result:?}");
2102        assert_eq!(result[0].line, 2);
2103    }
2104
2105    #[test]
2106    fn test_frontmatter_single_quoted_values_checked() {
2107        // Single-quoted YAML values should have their content checked.
2108        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2109
2110        let content = "---\ntitle: 'a test title'\n---\n\nBody\n";
2111        let ctx = create_context(content);
2112        let result = rule.check(&ctx).unwrap();
2113
2114        assert_eq!(
2115            result.len(),
2116            1,
2117            "Should flag 'test' in single-quoted YAML value: {result:?}"
2118        );
2119        assert_eq!(result[0].line, 2);
2120    }
2121
2122    #[test]
2123    fn test_frontmatter_fix_multiword_values() {
2124        // Fix should correct all proper names in frontmatter values.
2125        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
2126
2127        let content = "---\ndescription: Learn javascript and typescript\n---\n\nBody\n";
2128        let ctx = create_context(content);
2129        let fixed = rule.fix(&ctx).unwrap();
2130
2131        assert_eq!(
2132            fixed,
2133            "---\ndescription: Learn JavaScript and TypeScript\n---\n\nBody\n"
2134        );
2135    }
2136
2137    #[test]
2138    fn test_frontmatter_fix_preserves_yaml_structure() {
2139        // Fix should preserve YAML structure while correcting values.
2140        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2141
2142        let content = "---\ntags:\n  - test\n  - other\ntitle: a test doc\n---\n\ntest body\n";
2143        let ctx = create_context(content);
2144        let fixed = rule.fix(&ctx).unwrap();
2145
2146        assert_eq!(
2147            fixed,
2148            "---\ntags:\n  - Test\n  - other\ntitle: a Test doc\n---\n\nTest body\n"
2149        );
2150    }
2151
2152    #[test]
2153    fn test_frontmatter_toml_delimiters_not_checked() {
2154        // TOML frontmatter with +++ delimiters should also be handled.
2155        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2156
2157        let content = "+++\ntitle = \"a test title\"\n+++\n\ntest body\n";
2158        let ctx = create_context(content);
2159        let result = rule.check(&ctx).unwrap();
2160
2161        // "title" as TOML key should NOT be flagged
2162        // "test" in TOML quoted value SHOULD be flagged (line 2)
2163        // "test" in body SHOULD be flagged (line 5)
2164        assert_eq!(result.len(), 2, "Should flag TOML value and body: {result:?}");
2165        let fm_violations: Vec<_> = result.iter().filter(|w| w.line == 2).collect();
2166        assert_eq!(fm_violations.len(), 1, "Should flag 'test' in TOML value: {result:?}");
2167        let body_violations: Vec<_> = result.iter().filter(|w| w.line == 5).collect();
2168        assert_eq!(body_violations.len(), 1, "Should flag body 'test': {result:?}");
2169    }
2170
2171    #[test]
2172    fn test_frontmatter_toml_key_not_flagged() {
2173        // TOML keys should NOT be flagged, only values.
2174        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2175
2176        let content = "+++\ntest = \"other value\"\n+++\n\nBody text\n";
2177        let ctx = create_context(content);
2178        let result = rule.check(&ctx).unwrap();
2179
2180        assert!(
2181            result.is_empty(),
2182            "Should not flag TOML key that matches configured name: {result:?}"
2183        );
2184    }
2185
2186    #[test]
2187    fn test_frontmatter_toml_fix_preserves_keys() {
2188        // Fix should correct TOML values but preserve keys.
2189        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2190
2191        let content = "+++\ntest = \"a test value\"\n+++\n\ntest here\n";
2192        let ctx = create_context(content);
2193        let fixed = rule.fix(&ctx).unwrap();
2194
2195        // Key "test" should remain lowercase; value "test" should become "Test"
2196        assert_eq!(fixed, "+++\ntest = \"a Test value\"\n+++\n\nTest here\n");
2197    }
2198
2199    #[test]
2200    fn test_frontmatter_list_item_mapping_key_not_flagged() {
2201        // In "- test: nested value", "test" is a YAML key within a list-item mapping.
2202        // The key should NOT be flagged; only the value should be checked.
2203        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2204
2205        let content = "---\nitems:\n  - test: nested value\n---\n\nBody text\n";
2206        let ctx = create_context(content);
2207        let result = rule.check(&ctx).unwrap();
2208
2209        assert!(
2210            result.is_empty(),
2211            "Should not flag YAML key in list-item mapping: {result:?}"
2212        );
2213    }
2214
2215    #[test]
2216    fn test_frontmatter_list_item_mapping_value_flagged() {
2217        // In "- key: test value", the value portion should be checked.
2218        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2219
2220        let content = "---\nitems:\n  - key: a test value\n---\n\nBody text\n";
2221        let ctx = create_context(content);
2222        let result = rule.check(&ctx).unwrap();
2223
2224        assert_eq!(
2225            result.len(),
2226            1,
2227            "Should flag 'test' in list-item mapping value: {result:?}"
2228        );
2229        assert_eq!(result[0].line, 3);
2230    }
2231
2232    #[test]
2233    fn test_frontmatter_bare_list_item_still_flagged() {
2234        // Bare list items without a colon (e.g., "- test") are values and should be flagged.
2235        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2236
2237        let content = "---\ntags:\n  - test\n  - other\n---\n\nBody text\n";
2238        let ctx = create_context(content);
2239        let result = rule.check(&ctx).unwrap();
2240
2241        assert_eq!(result.len(), 1, "Should flag 'test' in bare list item: {result:?}");
2242        assert_eq!(result[0].line, 3);
2243    }
2244
2245    #[test]
2246    fn test_frontmatter_flow_mapping_not_flagged() {
2247        // Flow mappings like {test: value} contain YAML keys that should not be flagged.
2248        // The entire flow construct should be skipped.
2249        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2250
2251        let content = "---\nflow_map: {test: value, other: test}\n---\n\nBody text\n";
2252        let ctx = create_context(content);
2253        let result = rule.check(&ctx).unwrap();
2254
2255        assert!(
2256            result.is_empty(),
2257            "Should not flag names inside flow mappings: {result:?}"
2258        );
2259    }
2260
2261    #[test]
2262    fn test_frontmatter_flow_sequence_not_flagged() {
2263        // Flow sequences like [test, other] should also be skipped.
2264        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2265
2266        let content = "---\nitems: [test, other, test]\n---\n\nBody text\n";
2267        let ctx = create_context(content);
2268        let result = rule.check(&ctx).unwrap();
2269
2270        assert!(
2271            result.is_empty(),
2272            "Should not flag names inside flow sequences: {result:?}"
2273        );
2274    }
2275
2276    #[test]
2277    fn test_frontmatter_list_item_mapping_fix_preserves_key() {
2278        // Fix should correct values in list-item mappings but preserve keys.
2279        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2280
2281        let content = "---\nitems:\n  - test: a test value\n---\n\ntest here\n";
2282        let ctx = create_context(content);
2283        let fixed = rule.fix(&ctx).unwrap();
2284
2285        // "test" as list-item key should remain lowercase;
2286        // "test" in value portion should become "Test"
2287        assert_eq!(fixed, "---\nitems:\n  - test: a Test value\n---\n\nTest here\n");
2288    }
2289
2290    #[test]
2291    fn test_frontmatter_backtick_code_not_flagged() {
2292        // Names inside backticks in frontmatter should NOT be flagged when code_blocks=false.
2293        let config = MD044Config {
2294            names: vec!["GoodApplication".to_string()],
2295            code_blocks: false,
2296            ..MD044Config::default()
2297        };
2298        let rule = MD044ProperNames::from_config_struct(config);
2299
2300        let content = "---\ntitle: \"`goodapplication` CLI\"\n---\n\nIntroductory `goodapplication` CLI text.\n";
2301        let ctx = create_context(content);
2302        let result = rule.check(&ctx).unwrap();
2303
2304        // Neither the frontmatter nor the body backtick-wrapped name should be flagged
2305        assert!(
2306            result.is_empty(),
2307            "Should not flag names inside backticks in frontmatter or body: {result:?}"
2308        );
2309    }
2310
2311    #[test]
2312    fn test_frontmatter_unquoted_backtick_code_not_flagged() {
2313        // Exact case from issue #513: unquoted YAML frontmatter with backticks
2314        let config = MD044Config {
2315            names: vec!["GoodApplication".to_string()],
2316            code_blocks: false,
2317            ..MD044Config::default()
2318        };
2319        let rule = MD044ProperNames::from_config_struct(config);
2320
2321        let content = "---\ntitle: `goodapplication` CLI\n---\n\nIntroductory `goodapplication` CLI text.\n";
2322        let ctx = create_context(content);
2323        let result = rule.check(&ctx).unwrap();
2324
2325        assert!(
2326            result.is_empty(),
2327            "Should not flag names inside backticks in unquoted YAML frontmatter: {result:?}"
2328        );
2329    }
2330
2331    #[test]
2332    fn test_frontmatter_bare_name_still_flagged_with_backtick_nearby() {
2333        // Names outside backticks in frontmatter should still be flagged.
2334        let config = MD044Config {
2335            names: vec!["GoodApplication".to_string()],
2336            code_blocks: false,
2337            ..MD044Config::default()
2338        };
2339        let rule = MD044ProperNames::from_config_struct(config);
2340
2341        let content = "---\ntitle: goodapplication `goodapplication` CLI\n---\n\nBody\n";
2342        let ctx = create_context(content);
2343        let result = rule.check(&ctx).unwrap();
2344
2345        // Only the bare "goodapplication" (before backticks) should be flagged
2346        assert_eq!(
2347            result.len(),
2348            1,
2349            "Should flag bare name but not backtick-wrapped name: {result:?}"
2350        );
2351        assert_eq!(result[0].line, 2);
2352        assert_eq!(result[0].column, 8); // "title: " = 7 chars, name at column 8
2353    }
2354
2355    #[test]
2356    fn test_frontmatter_backtick_code_with_code_blocks_true() {
2357        // When code_blocks=true, names inside backticks ARE checked.
2358        let config = MD044Config {
2359            names: vec!["GoodApplication".to_string()],
2360            code_blocks: true,
2361            ..MD044Config::default()
2362        };
2363        let rule = MD044ProperNames::from_config_struct(config);
2364
2365        let content = "---\ntitle: \"`goodapplication` CLI\"\n---\n\nBody\n";
2366        let ctx = create_context(content);
2367        let result = rule.check(&ctx).unwrap();
2368
2369        // With code_blocks=true, backtick-wrapped name SHOULD be flagged
2370        assert_eq!(
2371            result.len(),
2372            1,
2373            "Should flag backtick-wrapped name when code_blocks=true: {result:?}"
2374        );
2375        assert_eq!(result[0].line, 2);
2376    }
2377
2378    #[test]
2379    fn test_frontmatter_fix_preserves_backtick_code() {
2380        // Fix should NOT change names inside backticks in frontmatter.
2381        let config = MD044Config {
2382            names: vec!["GoodApplication".to_string()],
2383            code_blocks: false,
2384            ..MD044Config::default()
2385        };
2386        let rule = MD044ProperNames::from_config_struct(config);
2387
2388        let content = "---\ntitle: \"`goodapplication` CLI\"\n---\n\nIntroductory `goodapplication` CLI text.\n";
2389        let ctx = create_context(content);
2390        let fixed = rule.fix(&ctx).unwrap();
2391
2392        // Neither backtick-wrapped occurrence should be changed
2393        assert_eq!(
2394            fixed, content,
2395            "Fix should not modify names inside backticks in frontmatter"
2396        );
2397    }
2398
2399    // --- Angle-bracket URL tests (issue #457) ---
2400
2401    #[test]
2402    fn test_angle_bracket_url_in_html_comment_not_flagged() {
2403        // Angle-bracket URLs inside HTML comments should be skipped
2404        let config = MD044Config {
2405            names: vec!["Test".to_string()],
2406            ..MD044Config::default()
2407        };
2408        let rule = MD044ProperNames::from_config_struct(config);
2409
2410        let content = "---\ntitle: Level 1 heading\n---\n\n<https://www.example.test>\n\n<!-- This is a Test https://www.example.test -->\n<!-- This is a Test <https://www.example.test> -->\n";
2411        let ctx = create_context(content);
2412        let result = rule.check(&ctx).unwrap();
2413
2414        // Line 7: "Test" in comment prose before bare URL -- already correct capitalization
2415        // Line 7: "test" in bare URL (not in angle brackets) -- but "test" is in URL domain, not prose.
2416        //   However, .example.test has "test" at a word boundary (after '.'), so it IS flagged.
2417        // Line 8: "Test" in comment prose -- correct capitalization, not flagged
2418        // Line 8: "test" in <https://www.example.test> -- inside angle-bracket URL, NOT flagged
2419
2420        // The key assertion: line 8's angle-bracket URL should NOT produce a warning
2421        let line8_warnings: Vec<_> = result.iter().filter(|w| w.line == 8).collect();
2422        assert!(
2423            line8_warnings.is_empty(),
2424            "Should not flag names inside angle-bracket URLs in HTML comments: {line8_warnings:?}"
2425        );
2426    }
2427
2428    #[test]
2429    fn test_bare_url_in_html_comment_still_flagged() {
2430        // Bare URLs (not in angle brackets) inside HTML comments should still be checked
2431        let config = MD044Config {
2432            names: vec!["Test".to_string()],
2433            ..MD044Config::default()
2434        };
2435        let rule = MD044ProperNames::from_config_struct(config);
2436
2437        let content = "<!-- This is a test https://www.example.test -->\n";
2438        let ctx = create_context(content);
2439        let result = rule.check(&ctx).unwrap();
2440
2441        // "test" appears as prose text before URL and also in the bare URL domain
2442        // At minimum, the prose "test" should be flagged
2443        assert!(
2444            !result.is_empty(),
2445            "Should flag 'test' in prose text of HTML comment with bare URL"
2446        );
2447    }
2448
2449    #[test]
2450    fn test_angle_bracket_url_in_regular_markdown_not_flagged() {
2451        // Angle-bracket URLs in regular markdown are already handled by the link parser,
2452        // but the angle-bracket check provides a safety net
2453        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2454
2455        let content = "<https://www.example.test>\n";
2456        let ctx = create_context(content);
2457        let result = rule.check(&ctx).unwrap();
2458
2459        assert!(
2460            result.is_empty(),
2461            "Should not flag names inside angle-bracket URLs in regular markdown: {result:?}"
2462        );
2463    }
2464
2465    #[test]
2466    fn test_multiple_angle_bracket_urls_in_one_comment() {
2467        let config = MD044Config {
2468            names: vec!["Test".to_string()],
2469            ..MD044Config::default()
2470        };
2471        let rule = MD044ProperNames::from_config_struct(config);
2472
2473        let content = "<!-- See <https://test.example.com> and <https://www.example.test> for details -->\n";
2474        let ctx = create_context(content);
2475        let result = rule.check(&ctx).unwrap();
2476
2477        // Both URLs are inside angle brackets, so "test" inside them should NOT be flagged
2478        assert!(
2479            result.is_empty(),
2480            "Should not flag names inside multiple angle-bracket URLs: {result:?}"
2481        );
2482    }
2483
2484    #[test]
2485    fn test_angle_bracket_non_url_still_flagged() {
2486        // <Test> is NOT a URL (no scheme), so is_in_angle_bracket_url does NOT protect it.
2487        // Whether it gets flagged depends on HTML tag detection, not on our URL check.
2488        assert!(
2489            !MD044ProperNames::is_in_angle_bracket_url("<test> which is not a URL.", 1),
2490            "is_in_angle_bracket_url should return false for non-URL angle brackets"
2491        );
2492    }
2493
2494    #[test]
2495    fn test_angle_bracket_mailto_url_not_flagged() {
2496        let config = MD044Config {
2497            names: vec!["Test".to_string()],
2498            ..MD044Config::default()
2499        };
2500        let rule = MD044ProperNames::from_config_struct(config);
2501
2502        let content = "<!-- Contact <mailto:test@example.com> for help -->\n";
2503        let ctx = create_context(content);
2504        let result = rule.check(&ctx).unwrap();
2505
2506        assert!(
2507            result.is_empty(),
2508            "Should not flag names inside angle-bracket mailto URLs: {result:?}"
2509        );
2510    }
2511
2512    #[test]
2513    fn test_angle_bracket_ftp_url_not_flagged() {
2514        let config = MD044Config {
2515            names: vec!["Test".to_string()],
2516            ..MD044Config::default()
2517        };
2518        let rule = MD044ProperNames::from_config_struct(config);
2519
2520        let content = "<!-- Download from <ftp://test.example.com/file> -->\n";
2521        let ctx = create_context(content);
2522        let result = rule.check(&ctx).unwrap();
2523
2524        assert!(
2525            result.is_empty(),
2526            "Should not flag names inside angle-bracket FTP URLs: {result:?}"
2527        );
2528    }
2529
2530    #[test]
2531    fn test_angle_bracket_url_fix_preserves_url() {
2532        // Fix should not modify text inside angle-bracket URLs
2533        let config = MD044Config {
2534            names: vec!["Test".to_string()],
2535            ..MD044Config::default()
2536        };
2537        let rule = MD044ProperNames::from_config_struct(config);
2538
2539        let content = "<!-- test text <https://www.example.test> -->\n";
2540        let ctx = create_context(content);
2541        let fixed = rule.fix(&ctx).unwrap();
2542
2543        // "test" in prose should be fixed, URL should be preserved
2544        assert!(
2545            fixed.contains("<https://www.example.test>"),
2546            "Fix should preserve angle-bracket URLs: {fixed}"
2547        );
2548        assert!(
2549            fixed.contains("Test text"),
2550            "Fix should correct prose 'test' to 'Test': {fixed}"
2551        );
2552    }
2553
2554    #[test]
2555    fn test_is_in_angle_bracket_url_helper() {
2556        // Direct tests of the helper function
2557        let line = "text <https://example.test> more text";
2558
2559        // Inside the URL
2560        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 5)); // '<'
2561        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 6)); // 'h'
2562        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 15)); // middle of URL
2563        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 26)); // '>'
2564
2565        // Outside the URL
2566        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 0)); // 't' at start
2567        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 4)); // space before '<'
2568        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 27)); // space after '>'
2569
2570        // Non-URL angle brackets
2571        assert!(!MD044ProperNames::is_in_angle_bracket_url("<notaurl>", 1));
2572
2573        // mailto scheme
2574        assert!(MD044ProperNames::is_in_angle_bracket_url(
2575            "<mailto:test@example.com>",
2576            10
2577        ));
2578
2579        // ftp scheme
2580        assert!(MD044ProperNames::is_in_angle_bracket_url(
2581            "<ftp://test.example.com>",
2582            10
2583        ));
2584    }
2585
2586    #[test]
2587    fn test_is_in_angle_bracket_url_uppercase_scheme() {
2588        // RFC 3986: URI schemes are case-insensitive
2589        assert!(MD044ProperNames::is_in_angle_bracket_url(
2590            "<HTTPS://test.example.com>",
2591            10
2592        ));
2593        assert!(MD044ProperNames::is_in_angle_bracket_url(
2594            "<Http://test.example.com>",
2595            10
2596        ));
2597    }
2598
2599    #[test]
2600    fn test_is_in_angle_bracket_url_uncommon_schemes() {
2601        // ssh scheme
2602        assert!(MD044ProperNames::is_in_angle_bracket_url(
2603            "<ssh://test@example.com>",
2604            10
2605        ));
2606        // file scheme
2607        assert!(MD044ProperNames::is_in_angle_bracket_url("<file:///test/path>", 10));
2608        // data scheme (no authority, just colon)
2609        assert!(MD044ProperNames::is_in_angle_bracket_url("<data:text/plain;test>", 10));
2610    }
2611
2612    #[test]
2613    fn test_is_in_angle_bracket_url_unclosed() {
2614        // Unclosed angle bracket should NOT match
2615        assert!(!MD044ProperNames::is_in_angle_bracket_url(
2616            "<https://test.example.com",
2617            10
2618        ));
2619    }
2620
2621    #[test]
2622    fn test_vale_inline_config_comments_not_flagged() {
2623        let config = MD044Config {
2624            names: vec!["Vale".to_string(), "JavaScript".to_string()],
2625            ..MD044Config::default()
2626        };
2627        let rule = MD044ProperNames::from_config_struct(config);
2628
2629        let content = "\
2630<!-- vale off -->
2631Some javascript text here.
2632<!-- vale on -->
2633<!-- vale Style.Rule = NO -->
2634More javascript text.
2635<!-- vale Style.Rule = YES -->
2636<!-- vale JavaScript.Grammar = NO -->
2637";
2638        let ctx = create_context(content);
2639        let result = rule.check(&ctx).unwrap();
2640
2641        // Only the body text lines (2, 5) should be flagged for "javascript"
2642        assert_eq!(result.len(), 2, "Should only flag body lines, not Vale config comments");
2643        assert_eq!(result[0].line, 2);
2644        assert_eq!(result[1].line, 5);
2645    }
2646
2647    #[test]
2648    fn test_remark_lint_inline_config_comments_not_flagged() {
2649        let config = MD044Config {
2650            names: vec!["JavaScript".to_string()],
2651            ..MD044Config::default()
2652        };
2653        let rule = MD044ProperNames::from_config_struct(config);
2654
2655        let content = "\
2656<!-- lint disable remark-lint-some-rule -->
2657Some javascript text here.
2658<!-- lint enable remark-lint-some-rule -->
2659<!-- lint ignore remark-lint-some-rule -->
2660More javascript text.
2661";
2662        let ctx = create_context(content);
2663        let result = rule.check(&ctx).unwrap();
2664
2665        assert_eq!(
2666            result.len(),
2667            2,
2668            "Should only flag body lines, not remark-lint config comments"
2669        );
2670        assert_eq!(result[0].line, 2);
2671        assert_eq!(result[1].line, 5);
2672    }
2673
2674    #[test]
2675    fn test_fix_does_not_modify_vale_remark_lint_comments() {
2676        let config = MD044Config {
2677            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2678            ..MD044Config::default()
2679        };
2680        let rule = MD044ProperNames::from_config_struct(config);
2681
2682        let content = "\
2683<!-- vale off -->
2684Some javascript text.
2685<!-- vale on -->
2686<!-- lint disable remark-lint-some-rule -->
2687More javascript text.
2688<!-- lint enable remark-lint-some-rule -->
2689";
2690        let ctx = create_context(content);
2691        let fixed = rule.fix(&ctx).unwrap();
2692
2693        // Config directive lines must be preserved unchanged
2694        assert!(fixed.contains("<!-- vale off -->"));
2695        assert!(fixed.contains("<!-- vale on -->"));
2696        assert!(fixed.contains("<!-- lint disable remark-lint-some-rule -->"));
2697        assert!(fixed.contains("<!-- lint enable remark-lint-some-rule -->"));
2698        // Body text should be fixed
2699        assert!(fixed.contains("Some JavaScript text."));
2700        assert!(fixed.contains("More JavaScript text."));
2701    }
2702
2703    #[test]
2704    fn test_mixed_tool_directives_all_skipped() {
2705        let config = MD044Config {
2706            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2707            ..MD044Config::default()
2708        };
2709        let rule = MD044ProperNames::from_config_struct(config);
2710
2711        let content = "\
2712<!-- rumdl-disable MD044 -->
2713Some javascript text.
2714<!-- markdownlint-disable -->
2715More javascript text.
2716<!-- vale off -->
2717Even more javascript text.
2718<!-- lint disable some-rule -->
2719Final javascript text.
2720<!-- rumdl-enable MD044 -->
2721<!-- markdownlint-enable -->
2722<!-- vale on -->
2723<!-- lint enable some-rule -->
2724";
2725        let ctx = create_context(content);
2726        let result = rule.check(&ctx).unwrap();
2727
2728        // Only body text lines should be flagged (lines 2, 4, 6, 8)
2729        assert_eq!(
2730            result.len(),
2731            4,
2732            "Should only flag body lines, not any tool directive comments"
2733        );
2734        assert_eq!(result[0].line, 2);
2735        assert_eq!(result[1].line, 4);
2736        assert_eq!(result[2].line, 6);
2737        assert_eq!(result[3].line, 8);
2738    }
2739
2740    #[test]
2741    fn test_vale_remark_lint_edge_cases_not_matched() {
2742        let config = MD044Config {
2743            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2744            ..MD044Config::default()
2745        };
2746        let rule = MD044ProperNames::from_config_struct(config);
2747
2748        // These are regular HTML comments, NOT tool directives:
2749        // - "<!-- vale -->" is not a valid Vale directive (no action keyword)
2750        // - "<!-- vale is a tool -->" starts with "vale" but is prose, not a directive
2751        // - "<!-- valedictorian javascript -->" does not start with "<!-- vale "
2752        // - "<!-- linting javascript tips -->" does not start with "<!-- lint "
2753        // - "<!-- vale javascript -->" starts with "vale" but has no action keyword
2754        // - "<!-- lint your javascript code -->" starts with "lint" but has no action keyword
2755        let content = "\
2756<!-- vale -->
2757<!-- vale is a tool for writing -->
2758<!-- valedictorian javascript -->
2759<!-- linting javascript tips -->
2760<!-- vale javascript -->
2761<!-- lint your javascript code -->
2762";
2763        let ctx = create_context(content);
2764        let result = rule.check(&ctx).unwrap();
2765
2766        // Line 1: "<!-- vale -->" contains "vale" (wrong case for "Vale") -> flagged
2767        // Line 2: "<!-- vale is a tool for writing -->" contains "vale" -> flagged
2768        // Line 3: "<!-- valedictorian javascript -->" contains "javascript" -> flagged
2769        // Line 4: "<!-- linting javascript tips -->" contains "javascript" -> flagged
2770        // Line 5: "<!-- vale javascript -->" contains "vale" and "javascript" -> flagged for both
2771        // Line 6: "<!-- lint your javascript code -->" contains "javascript" -> flagged
2772        assert_eq!(
2773            result.len(),
2774            7,
2775            "Should flag proper names in non-directive HTML comments: got {result:?}"
2776        );
2777        assert_eq!(result[0].line, 1); // "vale" in <!-- vale -->
2778        assert_eq!(result[1].line, 2); // "vale" in <!-- vale is a tool -->
2779        assert_eq!(result[2].line, 3); // "javascript" in <!-- valedictorian javascript -->
2780        assert_eq!(result[3].line, 4); // "javascript" in <!-- linting javascript tips -->
2781        assert_eq!(result[4].line, 5); // "vale" in <!-- vale javascript -->
2782        assert_eq!(result[5].line, 5); // "javascript" in <!-- vale javascript -->
2783        assert_eq!(result[6].line, 6); // "javascript" in <!-- lint your javascript code -->
2784    }
2785
2786    #[test]
2787    fn test_vale_style_directives_skipped() {
2788        let config = MD044Config {
2789            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2790            ..MD044Config::default()
2791        };
2792        let rule = MD044ProperNames::from_config_struct(config);
2793
2794        // These ARE valid Vale directives and should be skipped:
2795        let content = "\
2796<!-- vale style = MyStyle -->
2797<!-- vale styles = Style1, Style2 -->
2798<!-- vale MyRule.Name = YES -->
2799<!-- vale MyRule.Name = NO -->
2800Some javascript text.
2801";
2802        let ctx = create_context(content);
2803        let result = rule.check(&ctx).unwrap();
2804
2805        // Only line 5 (body text) should be flagged
2806        assert_eq!(
2807            result.len(),
2808            1,
2809            "Should only flag body lines, not Vale style/rule directives: got {result:?}"
2810        );
2811        assert_eq!(result[0].line, 5);
2812    }
2813
2814    // --- is_in_backtick_code_in_line unit tests ---
2815
2816    #[test]
2817    fn test_backtick_code_single_backticks() {
2818        let line = "hello `world` bye";
2819        // 'w' is at index 7, inside the backtick span (content between backticks at 6 and 12)
2820        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 7));
2821        // 'h' at index 0 is outside
2822        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2823        // 'b' at index 14 is outside
2824        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 14));
2825    }
2826
2827    #[test]
2828    fn test_backtick_code_double_backticks() {
2829        let line = "a ``code`` b";
2830        // 'c' is at index 4, inside ``...``
2831        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 4));
2832        // 'a' at index 0 is outside
2833        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2834        // 'b' at index 11 is outside
2835        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 11));
2836    }
2837
2838    #[test]
2839    fn test_backtick_code_unclosed() {
2840        let line = "a `code b";
2841        // No closing backtick, so nothing is a code span
2842        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 3));
2843    }
2844
2845    #[test]
2846    fn test_backtick_code_mismatched_count() {
2847        // Single backtick opening, double backtick is not a match
2848        let line = "a `code`` b";
2849        // The single ` at index 2 doesn't match `` at index 7-8
2850        // So 'c' at index 3 is NOT in a code span
2851        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 3));
2852    }
2853
2854    #[test]
2855    fn test_backtick_code_multiple_spans() {
2856        let line = "`first` and `second`";
2857        // 'f' at index 1 (inside first span)
2858        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 1));
2859        // 'a' at index 8 (between spans)
2860        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 8));
2861        // 's' at index 13 (inside second span)
2862        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 13));
2863    }
2864
2865    #[test]
2866    fn test_backtick_code_on_backtick_boundary() {
2867        let line = "`code`";
2868        // Position 0 is the opening backtick itself, not inside the span
2869        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2870        // Position 5 is the closing backtick, not inside the span
2871        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 5));
2872        // Position 1-4 are inside the span
2873        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 1));
2874        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 4));
2875    }
2876}
rumdl_lib/rules/md044_proper_names.rs

rumdl_lib/rules/
md044_proper_names.rs