rumdl_lib/rules/
md044_proper_names.rs

1use crate::utils::fast_hash;
2use crate::utils::regex_cache::{escape_regex, get_cached_regex};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use std::collections::{HashMap, HashSet};
6use std::sync::{Arc, Mutex};
7
8mod md044_config;
9pub use md044_config::MD044Config;
10
/// Location of one capitalization violation: `(line, column, found_name)`.
///
/// `line` is 1-based, `column` is the 1-based byte offset of the match within
/// its line, and `found_name` is the text exactly as it appears in the document.
type WarningPosition = (usize, usize, String);
12
13/// Rule MD044: Proper names should be capitalized
14///
15/// See [docs/md044.md](../../docs/md044.md) for full documentation, configuration, and examples.
16///
17/// This rule is triggered when proper names are not capitalized correctly in the document.
18/// For example, if you have defined "JavaScript" as a proper name, the rule will flag any
19/// occurrences of "javascript" or "Javascript" as violations.
20///
21/// ## Purpose
22///
23/// Ensuring consistent capitalization of proper names improves document quality and
24/// professionalism. This is especially important for technical documentation where
25/// product names, programming languages, and technologies often have specific
26/// capitalization conventions.
27///
28/// ## Configuration Options
29///
30/// The rule supports the following configuration options:
31///
32/// ```yaml
33/// MD044:
34///   names: []                # List of proper names to check for correct capitalization
35///   code-blocks: false       # Whether to check code blocks (default: false)
36/// ```
37///
38/// Example configuration:
39///
40/// ```yaml
41/// MD044:
42///   names: ["JavaScript", "Node.js", "TypeScript"]
43///   code-blocks: true
44/// ```
45///
46/// ## Performance Optimizations
47///
48/// This rule implements several performance optimizations:
49///
50/// 1. **Regex Caching**: Pre-compiles and caches regex patterns for each proper name
51/// 2. **Content Caching**: Caches results based on content hashing for repeated checks
52/// 3. **Efficient Text Processing**: Uses optimized algorithms to avoid redundant text processing
53/// 4. **Smart Code Block Detection**: Efficiently identifies and optionally excludes code blocks
54///
55/// ## Edge Cases Handled
56///
57/// - **Word Boundaries**: Only matches complete words, not substrings within other words
58/// - **Case Sensitivity**: Properly handles case-specific matching
59/// - **Code Blocks**: Optionally checks code blocks (controlled by code-blocks setting)
60/// - **Markdown Formatting**: Handles proper names within Markdown formatting elements
61///
62/// ## Fix Behavior
63///
64/// When fixing issues, this rule replaces incorrect capitalization with the correct form
65/// as defined in the configuration.
66///
67/// Check if a trimmed line is an inline config comment from a linting tool.
68/// Recognized tools: rumdl, markdownlint, Vale, and remark-lint.
69fn is_inline_config_comment(trimmed: &str) -> bool {
70    trimmed.starts_with("<!-- rumdl-")
71        || trimmed.starts_with("<!-- markdownlint-")
72        || trimmed.starts_with("<!-- vale off")
73        || trimmed.starts_with("<!-- vale on")
74        || (trimmed.starts_with("<!-- vale ") && trimmed.contains(" = "))
75        || trimmed.starts_with("<!-- vale style")
76        || trimmed.starts_with("<!-- lint disable ")
77        || trimmed.starts_with("<!-- lint enable ")
78        || trimmed.starts_with("<!-- lint ignore ")
79}
80
81#[derive(Clone)]
82pub struct MD044ProperNames {
83    config: MD044Config,
84    // Cache the combined regex pattern string
85    combined_pattern: Option<String>,
86    // Precomputed lowercase name variants for fast pre-checks
87    name_variants: Vec<String>,
88    // Cache for name violations by content hash
89    content_cache: Arc<Mutex<HashMap<u64, Vec<WarningPosition>>>>,
90}
91
92impl MD044ProperNames {
93    pub fn new(names: Vec<String>, code_blocks: bool) -> Self {
94        let config = MD044Config {
95            names,
96            code_blocks,
97            html_elements: true, // Default to checking HTML elements
98            html_comments: true, // Default to checking HTML comments
99        };
100        let combined_pattern = Self::create_combined_pattern(&config);
101        let name_variants = Self::build_name_variants(&config);
102        Self {
103            config,
104            combined_pattern,
105            name_variants,
106            content_cache: Arc::new(Mutex::new(HashMap::new())),
107        }
108    }
109
110    // Helper function for consistent ASCII normalization
111    fn ascii_normalize(s: &str) -> String {
112        s.replace(['é', 'è', 'ê', 'ë'], "e")
113            .replace(['à', 'á', 'â', 'ä', 'ã', 'å'], "a")
114            .replace(['ï', 'î', 'í', 'ì'], "i")
115            .replace(['ü', 'ú', 'ù', 'û'], "u")
116            .replace(['ö', 'ó', 'ò', 'ô', 'õ'], "o")
117            .replace('ñ', "n")
118            .replace('ç', "c")
119    }
120
121    pub fn from_config_struct(config: MD044Config) -> Self {
122        let combined_pattern = Self::create_combined_pattern(&config);
123        let name_variants = Self::build_name_variants(&config);
124        Self {
125            config,
126            combined_pattern,
127            name_variants,
128            content_cache: Arc::new(Mutex::new(HashMap::new())),
129        }
130    }
131
132    // Create a combined regex pattern for all proper names
133    fn create_combined_pattern(config: &MD044Config) -> Option<String> {
134        if config.names.is_empty() {
135            return None;
136        }
137
138        // Create patterns for all names and their variations
139        let mut patterns: Vec<String> = config
140            .names
141            .iter()
142            .flat_map(|name| {
143                let mut variations = vec![];
144                let lower_name = name.to_lowercase();
145
146                // Add the lowercase version
147                variations.push(escape_regex(&lower_name));
148
149                // Add version without dots
150                let lower_name_no_dots = lower_name.replace('.', "");
151                if lower_name != lower_name_no_dots {
152                    variations.push(escape_regex(&lower_name_no_dots));
153                }
154
155                // Add ASCII-normalized versions for common accented characters
156                let ascii_normalized = Self::ascii_normalize(&lower_name);
157
158                if ascii_normalized != lower_name {
159                    variations.push(escape_regex(&ascii_normalized));
160
161                    // Also add version without dots
162                    let ascii_no_dots = ascii_normalized.replace('.', "");
163                    if ascii_normalized != ascii_no_dots {
164                        variations.push(escape_regex(&ascii_no_dots));
165                    }
166                }
167
168                variations
169            })
170            .collect();
171
172        // Sort patterns by length (longest first) to avoid shorter patterns matching within longer ones
173        patterns.sort_by_key(|b| std::cmp::Reverse(b.len()));
174
175        // Combine all patterns into a single regex with capture groups
176        // Don't use \b as it doesn't work with Unicode - we'll check boundaries manually
177        Some(format!(r"(?i)({})", patterns.join("|")))
178    }
179
180    fn build_name_variants(config: &MD044Config) -> Vec<String> {
181        let mut variants = HashSet::new();
182        for name in &config.names {
183            let lower_name = name.to_lowercase();
184            variants.insert(lower_name.clone());
185
186            let lower_no_dots = lower_name.replace('.', "");
187            if lower_name != lower_no_dots {
188                variants.insert(lower_no_dots);
189            }
190
191            let ascii_normalized = Self::ascii_normalize(&lower_name);
192            if ascii_normalized != lower_name {
193                variants.insert(ascii_normalized.clone());
194
195                let ascii_no_dots = ascii_normalized.replace('.', "");
196                if ascii_normalized != ascii_no_dots {
197                    variants.insert(ascii_no_dots);
198                }
199            }
200        }
201
202        variants.into_iter().collect()
203    }
204
205    // Find all name violations in the content and return positions.
206    // `content_lower` is the pre-computed lowercase version of `content` to avoid redundant allocations.
207    fn find_name_violations(
208        &self,
209        content: &str,
210        ctx: &crate::lint_context::LintContext,
211        content_lower: &str,
212    ) -> Vec<WarningPosition> {
213        // Early return: if no names configured or content is empty
214        if self.config.names.is_empty() || content.is_empty() || self.combined_pattern.is_none() {
215            return Vec::new();
216        }
217
218        // Early return: quick check if any of the configured names might be in content
219        let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
220
221        if !has_potential_matches {
222            return Vec::new();
223        }
224
225        // Check if we have cached results
226        let hash = fast_hash(content);
227        {
228            // Use a separate scope for borrowing to minimize lock time
229            if let Ok(cache) = self.content_cache.lock()
230                && let Some(cached) = cache.get(&hash)
231            {
232                return cached.clone();
233            }
234        }
235
236        let mut violations = Vec::new();
237
238        // Get the regex from global cache
239        let combined_regex = match &self.combined_pattern {
240            Some(pattern) => match get_cached_regex(pattern) {
241                Ok(regex) => regex,
242                Err(_) => return Vec::new(),
243            },
244            None => return Vec::new(),
245        };
246
247        // Use ctx.lines for better performance
248        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
249            let line_num = line_idx + 1;
250            let line = line_info.content(ctx.content);
251
252            // Skip code fence lines (```language or ~~~language)
253            let trimmed = line.trim_start();
254            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
255                continue;
256            }
257
258            // Skip if in code block (when code_blocks = false)
259            if !self.config.code_blocks && line_info.in_code_block {
260                continue;
261            }
262
263            // Skip if in HTML block (when html_elements = false)
264            if !self.config.html_elements && line_info.in_html_block {
265                continue;
266            }
267
268            // Skip HTML comments using pre-computed line flag
269            if !self.config.html_comments && line_info.in_html_comment {
270                continue;
271            }
272
273            // Skip JSX expressions and MDX comments (MDX flavor)
274            if line_info.in_jsx_expression || line_info.in_mdx_comment {
275                continue;
276            }
277
278            // Skip Obsidian comments (Obsidian flavor)
279            if line_info.in_obsidian_comment {
280                continue;
281            }
282
283            // For frontmatter lines, determine offset where checkable value content starts.
284            // YAML keys should not be checked against proper names - only values.
285            let fm_value_offset = if line_info.in_front_matter {
286                Self::frontmatter_value_offset(line)
287            } else {
288                0
289            };
290            if fm_value_offset == usize::MAX {
291                continue;
292            }
293
294            // Skip inline config comments (rumdl, markdownlint, Vale, remark-lint directives)
295            if is_inline_config_comment(trimmed) {
296                continue;
297            }
298
299            // Early return: skip lines that don't contain any potential matches
300            let line_lower = line.to_lowercase();
301            let has_line_matches = self.name_variants.iter().any(|name| line_lower.contains(name));
302
303            if !has_line_matches {
304                continue;
305            }
306
307            // Use the combined regex to find all matches in one pass
308            for cap in combined_regex.find_iter(line) {
309                let found_name = &line[cap.start()..cap.end()];
310
311                // Check word boundaries manually for Unicode support
312                let start_pos = cap.start();
313                let end_pos = cap.end();
314
315                // Skip matches in the key portion of frontmatter lines
316                if start_pos < fm_value_offset {
317                    continue;
318                }
319
320                // Skip matches inside HTML tag attributes (handles multi-line tags)
321                let byte_pos = line_info.byte_offset + start_pos;
322                if ctx.is_in_html_tag(byte_pos) {
323                    continue;
324                }
325
326                if !Self::is_at_word_boundary(line, start_pos, true) || !Self::is_at_word_boundary(line, end_pos, false)
327                {
328                    continue; // Not at word boundary
329                }
330
331                // Skip if in inline code when code_blocks is false
332                if !self.config.code_blocks {
333                    if ctx.is_in_code_block_or_span(byte_pos) {
334                        continue;
335                    }
336                    // pulldown-cmark doesn't parse markdown syntax inside HTML
337                    // comments, HTML blocks, or frontmatter, so backtick-wrapped
338                    // text isn't detected by is_in_code_block_or_span. Check directly.
339                    if (line_info.in_html_comment || line_info.in_html_block || line_info.in_front_matter)
340                        && Self::is_in_backtick_code_in_line(line, start_pos)
341                    {
342                        continue;
343                    }
344                }
345
346                // Skip if in link URL or reference definition
347                if Self::is_in_link(ctx, byte_pos) {
348                    continue;
349                }
350
351                // Skip if inside an angle-bracket URL (e.g., <https://...>)
352                // The link parser skips autolinks inside HTML comments,
353                // so we detect them directly in the line text.
354                if Self::is_in_angle_bracket_url(line, start_pos) {
355                    continue;
356                }
357
358                // Skip if inside a Markdown inline link URL in contexts where
359                // pulldown-cmark doesn't parse Markdown syntax (HTML comments,
360                // HTML blocks, frontmatter).
361                if (line_info.in_html_comment || line_info.in_html_block || line_info.in_front_matter)
362                    && Self::is_in_markdown_link_url(line, start_pos)
363                {
364                    continue;
365                }
366
367                // Skip if inside the URL portion of a WikiLink followed by a
368                // parenthesised destination — [[text]](url). pulldown-cmark
369                // registers [[text]] as a WikiLink in ctx.links but leaves the
370                // (url) as plain text, so is_in_link() misses those bytes.
371                if Self::is_in_wikilink_url(ctx, byte_pos) {
372                    continue;
373                }
374
375                // Find which proper name this matches
376                if let Some(proper_name) = self.get_proper_name_for(found_name) {
377                    // Only flag if it's not already correct
378                    if found_name != proper_name {
379                        violations.push((line_num, cap.start() + 1, found_name.to_string()));
380                    }
381                }
382            }
383        }
384
385        // Store in cache (ignore if mutex is poisoned)
386        if let Ok(mut cache) = self.content_cache.lock() {
387            cache.insert(hash, violations.clone());
388        }
389        violations
390    }
391
392    /// Check if a byte position is within a link URL (not link text)
393    ///
394    /// Link text should be checked for proper names, but URLs should be skipped.
395    /// For `[text](url)` - check text, skip url
396    /// For `[text][ref]` - check text, skip reference portion
397    /// For `[[text]]` (WikiLinks) - check text, skip brackets
398    fn is_in_link(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
399        use pulldown_cmark::LinkType;
400
401        // Binary search links (sorted by byte_offset) to find candidate containing byte_pos
402        let link_idx = ctx.links.partition_point(|link| link.byte_offset <= byte_pos);
403        if link_idx > 0 {
404            let link = &ctx.links[link_idx - 1];
405            if byte_pos < link.byte_end {
406                // WikiLinks [[text]] start with '[[', regular links [text] start with '['
407                let text_start = if matches!(link.link_type, LinkType::WikiLink { .. }) {
408                    link.byte_offset + 2
409                } else {
410                    link.byte_offset + 1
411                };
412                let text_end = text_start + link.text.len();
413
414                // If position is within the text portion, skip only if text is a URL
415                if byte_pos >= text_start && byte_pos < text_end {
416                    return Self::link_text_is_url(&link.text);
417                }
418                // Position is in the URL/reference portion, skip it
419                return true;
420            }
421        }
422
423        // Binary search images (sorted by byte_offset) to find candidate containing byte_pos
424        let image_idx = ctx.images.partition_point(|img| img.byte_offset <= byte_pos);
425        if image_idx > 0 {
426            let image = &ctx.images[image_idx - 1];
427            if byte_pos < image.byte_end {
428                // Image starts with '![' so alt text starts at byte_offset + 2
429                let alt_start = image.byte_offset + 2;
430                let alt_end = alt_start + image.alt_text.len();
431
432                // If position is within the alt text portion, don't skip
433                if byte_pos >= alt_start && byte_pos < alt_end {
434                    return false;
435                }
436                // Position is in the URL/reference portion, skip it
437                return true;
438            }
439        }
440
441        // Check pre-computed reference definitions
442        ctx.is_in_reference_def(byte_pos)
443    }
444
445    /// Check if link text is a URL that should not have proper name corrections.
446    /// Matches markdownlint behavior: skip text starting with `http://`, `https://`, or `www.`.
447    fn link_text_is_url(text: &str) -> bool {
448        let lower = text.trim().to_ascii_lowercase();
449        lower.starts_with("http://") || lower.starts_with("https://") || lower.starts_with("www.")
450    }
451
452    /// Check if a position within a line falls inside an angle-bracket URL (`<scheme://...>`).
453    ///
454    /// The link parser skips autolinks inside HTML comments, so `ctx.links` won't
455    /// contain them. This function detects angle-bracket URLs directly in the line
456    /// text, covering both HTML comments and regular text as a safety net.
457    fn is_in_angle_bracket_url(line: &str, pos: usize) -> bool {
458        let bytes = line.as_bytes();
459        let len = bytes.len();
460        let mut i = 0;
461        while i < len {
462            if bytes[i] == b'<' {
463                let after_open = i + 1;
464                // Check for a valid URI scheme per CommonMark autolink spec:
465                // scheme = [a-zA-Z][a-zA-Z0-9+.-]{0,31}
466                // followed by ':'
467                if after_open < len && bytes[after_open].is_ascii_alphabetic() {
468                    let mut s = after_open + 1;
469                    let scheme_max = (after_open + 32).min(len);
470                    while s < scheme_max
471                        && (bytes[s].is_ascii_alphanumeric()
472                            || bytes[s] == b'+'
473                            || bytes[s] == b'-'
474                            || bytes[s] == b'.')
475                    {
476                        s += 1;
477                    }
478                    if s < len && bytes[s] == b':' {
479                        // Valid scheme found; scan for closing '>' with no spaces or '<'
480                        let mut j = s + 1;
481                        let mut found_close = false;
482                        while j < len {
483                            match bytes[j] {
484                                b'>' => {
485                                    found_close = true;
486                                    break;
487                                }
488                                b' ' | b'<' => break,
489                                _ => j += 1,
490                            }
491                        }
492                        if found_close && pos >= i && pos <= j {
493                            return true;
494                        }
495                        if found_close {
496                            i = j + 1;
497                            continue;
498                        }
499                    }
500                }
501            }
502            i += 1;
503        }
504        false
505    }
506
507    /// Check if `byte_pos` falls inside the URL of a `[[text]](url)` construct.
508    ///
509    /// pulldown-cmark with WikiLinks enabled parses `[[text]]` as a WikiLink and
510    /// records it in `ctx.links`, but the immediately following `(url)` is left as
511    /// plain text and is therefore absent from `ctx.links`. This function detects
512    /// that gap by looking for a WikiLink entry whose `byte_end` falls exactly on a
513    /// `(` in the raw content, then checking whether `byte_pos` lies inside the
514    /// matching parenthesised URL span.
515    ///
516    /// Unlike `is_in_markdown_link_url`, this function is anchored to real parser
517    /// output (`ctx.links`) and will not suppress violations in text that merely
518    /// looks like a link (e.g. `[foo](github x)` with a space in the URL).
519    fn is_in_wikilink_url(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
520        use pulldown_cmark::LinkType;
521        let content = ctx.content.as_bytes();
522
523        // ctx.links is sorted by byte_offset; only links that start at or before
524        // byte_pos can have a URL that encloses it.
525        let end = ctx.links.partition_point(|l| l.byte_offset <= byte_pos);
526
527        for link in &ctx.links[..end] {
528            if !matches!(link.link_type, LinkType::WikiLink { .. }) {
529                continue;
530            }
531            let wiki_end = link.byte_end;
532            // The WikiLink must end before byte_pos and be immediately followed by '('.
533            if wiki_end >= byte_pos || wiki_end >= content.len() || content[wiki_end] != b'(' {
534                continue;
535            }
536            // Scan to the matching ')' tracking nested parens and backslash escapes.
537            // Per CommonMark, an unquoted inline link destination cannot contain
538            // spaces, tabs, or newlines. If we encounter one, this is parenthesised
539            // prose rather than a URL, and pulldown-cmark will not parse it as a link.
540            let mut depth: u32 = 1;
541            let mut k = wiki_end + 1;
542            let mut valid_destination = true;
543            while k < content.len() && depth > 0 {
544                match content[k] {
545                    b'\\' => {
546                        k += 1; // skip escaped character
547                    }
548                    b'(' => depth += 1,
549                    b')' => depth -= 1,
550                    b' ' | b'\t' | b'\n' | b'\r' => {
551                        valid_destination = false;
552                        break;
553                    }
554                    _ => {}
555                }
556                k += 1;
557            }
558            // byte_pos is inside the URL if it falls between '(' and the matching ')'
559            // and the destination is valid (no unescaped whitespace).
560            if valid_destination && depth == 0 && byte_pos > wiki_end && byte_pos < k {
561                return true;
562            }
563        }
564        false
565    }
566
567    /// Check if a position within a line falls inside a Markdown link's
568    /// non-text portion (URL or reference label).
569    ///
570    /// Used as a text-level fallback for HTML comments, HTML blocks, and
571    /// frontmatter where pulldown-cmark skips link parsing entirely. Operates on
572    /// raw line bytes and therefore cannot distinguish real links from text that
573    /// merely resembles link syntax; do not call on regular markdown lines.
574    /// - `[text](url)` — returns true if `pos` is within `(...)`
575    /// - `[text][ref]` — returns true if `pos` is within the second `[...]`
576    fn is_in_markdown_link_url(line: &str, pos: usize) -> bool {
577        let bytes = line.as_bytes();
578        let len = bytes.len();
579        let mut i = 0;
580
581        while i < len {
582            // Look for unescaped '[' (handle double-escaped \\[ as unescaped)
583            if bytes[i] == b'[' && (i == 0 || bytes[i - 1] != b'\\' || (i >= 2 && bytes[i - 2] == b'\\')) {
584                // Find matching ']' handling nested brackets
585                let mut depth: u32 = 1;
586                let mut j = i + 1;
587                while j < len && depth > 0 {
588                    match bytes[j] {
589                        b'\\' => {
590                            j += 1; // skip escaped char
591                        }
592                        b'[' => depth += 1,
593                        b']' => depth -= 1,
594                        _ => {}
595                    }
596                    j += 1;
597                }
598
599                // j is now one past the ']'
600                if depth == 0 && j < len {
601                    if bytes[j] == b'(' {
602                        // Inline link: [text](url)
603                        let url_start = j;
604                        let mut paren_depth: u32 = 1;
605                        let mut k = j + 1;
606                        while k < len && paren_depth > 0 {
607                            match bytes[k] {
608                                b'\\' => {
609                                    k += 1; // skip escaped char
610                                }
611                                b'(' => paren_depth += 1,
612                                b')' => paren_depth -= 1,
613                                _ => {}
614                            }
615                            k += 1;
616                        }
617
618                        if paren_depth == 0 {
619                            if pos > url_start && pos < k {
620                                return true;
621                            }
622                            i = k;
623                            continue;
624                        }
625                    } else if bytes[j] == b'[' {
626                        // Reference link: [text][ref]
627                        let ref_start = j;
628                        let mut ref_depth: u32 = 1;
629                        let mut k = j + 1;
630                        while k < len && ref_depth > 0 {
631                            match bytes[k] {
632                                b'\\' => {
633                                    k += 1;
634                                }
635                                b'[' => ref_depth += 1,
636                                b']' => ref_depth -= 1,
637                                _ => {}
638                            }
639                            k += 1;
640                        }
641
642                        if ref_depth == 0 {
643                            if pos > ref_start && pos < k {
644                                return true;
645                            }
646                            i = k;
647                            continue;
648                        }
649                    }
650                }
651            }
652            i += 1;
653        }
654        false
655    }
656
657    /// Check if a position within a line falls inside backtick-delimited code.
658    ///
659    /// pulldown-cmark does not parse markdown syntax inside HTML comments, so
660    /// `ctx.is_in_code_block_or_span` returns false for backtick-wrapped text
661    /// within comments. This function detects backtick code spans directly in
662    /// the line text following CommonMark rules: a code span starts with N
663    /// backticks and ends with exactly N backticks.
664    fn is_in_backtick_code_in_line(line: &str, pos: usize) -> bool {
665        let bytes = line.as_bytes();
666        let len = bytes.len();
667        let mut i = 0;
668        while i < len {
669            if bytes[i] == b'`' {
670                // Count the opening backtick sequence length
671                let open_start = i;
672                while i < len && bytes[i] == b'`' {
673                    i += 1;
674                }
675                let tick_len = i - open_start;
676
677                // Scan forward for a closing sequence of exactly tick_len backticks
678                while i < len {
679                    if bytes[i] == b'`' {
680                        let close_start = i;
681                        while i < len && bytes[i] == b'`' {
682                            i += 1;
683                        }
684                        if i - close_start == tick_len {
685                            // Matched pair found; the code span content is between
686                            // the end of the opening backticks and the start of the
687                            // closing backticks (exclusive of the backticks themselves).
688                            let content_start = open_start + tick_len;
689                            let content_end = close_start;
690                            if pos >= content_start && pos < content_end {
691                                return true;
692                            }
693                            // Continue scanning after this pair
694                            break;
695                        }
696                        // Not the right length; keep scanning
697                    } else {
698                        i += 1;
699                    }
700                }
701            } else {
702                i += 1;
703            }
704        }
705        false
706    }
707
708    // Check if a character is a word boundary (handles Unicode)
709    fn is_word_boundary_char(c: char) -> bool {
710        !c.is_alphanumeric()
711    }
712
713    // Check if position is at a word boundary using byte-level lookups.
714    fn is_at_word_boundary(content: &str, pos: usize, is_start: bool) -> bool {
715        if is_start {
716            if pos == 0 {
717                return true;
718            }
719            match content[..pos].chars().next_back() {
720                None => true,
721                Some(c) => Self::is_word_boundary_char(c),
722            }
723        } else {
724            if pos >= content.len() {
725                return true;
726            }
727            match content[pos..].chars().next() {
728                None => true,
729                Some(c) => Self::is_word_boundary_char(c),
730            }
731        }
732    }
733
734    /// For a frontmatter line, return the byte offset where the checkable
735    /// value portion starts. Returns `usize::MAX` if the entire line should be
736    /// skipped (frontmatter delimiters, key-only lines, YAML comments, flow constructs).
737    fn frontmatter_value_offset(line: &str) -> usize {
738        let trimmed = line.trim();
739
740        // Skip frontmatter delimiters and empty lines
741        if trimmed == "---" || trimmed == "+++" || trimmed.is_empty() {
742            return usize::MAX;
743        }
744
745        // Skip YAML comments
746        if trimmed.starts_with('#') {
747            return usize::MAX;
748        }
749
750        // YAML list item: "  - item" or "  - key: value"
751        let stripped = line.trim_start();
752        if let Some(after_dash) = stripped.strip_prefix("- ") {
753            let leading = line.len() - stripped.len();
754            // Check if the list item contains a mapping (e.g., "- key: value")
755            if let Some(result) = Self::kv_value_offset(line, after_dash, leading + 2) {
756                return result;
757            }
758            // Bare list item value (no colon) - check content after "- "
759            return leading + 2;
760        }
761        if stripped == "-" {
762            return usize::MAX;
763        }
764
765        // Key-value pair with colon separator (YAML): "key: value"
766        if let Some(result) = Self::kv_value_offset(line, stripped, line.len() - stripped.len()) {
767            return result;
768        }
769
770        // Key-value pair with equals separator (TOML): "key = value"
771        if let Some(eq_pos) = line.find('=') {
772            let after_eq = eq_pos + 1;
773            if after_eq < line.len() && line.as_bytes()[after_eq] == b' ' {
774                let value_start = after_eq + 1;
775                let value_slice = &line[value_start..];
776                let value_trimmed = value_slice.trim();
777                if value_trimmed.is_empty() {
778                    return usize::MAX;
779                }
780                // For quoted values, skip the opening quote character
781                if (value_trimmed.starts_with('"') && value_trimmed.ends_with('"'))
782                    || (value_trimmed.starts_with('\'') && value_trimmed.ends_with('\''))
783                {
784                    let quote_offset = value_slice.find(['"', '\'']).unwrap_or(0);
785                    return value_start + quote_offset + 1;
786                }
787                return value_start;
788            }
789            // Equals with no space after or at end of line -> no value to check
790            return usize::MAX;
791        }
792
793        // No separator found - continuation line or bare value, check the whole line
794        0
795    }
796
797    /// Parse a key-value pair using colon separator within `content` that starts
798    /// at `base_offset` in the original line. Returns `Some(offset)` if a colon
799    /// separator is found, `None` if no colon is present.
800    fn kv_value_offset(line: &str, content: &str, base_offset: usize) -> Option<usize> {
801        let colon_pos = content.find(':')?;
802        let abs_colon = base_offset + colon_pos;
803        let after_colon = abs_colon + 1;
804        if after_colon < line.len() && line.as_bytes()[after_colon] == b' ' {
805            let value_start = after_colon + 1;
806            let value_slice = &line[value_start..];
807            let value_trimmed = value_slice.trim();
808            if value_trimmed.is_empty() {
809                return Some(usize::MAX);
810            }
811            // Skip flow mappings and flow sequences - too complex for heuristic parsing
812            if value_trimmed.starts_with('{') || value_trimmed.starts_with('[') {
813                return Some(usize::MAX);
814            }
815            // For quoted values, skip the opening quote character
816            if (value_trimmed.starts_with('"') && value_trimmed.ends_with('"'))
817                || (value_trimmed.starts_with('\'') && value_trimmed.ends_with('\''))
818            {
819                let quote_offset = value_slice.find(['"', '\'']).unwrap_or(0);
820                return Some(value_start + quote_offset + 1);
821            }
822            return Some(value_start);
823        }
824        // Colon with no space after or at end of line -> no value to check
825        Some(usize::MAX)
826    }
827
828    // Get the proper name that should be used for a found name
829    fn get_proper_name_for(&self, found_name: &str) -> Option<String> {
830        let found_lower = found_name.to_lowercase();
831
832        // Iterate through the configured proper names
833        for name in &self.config.names {
834            let lower_name = name.to_lowercase();
835            let lower_name_no_dots = lower_name.replace('.', "");
836
837            // Direct match
838            if found_lower == lower_name || found_lower == lower_name_no_dots {
839                return Some(name.clone());
840            }
841
842            // Check ASCII-normalized version
843            let ascii_normalized = Self::ascii_normalize(&lower_name);
844
845            let ascii_no_dots = ascii_normalized.replace('.', "");
846
847            if found_lower == ascii_normalized || found_lower == ascii_no_dots {
848                return Some(name.clone());
849            }
850        }
851        None
852    }
853}
854
855impl Rule for MD044ProperNames {
856    fn name(&self) -> &'static str {
857        "MD044"
858    }
859
860    fn description(&self) -> &'static str {
861        "Proper names should have the correct capitalization"
862    }
863
864    fn category(&self) -> RuleCategory {
865        RuleCategory::Other
866    }
867
868    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
869        if self.config.names.is_empty() {
870            return true;
871        }
872        // Quick check if any configured name variants exist (case-insensitive)
873        let content_lower = if ctx.content.is_ascii() {
874            ctx.content.to_ascii_lowercase()
875        } else {
876            ctx.content.to_lowercase()
877        };
878        !self.name_variants.iter().any(|name| content_lower.contains(name))
879    }
880
881    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
882        let content = ctx.content;
883        if content.is_empty() || self.config.names.is_empty() || self.combined_pattern.is_none() {
884            return Ok(Vec::new());
885        }
886
887        // Compute lowercase content once and reuse across all checks
888        let content_lower = if content.is_ascii() {
889            content.to_ascii_lowercase()
890        } else {
891            content.to_lowercase()
892        };
893
894        // Early return: use pre-computed name_variants for the quick check
895        let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
896
897        if !has_potential_matches {
898            return Ok(Vec::new());
899        }
900
901        let line_index = &ctx.line_index;
902        let violations = self.find_name_violations(content, ctx, &content_lower);
903
904        let warnings = violations
905            .into_iter()
906            .filter_map(|(line, column, found_name)| {
907                self.get_proper_name_for(&found_name).map(|proper_name| LintWarning {
908                    rule_name: Some(self.name().to_string()),
909                    line,
910                    column,
911                    end_line: line,
912                    end_column: column + found_name.len(),
913                    message: format!("Proper name '{found_name}' should be '{proper_name}'"),
914                    severity: Severity::Warning,
915                    fix: Some(Fix {
916                        range: line_index.line_col_to_byte_range_with_length(line, column, found_name.len()),
917                        replacement: proper_name,
918                    }),
919                })
920            })
921            .collect();
922
923        Ok(warnings)
924    }
925
926    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
927        let content = ctx.content;
928        if content.is_empty() || self.config.names.is_empty() {
929            return Ok(content.to_string());
930        }
931
932        let content_lower = if content.is_ascii() {
933            content.to_ascii_lowercase()
934        } else {
935            content.to_lowercase()
936        };
937        let violations = self.find_name_violations(content, ctx, &content_lower);
938        if violations.is_empty() {
939            return Ok(content.to_string());
940        }
941
942        // Process lines and build the fixed content
943        let mut fixed_lines = Vec::new();
944
945        // Group violations by line
946        let mut violations_by_line: HashMap<usize, Vec<(usize, String)>> = HashMap::new();
947        for (line_num, col_num, found_name) in violations {
948            violations_by_line
949                .entry(line_num)
950                .or_default()
951                .push((col_num, found_name));
952        }
953
954        // Sort violations within each line in reverse order
955        for violations in violations_by_line.values_mut() {
956            violations.sort_by_key(|b| std::cmp::Reverse(b.0));
957        }
958
959        // Process each line
960        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
961            let line_num = line_idx + 1;
962
963            // Skip lines where this rule is disabled by inline config
964            if ctx.inline_config().is_rule_disabled(self.name(), line_num) {
965                fixed_lines.push(line_info.content(ctx.content).to_string());
966                continue;
967            }
968
969            if let Some(line_violations) = violations_by_line.get(&line_num) {
970                // This line has violations, fix them
971                let mut fixed_line = line_info.content(ctx.content).to_string();
972
973                for (col_num, found_name) in line_violations {
974                    if let Some(proper_name) = self.get_proper_name_for(found_name) {
975                        let start_col = col_num - 1; // Convert to 0-based
976                        let end_col = start_col + found_name.len();
977
978                        if end_col <= fixed_line.len()
979                            && fixed_line.is_char_boundary(start_col)
980                            && fixed_line.is_char_boundary(end_col)
981                        {
982                            fixed_line.replace_range(start_col..end_col, &proper_name);
983                        }
984                    }
985                }
986
987                fixed_lines.push(fixed_line);
988            } else {
989                // No violations on this line, keep it as is
990                fixed_lines.push(line_info.content(ctx.content).to_string());
991            }
992        }
993
994        // Join lines with newlines, preserving the original ending
995        let mut result = fixed_lines.join("\n");
996        if content.ends_with('\n') && !result.ends_with('\n') {
997            result.push('\n');
998        }
999        Ok(result)
1000    }
1001
1002    fn as_any(&self) -> &dyn std::any::Any {
1003        self
1004    }
1005
1006    fn default_config_section(&self) -> Option<(String, toml::Value)> {
1007        let json_value = serde_json::to_value(&self.config).ok()?;
1008        Some((
1009            self.name().to_string(),
1010            crate::rule_config_serde::json_to_toml_value(&json_value)?,
1011        ))
1012    }
1013
1014    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
1015    where
1016        Self: Sized,
1017    {
1018        let rule_config = crate::rule_config_serde::load_rule_config::<MD044Config>(config);
1019        Box::new(Self::from_config_struct(rule_config))
1020    }
1021}
1022
1023#[cfg(test)]
1024mod tests {
1025    use super::*;
1026    use crate::lint_context::LintContext;
1027
1028    fn create_context(content: &str) -> LintContext<'_> {
1029        LintContext::new(content, crate::config::MarkdownFlavor::Standard, None)
1030    }
1031
1032    #[test]
1033    fn test_correctly_capitalized_names() {
1034        let rule = MD044ProperNames::new(
1035            vec![
1036                "JavaScript".to_string(),
1037                "TypeScript".to_string(),
1038                "Node.js".to_string(),
1039            ],
1040            true,
1041        );
1042
1043        let content = "This document uses JavaScript, TypeScript, and Node.js correctly.";
1044        let ctx = create_context(content);
1045        let result = rule.check(&ctx).unwrap();
1046        assert!(result.is_empty(), "Should not flag correctly capitalized names");
1047    }
1048
1049    #[test]
1050    fn test_incorrectly_capitalized_names() {
1051        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
1052
1053        let content = "This document uses javascript and typescript incorrectly.";
1054        let ctx = create_context(content);
1055        let result = rule.check(&ctx).unwrap();
1056
1057        assert_eq!(result.len(), 2, "Should flag two incorrect capitalizations");
1058        assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
1059        assert_eq!(result[0].line, 1);
1060        assert_eq!(result[0].column, 20);
1061        assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
1062        assert_eq!(result[1].line, 1);
1063        assert_eq!(result[1].column, 35);
1064    }
1065
1066    #[test]
1067    fn test_names_at_beginning_of_sentences() {
1068        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "Python".to_string()], true);
1069
1070        let content = "javascript is a great language. python is also popular.";
1071        let ctx = create_context(content);
1072        let result = rule.check(&ctx).unwrap();
1073
1074        assert_eq!(result.len(), 2, "Should flag names at beginning of sentences");
1075        assert_eq!(result[0].line, 1);
1076        assert_eq!(result[0].column, 1);
1077        assert_eq!(result[1].line, 1);
1078        assert_eq!(result[1].column, 33);
1079    }
1080
1081    #[test]
1082    fn test_names_in_code_blocks_checked_by_default() {
1083        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1084
1085        let content = r#"Here is some text with JavaScript.
1086
1087```javascript
1088// This javascript should be checked
1089const lang = "javascript";
1090```
1091
1092But this javascript should be flagged."#;
1093
1094        let ctx = create_context(content);
1095        let result = rule.check(&ctx).unwrap();
1096
1097        assert_eq!(result.len(), 3, "Should flag javascript inside and outside code blocks");
1098        assert_eq!(result[0].line, 4);
1099        assert_eq!(result[1].line, 5);
1100        assert_eq!(result[2].line, 8);
1101    }
1102
1103    #[test]
1104    fn test_names_in_code_blocks_ignored_when_disabled() {
1105        let rule = MD044ProperNames::new(
1106            vec!["JavaScript".to_string()],
1107            false, // code_blocks = false means skip code blocks
1108        );
1109
1110        let content = r#"```
1111javascript in code block
1112```"#;
1113
1114        let ctx = create_context(content);
1115        let result = rule.check(&ctx).unwrap();
1116
1117        assert_eq!(
1118            result.len(),
1119            0,
1120            "Should not flag javascript in code blocks when code_blocks is false"
1121        );
1122    }
1123
1124    #[test]
1125    fn test_names_in_inline_code_checked_by_default() {
1126        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1127
1128        let content = "This is `javascript` in inline code and javascript outside.";
1129        let ctx = create_context(content);
1130        let result = rule.check(&ctx).unwrap();
1131
1132        // When code_blocks=true, inline code should be checked
1133        assert_eq!(result.len(), 2, "Should flag javascript inside and outside inline code");
1134        assert_eq!(result[0].column, 10); // javascript in inline code
1135        assert_eq!(result[1].column, 41); // javascript outside
1136    }
1137
1138    #[test]
1139    fn test_multiple_names_in_same_line() {
1140        let rule = MD044ProperNames::new(
1141            vec!["JavaScript".to_string(), "TypeScript".to_string(), "React".to_string()],
1142            true,
1143        );
1144
1145        let content = "I use javascript, typescript, and react in my projects.";
1146        let ctx = create_context(content);
1147        let result = rule.check(&ctx).unwrap();
1148
1149        assert_eq!(result.len(), 3, "Should flag all three incorrect names");
1150        assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
1151        assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
1152        assert_eq!(result[2].message, "Proper name 'react' should be 'React'");
1153    }
1154
1155    #[test]
1156    fn test_case_sensitivity() {
1157        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1158
1159        let content = "JAVASCRIPT, Javascript, javascript, and JavaScript variations.";
1160        let ctx = create_context(content);
1161        let result = rule.check(&ctx).unwrap();
1162
1163        assert_eq!(result.len(), 3, "Should flag all incorrect case variations");
1164        // JavaScript (correct) should not be flagged
1165        assert!(result.iter().all(|w| w.message.contains("should be 'JavaScript'")));
1166    }
1167
1168    #[test]
1169    fn test_configuration_with_custom_name_list() {
1170        let config = MD044Config {
1171            names: vec!["GitHub".to_string(), "GitLab".to_string(), "DevOps".to_string()],
1172            code_blocks: true,
1173            html_elements: true,
1174            html_comments: true,
1175        };
1176        let rule = MD044ProperNames::from_config_struct(config);
1177
1178        let content = "We use github, gitlab, and devops for our workflow.";
1179        let ctx = create_context(content);
1180        let result = rule.check(&ctx).unwrap();
1181
1182        assert_eq!(result.len(), 3, "Should flag all custom names");
1183        assert_eq!(result[0].message, "Proper name 'github' should be 'GitHub'");
1184        assert_eq!(result[1].message, "Proper name 'gitlab' should be 'GitLab'");
1185        assert_eq!(result[2].message, "Proper name 'devops' should be 'DevOps'");
1186    }
1187
1188    #[test]
1189    fn test_empty_configuration() {
1190        let rule = MD044ProperNames::new(vec![], true);
1191
1192        let content = "This has javascript and typescript but no configured names.";
1193        let ctx = create_context(content);
1194        let result = rule.check(&ctx).unwrap();
1195
1196        assert!(result.is_empty(), "Should not flag anything with empty configuration");
1197    }
1198
1199    #[test]
1200    fn test_names_with_special_characters() {
1201        let rule = MD044ProperNames::new(
1202            vec!["Node.js".to_string(), "ASP.NET".to_string(), "C++".to_string()],
1203            true,
1204        );
1205
1206        let content = "We use nodejs, asp.net, ASP.NET, and c++ in our stack.";
1207        let ctx = create_context(content);
1208        let result = rule.check(&ctx).unwrap();
1209
1210        // nodejs should match Node.js (dotless variation)
1211        // asp.net should be flagged (wrong case)
1212        // ASP.NET should not be flagged (correct)
1213        // c++ should be flagged
1214        assert_eq!(result.len(), 3, "Should handle special characters correctly");
1215
1216        let messages: Vec<&str> = result.iter().map(|w| w.message.as_str()).collect();
1217        assert!(messages.contains(&"Proper name 'nodejs' should be 'Node.js'"));
1218        assert!(messages.contains(&"Proper name 'asp.net' should be 'ASP.NET'"));
1219        assert!(messages.contains(&"Proper name 'c++' should be 'C++'"));
1220    }
1221
1222    #[test]
1223    fn test_word_boundaries() {
1224        let rule = MD044ProperNames::new(vec!["Java".to_string(), "Script".to_string()], true);
1225
1226        let content = "JavaScript is not java or script, but Java and Script are separate.";
1227        let ctx = create_context(content);
1228        let result = rule.check(&ctx).unwrap();
1229
1230        // Should only flag lowercase "java" and "script" as separate words
1231        assert_eq!(result.len(), 2, "Should respect word boundaries");
1232        assert!(result.iter().any(|w| w.column == 19)); // "java" position
1233        assert!(result.iter().any(|w| w.column == 27)); // "script" position
1234    }
1235
1236    #[test]
1237    fn test_fix_method() {
1238        let rule = MD044ProperNames::new(
1239            vec![
1240                "JavaScript".to_string(),
1241                "TypeScript".to_string(),
1242                "Node.js".to_string(),
1243            ],
1244            true,
1245        );
1246
1247        let content = "I love javascript, typescript, and nodejs!";
1248        let ctx = create_context(content);
1249        let fixed = rule.fix(&ctx).unwrap();
1250
1251        assert_eq!(fixed, "I love JavaScript, TypeScript, and Node.js!");
1252    }
1253
1254    #[test]
1255    fn test_fix_multiple_occurrences() {
1256        let rule = MD044ProperNames::new(vec!["Python".to_string()], true);
1257
1258        let content = "python is great. I use python daily. PYTHON is powerful.";
1259        let ctx = create_context(content);
1260        let fixed = rule.fix(&ctx).unwrap();
1261
1262        assert_eq!(fixed, "Python is great. I use Python daily. Python is powerful.");
1263    }
1264
1265    #[test]
1266    fn test_fix_checks_code_blocks_by_default() {
1267        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1268
1269        let content = r#"I love javascript.
1270
1271```
1272const lang = "javascript";
1273```
1274
1275More javascript here."#;
1276
1277        let ctx = create_context(content);
1278        let fixed = rule.fix(&ctx).unwrap();
1279
1280        let expected = r#"I love JavaScript.
1281
1282```
1283const lang = "JavaScript";
1284```
1285
1286More JavaScript here."#;
1287
1288        assert_eq!(fixed, expected);
1289    }
1290
1291    #[test]
1292    fn test_multiline_content() {
1293        let rule = MD044ProperNames::new(vec!["Rust".to_string(), "Python".to_string()], true);
1294
1295        let content = r#"First line with rust.
1296Second line with python.
1297Third line with RUST and PYTHON."#;
1298
1299        let ctx = create_context(content);
1300        let result = rule.check(&ctx).unwrap();
1301
1302        assert_eq!(result.len(), 4, "Should flag all incorrect occurrences");
1303        assert_eq!(result[0].line, 1);
1304        assert_eq!(result[1].line, 2);
1305        assert_eq!(result[2].line, 3);
1306        assert_eq!(result[3].line, 3);
1307    }
1308
1309    #[test]
1310    fn test_default_config() {
1311        let config = MD044Config::default();
1312        assert!(config.names.is_empty());
1313        assert!(!config.code_blocks);
1314        assert!(config.html_elements);
1315        assert!(config.html_comments);
1316    }
1317
1318    #[test]
1319    fn test_default_config_checks_html_comments() {
1320        let config = MD044Config {
1321            names: vec!["JavaScript".to_string()],
1322            ..MD044Config::default()
1323        };
1324        let rule = MD044ProperNames::from_config_struct(config);
1325
1326        let content = "# Guide\n\n<!-- javascript mentioned here -->\n";
1327        let ctx = create_context(content);
1328        let result = rule.check(&ctx).unwrap();
1329
1330        assert_eq!(result.len(), 1, "Default config should check HTML comments");
1331        assert_eq!(result[0].line, 3);
1332    }
1333
1334    #[test]
1335    fn test_default_config_skips_code_blocks() {
1336        let config = MD044Config {
1337            names: vec!["JavaScript".to_string()],
1338            ..MD044Config::default()
1339        };
1340        let rule = MD044ProperNames::from_config_struct(config);
1341
1342        let content = "# Guide\n\n```\njavascript in code\n```\n";
1343        let ctx = create_context(content);
1344        let result = rule.check(&ctx).unwrap();
1345
1346        assert_eq!(result.len(), 0, "Default config should skip code blocks");
1347    }
1348
1349    #[test]
1350    fn test_standalone_html_comment_checked() {
1351        let config = MD044Config {
1352            names: vec!["Test".to_string()],
1353            ..MD044Config::default()
1354        };
1355        let rule = MD044ProperNames::from_config_struct(config);
1356
1357        let content = "# Heading\n\n<!-- this is a test example -->\n";
1358        let ctx = create_context(content);
1359        let result = rule.check(&ctx).unwrap();
1360
1361        assert_eq!(result.len(), 1, "Should flag proper name in standalone HTML comment");
1362        assert_eq!(result[0].line, 3);
1363    }
1364
1365    #[test]
1366    fn test_inline_config_comments_not_flagged() {
1367        let config = MD044Config {
1368            names: vec!["RUMDL".to_string()],
1369            ..MD044Config::default()
1370        };
1371        let rule = MD044ProperNames::from_config_struct(config);
1372
1373        // Lines 1, 3, 4, 6 are inline config comments — should not be flagged.
1374        // Lines 2, 5 contain "rumdl" in regular text — flagged by rule.check(),
1375        // but would be suppressed by the linting engine's inline config filtering.
1376        let content = "<!-- rumdl-disable MD044 -->\nSome rumdl text here.\n<!-- rumdl-enable MD044 -->\n<!-- markdownlint-disable -->\nMore rumdl text.\n<!-- markdownlint-enable -->\n";
1377        let ctx = create_context(content);
1378        let result = rule.check(&ctx).unwrap();
1379
1380        assert_eq!(result.len(), 2, "Should only flag body lines, not config comments");
1381        assert_eq!(result[0].line, 2);
1382        assert_eq!(result[1].line, 5);
1383    }
1384
1385    #[test]
1386    fn test_html_comment_skipped_when_disabled() {
1387        let config = MD044Config {
1388            names: vec!["Test".to_string()],
1389            code_blocks: true,
1390            html_elements: true,
1391            html_comments: false,
1392        };
1393        let rule = MD044ProperNames::from_config_struct(config);
1394
1395        let content = "# Heading\n\n<!-- this is a test example -->\n\nRegular test here.\n";
1396        let ctx = create_context(content);
1397        let result = rule.check(&ctx).unwrap();
1398
1399        assert_eq!(
1400            result.len(),
1401            1,
1402            "Should only flag 'test' outside HTML comment when html_comments=false"
1403        );
1404        assert_eq!(result[0].line, 5);
1405    }
1406
1407    #[test]
1408    fn test_fix_corrects_html_comment_content() {
1409        let config = MD044Config {
1410            names: vec!["JavaScript".to_string()],
1411            ..MD044Config::default()
1412        };
1413        let rule = MD044ProperNames::from_config_struct(config);
1414
1415        let content = "# Guide\n\n<!-- javascript mentioned here -->\n";
1416        let ctx = create_context(content);
1417        let fixed = rule.fix(&ctx).unwrap();
1418
1419        assert_eq!(fixed, "# Guide\n\n<!-- JavaScript mentioned here -->\n");
1420    }
1421
1422    #[test]
1423    fn test_fix_does_not_modify_inline_config_comments() {
1424        let config = MD044Config {
1425            names: vec!["RUMDL".to_string()],
1426            ..MD044Config::default()
1427        };
1428        let rule = MD044ProperNames::from_config_struct(config);
1429
1430        let content = "<!-- rumdl-disable -->\nSome rumdl text.\n<!-- rumdl-enable -->\n";
1431        let ctx = create_context(content);
1432        let fixed = rule.fix(&ctx).unwrap();
1433
1434        // Config comments should be untouched
1435        assert!(fixed.contains("<!-- rumdl-disable -->"));
1436        assert!(fixed.contains("<!-- rumdl-enable -->"));
1437        // Body text inside disable block should NOT be fixed (rule is disabled)
1438        assert!(
1439            fixed.contains("Some rumdl text."),
1440            "Line inside rumdl-disable block should not be modified by fix()"
1441        );
1442    }
1443
1444    #[test]
1445    fn test_fix_respects_inline_disable_partial() {
1446        let config = MD044Config {
1447            names: vec!["RUMDL".to_string()],
1448            ..MD044Config::default()
1449        };
1450        let rule = MD044ProperNames::from_config_struct(config);
1451
1452        let content =
1453            "<!-- rumdl-disable MD044 -->\nSome rumdl text.\n<!-- rumdl-enable MD044 -->\n\nSome rumdl text outside.\n";
1454        let ctx = create_context(content);
1455        let fixed = rule.fix(&ctx).unwrap();
1456
1457        // Line inside disable block should be preserved
1458        assert!(
1459            fixed.contains("Some rumdl text.\n<!-- rumdl-enable"),
1460            "Line inside disable block should not be modified"
1461        );
1462        // Line outside disable block should be fixed
1463        assert!(
1464            fixed.contains("Some RUMDL text outside."),
1465            "Line outside disable block should be fixed"
1466        );
1467    }
1468
1469    #[test]
1470    fn test_performance_with_many_names() {
1471        let mut names = vec![];
1472        for i in 0..50 {
1473            names.push(format!("ProperName{i}"));
1474        }
1475
1476        let rule = MD044ProperNames::new(names, true);
1477
1478        let content = "This has propername0, propername25, and propername49 incorrectly.";
1479        let ctx = create_context(content);
1480        let result = rule.check(&ctx).unwrap();
1481
1482        assert_eq!(result.len(), 3, "Should handle many configured names efficiently");
1483    }
1484
1485    #[test]
1486    fn test_large_name_count_performance() {
1487        // Verify MD044 can handle large numbers of names without regex limitations
1488        // This test confirms that fancy-regex handles large patterns well
1489        let names = (0..1000).map(|i| format!("ProperName{i}")).collect::<Vec<_>>();
1490
1491        let rule = MD044ProperNames::new(names, true);
1492
1493        // The combined pattern should be created successfully
1494        assert!(rule.combined_pattern.is_some());
1495
1496        // Should be able to check content without errors
1497        let content = "This has propername0 and propername999 in it.";
1498        let ctx = create_context(content);
1499        let result = rule.check(&ctx).unwrap();
1500
1501        // Should detect both incorrect names
1502        assert_eq!(result.len(), 2, "Should handle 1000 names without issues");
1503    }
1504
1505    #[test]
1506    fn test_cache_behavior() {
1507        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1508
1509        let content = "Using javascript here.";
1510        let ctx = create_context(content);
1511
1512        // First check
1513        let result1 = rule.check(&ctx).unwrap();
1514        assert_eq!(result1.len(), 1);
1515
1516        // Second check should use cache
1517        let result2 = rule.check(&ctx).unwrap();
1518        assert_eq!(result2.len(), 1);
1519
1520        // Results should be identical
1521        assert_eq!(result1[0].line, result2[0].line);
1522        assert_eq!(result1[0].column, result2[0].column);
1523    }
1524
1525    #[test]
1526    fn test_html_comments_not_checked_when_disabled() {
1527        let config = MD044Config {
1528            names: vec!["JavaScript".to_string()],
1529            code_blocks: true,    // Check code blocks
1530            html_elements: true,  // Check HTML elements
1531            html_comments: false, // Don't check HTML comments
1532        };
1533        let rule = MD044ProperNames::from_config_struct(config);
1534
1535        let content = r#"Regular javascript here.
1536<!-- This javascript in HTML comment should be ignored -->
1537More javascript outside."#;
1538
1539        let ctx = create_context(content);
1540        let result = rule.check(&ctx).unwrap();
1541
1542        assert_eq!(result.len(), 2, "Should only flag javascript outside HTML comments");
1543        assert_eq!(result[0].line, 1);
1544        assert_eq!(result[1].line, 3);
1545    }
1546
1547    #[test]
1548    fn test_html_comments_checked_when_enabled() {
1549        let config = MD044Config {
1550            names: vec!["JavaScript".to_string()],
1551            code_blocks: true,   // Check code blocks
1552            html_elements: true, // Check HTML elements
1553            html_comments: true, // Check HTML comments
1554        };
1555        let rule = MD044ProperNames::from_config_struct(config);
1556
1557        let content = r#"Regular javascript here.
1558<!-- This javascript in HTML comment should be checked -->
1559More javascript outside."#;
1560
1561        let ctx = create_context(content);
1562        let result = rule.check(&ctx).unwrap();
1563
1564        assert_eq!(
1565            result.len(),
1566            3,
1567            "Should flag all javascript occurrences including in HTML comments"
1568        );
1569    }
1570
1571    #[test]
1572    fn test_multiline_html_comments() {
1573        let config = MD044Config {
1574            names: vec!["Python".to_string(), "JavaScript".to_string()],
1575            code_blocks: true,    // Check code blocks
1576            html_elements: true,  // Check HTML elements
1577            html_comments: false, // Don't check HTML comments
1578        };
1579        let rule = MD044ProperNames::from_config_struct(config);
1580
1581        let content = r#"Regular python here.
1582<!--
1583This is a multiline comment
1584with javascript and python
1585that should be ignored
1586-->
1587More javascript outside."#;
1588
1589        let ctx = create_context(content);
1590        let result = rule.check(&ctx).unwrap();
1591
1592        assert_eq!(result.len(), 2, "Should only flag names outside HTML comments");
1593        assert_eq!(result[0].line, 1); // python
1594        assert_eq!(result[1].line, 7); // javascript
1595    }
1596
1597    #[test]
1598    fn test_fix_preserves_html_comments_when_disabled() {
1599        let config = MD044Config {
1600            names: vec!["JavaScript".to_string()],
1601            code_blocks: true,    // Check code blocks
1602            html_elements: true,  // Check HTML elements
1603            html_comments: false, // Don't check HTML comments
1604        };
1605        let rule = MD044ProperNames::from_config_struct(config);
1606
1607        let content = r#"javascript here.
1608<!-- javascript in comment -->
1609More javascript."#;
1610
1611        let ctx = create_context(content);
1612        let fixed = rule.fix(&ctx).unwrap();
1613
1614        let expected = r#"JavaScript here.
1615<!-- javascript in comment -->
1616More JavaScript."#;
1617
1618        assert_eq!(
1619            fixed, expected,
1620            "Should not fix names inside HTML comments when disabled"
1621        );
1622    }
1623
1624    #[test]
1625    fn test_proper_names_in_link_text_are_flagged() {
1626        let rule = MD044ProperNames::new(
1627            vec!["JavaScript".to_string(), "Node.js".to_string(), "Python".to_string()],
1628            true,
1629        );
1630
1631        let content = r#"Check this [javascript documentation](https://javascript.info) for info.
1632
1633Visit [node.js homepage](https://nodejs.org) and [python tutorial](https://python.org).
1634
1635Real javascript should be flagged.
1636
1637Also see the [typescript guide][ts-ref] for more.
1638
1639Real python should be flagged too.
1640
1641[ts-ref]: https://typescript.org/handbook"#;
1642
1643        let ctx = create_context(content);
1644        let result = rule.check(&ctx).unwrap();
1645
1646        // Link text should be checked, URLs should not be checked
1647        // Line 1: [javascript documentation] - "javascript" should be flagged
1648        // Line 3: [node.js homepage] - "node.js" should be flagged (matches "Node.js")
1649        // Line 3: [python tutorial] - "python" should be flagged
1650        // Line 5: standalone javascript
1651        // Line 9: standalone python
1652        assert_eq!(result.len(), 5, "Expected 5 warnings: 3 in link text + 2 standalone");
1653
1654        // Verify line numbers for link text warnings
1655        let line_1_warnings: Vec<_> = result.iter().filter(|w| w.line == 1).collect();
1656        assert_eq!(line_1_warnings.len(), 1);
1657        assert!(
1658            line_1_warnings[0]
1659                .message
1660                .contains("'javascript' should be 'JavaScript'")
1661        );
1662
1663        let line_3_warnings: Vec<_> = result.iter().filter(|w| w.line == 3).collect();
1664        assert_eq!(line_3_warnings.len(), 2); // node.js and python
1665
1666        // Standalone warnings
1667        assert!(result.iter().any(|w| w.line == 5 && w.message.contains("'javascript'")));
1668        assert!(result.iter().any(|w| w.line == 9 && w.message.contains("'python'")));
1669    }
1670
1671    #[test]
1672    fn test_link_urls_not_flagged() {
1673        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1674
1675        // URL contains "javascript" but should NOT be flagged
1676        let content = r#"[Link Text](https://javascript.info/guide)"#;
1677
1678        let ctx = create_context(content);
1679        let result = rule.check(&ctx).unwrap();
1680
1681        // URL should not be checked
1682        assert!(result.is_empty(), "URLs should not be checked for proper names");
1683    }
1684
1685    #[test]
1686    fn test_proper_names_in_image_alt_text_are_flagged() {
1687        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1688
1689        let content = r#"Here is a ![javascript logo](javascript.png "javascript icon") image.
1690
1691Real javascript should be flagged."#;
1692
1693        let ctx = create_context(content);
1694        let result = rule.check(&ctx).unwrap();
1695
1696        // Image alt text should be checked, URL and title should not be checked
1697        // Line 1: ![javascript logo] - "javascript" should be flagged
1698        // Line 3: standalone javascript
1699        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in alt text + 1 standalone");
1700        assert!(result[0].message.contains("'javascript' should be 'JavaScript'"));
1701        assert!(result[0].line == 1); // "![javascript logo]"
1702        assert!(result[1].message.contains("'javascript' should be 'JavaScript'"));
1703        assert!(result[1].line == 3); // "Real javascript should be flagged."
1704    }
1705
1706    #[test]
1707    fn test_image_urls_not_flagged() {
1708        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1709
1710        // URL contains "javascript" but should NOT be flagged
1711        let content = r#"![Logo](https://javascript.info/logo.png)"#;
1712
1713        let ctx = create_context(content);
1714        let result = rule.check(&ctx).unwrap();
1715
1716        // Image URL should not be checked
1717        assert!(result.is_empty(), "Image URLs should not be checked for proper names");
1718    }
1719
1720    #[test]
1721    fn test_reference_link_text_flagged_but_definition_not() {
1722        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
1723
1724        let content = r#"Check the [javascript guide][js-ref] for details.
1725
1726Real javascript should be flagged.
1727
1728[js-ref]: https://javascript.info/typescript/guide"#;
1729
1730        let ctx = create_context(content);
1731        let result = rule.check(&ctx).unwrap();
1732
1733        // Link text should be checked, reference definitions should not
1734        // Line 1: [javascript guide] - should be flagged
1735        // Line 3: standalone javascript - should be flagged
1736        // Line 5: reference definition - should NOT be flagged
1737        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in link text + 1 standalone");
1738        assert!(result.iter().any(|w| w.line == 1 && w.message.contains("'javascript'")));
1739        assert!(result.iter().any(|w| w.line == 3 && w.message.contains("'javascript'")));
1740    }
1741
1742    #[test]
1743    fn test_reference_definitions_not_flagged() {
1744        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1745
1746        // Reference definition should NOT be flagged
1747        let content = r#"[js-ref]: https://javascript.info/guide"#;
1748
1749        let ctx = create_context(content);
1750        let result = rule.check(&ctx).unwrap();
1751
1752        // Reference definition URLs should not be checked
1753        assert!(result.is_empty(), "Reference definitions should not be checked");
1754    }
1755
1756    #[test]
1757    fn test_wikilinks_text_is_flagged() {
1758        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1759
1760        // WikiLinks [[destination]] should have their text checked
1761        let content = r#"[[javascript]]
1762
1763Regular javascript here.
1764
1765[[JavaScript|display text]]"#;
1766
1767        let ctx = create_context(content);
1768        let result = rule.check(&ctx).unwrap();
1769
1770        // Line 1: [[javascript]] - should be flagged (WikiLink text)
1771        // Line 3: standalone javascript - should be flagged
1772        // Line 5: [[JavaScript|display text]] - correct capitalization, no flag
1773        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in WikiLink + 1 standalone");
1774        assert!(
1775            result
1776                .iter()
1777                .any(|w| w.line == 1 && w.column == 3 && w.message.contains("'javascript'"))
1778        );
1779        assert!(result.iter().any(|w| w.line == 3 && w.message.contains("'javascript'")));
1780    }
1781
1782    #[test]
1783    fn test_url_link_text_not_flagged() {
1784        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1785
1786        // Link text that is itself a URL should not be flagged
1787        let content = r#"[https://github.com/org/repo](https://github.com/org/repo)
1788
1789[http://github.com/org/repo](http://github.com/org/repo)
1790
1791[www.github.com/org/repo](https://www.github.com/org/repo)"#;
1792
1793        let ctx = create_context(content);
1794        let result = rule.check(&ctx).unwrap();
1795
1796        assert!(
1797            result.is_empty(),
1798            "URL-like link text should not be flagged, got: {result:?}"
1799        );
1800    }
1801
1802    #[test]
1803    fn test_url_link_text_with_leading_space_not_flagged() {
1804        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1805
1806        // Leading/trailing whitespace in link text should be trimmed before URL check
1807        let content = r#"[ https://github.com/org/repo](https://github.com/org/repo)"#;
1808
1809        let ctx = create_context(content);
1810        let result = rule.check(&ctx).unwrap();
1811
1812        assert!(
1813            result.is_empty(),
1814            "URL-like link text with leading space should not be flagged, got: {result:?}"
1815        );
1816    }
1817
1818    #[test]
1819    fn test_url_link_text_uppercase_scheme_not_flagged() {
1820        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1821
1822        let content = r#"[HTTPS://GITHUB.COM/org/repo](https://github.com/org/repo)"#;
1823
1824        let ctx = create_context(content);
1825        let result = rule.check(&ctx).unwrap();
1826
1827        assert!(
1828            result.is_empty(),
1829            "URL-like link text with uppercase scheme should not be flagged, got: {result:?}"
1830        );
1831    }
1832
1833    #[test]
1834    fn test_non_url_link_text_still_flagged() {
1835        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1836
1837        // Link text that is NOT a URL should still be flagged
1838        let content = r#"[github.com/org/repo](https://github.com/org/repo)
1839
1840[Visit github](https://github.com/org/repo)
1841
1842[//github.com/org/repo](//github.com/org/repo)
1843
1844[ftp://github.com/org/repo](ftp://github.com/org/repo)"#;
1845
1846        let ctx = create_context(content);
1847        let result = rule.check(&ctx).unwrap();
1848
1849        assert_eq!(result.len(), 4, "Non-URL link text should be flagged, got: {result:?}");
1850        assert!(result.iter().any(|w| w.line == 1)); // github.com (no protocol)
1851        assert!(result.iter().any(|w| w.line == 3)); // Visit github
1852        assert!(result.iter().any(|w| w.line == 5)); // //github.com (protocol-relative)
1853        assert!(result.iter().any(|w| w.line == 7)); // ftp://github.com
1854    }
1855
1856    #[test]
1857    fn test_url_link_text_fix_not_applied() {
1858        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1859
1860        let content = "[https://github.com/org/repo](https://github.com/org/repo)\n";
1861
1862        let ctx = create_context(content);
1863        let result = rule.fix(&ctx).unwrap();
1864
1865        assert_eq!(result, content, "Fix should not modify URL-like link text");
1866    }
1867
1868    #[test]
1869    fn test_mixed_url_and_regular_link_text() {
1870        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1871
1872        // Mix of URL link text (should skip) and regular text (should flag)
1873        let content = r#"[https://github.com/org/repo](https://github.com/org/repo)
1874
1875Visit [github documentation](https://github.com/docs) for details.
1876
1877[www.github.com/pricing](https://www.github.com/pricing)"#;
1878
1879        let ctx = create_context(content);
1880        let result = rule.check(&ctx).unwrap();
1881
1882        // Only line 3 should be flagged ("github documentation" is not a URL)
1883        assert_eq!(
1884            result.len(),
1885            1,
1886            "Only non-URL link text should be flagged, got: {result:?}"
1887        );
1888        assert_eq!(result[0].line, 3);
1889    }
1890
1891    #[test]
1892    fn test_html_attribute_values_not_flagged() {
1893        // Matches inside HTML tag attributes (between `<` and `>`) are not flagged.
1894        // Attribute values are not prose — they hold URLs, class names, data values, etc.
1895        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1896        let content = "# Heading\n\ntest\n\n<img src=\"www.example.test/test_image.png\">\n";
1897        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1898        let result = rule.check(&ctx).unwrap();
1899
1900        // Nothing on line 5 should be flagged — everything is inside the `<img ...>` tag
1901        let line5_violations: Vec<_> = result.iter().filter(|w| w.line == 5).collect();
1902        assert!(
1903            line5_violations.is_empty(),
1904            "Should not flag anything inside HTML tag attributes: {line5_violations:?}"
1905        );
1906
1907        // Plain text on line 3 is still flagged
1908        let line3_violations: Vec<_> = result.iter().filter(|w| w.line == 3).collect();
1909        assert_eq!(line3_violations.len(), 1, "Plain 'test' on line 3 should be flagged");
1910    }
1911
1912    #[test]
1913    fn test_html_text_content_still_flagged() {
1914        // Text between HTML tags (not inside `<...>`) is still checked.
1915        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1916        let content = "# Heading\n\n<a href=\"https://example.test/page\">test link</a>\n";
1917        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1918        let result = rule.check(&ctx).unwrap();
1919
1920        // "example.test" in the href attribute → not flagged (inside `<...>`)
1921        // "test link" in the anchor text → flagged (between `>` and `<`)
1922        assert_eq!(
1923            result.len(),
1924            1,
1925            "Should flag only 'test' in anchor text, not in href: {result:?}"
1926        );
1927        assert_eq!(result[0].column, 37, "Should flag col 37 ('test link' in anchor text)");
1928    }
1929
1930    #[test]
1931    fn test_html_attribute_various_not_flagged() {
1932        // All attribute types are ignored: src, href, alt, class, data-*, title, etc.
1933        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1934        let content = concat!(
1935            "# Heading\n\n",
1936            "<img src=\"test.png\" alt=\"test image\">\n",
1937            "<span class=\"test-class\" data-test=\"value\">test content</span>\n",
1938        );
1939        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1940        let result = rule.check(&ctx).unwrap();
1941
1942        // Only "test content" (between tags on line 4) should be flagged
1943        assert_eq!(
1944            result.len(),
1945            1,
1946            "Should flag only 'test content' between tags: {result:?}"
1947        );
1948        assert_eq!(result[0].line, 4);
1949    }
1950
1951    #[test]
1952    fn test_plain_text_underscore_boundary_unchanged() {
1953        // Plain text (outside HTML tags) still uses original word boundary semantics where
1954        // underscore is a boundary character, matching markdownlint's behavior via AST splitting.
1955        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1956        let content = "# Heading\n\ntest_image is here and just_test ends here\n";
1957        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1958        let result = rule.check(&ctx).unwrap();
1959
1960        // Both "test_image" (test at start) and "just_test" (test at end) are flagged
1961        // because in plain text, "_" is a word boundary
1962        assert_eq!(
1963            result.len(),
1964            2,
1965            "Should flag 'test' in both 'test_image' and 'just_test': {result:?}"
1966        );
1967        let cols: Vec<usize> = result.iter().map(|w| w.column).collect();
1968        assert!(cols.contains(&1), "Should flag col 1 (test_image): {cols:?}");
1969        assert!(cols.contains(&29), "Should flag col 29 (just_test): {cols:?}");
1970    }
1971
1972    #[test]
1973    fn test_frontmatter_yaml_keys_not_flagged() {
1974        // YAML keys in frontmatter should NOT be checked for proper name violations.
1975        // Only values should be checked.
1976        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1977
1978        let content = "---\ntitle: Heading\ntest: Some Test value\n---\n\nTest\n";
1979        let ctx = create_context(content);
1980        let result = rule.check(&ctx).unwrap();
1981
1982        // "test" in the YAML key (line 3) should NOT be flagged
1983        // "Test" in the YAML value (line 3) is correct capitalization, no flag
1984        // "Test" in body (line 6) is correct capitalization, no flag
1985        assert!(
1986            result.is_empty(),
1987            "Should not flag YAML keys or correctly capitalized values: {result:?}"
1988        );
1989    }
1990
1991    #[test]
1992    fn test_frontmatter_yaml_values_flagged() {
1993        // Incorrectly capitalized names in YAML values should be flagged.
1994        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1995
1996        let content = "---\ntitle: Heading\nkey: a test value\n---\n\nTest\n";
1997        let ctx = create_context(content);
1998        let result = rule.check(&ctx).unwrap();
1999
2000        // "test" in the YAML value (line 3) SHOULD be flagged
2001        assert_eq!(result.len(), 1, "Should flag 'test' in YAML value: {result:?}");
2002        assert_eq!(result[0].line, 3);
2003        assert_eq!(result[0].column, 8); // "key: a " = 7 chars, then "test" at column 8
2004    }
2005
2006    #[test]
2007    fn test_frontmatter_key_matches_name_not_flagged() {
2008        // A YAML key that happens to match a configured name should NOT be flagged.
2009        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2010
2011        let content = "---\ntest: other value\n---\n\nBody text\n";
2012        let ctx = create_context(content);
2013        let result = rule.check(&ctx).unwrap();
2014
2015        assert!(
2016            result.is_empty(),
2017            "Should not flag YAML key that matches configured name: {result:?}"
2018        );
2019    }
2020
2021    #[test]
2022    fn test_frontmatter_empty_value_not_flagged() {
2023        // YAML key with no value should be skipped entirely.
2024        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2025
2026        let content = "---\ntest:\ntest: \n---\n\nBody text\n";
2027        let ctx = create_context(content);
2028        let result = rule.check(&ctx).unwrap();
2029
2030        assert!(
2031            result.is_empty(),
2032            "Should not flag YAML keys with empty values: {result:?}"
2033        );
2034    }
2035
2036    #[test]
2037    fn test_frontmatter_nested_yaml_key_not_flagged() {
2038        // Nested/indented YAML keys should also be skipped.
2039        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2040
2041        let content = "---\nparent:\n  test: nested value\n---\n\nBody text\n";
2042        let ctx = create_context(content);
2043        let result = rule.check(&ctx).unwrap();
2044
2045        // "test" as a nested key should NOT be flagged
2046        assert!(result.is_empty(), "Should not flag nested YAML keys: {result:?}");
2047    }
2048
2049    #[test]
2050    fn test_frontmatter_list_items_checked() {
2051        // YAML list items are values and should be checked for proper names.
2052        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2053
2054        let content = "---\ntags:\n  - test\n  - other\n---\n\nBody text\n";
2055        let ctx = create_context(content);
2056        let result = rule.check(&ctx).unwrap();
2057
2058        // "test" as a list item value SHOULD be flagged
2059        assert_eq!(result.len(), 1, "Should flag 'test' in YAML list item: {result:?}");
2060        assert_eq!(result[0].line, 3);
2061    }
2062
2063    #[test]
2064    fn test_frontmatter_value_with_multiple_colons() {
2065        // For "key: value: more", key is before first colon.
2066        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2067
2068        let content = "---\ntest: description: a test thing\n---\n\nBody text\n";
2069        let ctx = create_context(content);
2070        let result = rule.check(&ctx).unwrap();
2071
2072        // "test" as key should NOT be flagged
2073        // "test" in value portion ("description: a test thing") SHOULD be flagged
2074        assert_eq!(
2075            result.len(),
2076            1,
2077            "Should flag 'test' in value after first colon: {result:?}"
2078        );
2079        assert_eq!(result[0].line, 2);
2080        assert!(result[0].column > 6, "Violation column should be in value portion");
2081    }
2082
2083    #[test]
2084    fn test_frontmatter_does_not_affect_body() {
2085        // Body text after frontmatter should still be fully checked.
2086        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2087
2088        let content = "---\ntitle: Heading\n---\n\ntest should be flagged here\n";
2089        let ctx = create_context(content);
2090        let result = rule.check(&ctx).unwrap();
2091
2092        assert_eq!(result.len(), 1, "Should flag 'test' in body text: {result:?}");
2093        assert_eq!(result[0].line, 5);
2094    }
2095
2096    #[test]
2097    fn test_frontmatter_fix_corrects_values_preserves_keys() {
2098        // Fix should correct YAML values but preserve keys.
2099        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2100
2101        let content = "---\ntest: a test value\n---\n\ntest here\n";
2102        let ctx = create_context(content);
2103        let fixed = rule.fix(&ctx).unwrap();
2104
2105        // Key "test" should remain lowercase; value "test" should become "Test"
2106        assert_eq!(fixed, "---\ntest: a Test value\n---\n\nTest here\n");
2107    }
2108
2109    #[test]
2110    fn test_frontmatter_multiword_value_flagged() {
2111        // Multiple proper names in a single YAML value should all be flagged.
2112        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
2113
2114        let content = "---\ndescription: Learn javascript and typescript\n---\n\nBody\n";
2115        let ctx = create_context(content);
2116        let result = rule.check(&ctx).unwrap();
2117
2118        assert_eq!(result.len(), 2, "Should flag both names in YAML value: {result:?}");
2119        assert!(result.iter().all(|w| w.line == 2));
2120    }
2121
2122    #[test]
2123    fn test_frontmatter_yaml_comments_not_checked() {
2124        // YAML comments inside frontmatter should be skipped entirely.
2125        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2126
2127        let content = "---\n# test comment\ntitle: Heading\n---\n\nBody text\n";
2128        let ctx = create_context(content);
2129        let result = rule.check(&ctx).unwrap();
2130
2131        assert!(result.is_empty(), "Should not flag names in YAML comments: {result:?}");
2132    }
2133
2134    #[test]
2135    fn test_frontmatter_delimiters_not_checked() {
2136        // Frontmatter delimiter lines (--- or +++) should never be checked.
2137        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2138
2139        let content = "---\ntitle: Heading\n---\n\ntest here\n";
2140        let ctx = create_context(content);
2141        let result = rule.check(&ctx).unwrap();
2142
2143        // Only the body "test" on line 5 should be flagged
2144        assert_eq!(result.len(), 1, "Should only flag body text: {result:?}");
2145        assert_eq!(result[0].line, 5);
2146    }
2147
2148    #[test]
2149    fn test_frontmatter_continuation_lines_checked() {
2150        // Continuation lines (indented, no colon) are value content and should be checked.
2151        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2152
2153        let content = "---\ndescription: >\n  a test value\n  continued here\n---\n\nBody\n";
2154        let ctx = create_context(content);
2155        let result = rule.check(&ctx).unwrap();
2156
2157        // "test" on the continuation line should be flagged
2158        assert_eq!(result.len(), 1, "Should flag 'test' in continuation line: {result:?}");
2159        assert_eq!(result[0].line, 3);
2160    }
2161
2162    #[test]
2163    fn test_frontmatter_quoted_values_checked() {
2164        // Quoted YAML values should have their content checked (inside the quotes).
2165        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2166
2167        let content = "---\ntitle: \"a test title\"\n---\n\nBody\n";
2168        let ctx = create_context(content);
2169        let result = rule.check(&ctx).unwrap();
2170
2171        assert_eq!(result.len(), 1, "Should flag 'test' in quoted YAML value: {result:?}");
2172        assert_eq!(result[0].line, 2);
2173    }
2174
2175    #[test]
2176    fn test_frontmatter_single_quoted_values_checked() {
2177        // Single-quoted YAML values should have their content checked.
2178        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2179
2180        let content = "---\ntitle: 'a test title'\n---\n\nBody\n";
2181        let ctx = create_context(content);
2182        let result = rule.check(&ctx).unwrap();
2183
2184        assert_eq!(
2185            result.len(),
2186            1,
2187            "Should flag 'test' in single-quoted YAML value: {result:?}"
2188        );
2189        assert_eq!(result[0].line, 2);
2190    }
2191
2192    #[test]
2193    fn test_frontmatter_fix_multiword_values() {
2194        // Fix should correct all proper names in frontmatter values.
2195        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
2196
2197        let content = "---\ndescription: Learn javascript and typescript\n---\n\nBody\n";
2198        let ctx = create_context(content);
2199        let fixed = rule.fix(&ctx).unwrap();
2200
2201        assert_eq!(
2202            fixed,
2203            "---\ndescription: Learn JavaScript and TypeScript\n---\n\nBody\n"
2204        );
2205    }
2206
2207    #[test]
2208    fn test_frontmatter_fix_preserves_yaml_structure() {
2209        // Fix should preserve YAML structure while correcting values.
2210        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2211
2212        let content = "---\ntags:\n  - test\n  - other\ntitle: a test doc\n---\n\ntest body\n";
2213        let ctx = create_context(content);
2214        let fixed = rule.fix(&ctx).unwrap();
2215
2216        assert_eq!(
2217            fixed,
2218            "---\ntags:\n  - Test\n  - other\ntitle: a Test doc\n---\n\nTest body\n"
2219        );
2220    }
2221
2222    #[test]
2223    fn test_frontmatter_toml_delimiters_not_checked() {
2224        // TOML frontmatter with +++ delimiters should also be handled.
2225        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2226
2227        let content = "+++\ntitle = \"a test title\"\n+++\n\ntest body\n";
2228        let ctx = create_context(content);
2229        let result = rule.check(&ctx).unwrap();
2230
2231        // "title" as TOML key should NOT be flagged
2232        // "test" in TOML quoted value SHOULD be flagged (line 2)
2233        // "test" in body SHOULD be flagged (line 5)
2234        assert_eq!(result.len(), 2, "Should flag TOML value and body: {result:?}");
2235        let fm_violations: Vec<_> = result.iter().filter(|w| w.line == 2).collect();
2236        assert_eq!(fm_violations.len(), 1, "Should flag 'test' in TOML value: {result:?}");
2237        let body_violations: Vec<_> = result.iter().filter(|w| w.line == 5).collect();
2238        assert_eq!(body_violations.len(), 1, "Should flag body 'test': {result:?}");
2239    }
2240
2241    #[test]
2242    fn test_frontmatter_toml_key_not_flagged() {
2243        // TOML keys should NOT be flagged, only values.
2244        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2245
2246        let content = "+++\ntest = \"other value\"\n+++\n\nBody text\n";
2247        let ctx = create_context(content);
2248        let result = rule.check(&ctx).unwrap();
2249
2250        assert!(
2251            result.is_empty(),
2252            "Should not flag TOML key that matches configured name: {result:?}"
2253        );
2254    }
2255
2256    #[test]
2257    fn test_frontmatter_toml_fix_preserves_keys() {
2258        // Fix should correct TOML values but preserve keys.
2259        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2260
2261        let content = "+++\ntest = \"a test value\"\n+++\n\ntest here\n";
2262        let ctx = create_context(content);
2263        let fixed = rule.fix(&ctx).unwrap();
2264
2265        // Key "test" should remain lowercase; value "test" should become "Test"
2266        assert_eq!(fixed, "+++\ntest = \"a Test value\"\n+++\n\nTest here\n");
2267    }
2268
2269    #[test]
2270    fn test_frontmatter_list_item_mapping_key_not_flagged() {
2271        // In "- test: nested value", "test" is a YAML key within a list-item mapping.
2272        // The key should NOT be flagged; only the value should be checked.
2273        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2274
2275        let content = "---\nitems:\n  - test: nested value\n---\n\nBody text\n";
2276        let ctx = create_context(content);
2277        let result = rule.check(&ctx).unwrap();
2278
2279        assert!(
2280            result.is_empty(),
2281            "Should not flag YAML key in list-item mapping: {result:?}"
2282        );
2283    }
2284
2285    #[test]
2286    fn test_frontmatter_list_item_mapping_value_flagged() {
2287        // In "- key: test value", the value portion should be checked.
2288        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2289
2290        let content = "---\nitems:\n  - key: a test value\n---\n\nBody text\n";
2291        let ctx = create_context(content);
2292        let result = rule.check(&ctx).unwrap();
2293
2294        assert_eq!(
2295            result.len(),
2296            1,
2297            "Should flag 'test' in list-item mapping value: {result:?}"
2298        );
2299        assert_eq!(result[0].line, 3);
2300    }
2301
2302    #[test]
2303    fn test_frontmatter_bare_list_item_still_flagged() {
2304        // Bare list items without a colon (e.g., "- test") are values and should be flagged.
2305        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2306
2307        let content = "---\ntags:\n  - test\n  - other\n---\n\nBody text\n";
2308        let ctx = create_context(content);
2309        let result = rule.check(&ctx).unwrap();
2310
2311        assert_eq!(result.len(), 1, "Should flag 'test' in bare list item: {result:?}");
2312        assert_eq!(result[0].line, 3);
2313    }
2314
2315    #[test]
2316    fn test_frontmatter_flow_mapping_not_flagged() {
2317        // Flow mappings like {test: value} contain YAML keys that should not be flagged.
2318        // The entire flow construct should be skipped.
2319        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2320
2321        let content = "---\nflow_map: {test: value, other: test}\n---\n\nBody text\n";
2322        let ctx = create_context(content);
2323        let result = rule.check(&ctx).unwrap();
2324
2325        assert!(
2326            result.is_empty(),
2327            "Should not flag names inside flow mappings: {result:?}"
2328        );
2329    }
2330
2331    #[test]
2332    fn test_frontmatter_flow_sequence_not_flagged() {
2333        // Flow sequences like [test, other] should also be skipped.
2334        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2335
2336        let content = "---\nitems: [test, other, test]\n---\n\nBody text\n";
2337        let ctx = create_context(content);
2338        let result = rule.check(&ctx).unwrap();
2339
2340        assert!(
2341            result.is_empty(),
2342            "Should not flag names inside flow sequences: {result:?}"
2343        );
2344    }
2345
2346    #[test]
2347    fn test_frontmatter_list_item_mapping_fix_preserves_key() {
2348        // Fix should correct values in list-item mappings but preserve keys.
2349        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2350
2351        let content = "---\nitems:\n  - test: a test value\n---\n\ntest here\n";
2352        let ctx = create_context(content);
2353        let fixed = rule.fix(&ctx).unwrap();
2354
2355        // "test" as list-item key should remain lowercase;
2356        // "test" in value portion should become "Test"
2357        assert_eq!(fixed, "---\nitems:\n  - test: a Test value\n---\n\nTest here\n");
2358    }
2359
2360    #[test]
2361    fn test_frontmatter_backtick_code_not_flagged() {
2362        // Names inside backticks in frontmatter should NOT be flagged when code_blocks=false.
2363        let config = MD044Config {
2364            names: vec!["GoodApplication".to_string()],
2365            code_blocks: false,
2366            ..MD044Config::default()
2367        };
2368        let rule = MD044ProperNames::from_config_struct(config);
2369
2370        let content = "---\ntitle: \"`goodapplication` CLI\"\n---\n\nIntroductory `goodapplication` CLI text.\n";
2371        let ctx = create_context(content);
2372        let result = rule.check(&ctx).unwrap();
2373
2374        // Neither the frontmatter nor the body backtick-wrapped name should be flagged
2375        assert!(
2376            result.is_empty(),
2377            "Should not flag names inside backticks in frontmatter or body: {result:?}"
2378        );
2379    }
2380
2381    #[test]
2382    fn test_frontmatter_unquoted_backtick_code_not_flagged() {
2383        // Exact case from issue #513: unquoted YAML frontmatter with backticks
2384        let config = MD044Config {
2385            names: vec!["GoodApplication".to_string()],
2386            code_blocks: false,
2387            ..MD044Config::default()
2388        };
2389        let rule = MD044ProperNames::from_config_struct(config);
2390
2391        let content = "---\ntitle: `goodapplication` CLI\n---\n\nIntroductory `goodapplication` CLI text.\n";
2392        let ctx = create_context(content);
2393        let result = rule.check(&ctx).unwrap();
2394
2395        assert!(
2396            result.is_empty(),
2397            "Should not flag names inside backticks in unquoted YAML frontmatter: {result:?}"
2398        );
2399    }
2400
2401    #[test]
2402    fn test_frontmatter_bare_name_still_flagged_with_backtick_nearby() {
2403        // Names outside backticks in frontmatter should still be flagged.
2404        let config = MD044Config {
2405            names: vec!["GoodApplication".to_string()],
2406            code_blocks: false,
2407            ..MD044Config::default()
2408        };
2409        let rule = MD044ProperNames::from_config_struct(config);
2410
2411        let content = "---\ntitle: goodapplication `goodapplication` CLI\n---\n\nBody\n";
2412        let ctx = create_context(content);
2413        let result = rule.check(&ctx).unwrap();
2414
2415        // Only the bare "goodapplication" (before backticks) should be flagged
2416        assert_eq!(
2417            result.len(),
2418            1,
2419            "Should flag bare name but not backtick-wrapped name: {result:?}"
2420        );
2421        assert_eq!(result[0].line, 2);
2422        assert_eq!(result[0].column, 8); // "title: " = 7 chars, name at column 8
2423    }
2424
2425    #[test]
2426    fn test_frontmatter_backtick_code_with_code_blocks_true() {
2427        // When code_blocks=true, names inside backticks ARE checked.
2428        let config = MD044Config {
2429            names: vec!["GoodApplication".to_string()],
2430            code_blocks: true,
2431            ..MD044Config::default()
2432        };
2433        let rule = MD044ProperNames::from_config_struct(config);
2434
2435        let content = "---\ntitle: \"`goodapplication` CLI\"\n---\n\nBody\n";
2436        let ctx = create_context(content);
2437        let result = rule.check(&ctx).unwrap();
2438
2439        // With code_blocks=true, backtick-wrapped name SHOULD be flagged
2440        assert_eq!(
2441            result.len(),
2442            1,
2443            "Should flag backtick-wrapped name when code_blocks=true: {result:?}"
2444        );
2445        assert_eq!(result[0].line, 2);
2446    }
2447
2448    #[test]
2449    fn test_frontmatter_fix_preserves_backtick_code() {
2450        // Fix should NOT change names inside backticks in frontmatter.
2451        let config = MD044Config {
2452            names: vec!["GoodApplication".to_string()],
2453            code_blocks: false,
2454            ..MD044Config::default()
2455        };
2456        let rule = MD044ProperNames::from_config_struct(config);
2457
2458        let content = "---\ntitle: \"`goodapplication` CLI\"\n---\n\nIntroductory `goodapplication` CLI text.\n";
2459        let ctx = create_context(content);
2460        let fixed = rule.fix(&ctx).unwrap();
2461
2462        // Neither backtick-wrapped occurrence should be changed
2463        assert_eq!(
2464            fixed, content,
2465            "Fix should not modify names inside backticks in frontmatter"
2466        );
2467    }
2468
2469    // --- Angle-bracket URL tests (issue #457) ---
2470
2471    #[test]
2472    fn test_angle_bracket_url_in_html_comment_not_flagged() {
2473        // Angle-bracket URLs inside HTML comments should be skipped
2474        let config = MD044Config {
2475            names: vec!["Test".to_string()],
2476            ..MD044Config::default()
2477        };
2478        let rule = MD044ProperNames::from_config_struct(config);
2479
2480        let content = "---\ntitle: Level 1 heading\n---\n\n<https://www.example.test>\n\n<!-- This is a Test https://www.example.test -->\n<!-- This is a Test <https://www.example.test> -->\n";
2481        let ctx = create_context(content);
2482        let result = rule.check(&ctx).unwrap();
2483
2484        // Line 7: "Test" in comment prose before bare URL -- already correct capitalization
2485        // Line 7: "test" in bare URL (not in angle brackets) -- but "test" is in URL domain, not prose.
2486        //   However, .example.test has "test" at a word boundary (after '.'), so it IS flagged.
2487        // Line 8: "Test" in comment prose -- correct capitalization, not flagged
2488        // Line 8: "test" in <https://www.example.test> -- inside angle-bracket URL, NOT flagged
2489
2490        // The key assertion: line 8's angle-bracket URL should NOT produce a warning
2491        let line8_warnings: Vec<_> = result.iter().filter(|w| w.line == 8).collect();
2492        assert!(
2493            line8_warnings.is_empty(),
2494            "Should not flag names inside angle-bracket URLs in HTML comments: {line8_warnings:?}"
2495        );
2496    }
2497
2498    #[test]
2499    fn test_bare_url_in_html_comment_still_flagged() {
2500        // Bare URLs (not in angle brackets) inside HTML comments should still be checked
2501        let config = MD044Config {
2502            names: vec!["Test".to_string()],
2503            ..MD044Config::default()
2504        };
2505        let rule = MD044ProperNames::from_config_struct(config);
2506
2507        let content = "<!-- This is a test https://www.example.test -->\n";
2508        let ctx = create_context(content);
2509        let result = rule.check(&ctx).unwrap();
2510
2511        // "test" appears as prose text before URL and also in the bare URL domain
2512        // At minimum, the prose "test" should be flagged
2513        assert!(
2514            !result.is_empty(),
2515            "Should flag 'test' in prose text of HTML comment with bare URL"
2516        );
2517    }
2518
2519    #[test]
2520    fn test_angle_bracket_url_in_regular_markdown_not_flagged() {
2521        // Angle-bracket URLs in regular markdown are already handled by the link parser,
2522        // but the angle-bracket check provides a safety net
2523        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2524
2525        let content = "<https://www.example.test>\n";
2526        let ctx = create_context(content);
2527        let result = rule.check(&ctx).unwrap();
2528
2529        assert!(
2530            result.is_empty(),
2531            "Should not flag names inside angle-bracket URLs in regular markdown: {result:?}"
2532        );
2533    }
2534
2535    #[test]
2536    fn test_multiple_angle_bracket_urls_in_one_comment() {
2537        let config = MD044Config {
2538            names: vec!["Test".to_string()],
2539            ..MD044Config::default()
2540        };
2541        let rule = MD044ProperNames::from_config_struct(config);
2542
2543        let content = "<!-- See <https://test.example.com> and <https://www.example.test> for details -->\n";
2544        let ctx = create_context(content);
2545        let result = rule.check(&ctx).unwrap();
2546
2547        // Both URLs are inside angle brackets, so "test" inside them should NOT be flagged
2548        assert!(
2549            result.is_empty(),
2550            "Should not flag names inside multiple angle-bracket URLs: {result:?}"
2551        );
2552    }
2553
2554    #[test]
2555    fn test_angle_bracket_non_url_still_flagged() {
2556        // <Test> is NOT a URL (no scheme), so is_in_angle_bracket_url does NOT protect it.
2557        // Whether it gets flagged depends on HTML tag detection, not on our URL check.
2558        assert!(
2559            !MD044ProperNames::is_in_angle_bracket_url("<test> which is not a URL.", 1),
2560            "is_in_angle_bracket_url should return false for non-URL angle brackets"
2561        );
2562    }
2563
2564    #[test]
2565    fn test_angle_bracket_mailto_url_not_flagged() {
2566        let config = MD044Config {
2567            names: vec!["Test".to_string()],
2568            ..MD044Config::default()
2569        };
2570        let rule = MD044ProperNames::from_config_struct(config);
2571
2572        let content = "<!-- Contact <mailto:test@example.com> for help -->\n";
2573        let ctx = create_context(content);
2574        let result = rule.check(&ctx).unwrap();
2575
2576        assert!(
2577            result.is_empty(),
2578            "Should not flag names inside angle-bracket mailto URLs: {result:?}"
2579        );
2580    }
2581
2582    #[test]
2583    fn test_angle_bracket_ftp_url_not_flagged() {
2584        let config = MD044Config {
2585            names: vec!["Test".to_string()],
2586            ..MD044Config::default()
2587        };
2588        let rule = MD044ProperNames::from_config_struct(config);
2589
2590        let content = "<!-- Download from <ftp://test.example.com/file> -->\n";
2591        let ctx = create_context(content);
2592        let result = rule.check(&ctx).unwrap();
2593
2594        assert!(
2595            result.is_empty(),
2596            "Should not flag names inside angle-bracket FTP URLs: {result:?}"
2597        );
2598    }
2599
2600    #[test]
2601    fn test_angle_bracket_url_fix_preserves_url() {
2602        // Fix should not modify text inside angle-bracket URLs
2603        let config = MD044Config {
2604            names: vec!["Test".to_string()],
2605            ..MD044Config::default()
2606        };
2607        let rule = MD044ProperNames::from_config_struct(config);
2608
2609        let content = "<!-- test text <https://www.example.test> -->\n";
2610        let ctx = create_context(content);
2611        let fixed = rule.fix(&ctx).unwrap();
2612
2613        // "test" in prose should be fixed, URL should be preserved
2614        assert!(
2615            fixed.contains("<https://www.example.test>"),
2616            "Fix should preserve angle-bracket URLs: {fixed}"
2617        );
2618        assert!(
2619            fixed.contains("Test text"),
2620            "Fix should correct prose 'test' to 'Test': {fixed}"
2621        );
2622    }
2623
2624    #[test]
2625    fn test_is_in_angle_bracket_url_helper() {
2626        // Direct tests of the helper function
2627        let line = "text <https://example.test> more text";
2628
2629        // Inside the URL
2630        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 5)); // '<'
2631        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 6)); // 'h'
2632        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 15)); // middle of URL
2633        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 26)); // '>'
2634
2635        // Outside the URL
2636        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 0)); // 't' at start
2637        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 4)); // space before '<'
2638        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 27)); // space after '>'
2639
2640        // Non-URL angle brackets
2641        assert!(!MD044ProperNames::is_in_angle_bracket_url("<notaurl>", 1));
2642
2643        // mailto scheme
2644        assert!(MD044ProperNames::is_in_angle_bracket_url(
2645            "<mailto:test@example.com>",
2646            10
2647        ));
2648
2649        // ftp scheme
2650        assert!(MD044ProperNames::is_in_angle_bracket_url(
2651            "<ftp://test.example.com>",
2652            10
2653        ));
2654    }
2655
2656    #[test]
2657    fn test_is_in_angle_bracket_url_uppercase_scheme() {
2658        // RFC 3986: URI schemes are case-insensitive
2659        assert!(MD044ProperNames::is_in_angle_bracket_url(
2660            "<HTTPS://test.example.com>",
2661            10
2662        ));
2663        assert!(MD044ProperNames::is_in_angle_bracket_url(
2664            "<Http://test.example.com>",
2665            10
2666        ));
2667    }
2668
2669    #[test]
2670    fn test_is_in_angle_bracket_url_uncommon_schemes() {
2671        // ssh scheme
2672        assert!(MD044ProperNames::is_in_angle_bracket_url(
2673            "<ssh://test@example.com>",
2674            10
2675        ));
2676        // file scheme
2677        assert!(MD044ProperNames::is_in_angle_bracket_url("<file:///test/path>", 10));
2678        // data scheme (no authority, just colon)
2679        assert!(MD044ProperNames::is_in_angle_bracket_url("<data:text/plain;test>", 10));
2680    }
2681
2682    #[test]
2683    fn test_is_in_angle_bracket_url_unclosed() {
2684        // Unclosed angle bracket should NOT match
2685        assert!(!MD044ProperNames::is_in_angle_bracket_url(
2686            "<https://test.example.com",
2687            10
2688        ));
2689    }
2690
2691    #[test]
2692    fn test_vale_inline_config_comments_not_flagged() {
2693        let config = MD044Config {
2694            names: vec!["Vale".to_string(), "JavaScript".to_string()],
2695            ..MD044Config::default()
2696        };
2697        let rule = MD044ProperNames::from_config_struct(config);
2698
2699        let content = "\
2700<!-- vale off -->
2701Some javascript text here.
2702<!-- vale on -->
2703<!-- vale Style.Rule = NO -->
2704More javascript text.
2705<!-- vale Style.Rule = YES -->
2706<!-- vale JavaScript.Grammar = NO -->
2707";
2708        let ctx = create_context(content);
2709        let result = rule.check(&ctx).unwrap();
2710
2711        // Only the body text lines (2, 5) should be flagged for "javascript"
2712        assert_eq!(result.len(), 2, "Should only flag body lines, not Vale config comments");
2713        assert_eq!(result[0].line, 2);
2714        assert_eq!(result[1].line, 5);
2715    }
2716
2717    #[test]
2718    fn test_remark_lint_inline_config_comments_not_flagged() {
2719        let config = MD044Config {
2720            names: vec!["JavaScript".to_string()],
2721            ..MD044Config::default()
2722        };
2723        let rule = MD044ProperNames::from_config_struct(config);
2724
2725        let content = "\
2726<!-- lint disable remark-lint-some-rule -->
2727Some javascript text here.
2728<!-- lint enable remark-lint-some-rule -->
2729<!-- lint ignore remark-lint-some-rule -->
2730More javascript text.
2731";
2732        let ctx = create_context(content);
2733        let result = rule.check(&ctx).unwrap();
2734
2735        assert_eq!(
2736            result.len(),
2737            2,
2738            "Should only flag body lines, not remark-lint config comments"
2739        );
2740        assert_eq!(result[0].line, 2);
2741        assert_eq!(result[1].line, 5);
2742    }
2743
2744    #[test]
2745    fn test_fix_does_not_modify_vale_remark_lint_comments() {
2746        let config = MD044Config {
2747            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2748            ..MD044Config::default()
2749        };
2750        let rule = MD044ProperNames::from_config_struct(config);
2751
2752        let content = "\
2753<!-- vale off -->
2754Some javascript text.
2755<!-- vale on -->
2756<!-- lint disable remark-lint-some-rule -->
2757More javascript text.
2758<!-- lint enable remark-lint-some-rule -->
2759";
2760        let ctx = create_context(content);
2761        let fixed = rule.fix(&ctx).unwrap();
2762
2763        // Config directive lines must be preserved unchanged
2764        assert!(fixed.contains("<!-- vale off -->"));
2765        assert!(fixed.contains("<!-- vale on -->"));
2766        assert!(fixed.contains("<!-- lint disable remark-lint-some-rule -->"));
2767        assert!(fixed.contains("<!-- lint enable remark-lint-some-rule -->"));
2768        // Body text should be fixed
2769        assert!(fixed.contains("Some JavaScript text."));
2770        assert!(fixed.contains("More JavaScript text."));
2771    }
2772
2773    #[test]
2774    fn test_mixed_tool_directives_all_skipped() {
2775        let config = MD044Config {
2776            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2777            ..MD044Config::default()
2778        };
2779        let rule = MD044ProperNames::from_config_struct(config);
2780
2781        let content = "\
2782<!-- rumdl-disable MD044 -->
2783Some javascript text.
2784<!-- markdownlint-disable -->
2785More javascript text.
2786<!-- vale off -->
2787Even more javascript text.
2788<!-- lint disable some-rule -->
2789Final javascript text.
2790<!-- rumdl-enable MD044 -->
2791<!-- markdownlint-enable -->
2792<!-- vale on -->
2793<!-- lint enable some-rule -->
2794";
2795        let ctx = create_context(content);
2796        let result = rule.check(&ctx).unwrap();
2797
2798        // Only body text lines should be flagged (lines 2, 4, 6, 8)
2799        assert_eq!(
2800            result.len(),
2801            4,
2802            "Should only flag body lines, not any tool directive comments"
2803        );
2804        assert_eq!(result[0].line, 2);
2805        assert_eq!(result[1].line, 4);
2806        assert_eq!(result[2].line, 6);
2807        assert_eq!(result[3].line, 8);
2808    }
2809
2810    #[test]
2811    fn test_vale_remark_lint_edge_cases_not_matched() {
2812        let config = MD044Config {
2813            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2814            ..MD044Config::default()
2815        };
2816        let rule = MD044ProperNames::from_config_struct(config);
2817
2818        // These are regular HTML comments, NOT tool directives:
2819        // - "<!-- vale -->" is not a valid Vale directive (no action keyword)
2820        // - "<!-- vale is a tool -->" starts with "vale" but is prose, not a directive
2821        // - "<!-- valedictorian javascript -->" does not start with "<!-- vale "
2822        // - "<!-- linting javascript tips -->" does not start with "<!-- lint "
2823        // - "<!-- vale javascript -->" starts with "vale" but has no action keyword
2824        // - "<!-- lint your javascript code -->" starts with "lint" but has no action keyword
2825        let content = "\
2826<!-- vale -->
2827<!-- vale is a tool for writing -->
2828<!-- valedictorian javascript -->
2829<!-- linting javascript tips -->
2830<!-- vale javascript -->
2831<!-- lint your javascript code -->
2832";
2833        let ctx = create_context(content);
2834        let result = rule.check(&ctx).unwrap();
2835
2836        // Line 1: "<!-- vale -->" contains "vale" (wrong case for "Vale") -> flagged
2837        // Line 2: "<!-- vale is a tool for writing -->" contains "vale" -> flagged
2838        // Line 3: "<!-- valedictorian javascript -->" contains "javascript" -> flagged
2839        // Line 4: "<!-- linting javascript tips -->" contains "javascript" -> flagged
2840        // Line 5: "<!-- vale javascript -->" contains "vale" and "javascript" -> flagged for both
2841        // Line 6: "<!-- lint your javascript code -->" contains "javascript" -> flagged
2842        assert_eq!(
2843            result.len(),
2844            7,
2845            "Should flag proper names in non-directive HTML comments: got {result:?}"
2846        );
2847        assert_eq!(result[0].line, 1); // "vale" in <!-- vale -->
2848        assert_eq!(result[1].line, 2); // "vale" in <!-- vale is a tool -->
2849        assert_eq!(result[2].line, 3); // "javascript" in <!-- valedictorian javascript -->
2850        assert_eq!(result[3].line, 4); // "javascript" in <!-- linting javascript tips -->
2851        assert_eq!(result[4].line, 5); // "vale" in <!-- vale javascript -->
2852        assert_eq!(result[5].line, 5); // "javascript" in <!-- vale javascript -->
2853        assert_eq!(result[6].line, 6); // "javascript" in <!-- lint your javascript code -->
2854    }
2855
2856    #[test]
2857    fn test_vale_style_directives_skipped() {
2858        let config = MD044Config {
2859            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2860            ..MD044Config::default()
2861        };
2862        let rule = MD044ProperNames::from_config_struct(config);
2863
2864        // These ARE valid Vale directives and should be skipped:
2865        let content = "\
2866<!-- vale style = MyStyle -->
2867<!-- vale styles = Style1, Style2 -->
2868<!-- vale MyRule.Name = YES -->
2869<!-- vale MyRule.Name = NO -->
2870Some javascript text.
2871";
2872        let ctx = create_context(content);
2873        let result = rule.check(&ctx).unwrap();
2874
2875        // Only line 5 (body text) should be flagged
2876        assert_eq!(
2877            result.len(),
2878            1,
2879            "Should only flag body lines, not Vale style/rule directives: got {result:?}"
2880        );
2881        assert_eq!(result[0].line, 5);
2882    }
2883
2884    // --- is_in_backtick_code_in_line unit tests ---
2885
2886    #[test]
2887    fn test_backtick_code_single_backticks() {
2888        let line = "hello `world` bye";
2889        // 'w' is at index 7, inside the backtick span (content between backticks at 6 and 12)
2890        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 7));
2891        // 'h' at index 0 is outside
2892        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2893        // 'b' at index 14 is outside
2894        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 14));
2895    }
2896
2897    #[test]
2898    fn test_backtick_code_double_backticks() {
2899        let line = "a ``code`` b";
2900        // 'c' is at index 4, inside ``...``
2901        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 4));
2902        // 'a' at index 0 is outside
2903        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2904        // 'b' at index 11 is outside
2905        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 11));
2906    }
2907
2908    #[test]
2909    fn test_backtick_code_unclosed() {
2910        let line = "a `code b";
2911        // No closing backtick, so nothing is a code span
2912        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 3));
2913    }
2914
2915    #[test]
2916    fn test_backtick_code_mismatched_count() {
2917        // Single backtick opening, double backtick is not a match
2918        let line = "a `code`` b";
2919        // The single ` at index 2 doesn't match `` at index 7-8
2920        // So 'c' at index 3 is NOT in a code span
2921        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 3));
2922    }
2923
2924    #[test]
2925    fn test_backtick_code_multiple_spans() {
2926        let line = "`first` and `second`";
2927        // 'f' at index 1 (inside first span)
2928        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 1));
2929        // 'a' at index 8 (between spans)
2930        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 8));
2931        // 's' at index 13 (inside second span)
2932        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 13));
2933    }
2934
2935    #[test]
2936    fn test_backtick_code_on_backtick_boundary() {
2937        let line = "`code`";
2938        // Position 0 is the opening backtick itself, not inside the span
2939        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2940        // Position 5 is the closing backtick, not inside the span
2941        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 5));
2942        // Position 1-4 are inside the span
2943        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 1));
2944        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 4));
2945    }
2946
2947    // Double-bracket WikiLink + URL: [[text]](url)
2948    // pulldown-cmark parses [[text]] as a WikiLink but leaves the (url)
2949    // as plain text, so ctx.links does not cover the URL portion.
2950    // MD044 must fall back to is_in_markdown_link_url for all lines.
2951
2952    #[test]
2953    fn test_double_bracket_link_url_not_flagged() {
2954        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
2955        // Exact reproduction from issue #564
2956        let content = "[[rumdl]](https://github.com/rvben/rumdl)";
2957        let ctx = create_context(content);
2958        let result = rule.check(&ctx).unwrap();
2959        assert!(
2960            result.is_empty(),
2961            "URL inside [[text]](url) must not be flagged, got: {result:?}"
2962        );
2963    }
2964
2965    #[test]
2966    fn test_double_bracket_link_url_not_fixed() {
2967        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
2968        let content = "[[rumdl]](https://github.com/rvben/rumdl)\n";
2969        let ctx = create_context(content);
2970        let fixed = rule.fix(&ctx).unwrap();
2971        assert_eq!(
2972            fixed, content,
2973            "fix() must leave the URL inside [[text]](url) unchanged"
2974        );
2975    }
2976
2977    #[test]
2978    fn test_double_bracket_link_text_still_flagged() {
2979        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
2980        // The link text portion [[github]](url) should still be checked.
2981        let content = "[[github]](https://example.com)";
2982        let ctx = create_context(content);
2983        let result = rule.check(&ctx).unwrap();
2984        assert_eq!(
2985            result.len(),
2986            1,
2987            "Incorrect name in [[text]] link text should still be flagged, got: {result:?}"
2988        );
2989        assert_eq!(result[0].message, "Proper name 'github' should be 'GitHub'");
2990    }
2991
2992    #[test]
2993    fn test_double_bracket_link_mixed_line() {
2994        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
2995        // URL must be skipped, standalone text must be flagged.
2996        let content = "See [[rumdl]](https://github.com/rvben/rumdl) and github for more.";
2997        let ctx = create_context(content);
2998        let result = rule.check(&ctx).unwrap();
2999        assert_eq!(
3000            result.len(),
3001            1,
3002            "Only the standalone 'github' after the link should be flagged, got: {result:?}"
3003        );
3004        assert!(result[0].message.contains("'github'"));
3005        // "See " (4) + "[[rumdl]](https://github.com/rvben/rumdl)" (42) + " and " (4) = column 51
3006        assert_eq!(
3007            result[0].column, 51,
3008            "Flagged column should be the trailing 'github', not the one in the URL"
3009        );
3010    }
3011
3012    #[test]
3013    fn test_regular_link_url_still_not_flagged() {
3014        // Confirm existing [text](url) behavior is unaffected by the fix.
3015        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
3016        let content = "[rumdl](https://github.com/rvben/rumdl)";
3017        let ctx = create_context(content);
3018        let result = rule.check(&ctx).unwrap();
3019        assert!(
3020            result.is_empty(),
3021            "URL inside regular [text](url) must still not be flagged, got: {result:?}"
3022        );
3023    }
3024
3025    #[test]
3026    fn test_link_like_text_in_code_span_still_flagged_when_code_blocks_enabled() {
3027        // When code-blocks = true the user explicitly opts into checking code spans.
3028        // A code span containing link-like text (`[foo](https://github.com)`) must
3029        // NOT be silently suppressed by is_in_markdown_link_url: the content is
3030        // literal characters, not a real Markdown link.
3031        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
3032        let content = "`[foo](https://github.com/org/repo)`";
3033        let ctx = create_context(content);
3034        let result = rule.check(&ctx).unwrap();
3035        assert_eq!(
3036            result.len(),
3037            1,
3038            "Proper name inside a code span must be flagged when code-blocks=true, got: {result:?}"
3039        );
3040        assert!(result[0].message.contains("'github'"));
3041    }
3042
3043    #[test]
3044    fn test_malformed_link_not_treated_as_url() {
3045        // [text](url with spaces) is NOT a valid Markdown link; pulldown-cmark
3046        // does not parse it, so the name inside must still be flagged.
3047        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
3048        let content = "See [rumdl](github repo) for details.";
3049        let ctx = create_context(content);
3050        let result = rule.check(&ctx).unwrap();
3051        assert_eq!(
3052            result.len(),
3053            1,
3054            "Name inside malformed [text](url with spaces) must still be flagged, got: {result:?}"
3055        );
3056        assert!(result[0].message.contains("'github'"));
3057    }
3058
3059    #[test]
3060    fn test_wikilink_followed_by_prose_parens_still_flagged() {
3061        // [[note]](github repo) — WikiLink followed by parenthesised prose, NOT
3062        // a valid link URL (space in destination). pulldown-cmark does not parse
3063        // it as a link, so the name inside must still be flagged.
3064        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
3065        let content = "[[note]](github repo)";
3066        let ctx = create_context(content);
3067        let result = rule.check(&ctx).unwrap();
3068        assert_eq!(
3069            result.len(),
3070            1,
3071            "Name inside [[wikilink]](prose with spaces) must still be flagged, got: {result:?}"
3072        );
3073        assert!(result[0].message.contains("'github'"));
3074    }
3075}
rumdl_lib/rules/md044_proper_names.rs

rumdl_lib/rules/
md044_proper_names.rs