rumdl_lib/rules/
md044_proper_names.rs

1use crate::utils::fast_hash;
2use crate::utils::regex_cache::{escape_regex, get_cached_regex};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use std::collections::{HashMap, HashSet};
6use std::sync::{Arc, Mutex};
7
8mod md044_config;
9pub(super) use md044_config::MD044Config;
10
11type WarningPosition = (usize, usize, String); // (line, column, found_name)
12
13/// Rule MD044: Proper names should be capitalized
14///
15/// See [docs/md044.md](../../docs/md044.md) for full documentation, configuration, and examples.
16///
17/// This rule is triggered when proper names are not capitalized correctly in the document.
18/// For example, if you have defined "JavaScript" as a proper name, the rule will flag any
19/// occurrences of "javascript" or "Javascript" as violations.
20///
21/// ## Purpose
22///
23/// Ensuring consistent capitalization of proper names improves document quality and
24/// professionalism. This is especially important for technical documentation where
25/// product names, programming languages, and technologies often have specific
26/// capitalization conventions.
27///
28/// ## Configuration Options
29///
30/// The rule supports the following configuration options:
31///
32/// ```yaml
33/// MD044:
34///   names: []                # List of proper names to check for correct capitalization
35///   code-blocks: false       # Whether to check code blocks (default: false)
36/// ```
37///
38/// Example configuration:
39///
40/// ```yaml
41/// MD044:
42///   names: ["JavaScript", "Node.js", "TypeScript"]
43///   code-blocks: true
44/// ```
45///
46/// ## Performance Optimizations
47///
48/// This rule implements several performance optimizations:
49///
50/// 1. **Regex Caching**: Pre-compiles and caches regex patterns for each proper name
51/// 2. **Content Caching**: Caches results based on content hashing for repeated checks
52/// 3. **Efficient Text Processing**: Uses optimized algorithms to avoid redundant text processing
53/// 4. **Smart Code Block Detection**: Efficiently identifies and optionally excludes code blocks
54///
55/// ## Edge Cases Handled
56///
57/// - **Word Boundaries**: Only matches complete words, not substrings within other words
58/// - **Case Sensitivity**: Properly handles case-specific matching
59/// - **Code Blocks**: Optionally checks code blocks (controlled by code-blocks setting)
60/// - **Markdown Formatting**: Handles proper names within Markdown formatting elements
61///
62/// ## Fix Behavior
63///
64/// When fixing issues, this rule replaces incorrect capitalization with the correct form
65/// as defined in the configuration.
66///
67/// Check if a trimmed line is an inline config comment from a linting tool.
68/// Recognized tools: rumdl, markdownlint, Vale, and remark-lint.
69fn is_inline_config_comment(trimmed: &str) -> bool {
70    trimmed.starts_with("<!-- rumdl-")
71        || trimmed.starts_with("<!-- markdownlint-")
72        || trimmed.starts_with("<!-- vale off")
73        || trimmed.starts_with("<!-- vale on")
74        || (trimmed.starts_with("<!-- vale ") && trimmed.contains(" = "))
75        || trimmed.starts_with("<!-- vale style")
76        || trimmed.starts_with("<!-- lint disable ")
77        || trimmed.starts_with("<!-- lint enable ")
78        || trimmed.starts_with("<!-- lint ignore ")
79}
80
81#[derive(Clone)]
82pub struct MD044ProperNames {
83    config: MD044Config,
84    // Cache the combined regex pattern string
85    combined_pattern: Option<String>,
86    // Precomputed lowercase name variants for fast pre-checks
87    name_variants: Vec<String>,
88    // Cache for name violations by content hash
89    content_cache: Arc<Mutex<HashMap<u64, Vec<WarningPosition>>>>,
90}
91
92impl MD044ProperNames {
93    pub fn new(names: Vec<String>, code_blocks: bool) -> Self {
94        let config = MD044Config {
95            names,
96            code_blocks,
97            html_elements: true, // Default to checking HTML elements
98            html_comments: true, // Default to checking HTML comments
99        };
100        let combined_pattern = Self::create_combined_pattern(&config);
101        let name_variants = Self::build_name_variants(&config);
102        Self {
103            config,
104            combined_pattern,
105            name_variants,
106            content_cache: Arc::new(Mutex::new(HashMap::new())),
107        }
108    }
109
110    // Helper function for consistent ASCII normalization
111    fn ascii_normalize(s: &str) -> String {
112        s.replace(['é', 'è', 'ê', 'ë'], "e")
113            .replace(['à', 'á', 'â', 'ä', 'ã', 'å'], "a")
114            .replace(['ï', 'î', 'í', 'ì'], "i")
115            .replace(['ü', 'ú', 'ù', 'û'], "u")
116            .replace(['ö', 'ó', 'ò', 'ô', 'õ'], "o")
117            .replace('ñ', "n")
118            .replace('ç', "c")
119    }
120
121    pub fn from_config_struct(config: MD044Config) -> Self {
122        let combined_pattern = Self::create_combined_pattern(&config);
123        let name_variants = Self::build_name_variants(&config);
124        Self {
125            config,
126            combined_pattern,
127            name_variants,
128            content_cache: Arc::new(Mutex::new(HashMap::new())),
129        }
130    }
131
132    // Create a combined regex pattern for all proper names
133    fn create_combined_pattern(config: &MD044Config) -> Option<String> {
134        if config.names.is_empty() {
135            return None;
136        }
137
138        // Create patterns for all names and their variations
139        let mut patterns: Vec<String> = config
140            .names
141            .iter()
142            .flat_map(|name| {
143                let mut variations = vec![];
144                let lower_name = name.to_lowercase();
145
146                // Add the lowercase version
147                variations.push(escape_regex(&lower_name));
148
149                // Add version without dots
150                let lower_name_no_dots = lower_name.replace('.', "");
151                if lower_name != lower_name_no_dots {
152                    variations.push(escape_regex(&lower_name_no_dots));
153                }
154
155                // Add ASCII-normalized versions for common accented characters
156                let ascii_normalized = Self::ascii_normalize(&lower_name);
157
158                if ascii_normalized != lower_name {
159                    variations.push(escape_regex(&ascii_normalized));
160
161                    // Also add version without dots
162                    let ascii_no_dots = ascii_normalized.replace('.', "");
163                    if ascii_normalized != ascii_no_dots {
164                        variations.push(escape_regex(&ascii_no_dots));
165                    }
166                }
167
168                variations
169            })
170            .collect();
171
172        // Sort patterns by length (longest first) to avoid shorter patterns matching within longer ones
173        patterns.sort_by_key(|b| std::cmp::Reverse(b.len()));
174
175        // Combine all patterns into a single regex with capture groups
176        // Don't use \b as it doesn't work with Unicode - we'll check boundaries manually
177        Some(format!(r"(?i)({})", patterns.join("|")))
178    }
179
180    fn build_name_variants(config: &MD044Config) -> Vec<String> {
181        let mut variants = HashSet::new();
182        for name in &config.names {
183            let lower_name = name.to_lowercase();
184            variants.insert(lower_name.clone());
185
186            let lower_no_dots = lower_name.replace('.', "");
187            if lower_name != lower_no_dots {
188                variants.insert(lower_no_dots);
189            }
190
191            let ascii_normalized = Self::ascii_normalize(&lower_name);
192            if ascii_normalized != lower_name {
193                variants.insert(ascii_normalized.clone());
194
195                let ascii_no_dots = ascii_normalized.replace('.', "");
196                if ascii_normalized != ascii_no_dots {
197                    variants.insert(ascii_no_dots);
198                }
199            }
200        }
201
202        variants.into_iter().collect()
203    }
204
205    // Find all name violations in the content and return positions.
206    // `content_lower` is the pre-computed lowercase version of `content` to avoid redundant allocations.
207    fn find_name_violations(
208        &self,
209        content: &str,
210        ctx: &crate::lint_context::LintContext,
211        content_lower: &str,
212    ) -> Vec<WarningPosition> {
213        // Early return: if no names configured or content is empty
214        if self.config.names.is_empty() || content.is_empty() || self.combined_pattern.is_none() {
215            return Vec::new();
216        }
217
218        // Early return: quick check if any of the configured names might be in content
219        let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
220
221        if !has_potential_matches {
222            return Vec::new();
223        }
224
225        // Check if we have cached results
226        let hash = fast_hash(content);
227        {
228            // Use a separate scope for borrowing to minimize lock time
229            if let Ok(cache) = self.content_cache.lock()
230                && let Some(cached) = cache.get(&hash)
231            {
232                return cached.clone();
233            }
234        }
235
236        let mut violations = Vec::new();
237
238        // Get the regex from global cache
239        let combined_regex = match &self.combined_pattern {
240            Some(pattern) => match get_cached_regex(pattern) {
241                Ok(regex) => regex,
242                Err(_) => return Vec::new(),
243            },
244            None => return Vec::new(),
245        };
246
247        // Use ctx.lines for better performance
248        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
249            let line_num = line_idx + 1;
250            let line = line_info.content(ctx.content);
251
252            // Skip code fence lines (```language or ~~~language)
253            let trimmed = line.trim_start();
254            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
255                continue;
256            }
257
258            // Skip if in code block (when code_blocks = false)
259            if !self.config.code_blocks && line_info.in_code_block {
260                continue;
261            }
262
263            // Skip if in HTML block (when html_elements = false)
264            if !self.config.html_elements && line_info.in_html_block {
265                continue;
266            }
267
268            // Skip HTML comments using pre-computed line flag
269            if !self.config.html_comments && line_info.in_html_comment {
270                continue;
271            }
272
273            // Skip JSX expressions and MDX comments (MDX flavor)
274            if line_info.in_jsx_expression || line_info.in_mdx_comment {
275                continue;
276            }
277
278            // Skip Obsidian comments (Obsidian flavor)
279            if line_info.in_obsidian_comment {
280                continue;
281            }
282
283            // For frontmatter lines, determine offset where checkable value content starts.
284            // YAML keys should not be checked against proper names - only values.
285            let fm_value_offset = if line_info.in_front_matter {
286                Self::frontmatter_value_offset(line)
287            } else {
288                0
289            };
290            if fm_value_offset == usize::MAX {
291                continue;
292            }
293
294            // Skip inline config comments (rumdl, markdownlint, Vale, remark-lint directives)
295            if is_inline_config_comment(trimmed) {
296                continue;
297            }
298
299            // Early return: skip lines that don't contain any potential matches
300            let line_lower = line.to_lowercase();
301            let has_line_matches = self.name_variants.iter().any(|name| line_lower.contains(name));
302
303            if !has_line_matches {
304                continue;
305            }
306
307            // Use the combined regex to find all matches in one pass
308            for cap in combined_regex.find_iter(line) {
309                let found_name = &line[cap.start()..cap.end()];
310
311                // Check word boundaries manually for Unicode support
312                let start_pos = cap.start();
313                let end_pos = cap.end();
314
315                // Skip matches in the key portion of frontmatter lines
316                if start_pos < fm_value_offset {
317                    continue;
318                }
319
320                // Skip matches inside HTML tag attributes (handles multi-line tags)
321                let byte_pos = line_info.byte_offset + start_pos;
322                if ctx.is_in_html_tag(byte_pos) {
323                    continue;
324                }
325
326                if !Self::is_at_word_boundary(line, start_pos, true) || !Self::is_at_word_boundary(line, end_pos, false)
327                {
328                    continue; // Not at word boundary
329                }
330
331                // Skip if in inline code when code_blocks is false
332                if !self.config.code_blocks {
333                    if ctx.is_in_code_block_or_span(byte_pos) {
334                        continue;
335                    }
336                    // pulldown-cmark doesn't parse markdown syntax inside HTML
337                    // comments, HTML blocks, or frontmatter, so backtick-wrapped
338                    // text isn't detected by is_in_code_block_or_span. Check directly.
339                    if (line_info.in_html_comment || line_info.in_html_block || line_info.in_front_matter)
340                        && Self::is_in_backtick_code_in_line(line, start_pos)
341                    {
342                        continue;
343                    }
344                }
345
346                // Skip if in link URL or reference definition
347                if Self::is_in_link(ctx, byte_pos) {
348                    continue;
349                }
350
351                // Skip if inside an angle-bracket URL (e.g., <https://...>)
352                // The link parser skips autolinks inside HTML comments,
353                // so we detect them directly in the line text.
354                if Self::is_in_angle_bracket_url(line, start_pos) {
355                    continue;
356                }
357
358                // Skip if inside a Markdown inline link URL in contexts where
359                // pulldown-cmark doesn't parse Markdown syntax (HTML comments,
360                // HTML blocks, frontmatter).
361                if (line_info.in_html_comment || line_info.in_html_block || line_info.in_front_matter)
362                    && Self::is_in_markdown_link_url(line, start_pos)
363                {
364                    continue;
365                }
366
367                // Skip if inside the URL portion of a WikiLink followed by a
368                // parenthesised destination — [[text]](url). pulldown-cmark
369                // registers [[text]] as a WikiLink in ctx.links but leaves the
370                // (url) as plain text, so is_in_link() misses those bytes.
371                if Self::is_in_wikilink_url(ctx, byte_pos) {
372                    continue;
373                }
374
375                // Find which proper name this matches
376                if let Some(proper_name) = self.get_proper_name_for(found_name) {
377                    // Only flag if it's not already correct
378                    if found_name != proper_name {
379                        violations.push((line_num, cap.start() + 1, found_name.to_string()));
380                    }
381                }
382            }
383        }
384
385        // Store in cache (ignore if mutex is poisoned)
386        if let Ok(mut cache) = self.content_cache.lock() {
387            cache.insert(hash, violations.clone());
388        }
389        violations
390    }
391
392    /// Check if a byte position is within a link URL (not link text)
393    ///
394    /// Link text should be checked for proper names, but URLs should be skipped.
395    /// For `[text](url)` - check text, skip url
396    /// For `[text][ref]` - check text, skip reference portion
397    /// For `[[text]]` (WikiLinks) - check text, skip brackets
398    fn is_in_link(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
399        use pulldown_cmark::LinkType;
400
401        // Binary search links (sorted by byte_offset) to find candidate containing byte_pos
402        let link_idx = ctx.links.partition_point(|link| link.byte_offset <= byte_pos);
403        if link_idx > 0 {
404            let link = &ctx.links[link_idx - 1];
405            if byte_pos < link.byte_end {
406                // WikiLinks [[text]] start with '[[', regular links [text] start with '['
407                let text_start = if matches!(link.link_type, LinkType::WikiLink { .. }) {
408                    link.byte_offset + 2
409                } else {
410                    link.byte_offset + 1
411                };
412                let text_end = text_start + link.text.len();
413
414                // If position is within the text portion, skip only if text is a URL.
415                // WikiLinks use the page name as both text and url; never treat them
416                // as bare-domain URLs regardless of whether the name contains dots.
417                if byte_pos >= text_start && byte_pos < text_end {
418                    let is_wikilink = matches!(link.link_type, LinkType::WikiLink { .. });
419                    return Self::link_text_is_url(&link.text)
420                        || (!is_wikilink && Self::link_text_matches_link_url(&link.text, &link.url));
421                }
422                // Position is in the URL/reference portion, skip it
423                return true;
424            }
425        }
426
427        // Binary search images (sorted by byte_offset) to find candidate containing byte_pos
428        let image_idx = ctx.images.partition_point(|img| img.byte_offset <= byte_pos);
429        if image_idx > 0 {
430            let image = &ctx.images[image_idx - 1];
431            if byte_pos < image.byte_end {
432                // Image starts with '![' so alt text starts at byte_offset + 2
433                let alt_start = image.byte_offset + 2;
434                let alt_end = alt_start + image.alt_text.len();
435
436                // If position is within the alt text portion, don't skip
437                if byte_pos >= alt_start && byte_pos < alt_end {
438                    return false;
439                }
440                // Position is in the URL/reference portion, skip it
441                return true;
442            }
443        }
444
445        // Check pre-computed reference definitions
446        ctx.is_in_reference_def(byte_pos)
447    }
448
449    /// Check if link text is a URL that should not have proper name corrections.
450    fn link_text_is_url(text: &str) -> bool {
451        let lower = text.trim().to_ascii_lowercase();
452        lower.starts_with("http://")
453            || lower.starts_with("https://")
454            || lower.starts_with("www.")
455            || lower.starts_with("//")
456    }
457
458    /// Check if link text is the bare hostname/path of its destination URL.
459    ///
460    /// When the display text is the URL with the scheme stripped (e.g.,
461    /// `[example.github.io](https://example.github.io)`), the text is a domain
462    /// label, not a prose reference to a product, and should not be corrected.
463    ///
464    /// Requires the text to contain a dot, which distinguishes domain-like display
465    /// text from single-word WikiLink targets (e.g. `[[javascript]]`) where
466    /// `url == text` but neither is a domain name. Dotted WikiLink targets are
467    /// excluded separately via the `!is_wikilink` guard in `is_in_link`. Comparison
468    /// is case-insensitive because URL schemes and hostnames are case-insensitive.
469    fn link_text_matches_link_url(text: &str, url: &str) -> bool {
470        let text = text.trim();
471        // Only domain-like text (containing a dot) can be a bare hostname.
472        if !text.contains('.') {
473            return false;
474        }
475        let url_lower = url.to_ascii_lowercase();
476        let url_without_scheme = url_lower
477            .strip_prefix("https://")
478            .or_else(|| url_lower.strip_prefix("http://"))
479            .or_else(|| url_lower.strip_prefix("//"))
480            .unwrap_or(&url_lower);
481        let text_lower = text.to_ascii_lowercase();
482        // Exact match: text equals the URL with the scheme removed.
483        if url_without_scheme == text_lower.as_str() {
484            return true;
485        }
486        // Prefix match: text is the hostname portion and the URL has a path/query/fragment.
487        url_without_scheme.len() > text_lower.len()
488            && url_without_scheme.starts_with(text_lower.as_str())
489            && matches!(
490                url_without_scheme.as_bytes().get(text_lower.len()),
491                Some(b'/') | Some(b'?') | Some(b'#')
492            )
493    }
494
495    /// Check if a position within a line falls inside an angle-bracket URL (`<scheme://...>`).
496    ///
497    /// The link parser skips autolinks inside HTML comments, so `ctx.links` won't
498    /// contain them. This function detects angle-bracket URLs directly in the line
499    /// text, covering both HTML comments and regular text as a safety net.
500    fn is_in_angle_bracket_url(line: &str, pos: usize) -> bool {
501        let bytes = line.as_bytes();
502        let len = bytes.len();
503        let mut i = 0;
504        while i < len {
505            if bytes[i] == b'<' {
506                let after_open = i + 1;
507                // Check for a valid URI scheme per CommonMark autolink spec:
508                // scheme = [a-zA-Z][a-zA-Z0-9+.-]{0,31}
509                // followed by ':'
510                if after_open < len && bytes[after_open].is_ascii_alphabetic() {
511                    let mut s = after_open + 1;
512                    let scheme_max = (after_open + 32).min(len);
513                    while s < scheme_max
514                        && (bytes[s].is_ascii_alphanumeric()
515                            || bytes[s] == b'+'
516                            || bytes[s] == b'-'
517                            || bytes[s] == b'.')
518                    {
519                        s += 1;
520                    }
521                    if s < len && bytes[s] == b':' {
522                        // Valid scheme found; scan for closing '>' with no spaces or '<'
523                        let mut j = s + 1;
524                        let mut found_close = false;
525                        while j < len {
526                            match bytes[j] {
527                                b'>' => {
528                                    found_close = true;
529                                    break;
530                                }
531                                b' ' | b'<' => break,
532                                _ => j += 1,
533                            }
534                        }
535                        if found_close && pos >= i && pos <= j {
536                            return true;
537                        }
538                        if found_close {
539                            i = j + 1;
540                            continue;
541                        }
542                    }
543                }
544            }
545            i += 1;
546        }
547        false
548    }
549
550    /// Check if `byte_pos` falls inside the URL of a `[[text]](url)` construct.
551    ///
552    /// pulldown-cmark with WikiLinks enabled parses `[[text]]` as a WikiLink and
553    /// records it in `ctx.links`, but the immediately following `(url)` is left as
554    /// plain text and is therefore absent from `ctx.links`. This function detects
555    /// that gap by looking for a WikiLink entry whose `byte_end` falls exactly on a
556    /// `(` in the raw content, then checking whether `byte_pos` lies inside the
557    /// matching parenthesised URL span.
558    ///
559    /// Unlike `is_in_markdown_link_url`, this function is anchored to real parser
560    /// output (`ctx.links`) and will not suppress violations in text that merely
561    /// looks like a link (e.g. `[foo](github x)` with a space in the URL).
562    fn is_in_wikilink_url(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
563        use pulldown_cmark::LinkType;
564        let content = ctx.content.as_bytes();
565
566        // ctx.links is sorted by byte_offset; only links that start at or before
567        // byte_pos can have a URL that encloses it.
568        let end = ctx.links.partition_point(|l| l.byte_offset <= byte_pos);
569
570        for link in &ctx.links[..end] {
571            if !matches!(link.link_type, LinkType::WikiLink { .. }) {
572                continue;
573            }
574            let wiki_end = link.byte_end;
575            // The WikiLink must end before byte_pos and be immediately followed by '('.
576            if wiki_end >= byte_pos || wiki_end >= content.len() || content[wiki_end] != b'(' {
577                continue;
578            }
579            // Scan to the matching ')' tracking nested parens and backslash escapes.
580            // Per CommonMark, an unquoted inline link destination cannot contain
581            // spaces, tabs, or newlines. If we encounter one, this is parenthesised
582            // prose rather than a URL, and pulldown-cmark will not parse it as a link.
583            let mut depth: u32 = 1;
584            let mut k = wiki_end + 1;
585            let mut valid_destination = true;
586            while k < content.len() && depth > 0 {
587                match content[k] {
588                    b'\\' => {
589                        k += 1; // skip escaped character
590                    }
591                    b'(' => depth += 1,
592                    b')' => depth -= 1,
593                    b' ' | b'\t' | b'\n' | b'\r' => {
594                        valid_destination = false;
595                        break;
596                    }
597                    _ => {}
598                }
599                k += 1;
600            }
601            // byte_pos is inside the URL if it falls between '(' and the matching ')'
602            // and the destination is valid (no unescaped whitespace).
603            if valid_destination && depth == 0 && byte_pos > wiki_end && byte_pos < k {
604                return true;
605            }
606        }
607        false
608    }
609
610    /// Check if a position within a line falls inside a Markdown link's
611    /// non-text portion (URL or reference label).
612    ///
613    /// Used as a text-level fallback for HTML comments, HTML blocks, and
614    /// frontmatter where pulldown-cmark skips link parsing entirely. Operates on
615    /// raw line bytes and therefore cannot distinguish real links from text that
616    /// merely resembles link syntax; do not call on regular markdown lines.
617    /// - `[text](url)` — returns true if `pos` is within `(...)`
618    /// - `[text][ref]` — returns true if `pos` is within the second `[...]`
619    fn is_in_markdown_link_url(line: &str, pos: usize) -> bool {
620        let bytes = line.as_bytes();
621        let len = bytes.len();
622        let mut i = 0;
623
624        while i < len {
625            // Look for unescaped '[' (handle double-escaped \\[ as unescaped)
626            if bytes[i] == b'[' && (i == 0 || bytes[i - 1] != b'\\' || (i >= 2 && bytes[i - 2] == b'\\')) {
627                // Find matching ']' handling nested brackets
628                let mut depth: u32 = 1;
629                let mut j = i + 1;
630                while j < len && depth > 0 {
631                    match bytes[j] {
632                        b'\\' => {
633                            j += 1; // skip escaped char
634                        }
635                        b'[' => depth += 1,
636                        b']' => depth -= 1,
637                        _ => {}
638                    }
639                    j += 1;
640                }
641
642                // j is now one past the ']'
643                if depth == 0 && j < len {
644                    if bytes[j] == b'(' {
645                        // Inline link: [text](url)
646                        let url_start = j;
647                        let mut paren_depth: u32 = 1;
648                        let mut k = j + 1;
649                        while k < len && paren_depth > 0 {
650                            match bytes[k] {
651                                b'\\' => {
652                                    k += 1; // skip escaped char
653                                }
654                                b'(' => paren_depth += 1,
655                                b')' => paren_depth -= 1,
656                                _ => {}
657                            }
658                            k += 1;
659                        }
660
661                        if paren_depth == 0 {
662                            if pos > url_start && pos < k {
663                                return true;
664                            }
665                            i = k;
666                            continue;
667                        }
668                    } else if bytes[j] == b'[' {
669                        // Reference link: [text][ref]
670                        let ref_start = j;
671                        let mut ref_depth: u32 = 1;
672                        let mut k = j + 1;
673                        while k < len && ref_depth > 0 {
674                            match bytes[k] {
675                                b'\\' => {
676                                    k += 1;
677                                }
678                                b'[' => ref_depth += 1,
679                                b']' => ref_depth -= 1,
680                                _ => {}
681                            }
682                            k += 1;
683                        }
684
685                        if ref_depth == 0 {
686                            if pos > ref_start && pos < k {
687                                return true;
688                            }
689                            i = k;
690                            continue;
691                        }
692                    }
693                }
694            }
695            i += 1;
696        }
697        false
698    }
699
700    /// Check if a position within a line falls inside backtick-delimited code.
701    ///
702    /// pulldown-cmark does not parse markdown syntax inside HTML comments, so
703    /// `ctx.is_in_code_block_or_span` returns false for backtick-wrapped text
704    /// within comments. This function detects backtick code spans directly in
705    /// the line text following CommonMark rules: a code span starts with N
706    /// backticks and ends with exactly N backticks.
707    fn is_in_backtick_code_in_line(line: &str, pos: usize) -> bool {
708        let bytes = line.as_bytes();
709        let len = bytes.len();
710        let mut i = 0;
711        while i < len {
712            if bytes[i] == b'`' {
713                // Count the opening backtick sequence length
714                let open_start = i;
715                while i < len && bytes[i] == b'`' {
716                    i += 1;
717                }
718                let tick_len = i - open_start;
719
720                // Scan forward for a closing sequence of exactly tick_len backticks
721                while i < len {
722                    if bytes[i] == b'`' {
723                        let close_start = i;
724                        while i < len && bytes[i] == b'`' {
725                            i += 1;
726                        }
727                        if i - close_start == tick_len {
728                            // Matched pair found; the code span content is between
729                            // the end of the opening backticks and the start of the
730                            // closing backticks (exclusive of the backticks themselves).
731                            let content_start = open_start + tick_len;
732                            let content_end = close_start;
733                            if pos >= content_start && pos < content_end {
734                                return true;
735                            }
736                            // Continue scanning after this pair
737                            break;
738                        }
739                        // Not the right length; keep scanning
740                    } else {
741                        i += 1;
742                    }
743                }
744            } else {
745                i += 1;
746            }
747        }
748        false
749    }
750
751    // Check if a character is a word boundary (handles Unicode)
752    fn is_word_boundary_char(c: char) -> bool {
753        !c.is_alphanumeric()
754    }
755
756    // Check if position is at a word boundary using byte-level lookups.
757    fn is_at_word_boundary(content: &str, pos: usize, is_start: bool) -> bool {
758        if is_start {
759            if pos == 0 {
760                return true;
761            }
762            match content[..pos].chars().next_back() {
763                None => true,
764                Some(c) => Self::is_word_boundary_char(c),
765            }
766        } else {
767            if pos >= content.len() {
768                return true;
769            }
770            match content[pos..].chars().next() {
771                None => true,
772                Some(c) => Self::is_word_boundary_char(c),
773            }
774        }
775    }
776
777    /// For a frontmatter line, return the byte offset where the checkable
778    /// value portion starts. Returns `usize::MAX` if the entire line should be
779    /// skipped (frontmatter delimiters, key-only lines, YAML comments, flow constructs).
780    fn frontmatter_value_offset(line: &str) -> usize {
781        let trimmed = line.trim();
782
783        // Skip frontmatter delimiters and empty lines
784        if trimmed == "---" || trimmed == "+++" || trimmed.is_empty() {
785            return usize::MAX;
786        }
787
788        // Skip YAML comments
789        if trimmed.starts_with('#') {
790            return usize::MAX;
791        }
792
793        // YAML list item: "  - item" or "  - key: value"
794        let stripped = line.trim_start();
795        if let Some(after_dash) = stripped.strip_prefix("- ") {
796            let leading = line.len() - stripped.len();
797            // Check if the list item contains a mapping (e.g., "- key: value")
798            if let Some(result) = Self::kv_value_offset(line, after_dash, leading + 2) {
799                return result;
800            }
801            // Bare list item value (no colon) - check content after "- "
802            return leading + 2;
803        }
804        if stripped == "-" {
805            return usize::MAX;
806        }
807
808        // Key-value pair with colon separator (YAML): "key: value"
809        if let Some(result) = Self::kv_value_offset(line, stripped, line.len() - stripped.len()) {
810            return result;
811        }
812
813        // Key-value pair with equals separator (TOML): "key = value"
814        if let Some(eq_pos) = line.find('=') {
815            let after_eq = eq_pos + 1;
816            if after_eq < line.len() && line.as_bytes()[after_eq] == b' ' {
817                let value_start = after_eq + 1;
818                let value_slice = &line[value_start..];
819                let value_trimmed = value_slice.trim();
820                if value_trimmed.is_empty() {
821                    return usize::MAX;
822                }
823                // For quoted values, skip the opening quote character
824                if (value_trimmed.starts_with('"') && value_trimmed.ends_with('"'))
825                    || (value_trimmed.starts_with('\'') && value_trimmed.ends_with('\''))
826                {
827                    let quote_offset = value_slice.find(['"', '\'']).unwrap_or(0);
828                    return value_start + quote_offset + 1;
829                }
830                return value_start;
831            }
832            // Equals with no space after or at end of line -> no value to check
833            return usize::MAX;
834        }
835
836        // No separator found - continuation line or bare value, check the whole line
837        0
838    }
839
840    /// Parse a key-value pair using colon separator within `content` that starts
841    /// at `base_offset` in the original line. Returns `Some(offset)` if a colon
842    /// separator is found, `None` if no colon is present.
843    fn kv_value_offset(line: &str, content: &str, base_offset: usize) -> Option<usize> {
844        let colon_pos = content.find(':')?;
845        let abs_colon = base_offset + colon_pos;
846        let after_colon = abs_colon + 1;
847        if after_colon < line.len() && line.as_bytes()[after_colon] == b' ' {
848            let value_start = after_colon + 1;
849            let value_slice = &line[value_start..];
850            let value_trimmed = value_slice.trim();
851            if value_trimmed.is_empty() {
852                return Some(usize::MAX);
853            }
854            // Skip flow mappings and flow sequences - too complex for heuristic parsing
855            if value_trimmed.starts_with('{') || value_trimmed.starts_with('[') {
856                return Some(usize::MAX);
857            }
858            // For quoted values, skip the opening quote character
859            if (value_trimmed.starts_with('"') && value_trimmed.ends_with('"'))
860                || (value_trimmed.starts_with('\'') && value_trimmed.ends_with('\''))
861            {
862                let quote_offset = value_slice.find(['"', '\'']).unwrap_or(0);
863                return Some(value_start + quote_offset + 1);
864            }
865            return Some(value_start);
866        }
867        // Colon with no space after or at end of line -> no value to check
868        Some(usize::MAX)
869    }
870
871    // Get the proper name that should be used for a found name
872    fn get_proper_name_for(&self, found_name: &str) -> Option<String> {
873        let found_lower = found_name.to_lowercase();
874
875        // Iterate through the configured proper names
876        for name in &self.config.names {
877            let lower_name = name.to_lowercase();
878            let lower_name_no_dots = lower_name.replace('.', "");
879
880            // Direct match
881            if found_lower == lower_name || found_lower == lower_name_no_dots {
882                return Some(name.clone());
883            }
884
885            // Check ASCII-normalized version
886            let ascii_normalized = Self::ascii_normalize(&lower_name);
887
888            let ascii_no_dots = ascii_normalized.replace('.', "");
889
890            if found_lower == ascii_normalized || found_lower == ascii_no_dots {
891                return Some(name.clone());
892            }
893        }
894        None
895    }
896}
897
898impl Rule for MD044ProperNames {
899    fn name(&self) -> &'static str {
900        "MD044"
901    }
902
903    fn description(&self) -> &'static str {
904        "Proper names should have the correct capitalization"
905    }
906
907    fn category(&self) -> RuleCategory {
908        RuleCategory::Other
909    }
910
911    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
912        if self.config.names.is_empty() {
913            return true;
914        }
915        // Quick check if any configured name variants exist (case-insensitive)
916        let content_lower = if ctx.content.is_ascii() {
917            ctx.content.to_ascii_lowercase()
918        } else {
919            ctx.content.to_lowercase()
920        };
921        !self.name_variants.iter().any(|name| content_lower.contains(name))
922    }
923
924    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
925        let content = ctx.content;
926        if content.is_empty() || self.config.names.is_empty() || self.combined_pattern.is_none() {
927            return Ok(Vec::new());
928        }
929
930        // Compute lowercase content once and reuse across all checks
931        let content_lower = if content.is_ascii() {
932            content.to_ascii_lowercase()
933        } else {
934            content.to_lowercase()
935        };
936
937        // Early return: use pre-computed name_variants for the quick check
938        let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
939
940        if !has_potential_matches {
941            return Ok(Vec::new());
942        }
943
944        let line_index = &ctx.line_index;
945        let violations = self.find_name_violations(content, ctx, &content_lower);
946
947        let warnings = violations
948            .into_iter()
949            .filter_map(|(line, column, found_name)| {
950                self.get_proper_name_for(&found_name).map(|proper_name| {
951                    // `column` is a 1-indexed byte offset into the line (from regex .start() + 1).
952                    // Build the Fix range directly in bytes to avoid the character-based
953                    // line_col_to_byte_range_with_length function, which would misinterpret
954                    // the byte offset as a character count on lines with multi-byte content.
955                    let line_start = line_index.get_line_start_byte(line).unwrap_or(0);
956                    let byte_start = line_start + (column - 1);
957                    let byte_end = byte_start + found_name.len();
958                    LintWarning {
959                        rule_name: Some(self.name().to_string()),
960                        line,
961                        column,
962                        end_line: line,
963                        end_column: column + found_name.len(),
964                        message: format!("Proper name '{found_name}' should be '{proper_name}'"),
965                        severity: Severity::Warning,
966                        fix: Some(Fix::new(byte_start..byte_end, proper_name)),
967                    }
968                })
969            })
970            .collect();
971
972        Ok(warnings)
973    }
974
975    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
976        if self.should_skip(ctx) {
977            return Ok(ctx.content.to_string());
978        }
979        let warnings = self.check(ctx)?;
980        if warnings.is_empty() {
981            return Ok(ctx.content.to_string());
982        }
983        let warnings =
984            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
985        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
986            .map_err(crate::rule::LintError::InvalidInput)
987    }
988
989    fn as_any(&self) -> &dyn std::any::Any {
990        self
991    }
992
993    fn default_config_section(&self) -> Option<(String, toml::Value)> {
994        let json_value = serde_json::to_value(&self.config).ok()?;
995        Some((
996            self.name().to_string(),
997            crate::rule_config_serde::json_to_toml_value(&json_value)?,
998        ))
999    }
1000
1001    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
1002    where
1003        Self: Sized,
1004    {
1005        let rule_config = crate::rule_config_serde::load_rule_config::<MD044Config>(config);
1006        Box::new(Self::from_config_struct(rule_config))
1007    }
1008}
1009
1010#[cfg(test)]
1011mod tests {
1012    use super::*;
1013    use crate::lint_context::LintContext;
1014
1015    fn create_context(content: &str) -> LintContext<'_> {
1016        LintContext::new(content, crate::config::MarkdownFlavor::Standard, None)
1017    }
1018
1019    #[test]
1020    fn test_correctly_capitalized_names() {
1021        let rule = MD044ProperNames::new(
1022            vec![
1023                "JavaScript".to_string(),
1024                "TypeScript".to_string(),
1025                "Node.js".to_string(),
1026            ],
1027            true,
1028        );
1029
1030        let content = "This document uses JavaScript, TypeScript, and Node.js correctly.";
1031        let ctx = create_context(content);
1032        let result = rule.check(&ctx).unwrap();
1033        assert!(result.is_empty(), "Should not flag correctly capitalized names");
1034    }
1035
1036    #[test]
1037    fn test_incorrectly_capitalized_names() {
1038        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
1039
1040        let content = "This document uses javascript and typescript incorrectly.";
1041        let ctx = create_context(content);
1042        let result = rule.check(&ctx).unwrap();
1043
1044        assert_eq!(result.len(), 2, "Should flag two incorrect capitalizations");
1045        assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
1046        assert_eq!(result[0].line, 1);
1047        assert_eq!(result[0].column, 20);
1048        assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
1049        assert_eq!(result[1].line, 1);
1050        assert_eq!(result[1].column, 35);
1051    }
1052
1053    #[test]
1054    fn test_names_at_beginning_of_sentences() {
1055        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "Python".to_string()], true);
1056
1057        let content = "javascript is a great language. python is also popular.";
1058        let ctx = create_context(content);
1059        let result = rule.check(&ctx).unwrap();
1060
1061        assert_eq!(result.len(), 2, "Should flag names at beginning of sentences");
1062        assert_eq!(result[0].line, 1);
1063        assert_eq!(result[0].column, 1);
1064        assert_eq!(result[1].line, 1);
1065        assert_eq!(result[1].column, 33);
1066    }
1067
1068    #[test]
1069    fn test_names_in_code_blocks_checked_by_default() {
1070        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1071
1072        let content = r#"Here is some text with JavaScript.
1073
1074```javascript
1075// This javascript should be checked
1076const lang = "javascript";
1077```
1078
1079But this javascript should be flagged."#;
1080
1081        let ctx = create_context(content);
1082        let result = rule.check(&ctx).unwrap();
1083
1084        assert_eq!(result.len(), 3, "Should flag javascript inside and outside code blocks");
1085        assert_eq!(result[0].line, 4);
1086        assert_eq!(result[1].line, 5);
1087        assert_eq!(result[2].line, 8);
1088    }
1089
1090    #[test]
1091    fn test_names_in_code_blocks_ignored_when_disabled() {
1092        let rule = MD044ProperNames::new(
1093            vec!["JavaScript".to_string()],
1094            false, // code_blocks = false means skip code blocks
1095        );
1096
1097        let content = r#"```
1098javascript in code block
1099```"#;
1100
1101        let ctx = create_context(content);
1102        let result = rule.check(&ctx).unwrap();
1103
1104        assert_eq!(
1105            result.len(),
1106            0,
1107            "Should not flag javascript in code blocks when code_blocks is false"
1108        );
1109    }
1110
1111    #[test]
1112    fn test_names_in_inline_code_checked_by_default() {
1113        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1114
1115        let content = "This is `javascript` in inline code and javascript outside.";
1116        let ctx = create_context(content);
1117        let result = rule.check(&ctx).unwrap();
1118
1119        // When code_blocks=true, inline code should be checked
1120        assert_eq!(result.len(), 2, "Should flag javascript inside and outside inline code");
1121        assert_eq!(result[0].column, 10); // javascript in inline code
1122        assert_eq!(result[1].column, 41); // javascript outside
1123    }
1124
1125    #[test]
1126    fn test_multiple_names_in_same_line() {
1127        let rule = MD044ProperNames::new(
1128            vec!["JavaScript".to_string(), "TypeScript".to_string(), "React".to_string()],
1129            true,
1130        );
1131
1132        let content = "I use javascript, typescript, and react in my projects.";
1133        let ctx = create_context(content);
1134        let result = rule.check(&ctx).unwrap();
1135
1136        assert_eq!(result.len(), 3, "Should flag all three incorrect names");
1137        assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
1138        assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
1139        assert_eq!(result[2].message, "Proper name 'react' should be 'React'");
1140    }
1141
1142    #[test]
1143    fn test_case_sensitivity() {
1144        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1145
1146        let content = "JAVASCRIPT, Javascript, javascript, and JavaScript variations.";
1147        let ctx = create_context(content);
1148        let result = rule.check(&ctx).unwrap();
1149
1150        assert_eq!(result.len(), 3, "Should flag all incorrect case variations");
1151        // JavaScript (correct) should not be flagged
1152        assert!(result.iter().all(|w| w.message.contains("should be 'JavaScript'")));
1153    }
1154
1155    #[test]
1156    fn test_configuration_with_custom_name_list() {
1157        let config = MD044Config {
1158            names: vec!["GitHub".to_string(), "GitLab".to_string(), "DevOps".to_string()],
1159            code_blocks: true,
1160            html_elements: true,
1161            html_comments: true,
1162        };
1163        let rule = MD044ProperNames::from_config_struct(config);
1164
1165        let content = "We use github, gitlab, and devops for our workflow.";
1166        let ctx = create_context(content);
1167        let result = rule.check(&ctx).unwrap();
1168
1169        assert_eq!(result.len(), 3, "Should flag all custom names");
1170        assert_eq!(result[0].message, "Proper name 'github' should be 'GitHub'");
1171        assert_eq!(result[1].message, "Proper name 'gitlab' should be 'GitLab'");
1172        assert_eq!(result[2].message, "Proper name 'devops' should be 'DevOps'");
1173    }
1174
1175    #[test]
1176    fn test_empty_configuration() {
1177        let rule = MD044ProperNames::new(vec![], true);
1178
1179        let content = "This has javascript and typescript but no configured names.";
1180        let ctx = create_context(content);
1181        let result = rule.check(&ctx).unwrap();
1182
1183        assert!(result.is_empty(), "Should not flag anything with empty configuration");
1184    }
1185
1186    #[test]
1187    fn test_names_with_special_characters() {
1188        let rule = MD044ProperNames::new(
1189            vec!["Node.js".to_string(), "ASP.NET".to_string(), "C++".to_string()],
1190            true,
1191        );
1192
1193        let content = "We use nodejs, asp.net, ASP.NET, and c++ in our stack.";
1194        let ctx = create_context(content);
1195        let result = rule.check(&ctx).unwrap();
1196
1197        // nodejs should match Node.js (dotless variation)
1198        // asp.net should be flagged (wrong case)
1199        // ASP.NET should not be flagged (correct)
1200        // c++ should be flagged
1201        assert_eq!(result.len(), 3, "Should handle special characters correctly");
1202
1203        let messages: Vec<&str> = result.iter().map(|w| w.message.as_str()).collect();
1204        assert!(messages.contains(&"Proper name 'nodejs' should be 'Node.js'"));
1205        assert!(messages.contains(&"Proper name 'asp.net' should be 'ASP.NET'"));
1206        assert!(messages.contains(&"Proper name 'c++' should be 'C++'"));
1207    }
1208
1209    #[test]
1210    fn test_word_boundaries() {
1211        let rule = MD044ProperNames::new(vec!["Java".to_string(), "Script".to_string()], true);
1212
1213        let content = "JavaScript is not java or script, but Java and Script are separate.";
1214        let ctx = create_context(content);
1215        let result = rule.check(&ctx).unwrap();
1216
1217        // Should only flag lowercase "java" and "script" as separate words
1218        assert_eq!(result.len(), 2, "Should respect word boundaries");
1219        assert!(result.iter().any(|w| w.column == 19)); // "java" position
1220        assert!(result.iter().any(|w| w.column == 27)); // "script" position
1221    }
1222
1223    #[test]
1224    fn test_fix_method() {
1225        let rule = MD044ProperNames::new(
1226            vec![
1227                "JavaScript".to_string(),
1228                "TypeScript".to_string(),
1229                "Node.js".to_string(),
1230            ],
1231            true,
1232        );
1233
1234        let content = "I love javascript, typescript, and nodejs!";
1235        let ctx = create_context(content);
1236        let fixed = rule.fix(&ctx).unwrap();
1237
1238        assert_eq!(fixed, "I love JavaScript, TypeScript, and Node.js!");
1239    }
1240
1241    #[test]
1242    fn test_fix_multiple_occurrences() {
1243        let rule = MD044ProperNames::new(vec!["Python".to_string()], true);
1244
1245        let content = "python is great. I use python daily. PYTHON is powerful.";
1246        let ctx = create_context(content);
1247        let fixed = rule.fix(&ctx).unwrap();
1248
1249        assert_eq!(fixed, "Python is great. I use Python daily. Python is powerful.");
1250    }
1251
1252    #[test]
1253    fn test_fix_checks_code_blocks_by_default() {
1254        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1255
1256        let content = r#"I love javascript.
1257
1258```
1259const lang = "javascript";
1260```
1261
1262More javascript here."#;
1263
1264        let ctx = create_context(content);
1265        let fixed = rule.fix(&ctx).unwrap();
1266
1267        let expected = r#"I love JavaScript.
1268
1269```
1270const lang = "JavaScript";
1271```
1272
1273More JavaScript here."#;
1274
1275        assert_eq!(fixed, expected);
1276    }
1277
1278    #[test]
1279    fn test_multiline_content() {
1280        let rule = MD044ProperNames::new(vec!["Rust".to_string(), "Python".to_string()], true);
1281
1282        let content = r#"First line with rust.
1283Second line with python.
1284Third line with RUST and PYTHON."#;
1285
1286        let ctx = create_context(content);
1287        let result = rule.check(&ctx).unwrap();
1288
1289        assert_eq!(result.len(), 4, "Should flag all incorrect occurrences");
1290        assert_eq!(result[0].line, 1);
1291        assert_eq!(result[1].line, 2);
1292        assert_eq!(result[2].line, 3);
1293        assert_eq!(result[3].line, 3);
1294    }
1295
1296    #[test]
1297    fn test_default_config() {
1298        let config = MD044Config::default();
1299        assert!(config.names.is_empty());
1300        assert!(!config.code_blocks);
1301        assert!(config.html_elements);
1302        assert!(config.html_comments);
1303    }
1304
1305    #[test]
1306    fn test_default_config_checks_html_comments() {
1307        let config = MD044Config {
1308            names: vec!["JavaScript".to_string()],
1309            ..MD044Config::default()
1310        };
1311        let rule = MD044ProperNames::from_config_struct(config);
1312
1313        let content = "# Guide\n\n<!-- javascript mentioned here -->\n";
1314        let ctx = create_context(content);
1315        let result = rule.check(&ctx).unwrap();
1316
1317        assert_eq!(result.len(), 1, "Default config should check HTML comments");
1318        assert_eq!(result[0].line, 3);
1319    }
1320
1321    #[test]
1322    fn test_default_config_skips_code_blocks() {
1323        let config = MD044Config {
1324            names: vec!["JavaScript".to_string()],
1325            ..MD044Config::default()
1326        };
1327        let rule = MD044ProperNames::from_config_struct(config);
1328
1329        let content = "# Guide\n\n```\njavascript in code\n```\n";
1330        let ctx = create_context(content);
1331        let result = rule.check(&ctx).unwrap();
1332
1333        assert_eq!(result.len(), 0, "Default config should skip code blocks");
1334    }
1335
1336    #[test]
1337    fn test_standalone_html_comment_checked() {
1338        let config = MD044Config {
1339            names: vec!["Test".to_string()],
1340            ..MD044Config::default()
1341        };
1342        let rule = MD044ProperNames::from_config_struct(config);
1343
1344        let content = "# Heading\n\n<!-- this is a test example -->\n";
1345        let ctx = create_context(content);
1346        let result = rule.check(&ctx).unwrap();
1347
1348        assert_eq!(result.len(), 1, "Should flag proper name in standalone HTML comment");
1349        assert_eq!(result[0].line, 3);
1350    }
1351
1352    #[test]
1353    fn test_inline_config_comments_not_flagged() {
1354        let config = MD044Config {
1355            names: vec!["RUMDL".to_string()],
1356            ..MD044Config::default()
1357        };
1358        let rule = MD044ProperNames::from_config_struct(config);
1359
1360        // Lines 1, 3, 4, 6 are inline config comments — should not be flagged.
1361        // Lines 2, 5 contain "rumdl" in regular text — flagged by rule.check(),
1362        // but would be suppressed by the linting engine's inline config filtering.
1363        let content = "<!-- rumdl-disable MD044 -->\nSome rumdl text here.\n<!-- rumdl-enable MD044 -->\n<!-- markdownlint-disable -->\nMore rumdl text.\n<!-- markdownlint-enable -->\n";
1364        let ctx = create_context(content);
1365        let result = rule.check(&ctx).unwrap();
1366
1367        assert_eq!(result.len(), 2, "Should only flag body lines, not config comments");
1368        assert_eq!(result[0].line, 2);
1369        assert_eq!(result[1].line, 5);
1370    }
1371
1372    #[test]
1373    fn test_html_comment_skipped_when_disabled() {
1374        let config = MD044Config {
1375            names: vec!["Test".to_string()],
1376            code_blocks: true,
1377            html_elements: true,
1378            html_comments: false,
1379        };
1380        let rule = MD044ProperNames::from_config_struct(config);
1381
1382        let content = "# Heading\n\n<!-- this is a test example -->\n\nRegular test here.\n";
1383        let ctx = create_context(content);
1384        let result = rule.check(&ctx).unwrap();
1385
1386        assert_eq!(
1387            result.len(),
1388            1,
1389            "Should only flag 'test' outside HTML comment when html_comments=false"
1390        );
1391        assert_eq!(result[0].line, 5);
1392    }
1393
1394    #[test]
1395    fn test_fix_corrects_html_comment_content() {
1396        let config = MD044Config {
1397            names: vec!["JavaScript".to_string()],
1398            ..MD044Config::default()
1399        };
1400        let rule = MD044ProperNames::from_config_struct(config);
1401
1402        let content = "# Guide\n\n<!-- javascript mentioned here -->\n";
1403        let ctx = create_context(content);
1404        let fixed = rule.fix(&ctx).unwrap();
1405
1406        assert_eq!(fixed, "# Guide\n\n<!-- JavaScript mentioned here -->\n");
1407    }
1408
1409    #[test]
1410    fn test_fix_does_not_modify_inline_config_comments() {
1411        let config = MD044Config {
1412            names: vec!["RUMDL".to_string()],
1413            ..MD044Config::default()
1414        };
1415        let rule = MD044ProperNames::from_config_struct(config);
1416
1417        let content = "<!-- rumdl-disable -->\nSome rumdl text.\n<!-- rumdl-enable -->\n";
1418        let ctx = create_context(content);
1419        let fixed = rule.fix(&ctx).unwrap();
1420
1421        // Config comments should be untouched
1422        assert!(fixed.contains("<!-- rumdl-disable -->"));
1423        assert!(fixed.contains("<!-- rumdl-enable -->"));
1424        // Body text inside disable block should NOT be fixed (rule is disabled)
1425        assert!(
1426            fixed.contains("Some rumdl text."),
1427            "Line inside rumdl-disable block should not be modified by fix()"
1428        );
1429    }
1430
1431    #[test]
1432    fn test_fix_respects_inline_disable_partial() {
1433        let config = MD044Config {
1434            names: vec!["RUMDL".to_string()],
1435            ..MD044Config::default()
1436        };
1437        let rule = MD044ProperNames::from_config_struct(config);
1438
1439        let content =
1440            "<!-- rumdl-disable MD044 -->\nSome rumdl text.\n<!-- rumdl-enable MD044 -->\n\nSome rumdl text outside.\n";
1441        let ctx = create_context(content);
1442        let fixed = rule.fix(&ctx).unwrap();
1443
1444        // Line inside disable block should be preserved
1445        assert!(
1446            fixed.contains("Some rumdl text.\n<!-- rumdl-enable"),
1447            "Line inside disable block should not be modified"
1448        );
1449        // Line outside disable block should be fixed
1450        assert!(
1451            fixed.contains("Some RUMDL text outside."),
1452            "Line outside disable block should be fixed"
1453        );
1454    }
1455
1456    #[test]
1457    fn test_performance_with_many_names() {
1458        let mut names = vec![];
1459        for i in 0..50 {
1460            names.push(format!("ProperName{i}"));
1461        }
1462
1463        let rule = MD044ProperNames::new(names, true);
1464
1465        let content = "This has propername0, propername25, and propername49 incorrectly.";
1466        let ctx = create_context(content);
1467        let result = rule.check(&ctx).unwrap();
1468
1469        assert_eq!(result.len(), 3, "Should handle many configured names efficiently");
1470    }
1471
1472    #[test]
1473    fn test_large_name_count_performance() {
1474        // Verify MD044 can handle large numbers of names without regex limitations
1475        // This test confirms that fancy-regex handles large patterns well
1476        let names = (0..1000).map(|i| format!("ProperName{i}")).collect::<Vec<_>>();
1477
1478        let rule = MD044ProperNames::new(names, true);
1479
1480        // The combined pattern should be created successfully
1481        assert!(rule.combined_pattern.is_some());
1482
1483        // Should be able to check content without errors
1484        let content = "This has propername0 and propername999 in it.";
1485        let ctx = create_context(content);
1486        let result = rule.check(&ctx).unwrap();
1487
1488        // Should detect both incorrect names
1489        assert_eq!(result.len(), 2, "Should handle 1000 names without issues");
1490    }
1491
/// Runs `check` twice on the same context and expects the same warning position both times.
#[test]
fn test_cache_behavior() {
    let linter = MD044ProperNames::new(vec![String::from("JavaScript")], true);
    let text = "Using javascript here.";
    let ctx = create_context(text);

    // Initial run.
    let first = linter.check(&ctx).unwrap();
    assert_eq!(first.len(), 1);

    // Repeat run on identical content; intended to exercise the rule's cache.
    let second = linter.check(&ctx).unwrap();
    assert_eq!(second.len(), 1);

    // Both runs must point at the same location.
    let (a, b) = (&first[0], &second[0]);
    assert_eq!(a.line, b.line);
    assert_eq!(a.column, b.column);
}
1511
/// Expects names inside a single-line HTML comment to be ignored when `html_comments` is off.
#[test]
fn test_html_comments_not_checked_when_disabled() {
    // Code blocks and HTML elements are checked; HTML comments are not.
    let linter = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("JavaScript")],
        code_blocks: true,
        html_elements: true,
        html_comments: false,
    });

    let markdown = r#"Regular javascript here.
<!-- This javascript in HTML comment should be ignored -->
More javascript outside."#;

    let ctx = create_context(markdown);
    let warnings = linter.check(&ctx).unwrap();

    assert_eq!(warnings.len(), 2, "Should only flag javascript outside HTML comments");
    // Lines 1 and 3 are the text before and after the comment.
    assert_eq!(warnings[0].line, 1);
    assert_eq!(warnings[1].line, 3);
}
1533
/// Expects names inside an HTML comment to be flagged when `html_comments` is on.
#[test]
fn test_html_comments_checked_when_enabled() {
    // Every optional region is enabled: code blocks, HTML elements and HTML comments.
    let linter = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("JavaScript")],
        code_blocks: true,
        html_elements: true,
        html_comments: true,
    });

    let markdown = r#"Regular javascript here.
<!-- This javascript in HTML comment should be checked -->
More javascript outside."#;

    let ctx = create_context(markdown);
    let warnings = linter.check(&ctx).unwrap();

    // One occurrence per line, comment line included.
    assert_eq!(
        warnings.len(),
        3,
        "Should flag all javascript occurrences including in HTML comments"
    );
}
1557
/// Expects names inside a multi-line HTML comment to be ignored when `html_comments` is off.
#[test]
fn test_multiline_html_comments() {
    // Code blocks and HTML elements are checked; HTML comments are not.
    let linter = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("Python"), String::from("JavaScript")],
        code_blocks: true,
        html_elements: true,
        html_comments: false,
    });

    let markdown = r#"Regular python here.
<!--
This is a multiline comment
with javascript and python
that should be ignored
-->
More javascript outside."#;

    let ctx = create_context(markdown);
    let warnings = linter.check(&ctx).unwrap();

    assert_eq!(warnings.len(), 2, "Should only flag names outside HTML comments");
    // "python" before the comment, "javascript" after it.
    assert_eq!(warnings[0].line, 1);
    assert_eq!(warnings[1].line, 7);
}
1583
/// Expects `fix` to leave HTML comment text untouched when `html_comments` is off.
#[test]
fn test_fix_preserves_html_comments_when_disabled() {
    // Code blocks and HTML elements are checked; HTML comments are not.
    let linter = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("JavaScript")],
        code_blocks: true,
        html_elements: true,
        html_comments: false,
    });

    let markdown = r#"javascript here.
<!-- javascript in comment -->
More javascript."#;

    // Only the first and third lines are corrected.
    let expected = r#"JavaScript here.
<!-- javascript in comment -->
More JavaScript."#;

    let ctx = create_context(markdown);
    let fixed = linter.fix(&ctx).unwrap();

    assert_eq!(
        fixed, expected,
        "Should not fix names inside HTML comments when disabled"
    );
}
1610
/// Expects link text to be checked for proper names while link destinations are skipped.
#[test]
fn test_proper_names_in_link_text_are_flagged() {
    let configured = vec![
        String::from("JavaScript"),
        String::from("Node.js"),
        String::from("Python"),
    ];
    let linter = MD044ProperNames::new(configured, true);

    let markdown = r#"Check this [javascript documentation](https://javascript.info) for info.

Visit [node.js homepage](https://nodejs.org) and [python tutorial](https://python.org).

Real javascript should be flagged.

Also see the [typescript guide][ts-ref] for more.

Real python should be flagged too.

[ts-ref]: https://typescript.org/handbook"#;

    let ctx = create_context(markdown);
    let warnings = linter.check(&ctx).unwrap();

    // Expected hits (URLs are never counted):
    //   line 1: "javascript" in [javascript documentation]
    //   line 3: "node.js" in [node.js homepage] and "python" in [python tutorial]
    //   line 5: standalone "javascript"
    //   line 9: standalone "python"
    assert_eq!(warnings.len(), 5, "Expected 5 warnings: 3 in link text + 2 standalone");

    // Link text on line 1.
    let on_line_1: Vec<_> = warnings.iter().filter(|warning| warning.line == 1).collect();
    assert_eq!(on_line_1.len(), 1);
    assert!(on_line_1[0].message.contains("'javascript' should be 'JavaScript'"));

    // Link text on line 3 carries two hits: node.js and python.
    let on_line_3: Vec<_> = warnings.iter().filter(|warning| warning.line == 3).collect();
    assert_eq!(on_line_3.len(), 2);

    // Prose occurrences outside any link.
    assert!(
        warnings
            .iter()
            .any(|warning| warning.line == 5 && warning.message.contains("'javascript'"))
    );
    assert!(
        warnings
            .iter()
            .any(|warning| warning.line == 9 && warning.message.contains("'python'"))
    );
}
1657
/// Expects a configured name that appears only in a link destination to go unreported.
#[test]
fn test_link_urls_not_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("JavaScript")], true);

    // "javascript" occurs solely inside the URL part of the link.
    let markdown = r#"[Link Text](https://javascript.info/guide)"#;
    let ctx = create_context(markdown);
    let warnings = linter.check(&ctx).unwrap();

    assert!(warnings.is_empty(), "URLs should not be checked for proper names");
}
1671
/// Expects image alt text to be checked while the image URL and title are skipped.
#[test]
fn test_proper_names_in_image_alt_text_are_flagged() {
    let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);

    let content = r#"Here is a ![javascript logo](javascript.png "javascript icon") image.

Real javascript should be flagged."#;

    let ctx = create_context(content);
    let result = rule.check(&ctx).unwrap();

    // Image alt text should be checked, URL and title should not be checked
    // Line 1: ![javascript logo] - "javascript" should be flagged
    // Line 3: standalone javascript
    assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in alt text + 1 standalone");

    // Line numbers are compared with `assert_eq!` so a failure prints the actual value,
    // matching how the other tests in this module assert on `line`.
    assert!(result[0].message.contains("'javascript' should be 'JavaScript'"));
    assert_eq!(result[0].line, 1); // "![javascript logo]"
    assert!(result[1].message.contains("'javascript' should be 'JavaScript'"));
    assert_eq!(result[1].line, 3); // "Real javascript should be flagged."
}
1692
/// Expects a configured name that appears only in an image URL to go unreported.
#[test]
fn test_image_urls_not_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("JavaScript")], true);

    // "javascript" occurs solely inside the image destination.
    let markdown = r#"![Logo](https://javascript.info/logo.png)"#;
    let ctx = create_context(markdown);
    let warnings = linter.check(&ctx).unwrap();

    assert!(warnings.is_empty(), "Image URLs should not be checked for proper names");
}
1706
/// Expects reference-style link text to be checked and the reference definition to be skipped.
#[test]
fn test_reference_link_text_flagged_but_definition_not() {
    let configured = vec![String::from("JavaScript"), String::from("TypeScript")];
    let linter = MD044ProperNames::new(configured, true);

    let markdown = r#"Check the [javascript guide][js-ref] for details.

Real javascript should be flagged.

[js-ref]: https://javascript.info/typescript/guide"#;

    let ctx = create_context(markdown);
    let warnings = linter.check(&ctx).unwrap();

    // Expected hits:
    //   line 1: "javascript" in the link text [javascript guide]
    //   line 3: standalone "javascript"
    // The definition on line 5 contributes nothing.
    assert_eq!(warnings.len(), 2, "Expected 2 warnings: 1 in link text + 1 standalone");
    for expected_line in [1, 3] {
        assert!(
            warnings
                .iter()
                .any(|warning| warning.line == expected_line && warning.message.contains("'javascript'"))
        );
    }
}
1728
/// Expects a lone reference definition whose URL contains a configured name to go unreported.
#[test]
fn test_reference_definitions_not_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("JavaScript")], true);

    // The document consists of nothing but the definition line.
    let markdown = r#"[js-ref]: https://javascript.info/guide"#;
    let ctx = create_context(markdown);
    let warnings = linter.check(&ctx).unwrap();

    assert!(warnings.is_empty(), "Reference definitions should not be checked");
}
1742
/// Expects the text inside `[[...]]` WikiLinks to be checked for proper names.
#[test]
fn test_wikilinks_text_is_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("JavaScript")], true);

    let markdown = r#"[[javascript]]

Regular javascript here.

[[JavaScript|display text]]"#;

    let ctx = create_context(markdown);
    let warnings = linter.check(&ctx).unwrap();

    // Expected hits:
    //   line 1: "javascript" inside the WikiLink, starting at column 3
    //   line 3: standalone "javascript"
    // Line 5 already uses the correct capitalization.
    assert_eq!(warnings.len(), 2, "Expected 2 warnings: 1 in WikiLink + 1 standalone");

    let wikilink_hit = warnings
        .iter()
        .any(|warning| warning.line == 1 && warning.column == 3 && warning.message.contains("'javascript'"));
    assert!(wikilink_hit);

    let prose_hit = warnings
        .iter()
        .any(|warning| warning.line == 3 && warning.message.contains("'javascript'"));
    assert!(prose_hit);
}
1768
/// Expects link text that is itself a URL (`https://`, `http://`, `www.`) to go unreported.
#[test]
fn test_url_link_text_not_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("GitHub")], true);

    // Every link below uses a URL as its visible text.
    let markdown = r#"[https://github.com/org/repo](https://github.com/org/repo)

[http://github.com/org/repo](http://github.com/org/repo)

[www.github.com/org/repo](https://www.github.com/org/repo)"#;

    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    assert!(
        result.is_empty(),
        "URL-like link text should not be flagged, got: {result:?}"
    );
}
1788
/// Expects URL link text that begins with a space to still be treated as a URL.
#[test]
fn test_url_link_text_with_leading_space_not_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("GitHub")], true);

    // Note the space right after the opening bracket.
    let markdown = r#"[ https://github.com/org/repo](https://github.com/org/repo)"#;
    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    assert!(
        result.is_empty(),
        "URL-like link text with leading space should not be flagged, got: {result:?}"
    );
}
1804
/// Expects URL link text written with an uppercase scheme and host to go unreported.
#[test]
fn test_url_link_text_uppercase_scheme_not_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("GitHub")], true);

    // The visible text is an all-caps `HTTPS://GITHUB.COM` URL.
    let markdown = r#"[HTTPS://GITHUB.COM/org/repo](https://github.com/org/repo)"#;
    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    assert!(
        result.is_empty(),
        "URL-like link text with uppercase scheme should not be flagged, got: {result:?}"
    );
}
1819
/// Expects only prose link text to be flagged; URL-shaped link text is left alone.
#[test]
fn test_non_url_link_text_still_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("GitHub")], true);

    // Bare-domain, protocol-relative and scheme-prefixed link texts that match the
    // destination are URLs and must not be corrected; a prose description must be.
    let markdown = r#"[github.com/org/repo](https://github.com/org/repo)

[Visit github](https://github.com/org/repo)

[//github.com/org/repo](//github.com/org/repo)

[ftp://github.com/org/repo](ftp://github.com/org/repo)"#;

    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    // Per line:
    //   1: bare-domain text equal to the destination -> skipped
    //   3: prose description                         -> flagged
    //   5: protocol-relative URL text                -> skipped
    //   7: ftp:// URL text equal to the destination  -> skipped
    assert_eq!(
        result.len(),
        1,
        "Only prose link text should be flagged, got: {result:?}"
    );
    let prose_flagged = result.iter().any(|warning| warning.line == 3);
    assert!(prose_flagged, "Expected 'Visit github' on line 3 to be flagged");
}
1852
/// Expects `fix` to return the input unchanged when the link text is a URL.
#[test]
fn test_url_link_text_fix_not_applied() {
    let linter = MD044ProperNames::new(vec![String::from("GitHub")], true);

    let markdown = "[https://github.com/org/repo](https://github.com/org/repo)\n";
    let ctx = create_context(markdown);
    let fixed = linter.fix(&ctx).unwrap();

    assert_eq!(fixed, markdown, "Fix should not modify URL-like link text");
}
1864
/// Expects only the prose link text to be flagged in a document mixing URL and prose link text.
#[test]
fn test_mixed_url_and_regular_link_text() {
    let linter = MD044ProperNames::new(vec![String::from("GitHub")], true);

    // Lines 1 and 5 use URLs as link text; line 3 uses a prose description.
    let markdown = r#"[https://github.com/org/repo](https://github.com/org/repo)

Visit [github documentation](https://github.com/docs) for details.

[www.github.com/pricing](https://www.github.com/pricing)"#;

    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    // "github documentation" is not a URL, so line 3 is the single hit.
    assert_eq!(
        result.len(),
        1,
        "Only non-URL link text should be flagged, got: {result:?}"
    );
    assert_eq!(result[0].line, 3);
}
1887
/// Expects nothing inside an `<img ...>` tag to be flagged while plain text is still reported.
#[test]
fn test_html_attribute_values_not_flagged() {
    // Attribute values hold URLs, class names, data values and the like rather than prose,
    // so matches between `<` and `>` must be ignored.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "# Heading\n\ntest\n\n<img src=\"www.example.test/test_image.png\">\n";
    let ctx = crate::lint_context::LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let result = linter.check(&ctx).unwrap();

    // Line 5 lies entirely inside the `<img ...>` tag.
    let line5_violations: Vec<_> = result.iter().filter(|warning| warning.line == 5).collect();
    assert!(
        line5_violations.is_empty(),
        "Should not flag anything inside HTML tag attributes: {line5_violations:?}"
    );

    // The bare word on line 3 is ordinary text and is still reported.
    let plain_hits = result.iter().filter(|warning| warning.line == 3).count();
    assert_eq!(plain_hits, 1, "Plain 'test' on line 3 should be flagged");
}
1908
/// Expects text between HTML tags to be checked while the tag's attributes are skipped.
#[test]
fn test_html_text_content_still_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "# Heading\n\n<a href=\"https://example.test/page\">test link</a>\n";
    let ctx = crate::lint_context::LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let result = linter.check(&ctx).unwrap();

    // "example.test" sits inside `<...>` (the href) and is skipped;
    // "test link" sits between `>` and `<` (the anchor text) and is reported.
    assert_eq!(
        result.len(),
        1,
        "Should flag only 'test' in anchor text, not in href: {result:?}"
    );
    let anchor_hit = &result[0];
    assert_eq!(anchor_hit.column, 37, "Should flag col 37 ('test link' in anchor text)");
}
1926
/// Expects `src`, `alt`, `class` and `data-*` attribute values to be ignored.
#[test]
fn test_html_attribute_various_not_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    // Line 3 is an `<img>` with attributes only; line 4 is a `<span>` with inner text.
    let markdown = concat!(
        "# Heading\n\n",
        "<img src=\"test.png\" alt=\"test image\">\n",
        "<span class=\"test-class\" data-test=\"value\">test content</span>\n",
    );
    let ctx = crate::lint_context::LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let result = linter.check(&ctx).unwrap();

    // The inner text "test content" on line 4 is the single expected hit.
    assert_eq!(
        result.len(),
        1,
        "Should flag only 'test content' between tags: {result:?}"
    );
    assert_eq!(result[0].line, 4);
}
1947
/// Expects an underscore to act as a word boundary in plain text outside HTML tags.
#[test]
fn test_plain_text_underscore_boundary_unchanged() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "# Heading\n\ntest_image is here and just_test ends here\n";
    let ctx = crate::lint_context::LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let result = linter.check(&ctx).unwrap();

    // "test" at the start of "test_image" and at the end of "just_test" both count,
    // because "_" separates words in plain text.
    assert_eq!(
        result.len(),
        2,
        "Should flag 'test' in both 'test_image' and 'just_test': {result:?}"
    );

    let cols: Vec<usize> = result.iter().map(|warning| warning.column).collect();
    assert!(cols.contains(&1), "Should flag col 1 (test_image): {cols:?}");
    assert!(cols.contains(&29), "Should flag col 29 (just_test): {cols:?}");
}
1968
/// Expects YAML frontmatter keys to be exempt from proper-name checks.
#[test]
fn test_frontmatter_yaml_keys_not_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    let markdown = "---\ntitle: Heading\ntest: Some Test value\n---\n\nTest\n";
    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    // Line 3: the key "test" is skipped and the value "Test" is already correct.
    // Line 6: the body "Test" is already correct.
    assert!(
        result.is_empty(),
        "Should not flag YAML keys or correctly capitalized values: {result:?}"
    );
}
1987
/// Expects a miscapitalized name inside a YAML frontmatter value to be flagged.
#[test]
fn test_frontmatter_yaml_values_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    let markdown = "---\ntitle: Heading\nkey: a test value\n---\n\nTest\n";
    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    // The lowercase "test" in the value on line 3 is the single hit.
    assert_eq!(result.len(), 1, "Should flag 'test' in YAML value: {result:?}");
    let hit = &result[0];
    assert_eq!(hit.line, 3);
    // "key: a " occupies 7 characters, so "test" begins at column 8.
    assert_eq!(hit.column, 8);
}
2002
/// Expects a YAML key spelled like a configured name to go unreported.
#[test]
fn test_frontmatter_key_matches_name_not_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    // The only occurrence of "test" is the key itself.
    let markdown = "---\ntest: other value\n---\n\nBody text\n";
    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    assert!(
        result.is_empty(),
        "Should not flag YAML key that matches configured name: {result:?}"
    );
}
2017
/// Expects YAML keys with no value (with or without a trailing space) to go unreported.
#[test]
fn test_frontmatter_empty_value_not_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    // Two keys: "test:" and "test: " (trailing space), neither carrying a value.
    let markdown = "---\ntest:\ntest: \n---\n\nBody text\n";
    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    assert!(
        result.is_empty(),
        "Should not flag YAML keys with empty values: {result:?}"
    );
}
2032
/// Expects an indented (nested) YAML key to be exempt just like a top-level key.
#[test]
fn test_frontmatter_nested_yaml_key_not_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    // "test" appears only as a key nested under "parent".
    let markdown = "---\nparent:\n  test: nested value\n---\n\nBody text\n";
    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    assert!(result.is_empty(), "Should not flag nested YAML keys: {result:?}");
}
2045
/// Expects YAML list items to be treated as values and checked for proper names.
#[test]
fn test_frontmatter_list_items_checked() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    let markdown = "---\ntags:\n  - test\n  - other\n---\n\nBody text\n";
    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    // The list item "test" on line 3 is the single hit.
    assert_eq!(result.len(), 1, "Should flag 'test' in YAML list item: {result:?}");
    assert_eq!(result[0].line, 3);
}
2059
/// Expects only the text before the first colon to be treated as the YAML key.
#[test]
fn test_frontmatter_value_with_multiple_colons() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    // In "key: value: more" everything after the first colon is value text.
    let markdown = "---\ntest: description: a test thing\n---\n\nBody text\n";
    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    // The key "test" is skipped; the "test" inside "description: a test thing" is reported.
    assert_eq!(
        result.len(),
        1,
        "Should flag 'test' in value after first colon: {result:?}"
    );
    let hit = &result[0];
    assert_eq!(hit.line, 2);
    assert!(hit.column > 6, "Violation column should be in value portion");
}
2079
/// Expects body text following frontmatter to be checked as usual.
#[test]
fn test_frontmatter_does_not_affect_body() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    let markdown = "---\ntitle: Heading\n---\n\ntest should be flagged here\n";
    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    // The body starts on line 5, after the frontmatter and a blank line.
    assert_eq!(result.len(), 1, "Should flag 'test' in body text: {result:?}");
    assert_eq!(result[0].line, 5);
}
2092
/// Expects `fix` to correct YAML values and body text while leaving YAML keys as written.
#[test]
fn test_frontmatter_fix_corrects_values_preserves_keys() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    let markdown = "---\ntest: a test value\n---\n\ntest here\n";
    let ctx = create_context(markdown);
    let fixed = linter.fix(&ctx).unwrap();

    // The key stays lowercase; the value and the body occurrence become "Test".
    assert_eq!(fixed, "---\ntest: a Test value\n---\n\nTest here\n");
}
2105
/// Expects every miscapitalized name within a single YAML value to be flagged.
#[test]
fn test_frontmatter_multiword_value_flagged() {
    let configured = vec![String::from("JavaScript"), String::from("TypeScript")];
    let linter = MD044ProperNames::new(configured, true);

    let markdown = "---\ndescription: Learn javascript and typescript\n---\n\nBody\n";
    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    // Both hits belong to the value on line 2.
    assert_eq!(result.len(), 2, "Should flag both names in YAML value: {result:?}");
    assert!(result.iter().all(|warning| warning.line == 2));
}
2118
/// Expects `#` comment lines inside YAML frontmatter to be skipped.
#[test]
fn test_frontmatter_yaml_comments_not_checked() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    // "test" occurs only in the comment on line 2.
    let markdown = "---\n# test comment\ntitle: Heading\n---\n\nBody text\n";
    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    assert!(result.is_empty(), "Should not flag names in YAML comments: {result:?}");
}
2130
/// Expects a document with `---` frontmatter delimiters to yield only the body warning.
#[test]
fn test_frontmatter_delimiters_not_checked() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    let markdown = "---\ntitle: Heading\n---\n\ntest here\n";
    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    // The body "test" on line 5 is the single hit; the delimiter lines add nothing.
    assert_eq!(result.len(), 1, "Should only flag body text: {result:?}");
    assert_eq!(result[0].line, 5);
}
2144
/// Expects indented continuation lines of a YAML value to be checked as value content.
#[test]
fn test_frontmatter_continuation_lines_checked() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    // A folded scalar (`>`) followed by two indented lines without a colon.
    let markdown = "---\ndescription: >\n  a test value\n  continued here\n---\n\nBody\n";
    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    // The hit is on the first continuation line.
    assert_eq!(result.len(), 1, "Should flag 'test' in continuation line: {result:?}");
    assert_eq!(result[0].line, 3);
}
2158
/// Expects the content of a double-quoted YAML value to be checked.
#[test]
fn test_frontmatter_quoted_values_checked() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    let markdown = "---\ntitle: \"a test title\"\n---\n\nBody\n";
    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    // The hit is inside the quotes on line 2.
    assert_eq!(result.len(), 1, "Should flag 'test' in quoted YAML value: {result:?}");
    assert_eq!(result[0].line, 2);
}
2171
/// Expects the content of a single-quoted YAML value to be checked.
#[test]
fn test_frontmatter_single_quoted_values_checked() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    let markdown = "---\ntitle: 'a test title'\n---\n\nBody\n";
    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    // The hit is inside the quotes on line 2.
    assert_eq!(
        result.len(),
        1,
        "Should flag 'test' in single-quoted YAML value: {result:?}"
    );
    assert_eq!(result[0].line, 2);
}
2188
/// Expects `fix` to correct every proper name found in a frontmatter value.
#[test]
fn test_frontmatter_fix_multiword_values() {
    let configured = vec![String::from("JavaScript"), String::from("TypeScript")];
    let linter = MD044ProperNames::new(configured, true);

    let markdown = "---\ndescription: Learn javascript and typescript\n---\n\nBody\n";
    let ctx = create_context(markdown);
    let fixed = linter.fix(&ctx).unwrap();

    // Both names in the value are recapitalized; nothing else changes.
    assert_eq!(
        fixed,
        "---\ndescription: Learn JavaScript and TypeScript\n---\n\nBody\n"
    );
}
2203
/// Expects `fix` to correct list items, scalar values and body text without altering layout.
#[test]
fn test_frontmatter_fix_preserves_yaml_structure() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    let markdown = "---\ntags:\n  - test\n  - other\ntitle: a test doc\n---\n\ntest body\n";
    let ctx = create_context(markdown);
    let fixed = linter.fix(&ctx).unwrap();

    // Keys, indentation and list markers are unchanged; only "test" becomes "Test".
    assert_eq!(
        fixed,
        "---\ntags:\n  - Test\n  - other\ntitle: a Test doc\n---\n\nTest body\n"
    );
}
2218
/// Expects `+++`-delimited TOML frontmatter to have its value checked alongside the body.
#[test]
fn test_frontmatter_toml_delimiters_not_checked() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    let markdown = "+++\ntitle = \"a test title\"\n+++\n\ntest body\n";
    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    // Expected hits:
    //   line 2: "test" inside the quoted TOML value (the key "title" is not a hit)
    //   line 5: "test" in the body
    assert_eq!(result.len(), 2, "Should flag TOML value and body: {result:?}");

    let frontmatter_hits = result.iter().filter(|warning| warning.line == 2).count();
    assert_eq!(frontmatter_hits, 1, "Should flag 'test' in TOML value: {result:?}");

    let body_hits = result.iter().filter(|warning| warning.line == 5).count();
    assert_eq!(body_hits, 1, "Should flag body 'test': {result:?}");
}
2237
/// Expects a TOML frontmatter key spelled like a configured name to go unreported.
#[test]
fn test_frontmatter_toml_key_not_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    // "test" occurs only as the key of the TOML assignment.
    let markdown = "+++\ntest = \"other value\"\n+++\n\nBody text\n";
    let ctx = create_context(markdown);
    let result = linter.check(&ctx).unwrap();

    assert!(
        result.is_empty(),
        "Should not flag TOML key that matches configured name: {result:?}"
    );
}
2252
2253    #[test]
2254    fn test_frontmatter_toml_fix_preserves_keys() {
2255        // Fix should correct TOML values but preserve keys.
2256        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2257
2258        let content = "+++\ntest = \"a test value\"\n+++\n\ntest here\n";
2259        let ctx = create_context(content);
2260        let fixed = rule.fix(&ctx).unwrap();
2261
2262        // Key "test" should remain lowercase; value "test" should become "Test"
2263        assert_eq!(fixed, "+++\ntest = \"a Test value\"\n+++\n\nTest here\n");
2264    }
2265
2266    #[test]
2267    fn test_frontmatter_list_item_mapping_key_not_flagged() {
2268        // In "- test: nested value", "test" is a YAML key within a list-item mapping.
2269        // The key should NOT be flagged; only the value should be checked.
2270        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2271
2272        let content = "---\nitems:\n  - test: nested value\n---\n\nBody text\n";
2273        let ctx = create_context(content);
2274        let result = rule.check(&ctx).unwrap();
2275
2276        assert!(
2277            result.is_empty(),
2278            "Should not flag YAML key in list-item mapping: {result:?}"
2279        );
2280    }
2281
2282    #[test]
2283    fn test_frontmatter_list_item_mapping_value_flagged() {
2284        // In "- key: test value", the value portion should be checked.
2285        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2286
2287        let content = "---\nitems:\n  - key: a test value\n---\n\nBody text\n";
2288        let ctx = create_context(content);
2289        let result = rule.check(&ctx).unwrap();
2290
2291        assert_eq!(
2292            result.len(),
2293            1,
2294            "Should flag 'test' in list-item mapping value: {result:?}"
2295        );
2296        assert_eq!(result[0].line, 3);
2297    }
2298
2299    #[test]
2300    fn test_frontmatter_bare_list_item_still_flagged() {
2301        // Bare list items without a colon (e.g., "- test") are values and should be flagged.
2302        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2303
2304        let content = "---\ntags:\n  - test\n  - other\n---\n\nBody text\n";
2305        let ctx = create_context(content);
2306        let result = rule.check(&ctx).unwrap();
2307
2308        assert_eq!(result.len(), 1, "Should flag 'test' in bare list item: {result:?}");
2309        assert_eq!(result[0].line, 3);
2310    }
2311
2312    #[test]
2313    fn test_frontmatter_flow_mapping_not_flagged() {
2314        // Flow mappings like {test: value} contain YAML keys that should not be flagged.
2315        // The entire flow construct should be skipped.
2316        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2317
2318        let content = "---\nflow_map: {test: value, other: test}\n---\n\nBody text\n";
2319        let ctx = create_context(content);
2320        let result = rule.check(&ctx).unwrap();
2321
2322        assert!(
2323            result.is_empty(),
2324            "Should not flag names inside flow mappings: {result:?}"
2325        );
2326    }
2327
2328    #[test]
2329    fn test_frontmatter_flow_sequence_not_flagged() {
2330        // Flow sequences like [test, other] should also be skipped.
2331        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2332
2333        let content = "---\nitems: [test, other, test]\n---\n\nBody text\n";
2334        let ctx = create_context(content);
2335        let result = rule.check(&ctx).unwrap();
2336
2337        assert!(
2338            result.is_empty(),
2339            "Should not flag names inside flow sequences: {result:?}"
2340        );
2341    }
2342
2343    #[test]
2344    fn test_frontmatter_list_item_mapping_fix_preserves_key() {
2345        // Fix should correct values in list-item mappings but preserve keys.
2346        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2347
2348        let content = "---\nitems:\n  - test: a test value\n---\n\ntest here\n";
2349        let ctx = create_context(content);
2350        let fixed = rule.fix(&ctx).unwrap();
2351
2352        // "test" as list-item key should remain lowercase;
2353        // "test" in value portion should become "Test"
2354        assert_eq!(fixed, "---\nitems:\n  - test: a Test value\n---\n\nTest here\n");
2355    }
2356
2357    #[test]
2358    fn test_frontmatter_backtick_code_not_flagged() {
2359        // Names inside backticks in frontmatter should NOT be flagged when code_blocks=false.
2360        let config = MD044Config {
2361            names: vec!["GoodApplication".to_string()],
2362            code_blocks: false,
2363            ..MD044Config::default()
2364        };
2365        let rule = MD044ProperNames::from_config_struct(config);
2366
2367        let content = "---\ntitle: \"`goodapplication` CLI\"\n---\n\nIntroductory `goodapplication` CLI text.\n";
2368        let ctx = create_context(content);
2369        let result = rule.check(&ctx).unwrap();
2370
2371        // Neither the frontmatter nor the body backtick-wrapped name should be flagged
2372        assert!(
2373            result.is_empty(),
2374            "Should not flag names inside backticks in frontmatter or body: {result:?}"
2375        );
2376    }
2377
2378    #[test]
2379    fn test_frontmatter_unquoted_backtick_code_not_flagged() {
2380        // Exact case from issue #513: unquoted YAML frontmatter with backticks
2381        let config = MD044Config {
2382            names: vec!["GoodApplication".to_string()],
2383            code_blocks: false,
2384            ..MD044Config::default()
2385        };
2386        let rule = MD044ProperNames::from_config_struct(config);
2387
2388        let content = "---\ntitle: `goodapplication` CLI\n---\n\nIntroductory `goodapplication` CLI text.\n";
2389        let ctx = create_context(content);
2390        let result = rule.check(&ctx).unwrap();
2391
2392        assert!(
2393            result.is_empty(),
2394            "Should not flag names inside backticks in unquoted YAML frontmatter: {result:?}"
2395        );
2396    }
2397
2398    #[test]
2399    fn test_frontmatter_bare_name_still_flagged_with_backtick_nearby() {
2400        // Names outside backticks in frontmatter should still be flagged.
2401        let config = MD044Config {
2402            names: vec!["GoodApplication".to_string()],
2403            code_blocks: false,
2404            ..MD044Config::default()
2405        };
2406        let rule = MD044ProperNames::from_config_struct(config);
2407
2408        let content = "---\ntitle: goodapplication `goodapplication` CLI\n---\n\nBody\n";
2409        let ctx = create_context(content);
2410        let result = rule.check(&ctx).unwrap();
2411
2412        // Only the bare "goodapplication" (before backticks) should be flagged
2413        assert_eq!(
2414            result.len(),
2415            1,
2416            "Should flag bare name but not backtick-wrapped name: {result:?}"
2417        );
2418        assert_eq!(result[0].line, 2);
2419        assert_eq!(result[0].column, 8); // "title: " = 7 chars, name at column 8
2420    }
2421
2422    #[test]
2423    fn test_frontmatter_backtick_code_with_code_blocks_true() {
2424        // When code_blocks=true, names inside backticks ARE checked.
2425        let config = MD044Config {
2426            names: vec!["GoodApplication".to_string()],
2427            code_blocks: true,
2428            ..MD044Config::default()
2429        };
2430        let rule = MD044ProperNames::from_config_struct(config);
2431
2432        let content = "---\ntitle: \"`goodapplication` CLI\"\n---\n\nBody\n";
2433        let ctx = create_context(content);
2434        let result = rule.check(&ctx).unwrap();
2435
2436        // With code_blocks=true, backtick-wrapped name SHOULD be flagged
2437        assert_eq!(
2438            result.len(),
2439            1,
2440            "Should flag backtick-wrapped name when code_blocks=true: {result:?}"
2441        );
2442        assert_eq!(result[0].line, 2);
2443    }
2444
2445    #[test]
2446    fn test_frontmatter_fix_preserves_backtick_code() {
2447        // Fix should NOT change names inside backticks in frontmatter.
2448        let config = MD044Config {
2449            names: vec!["GoodApplication".to_string()],
2450            code_blocks: false,
2451            ..MD044Config::default()
2452        };
2453        let rule = MD044ProperNames::from_config_struct(config);
2454
2455        let content = "---\ntitle: \"`goodapplication` CLI\"\n---\n\nIntroductory `goodapplication` CLI text.\n";
2456        let ctx = create_context(content);
2457        let fixed = rule.fix(&ctx).unwrap();
2458
2459        // Neither backtick-wrapped occurrence should be changed
2460        assert_eq!(
2461            fixed, content,
2462            "Fix should not modify names inside backticks in frontmatter"
2463        );
2464    }
2465
2466    // --- Angle-bracket URL tests (issue #457) ---
2467
2468    #[test]
2469    fn test_angle_bracket_url_in_html_comment_not_flagged() {
2470        // Angle-bracket URLs inside HTML comments should be skipped
2471        let config = MD044Config {
2472            names: vec!["Test".to_string()],
2473            ..MD044Config::default()
2474        };
2475        let rule = MD044ProperNames::from_config_struct(config);
2476
2477        let content = "---\ntitle: Level 1 heading\n---\n\n<https://www.example.test>\n\n<!-- This is a Test https://www.example.test -->\n<!-- This is a Test <https://www.example.test> -->\n";
2478        let ctx = create_context(content);
2479        let result = rule.check(&ctx).unwrap();
2480
2481        // Line 7: "Test" in comment prose before bare URL -- already correct capitalization
2482        // Line 7: "test" in bare URL (not in angle brackets) -- but "test" is in URL domain, not prose.
2483        //   However, .example.test has "test" at a word boundary (after '.'), so it IS flagged.
2484        // Line 8: "Test" in comment prose -- correct capitalization, not flagged
2485        // Line 8: "test" in <https://www.example.test> -- inside angle-bracket URL, NOT flagged
2486
2487        // The key assertion: line 8's angle-bracket URL should NOT produce a warning
2488        let line8_warnings: Vec<_> = result.iter().filter(|w| w.line == 8).collect();
2489        assert!(
2490            line8_warnings.is_empty(),
2491            "Should not flag names inside angle-bracket URLs in HTML comments: {line8_warnings:?}"
2492        );
2493    }
2494
2495    #[test]
2496    fn test_bare_url_in_html_comment_still_flagged() {
2497        // Bare URLs (not in angle brackets) inside HTML comments should still be checked
2498        let config = MD044Config {
2499            names: vec!["Test".to_string()],
2500            ..MD044Config::default()
2501        };
2502        let rule = MD044ProperNames::from_config_struct(config);
2503
2504        let content = "<!-- This is a test https://www.example.test -->\n";
2505        let ctx = create_context(content);
2506        let result = rule.check(&ctx).unwrap();
2507
2508        // "test" appears as prose text before URL and also in the bare URL domain
2509        // At minimum, the prose "test" should be flagged
2510        assert!(
2511            !result.is_empty(),
2512            "Should flag 'test' in prose text of HTML comment with bare URL"
2513        );
2514    }
2515
2516    #[test]
2517    fn test_angle_bracket_url_in_regular_markdown_not_flagged() {
2518        // Angle-bracket URLs in regular markdown are already handled by the link parser,
2519        // but the angle-bracket check provides a safety net
2520        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2521
2522        let content = "<https://www.example.test>\n";
2523        let ctx = create_context(content);
2524        let result = rule.check(&ctx).unwrap();
2525
2526        assert!(
2527            result.is_empty(),
2528            "Should not flag names inside angle-bracket URLs in regular markdown: {result:?}"
2529        );
2530    }
2531
2532    #[test]
2533    fn test_multiple_angle_bracket_urls_in_one_comment() {
2534        let config = MD044Config {
2535            names: vec!["Test".to_string()],
2536            ..MD044Config::default()
2537        };
2538        let rule = MD044ProperNames::from_config_struct(config);
2539
2540        let content = "<!-- See <https://test.example.com> and <https://www.example.test> for details -->\n";
2541        let ctx = create_context(content);
2542        let result = rule.check(&ctx).unwrap();
2543
2544        // Both URLs are inside angle brackets, so "test" inside them should NOT be flagged
2545        assert!(
2546            result.is_empty(),
2547            "Should not flag names inside multiple angle-bracket URLs: {result:?}"
2548        );
2549    }
2550
2551    #[test]
2552    fn test_angle_bracket_non_url_still_flagged() {
2553        // <Test> is NOT a URL (no scheme), so is_in_angle_bracket_url does NOT protect it.
2554        // Whether it gets flagged depends on HTML tag detection, not on our URL check.
2555        assert!(
2556            !MD044ProperNames::is_in_angle_bracket_url("<test> which is not a URL.", 1),
2557            "is_in_angle_bracket_url should return false for non-URL angle brackets"
2558        );
2559    }
2560
2561    #[test]
2562    fn test_angle_bracket_mailto_url_not_flagged() {
2563        let config = MD044Config {
2564            names: vec!["Test".to_string()],
2565            ..MD044Config::default()
2566        };
2567        let rule = MD044ProperNames::from_config_struct(config);
2568
2569        let content = "<!-- Contact <mailto:test@example.com> for help -->\n";
2570        let ctx = create_context(content);
2571        let result = rule.check(&ctx).unwrap();
2572
2573        assert!(
2574            result.is_empty(),
2575            "Should not flag names inside angle-bracket mailto URLs: {result:?}"
2576        );
2577    }
2578
2579    #[test]
2580    fn test_angle_bracket_ftp_url_not_flagged() {
2581        let config = MD044Config {
2582            names: vec!["Test".to_string()],
2583            ..MD044Config::default()
2584        };
2585        let rule = MD044ProperNames::from_config_struct(config);
2586
2587        let content = "<!-- Download from <ftp://test.example.com/file> -->\n";
2588        let ctx = create_context(content);
2589        let result = rule.check(&ctx).unwrap();
2590
2591        assert!(
2592            result.is_empty(),
2593            "Should not flag names inside angle-bracket FTP URLs: {result:?}"
2594        );
2595    }
2596
2597    #[test]
2598    fn test_angle_bracket_url_fix_preserves_url() {
2599        // Fix should not modify text inside angle-bracket URLs
2600        let config = MD044Config {
2601            names: vec!["Test".to_string()],
2602            ..MD044Config::default()
2603        };
2604        let rule = MD044ProperNames::from_config_struct(config);
2605
2606        let content = "<!-- test text <https://www.example.test> -->\n";
2607        let ctx = create_context(content);
2608        let fixed = rule.fix(&ctx).unwrap();
2609
2610        // "test" in prose should be fixed, URL should be preserved
2611        assert!(
2612            fixed.contains("<https://www.example.test>"),
2613            "Fix should preserve angle-bracket URLs: {fixed}"
2614        );
2615        assert!(
2616            fixed.contains("Test text"),
2617            "Fix should correct prose 'test' to 'Test': {fixed}"
2618        );
2619    }
2620
2621    #[test]
2622    fn test_is_in_angle_bracket_url_helper() {
2623        // Direct tests of the helper function
2624        let line = "text <https://example.test> more text";
2625
2626        // Inside the URL
2627        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 5)); // '<'
2628        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 6)); // 'h'
2629        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 15)); // middle of URL
2630        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 26)); // '>'
2631
2632        // Outside the URL
2633        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 0)); // 't' at start
2634        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 4)); // space before '<'
2635        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 27)); // space after '>'
2636
2637        // Non-URL angle brackets
2638        assert!(!MD044ProperNames::is_in_angle_bracket_url("<notaurl>", 1));
2639
2640        // mailto scheme
2641        assert!(MD044ProperNames::is_in_angle_bracket_url(
2642            "<mailto:test@example.com>",
2643            10
2644        ));
2645
2646        // ftp scheme
2647        assert!(MD044ProperNames::is_in_angle_bracket_url(
2648            "<ftp://test.example.com>",
2649            10
2650        ));
2651    }
2652
2653    #[test]
2654    fn test_is_in_angle_bracket_url_uppercase_scheme() {
2655        // RFC 3986: URI schemes are case-insensitive
2656        assert!(MD044ProperNames::is_in_angle_bracket_url(
2657            "<HTTPS://test.example.com>",
2658            10
2659        ));
2660        assert!(MD044ProperNames::is_in_angle_bracket_url(
2661            "<Http://test.example.com>",
2662            10
2663        ));
2664    }
2665
2666    #[test]
2667    fn test_is_in_angle_bracket_url_uncommon_schemes() {
2668        // ssh scheme
2669        assert!(MD044ProperNames::is_in_angle_bracket_url(
2670            "<ssh://test@example.com>",
2671            10
2672        ));
2673        // file scheme
2674        assert!(MD044ProperNames::is_in_angle_bracket_url("<file:///test/path>", 10));
2675        // data scheme (no authority, just colon)
2676        assert!(MD044ProperNames::is_in_angle_bracket_url("<data:text/plain;test>", 10));
2677    }
2678
2679    #[test]
2680    fn test_is_in_angle_bracket_url_unclosed() {
2681        // Unclosed angle bracket should NOT match
2682        assert!(!MD044ProperNames::is_in_angle_bracket_url(
2683            "<https://test.example.com",
2684            10
2685        ));
2686    }
2687
2688    #[test]
2689    fn test_vale_inline_config_comments_not_flagged() {
2690        let config = MD044Config {
2691            names: vec!["Vale".to_string(), "JavaScript".to_string()],
2692            ..MD044Config::default()
2693        };
2694        let rule = MD044ProperNames::from_config_struct(config);
2695
2696        let content = "\
2697<!-- vale off -->
2698Some javascript text here.
2699<!-- vale on -->
2700<!-- vale Style.Rule = NO -->
2701More javascript text.
2702<!-- vale Style.Rule = YES -->
2703<!-- vale JavaScript.Grammar = NO -->
2704";
2705        let ctx = create_context(content);
2706        let result = rule.check(&ctx).unwrap();
2707
2708        // Only the body text lines (2, 5) should be flagged for "javascript"
2709        assert_eq!(result.len(), 2, "Should only flag body lines, not Vale config comments");
2710        assert_eq!(result[0].line, 2);
2711        assert_eq!(result[1].line, 5);
2712    }
2713
2714    #[test]
2715    fn test_remark_lint_inline_config_comments_not_flagged() {
2716        let config = MD044Config {
2717            names: vec!["JavaScript".to_string()],
2718            ..MD044Config::default()
2719        };
2720        let rule = MD044ProperNames::from_config_struct(config);
2721
2722        let content = "\
2723<!-- lint disable remark-lint-some-rule -->
2724Some javascript text here.
2725<!-- lint enable remark-lint-some-rule -->
2726<!-- lint ignore remark-lint-some-rule -->
2727More javascript text.
2728";
2729        let ctx = create_context(content);
2730        let result = rule.check(&ctx).unwrap();
2731
2732        assert_eq!(
2733            result.len(),
2734            2,
2735            "Should only flag body lines, not remark-lint config comments"
2736        );
2737        assert_eq!(result[0].line, 2);
2738        assert_eq!(result[1].line, 5);
2739    }
2740
2741    #[test]
2742    fn test_fix_does_not_modify_vale_remark_lint_comments() {
2743        let config = MD044Config {
2744            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2745            ..MD044Config::default()
2746        };
2747        let rule = MD044ProperNames::from_config_struct(config);
2748
2749        let content = "\
2750<!-- vale off -->
2751Some javascript text.
2752<!-- vale on -->
2753<!-- lint disable remark-lint-some-rule -->
2754More javascript text.
2755<!-- lint enable remark-lint-some-rule -->
2756";
2757        let ctx = create_context(content);
2758        let fixed = rule.fix(&ctx).unwrap();
2759
2760        // Config directive lines must be preserved unchanged
2761        assert!(fixed.contains("<!-- vale off -->"));
2762        assert!(fixed.contains("<!-- vale on -->"));
2763        assert!(fixed.contains("<!-- lint disable remark-lint-some-rule -->"));
2764        assert!(fixed.contains("<!-- lint enable remark-lint-some-rule -->"));
2765        // Body text should be fixed
2766        assert!(fixed.contains("Some JavaScript text."));
2767        assert!(fixed.contains("More JavaScript text."));
2768    }
2769
2770    #[test]
2771    fn test_mixed_tool_directives_all_skipped() {
2772        let config = MD044Config {
2773            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2774            ..MD044Config::default()
2775        };
2776        let rule = MD044ProperNames::from_config_struct(config);
2777
2778        let content = "\
2779<!-- rumdl-disable MD044 -->
2780Some javascript text.
2781<!-- markdownlint-disable -->
2782More javascript text.
2783<!-- vale off -->
2784Even more javascript text.
2785<!-- lint disable some-rule -->
2786Final javascript text.
2787<!-- rumdl-enable MD044 -->
2788<!-- markdownlint-enable -->
2789<!-- vale on -->
2790<!-- lint enable some-rule -->
2791";
2792        let ctx = create_context(content);
2793        let result = rule.check(&ctx).unwrap();
2794
2795        // Only body text lines should be flagged (lines 2, 4, 6, 8)
2796        assert_eq!(
2797            result.len(),
2798            4,
2799            "Should only flag body lines, not any tool directive comments"
2800        );
2801        assert_eq!(result[0].line, 2);
2802        assert_eq!(result[1].line, 4);
2803        assert_eq!(result[2].line, 6);
2804        assert_eq!(result[3].line, 8);
2805    }
2806
2807    #[test]
2808    fn test_vale_remark_lint_edge_cases_not_matched() {
2809        let config = MD044Config {
2810            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2811            ..MD044Config::default()
2812        };
2813        let rule = MD044ProperNames::from_config_struct(config);
2814
2815        // These are regular HTML comments, NOT tool directives:
2816        // - "<!-- vale -->" is not a valid Vale directive (no action keyword)
2817        // - "<!-- vale is a tool -->" starts with "vale" but is prose, not a directive
2818        // - "<!-- valedictorian javascript -->" does not start with "<!-- vale "
2819        // - "<!-- linting javascript tips -->" does not start with "<!-- lint "
2820        // - "<!-- vale javascript -->" starts with "vale" but has no action keyword
2821        // - "<!-- lint your javascript code -->" starts with "lint" but has no action keyword
2822        let content = "\
2823<!-- vale -->
2824<!-- vale is a tool for writing -->
2825<!-- valedictorian javascript -->
2826<!-- linting javascript tips -->
2827<!-- vale javascript -->
2828<!-- lint your javascript code -->
2829";
2830        let ctx = create_context(content);
2831        let result = rule.check(&ctx).unwrap();
2832
2833        // Line 1: "<!-- vale -->" contains "vale" (wrong case for "Vale") -> flagged
2834        // Line 2: "<!-- vale is a tool for writing -->" contains "vale" -> flagged
2835        // Line 3: "<!-- valedictorian javascript -->" contains "javascript" -> flagged
2836        // Line 4: "<!-- linting javascript tips -->" contains "javascript" -> flagged
2837        // Line 5: "<!-- vale javascript -->" contains "vale" and "javascript" -> flagged for both
2838        // Line 6: "<!-- lint your javascript code -->" contains "javascript" -> flagged
2839        assert_eq!(
2840            result.len(),
2841            7,
2842            "Should flag proper names in non-directive HTML comments: got {result:?}"
2843        );
2844        assert_eq!(result[0].line, 1); // "vale" in <!-- vale -->
2845        assert_eq!(result[1].line, 2); // "vale" in <!-- vale is a tool -->
2846        assert_eq!(result[2].line, 3); // "javascript" in <!-- valedictorian javascript -->
2847        assert_eq!(result[3].line, 4); // "javascript" in <!-- linting javascript tips -->
2848        assert_eq!(result[4].line, 5); // "vale" in <!-- vale javascript -->
2849        assert_eq!(result[5].line, 5); // "javascript" in <!-- vale javascript -->
2850        assert_eq!(result[6].line, 6); // "javascript" in <!-- lint your javascript code -->
2851    }
2852
2853    #[test]
2854    fn test_vale_style_directives_skipped() {
2855        let config = MD044Config {
2856            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2857            ..MD044Config::default()
2858        };
2859        let rule = MD044ProperNames::from_config_struct(config);
2860
2861        // These ARE valid Vale directives and should be skipped:
2862        let content = "\
2863<!-- vale style = MyStyle -->
2864<!-- vale styles = Style1, Style2 -->
2865<!-- vale MyRule.Name = YES -->
2866<!-- vale MyRule.Name = NO -->
2867Some javascript text.
2868";
2869        let ctx = create_context(content);
2870        let result = rule.check(&ctx).unwrap();
2871
2872        // Only line 5 (body text) should be flagged
2873        assert_eq!(
2874            result.len(),
2875            1,
2876            "Should only flag body lines, not Vale style/rule directives: got {result:?}"
2877        );
2878        assert_eq!(result[0].line, 5);
2879    }
2880
2881    // --- is_in_backtick_code_in_line unit tests ---
2882
2883    #[test]
2884    fn test_backtick_code_single_backticks() {
2885        let line = "hello `world` bye";
2886        // 'w' is at index 7, inside the backtick span (content between backticks at 6 and 12)
2887        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 7));
2888        // 'h' at index 0 is outside
2889        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2890        // 'b' at index 14 is outside
2891        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 14));
2892    }
2893
2894    #[test]
2895    fn test_backtick_code_double_backticks() {
2896        let line = "a ``code`` b";
2897        // 'c' is at index 4, inside ``...``
2898        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 4));
2899        // 'a' at index 0 is outside
2900        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2901        // 'b' at index 11 is outside
2902        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 11));
2903    }
2904
2905    #[test]
2906    fn test_backtick_code_unclosed() {
2907        let line = "a `code b";
2908        // No closing backtick, so nothing is a code span
2909        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 3));
2910    }
2911
2912    #[test]
2913    fn test_backtick_code_mismatched_count() {
2914        // Single backtick opening, double backtick is not a match
2915        let line = "a `code`` b";
2916        // The single ` at index 2 doesn't match `` at index 7-8
2917        // So 'c' at index 3 is NOT in a code span
2918        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 3));
2919    }
2920
2921    #[test]
2922    fn test_backtick_code_multiple_spans() {
2923        let line = "`first` and `second`";
2924        // 'f' at index 1 (inside first span)
2925        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 1));
2926        // 'a' at index 8 (between spans)
2927        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 8));
2928        // 's' at index 13 (inside second span)
2929        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 13));
2930    }
2931
2932    #[test]
2933    fn test_backtick_code_on_backtick_boundary() {
2934        let line = "`code`";
2935        // Position 0 is the opening backtick itself, not inside the span
2936        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2937        // Position 5 is the closing backtick, not inside the span
2938        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 5));
2939        // Position 1-4 are inside the span
2940        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 1));
2941        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 4));
2942    }
2943
2944    // Double-bracket WikiLink + URL: [[text]](url)
2945    // pulldown-cmark parses [[text]] as a WikiLink but leaves the (url)
2946    // as plain text, so ctx.links does not cover the URL portion.
2947    // MD044 must fall back to is_in_markdown_link_url for all lines.
2948
2949    #[test]
2950    fn test_double_bracket_link_url_not_flagged() {
2951        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
2952        // Exact reproduction from issue #564
2953        let content = "[[rumdl]](https://github.com/rvben/rumdl)";
2954        let ctx = create_context(content);
2955        let result = rule.check(&ctx).unwrap();
2956        assert!(
2957            result.is_empty(),
2958            "URL inside [[text]](url) must not be flagged, got: {result:?}"
2959        );
2960    }
2961
2962    #[test]
2963    fn test_double_bracket_link_url_not_fixed() {
2964        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
2965        let content = "[[rumdl]](https://github.com/rvben/rumdl)\n";
2966        let ctx = create_context(content);
2967        let fixed = rule.fix(&ctx).unwrap();
2968        assert_eq!(
2969            fixed, content,
2970            "fix() must leave the URL inside [[text]](url) unchanged"
2971        );
2972    }
2973
2974    #[test]
2975    fn test_double_bracket_link_text_still_flagged() {
2976        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
2977        // The link text portion [[github]](url) should still be checked.
2978        let content = "[[github]](https://example.com)";
2979        let ctx = create_context(content);
2980        let result = rule.check(&ctx).unwrap();
2981        assert_eq!(
2982            result.len(),
2983            1,
2984            "Incorrect name in [[text]] link text should still be flagged, got: {result:?}"
2985        );
2986        assert_eq!(result[0].message, "Proper name 'github' should be 'GitHub'");
2987    }
2988
2989    #[test]
2990    fn test_double_bracket_link_mixed_line() {
2991        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
2992        // URL must be skipped, standalone text must be flagged.
2993        let content = "See [[rumdl]](https://github.com/rvben/rumdl) and github for more.";
2994        let ctx = create_context(content);
2995        let result = rule.check(&ctx).unwrap();
2996        assert_eq!(
2997            result.len(),
2998            1,
2999            "Only the standalone 'github' after the link should be flagged, got: {result:?}"
3000        );
3001        assert!(result[0].message.contains("'github'"));
3002        // "See " (4) + "[[rumdl]](https://github.com/rvben/rumdl)" (42) + " and " (4) = column 51
3003        assert_eq!(
3004            result[0].column, 51,
3005            "Flagged column should be the trailing 'github', not the one in the URL"
3006        );
3007    }
3008
3009    #[test]
3010    fn test_regular_link_url_still_not_flagged() {
3011        // Confirm existing [text](url) behavior is unaffected by the fix.
3012        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
3013        let content = "[rumdl](https://github.com/rvben/rumdl)";
3014        let ctx = create_context(content);
3015        let result = rule.check(&ctx).unwrap();
3016        assert!(
3017            result.is_empty(),
3018            "URL inside regular [text](url) must still not be flagged, got: {result:?}"
3019        );
3020    }
3021
3022    #[test]
3023    fn test_link_like_text_in_code_span_still_flagged_when_code_blocks_enabled() {
3024        // When code-blocks = true the user explicitly opts into checking code spans.
3025        // A code span containing link-like text (`[foo](https://github.com)`) must
3026        // NOT be silently suppressed by is_in_markdown_link_url: the content is
3027        // literal characters, not a real Markdown link.
3028        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
3029        let content = "`[foo](https://github.com/org/repo)`";
3030        let ctx = create_context(content);
3031        let result = rule.check(&ctx).unwrap();
3032        assert_eq!(
3033            result.len(),
3034            1,
3035            "Proper name inside a code span must be flagged when code-blocks=true, got: {result:?}"
3036        );
3037        assert!(result[0].message.contains("'github'"));
3038    }
3039
3040    #[test]
3041    fn test_malformed_link_not_treated_as_url() {
3042        // [text](url with spaces) is NOT a valid Markdown link; pulldown-cmark
3043        // does not parse it, so the name inside must still be flagged.
3044        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
3045        let content = "See [rumdl](github repo) for details.";
3046        let ctx = create_context(content);
3047        let result = rule.check(&ctx).unwrap();
3048        assert_eq!(
3049            result.len(),
3050            1,
3051            "Name inside malformed [text](url with spaces) must still be flagged, got: {result:?}"
3052        );
3053        assert!(result[0].message.contains("'github'"));
3054    }
3055
3056    #[test]
3057    fn test_wikilink_followed_by_prose_parens_still_flagged() {
3058        // [[note]](github repo) — WikiLink followed by parenthesised prose, NOT
3059        // a valid link URL (space in destination). pulldown-cmark does not parse
3060        // it as a link, so the name inside must still be flagged.
3061        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
3062        let content = "[[note]](github repo)";
3063        let ctx = create_context(content);
3064        let result = rule.check(&ctx).unwrap();
3065        assert_eq!(
3066            result.len(),
3067            1,
3068            "Name inside [[wikilink]](prose with spaces) must still be flagged, got: {result:?}"
3069        );
3070        assert!(result[0].message.contains("'github'"));
3071    }
3072
3073    /// Roundtrip safety: fix() output must produce zero warnings on re-check.
3074    #[test]
3075    fn test_roundtrip_fix_then_check_basic() {
3076        let rule = MD044ProperNames::new(
3077            vec![
3078                "JavaScript".to_string(),
3079                "TypeScript".to_string(),
3080                "Node.js".to_string(),
3081            ],
3082            true,
3083        );
3084        let content = "I love javascript, typescript, and nodejs!";
3085        let ctx = create_context(content);
3086        let fixed = rule.fix(&ctx).unwrap();
3087        let ctx2 = create_context(&fixed);
3088        let warnings = rule.check(&ctx2).unwrap();
3089        assert!(
3090            warnings.is_empty(),
3091            "Re-check after fix should produce zero warnings, got: {warnings:?}"
3092        );
3093    }
3094
3095    /// Roundtrip safety: fix() output must produce zero warnings for multiline content.
3096    #[test]
3097    fn test_roundtrip_fix_then_check_multiline() {
3098        let rule = MD044ProperNames::new(vec!["Rust".to_string(), "Python".to_string()], true);
3099        let content = "First line with rust.\nSecond line with python.\nThird line with RUST and PYTHON.\n";
3100        let ctx = create_context(content);
3101        let fixed = rule.fix(&ctx).unwrap();
3102        let ctx2 = create_context(&fixed);
3103        let warnings = rule.check(&ctx2).unwrap();
3104        assert!(
3105            warnings.is_empty(),
3106            "Re-check after fix should produce zero warnings, got: {warnings:?}"
3107        );
3108    }
3109
3110    /// Roundtrip safety: fix() with inline config disable blocks.
3111    #[test]
3112    fn test_roundtrip_fix_then_check_inline_config() {
3113        let config = MD044Config {
3114            names: vec!["RUMDL".to_string()],
3115            ..MD044Config::default()
3116        };
3117        let rule = MD044ProperNames::from_config_struct(config);
3118        let content =
3119            "<!-- rumdl-disable MD044 -->\nSome rumdl text.\n<!-- rumdl-enable MD044 -->\n\nSome rumdl text outside.\n";
3120        let ctx = create_context(content);
3121        let fixed = rule.fix(&ctx).unwrap();
3122        // The disabled block should be preserved, the outside text fixed
3123        assert!(
3124            fixed.contains("Some rumdl text.\n"),
3125            "Disabled block text should be preserved"
3126        );
3127        assert!(
3128            fixed.contains("Some RUMDL text outside."),
3129            "Outside text should be fixed"
3130        );
3131    }
3132
3133    /// Roundtrip safety: fix() with HTML comment content.
3134    #[test]
3135    fn test_roundtrip_fix_then_check_html_comments() {
3136        let config = MD044Config {
3137            names: vec!["JavaScript".to_string()],
3138            ..MD044Config::default()
3139        };
3140        let rule = MD044ProperNames::from_config_struct(config);
3141        let content = "# Guide\n\n<!-- javascript mentioned here -->\n\njavascript outside\n";
3142        let ctx = create_context(content);
3143        let fixed = rule.fix(&ctx).unwrap();
3144        let ctx2 = create_context(&fixed);
3145        let warnings = rule.check(&ctx2).unwrap();
3146        assert!(
3147            warnings.is_empty(),
3148            "Re-check after fix should produce zero warnings, got: {warnings:?}"
3149        );
3150    }
3151
3152    /// Roundtrip safety: fix() preserves content when no violations exist.
3153    #[test]
3154    fn test_roundtrip_no_op_when_correct() {
3155        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
3156        let content = "This uses JavaScript and TypeScript correctly.\n";
3157        let ctx = create_context(content);
3158        let fixed = rule.fix(&ctx).unwrap();
3159        assert_eq!(fixed, content, "Fix should be a no-op when content is already correct");
3160    }
3161
3162    // --- Bare-domain link text: display text is the destination URL with scheme stripped ---
3163
3164    #[test]
3165    fn test_bare_domain_link_text_not_flagged() {
3166        // `[ravencentric.github.io](https://ravencentric.github.io)` — the display text
3167        // is the URL with the scheme stripped; "github" here is a domain label, not a
3168        // reference to "GitHub" the product, and must not be corrected.
3169        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
3170        let content = "My site is [ravencentric.github.io](https://ravencentric.github.io).\n";
3171        let ctx = create_context(content);
3172        let result = rule.check(&ctx).unwrap();
3173        assert!(
3174            result.is_empty(),
3175            "Should not flag 'github' in a bare-domain link text that matches the link URL: {result:?}"
3176        );
3177    }
3178
3179    #[test]
3180    fn test_bare_domain_link_text_not_fixed() {
3181        // fix() must not rewrite the link text when it is the bare URL hostname.
3182        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
3183        let content = "My site is [ravencentric.github.io](https://ravencentric.github.io).\n";
3184        let ctx = create_context(content);
3185        let fixed = rule.fix(&ctx).unwrap();
3186        assert_eq!(
3187            fixed, content,
3188            "fix() must not alter bare-domain link text that matches the destination URL"
3189        );
3190    }
3191
3192    #[test]
3193    fn test_bare_domain_link_text_with_path_not_flagged() {
3194        // Display text is the hostname only; destination has a path.
3195        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
3196        let content = "Visit [ravencentric.github.io](https://ravencentric.github.io/projects).\n";
3197        let ctx = create_context(content);
3198        let result = rule.check(&ctx).unwrap();
3199        assert!(
3200            result.is_empty(),
3201            "Should not flag 'github' when bare-domain text is the hostname of its destination URL: {result:?}"
3202        );
3203    }
3204
3205    #[test]
3206    fn test_bare_domain_link_text_full_path_not_flagged() {
3207        // Display text is the full URL-without-scheme including a path.
3208        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
3209        let content = "See [ravencentric.github.io/blog](https://ravencentric.github.io/blog).\n";
3210        let ctx = create_context(content);
3211        let result = rule.check(&ctx).unwrap();
3212        assert!(
3213            result.is_empty(),
3214            "Should not flag 'github' when link text is the full URL path without scheme: {result:?}"
3215        );
3216    }
3217
3218    #[test]
3219    fn test_github_product_name_in_link_text_still_flagged() {
3220        // `[github pages](https://pages.github.com)` — the display text is a human
3221        // description, not a bare domain; "github" should still be corrected to "GitHub".
3222        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
3223        let content = "Hosted on [github pages](https://pages.github.com).\n";
3224        let ctx = create_context(content);
3225        let result = rule.check(&ctx).unwrap();
3226        assert!(
3227            !result.is_empty(),
3228            "Should still flag 'github' in descriptive link text that does not match the destination URL"
3229        );
3230    }
3231
3232    #[test]
3233    fn test_protocol_relative_bare_domain_link_text_not_flagged() {
3234        // Protocol-relative URL `[github.io](//github.io)`.
3235        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
3236        let content = "See [github.io](//github.io).\n";
3237        let ctx = create_context(content);
3238        let result = rule.check(&ctx).unwrap();
3239        assert!(
3240            result.is_empty(),
3241            "Should not flag 'github' in bare-domain text matching a protocol-relative destination: {result:?}"
3242        );
3243    }
3244
3245    #[test]
3246    fn test_dotted_wikilink_target_still_flagged() {
3247        // `[[node.js]]` is a WikiLink whose page name contains a dot.
3248        // The dot guard alone does not protect it because text == url == "node.js".
3249        // The is_in_link WikiLink guard must prevent bare-domain suppression,
3250        // so the improper capitalization is still caught.
3251        let rule = MD044ProperNames::new(vec!["Node.js".to_string()], false);
3252        let content = "See [[node.js]] for details.\n";
3253        let ctx = create_context(content);
3254        let result = rule.check(&ctx).unwrap();
3255        assert!(
3256            !result.is_empty(),
3257            "Should flag 'node.js' in a dotted WikiLink target: {result:?}"
3258        );
3259    }
3260
3261    #[test]
3262    fn test_bare_domain_link_text_case_insensitive_url() {
3263        // URL with uppercase scheme `[github.io](HTTPS://github.io)` — the scheme is
3264        // case-insensitive, so the display text should still be recognised as a bare domain.
3265        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
3266        let content = "See [github.io](HTTPS://github.io).\n";
3267        let ctx = create_context(content);
3268        let result = rule.check(&ctx).unwrap();
3269        assert!(
3270            result.is_empty(),
3271            "Should not flag bare-domain text when destination URL has an uppercase scheme: {result:?}"
3272        );
3273    }
3274}
rumdl_lib/rules/md044_proper_names.rs

rumdl_lib/rules/
md044_proper_names.rs