rumdl_lib/rules/
md044_proper_names.rs

1use crate::utils::fast_hash;
2use crate::utils::regex_cache::{escape_regex, get_cached_regex};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use std::collections::{HashMap, HashSet};
6use std::sync::{Arc, Mutex};
7
8mod md044_config;
9pub use md044_config::MD044Config;
10
/// Location of a single name violation: `(line, column, found_name)`.
type WarningPosition = (usize, usize, String);
12
13/// Rule MD044: Proper names should be capitalized
14///
15/// See [docs/md044.md](../../docs/md044.md) for full documentation, configuration, and examples.
16///
17/// This rule is triggered when proper names are not capitalized correctly in the document.
18/// For example, if you have defined "JavaScript" as a proper name, the rule will flag any
19/// occurrences of "javascript" or "Javascript" as violations.
20///
21/// ## Purpose
22///
23/// Ensuring consistent capitalization of proper names improves document quality and
24/// professionalism. This is especially important for technical documentation where
25/// product names, programming languages, and technologies often have specific
26/// capitalization conventions.
27///
28/// ## Configuration Options
29///
30/// The rule supports the following configuration options:
31///
32/// ```yaml
33/// MD044:
34///   names: []                # List of proper names to check for correct capitalization
35///   code-blocks: false       # Whether to check code blocks (default: false)
36/// ```
37///
38/// Example configuration:
39///
40/// ```yaml
41/// MD044:
42///   names: ["JavaScript", "Node.js", "TypeScript"]
43///   code-blocks: true
44/// ```
45///
46/// ## Performance Optimizations
47///
48/// This rule implements several performance optimizations:
49///
50/// 1. **Regex Caching**: Pre-compiles and caches regex patterns for each proper name
51/// 2. **Content Caching**: Caches results based on content hashing for repeated checks
52/// 3. **Efficient Text Processing**: Uses optimized algorithms to avoid redundant text processing
53/// 4. **Smart Code Block Detection**: Efficiently identifies and optionally excludes code blocks
54///
55/// ## Edge Cases Handled
56///
57/// - **Word Boundaries**: Only matches complete words, not substrings within other words
58/// - **Case Sensitivity**: Properly handles case-specific matching
59/// - **Code Blocks**: Optionally checks code blocks (controlled by code-blocks setting)
60/// - **Markdown Formatting**: Handles proper names within Markdown formatting elements
61///
62/// ## Fix Behavior
63///
64/// When fixing issues, this rule replaces incorrect capitalization with the correct form
65/// as defined in the configuration.
66///
67/// Check if a trimmed line is an inline config comment from a linting tool.
68/// Recognized tools: rumdl, markdownlint, Vale, and remark-lint.
69fn is_inline_config_comment(trimmed: &str) -> bool {
70    trimmed.starts_with("<!-- rumdl-")
71        || trimmed.starts_with("<!-- markdownlint-")
72        || trimmed.starts_with("<!-- vale off")
73        || trimmed.starts_with("<!-- vale on")
74        || (trimmed.starts_with("<!-- vale ") && trimmed.contains(" = "))
75        || trimmed.starts_with("<!-- vale style")
76        || trimmed.starts_with("<!-- lint disable ")
77        || trimmed.starts_with("<!-- lint enable ")
78        || trimmed.starts_with("<!-- lint ignore ")
79}
80
81#[derive(Clone)]
82pub struct MD044ProperNames {
83    config: MD044Config,
84    // Cache the combined regex pattern string
85    combined_pattern: Option<String>,
86    // Precomputed lowercase name variants for fast pre-checks
87    name_variants: Vec<String>,
88    // Cache for name violations by content hash
89    content_cache: Arc<Mutex<HashMap<u64, Vec<WarningPosition>>>>,
90}
91
92impl MD044ProperNames {
93    pub fn new(names: Vec<String>, code_blocks: bool) -> Self {
94        let config = MD044Config {
95            names,
96            code_blocks,
97            html_elements: true, // Default to checking HTML elements
98            html_comments: true, // Default to checking HTML comments
99        };
100        let combined_pattern = Self::create_combined_pattern(&config);
101        let name_variants = Self::build_name_variants(&config);
102        Self {
103            config,
104            combined_pattern,
105            name_variants,
106            content_cache: Arc::new(Mutex::new(HashMap::new())),
107        }
108    }
109
110    // Helper function for consistent ASCII normalization
111    fn ascii_normalize(s: &str) -> String {
112        s.replace(['é', 'è', 'ê', 'ë'], "e")
113            .replace(['à', 'á', 'â', 'ä', 'ã', 'å'], "a")
114            .replace(['ï', 'î', 'í', 'ì'], "i")
115            .replace(['ü', 'ú', 'ù', 'û'], "u")
116            .replace(['ö', 'ó', 'ò', 'ô', 'õ'], "o")
117            .replace('ñ', "n")
118            .replace('ç', "c")
119    }
120
121    pub fn from_config_struct(config: MD044Config) -> Self {
122        let combined_pattern = Self::create_combined_pattern(&config);
123        let name_variants = Self::build_name_variants(&config);
124        Self {
125            config,
126            combined_pattern,
127            name_variants,
128            content_cache: Arc::new(Mutex::new(HashMap::new())),
129        }
130    }
131
132    // Create a combined regex pattern for all proper names
133    fn create_combined_pattern(config: &MD044Config) -> Option<String> {
134        if config.names.is_empty() {
135            return None;
136        }
137
138        // Create patterns for all names and their variations
139        let mut patterns: Vec<String> = config
140            .names
141            .iter()
142            .flat_map(|name| {
143                let mut variations = vec![];
144                let lower_name = name.to_lowercase();
145
146                // Add the lowercase version
147                variations.push(escape_regex(&lower_name));
148
149                // Add version without dots
150                let lower_name_no_dots = lower_name.replace('.', "");
151                if lower_name != lower_name_no_dots {
152                    variations.push(escape_regex(&lower_name_no_dots));
153                }
154
155                // Add ASCII-normalized versions for common accented characters
156                let ascii_normalized = Self::ascii_normalize(&lower_name);
157
158                if ascii_normalized != lower_name {
159                    variations.push(escape_regex(&ascii_normalized));
160
161                    // Also add version without dots
162                    let ascii_no_dots = ascii_normalized.replace('.', "");
163                    if ascii_normalized != ascii_no_dots {
164                        variations.push(escape_regex(&ascii_no_dots));
165                    }
166                }
167
168                variations
169            })
170            .collect();
171
172        // Sort patterns by length (longest first) to avoid shorter patterns matching within longer ones
173        patterns.sort_by_key(|b| std::cmp::Reverse(b.len()));
174
175        // Combine all patterns into a single regex with capture groups
176        // Don't use \b as it doesn't work with Unicode - we'll check boundaries manually
177        Some(format!(r"(?i)({})", patterns.join("|")))
178    }
179
180    fn build_name_variants(config: &MD044Config) -> Vec<String> {
181        let mut variants = HashSet::new();
182        for name in &config.names {
183            let lower_name = name.to_lowercase();
184            variants.insert(lower_name.clone());
185
186            let lower_no_dots = lower_name.replace('.', "");
187            if lower_name != lower_no_dots {
188                variants.insert(lower_no_dots);
189            }
190
191            let ascii_normalized = Self::ascii_normalize(&lower_name);
192            if ascii_normalized != lower_name {
193                variants.insert(ascii_normalized.clone());
194
195                let ascii_no_dots = ascii_normalized.replace('.', "");
196                if ascii_normalized != ascii_no_dots {
197                    variants.insert(ascii_no_dots);
198                }
199            }
200        }
201
202        variants.into_iter().collect()
203    }
204
    /// Find all name violations in the content and return their positions.
    ///
    /// `content_lower` is the pre-computed lowercase version of `content`, passed in
    /// to avoid redundant allocations; it is only used for the whole-document pre-check.
    /// Lines themselves are read from `ctx` (`line_info.content(ctx.content)`).
    /// NOTE(review): this presumably expects `content` to be the same text as
    /// `ctx.content` — confirm against the caller.
    ///
    /// Each returned tuple is `(line, column, found_name)`, where `line` is 1-based and
    /// `column` is the match's byte offset within its line plus one.
    ///
    /// Results are memoized in `content_cache`, keyed by `fast_hash(content)`. The two
    /// early returns before the cache lookup (nothing configured / no candidate
    /// substring in the document) are not cached.
    fn find_name_violations(
        &self,
        content: &str,
        ctx: &crate::lint_context::LintContext,
        content_lower: &str,
    ) -> Vec<WarningPosition> {
        // Early return: if no names configured or content is empty
        if self.config.names.is_empty() || content.is_empty() || self.combined_pattern.is_none() {
            return Vec::new();
        }

        // Early return: quick check if any of the configured names might be in content
        let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));

        if !has_potential_matches {
            return Vec::new();
        }

        // Check if we have cached results
        let hash = fast_hash(content);
        {
            // Use a separate scope for borrowing to minimize lock time.
            // A poisoned mutex is treated as a cache miss.
            if let Ok(cache) = self.content_cache.lock()
                && let Some(cached) = cache.get(&hash)
            {
                return cached.clone();
            }
        }

        let mut violations = Vec::new();

        // Get the regex from global cache; a pattern that fails to compile
        // yields no violations rather than an error.
        let combined_regex = match &self.combined_pattern {
            Some(pattern) => match get_cached_regex(pattern) {
                Ok(regex) => regex,
                Err(_) => return Vec::new(),
            },
            None => return Vec::new(),
        };

        // Use ctx.lines for better performance
        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
            let line_num = line_idx + 1;
            let line = line_info.content(ctx.content);

            // Skip code fence lines (```language or ~~~language)
            let trimmed = line.trim_start();
            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
                continue;
            }

            // Skip if in code block (when code_blocks = false)
            if !self.config.code_blocks && line_info.in_code_block {
                continue;
            }

            // Skip if in HTML block (when html_elements = false)
            if !self.config.html_elements && line_info.in_html_block {
                continue;
            }

            // Skip HTML comments using pre-computed line flag (when html_comments = false)
            if !self.config.html_comments && line_info.in_html_comment {
                continue;
            }

            // Skip JSX expressions and MDX comments (MDX flavor)
            if line_info.in_jsx_expression || line_info.in_mdx_comment {
                continue;
            }

            // Skip Obsidian comments (Obsidian flavor)
            if line_info.in_obsidian_comment {
                continue;
            }

            // For frontmatter lines, determine offset where checkable value content starts.
            // YAML keys should not be checked against proper names - only values.
            // `usize::MAX` is the sentinel for "skip this whole line".
            let fm_value_offset = if line_info.in_front_matter {
                Self::frontmatter_value_offset(line)
            } else {
                0
            };
            if fm_value_offset == usize::MAX {
                continue;
            }

            // Skip inline config comments (rumdl, markdownlint, Vale, remark-lint directives)
            if is_inline_config_comment(trimmed) {
                continue;
            }

            // Cheap per-line pre-check: skip lines that don't contain any potential matches
            let line_lower = line.to_lowercase();
            let has_line_matches = self.name_variants.iter().any(|name| line_lower.contains(name));

            if !has_line_matches {
                continue;
            }

            // Use the combined regex to find all matches in one pass
            for cap in combined_regex.find_iter(line) {
                let found_name = &line[cap.start()..cap.end()];

                // Byte offsets of the match within the line
                let start_pos = cap.start();
                let end_pos = cap.end();

                // Skip matches in the key portion of frontmatter lines
                if start_pos < fm_value_offset {
                    continue;
                }

                // Skip matches inside HTML tag attributes (handles multi-line tags)
                let byte_pos = line_info.byte_offset + start_pos;
                if ctx.is_in_html_tag(byte_pos) {
                    continue;
                }

                // Check word boundaries manually for Unicode support
                if !Self::is_at_word_boundary(line, start_pos, true) || !Self::is_at_word_boundary(line, end_pos, false)
                {
                    continue; // Not at word boundary
                }

                // Skip if in inline code when code_blocks is false
                if !self.config.code_blocks {
                    if ctx.is_in_code_block_or_span(byte_pos) {
                        continue;
                    }
                    // pulldown-cmark doesn't parse markdown syntax inside HTML
                    // comments, HTML blocks, or frontmatter, so backtick-wrapped
                    // text isn't detected by is_in_code_block_or_span. Check directly.
                    if (line_info.in_html_comment || line_info.in_html_block || line_info.in_front_matter)
                        && Self::is_in_backtick_code_in_line(line, start_pos)
                    {
                        continue;
                    }
                }

                // Skip if in link URL or reference definition
                if Self::is_in_link(ctx, byte_pos) {
                    continue;
                }

                // Skip if inside an angle-bracket URL (e.g., <https://...>)
                // The link parser skips autolinks inside HTML comments,
                // so we detect them directly in the line text.
                if Self::is_in_angle_bracket_url(line, start_pos) {
                    continue;
                }

                // Skip if inside a Markdown inline link URL in contexts where
                // pulldown-cmark doesn't parse Markdown syntax (HTML comments,
                // HTML blocks, frontmatter).
                if (line_info.in_html_comment || line_info.in_html_block || line_info.in_front_matter)
                    && Self::is_in_markdown_link_url(line, start_pos)
                {
                    continue;
                }

                // Skip if inside the URL portion of a WikiLink followed by a
                // parenthesised destination — [[text]](url). pulldown-cmark
                // registers [[text]] as a WikiLink in ctx.links but leaves the
                // (url) as plain text, so is_in_link() misses those bytes.
                if Self::is_in_wikilink_url(ctx, byte_pos) {
                    continue;
                }

                // Find which proper name this matches
                if let Some(proper_name) = self.get_proper_name_for(found_name) {
                    // Only flag if it's not already correct
                    if found_name != proper_name {
                        // Column is the 1-based byte offset within the line
                        violations.push((line_num, cap.start() + 1, found_name.to_string()));
                    }
                }
            }
        }

        // Store in cache (ignore if mutex is poisoned)
        if let Ok(mut cache) = self.content_cache.lock() {
            cache.insert(hash, violations.clone());
        }
        violations
    }
391
392    /// Check if a byte position is within a link URL (not link text)
393    ///
394    /// Link text should be checked for proper names, but URLs should be skipped.
395    /// For `[text](url)` - check text, skip url
396    /// For `[text][ref]` - check text, skip reference portion
397    /// For `[[text]]` (WikiLinks) - check text, skip brackets
398    fn is_in_link(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
399        use pulldown_cmark::LinkType;
400
401        // Binary search links (sorted by byte_offset) to find candidate containing byte_pos
402        let link_idx = ctx.links.partition_point(|link| link.byte_offset <= byte_pos);
403        if link_idx > 0 {
404            let link = &ctx.links[link_idx - 1];
405            if byte_pos < link.byte_end {
406                // WikiLinks [[text]] start with '[[', regular links [text] start with '['
407                let text_start = if matches!(link.link_type, LinkType::WikiLink { .. }) {
408                    link.byte_offset + 2
409                } else {
410                    link.byte_offset + 1
411                };
412                let text_end = text_start + link.text.len();
413
414                // If position is within the text portion, skip only if text is a URL
415                if byte_pos >= text_start && byte_pos < text_end {
416                    return Self::link_text_is_url(&link.text);
417                }
418                // Position is in the URL/reference portion, skip it
419                return true;
420            }
421        }
422
423        // Binary search images (sorted by byte_offset) to find candidate containing byte_pos
424        let image_idx = ctx.images.partition_point(|img| img.byte_offset <= byte_pos);
425        if image_idx > 0 {
426            let image = &ctx.images[image_idx - 1];
427            if byte_pos < image.byte_end {
428                // Image starts with '![' so alt text starts at byte_offset + 2
429                let alt_start = image.byte_offset + 2;
430                let alt_end = alt_start + image.alt_text.len();
431
432                // If position is within the alt text portion, don't skip
433                if byte_pos >= alt_start && byte_pos < alt_end {
434                    return false;
435                }
436                // Position is in the URL/reference portion, skip it
437                return true;
438            }
439        }
440
441        // Check pre-computed reference definitions
442        ctx.is_in_reference_def(byte_pos)
443    }
444
445    /// Check if link text is a URL that should not have proper name corrections.
446    /// Matches markdownlint behavior: skip text starting with `http://`, `https://`, or `www.`.
447    fn link_text_is_url(text: &str) -> bool {
448        let lower = text.trim().to_ascii_lowercase();
449        lower.starts_with("http://") || lower.starts_with("https://") || lower.starts_with("www.")
450    }
451
452    /// Check if a position within a line falls inside an angle-bracket URL (`<scheme://...>`).
453    ///
454    /// The link parser skips autolinks inside HTML comments, so `ctx.links` won't
455    /// contain them. This function detects angle-bracket URLs directly in the line
456    /// text, covering both HTML comments and regular text as a safety net.
457    fn is_in_angle_bracket_url(line: &str, pos: usize) -> bool {
458        let bytes = line.as_bytes();
459        let len = bytes.len();
460        let mut i = 0;
461        while i < len {
462            if bytes[i] == b'<' {
463                let after_open = i + 1;
464                // Check for a valid URI scheme per CommonMark autolink spec:
465                // scheme = [a-zA-Z][a-zA-Z0-9+.-]{0,31}
466                // followed by ':'
467                if after_open < len && bytes[after_open].is_ascii_alphabetic() {
468                    let mut s = after_open + 1;
469                    let scheme_max = (after_open + 32).min(len);
470                    while s < scheme_max
471                        && (bytes[s].is_ascii_alphanumeric()
472                            || bytes[s] == b'+'
473                            || bytes[s] == b'-'
474                            || bytes[s] == b'.')
475                    {
476                        s += 1;
477                    }
478                    if s < len && bytes[s] == b':' {
479                        // Valid scheme found; scan for closing '>' with no spaces or '<'
480                        let mut j = s + 1;
481                        let mut found_close = false;
482                        while j < len {
483                            match bytes[j] {
484                                b'>' => {
485                                    found_close = true;
486                                    break;
487                                }
488                                b' ' | b'<' => break,
489                                _ => j += 1,
490                            }
491                        }
492                        if found_close && pos >= i && pos <= j {
493                            return true;
494                        }
495                        if found_close {
496                            i = j + 1;
497                            continue;
498                        }
499                    }
500                }
501            }
502            i += 1;
503        }
504        false
505    }
506
507    /// Check if `byte_pos` falls inside the URL of a `[[text]](url)` construct.
508    ///
509    /// pulldown-cmark with WikiLinks enabled parses `[[text]]` as a WikiLink and
510    /// records it in `ctx.links`, but the immediately following `(url)` is left as
511    /// plain text and is therefore absent from `ctx.links`. This function detects
512    /// that gap by looking for a WikiLink entry whose `byte_end` falls exactly on a
513    /// `(` in the raw content, then checking whether `byte_pos` lies inside the
514    /// matching parenthesised URL span.
515    ///
516    /// Unlike `is_in_markdown_link_url`, this function is anchored to real parser
517    /// output (`ctx.links`) and will not suppress violations in text that merely
518    /// looks like a link (e.g. `[foo](github x)` with a space in the URL).
519    fn is_in_wikilink_url(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
520        use pulldown_cmark::LinkType;
521        let content = ctx.content.as_bytes();
522
523        // ctx.links is sorted by byte_offset; only links that start at or before
524        // byte_pos can have a URL that encloses it.
525        let end = ctx.links.partition_point(|l| l.byte_offset <= byte_pos);
526
527        for link in &ctx.links[..end] {
528            if !matches!(link.link_type, LinkType::WikiLink { .. }) {
529                continue;
530            }
531            let wiki_end = link.byte_end;
532            // The WikiLink must end before byte_pos and be immediately followed by '('.
533            if wiki_end >= byte_pos || wiki_end >= content.len() || content[wiki_end] != b'(' {
534                continue;
535            }
536            // Scan to the matching ')' tracking nested parens and backslash escapes.
537            // Per CommonMark, an unquoted inline link destination cannot contain
538            // spaces, tabs, or newlines. If we encounter one, this is parenthesised
539            // prose rather than a URL, and pulldown-cmark will not parse it as a link.
540            let mut depth: u32 = 1;
541            let mut k = wiki_end + 1;
542            let mut valid_destination = true;
543            while k < content.len() && depth > 0 {
544                match content[k] {
545                    b'\\' => {
546                        k += 1; // skip escaped character
547                    }
548                    b'(' => depth += 1,
549                    b')' => depth -= 1,
550                    b' ' | b'\t' | b'\n' | b'\r' => {
551                        valid_destination = false;
552                        break;
553                    }
554                    _ => {}
555                }
556                k += 1;
557            }
558            // byte_pos is inside the URL if it falls between '(' and the matching ')'
559            // and the destination is valid (no unescaped whitespace).
560            if valid_destination && depth == 0 && byte_pos > wiki_end && byte_pos < k {
561                return true;
562            }
563        }
564        false
565    }
566
567    /// Check if a position within a line falls inside a Markdown link's
568    /// non-text portion (URL or reference label).
569    ///
570    /// Used as a text-level fallback for HTML comments, HTML blocks, and
571    /// frontmatter where pulldown-cmark skips link parsing entirely. Operates on
572    /// raw line bytes and therefore cannot distinguish real links from text that
573    /// merely resembles link syntax; do not call on regular markdown lines.
574    /// - `[text](url)` — returns true if `pos` is within `(...)`
575    /// - `[text][ref]` — returns true if `pos` is within the second `[...]`
576    fn is_in_markdown_link_url(line: &str, pos: usize) -> bool {
577        let bytes = line.as_bytes();
578        let len = bytes.len();
579        let mut i = 0;
580
581        while i < len {
582            // Look for unescaped '[' (handle double-escaped \\[ as unescaped)
583            if bytes[i] == b'[' && (i == 0 || bytes[i - 1] != b'\\' || (i >= 2 && bytes[i - 2] == b'\\')) {
584                // Find matching ']' handling nested brackets
585                let mut depth: u32 = 1;
586                let mut j = i + 1;
587                while j < len && depth > 0 {
588                    match bytes[j] {
589                        b'\\' => {
590                            j += 1; // skip escaped char
591                        }
592                        b'[' => depth += 1,
593                        b']' => depth -= 1,
594                        _ => {}
595                    }
596                    j += 1;
597                }
598
599                // j is now one past the ']'
600                if depth == 0 && j < len {
601                    if bytes[j] == b'(' {
602                        // Inline link: [text](url)
603                        let url_start = j;
604                        let mut paren_depth: u32 = 1;
605                        let mut k = j + 1;
606                        while k < len && paren_depth > 0 {
607                            match bytes[k] {
608                                b'\\' => {
609                                    k += 1; // skip escaped char
610                                }
611                                b'(' => paren_depth += 1,
612                                b')' => paren_depth -= 1,
613                                _ => {}
614                            }
615                            k += 1;
616                        }
617
618                        if paren_depth == 0 {
619                            if pos > url_start && pos < k {
620                                return true;
621                            }
622                            i = k;
623                            continue;
624                        }
625                    } else if bytes[j] == b'[' {
626                        // Reference link: [text][ref]
627                        let ref_start = j;
628                        let mut ref_depth: u32 = 1;
629                        let mut k = j + 1;
630                        while k < len && ref_depth > 0 {
631                            match bytes[k] {
632                                b'\\' => {
633                                    k += 1;
634                                }
635                                b'[' => ref_depth += 1,
636                                b']' => ref_depth -= 1,
637                                _ => {}
638                            }
639                            k += 1;
640                        }
641
642                        if ref_depth == 0 {
643                            if pos > ref_start && pos < k {
644                                return true;
645                            }
646                            i = k;
647                            continue;
648                        }
649                    }
650                }
651            }
652            i += 1;
653        }
654        false
655    }
656
657    /// Check if a position within a line falls inside backtick-delimited code.
658    ///
659    /// pulldown-cmark does not parse markdown syntax inside HTML comments, so
660    /// `ctx.is_in_code_block_or_span` returns false for backtick-wrapped text
661    /// within comments. This function detects backtick code spans directly in
662    /// the line text following CommonMark rules: a code span starts with N
663    /// backticks and ends with exactly N backticks.
664    fn is_in_backtick_code_in_line(line: &str, pos: usize) -> bool {
665        let bytes = line.as_bytes();
666        let len = bytes.len();
667        let mut i = 0;
668        while i < len {
669            if bytes[i] == b'`' {
670                // Count the opening backtick sequence length
671                let open_start = i;
672                while i < len && bytes[i] == b'`' {
673                    i += 1;
674                }
675                let tick_len = i - open_start;
676
677                // Scan forward for a closing sequence of exactly tick_len backticks
678                while i < len {
679                    if bytes[i] == b'`' {
680                        let close_start = i;
681                        while i < len && bytes[i] == b'`' {
682                            i += 1;
683                        }
684                        if i - close_start == tick_len {
685                            // Matched pair found; the code span content is between
686                            // the end of the opening backticks and the start of the
687                            // closing backticks (exclusive of the backticks themselves).
688                            let content_start = open_start + tick_len;
689                            let content_end = close_start;
690                            if pos >= content_start && pos < content_end {
691                                return true;
692                            }
693                            // Continue scanning after this pair
694                            break;
695                        }
696                        // Not the right length; keep scanning
697                    } else {
698                        i += 1;
699                    }
700                }
701            } else {
702                i += 1;
703            }
704        }
705        false
706    }
707
708    // Check if a character is a word boundary (handles Unicode)
709    fn is_word_boundary_char(c: char) -> bool {
710        !c.is_alphanumeric()
711    }
712
713    // Check if position is at a word boundary using byte-level lookups.
714    fn is_at_word_boundary(content: &str, pos: usize, is_start: bool) -> bool {
715        if is_start {
716            if pos == 0 {
717                return true;
718            }
719            match content[..pos].chars().next_back() {
720                None => true,
721                Some(c) => Self::is_word_boundary_char(c),
722            }
723        } else {
724            if pos >= content.len() {
725                return true;
726            }
727            match content[pos..].chars().next() {
728                None => true,
729                Some(c) => Self::is_word_boundary_char(c),
730            }
731        }
732    }
733
734    /// For a frontmatter line, return the byte offset where the checkable
735    /// value portion starts. Returns `usize::MAX` if the entire line should be
736    /// skipped (frontmatter delimiters, key-only lines, YAML comments, flow constructs).
737    fn frontmatter_value_offset(line: &str) -> usize {
738        let trimmed = line.trim();
739
740        // Skip frontmatter delimiters and empty lines
741        if trimmed == "---" || trimmed == "+++" || trimmed.is_empty() {
742            return usize::MAX;
743        }
744
745        // Skip YAML comments
746        if trimmed.starts_with('#') {
747            return usize::MAX;
748        }
749
750        // YAML list item: "  - item" or "  - key: value"
751        let stripped = line.trim_start();
752        if let Some(after_dash) = stripped.strip_prefix("- ") {
753            let leading = line.len() - stripped.len();
754            // Check if the list item contains a mapping (e.g., "- key: value")
755            if let Some(result) = Self::kv_value_offset(line, after_dash, leading + 2) {
756                return result;
757            }
758            // Bare list item value (no colon) - check content after "- "
759            return leading + 2;
760        }
761        if stripped == "-" {
762            return usize::MAX;
763        }
764
765        // Key-value pair with colon separator (YAML): "key: value"
766        if let Some(result) = Self::kv_value_offset(line, stripped, line.len() - stripped.len()) {
767            return result;
768        }
769
770        // Key-value pair with equals separator (TOML): "key = value"
771        if let Some(eq_pos) = line.find('=') {
772            let after_eq = eq_pos + 1;
773            if after_eq < line.len() && line.as_bytes()[after_eq] == b' ' {
774                let value_start = after_eq + 1;
775                let value_slice = &line[value_start..];
776                let value_trimmed = value_slice.trim();
777                if value_trimmed.is_empty() {
778                    return usize::MAX;
779                }
780                // For quoted values, skip the opening quote character
781                if (value_trimmed.starts_with('"') && value_trimmed.ends_with('"'))
782                    || (value_trimmed.starts_with('\'') && value_trimmed.ends_with('\''))
783                {
784                    let quote_offset = value_slice.find(['"', '\'']).unwrap_or(0);
785                    return value_start + quote_offset + 1;
786                }
787                return value_start;
788            }
789            // Equals with no space after or at end of line -> no value to check
790            return usize::MAX;
791        }
792
793        // No separator found - continuation line or bare value, check the whole line
794        0
795    }
796
797    /// Parse a key-value pair using colon separator within `content` that starts
798    /// at `base_offset` in the original line. Returns `Some(offset)` if a colon
799    /// separator is found, `None` if no colon is present.
800    fn kv_value_offset(line: &str, content: &str, base_offset: usize) -> Option<usize> {
801        let colon_pos = content.find(':')?;
802        let abs_colon = base_offset + colon_pos;
803        let after_colon = abs_colon + 1;
804        if after_colon < line.len() && line.as_bytes()[after_colon] == b' ' {
805            let value_start = after_colon + 1;
806            let value_slice = &line[value_start..];
807            let value_trimmed = value_slice.trim();
808            if value_trimmed.is_empty() {
809                return Some(usize::MAX);
810            }
811            // Skip flow mappings and flow sequences - too complex for heuristic parsing
812            if value_trimmed.starts_with('{') || value_trimmed.starts_with('[') {
813                return Some(usize::MAX);
814            }
815            // For quoted values, skip the opening quote character
816            if (value_trimmed.starts_with('"') && value_trimmed.ends_with('"'))
817                || (value_trimmed.starts_with('\'') && value_trimmed.ends_with('\''))
818            {
819                let quote_offset = value_slice.find(['"', '\'']).unwrap_or(0);
820                return Some(value_start + quote_offset + 1);
821            }
822            return Some(value_start);
823        }
824        // Colon with no space after or at end of line -> no value to check
825        Some(usize::MAX)
826    }
827
828    // Get the proper name that should be used for a found name
829    fn get_proper_name_for(&self, found_name: &str) -> Option<String> {
830        let found_lower = found_name.to_lowercase();
831
832        // Iterate through the configured proper names
833        for name in &self.config.names {
834            let lower_name = name.to_lowercase();
835            let lower_name_no_dots = lower_name.replace('.', "");
836
837            // Direct match
838            if found_lower == lower_name || found_lower == lower_name_no_dots {
839                return Some(name.clone());
840            }
841
842            // Check ASCII-normalized version
843            let ascii_normalized = Self::ascii_normalize(&lower_name);
844
845            let ascii_no_dots = ascii_normalized.replace('.', "");
846
847            if found_lower == ascii_normalized || found_lower == ascii_no_dots {
848                return Some(name.clone());
849            }
850        }
851        None
852    }
853}
854
855impl Rule for MD044ProperNames {
856    fn name(&self) -> &'static str {
857        "MD044"
858    }
859
860    fn description(&self) -> &'static str {
861        "Proper names should have the correct capitalization"
862    }
863
864    fn category(&self) -> RuleCategory {
865        RuleCategory::Other
866    }
867
868    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
869        if self.config.names.is_empty() {
870            return true;
871        }
872        // Quick check if any configured name variants exist (case-insensitive)
873        let content_lower = if ctx.content.is_ascii() {
874            ctx.content.to_ascii_lowercase()
875        } else {
876            ctx.content.to_lowercase()
877        };
878        !self.name_variants.iter().any(|name| content_lower.contains(name))
879    }
880
881    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
882        let content = ctx.content;
883        if content.is_empty() || self.config.names.is_empty() || self.combined_pattern.is_none() {
884            return Ok(Vec::new());
885        }
886
887        // Compute lowercase content once and reuse across all checks
888        let content_lower = if content.is_ascii() {
889            content.to_ascii_lowercase()
890        } else {
891            content.to_lowercase()
892        };
893
894        // Early return: use pre-computed name_variants for the quick check
895        let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
896
897        if !has_potential_matches {
898            return Ok(Vec::new());
899        }
900
901        let line_index = &ctx.line_index;
902        let violations = self.find_name_violations(content, ctx, &content_lower);
903
904        let warnings = violations
905            .into_iter()
906            .filter_map(|(line, column, found_name)| {
907                self.get_proper_name_for(&found_name).map(|proper_name| {
908                    // `column` is a 1-indexed byte offset into the line (from regex .start() + 1).
909                    // Build the Fix range directly in bytes to avoid the character-based
910                    // line_col_to_byte_range_with_length function, which would misinterpret
911                    // the byte offset as a character count on lines with multi-byte content.
912                    let line_start = line_index.get_line_start_byte(line).unwrap_or(0);
913                    let byte_start = line_start + (column - 1);
914                    let byte_end = byte_start + found_name.len();
915                    LintWarning {
916                        rule_name: Some(self.name().to_string()),
917                        line,
918                        column,
919                        end_line: line,
920                        end_column: column + found_name.len(),
921                        message: format!("Proper name '{found_name}' should be '{proper_name}'"),
922                        severity: Severity::Warning,
923                        fix: Some(Fix {
924                            range: byte_start..byte_end,
925                            replacement: proper_name,
926                        }),
927                    }
928                })
929            })
930            .collect();
931
932        Ok(warnings)
933    }
934
935    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
936        if self.should_skip(ctx) {
937            return Ok(ctx.content.to_string());
938        }
939        let warnings = self.check(ctx)?;
940        if warnings.is_empty() {
941            return Ok(ctx.content.to_string());
942        }
943        let warnings =
944            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
945        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings)
946            .map_err(crate::rule::LintError::InvalidInput)
947    }
948
949    fn as_any(&self) -> &dyn std::any::Any {
950        self
951    }
952
953    fn default_config_section(&self) -> Option<(String, toml::Value)> {
954        let json_value = serde_json::to_value(&self.config).ok()?;
955        Some((
956            self.name().to_string(),
957            crate::rule_config_serde::json_to_toml_value(&json_value)?,
958        ))
959    }
960
961    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
962    where
963        Self: Sized,
964    {
965        let rule_config = crate::rule_config_serde::load_rule_config::<MD044Config>(config);
966        Box::new(Self::from_config_struct(rule_config))
967    }
968}
969
970#[cfg(test)]
971mod tests {
972    use super::*;
973    use crate::lint_context::LintContext;
974
975    fn create_context(content: &str) -> LintContext<'_> {
976        LintContext::new(content, crate::config::MarkdownFlavor::Standard, None)
977    }
978
979    #[test]
980    fn test_correctly_capitalized_names() {
981        let rule = MD044ProperNames::new(
982            vec![
983                "JavaScript".to_string(),
984                "TypeScript".to_string(),
985                "Node.js".to_string(),
986            ],
987            true,
988        );
989
990        let content = "This document uses JavaScript, TypeScript, and Node.js correctly.";
991        let ctx = create_context(content);
992        let result = rule.check(&ctx).unwrap();
993        assert!(result.is_empty(), "Should not flag correctly capitalized names");
994    }
995
996    #[test]
997    fn test_incorrectly_capitalized_names() {
998        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
999
1000        let content = "This document uses javascript and typescript incorrectly.";
1001        let ctx = create_context(content);
1002        let result = rule.check(&ctx).unwrap();
1003
1004        assert_eq!(result.len(), 2, "Should flag two incorrect capitalizations");
1005        assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
1006        assert_eq!(result[0].line, 1);
1007        assert_eq!(result[0].column, 20);
1008        assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
1009        assert_eq!(result[1].line, 1);
1010        assert_eq!(result[1].column, 35);
1011    }
1012
1013    #[test]
1014    fn test_names_at_beginning_of_sentences() {
1015        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "Python".to_string()], true);
1016
1017        let content = "javascript is a great language. python is also popular.";
1018        let ctx = create_context(content);
1019        let result = rule.check(&ctx).unwrap();
1020
1021        assert_eq!(result.len(), 2, "Should flag names at beginning of sentences");
1022        assert_eq!(result[0].line, 1);
1023        assert_eq!(result[0].column, 1);
1024        assert_eq!(result[1].line, 1);
1025        assert_eq!(result[1].column, 33);
1026    }
1027
1028    #[test]
1029    fn test_names_in_code_blocks_checked_by_default() {
1030        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1031
1032        let content = r#"Here is some text with JavaScript.
1033
1034```javascript
1035// This javascript should be checked
1036const lang = "javascript";
1037```
1038
1039But this javascript should be flagged."#;
1040
1041        let ctx = create_context(content);
1042        let result = rule.check(&ctx).unwrap();
1043
1044        assert_eq!(result.len(), 3, "Should flag javascript inside and outside code blocks");
1045        assert_eq!(result[0].line, 4);
1046        assert_eq!(result[1].line, 5);
1047        assert_eq!(result[2].line, 8);
1048    }
1049
1050    #[test]
1051    fn test_names_in_code_blocks_ignored_when_disabled() {
1052        let rule = MD044ProperNames::new(
1053            vec!["JavaScript".to_string()],
1054            false, // code_blocks = false means skip code blocks
1055        );
1056
1057        let content = r#"```
1058javascript in code block
1059```"#;
1060
1061        let ctx = create_context(content);
1062        let result = rule.check(&ctx).unwrap();
1063
1064        assert_eq!(
1065            result.len(),
1066            0,
1067            "Should not flag javascript in code blocks when code_blocks is false"
1068        );
1069    }
1070
1071    #[test]
1072    fn test_names_in_inline_code_checked_by_default() {
1073        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1074
1075        let content = "This is `javascript` in inline code and javascript outside.";
1076        let ctx = create_context(content);
1077        let result = rule.check(&ctx).unwrap();
1078
1079        // When code_blocks=true, inline code should be checked
1080        assert_eq!(result.len(), 2, "Should flag javascript inside and outside inline code");
1081        assert_eq!(result[0].column, 10); // javascript in inline code
1082        assert_eq!(result[1].column, 41); // javascript outside
1083    }
1084
1085    #[test]
1086    fn test_multiple_names_in_same_line() {
1087        let rule = MD044ProperNames::new(
1088            vec!["JavaScript".to_string(), "TypeScript".to_string(), "React".to_string()],
1089            true,
1090        );
1091
1092        let content = "I use javascript, typescript, and react in my projects.";
1093        let ctx = create_context(content);
1094        let result = rule.check(&ctx).unwrap();
1095
1096        assert_eq!(result.len(), 3, "Should flag all three incorrect names");
1097        assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
1098        assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
1099        assert_eq!(result[2].message, "Proper name 'react' should be 'React'");
1100    }
1101
1102    #[test]
1103    fn test_case_sensitivity() {
1104        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1105
1106        let content = "JAVASCRIPT, Javascript, javascript, and JavaScript variations.";
1107        let ctx = create_context(content);
1108        let result = rule.check(&ctx).unwrap();
1109
1110        assert_eq!(result.len(), 3, "Should flag all incorrect case variations");
1111        // JavaScript (correct) should not be flagged
1112        assert!(result.iter().all(|w| w.message.contains("should be 'JavaScript'")));
1113    }
1114
1115    #[test]
1116    fn test_configuration_with_custom_name_list() {
1117        let config = MD044Config {
1118            names: vec!["GitHub".to_string(), "GitLab".to_string(), "DevOps".to_string()],
1119            code_blocks: true,
1120            html_elements: true,
1121            html_comments: true,
1122        };
1123        let rule = MD044ProperNames::from_config_struct(config);
1124
1125        let content = "We use github, gitlab, and devops for our workflow.";
1126        let ctx = create_context(content);
1127        let result = rule.check(&ctx).unwrap();
1128
1129        assert_eq!(result.len(), 3, "Should flag all custom names");
1130        assert_eq!(result[0].message, "Proper name 'github' should be 'GitHub'");
1131        assert_eq!(result[1].message, "Proper name 'gitlab' should be 'GitLab'");
1132        assert_eq!(result[2].message, "Proper name 'devops' should be 'DevOps'");
1133    }
1134
1135    #[test]
1136    fn test_empty_configuration() {
1137        let rule = MD044ProperNames::new(vec![], true);
1138
1139        let content = "This has javascript and typescript but no configured names.";
1140        let ctx = create_context(content);
1141        let result = rule.check(&ctx).unwrap();
1142
1143        assert!(result.is_empty(), "Should not flag anything with empty configuration");
1144    }
1145
1146    #[test]
1147    fn test_names_with_special_characters() {
1148        let rule = MD044ProperNames::new(
1149            vec!["Node.js".to_string(), "ASP.NET".to_string(), "C++".to_string()],
1150            true,
1151        );
1152
1153        let content = "We use nodejs, asp.net, ASP.NET, and c++ in our stack.";
1154        let ctx = create_context(content);
1155        let result = rule.check(&ctx).unwrap();
1156
1157        // nodejs should match Node.js (dotless variation)
1158        // asp.net should be flagged (wrong case)
1159        // ASP.NET should not be flagged (correct)
1160        // c++ should be flagged
1161        assert_eq!(result.len(), 3, "Should handle special characters correctly");
1162
1163        let messages: Vec<&str> = result.iter().map(|w| w.message.as_str()).collect();
1164        assert!(messages.contains(&"Proper name 'nodejs' should be 'Node.js'"));
1165        assert!(messages.contains(&"Proper name 'asp.net' should be 'ASP.NET'"));
1166        assert!(messages.contains(&"Proper name 'c++' should be 'C++'"));
1167    }
1168
1169    #[test]
1170    fn test_word_boundaries() {
1171        let rule = MD044ProperNames::new(vec!["Java".to_string(), "Script".to_string()], true);
1172
1173        let content = "JavaScript is not java or script, but Java and Script are separate.";
1174        let ctx = create_context(content);
1175        let result = rule.check(&ctx).unwrap();
1176
1177        // Should only flag lowercase "java" and "script" as separate words
1178        assert_eq!(result.len(), 2, "Should respect word boundaries");
1179        assert!(result.iter().any(|w| w.column == 19)); // "java" position
1180        assert!(result.iter().any(|w| w.column == 27)); // "script" position
1181    }
1182
1183    #[test]
1184    fn test_fix_method() {
1185        let rule = MD044ProperNames::new(
1186            vec![
1187                "JavaScript".to_string(),
1188                "TypeScript".to_string(),
1189                "Node.js".to_string(),
1190            ],
1191            true,
1192        );
1193
1194        let content = "I love javascript, typescript, and nodejs!";
1195        let ctx = create_context(content);
1196        let fixed = rule.fix(&ctx).unwrap();
1197
1198        assert_eq!(fixed, "I love JavaScript, TypeScript, and Node.js!");
1199    }
1200
1201    #[test]
1202    fn test_fix_multiple_occurrences() {
1203        let rule = MD044ProperNames::new(vec!["Python".to_string()], true);
1204
1205        let content = "python is great. I use python daily. PYTHON is powerful.";
1206        let ctx = create_context(content);
1207        let fixed = rule.fix(&ctx).unwrap();
1208
1209        assert_eq!(fixed, "Python is great. I use Python daily. Python is powerful.");
1210    }
1211
1212    #[test]
1213    fn test_fix_checks_code_blocks_by_default() {
1214        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1215
1216        let content = r#"I love javascript.
1217
1218```
1219const lang = "javascript";
1220```
1221
1222More javascript here."#;
1223
1224        let ctx = create_context(content);
1225        let fixed = rule.fix(&ctx).unwrap();
1226
1227        let expected = r#"I love JavaScript.
1228
1229```
1230const lang = "JavaScript";
1231```
1232
1233More JavaScript here."#;
1234
1235        assert_eq!(fixed, expected);
1236    }
1237
1238    #[test]
1239    fn test_multiline_content() {
1240        let rule = MD044ProperNames::new(vec!["Rust".to_string(), "Python".to_string()], true);
1241
1242        let content = r#"First line with rust.
1243Second line with python.
1244Third line with RUST and PYTHON."#;
1245
1246        let ctx = create_context(content);
1247        let result = rule.check(&ctx).unwrap();
1248
1249        assert_eq!(result.len(), 4, "Should flag all incorrect occurrences");
1250        assert_eq!(result[0].line, 1);
1251        assert_eq!(result[1].line, 2);
1252        assert_eq!(result[2].line, 3);
1253        assert_eq!(result[3].line, 3);
1254    }
1255
1256    #[test]
1257    fn test_default_config() {
1258        let config = MD044Config::default();
1259        assert!(config.names.is_empty());
1260        assert!(!config.code_blocks);
1261        assert!(config.html_elements);
1262        assert!(config.html_comments);
1263    }
1264
1265    #[test]
1266    fn test_default_config_checks_html_comments() {
1267        let config = MD044Config {
1268            names: vec!["JavaScript".to_string()],
1269            ..MD044Config::default()
1270        };
1271        let rule = MD044ProperNames::from_config_struct(config);
1272
1273        let content = "# Guide\n\n<!-- javascript mentioned here -->\n";
1274        let ctx = create_context(content);
1275        let result = rule.check(&ctx).unwrap();
1276
1277        assert_eq!(result.len(), 1, "Default config should check HTML comments");
1278        assert_eq!(result[0].line, 3);
1279    }
1280
1281    #[test]
1282    fn test_default_config_skips_code_blocks() {
1283        let config = MD044Config {
1284            names: vec!["JavaScript".to_string()],
1285            ..MD044Config::default()
1286        };
1287        let rule = MD044ProperNames::from_config_struct(config);
1288
1289        let content = "# Guide\n\n```\njavascript in code\n```\n";
1290        let ctx = create_context(content);
1291        let result = rule.check(&ctx).unwrap();
1292
1293        assert_eq!(result.len(), 0, "Default config should skip code blocks");
1294    }
1295
1296    #[test]
1297    fn test_standalone_html_comment_checked() {
1298        let config = MD044Config {
1299            names: vec!["Test".to_string()],
1300            ..MD044Config::default()
1301        };
1302        let rule = MD044ProperNames::from_config_struct(config);
1303
1304        let content = "# Heading\n\n<!-- this is a test example -->\n";
1305        let ctx = create_context(content);
1306        let result = rule.check(&ctx).unwrap();
1307
1308        assert_eq!(result.len(), 1, "Should flag proper name in standalone HTML comment");
1309        assert_eq!(result[0].line, 3);
1310    }
1311
1312    #[test]
1313    fn test_inline_config_comments_not_flagged() {
1314        let config = MD044Config {
1315            names: vec!["RUMDL".to_string()],
1316            ..MD044Config::default()
1317        };
1318        let rule = MD044ProperNames::from_config_struct(config);
1319
1320        // Lines 1, 3, 4, 6 are inline config comments — should not be flagged.
1321        // Lines 2, 5 contain "rumdl" in regular text — flagged by rule.check(),
1322        // but would be suppressed by the linting engine's inline config filtering.
1323        let content = "<!-- rumdl-disable MD044 -->\nSome rumdl text here.\n<!-- rumdl-enable MD044 -->\n<!-- markdownlint-disable -->\nMore rumdl text.\n<!-- markdownlint-enable -->\n";
1324        let ctx = create_context(content);
1325        let result = rule.check(&ctx).unwrap();
1326
1327        assert_eq!(result.len(), 2, "Should only flag body lines, not config comments");
1328        assert_eq!(result[0].line, 2);
1329        assert_eq!(result[1].line, 5);
1330    }
1331
1332    #[test]
1333    fn test_html_comment_skipped_when_disabled() {
1334        let config = MD044Config {
1335            names: vec!["Test".to_string()],
1336            code_blocks: true,
1337            html_elements: true,
1338            html_comments: false,
1339        };
1340        let rule = MD044ProperNames::from_config_struct(config);
1341
1342        let content = "# Heading\n\n<!-- this is a test example -->\n\nRegular test here.\n";
1343        let ctx = create_context(content);
1344        let result = rule.check(&ctx).unwrap();
1345
1346        assert_eq!(
1347            result.len(),
1348            1,
1349            "Should only flag 'test' outside HTML comment when html_comments=false"
1350        );
1351        assert_eq!(result[0].line, 5);
1352    }
1353
1354    #[test]
1355    fn test_fix_corrects_html_comment_content() {
1356        let config = MD044Config {
1357            names: vec!["JavaScript".to_string()],
1358            ..MD044Config::default()
1359        };
1360        let rule = MD044ProperNames::from_config_struct(config);
1361
1362        let content = "# Guide\n\n<!-- javascript mentioned here -->\n";
1363        let ctx = create_context(content);
1364        let fixed = rule.fix(&ctx).unwrap();
1365
1366        assert_eq!(fixed, "# Guide\n\n<!-- JavaScript mentioned here -->\n");
1367    }
1368
1369    #[test]
1370    fn test_fix_does_not_modify_inline_config_comments() {
1371        let config = MD044Config {
1372            names: vec!["RUMDL".to_string()],
1373            ..MD044Config::default()
1374        };
1375        let rule = MD044ProperNames::from_config_struct(config);
1376
1377        let content = "<!-- rumdl-disable -->\nSome rumdl text.\n<!-- rumdl-enable -->\n";
1378        let ctx = create_context(content);
1379        let fixed = rule.fix(&ctx).unwrap();
1380
1381        // Config comments should be untouched
1382        assert!(fixed.contains("<!-- rumdl-disable -->"));
1383        assert!(fixed.contains("<!-- rumdl-enable -->"));
1384        // Body text inside disable block should NOT be fixed (rule is disabled)
1385        assert!(
1386            fixed.contains("Some rumdl text."),
1387            "Line inside rumdl-disable block should not be modified by fix()"
1388        );
1389    }
1390
1391    #[test]
1392    fn test_fix_respects_inline_disable_partial() {
1393        let config = MD044Config {
1394            names: vec!["RUMDL".to_string()],
1395            ..MD044Config::default()
1396        };
1397        let rule = MD044ProperNames::from_config_struct(config);
1398
1399        let content =
1400            "<!-- rumdl-disable MD044 -->\nSome rumdl text.\n<!-- rumdl-enable MD044 -->\n\nSome rumdl text outside.\n";
1401        let ctx = create_context(content);
1402        let fixed = rule.fix(&ctx).unwrap();
1403
1404        // Line inside disable block should be preserved
1405        assert!(
1406            fixed.contains("Some rumdl text.\n<!-- rumdl-enable"),
1407            "Line inside disable block should not be modified"
1408        );
1409        // Line outside disable block should be fixed
1410        assert!(
1411            fixed.contains("Some RUMDL text outside."),
1412            "Line outside disable block should be fixed"
1413        );
1414    }
1415
1416    #[test]
1417    fn test_performance_with_many_names() {
1418        let mut names = vec![];
1419        for i in 0..50 {
1420            names.push(format!("ProperName{i}"));
1421        }
1422
1423        let rule = MD044ProperNames::new(names, true);
1424
1425        let content = "This has propername0, propername25, and propername49 incorrectly.";
1426        let ctx = create_context(content);
1427        let result = rule.check(&ctx).unwrap();
1428
1429        assert_eq!(result.len(), 3, "Should handle many configured names efficiently");
1430    }
1431
1432    #[test]
1433    fn test_large_name_count_performance() {
1434        // Verify MD044 can handle large numbers of names without regex limitations
1435        // This test confirms that fancy-regex handles large patterns well
1436        let names = (0..1000).map(|i| format!("ProperName{i}")).collect::<Vec<_>>();
1437
1438        let rule = MD044ProperNames::new(names, true);
1439
1440        // The combined pattern should be created successfully
1441        assert!(rule.combined_pattern.is_some());
1442
1443        // Should be able to check content without errors
1444        let content = "This has propername0 and propername999 in it.";
1445        let ctx = create_context(content);
1446        let result = rule.check(&ctx).unwrap();
1447
1448        // Should detect both incorrect names
1449        assert_eq!(result.len(), 2, "Should handle 1000 names without issues");
1450    }
1451
1452    #[test]
1453    fn test_cache_behavior() {
1454        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1455
1456        let content = "Using javascript here.";
1457        let ctx = create_context(content);
1458
1459        // First check
1460        let result1 = rule.check(&ctx).unwrap();
1461        assert_eq!(result1.len(), 1);
1462
1463        // Second check should use cache
1464        let result2 = rule.check(&ctx).unwrap();
1465        assert_eq!(result2.len(), 1);
1466
1467        // Results should be identical
1468        assert_eq!(result1[0].line, result2[0].line);
1469        assert_eq!(result1[0].column, result2[0].column);
1470    }
1471
1472    #[test]
1473    fn test_html_comments_not_checked_when_disabled() {
1474        let config = MD044Config {
1475            names: vec!["JavaScript".to_string()],
1476            code_blocks: true,    // Check code blocks
1477            html_elements: true,  // Check HTML elements
1478            html_comments: false, // Don't check HTML comments
1479        };
1480        let rule = MD044ProperNames::from_config_struct(config);
1481
1482        let content = r#"Regular javascript here.
1483<!-- This javascript in HTML comment should be ignored -->
1484More javascript outside."#;
1485
1486        let ctx = create_context(content);
1487        let result = rule.check(&ctx).unwrap();
1488
1489        assert_eq!(result.len(), 2, "Should only flag javascript outside HTML comments");
1490        assert_eq!(result[0].line, 1);
1491        assert_eq!(result[1].line, 3);
1492    }
1493
1494    #[test]
1495    fn test_html_comments_checked_when_enabled() {
1496        let config = MD044Config {
1497            names: vec!["JavaScript".to_string()],
1498            code_blocks: true,   // Check code blocks
1499            html_elements: true, // Check HTML elements
1500            html_comments: true, // Check HTML comments
1501        };
1502        let rule = MD044ProperNames::from_config_struct(config);
1503
1504        let content = r#"Regular javascript here.
1505<!-- This javascript in HTML comment should be checked -->
1506More javascript outside."#;
1507
1508        let ctx = create_context(content);
1509        let result = rule.check(&ctx).unwrap();
1510
1511        assert_eq!(
1512            result.len(),
1513            3,
1514            "Should flag all javascript occurrences including in HTML comments"
1515        );
1516    }
1517
#[test]
fn test_multiline_html_comments() {
    // An HTML comment spanning lines 2-6 is skipped as a whole when
    // `html_comments` is disabled; only the prose on lines 1 and 7 is reported.
    let rule = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("Python"), String::from("JavaScript")],
        code_blocks: true,    // Check code blocks
        html_elements: true,  // Check HTML elements
        html_comments: false, // Don't check HTML comments
    });

    let markdown = r#"Regular python here.
<!--
This is a multiline comment
with javascript and python
that should be ignored
-->
More javascript outside."#;

    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    assert_eq!(warnings.len(), 2, "Should only flag names outside HTML comments");

    // First warning: "python" on line 1; second: "javascript" on line 7.
    let flagged_lines: Vec<_> = warnings.iter().map(|w| w.line).collect();
    assert_eq!(flagged_lines, vec![1, 7]);
}
1543
#[test]
fn test_fix_preserves_html_comments_when_disabled() {
    // `fix` must rewrite the prose occurrences but leave the text inside the
    // HTML comment untouched when `html_comments` is disabled.
    let settings = MD044Config {
        names: vec![String::from("JavaScript")],
        code_blocks: true,    // Check code blocks
        html_elements: true,  // Check HTML elements
        html_comments: false, // Don't check HTML comments
    };
    let rule = MD044ProperNames::from_config_struct(settings);

    let original = r#"javascript here.
<!-- javascript in comment -->
More javascript."#;
    let expected = r#"JavaScript here.
<!-- javascript in comment -->
More JavaScript."#;

    let context = create_context(original);
    let fixed = rule.fix(&context).unwrap();

    assert_eq!(
        fixed, expected,
        "Should not fix names inside HTML comments when disabled"
    );
}
1570
#[test]
fn test_proper_names_in_link_text_are_flagged() {
    // Link text is linted like prose; link destinations and the reference
    // definition at the bottom are not expected to produce warnings.
    let names = vec![
        String::from("JavaScript"),
        String::from("Node.js"),
        String::from("Python"),
    ];
    let rule = MD044ProperNames::new(names, true);

    let markdown = r#"Check this [javascript documentation](https://javascript.info) for info.

Visit [node.js homepage](https://nodejs.org) and [python tutorial](https://python.org).

Real javascript should be flagged.

Also see the [typescript guide][ts-ref] for more.

Real python should be flagged too.

[ts-ref]: https://typescript.org/handbook"#;

    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    // Expected warnings:
    //   line 1 - "javascript" in [javascript documentation]
    //   line 3 - "node.js" in [node.js homepage] (matches "Node.js")
    //   line 3 - "python" in [python tutorial]
    //   line 5 - standalone "javascript"
    //   line 9 - standalone "python"
    assert_eq!(warnings.len(), 5, "Expected 5 warnings: 3 in link text + 2 standalone");

    // Link text on line 1: exactly one warning with the expected message.
    let on_first_line: Vec<_> = warnings.iter().filter(|w| w.line == 1).collect();
    assert_eq!(on_first_line.len(), 1);
    let first_message = &on_first_line[0].message;
    assert!(first_message.contains("'javascript' should be 'JavaScript'"));

    // Link text on line 3: node.js and python.
    let third_line_count = warnings.iter().filter(|w| w.line == 3).count();
    assert_eq!(third_line_count, 2);

    // Standalone occurrences in plain prose.
    assert!(warnings.iter().any(|w| w.line == 5 && w.message.contains("'javascript'")));
    assert!(warnings.iter().any(|w| w.line == 9 && w.message.contains("'python'")));
}
1617
#[test]
fn test_link_urls_not_flagged() {
    // The only "javascript" in this document sits in the link destination,
    // so no warning is expected.
    let rule = MD044ProperNames::new(vec![String::from("JavaScript")], true);
    let markdown = r#"[Link Text](https://javascript.info/guide)"#;

    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    assert!(warnings.is_empty(), "URLs should not be checked for proper names");
}
1631
#[test]
fn test_proper_names_in_image_alt_text_are_flagged() {
    // Image alt text is linted like prose, while the image URL and title are
    // expected to be left alone.
    let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);

    let content = r#"Here is a ![javascript logo](javascript.png "javascript icon") image.

Real javascript should be flagged."#;

    let ctx = create_context(content);
    let result = rule.check(&ctx).unwrap();

    // Line 1: ![javascript logo] - "javascript" in the alt text is flagged
    // Line 3: standalone javascript
    assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in alt text + 1 standalone");

    // `assert_eq!` (rather than `assert!(a == b)`) so that a failure prints
    // the line number that was actually reported.
    assert!(result[0].message.contains("'javascript' should be 'JavaScript'"));
    assert_eq!(result[0].line, 1); // "![javascript logo]"
    assert!(result[1].message.contains("'javascript' should be 'JavaScript'"));
    assert_eq!(result[1].line, 3); // "Real javascript should be flagged."
}
1652
#[test]
fn test_image_urls_not_flagged() {
    // "javascript" appears only inside the image URL, so nothing may be reported.
    let rule = MD044ProperNames::new(vec![String::from("JavaScript")], true);
    let markdown = r#"![Logo](https://javascript.info/logo.png)"#;

    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    assert!(warnings.is_empty(), "Image URLs should not be checked for proper names");
}
1666
#[test]
fn test_reference_link_text_flagged_but_definition_not() {
    // Reference-style link: the link text on line 1 is linted, the reference
    // definition on line 5 (whose URL contains both names) is not.
    let names = vec![String::from("JavaScript"), String::from("TypeScript")];
    let rule = MD044ProperNames::new(names, true);

    let markdown = r#"Check the [javascript guide][js-ref] for details.

Real javascript should be flagged.

[js-ref]: https://javascript.info/typescript/guide"#;

    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    // Line 1: [javascript guide] - flagged
    // Line 3: standalone javascript - flagged
    // Line 5: reference definition - NOT flagged
    assert_eq!(warnings.len(), 2, "Expected 2 warnings: 1 in link text + 1 standalone");
    for expected_line in [1, 3] {
        assert!(
            warnings
                .iter()
                .any(|w| w.line == expected_line && w.message.contains("'javascript'"))
        );
    }
}
1688
#[test]
fn test_reference_definitions_not_flagged() {
    // A document consisting solely of a reference definition must yield no
    // warnings, even though its URL contains "javascript".
    let rule = MD044ProperNames::new(vec![String::from("JavaScript")], true);
    let markdown = r#"[js-ref]: https://javascript.info/guide"#;

    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    assert!(warnings.is_empty(), "Reference definitions should not be checked");
}
1702
#[test]
fn test_wikilinks_text_is_flagged() {
    // Text inside [[...]] WikiLinks is linted like ordinary prose.
    let rule = MD044ProperNames::new(vec![String::from("JavaScript")], true);

    let markdown = r#"[[javascript]]

Regular javascript here.

[[JavaScript|display text]]"#;

    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    // Line 1: [[javascript]] - flagged, starting right after "[[" (column 3)
    // Line 3: standalone javascript - flagged
    // Line 5: [[JavaScript|display text]] - already correct, not flagged
    assert_eq!(warnings.len(), 2, "Expected 2 warnings: 1 in WikiLink + 1 standalone");

    let wikilink_hit = warnings
        .iter()
        .any(|w| w.line == 1 && w.column == 3 && w.message.contains("'javascript'"));
    assert!(wikilink_hit);

    let prose_hit = warnings.iter().any(|w| w.line == 3 && w.message.contains("'javascript'"));
    assert!(prose_hit);
}
1728
#[test]
fn test_url_link_text_not_flagged() {
    // When the link text is itself a URL (https://, http:// or www. form),
    // the name inside it must not be reported.
    let linter = MD044ProperNames::new(vec![String::from("GitHub")], true);

    let markdown = r#"[https://github.com/org/repo](https://github.com/org/repo)

[http://github.com/org/repo](http://github.com/org/repo)

[www.github.com/org/repo](https://www.github.com/org/repo)"#;

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "URL-like link text should not be flagged, got: {result:?}"
    );
}
1748
#[test]
fn test_url_link_text_with_leading_space_not_flagged() {
    // Link text that is a URL preceded by a space must still be treated as
    // URL-like, i.e. surrounding whitespace does not defeat the URL check.
    let linter = MD044ProperNames::new(vec![String::from("GitHub")], true);
    let markdown = r#"[ https://github.com/org/repo](https://github.com/org/repo)"#;

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "URL-like link text with leading space should not be flagged, got: {result:?}"
    );
}
1764
#[test]
fn test_url_link_text_uppercase_scheme_not_flagged() {
    // An upper-case scheme ("HTTPS://") in the link text must be recognised
    // as URL-like just as the lower-case form is.
    let linter = MD044ProperNames::new(vec![String::from("GitHub")], true);
    let markdown = r#"[HTTPS://GITHUB.COM/org/repo](https://github.com/org/repo)"#;

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "URL-like link text with uppercase scheme should not be flagged, got: {result:?}"
    );
}
1779
#[test]
fn test_non_url_link_text_still_flagged() {
    // Link text that does not have the http(s):// or www. form is still
    // linted. The four cases, one per odd line:
    //   line 1 - bare domain without protocol
    //   line 3 - ordinary prose ("Visit github")
    //   line 5 - protocol-relative "//github.com"
    //   line 7 - "ftp://" scheme
    let linter = MD044ProperNames::new(vec![String::from("GitHub")], true);

    let markdown = r#"[github.com/org/repo](https://github.com/org/repo)

[Visit github](https://github.com/org/repo)

[//github.com/org/repo](//github.com/org/repo)

[ftp://github.com/org/repo](ftp://github.com/org/repo)"#;

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert_eq!(result.len(), 4, "Non-URL link text should be flagged, got: {result:?}");
    for expected_line in [1, 3, 5, 7] {
        assert!(result.iter().any(|w| w.line == expected_line));
    }
}
1802
#[test]
fn test_url_link_text_fix_not_applied() {
    // `fix` must return the document unchanged when the only match is inside
    // URL-like link text.
    let linter = MD044ProperNames::new(vec![String::from("GitHub")], true);
    let content = "[https://github.com/org/repo](https://github.com/org/repo)\n";

    let context = create_context(content);
    let result = linter.fix(&context).unwrap();

    assert_eq!(result, content, "Fix should not modify URL-like link text");
}
1814
#[test]
fn test_mixed_url_and_regular_link_text() {
    // Lines 1 and 5 carry URL-like link text (skipped); line 3 carries
    // ordinary link text ("github documentation"), which must be flagged.
    let linter = MD044ProperNames::new(vec![String::from("GitHub")], true);

    let markdown = r#"[https://github.com/org/repo](https://github.com/org/repo)

Visit [github documentation](https://github.com/docs) for details.

[www.github.com/pricing](https://www.github.com/pricing)"#;

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert_eq!(
        result.len(),
        1,
        "Only non-URL link text should be flagged, got: {result:?}"
    );
    let only_warning = &result[0];
    assert_eq!(only_warning.line, 3);
}
1837
#[test]
fn test_html_attribute_values_not_flagged() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    // Anything between `<` and `>` of an HTML tag is attribute territory
    // (URLs, class names, data values, ...) rather than prose, so matches
    // there must not be reported.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "# Heading\n\ntest\n\n<img src=\"www.example.test/test_image.png\">\n";
    let context = LintContext::new(markdown, MarkdownFlavor::Standard, None);
    let result = linter.check(&context).unwrap();

    // Line 5 is entirely inside the `<img ...>` tag: no warnings allowed.
    let line5_violations: Vec<_> = result.iter().filter(|w| w.line == 5).collect();
    assert!(
        line5_violations.is_empty(),
        "Should not flag anything inside HTML tag attributes: {line5_violations:?}"
    );

    // The plain "test" on line 3 is still reported exactly once.
    let line3_count = result.iter().filter(|w| w.line == 3).count();
    assert_eq!(line3_count, 1, "Plain 'test' on line 3 should be flagged");
}
1858
#[test]
fn test_html_text_content_still_flagged() {
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    // Text that sits between an opening and a closing tag is prose and is
    // linted; the `href` value inside the opening tag is not.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "# Heading\n\n<a href=\"https://example.test/page\">test link</a>\n";
    let context = LintContext::new(markdown, MarkdownFlavor::Standard, None);
    let result = linter.check(&context).unwrap();

    // "example.test" in href   -> inside `<...>`, not flagged
    // "test link" anchor text  -> between `>` and `<`, flagged
    assert_eq!(
        result.len(),
        1,
        "Should flag only 'test' in anchor text, not in href: {result:?}"
    );
    let anchor_warning = &result[0];
    assert_eq!(anchor_warning.column, 37, "Should flag col 37 ('test link' in anchor text)");
}
1876
#[test]
fn test_html_attribute_various_not_flagged() {
    // src, alt, class and data-* attribute values all contain "test" here;
    // none of them may be reported. Only the text between the <span> tags counts.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = concat!(
        "# Heading\n\n",
        "<img src=\"test.png\" alt=\"test image\">\n",
        "<span class=\"test-class\" data-test=\"value\">test content</span>\n",
    );
    let context = crate::lint_context::LintContext::new(
        markdown,
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let result = linter.check(&context).unwrap();

    // The single expected warning is "test content" on line 4.
    assert_eq!(
        result.len(),
        1,
        "Should flag only 'test content' between tags: {result:?}"
    );
    let only_warning = &result[0];
    assert_eq!(only_warning.line, 4);
}
1897
#[test]
fn test_plain_text_underscore_boundary_unchanged() {
    // In plain prose (outside HTML tags) an underscore acts as a word
    // boundary, so "test" is matched both at the start of "test_image" and
    // at the end of "just_test".
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "# Heading\n\ntest_image is here and just_test ends here\n";
    let context = crate::lint_context::LintContext::new(
        markdown,
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let result = linter.check(&context).unwrap();

    assert_eq!(
        result.len(),
        2,
        "Should flag 'test' in both 'test_image' and 'just_test': {result:?}"
    );

    // Columns of the two hits: 1 for "test_image", 29 for "just_test".
    let cols: Vec<usize> = result.iter().map(|w| w.column).collect();
    assert!(cols.contains(&1), "Should flag col 1 (test_image): {cols:?}");
    assert!(cols.contains(&29), "Should flag col 29 (just_test): {cols:?}");
}
1918
#[test]
fn test_frontmatter_yaml_keys_not_flagged() {
    // Only YAML values are linted in frontmatter; keys are skipped.
    //   line 3 key   "test" -> not flagged
    //   line 3 value "Test" -> already correct
    //   line 6 body  "Test" -> already correct
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "---\ntitle: Heading\ntest: Some Test value\n---\n\nTest\n";

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "Should not flag YAML keys or correctly capitalized values: {result:?}"
    );
}
1937
#[test]
fn test_frontmatter_yaml_values_flagged() {
    // A wrongly capitalised name inside a YAML value is reported.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "---\ntitle: Heading\nkey: a test value\n---\n\nTest\n";

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert_eq!(result.len(), 1, "Should flag 'test' in YAML value: {result:?}");

    // Line 3 is "key: a test value": "key: a " is 7 chars, so "test" starts at column 8.
    let hit = &result[0];
    assert_eq!(hit.line, 3);
    assert_eq!(hit.column, 8);
}
1952
#[test]
fn test_frontmatter_key_matches_name_not_flagged() {
    // The YAML key "test" coincides with a configured name but, being a key,
    // must not be reported.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "---\ntest: other value\n---\n\nBody text\n";

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "Should not flag YAML key that matches configured name: {result:?}"
    );
}
1967
#[test]
fn test_frontmatter_empty_value_not_flagged() {
    // Keys with nothing after the colon ("test:") or only a space ("test: ")
    // have no value to lint and must produce no warnings.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "---\ntest:\ntest: \n---\n\nBody text\n";

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "Should not flag YAML keys with empty values: {result:?}"
    );
}
1982
#[test]
fn test_frontmatter_nested_yaml_key_not_flagged() {
    // Indented (nested) YAML keys are keys too: "  test: nested value" must
    // not be reported.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "---\nparent:\n  test: nested value\n---\n\nBody text\n";

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert!(result.is_empty(), "Should not flag nested YAML keys: {result:?}");
}
1995
#[test]
fn test_frontmatter_list_items_checked() {
    // Items of a YAML block list are values, so "- test" on line 3 is reported.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "---\ntags:\n  - test\n  - other\n---\n\nBody text\n";

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert_eq!(result.len(), 1, "Should flag 'test' in YAML list item: {result:?}");
    let hit = &result[0];
    assert_eq!(hit.line, 3);
}
2009
#[test]
fn test_frontmatter_value_with_multiple_colons() {
    // In "key: value: more" only the part before the FIRST colon is the key.
    // Here the key "test" is skipped, while "test" inside the value
    // "description: a test thing" is reported.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "---\ntest: description: a test thing\n---\n\nBody text\n";

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert_eq!(
        result.len(),
        1,
        "Should flag 'test' in value after first colon: {result:?}"
    );

    let hit = &result[0];
    assert_eq!(hit.line, 2);
    // The hit must lie past the "test: " key prefix.
    assert!(hit.column > 6, "Violation column should be in value portion");
}
2029
#[test]
fn test_frontmatter_does_not_affect_body() {
    // The presence of frontmatter must not suppress linting of the body:
    // "test" on line 5 is reported.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "---\ntitle: Heading\n---\n\ntest should be flagged here\n";

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert_eq!(result.len(), 1, "Should flag 'test' in body text: {result:?}");
    let hit = &result[0];
    assert_eq!(hit.line, 5);
}
2042
#[test]
fn test_frontmatter_fix_corrects_values_preserves_keys() {
    // `fix` capitalises the name in the YAML value and in the body, but the
    // YAML key "test" stays lowercase.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let original = "---\ntest: a test value\n---\n\ntest here\n";
    let expected = "---\ntest: a Test value\n---\n\nTest here\n";

    let context = create_context(original);
    let fixed = linter.fix(&context).unwrap();

    assert_eq!(fixed, expected);
}
2055
#[test]
fn test_frontmatter_multiword_value_flagged() {
    // Two different configured names inside one YAML value yield two
    // warnings, both on the value's line (line 2).
    let names = vec![String::from("JavaScript"), String::from("TypeScript")];
    let linter = MD044ProperNames::new(names, true);
    let markdown = "---\ndescription: Learn javascript and typescript\n---\n\nBody\n";

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert_eq!(result.len(), 2, "Should flag both names in YAML value: {result:?}");
    let any_off_line = result.iter().any(|w| w.line != 2);
    assert!(!any_off_line);
}
2068
#[test]
fn test_frontmatter_yaml_comments_not_checked() {
    // A `# ...` comment line inside YAML frontmatter is skipped, so the
    // "test" it contains is not reported.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "---\n# test comment\ntitle: Heading\n---\n\nBody text\n";

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert!(result.is_empty(), "Should not flag names in YAML comments: {result:?}");
}
2080
#[test]
fn test_frontmatter_delimiters_not_checked() {
    // The `---` delimiter lines themselves never produce warnings; the only
    // expected hit is the body "test" on line 5.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "---\ntitle: Heading\n---\n\ntest here\n";

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert_eq!(result.len(), 1, "Should only flag body text: {result:?}");
    let hit = &result[0];
    assert_eq!(hit.line, 5);
}
2094
#[test]
fn test_frontmatter_continuation_lines_checked() {
    // Indented lines without a colon that follow "description: >" are value
    // content, so "test" on the first continuation line (line 3) is reported.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "---\ndescription: >\n  a test value\n  continued here\n---\n\nBody\n";

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert_eq!(result.len(), 1, "Should flag 'test' in continuation line: {result:?}");
    let hit = &result[0];
    assert_eq!(hit.line, 3);
}
2108
#[test]
fn test_frontmatter_quoted_values_checked() {
    // The text inside a double-quoted YAML value is linted like an unquoted one.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "---\ntitle: \"a test title\"\n---\n\nBody\n";

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert_eq!(result.len(), 1, "Should flag 'test' in quoted YAML value: {result:?}");
    let hit = &result[0];
    assert_eq!(hit.line, 2);
}
2121
#[test]
fn test_frontmatter_single_quoted_values_checked() {
    // The text inside a single-quoted YAML value is linted as well.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "---\ntitle: 'a test title'\n---\n\nBody\n";

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert_eq!(
        result.len(),
        1,
        "Should flag 'test' in single-quoted YAML value: {result:?}"
    );
    let hit = &result[0];
    assert_eq!(hit.line, 2);
}
2138
#[test]
fn test_frontmatter_fix_multiword_values() {
    // `fix` corrects every configured name that occurs in a frontmatter
    // value, not just the first one.
    let names = vec![String::from("JavaScript"), String::from("TypeScript")];
    let linter = MD044ProperNames::new(names, true);

    let original = "---\ndescription: Learn javascript and typescript\n---\n\nBody\n";
    let expected = "---\ndescription: Learn JavaScript and TypeScript\n---\n\nBody\n";

    let context = create_context(original);
    let fixed = linter.fix(&context).unwrap();

    assert_eq!(fixed, expected);
}
2153
#[test]
fn test_frontmatter_fix_preserves_yaml_structure() {
    // `fix` rewrites the list item, the scalar value and the body while the
    // YAML layout (keys, indentation, list markers) comes out unchanged.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    let original = "---\ntags:\n  - test\n  - other\ntitle: a test doc\n---\n\ntest body\n";
    let expected = "---\ntags:\n  - Test\n  - other\ntitle: a Test doc\n---\n\nTest body\n";

    let context = create_context(original);
    let fixed = linter.fix(&context).unwrap();

    assert_eq!(fixed, expected);
}
2168
#[test]
fn test_frontmatter_toml_delimiters_not_checked() {
    // TOML frontmatter (`+++` delimiters) gets the same key/value treatment:
    //   "title" key on line 2         -> not flagged
    //   "test" in the quoted value    -> flagged (line 2)
    //   "test" in the body            -> flagged (line 5)
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "+++\ntitle = \"a test title\"\n+++\n\ntest body\n";

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert_eq!(result.len(), 2, "Should flag TOML value and body: {result:?}");

    let frontmatter_hits = result.iter().filter(|w| w.line == 2).count();
    assert_eq!(frontmatter_hits, 1, "Should flag 'test' in TOML value: {result:?}");

    let body_hits = result.iter().filter(|w| w.line == 5).count();
    assert_eq!(body_hits, 1, "Should flag body 'test': {result:?}");
}
2187
#[test]
fn test_frontmatter_toml_key_not_flagged() {
    // A TOML key that coincides with a configured name is not reported;
    // only values are linted.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "+++\ntest = \"other value\"\n+++\n\nBody text\n";

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "Should not flag TOML key that matches configured name: {result:?}"
    );
}
2202
#[test]
fn test_frontmatter_toml_fix_preserves_keys() {
    // `fix` on TOML frontmatter: the key "test" stays lowercase, the "test"
    // inside the quoted value and the one in the body become "Test".
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let original = "+++\ntest = \"a test value\"\n+++\n\ntest here\n";
    let expected = "+++\ntest = \"a Test value\"\n+++\n\nTest here\n";

    let context = create_context(original);
    let fixed = linter.fix(&context).unwrap();

    assert_eq!(fixed, expected);
}
2215
#[test]
fn test_frontmatter_list_item_mapping_key_not_flagged() {
    // "- test: nested value" is a mapping inside a list item; its key "test"
    // must not be reported, and the value contains no configured name.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "---\nitems:\n  - test: nested value\n---\n\nBody text\n";

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "Should not flag YAML key in list-item mapping: {result:?}"
    );
}
2231
#[test]
fn test_frontmatter_list_item_mapping_value_flagged() {
    // In "- key: a test value" the part after the colon is a value and is
    // linted, so "test" on line 3 is reported.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "---\nitems:\n  - key: a test value\n---\n\nBody text\n";

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert_eq!(
        result.len(),
        1,
        "Should flag 'test' in list-item mapping value: {result:?}"
    );
    let hit = &result[0];
    assert_eq!(hit.line, 3);
}
2248
#[test]
fn test_frontmatter_bare_list_item_still_flagged() {
    // A list item without a colon ("- test") is a plain value and is reported.
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);
    let markdown = "---\ntags:\n  - test\n  - other\n---\n\nBody text\n";

    let context = create_context(markdown);
    let result = linter.check(&context).unwrap();

    assert_eq!(result.len(), 1, "Should flag 'test' in bare list item: {result:?}");
    let hit = &result[0];
    assert_eq!(hit.line, 3);
}
2261
2262    #[test]
2263    fn test_frontmatter_flow_mapping_not_flagged() {
2264        // Flow mappings like {test: value} contain YAML keys that should not be flagged.
2265        // The entire flow construct should be skipped.
2266        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2267
2268        let content = "---\nflow_map: {test: value, other: test}\n---\n\nBody text\n";
2269        let ctx = create_context(content);
2270        let result = rule.check(&ctx).unwrap();
2271
2272        assert!(
2273            result.is_empty(),
2274            "Should not flag names inside flow mappings: {result:?}"
2275        );
2276    }
2277
2278    #[test]
2279    fn test_frontmatter_flow_sequence_not_flagged() {
2280        // Flow sequences like [test, other] should also be skipped.
2281        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2282
2283        let content = "---\nitems: [test, other, test]\n---\n\nBody text\n";
2284        let ctx = create_context(content);
2285        let result = rule.check(&ctx).unwrap();
2286
2287        assert!(
2288            result.is_empty(),
2289            "Should not flag names inside flow sequences: {result:?}"
2290        );
2291    }
2292
2293    #[test]
2294    fn test_frontmatter_list_item_mapping_fix_preserves_key() {
2295        // Fix should correct values in list-item mappings but preserve keys.
2296        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2297
2298        let content = "---\nitems:\n  - test: a test value\n---\n\ntest here\n";
2299        let ctx = create_context(content);
2300        let fixed = rule.fix(&ctx).unwrap();
2301
2302        // "test" as list-item key should remain lowercase;
2303        // "test" in value portion should become "Test"
2304        assert_eq!(fixed, "---\nitems:\n  - test: a Test value\n---\n\nTest here\n");
2305    }
2306
2307    #[test]
2308    fn test_frontmatter_backtick_code_not_flagged() {
2309        // Names inside backticks in frontmatter should NOT be flagged when code_blocks=false.
2310        let config = MD044Config {
2311            names: vec!["GoodApplication".to_string()],
2312            code_blocks: false,
2313            ..MD044Config::default()
2314        };
2315        let rule = MD044ProperNames::from_config_struct(config);
2316
2317        let content = "---\ntitle: \"`goodapplication` CLI\"\n---\n\nIntroductory `goodapplication` CLI text.\n";
2318        let ctx = create_context(content);
2319        let result = rule.check(&ctx).unwrap();
2320
2321        // Neither the frontmatter nor the body backtick-wrapped name should be flagged
2322        assert!(
2323            result.is_empty(),
2324            "Should not flag names inside backticks in frontmatter or body: {result:?}"
2325        );
2326    }
2327
2328    #[test]
2329    fn test_frontmatter_unquoted_backtick_code_not_flagged() {
2330        // Exact case from issue #513: unquoted YAML frontmatter with backticks
2331        let config = MD044Config {
2332            names: vec!["GoodApplication".to_string()],
2333            code_blocks: false,
2334            ..MD044Config::default()
2335        };
2336        let rule = MD044ProperNames::from_config_struct(config);
2337
2338        let content = "---\ntitle: `goodapplication` CLI\n---\n\nIntroductory `goodapplication` CLI text.\n";
2339        let ctx = create_context(content);
2340        let result = rule.check(&ctx).unwrap();
2341
2342        assert!(
2343            result.is_empty(),
2344            "Should not flag names inside backticks in unquoted YAML frontmatter: {result:?}"
2345        );
2346    }
2347
2348    #[test]
2349    fn test_frontmatter_bare_name_still_flagged_with_backtick_nearby() {
2350        // Names outside backticks in frontmatter should still be flagged.
2351        let config = MD044Config {
2352            names: vec!["GoodApplication".to_string()],
2353            code_blocks: false,
2354            ..MD044Config::default()
2355        };
2356        let rule = MD044ProperNames::from_config_struct(config);
2357
2358        let content = "---\ntitle: goodapplication `goodapplication` CLI\n---\n\nBody\n";
2359        let ctx = create_context(content);
2360        let result = rule.check(&ctx).unwrap();
2361
2362        // Only the bare "goodapplication" (before backticks) should be flagged
2363        assert_eq!(
2364            result.len(),
2365            1,
2366            "Should flag bare name but not backtick-wrapped name: {result:?}"
2367        );
2368        assert_eq!(result[0].line, 2);
2369        assert_eq!(result[0].column, 8); // "title: " = 7 chars, name at column 8
2370    }
2371
2372    #[test]
2373    fn test_frontmatter_backtick_code_with_code_blocks_true() {
2374        // When code_blocks=true, names inside backticks ARE checked.
2375        let config = MD044Config {
2376            names: vec!["GoodApplication".to_string()],
2377            code_blocks: true,
2378            ..MD044Config::default()
2379        };
2380        let rule = MD044ProperNames::from_config_struct(config);
2381
2382        let content = "---\ntitle: \"`goodapplication` CLI\"\n---\n\nBody\n";
2383        let ctx = create_context(content);
2384        let result = rule.check(&ctx).unwrap();
2385
2386        // With code_blocks=true, backtick-wrapped name SHOULD be flagged
2387        assert_eq!(
2388            result.len(),
2389            1,
2390            "Should flag backtick-wrapped name when code_blocks=true: {result:?}"
2391        );
2392        assert_eq!(result[0].line, 2);
2393    }
2394
2395    #[test]
2396    fn test_frontmatter_fix_preserves_backtick_code() {
2397        // Fix should NOT change names inside backticks in frontmatter.
2398        let config = MD044Config {
2399            names: vec!["GoodApplication".to_string()],
2400            code_blocks: false,
2401            ..MD044Config::default()
2402        };
2403        let rule = MD044ProperNames::from_config_struct(config);
2404
2405        let content = "---\ntitle: \"`goodapplication` CLI\"\n---\n\nIntroductory `goodapplication` CLI text.\n";
2406        let ctx = create_context(content);
2407        let fixed = rule.fix(&ctx).unwrap();
2408
2409        // Neither backtick-wrapped occurrence should be changed
2410        assert_eq!(
2411            fixed, content,
2412            "Fix should not modify names inside backticks in frontmatter"
2413        );
2414    }
2415
2416    // --- Angle-bracket URL tests (issue #457) ---
2417
2418    #[test]
2419    fn test_angle_bracket_url_in_html_comment_not_flagged() {
2420        // Angle-bracket URLs inside HTML comments should be skipped
2421        let config = MD044Config {
2422            names: vec!["Test".to_string()],
2423            ..MD044Config::default()
2424        };
2425        let rule = MD044ProperNames::from_config_struct(config);
2426
2427        let content = "---\ntitle: Level 1 heading\n---\n\n<https://www.example.test>\n\n<!-- This is a Test https://www.example.test -->\n<!-- This is a Test <https://www.example.test> -->\n";
2428        let ctx = create_context(content);
2429        let result = rule.check(&ctx).unwrap();
2430
2431        // Line 7: "Test" in comment prose before bare URL -- already correct capitalization
2432        // Line 7: "test" in bare URL (not in angle brackets) -- but "test" is in URL domain, not prose.
2433        //   However, .example.test has "test" at a word boundary (after '.'), so it IS flagged.
2434        // Line 8: "Test" in comment prose -- correct capitalization, not flagged
2435        // Line 8: "test" in <https://www.example.test> -- inside angle-bracket URL, NOT flagged
2436
2437        // The key assertion: line 8's angle-bracket URL should NOT produce a warning
2438        let line8_warnings: Vec<_> = result.iter().filter(|w| w.line == 8).collect();
2439        assert!(
2440            line8_warnings.is_empty(),
2441            "Should not flag names inside angle-bracket URLs in HTML comments: {line8_warnings:?}"
2442        );
2443    }
2444
2445    #[test]
2446    fn test_bare_url_in_html_comment_still_flagged() {
2447        // Bare URLs (not in angle brackets) inside HTML comments should still be checked
2448        let config = MD044Config {
2449            names: vec!["Test".to_string()],
2450            ..MD044Config::default()
2451        };
2452        let rule = MD044ProperNames::from_config_struct(config);
2453
2454        let content = "<!-- This is a test https://www.example.test -->\n";
2455        let ctx = create_context(content);
2456        let result = rule.check(&ctx).unwrap();
2457
2458        // "test" appears as prose text before URL and also in the bare URL domain
2459        // At minimum, the prose "test" should be flagged
2460        assert!(
2461            !result.is_empty(),
2462            "Should flag 'test' in prose text of HTML comment with bare URL"
2463        );
2464    }
2465
2466    #[test]
2467    fn test_angle_bracket_url_in_regular_markdown_not_flagged() {
2468        // Angle-bracket URLs in regular markdown are already handled by the link parser,
2469        // but the angle-bracket check provides a safety net
2470        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2471
2472        let content = "<https://www.example.test>\n";
2473        let ctx = create_context(content);
2474        let result = rule.check(&ctx).unwrap();
2475
2476        assert!(
2477            result.is_empty(),
2478            "Should not flag names inside angle-bracket URLs in regular markdown: {result:?}"
2479        );
2480    }
2481
2482    #[test]
2483    fn test_multiple_angle_bracket_urls_in_one_comment() {
2484        let config = MD044Config {
2485            names: vec!["Test".to_string()],
2486            ..MD044Config::default()
2487        };
2488        let rule = MD044ProperNames::from_config_struct(config);
2489
2490        let content = "<!-- See <https://test.example.com> and <https://www.example.test> for details -->\n";
2491        let ctx = create_context(content);
2492        let result = rule.check(&ctx).unwrap();
2493
2494        // Both URLs are inside angle brackets, so "test" inside them should NOT be flagged
2495        assert!(
2496            result.is_empty(),
2497            "Should not flag names inside multiple angle-bracket URLs: {result:?}"
2498        );
2499    }
2500
2501    #[test]
2502    fn test_angle_bracket_non_url_still_flagged() {
2503        // <Test> is NOT a URL (no scheme), so is_in_angle_bracket_url does NOT protect it.
2504        // Whether it gets flagged depends on HTML tag detection, not on our URL check.
2505        assert!(
2506            !MD044ProperNames::is_in_angle_bracket_url("<test> which is not a URL.", 1),
2507            "is_in_angle_bracket_url should return false for non-URL angle brackets"
2508        );
2509    }
2510
2511    #[test]
2512    fn test_angle_bracket_mailto_url_not_flagged() {
2513        let config = MD044Config {
2514            names: vec!["Test".to_string()],
2515            ..MD044Config::default()
2516        };
2517        let rule = MD044ProperNames::from_config_struct(config);
2518
2519        let content = "<!-- Contact <mailto:test@example.com> for help -->\n";
2520        let ctx = create_context(content);
2521        let result = rule.check(&ctx).unwrap();
2522
2523        assert!(
2524            result.is_empty(),
2525            "Should not flag names inside angle-bracket mailto URLs: {result:?}"
2526        );
2527    }
2528
2529    #[test]
2530    fn test_angle_bracket_ftp_url_not_flagged() {
2531        let config = MD044Config {
2532            names: vec!["Test".to_string()],
2533            ..MD044Config::default()
2534        };
2535        let rule = MD044ProperNames::from_config_struct(config);
2536
2537        let content = "<!-- Download from <ftp://test.example.com/file> -->\n";
2538        let ctx = create_context(content);
2539        let result = rule.check(&ctx).unwrap();
2540
2541        assert!(
2542            result.is_empty(),
2543            "Should not flag names inside angle-bracket FTP URLs: {result:?}"
2544        );
2545    }
2546
2547    #[test]
2548    fn test_angle_bracket_url_fix_preserves_url() {
2549        // Fix should not modify text inside angle-bracket URLs
2550        let config = MD044Config {
2551            names: vec!["Test".to_string()],
2552            ..MD044Config::default()
2553        };
2554        let rule = MD044ProperNames::from_config_struct(config);
2555
2556        let content = "<!-- test text <https://www.example.test> -->\n";
2557        let ctx = create_context(content);
2558        let fixed = rule.fix(&ctx).unwrap();
2559
2560        // "test" in prose should be fixed, URL should be preserved
2561        assert!(
2562            fixed.contains("<https://www.example.test>"),
2563            "Fix should preserve angle-bracket URLs: {fixed}"
2564        );
2565        assert!(
2566            fixed.contains("Test text"),
2567            "Fix should correct prose 'test' to 'Test': {fixed}"
2568        );
2569    }
2570
2571    #[test]
2572    fn test_is_in_angle_bracket_url_helper() {
2573        // Direct tests of the helper function
2574        let line = "text <https://example.test> more text";
2575
2576        // Inside the URL
2577        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 5)); // '<'
2578        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 6)); // 'h'
2579        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 15)); // middle of URL
2580        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 26)); // '>'
2581
2582        // Outside the URL
2583        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 0)); // 't' at start
2584        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 4)); // space before '<'
2585        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 27)); // space after '>'
2586
2587        // Non-URL angle brackets
2588        assert!(!MD044ProperNames::is_in_angle_bracket_url("<notaurl>", 1));
2589
2590        // mailto scheme
2591        assert!(MD044ProperNames::is_in_angle_bracket_url(
2592            "<mailto:test@example.com>",
2593            10
2594        ));
2595
2596        // ftp scheme
2597        assert!(MD044ProperNames::is_in_angle_bracket_url(
2598            "<ftp://test.example.com>",
2599            10
2600        ));
2601    }
2602
2603    #[test]
2604    fn test_is_in_angle_bracket_url_uppercase_scheme() {
2605        // RFC 3986: URI schemes are case-insensitive
2606        assert!(MD044ProperNames::is_in_angle_bracket_url(
2607            "<HTTPS://test.example.com>",
2608            10
2609        ));
2610        assert!(MD044ProperNames::is_in_angle_bracket_url(
2611            "<Http://test.example.com>",
2612            10
2613        ));
2614    }
2615
2616    #[test]
2617    fn test_is_in_angle_bracket_url_uncommon_schemes() {
2618        // ssh scheme
2619        assert!(MD044ProperNames::is_in_angle_bracket_url(
2620            "<ssh://test@example.com>",
2621            10
2622        ));
2623        // file scheme
2624        assert!(MD044ProperNames::is_in_angle_bracket_url("<file:///test/path>", 10));
2625        // data scheme (no authority, just colon)
2626        assert!(MD044ProperNames::is_in_angle_bracket_url("<data:text/plain;test>", 10));
2627    }
2628
2629    #[test]
2630    fn test_is_in_angle_bracket_url_unclosed() {
2631        // Unclosed angle bracket should NOT match
2632        assert!(!MD044ProperNames::is_in_angle_bracket_url(
2633            "<https://test.example.com",
2634            10
2635        ));
2636    }
2637
2638    #[test]
2639    fn test_vale_inline_config_comments_not_flagged() {
2640        let config = MD044Config {
2641            names: vec!["Vale".to_string(), "JavaScript".to_string()],
2642            ..MD044Config::default()
2643        };
2644        let rule = MD044ProperNames::from_config_struct(config);
2645
2646        let content = "\
2647<!-- vale off -->
2648Some javascript text here.
2649<!-- vale on -->
2650<!-- vale Style.Rule = NO -->
2651More javascript text.
2652<!-- vale Style.Rule = YES -->
2653<!-- vale JavaScript.Grammar = NO -->
2654";
2655        let ctx = create_context(content);
2656        let result = rule.check(&ctx).unwrap();
2657
2658        // Only the body text lines (2, 5) should be flagged for "javascript"
2659        assert_eq!(result.len(), 2, "Should only flag body lines, not Vale config comments");
2660        assert_eq!(result[0].line, 2);
2661        assert_eq!(result[1].line, 5);
2662    }
2663
2664    #[test]
2665    fn test_remark_lint_inline_config_comments_not_flagged() {
2666        let config = MD044Config {
2667            names: vec!["JavaScript".to_string()],
2668            ..MD044Config::default()
2669        };
2670        let rule = MD044ProperNames::from_config_struct(config);
2671
2672        let content = "\
2673<!-- lint disable remark-lint-some-rule -->
2674Some javascript text here.
2675<!-- lint enable remark-lint-some-rule -->
2676<!-- lint ignore remark-lint-some-rule -->
2677More javascript text.
2678";
2679        let ctx = create_context(content);
2680        let result = rule.check(&ctx).unwrap();
2681
2682        assert_eq!(
2683            result.len(),
2684            2,
2685            "Should only flag body lines, not remark-lint config comments"
2686        );
2687        assert_eq!(result[0].line, 2);
2688        assert_eq!(result[1].line, 5);
2689    }
2690
2691    #[test]
2692    fn test_fix_does_not_modify_vale_remark_lint_comments() {
2693        let config = MD044Config {
2694            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2695            ..MD044Config::default()
2696        };
2697        let rule = MD044ProperNames::from_config_struct(config);
2698
2699        let content = "\
2700<!-- vale off -->
2701Some javascript text.
2702<!-- vale on -->
2703<!-- lint disable remark-lint-some-rule -->
2704More javascript text.
2705<!-- lint enable remark-lint-some-rule -->
2706";
2707        let ctx = create_context(content);
2708        let fixed = rule.fix(&ctx).unwrap();
2709
2710        // Config directive lines must be preserved unchanged
2711        assert!(fixed.contains("<!-- vale off -->"));
2712        assert!(fixed.contains("<!-- vale on -->"));
2713        assert!(fixed.contains("<!-- lint disable remark-lint-some-rule -->"));
2714        assert!(fixed.contains("<!-- lint enable remark-lint-some-rule -->"));
2715        // Body text should be fixed
2716        assert!(fixed.contains("Some JavaScript text."));
2717        assert!(fixed.contains("More JavaScript text."));
2718    }
2719
2720    #[test]
2721    fn test_mixed_tool_directives_all_skipped() {
2722        let config = MD044Config {
2723            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2724            ..MD044Config::default()
2725        };
2726        let rule = MD044ProperNames::from_config_struct(config);
2727
2728        let content = "\
2729<!-- rumdl-disable MD044 -->
2730Some javascript text.
2731<!-- markdownlint-disable -->
2732More javascript text.
2733<!-- vale off -->
2734Even more javascript text.
2735<!-- lint disable some-rule -->
2736Final javascript text.
2737<!-- rumdl-enable MD044 -->
2738<!-- markdownlint-enable -->
2739<!-- vale on -->
2740<!-- lint enable some-rule -->
2741";
2742        let ctx = create_context(content);
2743        let result = rule.check(&ctx).unwrap();
2744
2745        // Only body text lines should be flagged (lines 2, 4, 6, 8)
2746        assert_eq!(
2747            result.len(),
2748            4,
2749            "Should only flag body lines, not any tool directive comments"
2750        );
2751        assert_eq!(result[0].line, 2);
2752        assert_eq!(result[1].line, 4);
2753        assert_eq!(result[2].line, 6);
2754        assert_eq!(result[3].line, 8);
2755    }
2756
2757    #[test]
2758    fn test_vale_remark_lint_edge_cases_not_matched() {
2759        let config = MD044Config {
2760            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2761            ..MD044Config::default()
2762        };
2763        let rule = MD044ProperNames::from_config_struct(config);
2764
2765        // These are regular HTML comments, NOT tool directives:
2766        // - "<!-- vale -->" is not a valid Vale directive (no action keyword)
2767        // - "<!-- vale is a tool -->" starts with "vale" but is prose, not a directive
2768        // - "<!-- valedictorian javascript -->" does not start with "<!-- vale "
2769        // - "<!-- linting javascript tips -->" does not start with "<!-- lint "
2770        // - "<!-- vale javascript -->" starts with "vale" but has no action keyword
2771        // - "<!-- lint your javascript code -->" starts with "lint" but has no action keyword
2772        let content = "\
2773<!-- vale -->
2774<!-- vale is a tool for writing -->
2775<!-- valedictorian javascript -->
2776<!-- linting javascript tips -->
2777<!-- vale javascript -->
2778<!-- lint your javascript code -->
2779";
2780        let ctx = create_context(content);
2781        let result = rule.check(&ctx).unwrap();
2782
2783        // Line 1: "<!-- vale -->" contains "vale" (wrong case for "Vale") -> flagged
2784        // Line 2: "<!-- vale is a tool for writing -->" contains "vale" -> flagged
2785        // Line 3: "<!-- valedictorian javascript -->" contains "javascript" -> flagged
2786        // Line 4: "<!-- linting javascript tips -->" contains "javascript" -> flagged
2787        // Line 5: "<!-- vale javascript -->" contains "vale" and "javascript" -> flagged for both
2788        // Line 6: "<!-- lint your javascript code -->" contains "javascript" -> flagged
2789        assert_eq!(
2790            result.len(),
2791            7,
2792            "Should flag proper names in non-directive HTML comments: got {result:?}"
2793        );
2794        assert_eq!(result[0].line, 1); // "vale" in <!-- vale -->
2795        assert_eq!(result[1].line, 2); // "vale" in <!-- vale is a tool -->
2796        assert_eq!(result[2].line, 3); // "javascript" in <!-- valedictorian javascript -->
2797        assert_eq!(result[3].line, 4); // "javascript" in <!-- linting javascript tips -->
2798        assert_eq!(result[4].line, 5); // "vale" in <!-- vale javascript -->
2799        assert_eq!(result[5].line, 5); // "javascript" in <!-- vale javascript -->
2800        assert_eq!(result[6].line, 6); // "javascript" in <!-- lint your javascript code -->
2801    }
2802
2803    #[test]
2804    fn test_vale_style_directives_skipped() {
2805        let config = MD044Config {
2806            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2807            ..MD044Config::default()
2808        };
2809        let rule = MD044ProperNames::from_config_struct(config);
2810
2811        // These ARE valid Vale directives and should be skipped:
2812        let content = "\
2813<!-- vale style = MyStyle -->
2814<!-- vale styles = Style1, Style2 -->
2815<!-- vale MyRule.Name = YES -->
2816<!-- vale MyRule.Name = NO -->
2817Some javascript text.
2818";
2819        let ctx = create_context(content);
2820        let result = rule.check(&ctx).unwrap();
2821
2822        // Only line 5 (body text) should be flagged
2823        assert_eq!(
2824            result.len(),
2825            1,
2826            "Should only flag body lines, not Vale style/rule directives: got {result:?}"
2827        );
2828        assert_eq!(result[0].line, 5);
2829    }
2830
2831    // --- is_in_backtick_code_in_line unit tests ---
2832
2833    #[test]
2834    fn test_backtick_code_single_backticks() {
2835        let line = "hello `world` bye";
2836        // 'w' is at index 7, inside the backtick span (content between backticks at 6 and 12)
2837        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 7));
2838        // 'h' at index 0 is outside
2839        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2840        // 'b' at index 14 is outside
2841        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 14));
2842    }
2843
2844    #[test]
2845    fn test_backtick_code_double_backticks() {
2846        let line = "a ``code`` b";
2847        // 'c' is at index 4, inside ``...``
2848        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 4));
2849        // 'a' at index 0 is outside
2850        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2851        // 'b' at index 11 is outside
2852        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 11));
2853    }
2854
2855    #[test]
2856    fn test_backtick_code_unclosed() {
2857        let line = "a `code b";
2858        // No closing backtick, so nothing is a code span
2859        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 3));
2860    }
2861
2862    #[test]
2863    fn test_backtick_code_mismatched_count() {
2864        // Single backtick opening, double backtick is not a match
2865        let line = "a `code`` b";
2866        // The single ` at index 2 doesn't match `` at index 7-8
2867        // So 'c' at index 3 is NOT in a code span
2868        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 3));
2869    }
2870
2871    #[test]
2872    fn test_backtick_code_multiple_spans() {
2873        let line = "`first` and `second`";
2874        // 'f' at index 1 (inside first span)
2875        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 1));
2876        // 'a' at index 8 (between spans)
2877        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 8));
2878        // 's' at index 13 (inside second span)
2879        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 13));
2880    }
2881
2882    #[test]
2883    fn test_backtick_code_on_backtick_boundary() {
2884        let line = "`code`";
2885        // Position 0 is the opening backtick itself, not inside the span
2886        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2887        // Position 5 is the closing backtick, not inside the span
2888        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 5));
2889        // Position 1-4 are inside the span
2890        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 1));
2891        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 4));
2892    }
2893
2894    // Double-bracket WikiLink + URL: [[text]](url)
2895    // pulldown-cmark parses [[text]] as a WikiLink but leaves the (url)
2896    // as plain text, so ctx.links does not cover the URL portion.
2897    // MD044 must fall back to is_in_markdown_link_url for all lines.
2898
2899    #[test]
2900    fn test_double_bracket_link_url_not_flagged() {
2901        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
2902        // Exact reproduction from issue #564
2903        let content = "[[rumdl]](https://github.com/rvben/rumdl)";
2904        let ctx = create_context(content);
2905        let result = rule.check(&ctx).unwrap();
2906        assert!(
2907            result.is_empty(),
2908            "URL inside [[text]](url) must not be flagged, got: {result:?}"
2909        );
2910    }
2911
2912    #[test]
2913    fn test_double_bracket_link_url_not_fixed() {
2914        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
2915        let content = "[[rumdl]](https://github.com/rvben/rumdl)\n";
2916        let ctx = create_context(content);
2917        let fixed = rule.fix(&ctx).unwrap();
2918        assert_eq!(
2919            fixed, content,
2920            "fix() must leave the URL inside [[text]](url) unchanged"
2921        );
2922    }
2923
2924    #[test]
2925    fn test_double_bracket_link_text_still_flagged() {
2926        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
2927        // The link text portion [[github]](url) should still be checked.
2928        let content = "[[github]](https://example.com)";
2929        let ctx = create_context(content);
2930        let result = rule.check(&ctx).unwrap();
2931        assert_eq!(
2932            result.len(),
2933            1,
2934            "Incorrect name in [[text]] link text should still be flagged, got: {result:?}"
2935        );
2936        assert_eq!(result[0].message, "Proper name 'github' should be 'GitHub'");
2937    }
2938
2939    #[test]
2940    fn test_double_bracket_link_mixed_line() {
2941        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
2942        // URL must be skipped, standalone text must be flagged.
2943        let content = "See [[rumdl]](https://github.com/rvben/rumdl) and github for more.";
2944        let ctx = create_context(content);
2945        let result = rule.check(&ctx).unwrap();
2946        assert_eq!(
2947            result.len(),
2948            1,
2949            "Only the standalone 'github' after the link should be flagged, got: {result:?}"
2950        );
2951        assert!(result[0].message.contains("'github'"));
2952        // "See " (4) + "[[rumdl]](https://github.com/rvben/rumdl)" (42) + " and " (4) = column 51
2953        assert_eq!(
2954            result[0].column, 51,
2955            "Flagged column should be the trailing 'github', not the one in the URL"
2956        );
2957    }
2958
2959    #[test]
2960    fn test_regular_link_url_still_not_flagged() {
2961        // Confirm existing [text](url) behavior is unaffected by the fix.
2962        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
2963        let content = "[rumdl](https://github.com/rvben/rumdl)";
2964        let ctx = create_context(content);
2965        let result = rule.check(&ctx).unwrap();
2966        assert!(
2967            result.is_empty(),
2968            "URL inside regular [text](url) must still not be flagged, got: {result:?}"
2969        );
2970    }
2971
2972    #[test]
2973    fn test_link_like_text_in_code_span_still_flagged_when_code_blocks_enabled() {
2974        // When code-blocks = true the user explicitly opts into checking code spans.
2975        // A code span containing link-like text (`[foo](https://github.com)`) must
2976        // NOT be silently suppressed by is_in_markdown_link_url: the content is
2977        // literal characters, not a real Markdown link.
2978        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
2979        let content = "`[foo](https://github.com/org/repo)`";
2980        let ctx = create_context(content);
2981        let result = rule.check(&ctx).unwrap();
2982        assert_eq!(
2983            result.len(),
2984            1,
2985            "Proper name inside a code span must be flagged when code-blocks=true, got: {result:?}"
2986        );
2987        assert!(result[0].message.contains("'github'"));
2988    }
2989
2990    #[test]
2991    fn test_malformed_link_not_treated_as_url() {
2992        // [text](url with spaces) is NOT a valid Markdown link; pulldown-cmark
2993        // does not parse it, so the name inside must still be flagged.
2994        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
2995        let content = "See [rumdl](github repo) for details.";
2996        let ctx = create_context(content);
2997        let result = rule.check(&ctx).unwrap();
2998        assert_eq!(
2999            result.len(),
3000            1,
3001            "Name inside malformed [text](url with spaces) must still be flagged, got: {result:?}"
3002        );
3003        assert!(result[0].message.contains("'github'"));
3004    }
3005
3006    #[test]
3007    fn test_wikilink_followed_by_prose_parens_still_flagged() {
3008        // [[note]](github repo) — WikiLink followed by parenthesised prose, NOT
3009        // a valid link URL (space in destination). pulldown-cmark does not parse
3010        // it as a link, so the name inside must still be flagged.
3011        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], false);
3012        let content = "[[note]](github repo)";
3013        let ctx = create_context(content);
3014        let result = rule.check(&ctx).unwrap();
3015        assert_eq!(
3016            result.len(),
3017            1,
3018            "Name inside [[wikilink]](prose with spaces) must still be flagged, got: {result:?}"
3019        );
3020        assert!(result[0].message.contains("'github'"));
3021    }
3022
3023    /// Roundtrip safety: fix() output must produce zero warnings on re-check.
3024    #[test]
3025    fn test_roundtrip_fix_then_check_basic() {
3026        let rule = MD044ProperNames::new(
3027            vec![
3028                "JavaScript".to_string(),
3029                "TypeScript".to_string(),
3030                "Node.js".to_string(),
3031            ],
3032            true,
3033        );
3034        let content = "I love javascript, typescript, and nodejs!";
3035        let ctx = create_context(content);
3036        let fixed = rule.fix(&ctx).unwrap();
3037        let ctx2 = create_context(&fixed);
3038        let warnings = rule.check(&ctx2).unwrap();
3039        assert!(
3040            warnings.is_empty(),
3041            "Re-check after fix should produce zero warnings, got: {warnings:?}"
3042        );
3043    }
3044
3045    /// Roundtrip safety: fix() output must produce zero warnings for multiline content.
3046    #[test]
3047    fn test_roundtrip_fix_then_check_multiline() {
3048        let rule = MD044ProperNames::new(vec!["Rust".to_string(), "Python".to_string()], true);
3049        let content = "First line with rust.\nSecond line with python.\nThird line with RUST and PYTHON.\n";
3050        let ctx = create_context(content);
3051        let fixed = rule.fix(&ctx).unwrap();
3052        let ctx2 = create_context(&fixed);
3053        let warnings = rule.check(&ctx2).unwrap();
3054        assert!(
3055            warnings.is_empty(),
3056            "Re-check after fix should produce zero warnings, got: {warnings:?}"
3057        );
3058    }
3059
3060    /// Roundtrip safety: fix() with inline config disable blocks.
3061    #[test]
3062    fn test_roundtrip_fix_then_check_inline_config() {
3063        let config = MD044Config {
3064            names: vec!["RUMDL".to_string()],
3065            ..MD044Config::default()
3066        };
3067        let rule = MD044ProperNames::from_config_struct(config);
3068        let content =
3069            "<!-- rumdl-disable MD044 -->\nSome rumdl text.\n<!-- rumdl-enable MD044 -->\n\nSome rumdl text outside.\n";
3070        let ctx = create_context(content);
3071        let fixed = rule.fix(&ctx).unwrap();
3072        // The disabled block should be preserved, the outside text fixed
3073        assert!(
3074            fixed.contains("Some rumdl text.\n"),
3075            "Disabled block text should be preserved"
3076        );
3077        assert!(
3078            fixed.contains("Some RUMDL text outside."),
3079            "Outside text should be fixed"
3080        );
3081    }
3082
3083    /// Roundtrip safety: fix() with HTML comment content.
3084    #[test]
3085    fn test_roundtrip_fix_then_check_html_comments() {
3086        let config = MD044Config {
3087            names: vec!["JavaScript".to_string()],
3088            ..MD044Config::default()
3089        };
3090        let rule = MD044ProperNames::from_config_struct(config);
3091        let content = "# Guide\n\n<!-- javascript mentioned here -->\n\njavascript outside\n";
3092        let ctx = create_context(content);
3093        let fixed = rule.fix(&ctx).unwrap();
3094        let ctx2 = create_context(&fixed);
3095        let warnings = rule.check(&ctx2).unwrap();
3096        assert!(
3097            warnings.is_empty(),
3098            "Re-check after fix should produce zero warnings, got: {warnings:?}"
3099        );
3100    }
3101
3102    /// Roundtrip safety: fix() preserves content when no violations exist.
3103    #[test]
3104    fn test_roundtrip_no_op_when_correct() {
3105        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
3106        let content = "This uses JavaScript and TypeScript correctly.\n";
3107        let ctx = create_context(content);
3108        let fixed = rule.fix(&ctx).unwrap();
3109        assert_eq!(fixed, content, "Fix should be a no-op when content is already correct");
3110    }
3111}
rumdl_lib/rules/md044_proper_names.rs

rumdl_lib/rules/
md044_proper_names.rs